// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
 */

#include <crypto/curve25519.h>
#include <crypto/internal/kpp.h>

#include <linux/types.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/scatterlist.h>

#include <asm/cpufeature.h>
#include <asm/processor.h>

static __always_inline u64 eq_mask(u64 a, u64 b)
{
	u64 x = a ^ b;
	u64 minus_x = ~x + (u64)1U;
	u64 x_or_minus_x = x | minus_x;
	u64 xnx = x_or_minus_x >> (u32)63U;
	return xnx - (u64)1U;
}

static __always_inline u64 gte_mask(u64 a, u64 b)
{
	u64 x = a;
	u64 y = b;
	u64 x_xor_y = x ^ y;
	u64 x_sub_y = x - y;
	u64 x_sub_y_xor_y = x_sub_y ^ y;
	u64 q = x_xor_y | x_sub_y_xor_y;
	u64 x_xor_q = x ^ q;
	u64 x_xor_q_ = x_xor_q >> (u32)63U;
	return x_xor_q_ - (u64)1U;
}
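
/* Both helpers above are branch-free mask generators: eq_mask(a, b) returns
 * all-ones when a == b and all-zeros otherwise, and gte_mask(a, b) does the
 * same for a >= b. Later code uses these masks to select values with AND and
 * SUB rather than branches, so timing never depends on secret data. */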

/* Computes the addition of four-element f1 with value in f2
 * and returns the carry (if any) */
static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2)
{
	u64 carry_r;

	asm volatile(
		/* Clear registers to propagate the carry bit */
		" xor %%r8d, %%r8d;"
		" xor %%r9d, %%r9d;"
		" xor %%r10d, %%r10d;"
		" xor %%r11d, %%r11d;"
		" xor %k1, %k1;"

		/* Begin addition chain */
		" addq 0(%3), %0;"
		" movq %0, 0(%2);"
		" adcxq 8(%3), %%r8;"
		" movq %%r8, 8(%2);"
		" adcxq 16(%3), %%r9;"
		" movq %%r9, 16(%2);"
		" adcxq 24(%3), %%r10;"
		" movq %%r10, 24(%2);"

		/* Return the carry bit in a register */
		" adcx %%r11, %1;"
		: "+&r"(f2), "=&r"(carry_r)
		: "r"(out), "r"(f1)
		: "%r8", "%r9", "%r10", "%r11", "memory", "cc");

	return carry_r;
}

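/* Arithmetic below is in GF(2^255 - 19), with field elements held in four
 * 64-bit limbs, i.e. as values in [0, 2^256). Since
 * 2^256 = 2 * 19 = 38 (mod 2^255 - 19), any carry out of bit 255 can be
 * folded back in by multiplying it by 38 and adding it to the low limbs.
 * Every "Wrap the result back into the field" sequence below is an instance
 * of this identity. */
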
/* Computes the field addition of two field elements */
static inline void fadd(u64 *out, const u64 *f1, const u64 *f2)
{
	asm volatile(
		/* Compute the raw addition of f1 + f2 */
		" movq 0(%0), %%r8;"
		" addq 0(%2), %%r8;"
		" movq 8(%0), %%r9;"
		" adcxq 8(%2), %%r9;"
		" movq 16(%0), %%r10;"
		" adcxq 16(%2), %%r10;"
		" movq 24(%0), %%r11;"
		" adcxq 24(%2), %%r11;"

		/* Wrap the result back into the field */

		/* Step 1: Compute carry*38 */
		" mov $0, %%rax;"
		" mov $38, %0;"
		" cmovc %0, %%rax;"

		/* Step 2: Add carry*38 to the original sum */
		" xor %%ecx, %%ecx;"
		" add %%rax, %%r8;"
		" adcx %%rcx, %%r9;"
		" movq %%r9, 8(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 16(%1);"
		" adcx %%rcx, %%r11;"
		" movq %%r11, 24(%1);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %0, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 0(%1);"
		: "+&r"(f2)
		: "r"(out), "r"(f1)
		: "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc");
}

/* Computes the field subtraction of two field elements */
static inline void fsub(u64 *out, const u64 *f1, const u64 *f2)
{
	asm volatile(
		/* Compute the raw subtraction of f1-f2 */
		" movq 0(%1), %%r8;"
		" subq 0(%2), %%r8;"
		" movq 8(%1), %%r9;"
		" sbbq 8(%2), %%r9;"
		" movq 16(%1), %%r10;"
		" sbbq 16(%2), %%r10;"
		" movq 24(%1), %%r11;"
		" sbbq 24(%2), %%r11;"

		/* Wrap the result back into the field */

		/* Step 1: Compute carry*38 */
		" mov $0, %%rax;"
		" mov $38, %%rcx;"
		" cmovc %%rcx, %%rax;"

		/* Step 2: Subtract carry*38 from the original difference */
		" sub %%rax, %%r8;"
		" sbb $0, %%r9;"
		" sbb $0, %%r10;"
		" sbb $0, %%r11;"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rcx, %%rax;"
		" sub %%rax, %%r8;"

		/* Store the result */
		" movq %%r8, 0(%0);"
		" movq %%r9, 8(%0);"
		" movq %%r10, 16(%0);"
		" movq %%r11, 24(%0);"
		:
		: "r"(out), "r"(f1), "r"(f2)
		: "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc");
}
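
/* The borrow out of the raw subtraction is handled symmetrically to fadd():
 * wrapping past zero adds a spurious 2^256, and since 2^256 = 38 (mod p)
 * the fix-up is to subtract 38 again. */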

/* Computes a field multiplication: out <- f1 * f2
 * Uses the 8-element buffer tmp for intermediate results */
static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
{
	asm volatile(

		/* Compute the raw multiplication: tmp <- src1 * src2 */

		/* Compute src1[0] * src2 */
		" movq 0(%0), %%rdx;"
		" mulxq 0(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" movq %%r8, 0(%2);"
		" mulxq 8(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" movq %%r10, 8(%2);"
		" mulxq 16(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" mulxq 24(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"

		/* Compute src1[1] * src2 */
		" movq 8(%0), %%rdx;"
		" mulxq 0(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 8(%2), %%r8;"
		" movq %%r8, 8(%2);"
		" mulxq 8(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 16(%2);"
		" mulxq 16(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" mov $0, %%r8;"
		" mulxq 24(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"

		/* Compute src1[2] * src2 */
		" movq 16(%0), %%rdx;"
		" mulxq 0(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 16(%2), %%r8;"
		" movq %%r8, 16(%2);"
		" mulxq 8(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 24(%2);"
		" mulxq 16(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" mov $0, %%r8;"
		" mulxq 24(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"

		/* Compute src1[3] * src2 */
		" movq 24(%0), %%rdx;"
		" mulxq 0(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 24(%2), %%r8;"
		" movq %%r8, 24(%2);"
		" mulxq 8(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 32(%2);"
		" mulxq 16(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" movq %%rbx, 40(%2);"
		" mov $0, %%r8;"
		" mulxq 24(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" movq %%r14, 48(%2);"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"
		" movq %%rax, 56(%2);"

		/* Line up pointers */
		" mov %2, %0;"
		" mov %3, %2;"

		/* Wrap the result back into the field */

		/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
		" mov $38, %%rdx;"
		" mulxq 32(%0), %%r8, %%r13;"
		" xor %k1, %k1;"
		" adoxq 0(%0), %%r8;"
		" mulxq 40(%0), %%r9, %%rbx;"
		" adcx %%r13, %%r9;"
		" adoxq 8(%0), %%r9;"
		" mulxq 48(%0), %%r10, %%r13;"
		" adcx %%rbx, %%r10;"
		" adoxq 16(%0), %%r10;"
		" mulxq 56(%0), %%r11, %%rax;"
		" adcx %%r13, %%r11;"
		" adoxq 24(%0), %%r11;"
		" adcx %1, %%rax;"
		" adox %1, %%rax;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %1, %%r9;"
		" movq %%r9, 8(%2);"
		" adcx %1, %%r10;"
		" movq %%r10, 16(%2);"
		" adcx %1, %%r11;"
		" movq %%r11, 24(%2);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 0(%2);"
		: "+&r"(f1), "+&r"(f2), "+&r"(tmp)
		: "r"(out)
		: "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13",
		  "%r14", "memory", "cc");
}
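
/* fmul() is a 4x4-limb schoolbook multiplication producing a 512-bit result
 * in tmp, followed by the mod-p fold of the high 256 bits (times 38) into
 * the low 256 bits. The mulx/adcx/adox combination (BMI2 + ADX) allows two
 * independent carry chains to run in parallel, since adcx and adox use
 * separate flags (CF and OF); this is why the routine is gated on those CPU
 * features. */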

/* Computes two field multiplications:
 * out[0] <- f1[0] * f2[0]
 * out[1] <- f1[1] * f2[1]
 * Uses the 16-element buffer tmp for intermediate results */
static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
{
	asm volatile(

		/* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */

		/* Compute src1[0] * src2 */
		" movq 0(%0), %%rdx;"
		" mulxq 0(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" movq %%r8, 0(%2);"
		" mulxq 8(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" movq %%r10, 8(%2);"
		" mulxq 16(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" mulxq 24(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"

		/* Compute src1[1] * src2 */
		" movq 8(%0), %%rdx;"
		" mulxq 0(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 8(%2), %%r8;"
		" movq %%r8, 8(%2);"
		" mulxq 8(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 16(%2);"
		" mulxq 16(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" mov $0, %%r8;"
		" mulxq 24(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"

		/* Compute src1[2] * src2 */
		" movq 16(%0), %%rdx;"
		" mulxq 0(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 16(%2), %%r8;"
		" movq %%r8, 16(%2);"
		" mulxq 8(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 24(%2);"
		" mulxq 16(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" mov $0, %%r8;"
		" mulxq 24(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"

		/* Compute src1[3] * src2 */
		" movq 24(%0), %%rdx;"
		" mulxq 0(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 24(%2), %%r8;"
		" movq %%r8, 24(%2);"
		" mulxq 8(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 32(%2);"
		" mulxq 16(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" movq %%rbx, 40(%2);"
		" mov $0, %%r8;"
		" mulxq 24(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" movq %%r14, 48(%2);"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"
		" movq %%rax, 56(%2);"

		/* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */

		/* Compute src1[0] * src2 */
		" movq 32(%0), %%rdx;"
		" mulxq 32(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" movq %%r8, 64(%2);"
		" mulxq 40(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" movq %%r10, 72(%2);"
		" mulxq 48(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" mulxq 56(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"

		/* Compute src1[1] * src2 */
		" movq 40(%0), %%rdx;"
		" mulxq 32(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 72(%2), %%r8;"
		" movq %%r8, 72(%2);"
		" mulxq 40(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 80(%2);"
		" mulxq 48(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" mov $0, %%r8;"
		" mulxq 56(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"

		/* Compute src1[2] * src2 */
		" movq 48(%0), %%rdx;"
		" mulxq 32(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 80(%2), %%r8;"
		" movq %%r8, 80(%2);"
		" mulxq 40(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 88(%2);"
		" mulxq 48(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" mov $0, %%r8;"
		" mulxq 56(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"

		/* Compute src1[3] * src2 */
		" movq 56(%0), %%rdx;"
		" mulxq 32(%1), %%r8, %%r9;"
		" xor %%r10d, %%r10d;"
		" adcxq 88(%2), %%r8;"
		" movq %%r8, 88(%2);"
		" mulxq 40(%1), %%r10, %%r11;"
		" adox %%r9, %%r10;"
		" adcx %%rbx, %%r10;"
		" movq %%r10, 96(%2);"
		" mulxq 48(%1), %%rbx, %%r13;"
		" adox %%r11, %%rbx;"
		" adcx %%r14, %%rbx;"
		" movq %%rbx, 104(%2);"
		" mov $0, %%r8;"
		" mulxq 56(%1), %%r14, %%rdx;"
		" adox %%r13, %%r14;"
		" adcx %%rax, %%r14;"
		" movq %%r14, 112(%2);"
		" mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		" adcx %%r8, %%rax;"
		" movq %%rax, 120(%2);"

		/* Line up pointers */
		" mov %2, %0;"
		" mov %3, %2;"

		/* Wrap the results back into the field */

		/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
		" mov $38, %%rdx;"
		" mulxq 32(%0), %%r8, %%r13;"
		" xor %k1, %k1;"
		" adoxq 0(%0), %%r8;"
		" mulxq 40(%0), %%r9, %%rbx;"
		" adcx %%r13, %%r9;"
		" adoxq 8(%0), %%r9;"
		" mulxq 48(%0), %%r10, %%r13;"
		" adcx %%rbx, %%r10;"
		" adoxq 16(%0), %%r10;"
		" mulxq 56(%0), %%r11, %%rax;"
		" adcx %%r13, %%r11;"
		" adoxq 24(%0), %%r11;"
		" adcx %1, %%rax;"
		" adox %1, %%rax;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %1, %%r9;"
		" movq %%r9, 8(%2);"
		" adcx %1, %%r10;"
		" movq %%r10, 16(%2);"
		" adcx %1, %%r11;"
		" movq %%r11, 24(%2);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 0(%2);"

		/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
		" mov $38, %%rdx;"
		" mulxq 96(%0), %%r8, %%r13;"
		" xor %k1, %k1;"
		" adoxq 64(%0), %%r8;"
		" mulxq 104(%0), %%r9, %%rbx;"
		" adcx %%r13, %%r9;"
		" adoxq 72(%0), %%r9;"
		" mulxq 112(%0), %%r10, %%r13;"
		" adcx %%rbx, %%r10;"
		" adoxq 80(%0), %%r10;"
		" mulxq 120(%0), %%r11, %%rax;"
		" adcx %%r13, %%r11;"
		" adoxq 88(%0), %%r11;"
		" adcx %1, %%rax;"
		" adox %1, %%rax;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %1, %%r9;"
		" movq %%r9, 40(%2);"
		" adcx %1, %%r10;"
		" movq %%r10, 48(%2);"
		" adcx %1, %%r11;"
		" movq %%r11, 56(%2);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 32(%2);"
		: "+&r"(f1), "+&r"(f2), "+&r"(tmp)
		: "r"(out)
		: "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13",
		  "%r14", "memory", "cc");
}
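
/* fmul2() is two independent fmul() bodies back to back, reducing both
 * 512-bit products at the end. A ladder step needs two multiplications of
 * the same shape (one per point of the working pair), so batching them
 * saves call overhead and exposes more independent work to the scheduler. */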

/* Computes the field multiplication of four-element f1 with value in f2
 * Requires f2 to be smaller than 2^17 */
static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2)
{
	register u64 f2_r asm("rdx") = f2;

	asm volatile(
		/* Compute the raw multiplication of f1*f2 */
		" mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */
		" mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */
		" add %%rcx, %%r9;"
		" mov $0, %%rcx;"
		" mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */
		" adcx %%rbx, %%r10;"
		" mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */
		" adcx %%r13, %%r11;"
		" adcx %%rcx, %%rax;"

		/* Wrap the result back into the field */

		/* Step 1: Compute carry*38 */
		" mov $38, %%rdx;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %%rcx, %%r9;"
		" movq %%r9, 8(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 16(%1);"
		" adcx %%rcx, %%r11;"
		" movq %%r11, 24(%1);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 0(%1);"
		: "+&r"(f2_r)
		: "r"(out), "r"(f1)
		: "%rax", "%rbx", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r13",
		  "memory", "cc");
}
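
/* Why f2 < 2^17: the raw product f1 * f2 is then below 2^273, so the high
 * limb that must be folded is below 2^17 and carry*38 stays far below 2^64;
 * one fold plus the final carry-bit fix-up suffices. The only caller passes
 * f2 = 121665 = (486662 - 2)/4, comfortably within the bound. */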

/* Computes p1 <- bit ? p2 : p1 in constant time */
static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2)
{
	asm volatile(
		/* Transfer bit into CF flag */
		" add $18446744073709551615, %0;"

		/* cswap p1[0], p2[0] */
		" movq 0(%1), %%r8;"
		" movq 0(%2), %%r9;"
		" mov %%r8, %%r10;"
		" cmovc %%r9, %%r8;"
		" cmovc %%r10, %%r9;"
		" movq %%r8, 0(%1);"
		" movq %%r9, 0(%2);"

		/* cswap p1[1], p2[1] */
		" movq 8(%1), %%r8;"
		" movq 8(%2), %%r9;"
		" mov %%r8, %%r10;"
		" cmovc %%r9, %%r8;"
		" cmovc %%r10, %%r9;"
		" movq %%r8, 8(%1);"
		" movq %%r9, 8(%2);"

		/* cswap p1[2], p2[2] */
		" movq 16(%1), %%r8;"
		" movq 16(%2), %%r9;"
		" mov %%r8, %%r10;"
		" cmovc %%r9, %%r8;"
		" cmovc %%r10, %%r9;"
		" movq %%r8, 16(%1);"
		" movq %%r9, 16(%2);"

		/* cswap p1[3], p2[3] */
		" movq 24(%1), %%r8;"
		" movq 24(%2), %%r9;"
		" mov %%r8, %%r10;"
		" cmovc %%r9, %%r8;"
		" cmovc %%r10, %%r9;"
		" movq %%r8, 24(%1);"
		" movq %%r9, 24(%2);"

		/* cswap p1[4], p2[4] */
		" movq 32(%1), %%r8;"
		" movq 32(%2), %%r9;"
		" mov %%r8, %%r10;"
		" cmovc %%r9, %%r8;"
		" cmovc %%r10, %%r9;"
		" movq %%r8, 32(%1);"
		" movq %%r9, 32(%2);"

		/* cswap p1[5], p2[5] */
		" movq 40(%1), %%r8;"
		" movq 40(%2), %%r9;"
		" mov %%r8, %%r10;"
		" cmovc %%r9, %%r8;"
		" cmovc %%r10, %%r9;"
		" movq %%r8, 40(%1);"
		" movq %%r9, 40(%2);"

		/* cswap p1[6], p2[6] */
		" movq 48(%1), %%r8;"
		" movq 48(%2), %%r9;"
		" mov %%r8, %%r10;"
		" cmovc %%r9, %%r8;"
		" cmovc %%r10, %%r9;"
		" movq %%r8, 48(%1);"
		" movq %%r9, 48(%2);"

		/* cswap p1[7], p2[7] */
		" movq 56(%1), %%r8;"
		" movq 56(%2), %%r9;"
		" mov %%r8, %%r10;"
		" cmovc %%r9, %%r8;"
		" cmovc %%r10, %%r9;"
		" movq %%r8, 56(%1);"
		" movq %%r9, 56(%2);"
		: "+&r"(bit)
		: "r"(p1), "r"(p2)
		: "%r8", "%r9", "%r10", "memory", "cc");
}
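
/* The opening "add $18446744073709551615" adds -1, which sets CF exactly
 * when bit is nonzero; each limb pair is then exchanged with two cmovc
 * instructions. Eight limbs, i.e. one whole (X:Z) pair, are swapped, and
 * no branch ever depends on the secret bit. */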

/* Computes the square of a field element: out <- f * f
 * Uses the 8-element buffer tmp for intermediate results */
static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
{
	asm volatile(
		/* Compute the raw multiplication: tmp <- f * f */

		/* Step 1: Compute all partial products */
		" movq 0(%0), %%rdx;" /* f[0] */
		" mulxq 8(%0), %%r8, %%r14;" /* f[1]*f[0] */
		" xor %%r15d, %%r15d;"
		" mulxq 16(%0), %%r9, %%r10;" /* f[2]*f[0] */
		" adcx %%r14, %%r9;"
		" mulxq 24(%0), %%rax, %%rcx;" /* f[3]*f[0] */
		" adcx %%rax, %%r10;"
		" movq 24(%0), %%rdx;" /* f[3] */
		" mulxq 8(%0), %%r11, %%rbx;" /* f[1]*f[3] */
		" adcx %%rcx, %%r11;"
		" mulxq 16(%0), %%rax, %%r13;" /* f[2]*f[3] */
		" adcx %%rax, %%rbx;"
		" movq 8(%0), %%rdx;" /* f[1] */
		" adcx %%r15, %%r13;"
		" mulxq 16(%0), %%rax, %%rcx;" /* f[2]*f[1] */
		" mov $0, %%r14;"

		/* Step 2: Compute two parallel carry chains */
		" xor %%r15d, %%r15d;"
		" adox %%rax, %%r10;"
		" adcx %%r8, %%r8;"
		" adox %%rcx, %%r11;"
		" adcx %%r9, %%r9;"
		" adox %%r15, %%rbx;"
		" adcx %%r10, %%r10;"
		" adox %%r15, %%r13;"
		" adcx %%r11, %%r11;"
		" adox %%r15, %%r14;"
		" adcx %%rbx, %%rbx;"
		" adcx %%r13, %%r13;"
		" adcx %%r14, %%r14;"

		/* Step 3: Compute intermediate squares */
		" movq 0(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
		" movq %%rax, 0(%1);"
		" add %%rcx, %%r8;"
		" movq %%r8, 8(%1);"
		" movq 8(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
		" adcx %%rax, %%r9;"
		" movq %%r9, 16(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 24(%1);"
		" movq 16(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
		" adcx %%rax, %%r11;"
		" movq %%r11, 32(%1);"
		" adcx %%rcx, %%rbx;"
		" movq %%rbx, 40(%1);"
		" movq 24(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
		" adcx %%rax, %%r13;"
		" movq %%r13, 48(%1);"
		" adcx %%rcx, %%r14;"
		" movq %%r14, 56(%1);"

		/* Line up pointers */
		" mov %1, %0;"
		" mov %2, %1;"

		/* Wrap the result back into the field */

		/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
		" mov $38, %%rdx;"
		" mulxq 32(%0), %%r8, %%r13;"
		" xor %%ecx, %%ecx;"
		" adoxq 0(%0), %%r8;"
		" mulxq 40(%0), %%r9, %%rbx;"
		" adcx %%r13, %%r9;"
		" adoxq 8(%0), %%r9;"
		" mulxq 48(%0), %%r10, %%r13;"
		" adcx %%rbx, %%r10;"
		" adoxq 16(%0), %%r10;"
		" mulxq 56(%0), %%r11, %%rax;"
		" adcx %%r13, %%r11;"
		" adoxq 24(%0), %%r11;"
		" adcx %%rcx, %%rax;"
		" adox %%rcx, %%rax;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %%rcx, %%r9;"
		" movq %%r9, 8(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 16(%1);"
		" adcx %%rcx, %%r11;"
		" movq %%r11, 24(%1);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 0(%1);"
		: "+&r"(f), "+&r"(tmp)
		: "r"(out)
		: "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11",
		  "%r13", "%r14", "%r15", "memory", "cc");
}
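
/* Squaring exploits symmetry: the six cross products f[i]*f[j] (i < j) are
 * computed once in Step 1, doubled by adding each register to itself in
 * Step 2, and the four diagonal terms f[i]^2 are summed in during Step 3.
 * The reduction is the same tmp_hi * 38 fold used by fmul(). */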

/* Computes two field squarings:
 * out[0] <- f[0] * f[0]
 * out[1] <- f[1] * f[1]
 * Uses the 16-element buffer tmp for intermediate results */
static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
{
	asm volatile(
		/* Step 1: Compute all partial products */
		" movq 0(%0), %%rdx;" /* f[0] */
		" mulxq 8(%0), %%r8, %%r14;" /* f[1]*f[0] */
		" xor %%r15d, %%r15d;"
		" mulxq 16(%0), %%r9, %%r10;" /* f[2]*f[0] */
		" adcx %%r14, %%r9;"
		" mulxq 24(%0), %%rax, %%rcx;" /* f[3]*f[0] */
		" adcx %%rax, %%r10;"
		" movq 24(%0), %%rdx;" /* f[3] */
		" mulxq 8(%0), %%r11, %%rbx;" /* f[1]*f[3] */
		" adcx %%rcx, %%r11;"
		" mulxq 16(%0), %%rax, %%r13;" /* f[2]*f[3] */
		" adcx %%rax, %%rbx;"
		" movq 8(%0), %%rdx;" /* f[1] */
		" adcx %%r15, %%r13;"
		" mulxq 16(%0), %%rax, %%rcx;" /* f[2]*f[1] */
		" mov $0, %%r14;"

		/* Step 2: Compute two parallel carry chains */
		" xor %%r15d, %%r15d;"
		" adox %%rax, %%r10;"
		" adcx %%r8, %%r8;"
		" adox %%rcx, %%r11;"
		" adcx %%r9, %%r9;"
		" adox %%r15, %%rbx;"
		" adcx %%r10, %%r10;"
		" adox %%r15, %%r13;"
		" adcx %%r11, %%r11;"
		" adox %%r15, %%r14;"
		" adcx %%rbx, %%rbx;"
		" adcx %%r13, %%r13;"
		" adcx %%r14, %%r14;"

		/* Step 3: Compute intermediate squares */
		" movq 0(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
		" movq %%rax, 0(%1);"
		" add %%rcx, %%r8;"
		" movq %%r8, 8(%1);"
		" movq 8(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
		" adcx %%rax, %%r9;"
		" movq %%r9, 16(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 24(%1);"
		" movq 16(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
		" adcx %%rax, %%r11;"
		" movq %%r11, 32(%1);"
		" adcx %%rcx, %%rbx;"
		" movq %%rbx, 40(%1);"
		" movq 24(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
		" adcx %%rax, %%r13;"
		" movq %%r13, 48(%1);"
		" adcx %%rcx, %%r14;"
		" movq %%r14, 56(%1);"

		/* Step 1: Compute all partial products */
		" movq 32(%0), %%rdx;" /* f[0] */
		" mulxq 40(%0), %%r8, %%r14;" /* f[1]*f[0] */
		" xor %%r15d, %%r15d;"
		" mulxq 48(%0), %%r9, %%r10;" /* f[2]*f[0] */
		" adcx %%r14, %%r9;"
		" mulxq 56(%0), %%rax, %%rcx;" /* f[3]*f[0] */
		" adcx %%rax, %%r10;"
		" movq 56(%0), %%rdx;" /* f[3] */
		" mulxq 40(%0), %%r11, %%rbx;" /* f[1]*f[3] */
		" adcx %%rcx, %%r11;"
		" mulxq 48(%0), %%rax, %%r13;" /* f[2]*f[3] */
		" adcx %%rax, %%rbx;"
		" movq 40(%0), %%rdx;" /* f[1] */
		" adcx %%r15, %%r13;"
		" mulxq 48(%0), %%rax, %%rcx;" /* f[2]*f[1] */
		" mov $0, %%r14;"

		/* Step 2: Compute two parallel carry chains */
		" xor %%r15d, %%r15d;"
		" adox %%rax, %%r10;"
		" adcx %%r8, %%r8;"
		" adox %%rcx, %%r11;"
		" adcx %%r9, %%r9;"
		" adox %%r15, %%rbx;"
		" adcx %%r10, %%r10;"
		" adox %%r15, %%r13;"
		" adcx %%r11, %%r11;"
		" adox %%r15, %%r14;"
		" adcx %%rbx, %%rbx;"
		" adcx %%r13, %%r13;"
		" adcx %%r14, %%r14;"

		/* Step 3: Compute intermediate squares */
		" movq 32(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
		" movq %%rax, 64(%1);"
		" add %%rcx, %%r8;"
		" movq %%r8, 72(%1);"
		" movq 40(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
		" adcx %%rax, %%r9;"
		" movq %%r9, 80(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 88(%1);"
		" movq 48(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
		" adcx %%rax, %%r11;"
		" movq %%r11, 96(%1);"
		" adcx %%rcx, %%rbx;"
		" movq %%rbx, 104(%1);"
		" movq 56(%0), %%rdx;"
		" mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
		" adcx %%rax, %%r13;"
		" movq %%r13, 112(%1);"
		" adcx %%rcx, %%r14;"
		" movq %%r14, 120(%1);"

		/* Line up pointers */
		" mov %1, %0;"
		" mov %2, %1;"

		/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
		" mov $38, %%rdx;"
		" mulxq 32(%0), %%r8, %%r13;"
		" xor %%ecx, %%ecx;"
		" adoxq 0(%0), %%r8;"
		" mulxq 40(%0), %%r9, %%rbx;"
		" adcx %%r13, %%r9;"
		" adoxq 8(%0), %%r9;"
		" mulxq 48(%0), %%r10, %%r13;"
		" adcx %%rbx, %%r10;"
		" adoxq 16(%0), %%r10;"
		" mulxq 56(%0), %%r11, %%rax;"
		" adcx %%r13, %%r11;"
		" adoxq 24(%0), %%r11;"
		" adcx %%rcx, %%rax;"
		" adox %%rcx, %%rax;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %%rcx, %%r9;"
		" movq %%r9, 8(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 16(%1);"
		" adcx %%rcx, %%r11;"
		" movq %%r11, 24(%1);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 0(%1);"

		/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
		" mov $38, %%rdx;"
		" mulxq 96(%0), %%r8, %%r13;"
		" xor %%ecx, %%ecx;"
		" adoxq 64(%0), %%r8;"
		" mulxq 104(%0), %%r9, %%rbx;"
		" adcx %%r13, %%r9;"
		" adoxq 72(%0), %%r9;"
		" mulxq 112(%0), %%r10, %%r13;"
		" adcx %%rbx, %%r10;"
		" adoxq 80(%0), %%r10;"
		" mulxq 120(%0), %%r11, %%rax;"
		" adcx %%r13, %%r11;"
		" adoxq 88(%0), %%r11;"
		" adcx %%rcx, %%rax;"
		" adox %%rcx, %%rax;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %%rcx, %%r9;"
		" movq %%r9, 40(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 48(%1);"
		" adcx %%rcx, %%r11;"
		" movq %%r11, 56(%1);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 32(%1);"
		: "+&r"(f), "+&r"(tmp)
		: "r"(out)
		: "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11",
		  "%r13", "%r14", "%r15", "memory", "cc");
}

static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2)
{
	u64 *nq = p01_tmp1;
	u64 *nq_p1 = p01_tmp1 + (u32)8U;
	u64 *tmp1 = p01_tmp1 + (u32)16U;
	u64 *x1 = q;
	u64 *x2 = nq;
	u64 *z2 = nq + (u32)4U;
	u64 *z3 = nq_p1 + (u32)4U;
	u64 *a = tmp1;
	u64 *b = tmp1 + (u32)4U;
	u64 *ab = tmp1;
	u64 *dc = tmp1 + (u32)8U;
	u64 *x3;
	u64 *z31;
	u64 *d0;
	u64 *c0;
	u64 *a1;
	u64 *b1;
	u64 *d;
	u64 *c;
	u64 *ab1;
	u64 *dc1;
	fadd(a, x2, z2);
	fsub(b, x2, z2);
	x3 = nq_p1;
	z31 = nq_p1 + (u32)4U;
	d0 = dc;
	c0 = dc + (u32)4U;
	fadd(c0, x3, z31);
	fsub(d0, x3, z31);
	fmul2(dc, dc, ab, tmp2);
	fadd(x3, d0, c0);
	fsub(z31, d0, c0);
	a1 = tmp1;
	b1 = tmp1 + (u32)4U;
	d = tmp1 + (u32)8U;
	c = tmp1 + (u32)12U;
	ab1 = tmp1;
	dc1 = tmp1 + (u32)8U;
	fsqr2(dc1, ab1, tmp2);
	fsqr2(nq_p1, nq_p1, tmp2);
	a1[0U] = c[0U];
	a1[1U] = c[1U];
	a1[2U] = c[2U];
	a1[3U] = c[3U];
	fsub(c, d, c);
	fmul_scalar(b1, c, (u64)121665U);
	fadd(b1, b1, d);
	fmul2(nq, dc1, ab1, tmp2);
	fmul(z3, z3, x1, tmp2);
}
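
/* One Montgomery ladder step: given the affine x-coordinate x1 of the
 * difference point and the pair (nq, nq_p1) = (n*Q, (n+1)*Q) in projective
 * (X:Z) form, this computes (2n*Q, (2n+1)*Q) with the standard differential
 * addition-and-doubling formulas. The constant 121665 is (A - 2)/4 for the
 * curve coefficient A = 486662. */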

static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2)
{
	u64 *x2 = nq;
	u64 *z2 = nq + (u32)4U;
	u64 *a = tmp1;
	u64 *b = tmp1 + (u32)4U;
	u64 *d = tmp1 + (u32)8U;
	u64 *c = tmp1 + (u32)12U;
	u64 *ab = tmp1;
	u64 *dc = tmp1 + (u32)8U;
	fadd(a, x2, z2);
	fsub(b, x2, z2);
	fsqr2(dc, ab, tmp2);
	a[0U] = c[0U];
	a[1U] = c[1U];
	a[2U] = c[2U];
	a[3U] = c[3U];
	fsub(c, d, c);
	fmul_scalar(b, c, (u64)121665U);
	fadd(b, b, d);
	fmul2(nq, dc, ab, tmp2);
}
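
/* Stand-alone doubling, the same formula as the doubling half of
 * point_add_and_double(); used for the trailing doublings of the ladder. */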

static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1)
{
	u64 tmp2[16U] = { 0U };
	u64 p01_tmp1_swap[33U] = { 0U };
	u64 *p0 = p01_tmp1_swap;
	u64 *p01 = p01_tmp1_swap;
	u64 *p03 = p01;
	u64 *p11 = p01 + (u32)8U;
	u64 *x0;
	u64 *z0;
	u64 *p01_tmp1;
	u64 *p01_tmp11;
	u64 *nq10;
	u64 *nq_p11;
	u64 *swap1;
	u64 sw0;
	u64 *nq1;
	u64 *tmp1;
	memcpy(p11, init1, (u32)8U * sizeof(init1[0U]));
	x0 = p03;
	z0 = p03 + (u32)4U;
	x0[0U] = (u64)1U;
	x0[1U] = (u64)0U;
	x0[2U] = (u64)0U;
	x0[3U] = (u64)0U;
	z0[0U] = (u64)0U;
	z0[1U] = (u64)0U;
	z0[2U] = (u64)0U;
	z0[3U] = (u64)0U;
	p01_tmp1 = p01_tmp1_swap;
	p01_tmp11 = p01_tmp1_swap;
	nq10 = p01_tmp1_swap;
	nq_p11 = p01_tmp1_swap + (u32)8U;
	swap1 = p01_tmp1_swap + (u32)32U;
	cswap2((u64)1U, nq10, nq_p11);
	point_add_and_double(init1, p01_tmp11, tmp2);
	swap1[0U] = (u64)1U;
	{
		u32 i;
		for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) {
			u64 *p01_tmp12 = p01_tmp1_swap;
			u64 *swap2 = p01_tmp1_swap + (u32)32U;
			u64 *nq2 = p01_tmp12;
			u64 *nq_p12 = p01_tmp12 + (u32)8U;
			u64 bit = (u64)(key[((u32)253U - i) / (u32)8U] >> ((u32)253U - i) % (u32)8U & (u8)1U);
			u64 sw = swap2[0U] ^ bit;
			cswap2(sw, nq2, nq_p12);
			point_add_and_double(init1, p01_tmp12, tmp2);
			swap2[0U] = bit;
		}
	}
	sw0 = swap1[0U];
	cswap2(sw0, nq10, nq_p11);
	nq1 = p01_tmp1;
	tmp1 = p01_tmp1 + (u32)16U;
	point_double(nq1, tmp1, tmp2);
	point_double(nq1, tmp1, tmp2);
	point_double(nq1, tmp1, tmp2);
	memcpy(out, p0, (u32)8U * sizeof(p0[0U]));

	memzero_explicit(tmp2, sizeof(tmp2));
	memzero_explicit(p01_tmp1_swap, sizeof(p01_tmp1_swap));
}
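
/* The ladder consumes scalar bits from the top down: the unconditional
 * first step behaves as if bit 254 were set, the loop covers bits 253
 * through 3, and the three trailing point_double() calls treat bits 2..0
 * as clear. In effect the RFC 7748 clamping pattern is baked into the
 * ladder structure, and bit 255 is never read at all. Buffers holding
 * secret intermediate state are wiped with memzero_explicit() on exit. */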

static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1)
{
	u32 i;
	fsqr(o, inp, tmp);
	for (i = (u32)0U; i < n1 - (u32)1U; i = i + (u32)1U)
		fsqr(o, o, tmp);
}
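
/* fsquare_times(o, inp, tmp, n1) computes o = inp^(2^n1) via n1 repeated
 * squarings; it is the building block of the inversion chain below. */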

static void finv(u64 *o, const u64 *i, u64 *tmp)
{
	u64 t1[16U] = { 0U };
	u64 *a0 = t1;
	u64 *b = t1 + (u32)4U;
	u64 *c = t1 + (u32)8U;
	u64 *t00 = t1 + (u32)12U;
	u64 *tmp1 = tmp;
	u64 *a;
	u64 *t0;
	fsquare_times(a0, i, tmp1, (u32)1U);
	fsquare_times(t00, a0, tmp1, (u32)2U);
	fmul(b, t00, i, tmp);
	fmul(a0, b, a0, tmp);
	fsquare_times(t00, a0, tmp1, (u32)1U);
	fmul(b, t00, b, tmp);
	fsquare_times(t00, b, tmp1, (u32)5U);
	fmul(b, t00, b, tmp);
	fsquare_times(t00, b, tmp1, (u32)10U);
	fmul(c, t00, b, tmp);
	fsquare_times(t00, c, tmp1, (u32)20U);
	fmul(t00, t00, c, tmp);
	fsquare_times(t00, t00, tmp1, (u32)10U);
	fmul(b, t00, b, tmp);
	fsquare_times(t00, b, tmp1, (u32)50U);
	fmul(c, t00, b, tmp);
	fsquare_times(t00, c, tmp1, (u32)100U);
	fmul(t00, t00, c, tmp);
	fsquare_times(t00, t00, tmp1, (u32)50U);
	fmul(t00, t00, b, tmp);
	fsquare_times(t00, t00, tmp1, (u32)5U);
	a = t1;
	t0 = t1 + (u32)12U;
	fmul(o, t0, a, tmp);
}
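
/* Fermat inversion: p = 2^255 - 19 is prime, so i^-1 = i^(p-2). The chain
 * above evaluates the fixed exponent p - 2 = 2^255 - 21 using 254 squarings
 * and 11 multiplications, giving constant-time inversion with no
 * secret-dependent branching. */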

static void store_felem(u64 *b, u64 *f)
{
	u64 f30 = f[3U];
	u64 top_bit0 = f30 >> (u32)63U;
	u64 f31;
	u64 top_bit;
	u64 f0;
	u64 f1;
	u64 f2;
	u64 f3;
	u64 m0;
	u64 m1;
	u64 m2;
	u64 m3;
	u64 mask;
	u64 f0_;
	u64 f1_;
	u64 f2_;
	u64 f3_;
	u64 o0;
	u64 o1;
	u64 o2;
	u64 o3;
	f[3U] = f30 & (u64)0x7fffffffffffffffU;
	add_scalar(f, f, (u64)19U * top_bit0);
	f31 = f[3U];
	top_bit = f31 >> (u32)63U;
	f[3U] = f31 & (u64)0x7fffffffffffffffU;
	add_scalar(f, f, (u64)19U * top_bit);
	f0 = f[0U];
	f1 = f[1U];
	f2 = f[2U];
	f3 = f[3U];
	m0 = gte_mask(f0, (u64)0xffffffffffffffedU);
	m1 = eq_mask(f1, (u64)0xffffffffffffffffU);
	m2 = eq_mask(f2, (u64)0xffffffffffffffffU);
	m3 = eq_mask(f3, (u64)0x7fffffffffffffffU);
	mask = ((m0 & m1) & m2) & m3;
	f0_ = f0 - (mask & (u64)0xffffffffffffffedU);
	f1_ = f1 - (mask & (u64)0xffffffffffffffffU);
	f2_ = f2 - (mask & (u64)0xffffffffffffffffU);
	f3_ = f3 - (mask & (u64)0x7fffffffffffffffU);
	o0 = f0_;
	o1 = f1_;
	o2 = f2_;
	o3 = f3_;
	b[0U] = o0;
	b[1U] = o1;
	b[2U] = o2;
	b[3U] = o3;
}
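
/* Canonical encoding of a field element. The ladder output can be any
 * 256-bit representative, so the top bit is folded in twice (adding
 * 19 * top_bit each time, since 2^255 = 19 mod p); after that at most one
 * subtraction of p = 2^255 - 19 is needed, applied under a constant-time
 * mask built from the gte/eq helpers, leaving the unique value in [0, p). */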

static void encode_point(u8 *o, const u64 *i)
{
	const u64 *x = i;
	const u64 *z = i + (u32)4U;
	u64 tmp[4U] = { 0U };
	u64 tmp_w[16U] = { 0U };
	finv(tmp, z, tmp_w);
	fmul(tmp, tmp, x, tmp_w);
	store_felem((u64 *)o, tmp);
}

static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub)
{
	u64 init1[8U] = { 0U };
	u64 tmp[4U] = { 0U };
	u64 tmp3;
	u64 *x;
	u64 *z;
	{
		u32 i;
		for (i = (u32)0U; i < (u32)4U; i = i + (u32)1U) {
			u64 *os = tmp;
			const u8 *bj = pub + i * (u32)8U;
			u64 u = *(u64 *)bj;
			u64 r = u;
			u64 x0 = r;
			os[i] = x0;
		}
	}
	tmp3 = tmp[3U];
	tmp[3U] = tmp3 & (u64)0x7fffffffffffffffU;
	x = init1;
	z = init1 + (u32)4U;
	z[0U] = (u64)1U;
	z[1U] = (u64)0U;
	z[2U] = (u64)0U;
	z[3U] = (u64)0U;
	x[0U] = tmp[0U];
	x[1U] = tmp[1U];
	x[2U] = tmp[2U];
	x[3U] = tmp[3U];
	montgomery_ladder(init1, priv, init1);
	encode_point(out, init1);
}
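
/* Per RFC 7748, the peer's u-coordinate is decoded little-endian with the
 * most significant bit of the final byte masked off (the AND on tmp[3]).
 * The copy loop's direct u64 loads rely on x86 permitting unaligned
 * access. */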

/* The below constants were generated using this sage script:
 *
 * #!/usr/bin/env sage
 * import sys
 * from sage.all import *
 * def limbs(n):
 *	n = int(n)
 *	l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64)
 *	return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l
 * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0])
 * p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0]
 * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s))
 * print("static const u64 table_ladder[] = {")
 * p = ec.lift_x(9)
 * for i in range(252):
 *	l = (p[0] + p[2]) / (p[0] - p[2])
 *	print(("\t%s" + ("," if i != 251 else "")) % limbs(l))
 *	p = p * 2
 * print("};")
 *
 */

static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL };

static const u64 table_ladder[] = {
	0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL,
	0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL,
	0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL,
	0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL,
	0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL,
	0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL,
	0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL,
	0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL,
	0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL,
	0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL,
	0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL,
	0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL,
	0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL,
	0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL,
	0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL,
	0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL,
	0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL,
	0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL,
	0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL,
	0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL,
	0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL,
	0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL,
	0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL,
	0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL,
	0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL,
	0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL,
	0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL,
	0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL,
	0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL,
	0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL,
	0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL,
	0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL,
	0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL,
	0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL,
	0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL,
	0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL,
	0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL,
	0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL,
	0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL,
	0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL,
	0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL,
	0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL,
	0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL,
	0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL,
	0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL,
	0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL,
	0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL,
	0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL,
	0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL,
	0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL,
	0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL,
	0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL,
	0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL,
	0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL,
	0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL,
	0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL,
	0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL,
	0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL,
	0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL,
	0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL,
	0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL,
	0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL,
	0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL,
	0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL,
	0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL,
	0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL,
	0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL,
	0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL,
	0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL,
	0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL,
	0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL,
	0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL,
	0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL,
	0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL,
	0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL,
	0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL,
	0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL,
	0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL,
	0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL,
	0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL,
	0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL,
	0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL,
	0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL,
	0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL,
	0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL,
	0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL,
	0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL,
	0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL,
	0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL,
	0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL,
	0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL,
	0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL,
	0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL,
	0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL,
	0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL,
	0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL,
	0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL,
	0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL,
	0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL,
	0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL,
	0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL,
	0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL,
	0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL,
	0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL,
	0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL,
	0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL,
	0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL,
	0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL,
	0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL,
	0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL,
	0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL,
	0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL,
	0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL,
	0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL,
	0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL,
	0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL,
	0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL,
	0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL,
	0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL,
	0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL,
	0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL,
	0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL,
	0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL,
	0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL,
	0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL,
	0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL,
	0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL,
	0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL,
	0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL,
	0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL,
	0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL,
	0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL,
	0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL,
	0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL,
	0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL,
	0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL,
	0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL,
	0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL,
	0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL,
	0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL,
	0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL,
	0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL,
	0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL,
	0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL,
	0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL,
	0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL,
	0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL,
	0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL,
	0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL,
	0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL,
	0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL,
	0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL,
	0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL,
	0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL,
	0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL,
	0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL,
	0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL,
	0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL,
	0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL,
	0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL,
	0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL,
	0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL,
	0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL,
	0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL,
	0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL,
	0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL,
	0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL,
	0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL,
	0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL,
	0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL,
	0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL,
	0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL,
	0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL,
	0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL,
	0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL,
	0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL,
	0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL,
	0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL,
	0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL,
	0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL,
	0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL,
	0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL,
	0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL,
	0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL,
	0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL,
	0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL,
	0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL,
	0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL,
	0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL,
	0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL,
	0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL,
	0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL,
	0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL,
	0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL,
	0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL,
	0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL,
	0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL,
	0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL,
	0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL,
	0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL,
	0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL,
	0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL,
	0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL,
	0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL,
	0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL,
	0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL,
	0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL,
	0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL,
	0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL,
	0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL,
	0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL,
	0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL,
	0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL,
	0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL,
	0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL,
	0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL,
	0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL,
	0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL,
	0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL,
	0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL,
	0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL,
	0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL,
	0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL,
	0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL,
	0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL,
	0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL,
	0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL,
	0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL,
	0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL,
	0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL,
	0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL,
	0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL,
	0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL,
	0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL,
	0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL,
	0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL,
	0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL,
	0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL,
	0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL,
	0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL,
	0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL,
	0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL,
	0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL,
	0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL,
	0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL,
	0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL,
	0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL,
	0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL,
	0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL,
	0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL,
	0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL,
	0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL
};
1534
static void curve25519_ever64_base(u8 *out, const u8 *priv)
{
	u64 swap = 1;
	int i, j, k;
	u64 tmp[16 + 32 + 4];
	u64 *x1 = &tmp[0];
	u64 *z1 = &tmp[4];
	u64 *x2 = &tmp[8];
	u64 *z2 = &tmp[12];
	u64 *xz1 = &tmp[0];
	u64 *xz2 = &tmp[8];
	u64 *a = &tmp[0 + 16];
	u64 *b = &tmp[4 + 16];
	u64 *c = &tmp[8 + 16];
	u64 *ab = &tmp[0 + 16];
	u64 *abcd = &tmp[0 + 16];
	u64 *ef = &tmp[16 + 16];
	u64 *efgh = &tmp[16 + 16];
	u64 *key = &tmp[0 + 16 + 32];

	/* Clamp the scalar as specified by RFC 7748 */
	memcpy(key, priv, 32);
	((u8 *)key)[0] &= 248;
	((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64;

	x1[0] = 1, x1[1] = x1[2] = x1[3] = 0;
	z1[0] = 1, z1[1] = z1[2] = z1[3] = 0;
	z2[0] = 1, z2[1] = z2[2] = z2[3] = 0;
	memcpy(x2, p_minus_s, sizeof(p_minus_s));

	/* Walk bits 3..254 of the clamped key, consuming one precomputed
	 * table entry per bit */
	j = 3;
	for (i = 0; i < 4; ++i) {
		while (j < (const int[]){ 64, 64, 64, 63 }[i]) {
			u64 bit = (key[i] >> j) & 1;
			k = (64 * i + j - 3);
			swap = swap ^ bit;
			cswap2(swap, xz1, xz2);
			swap = bit;
			fsub(b, x1, z1);
			fadd(a, x1, z1);
			fmul(c, &table_ladder[4 * k], b, ef);
			fsub(b, a, c);
			fadd(a, a, c);
			fsqr2(ab, ab, efgh);
			fmul2(xz1, xz2, ab, efgh);
			++j;
		}
		j = 0;
	}

	/* Multiply by 8 to account for the three cleared low bits */
	point_double(xz1, abcd, efgh);
	point_double(xz1, abcd, efgh);
	point_double(xz1, abcd, efgh);
	encode_point(out, xz1);

	memzero_explicit(tmp, sizeof(tmp));
}
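
/* For illustration only, not part of the build: the clamping above follows
 * the RFC 7748 decodeScalar25519 rule. A minimal standalone sketch of the
 * same transformation (the helper name clamp_scalar is hypothetical):
 *
 *	static void clamp_scalar(u8 key[32])
 *	{
 *		key[0] &= 248;		// clear the three low bits
 *		key[31] &= 127;		// clear bit 255
 *		key[31] |= 64;		// set bit 254
 *	}
 *
 * With the low three bits cleared, the clamped scalar equals 8 * (key >> 3);
 * the loop above walks bits 3..254 against the precomputed table, and the
 * three trailing point_double() calls supply the remaining factor of 8.
 */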

static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2_adx);

void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
		     const u8 secret[CURVE25519_KEY_SIZE],
		     const u8 basepoint[CURVE25519_KEY_SIZE])
{
	if (static_branch_likely(&curve25519_use_bmi2_adx))
		curve25519_ever64(mypublic, secret, basepoint);
	else
		curve25519_generic(mypublic, secret, basepoint);
}
EXPORT_SYMBOL(curve25519_arch);

void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
			  const u8 secret[CURVE25519_KEY_SIZE])
{
	if (static_branch_likely(&curve25519_use_bmi2_adx))
		curve25519_ever64_base(pub, secret);
	else
		curve25519_generic(pub, secret, curve25519_base_point);
}
EXPORT_SYMBOL(curve25519_base_arch);
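
/* For illustration only, not part of the build: a minimal X25519 key
 * agreement built on the two exported helpers above (buffer names are
 * hypothetical; error handling is elided):
 *
 *	u8 secret_a[CURVE25519_KEY_SIZE], public_a[CURVE25519_KEY_SIZE];
 *	u8 secret_b[CURVE25519_KEY_SIZE], public_b[CURVE25519_KEY_SIZE];
 *	u8 shared_a[CURVE25519_KEY_SIZE], shared_b[CURVE25519_KEY_SIZE];
 *
 *	curve25519_generate_secret(secret_a);
 *	curve25519_generate_secret(secret_b);
 *	curve25519_base_arch(public_a, secret_a);	// A's public key
 *	curve25519_base_arch(public_b, secret_b);	// B's public key
 *	curve25519_arch(shared_a, secret_a, public_b);	// A's view
 *	curve25519_arch(shared_b, secret_b, public_a);	// B's view
 *	// shared_a and shared_b now hold the same 32-byte shared secret
 */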

static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
				 unsigned int len)
{
	u8 *secret = kpp_tfm_ctx(tfm);

	/* A zero-length buffer requests a freshly generated secret;
	 * otherwise only a 32-byte, non-all-zero key is accepted. */
	if (!len)
		curve25519_generate_secret(secret);
	else if (len == CURVE25519_KEY_SIZE &&
		 crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
		memcpy(secret, buf, CURVE25519_KEY_SIZE);
	else
		return -EINVAL;
	return 0;
}

static int curve25519_generate_public_key(struct kpp_request *req)
{
	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
	const u8 *secret = kpp_tfm_ctx(tfm);
	u8 buf[CURVE25519_KEY_SIZE];
	int copied, nbytes;

	if (req->src)
		return -EINVAL;

	curve25519_base_arch(buf, secret);

	/* might want less than we've got */
	nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
	copied = sg_copy_from_buffer(req->dst,
				     sg_nents_for_len(req->dst, nbytes),
				     buf, nbytes);
	if (copied != nbytes)
		return -EINVAL;
	return 0;
}

static int curve25519_compute_shared_secret(struct kpp_request *req)
{
	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
	const u8 *secret = kpp_tfm_ctx(tfm);
	u8 public_key[CURVE25519_KEY_SIZE];
	u8 buf[CURVE25519_KEY_SIZE];
	int copied, nbytes;

	if (!req->src)
		return -EINVAL;

	copied = sg_copy_to_buffer(req->src,
				   sg_nents_for_len(req->src,
						    CURVE25519_KEY_SIZE),
				   public_key, CURVE25519_KEY_SIZE);
	if (copied != CURVE25519_KEY_SIZE)
		return -EINVAL;

	curve25519_arch(buf, secret, public_key);

	/* might want less than we've got */
	nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
	copied = sg_copy_from_buffer(req->dst,
				     sg_nents_for_len(req->dst, nbytes),
				     buf, nbytes);
	if (copied != nbytes)
		return -EINVAL;
	return 0;
}

static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
{
	return CURVE25519_KEY_SIZE;
}

static struct kpp_alg curve25519_alg = {
	.base.cra_name		= "curve25519",
	.base.cra_driver_name	= "curve25519-x86",
	.base.cra_priority	= 200,
	.base.cra_module	= THIS_MODULE,
	.base.cra_ctxsize	= CURVE25519_KEY_SIZE,

	.set_secret		= curve25519_set_secret,
	.generate_public_key	= curve25519_generate_public_key,
	.compute_shared_secret	= curve25519_compute_shared_secret,
	.max_size		= curve25519_max_size,
};
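
/* For illustration only, not part of the build: a sketch of how a kernel
 * user might reach this driver through the generic kpp API once it is
 * registered below (synchronous completion assumed, error paths elided,
 * variable names hypothetical):
 *
 *	struct crypto_kpp *tfm = crypto_alloc_kpp("curve25519", 0, 0);
 *	struct kpp_request *req = kpp_request_alloc(tfm, GFP_KERNEL);
 *	struct scatterlist dst;
 *	u8 pub[CURVE25519_KEY_SIZE];
 *
 *	crypto_kpp_set_secret(tfm, NULL, 0);	// zero length: generate one
 *	sg_init_one(&dst, pub, sizeof(pub));
 *	kpp_request_set_input(req, NULL, 0);
 *	kpp_request_set_output(req, &dst, sizeof(pub));
 *	crypto_kpp_generate_public_key(req);	// invokes the handler above
 *	kpp_request_free(req);
 *	crypto_free_kpp(tfm);
 */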

static int __init curve25519_mod_init(void)
{
	if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX))
		static_branch_enable(&curve25519_use_bmi2_adx);
	else
		return 0;
	return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
		crypto_register_kpp(&curve25519_alg) : 0;
}

static void __exit curve25519_mod_exit(void)
{
	/* The kpp algorithm was only registered if BMI2 and ADX are present */
	if (IS_REACHABLE(CONFIG_CRYPTO_KPP) &&
	    static_branch_likely(&curve25519_use_bmi2_adx))
		crypto_unregister_kpp(&curve25519_alg);
}

module_init(curve25519_mod_init);
module_exit(curve25519_mod_exit);

MODULE_ALIAS_CRYPTO("curve25519");
MODULE_ALIAS_CRYPTO("curve25519-x86");
MODULE_DESCRIPTION("Curve25519 algorithm, ADX optimized");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
1260 0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL,
1261 0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL,
1262 0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL,
1263 0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL,
1264 0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL,
1265 0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL,
1266 0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL,
1267 0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL,
1268 0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL,
1269 0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL,
1270 0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL,
1271 0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL,
1272 0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL,
1273 0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL,
1274 0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL,
1275 0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL,
1276 0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL,
1277 0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL,
1278 0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL,
1279 0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL,
1280 0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL,
1281 0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL,
1282 0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL,
1283 0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL,
1284 0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL,
1285 0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL,
1286 0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL,
1287 0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL,
1288 0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL,
1289 0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL,
1290 0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL,
1291 0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL,
1292 0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL,
1293 0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL,
1294 0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL,
1295 0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL,
1296 0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL,
1297 0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL,
1298 0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL,
1299 0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL,
1300 0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL,
1301 0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL,
1302 0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL,
1303 0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL,
1304 0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL,
1305 0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL,
1306 0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL,
1307 0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL,
1308 0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL,
1309 0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL,
1310 0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL,
1311 0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL,
1312 0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL,
1313 0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL,
1314 0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL,
1315 0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL,
1316 0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL,
1317 0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL,
1318 0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL,
1319 0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL,
1320 0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL,
1321 0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL
1322};
1323
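/*
 * Fixed-base scalar multiplication: computes priv * base point using one
 * precomputed field element per scalar bit from table_ladder above
 * (table_ladder[4 * k] is the four-limb element for bit k). The scalar is
 * clamped per RFC 7748, bits 3..254 are consumed one ladder step each, and
 * the three cleared low bits are applied afterwards as point doublings.
 */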
static void curve25519_ever64_base(u8 *out, const u8 *priv)
{
	u64 swap = 1;
	int i, j, k;
	u64 tmp[16 + 32 + 4];
	u64 *x1 = &tmp[0];
	u64 *z1 = &tmp[4];
	u64 *x2 = &tmp[8];
	u64 *z2 = &tmp[12];
	u64 *xz1 = &tmp[0];
	u64 *xz2 = &tmp[8];
	u64 *a = &tmp[0 + 16];
	u64 *b = &tmp[4 + 16];
	u64 *c = &tmp[8 + 16];
	u64 *ab = &tmp[0 + 16];
	u64 *abcd = &tmp[0 + 16];
	u64 *ef = &tmp[16 + 16];
	u64 *efgh = &tmp[16 + 16];
	u64 *key = &tmp[0 + 16 + 32];

	/* Clamp the scalar per RFC 7748: clear the low three bits,
	 * clear bit 255 and set bit 254. */
	memcpy(key, priv, 32);
	((u8 *)key)[0] &= 248;
	((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64;

	x1[0] = 1, x1[1] = x1[2] = x1[3] = 0;
	z1[0] = 1, z1[1] = z1[2] = z1[3] = 0;
	z2[0] = 1, z2[1] = z2[2] = z2[3] = 0;
	memcpy(x2, p_minus_s, sizeof(p_minus_s));

	/* Walk bits 3..254 of the clamped scalar, one ladder step per bit. */
	j = 3;
	for (i = 0; i < 4; ++i) {
		while (j < (const int[]){ 64, 64, 64, 63 }[i]) {
			u64 bit = (key[i] >> j) & 1;
			k = (64 * i + j - 3);
			/* Constant-time conditional swap of the two
			 * working points. */
			swap = swap ^ bit;
			cswap2(swap, xz1, xz2);
			swap = bit;
			fsub(b, x1, z1);
			fadd(a, x1, z1);
			fmul(c, &table_ladder[4 * k], b, ef);
			fsub(b, a, c);
			fadd(a, a, c);
			fsqr2(ab, ab, efgh);
			fmul2(xz1, xz2, ab, efgh);
			++j;
		}
		j = 0;
	}

	/* Multiply by 8 to account for the three cleared low bits. */
	point_double(xz1, abcd, efgh);
	point_double(xz1, abcd, efgh);
	point_double(xz1, abcd, efgh);
	encode_point(out, xz1);

	memzero_explicit(tmp, sizeof(tmp));
}

/* Enabled at module init when the CPU supports both BMI2 and ADX. */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2_adx);

void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
		     const u8 secret[CURVE25519_KEY_SIZE],
		     const u8 basepoint[CURVE25519_KEY_SIZE])
{
	if (static_branch_likely(&curve25519_use_bmi2_adx))
		curve25519_ever64(mypublic, secret, basepoint);
	else
		curve25519_generic(mypublic, secret, basepoint);
}
EXPORT_SYMBOL(curve25519_arch);

void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
			  const u8 secret[CURVE25519_KEY_SIZE])
{
	if (static_branch_likely(&curve25519_use_bmi2_adx))
		curve25519_ever64_base(pub, secret);
	else
		curve25519_generic(pub, secret, curve25519_base_point);
}
EXPORT_SYMBOL(curve25519_base_arch);

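/*
 * Minimal sketch of an X25519 key agreement built on the two exported
 * entry points above. curve25519_generate_secret() is the clamped
 * random-key helper from <crypto/curve25519.h>; the buffer names here are
 * hypothetical, for illustration only:
 *
 *	u8 a_secret[CURVE25519_KEY_SIZE], a_public[CURVE25519_KEY_SIZE];
 *	u8 b_secret[CURVE25519_KEY_SIZE], b_public[CURVE25519_KEY_SIZE];
 *	u8 ss_a[CURVE25519_KEY_SIZE], ss_b[CURVE25519_KEY_SIZE];
 *
 *	curve25519_generate_secret(a_secret);
 *	curve25519_base_arch(a_public, a_secret);
 *	curve25519_generate_secret(b_secret);
 *	curve25519_base_arch(b_public, b_secret);
 *
 *	curve25519_arch(ss_a, a_secret, b_public);
 *	curve25519_arch(ss_b, b_secret, a_public);
 *
 * after which ss_a and ss_b hold the same 32-byte shared secret.
 */
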
static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
				 unsigned int len)
{
	u8 *secret = kpp_tfm_ctx(tfm);

	if (!len)
		curve25519_generate_secret(secret);
	else if (len == CURVE25519_KEY_SIZE &&
		 crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
		memcpy(secret, buf, CURVE25519_KEY_SIZE);
	else
		return -EINVAL;
	return 0;
}
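
/*
 * Input convention for the handler above (a sketch of the three cases;
 * tfm is a "curve25519" kpp transform):
 *
 *	crypto_kpp_set_secret(tfm, NULL, 0);	generates a random key
 *	crypto_kpp_set_secret(tfm, key, 32);	accepted unless key is all-zero
 *	crypto_kpp_set_secret(tfm, key, 16);	rejected with -EINVAL
 */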

static int curve25519_generate_public_key(struct kpp_request *req)
{
	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
	const u8 *secret = kpp_tfm_ctx(tfm);
	u8 buf[CURVE25519_KEY_SIZE];
	int copied, nbytes;

	if (req->src)
		return -EINVAL;

	curve25519_base_arch(buf, secret);

	/* might want less than we've got */
	nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
	copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
								nbytes),
				     buf, nbytes);
	if (copied != nbytes)
		return -EINVAL;
	return 0;
}

static int curve25519_compute_shared_secret(struct kpp_request *req)
{
	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
	const u8 *secret = kpp_tfm_ctx(tfm);
	u8 public_key[CURVE25519_KEY_SIZE];
	u8 buf[CURVE25519_KEY_SIZE];
	int copied, nbytes;

	if (!req->src)
		return -EINVAL;

	copied = sg_copy_to_buffer(req->src,
				   sg_nents_for_len(req->src,
						    CURVE25519_KEY_SIZE),
				   public_key, CURVE25519_KEY_SIZE);
	if (copied != CURVE25519_KEY_SIZE)
		return -EINVAL;

	curve25519_arch(buf, secret, public_key);

	/* might want less than we've got */
	nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
	copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
								nbytes),
				     buf, nbytes);
	if (copied != nbytes)
		return -EINVAL;
	return 0;
}

static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
{
	return CURVE25519_KEY_SIZE;
}

static struct kpp_alg curve25519_alg = {
	.base.cra_name		= "curve25519",
	.base.cra_driver_name	= "curve25519-x86",
	.base.cra_priority	= 200,
	.base.cra_module	= THIS_MODULE,
	.base.cra_ctxsize	= CURVE25519_KEY_SIZE,

	.set_secret		= curve25519_set_secret,
	.generate_public_key	= curve25519_generate_public_key,
	.compute_shared_secret	= curve25519_compute_shared_secret,
	.max_size		= curve25519_max_size,
};
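
/*
 * End-to-end sketch of driving this algorithm through the generic kpp API
 * (error handling elided; tfm, req, and the buffers are hypothetical names
 * used only for illustration):
 *
 *	struct crypto_kpp *tfm = crypto_alloc_kpp("curve25519", 0, 0);
 *	struct kpp_request *req = kpp_request_alloc(tfm, GFP_KERNEL);
 *	struct scatterlist src, dst;
 *	u8 pub[CURVE25519_KEY_SIZE], shared[CURVE25519_KEY_SIZE];
 *	u8 peer_pub[CURVE25519_KEY_SIZE];	(received from the peer)
 *
 *	crypto_kpp_set_secret(tfm, NULL, 0);	(empty buffer: random key)
 *
 *	sg_init_one(&dst, pub, sizeof(pub));
 *	kpp_request_set_input(req, NULL, 0);	(no src for public-key gen)
 *	kpp_request_set_output(req, &dst, sizeof(pub));
 *	crypto_kpp_generate_public_key(req);
 *
 *	sg_init_one(&src, peer_pub, sizeof(peer_pub));
 *	sg_init_one(&dst, shared, sizeof(shared));
 *	kpp_request_set_input(req, &src, sizeof(peer_pub));
 *	kpp_request_set_output(req, &dst, sizeof(shared));
 *	crypto_kpp_compute_shared_secret(req);
 *
 *	kpp_request_free(req);
 *	crypto_free_kpp(tfm);
 *
 * Real code would allocate the data buffers with kmalloc(), since
 * scatterlists cannot describe stack memory on CONFIG_VMAP_STACK kernels.
 */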

/* Register the kpp algorithm only when the accelerated path is usable. */
static int __init curve25519_mod_init(void)
{
	if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX))
		static_branch_enable(&curve25519_use_bmi2_adx);
	else
		return 0;
	return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
		crypto_register_kpp(&curve25519_alg) : 0;
}

static void __exit curve25519_mod_exit(void)
{
	if (IS_REACHABLE(CONFIG_CRYPTO_KPP) &&
	    static_branch_likely(&curve25519_use_bmi2_adx))
		crypto_unregister_kpp(&curve25519_alg);
}

module_init(curve25519_mod_init);
module_exit(curve25519_mod_exit);

MODULE_ALIAS_CRYPTO("curve25519");
MODULE_ALIAS_CRYPTO("curve25519-x86");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");