Loading...
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher
 *
 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 */
7
8#include <linux/linkage.h>
9
10.file "des3_ede-asm_64.S"
11.text
12
/*
 * Addresses of the eight lookup tables used by the round macros.
 *
 * Only .L_s1 is referenced by name.  s2..s8 are computed from it on the
 * assumption that the tables are laid out back to back starting at .L_s1
 * and that every table occupies 64 quadwords (64 * 8 bytes).
 */
#define s1 .L_s1
#define s2 ((s1) + (64*8))
#define s3 ((s2) + (64*8))
#define s4 ((s3) + (64*8))
#define s5 ((s4) + (64*8))
#define s6 ((s5) + (64*8))
#define s7 ((s6) + (64*8))
#define s8 ((s7) + (64*8))
21
/*
 * Register macros.
 *
 * A trailing "d" names the 32-bit view of a register, "bl"/"bh" the low
 * byte and the second-lowest byte.  The digit selects the block (0..2)
 * when several blocks are processed in parallel.
 */

/* Pointer to the round keys (first function argument). */
#define CTX %rdi

/* Left halves of blocks 0..2. */
#define RL0 %r8
#define RL1 %r9
#define RL2 %r10

#define RL0d %r8d
#define RL1d %r9d
#define RL2d %r10d

/* Right halves of blocks 0..2. */
#define RR0 %r11
#define RR1 %r12
#define RR2 %r13

#define RR0d %r11d
#define RR1d %r12d
#define RR2d %r13d

/*
 * Work registers.  These are the legacy registers whose two low bytes are
 * individually addressable (%al/%ah, %bl/%bh, %cl/%ch), which the round
 * macros rely on to extract table indices.
 */
#define RW0 %rax
#define RW1 %rbx
#define RW2 %rcx

#define RW0d %eax
#define RW1d %ebx
#define RW2d %ecx

#define RW0bl %al
#define RW1bl %bl
#define RW2bl %cl

#define RW0bh %ah
#define RW1bh %bh
#define RW2bh %ch

/*
 * Temporaries.  Note that RT1 is %rsi (the dst argument) and RT3 is %rdx
 * (the src argument): callers must save dst and finish reading src before
 * any macro that uses these temporaries runs.
 */
#define RT0 %r15
#define RT1 %rsi
#define RT2 %r14
#define RT3 %rdx

#define RT0d %r15d
#define RT1d %esi
#define RT2d %r14d
#define RT3d %edx
66
/***********************************************************************
 * 1-way 3DES
 ***********************************************************************/
/*
 * do_permutation(a, b, offset, mask) - masked bit exchange between two
 * 32-bit registers:
 *
 *	t  = ((a >> offset) ^ b) & mask;
 *	b ^= t;
 *	a ^= t << offset;
 *
 * i.e. the bits of b selected by mask trade places with the bits of a
 * selected by (mask << offset).  a and b must be 32-bit register names.
 * Clobbers RT0 and the flags.
 */
#define do_permutation(a, b, offset, mask) \
	movl a, RT0d; \
	shrl $(offset), RT0d; \
	xorl b, RT0d; \
	andl $(mask), RT0d; \
	xorl RT0d, b; \
	shll $(offset), RT0d; \
	xorl RT0d, a;
78
/*
 * expand_to_64bits(val, mask) - widen a 32-bit half-block to 64 bits:
 *
 *	val = (val | ((u64)ror32((u32)val, 4) << 32)) & mask;
 *
 * val is a 64-bit register name without the "d" suffix; mask is a 64-bit
 * register holding the mask.  The rotated copy is OR-ed into the upper
 * half, so the upper 32 bits of val are expected to be zero on entry
 * (true when val was last written through its 32-bit view).
 * Clobbers RT0 and the flags.
 */
#define expand_to_64bits(val, mask) \
	movl val##d, RT0d; \
	rorl $4, RT0d; \
	shlq $32, RT0; \
	orq RT0, val; \
	andq mask, val;
85
/*
 * compress_to_64bits(val) - fold the upper half of val back into the
 * lower half:
 *
 *	(u32)val |= rol32(val >> 32, 4);
 *
 * Despite the name the result is a 32-bit value: the final OR is a 32-bit
 * operation, which leaves the upper 32 bits of val zero.
 * val is a 64-bit register name without the "d" suffix.
 * Clobbers RT0 and the flags.
 */
#define compress_to_64bits(val) \
	movq val, RT0; \
	shrq $32, RT0; \
	roll $4, RT0d; \
	orl RT0d, val##d;
91
/*
 * initial_permutation(left, right) - input permutation for one block.
 *
 * Applies four masked bit exchanges between the two 32-bit halves, a
 * rotate-by-one with an exchange of the 0xaaaaaaaa bits, and finally
 * widens each half to the 64-bit form consumed by round1() using the
 * 0x3f3f3f3f3f3f3f3f mask.
 *
 * left/right are 64-bit register names without the "d" suffix.
 * Clobbers RT0, RT3 (holds the mask afterwards), RW0 and the flags.
 */
#define initial_permutation(left, right) \
	do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \
	do_permutation(left##d, right##d, 16, 0x0000ffff); \
	do_permutation(right##d, left##d, 2, 0x33333333); \
	do_permutation(right##d, left##d, 8, 0x00ff00ff); \
	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
	movl left##d, RW0d; \
	roll $1, right##d; \
	xorl right##d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, left##d; \
	xorl RW0d, right##d; \
	roll $1, left##d; \
	expand_to_64bits(right, RT3); \
	expand_to_64bits(left, RT3);
107
/*
 * final_permutation(left, right) - output permutation for one block.
 *
 * Mirror image of initial_permutation(): both halves are folded back to
 * 32 bits, the rotate/0xaaaaaaaa exchange is undone with right rotates,
 * and the four masked bit exchanges are applied in reverse order.
 *
 * left/right are 64-bit register names without the "d" suffix.
 * Clobbers RT0, RW0 and the flags.
 */
#define final_permutation(left, right) \
	compress_to_64bits(right); \
	compress_to_64bits(left); \
	movl right##d, RW0d; \
	rorl $1, left##d; \
	xorl left##d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, right##d; \
	xorl RW0d, left##d; \
	rorl $1, right##d; \
	do_permutation(right##d, left##d, 8, 0x00ff00ff); \
	do_permutation(right##d, left##d, 2, 0x33333333); \
	do_permutation(left##d, right##d, 16, 0x0000ffff); \
	do_permutation(left##d, right##d, 4, 0x0f0f0f0f);
122
/*
 * round1(n, from, to, load_next_key) - one round for a single block.
 *
 * On entry RW0 holds round key n.  It is XOR-ed with `from`, and each of
 * the eight bytes of the result indexes one lookup table; the eight
 * 64-bit table entries are XOR-ed into `to`:
 *
 *	byte 0 -> s8	byte 1 -> s6	byte 2 -> s4	byte 3 -> s2
 *	byte 4 -> s7	byte 5 -> s5	byte 6 -> s3	byte 7 -> s1
 *
 * The s8/s4/s7/s3 entries are accumulated in RT0 and folded into `to`
 * near the end; the other four are XOR-ed into `to` directly.
 *
 * A RIP-relative operand cannot carry an index register, so each table
 * base is first loaded into RW1 and then indexed.
 *
 * load_next_key names the macro invoked as load_next_key(n, RW0) once all
 * bytes of RW0 have been consumed; pass dummy2 to load nothing.
 *
 * Clobbers RW0, RW1, RT0-RT3, RL1 (used as a scratch index, so `from` and
 * `to` must not be RL1) and the flags.
 */
#define round1(n, from, to, load_next_key) \
	xorq from, RW0; \
	\
	movzbl RW0bl, RT0d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	movzbl RW0bl, RT2d; \
	movzbl RW0bh, RT3d; \
	shrq $16, RW0; \
	leaq s8(%rip), RW1; \
	movq (RW1, RT0, 8), RT0; \
	leaq s6(%rip), RW1; \
	xorq (RW1, RT1, 8), to; \
	movzbl RW0bl, RL1d; \
	movzbl RW0bh, RT1d; \
	shrl $16, RW0d; \
	leaq s4(%rip), RW1; \
	xorq (RW1, RT2, 8), RT0; \
	leaq s2(%rip), RW1; \
	xorq (RW1, RT3, 8), to; \
	movzbl RW0bl, RT2d; \
	movzbl RW0bh, RT3d; \
	leaq s7(%rip), RW1; \
	xorq (RW1, RL1, 8), RT0; \
	leaq s5(%rip), RW1; \
	xorq (RW1, RT1, 8), to; \
	leaq s3(%rip), RW1; \
	xorq (RW1, RT2, 8), RT0; \
	load_next_key(n, RW0); \
	xorq RT0, to; \
	leaq s1(%rip), RW1; \
	xorq (RW1, RT3, 8), to;
155
/*
 * load_next_key(n, RWx) - load the round key that follows key n, i.e. the
 * 64-bit word at byte offset (n + 1) * 8 from CTX, into register RWx.
 */
#define load_next_key(n, RWx) \
	movq (((n) + 1) * 8)(CTX), RWx;
158
/*
 * dummy2(a, b) - two-argument macro that expands to nothing.  Passed to
 * the round macros in place of a real two-argument macro when the
 * corresponding step must be skipped (e.g. no key to load after the last
 * round).
 */
#define dummy2(a, b) /*_*/
160
/*
 * read_block(io, left, right) - load one 8-byte block from address `io`.
 * The first 32-bit word goes to left, the second to right; both are
 * byte-swapped after loading.  io is a base register in parentheses-free
 * form (e.g. %rdx); left/right are 64-bit register names without "d".
 */
#define read_block(io, left, right) \
	movl (io), left##d; \
	movl 4(io), right##d; \
	bswapl left##d; \
	bswapl right##d;
166
/*
 * write_block(io, left, right) - store one 8-byte block at address `io`.
 * Both 32-bit halves are byte-swapped in place (the registers are
 * modified), then left is stored at offset 0 and right at offset 4.
 */
#define write_block(io, left, right) \
	bswapl left##d; \
	bswapl right##d; \
	movl left##d, (io); \
	movl right##d, 4(io);
172
/*
 * des3_ede_x86_64_crypt_blk - process one 8-byte block.
 *
 * Runs 48 rounds in three passes of 16.  The from/to roles of the two
 * halves are flipped at each pass boundary instead of swapping registers.
 * Round keys are read as 48 consecutive 64-bit words starting at CTX.
 *
 * The macros used here clobber %rsi (RT1) and %rdx (RT3), so dst is kept
 * on the stack and src is fully read before the first permutation.
 * Callee-saved %rbx and %r12-%r15 are saved and restored.
 */
SYM_FUNC_START(des3_ede_x86_64_crypt_blk)
	/* input:
	 *	%rdi: round keys, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */
	pushq %rbx;
	pushq %r12;
	pushq %r13;
	pushq %r14;
	pushq %r15;

	pushq %rsi; /* dst */

	read_block(%rdx, RL0, RR0);
	initial_permutation(RL0, RR0);

	/* round1() expects the current round key in RW0 */
	movq (CTX), RW0;

	/* first pass: keys 0..15 */
	round1(0, RR0, RL0, load_next_key);
	round1(1, RL0, RR0, load_next_key);
	round1(2, RR0, RL0, load_next_key);
	round1(3, RL0, RR0, load_next_key);
	round1(4, RR0, RL0, load_next_key);
	round1(5, RL0, RR0, load_next_key);
	round1(6, RR0, RL0, load_next_key);
	round1(7, RL0, RR0, load_next_key);
	round1(8, RR0, RL0, load_next_key);
	round1(9, RL0, RR0, load_next_key);
	round1(10, RR0, RL0, load_next_key);
	round1(11, RL0, RR0, load_next_key);
	round1(12, RR0, RL0, load_next_key);
	round1(13, RL0, RR0, load_next_key);
	round1(14, RR0, RL0, load_next_key);
	round1(15, RL0, RR0, load_next_key);

	/* second pass: keys 16..31, halves take the opposite roles */
	round1(16+0, RL0, RR0, load_next_key);
	round1(16+1, RR0, RL0, load_next_key);
	round1(16+2, RL0, RR0, load_next_key);
	round1(16+3, RR0, RL0, load_next_key);
	round1(16+4, RL0, RR0, load_next_key);
	round1(16+5, RR0, RL0, load_next_key);
	round1(16+6, RL0, RR0, load_next_key);
	round1(16+7, RR0, RL0, load_next_key);
	round1(16+8, RL0, RR0, load_next_key);
	round1(16+9, RR0, RL0, load_next_key);
	round1(16+10, RL0, RR0, load_next_key);
	round1(16+11, RR0, RL0, load_next_key);
	round1(16+12, RL0, RR0, load_next_key);
	round1(16+13, RR0, RL0, load_next_key);
	round1(16+14, RL0, RR0, load_next_key);
	round1(16+15, RR0, RL0, load_next_key);

	/* third pass: keys 32..47, roles as in the first pass */
	round1(32+0, RR0, RL0, load_next_key);
	round1(32+1, RL0, RR0, load_next_key);
	round1(32+2, RR0, RL0, load_next_key);
	round1(32+3, RL0, RR0, load_next_key);
	round1(32+4, RR0, RL0, load_next_key);
	round1(32+5, RL0, RR0, load_next_key);
	round1(32+6, RR0, RL0, load_next_key);
	round1(32+7, RL0, RR0, load_next_key);
	round1(32+8, RR0, RL0, load_next_key);
	round1(32+9, RL0, RR0, load_next_key);
	round1(32+10, RR0, RL0, load_next_key);
	round1(32+11, RL0, RR0, load_next_key);
	round1(32+12, RR0, RL0, load_next_key);
	round1(32+13, RL0, RR0, load_next_key);
	round1(32+14, RR0, RL0, load_next_key);
	/* last round: there is no further key to load */
	round1(32+15, RL0, RR0, dummy2);

	/* halves are handed over in swapped order: RR0 is written first */
	final_permutation(RR0, RL0);

	popq %rsi; /* dst */
	write_block(%rsi, RR0, RL0);

	popq %r15;
	popq %r14;
	popq %r13;
	popq %r12;
	popq %rbx;

	RET;
SYM_FUNC_END(des3_ede_x86_64_crypt_blk)
256
/***********************************************************************
 * 3-way 3DES
 ***********************************************************************/
/*
 * The 3-way code reuses expand_to_64bits() and compress_to_64bits() as
 * defined in the 1-way section above.  They were previously redefined
 * here with identical bodies; keeping a single definition prevents the
 * two copies from drifting apart.
 */
272
/*
 * Per-block building blocks of initial_permutation3().  `i` is the block
 * number (0, 1 or 2) and is pasted into the register macro names, e.g.
 * left=RL, i=1 selects RL1/RL1d and RW1d.
 */

/* Exchange step 1: offsets 4 and 16 between left and right. */
#define __ip3_swap_4_16(left, right, i) \
	do_permutation(left##i##d, right##i##d, 4, 0x0f0f0f0f); \
	do_permutation(left##i##d, right##i##d, 16, 0x0000ffff);

/* Exchange step 2: offsets 2 and 8 between right and left. */
#define __ip3_swap_2_8(left, right, i) \
	do_permutation(right##i##d, left##i##d, 2, 0x33333333); \
	do_permutation(right##i##d, left##i##d, 8, 0x00ff00ff);

/*
 * Rotate both halves left by one while exchanging their 0xaaaaaaaa bits,
 * then widen each half to 64 bits.  Expects the expand mask in RT3 and
 * uses RW<i> as scratch.
 */
#define __ip3_rotate_expand(left, right, i) \
	movl left##i##d, RW##i##d; \
	roll $1, right##i##d; \
	xorl right##i##d, RW##i##d; \
	andl $0xaaaaaaaa, RW##i##d; \
	xorl RW##i##d, left##i##d; \
	xorl RW##i##d, right##i##d; \
	roll $1, left##i##d; \
	expand_to_64bits(right##i, RT3); \
	expand_to_64bits(left##i, RT3);

/*
 * initial_permutation3(left, right) - input permutation for three blocks.
 *
 * left/right are register-name prefixes (RL/RR); the block number and
 * size suffix are appended by the helpers.  Each step is applied to
 * blocks 0, 1 and 2 in turn before the next step starts.
 *
 * Clobbers RT0, RT3 (holds the expand mask afterwards), RW0-RW2 and the
 * flags.
 */
#define initial_permutation3(left, right) \
	__ip3_swap_4_16(left, right, 0); \
	__ip3_swap_4_16(left, right, 1); \
	__ip3_swap_4_16(left, right, 2); \
	\
	__ip3_swap_2_8(left, right, 0); \
	__ip3_swap_2_8(left, right, 1); \
	__ip3_swap_2_8(left, right, 2); \
	\
	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
	\
	__ip3_rotate_expand(left, right, 0); \
	__ip3_rotate_expand(left, right, 1); \
	__ip3_rotate_expand(left, right, 2);
317
/*
 * Per-block building blocks of final_permutation3().  `i` is the block
 * number (0, 1 or 2) and is pasted into the register macro names.
 */

/*
 * Fold both halves back to 32 bits, then rotate them right by one while
 * exchanging their 0xaaaaaaaa bits.  Uses RW<i> as scratch.
 */
#define __fp3_compress_rotate(left, right, i) \
	compress_to_64bits(right##i); \
	compress_to_64bits(left##i); \
	movl right##i##d, RW##i##d; \
	rorl $1, left##i##d; \
	xorl left##i##d, RW##i##d; \
	andl $0xaaaaaaaa, RW##i##d; \
	xorl RW##i##d, right##i##d; \
	xorl RW##i##d, left##i##d; \
	rorl $1, right##i##d;

/* Exchange step: offsets 8 and 2 between right and left. */
#define __fp3_swap_8_2(left, right, i) \
	do_permutation(right##i##d, left##i##d, 8, 0x00ff00ff); \
	do_permutation(right##i##d, left##i##d, 2, 0x33333333);

/* Exchange step: offsets 16 and 4 between left and right. */
#define __fp3_swap_16_4(left, right, i) \
	do_permutation(left##i##d, right##i##d, 16, 0x0000ffff); \
	do_permutation(left##i##d, right##i##d, 4, 0x0f0f0f0f);

/*
 * final_permutation3(left, right) - output permutation for three blocks.
 *
 * left/right are register-name prefixes (RL/RR).  Each step is applied
 * to blocks 0, 1 and 2 in turn before the next step starts.
 *
 * Clobbers RT0, RW0-RW2 and the flags.
 */
#define final_permutation3(left, right) \
	__fp3_compress_rotate(left, right, 0); \
	__fp3_compress_rotate(left, right, 1); \
	__fp3_compress_rotate(left, right, 2); \
	\
	__fp3_swap_8_2(left, right, 0); \
	__fp3_swap_8_2(left, right, 1); \
	__fp3_swap_8_2(left, right, 2); \
	\
	__fp3_swap_16_4(left, right, 0); \
	__fp3_swap_16_4(left, right, 1); \
	__fp3_swap_16_4(left, right, 2);
360
/*
 * Building blocks of round3().  `i` is the block number (0, 1 or 2) and
 * is pasted into the register macro names (RW<i>, RW<i>bl, to<i>, ...).
 */

/* Extract the two low bytes of RW<i> as table indices into RT3 and RT1. */
#define __round3_idx(i) \
	movzbl RW##i##bl, RT3d; \
	movzbl RW##i##bh, RT1d;

/*
 * XOR table ta[RT3] and table tb[RT1] into to<i>.  A RIP-relative operand
 * cannot carry an index register, so each table base goes through RT2.
 */
#define __round3_xor2(to, i, ta, tb) \
	leaq ta(%rip), RT2; \
	xorq (RT2, RT3, 8), to##i; \
	leaq tb(%rip), RT2; \
	xorq (RT2, RT1, 8), to##i;

/*
 * One round for block i.  RW<i> holds the round key on entry and is
 * consumed 16 bits at a time.  `reload` is emitted after the last pair of
 * indices has been extracted, i.e. as soon as RW<i> is free to be
 * overwritten, and before the final two table lookups.
 */
#define __round3_lane(from, to, i, reload) \
	xorq from##i, RW##i; \
	__round3_idx(i); \
	shrq $16, RW##i; \
	__round3_xor2(to, i, s8, s6); \
	__round3_idx(i); \
	shrq $16, RW##i; \
	__round3_xor2(to, i, s4, s2); \
	__round3_idx(i); \
	shrl $16, RW##i##d; \
	__round3_xor2(to, i, s7, s5); \
	__round3_idx(i); \
	reload; \
	__round3_xor2(to, i, s3, s1);

/*
 * round3(n, from, to, load_next_key, do_movq) - one round for three
 * blocks, processed one after another.
 *
 * On entry RW0, RW1 and RW2 each hold round key n.  from/to are
 * register-name prefixes (RL/RR).  Byte-to-table mapping per block:
 *
 *	byte 0 -> s8	byte 1 -> s6	byte 2 -> s4	byte 3 -> s2
 *	byte 4 -> s7	byte 5 -> s5	byte 6 -> s3	byte 7 -> s1
 *
 * load_next_key(n, RW0) refills RW0 while block 0 is being finished;
 * do_movq(RW0, RW1) and do_movq(RW0, RW2) then copy that key to the other
 * two work registers.  Pass dummy2 for either to emit nothing.
 *
 * Clobbers RW0-RW2, RT1, RT2, RT3 and the flags.
 */
#define round3(n, from, to, load_next_key, do_movq) \
	__round3_lane(from, to, 0, load_next_key(n, RW0)); \
	__round3_lane(from, to, 1, do_movq(RW0, RW1)); \
	__round3_lane(from, to, 2, do_movq(RW0, RW2));
449
/*
 * __movq(src, dst) - plain 64-bit register move, wrapped in a macro so it
 * can be passed to round3() as its do_movq argument.
 */
#define __movq(src, dst) \
	movq src, dst;
452
/*
 * des3_ede_x86_64_crypt_blk_3way - process three 8-byte blocks at once.
 *
 * Reads 24 bytes from src and writes 24 bytes to dst.  Runs 48 rounds in
 * three passes of 16, flipping the from/to roles of the halves at each
 * pass boundary.  Round keys are read as 48 consecutive 64-bit words
 * starting at CTX; every key is loaded once into RW0 and copied to RW1
 * and RW2.
 *
 * The macros used here clobber %rsi (RT1) and %rdx (RT3), so dst is kept
 * on the stack and src is fully read before the first permutation.
 * Callee-saved %rbx and %r12-%r15 are saved and restored.
 */
SYM_FUNC_START(des3_ede_x86_64_crypt_blk_3way)
	/* input:
	 *	%rdi: ctx, round keys
	 *	%rsi: dst (3 blocks)
	 *	%rdx: src (3 blocks)
	 */

	pushq %rbx;
	pushq %r12;
	pushq %r13;
	pushq %r14;
	pushq %r15;

	pushq %rsi; /* dst */

	/* load input */
	movl 0 * 4(%rdx), RL0d;
	movl 1 * 4(%rdx), RR0d;
	movl 2 * 4(%rdx), RL1d;
	movl 3 * 4(%rdx), RR1d;
	movl 4 * 4(%rdx), RL2d;
	movl 5 * 4(%rdx), RR2d;

	bswapl RL0d;
	bswapl RR0d;
	bswapl RL1d;
	bswapl RR1d;
	bswapl RL2d;
	bswapl RR2d;

	initial_permutation3(RL, RR);

	/* round3() expects the current round key in all three work registers */
	movq 0(CTX), RW0;
	movq RW0, RW1;
	movq RW0, RW2;

	/* first pass: keys 0..15 */
	round3(0, RR, RL, load_next_key, __movq);
	round3(1, RL, RR, load_next_key, __movq);
	round3(2, RR, RL, load_next_key, __movq);
	round3(3, RL, RR, load_next_key, __movq);
	round3(4, RR, RL, load_next_key, __movq);
	round3(5, RL, RR, load_next_key, __movq);
	round3(6, RR, RL, load_next_key, __movq);
	round3(7, RL, RR, load_next_key, __movq);
	round3(8, RR, RL, load_next_key, __movq);
	round3(9, RL, RR, load_next_key, __movq);
	round3(10, RR, RL, load_next_key, __movq);
	round3(11, RL, RR, load_next_key, __movq);
	round3(12, RR, RL, load_next_key, __movq);
	round3(13, RL, RR, load_next_key, __movq);
	round3(14, RR, RL, load_next_key, __movq);
	round3(15, RL, RR, load_next_key, __movq);

	/* second pass: keys 16..31, halves take the opposite roles */
	round3(16+0, RL, RR, load_next_key, __movq);
	round3(16+1, RR, RL, load_next_key, __movq);
	round3(16+2, RL, RR, load_next_key, __movq);
	round3(16+3, RR, RL, load_next_key, __movq);
	round3(16+4, RL, RR, load_next_key, __movq);
	round3(16+5, RR, RL, load_next_key, __movq);
	round3(16+6, RL, RR, load_next_key, __movq);
	round3(16+7, RR, RL, load_next_key, __movq);
	round3(16+8, RL, RR, load_next_key, __movq);
	round3(16+9, RR, RL, load_next_key, __movq);
	round3(16+10, RL, RR, load_next_key, __movq);
	round3(16+11, RR, RL, load_next_key, __movq);
	round3(16+12, RL, RR, load_next_key, __movq);
	round3(16+13, RR, RL, load_next_key, __movq);
	round3(16+14, RL, RR, load_next_key, __movq);
	round3(16+15, RR, RL, load_next_key, __movq);

	/* third pass: keys 32..47, roles as in the first pass */
	round3(32+0, RR, RL, load_next_key, __movq);
	round3(32+1, RL, RR, load_next_key, __movq);
	round3(32+2, RR, RL, load_next_key, __movq);
	round3(32+3, RL, RR, load_next_key, __movq);
	round3(32+4, RR, RL, load_next_key, __movq);
	round3(32+5, RL, RR, load_next_key, __movq);
	round3(32+6, RR, RL, load_next_key, __movq);
	round3(32+7, RL, RR, load_next_key, __movq);
	round3(32+8, RR, RL, load_next_key, __movq);
	round3(32+9, RL, RR, load_next_key, __movq);
	round3(32+10, RR, RL, load_next_key, __movq);
	round3(32+11, RL, RR, load_next_key, __movq);
	round3(32+12, RR, RL, load_next_key, __movq);
	round3(32+13, RL, RR, load_next_key, __movq);
	round3(32+14, RR, RL, load_next_key, __movq);
	/* last round: no further key to load or copy */
	round3(32+15, RL, RR, dummy2, dummy2);

	/* halves are handed over in swapped order: RR<i> is written first */
	final_permutation3(RR, RL);

	bswapl RR0d;
	bswapl RL0d;
	bswapl RR1d;
	bswapl RL1d;
	bswapl RR2d;
	bswapl RL2d;

	popq %rsi; /* dst */
	movl RR0d, 0 * 4(%rsi);
	movl RL0d, 1 * 4(%rsi);
	movl RR1d, 2 * 4(%rsi);
	movl RL1d, 3 * 4(%rsi);
	movl RR2d, 4 * 4(%rsi);
	movl RL2d, 5 * 4(%rsi);

	popq %r15;
	popq %r14;
	popq %r13;
	popq %r12;
	popq %rbx;

	RET;
SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way)
565
566.section .rodata, "a", @progbits
567.align 16
568.L_s1:
569 .quad 0x0010100001010400, 0x0000000000000000
570 .quad 0x0000100000010000, 0x0010100001010404
571 .quad 0x0010100001010004, 0x0000100000010404
572 .quad 0x0000000000000004, 0x0000100000010000
573 .quad 0x0000000000000400, 0x0010100001010400
574 .quad 0x0010100001010404, 0x0000000000000400
575 .quad 0x0010000001000404, 0x0010100001010004
576 .quad 0x0010000001000000, 0x0000000000000004
577 .quad 0x0000000000000404, 0x0010000001000400
578 .quad 0x0010000001000400, 0x0000100000010400
579 .quad 0x0000100000010400, 0x0010100001010000
580 .quad 0x0010100001010000, 0x0010000001000404
581 .quad 0x0000100000010004, 0x0010000001000004
582 .quad 0x0010000001000004, 0x0000100000010004
583 .quad 0x0000000000000000, 0x0000000000000404
584 .quad 0x0000100000010404, 0x0010000001000000
585 .quad 0x0000100000010000, 0x0010100001010404
586 .quad 0x0000000000000004, 0x0010100001010000
587 .quad 0x0010100001010400, 0x0010000001000000
588 .quad 0x0010000001000000, 0x0000000000000400
589 .quad 0x0010100001010004, 0x0000100000010000
590 .quad 0x0000100000010400, 0x0010000001000004
591 .quad 0x0000000000000400, 0x0000000000000004
592 .quad 0x0010000001000404, 0x0000100000010404
593 .quad 0x0010100001010404, 0x0000100000010004
594 .quad 0x0010100001010000, 0x0010000001000404
595 .quad 0x0010000001000004, 0x0000000000000404
596 .quad 0x0000100000010404, 0x0010100001010400
597 .quad 0x0000000000000404, 0x0010000001000400
598 .quad 0x0010000001000400, 0x0000000000000000
599 .quad 0x0000100000010004, 0x0000100000010400
600 .quad 0x0000000000000000, 0x0010100001010004
601.L_s2:
602 .quad 0x0801080200100020, 0x0800080000000000
603 .quad 0x0000080000000000, 0x0001080200100020
604 .quad 0x0001000000100000, 0x0000000200000020
605 .quad 0x0801000200100020, 0x0800080200000020
606 .quad 0x0800000200000020, 0x0801080200100020
607 .quad 0x0801080000100000, 0x0800000000000000
608 .quad 0x0800080000000000, 0x0001000000100000
609 .quad 0x0000000200000020, 0x0801000200100020
610 .quad 0x0001080000100000, 0x0001000200100020
611 .quad 0x0800080200000020, 0x0000000000000000
612 .quad 0x0800000000000000, 0x0000080000000000
613 .quad 0x0001080200100020, 0x0801000000100000
614 .quad 0x0001000200100020, 0x0800000200000020
615 .quad 0x0000000000000000, 0x0001080000100000
616 .quad 0x0000080200000020, 0x0801080000100000
617 .quad 0x0801000000100000, 0x0000080200000020
618 .quad 0x0000000000000000, 0x0001080200100020
619 .quad 0x0801000200100020, 0x0001000000100000
620 .quad 0x0800080200000020, 0x0801000000100000
621 .quad 0x0801080000100000, 0x0000080000000000
622 .quad 0x0801000000100000, 0x0800080000000000
623 .quad 0x0000000200000020, 0x0801080200100020
624 .quad 0x0001080200100020, 0x0000000200000020
625 .quad 0x0000080000000000, 0x0800000000000000
626 .quad 0x0000080200000020, 0x0801080000100000
627 .quad 0x0001000000100000, 0x0800000200000020
628 .quad 0x0001000200100020, 0x0800080200000020
629 .quad 0x0800000200000020, 0x0001000200100020
630 .quad 0x0001080000100000, 0x0000000000000000
631 .quad 0x0800080000000000, 0x0000080200000020
632 .quad 0x0800000000000000, 0x0801000200100020
633 .quad 0x0801080200100020, 0x0001080000100000
634.L_s3:
635 .quad 0x0000002000000208, 0x0000202008020200
636 .quad 0x0000000000000000, 0x0000200008020008
637 .quad 0x0000002008000200, 0x0000000000000000
638 .quad 0x0000202000020208, 0x0000002008000200
639 .quad 0x0000200000020008, 0x0000000008000008
640 .quad 0x0000000008000008, 0x0000200000020000
641 .quad 0x0000202008020208, 0x0000200000020008
642 .quad 0x0000200008020000, 0x0000002000000208
643 .quad 0x0000000008000000, 0x0000000000000008
644 .quad 0x0000202008020200, 0x0000002000000200
645 .quad 0x0000202000020200, 0x0000200008020000
646 .quad 0x0000200008020008, 0x0000202000020208
647 .quad 0x0000002008000208, 0x0000202000020200
648 .quad 0x0000200000020000, 0x0000002008000208
649 .quad 0x0000000000000008, 0x0000202008020208
650 .quad 0x0000002000000200, 0x0000000008000000
651 .quad 0x0000202008020200, 0x0000000008000000
652 .quad 0x0000200000020008, 0x0000002000000208
653 .quad 0x0000200000020000, 0x0000202008020200
654 .quad 0x0000002008000200, 0x0000000000000000
655 .quad 0x0000002000000200, 0x0000200000020008
656 .quad 0x0000202008020208, 0x0000002008000200
657 .quad 0x0000000008000008, 0x0000002000000200
658 .quad 0x0000000000000000, 0x0000200008020008
659 .quad 0x0000002008000208, 0x0000200000020000
660 .quad 0x0000000008000000, 0x0000202008020208
661 .quad 0x0000000000000008, 0x0000202000020208
662 .quad 0x0000202000020200, 0x0000000008000008
663 .quad 0x0000200008020000, 0x0000002008000208
664 .quad 0x0000002000000208, 0x0000200008020000
665 .quad 0x0000202000020208, 0x0000000000000008
666 .quad 0x0000200008020008, 0x0000202000020200
667.L_s4:
668 .quad 0x1008020000002001, 0x1000020800002001
669 .quad 0x1000020800002001, 0x0000000800000000
670 .quad 0x0008020800002000, 0x1008000800000001
671 .quad 0x1008000000000001, 0x1000020000002001
672 .quad 0x0000000000000000, 0x0008020000002000
673 .quad 0x0008020000002000, 0x1008020800002001
674 .quad 0x1000000800000001, 0x0000000000000000
675 .quad 0x0008000800000000, 0x1008000000000001
676 .quad 0x1000000000000001, 0x0000020000002000
677 .quad 0x0008000000000000, 0x1008020000002001
678 .quad 0x0000000800000000, 0x0008000000000000
679 .quad 0x1000020000002001, 0x0000020800002000
680 .quad 0x1008000800000001, 0x1000000000000001
681 .quad 0x0000020800002000, 0x0008000800000000
682 .quad 0x0000020000002000, 0x0008020800002000
683 .quad 0x1008020800002001, 0x1000000800000001
684 .quad 0x0008000800000000, 0x1008000000000001
685 .quad 0x0008020000002000, 0x1008020800002001
686 .quad 0x1000000800000001, 0x0000000000000000
687 .quad 0x0000000000000000, 0x0008020000002000
688 .quad 0x0000020800002000, 0x0008000800000000
689 .quad 0x1008000800000001, 0x1000000000000001
690 .quad 0x1008020000002001, 0x1000020800002001
691 .quad 0x1000020800002001, 0x0000000800000000
692 .quad 0x1008020800002001, 0x1000000800000001
693 .quad 0x1000000000000001, 0x0000020000002000
694 .quad 0x1008000000000001, 0x1000020000002001
695 .quad 0x0008020800002000, 0x1008000800000001
696 .quad 0x1000020000002001, 0x0000020800002000
697 .quad 0x0008000000000000, 0x1008020000002001
698 .quad 0x0000000800000000, 0x0008000000000000
699 .quad 0x0000020000002000, 0x0008020800002000
700.L_s5:
701 .quad 0x0000001000000100, 0x0020001002080100
702 .quad 0x0020000002080000, 0x0420001002000100
703 .quad 0x0000000000080000, 0x0000001000000100
704 .quad 0x0400000000000000, 0x0020000002080000
705 .quad 0x0400001000080100, 0x0000000000080000
706 .quad 0x0020001002000100, 0x0400001000080100
707 .quad 0x0420001002000100, 0x0420000002080000
708 .quad 0x0000001000080100, 0x0400000000000000
709 .quad 0x0020000002000000, 0x0400000000080000
710 .quad 0x0400000000080000, 0x0000000000000000
711 .quad 0x0400001000000100, 0x0420001002080100
712 .quad 0x0420001002080100, 0x0020001002000100
713 .quad 0x0420000002080000, 0x0400001000000100
714 .quad 0x0000000000000000, 0x0420000002000000
715 .quad 0x0020001002080100, 0x0020000002000000
716 .quad 0x0420000002000000, 0x0000001000080100
717 .quad 0x0000000000080000, 0x0420001002000100
718 .quad 0x0000001000000100, 0x0020000002000000
719 .quad 0x0400000000000000, 0x0020000002080000
720 .quad 0x0420001002000100, 0x0400001000080100
721 .quad 0x0020001002000100, 0x0400000000000000
722 .quad 0x0420000002080000, 0x0020001002080100
723 .quad 0x0400001000080100, 0x0000001000000100
724 .quad 0x0020000002000000, 0x0420000002080000
725 .quad 0x0420001002080100, 0x0000001000080100
726 .quad 0x0420000002000000, 0x0420001002080100
727 .quad 0x0020000002080000, 0x0000000000000000
728 .quad 0x0400000000080000, 0x0420000002000000
729 .quad 0x0000001000080100, 0x0020001002000100
730 .quad 0x0400001000000100, 0x0000000000080000
731 .quad 0x0000000000000000, 0x0400000000080000
732 .quad 0x0020001002080100, 0x0400001000000100
733.L_s6:
734 .quad 0x0200000120000010, 0x0204000020000000
735 .quad 0x0000040000000000, 0x0204040120000010
736 .quad 0x0204000020000000, 0x0000000100000010
737 .quad 0x0204040120000010, 0x0004000000000000
738 .quad 0x0200040020000000, 0x0004040100000010
739 .quad 0x0004000000000000, 0x0200000120000010
740 .quad 0x0004000100000010, 0x0200040020000000
741 .quad 0x0200000020000000, 0x0000040100000010
742 .quad 0x0000000000000000, 0x0004000100000010
743 .quad 0x0200040120000010, 0x0000040000000000
744 .quad 0x0004040000000000, 0x0200040120000010
745 .quad 0x0000000100000010, 0x0204000120000010
746 .quad 0x0204000120000010, 0x0000000000000000
747 .quad 0x0004040100000010, 0x0204040020000000
748 .quad 0x0000040100000010, 0x0004040000000000
749 .quad 0x0204040020000000, 0x0200000020000000
750 .quad 0x0200040020000000, 0x0000000100000010
751 .quad 0x0204000120000010, 0x0004040000000000
752 .quad 0x0204040120000010, 0x0004000000000000
753 .quad 0x0000040100000010, 0x0200000120000010
754 .quad 0x0004000000000000, 0x0200040020000000
755 .quad 0x0200000020000000, 0x0000040100000010
756 .quad 0x0200000120000010, 0x0204040120000010
757 .quad 0x0004040000000000, 0x0204000020000000
758 .quad 0x0004040100000010, 0x0204040020000000
759 .quad 0x0000000000000000, 0x0204000120000010
760 .quad 0x0000000100000010, 0x0000040000000000
761 .quad 0x0204000020000000, 0x0004040100000010
762 .quad 0x0000040000000000, 0x0004000100000010
763 .quad 0x0200040120000010, 0x0000000000000000
764 .quad 0x0204040020000000, 0x0200000020000000
765 .quad 0x0004000100000010, 0x0200040120000010
766.L_s7:
767 .quad 0x0002000000200000, 0x2002000004200002
768 .quad 0x2000000004000802, 0x0000000000000000
769 .quad 0x0000000000000800, 0x2000000004000802
770 .quad 0x2002000000200802, 0x0002000004200800
771 .quad 0x2002000004200802, 0x0002000000200000
772 .quad 0x0000000000000000, 0x2000000004000002
773 .quad 0x2000000000000002, 0x0000000004000000
774 .quad 0x2002000004200002, 0x2000000000000802
775 .quad 0x0000000004000800, 0x2002000000200802
776 .quad 0x2002000000200002, 0x0000000004000800
777 .quad 0x2000000004000002, 0x0002000004200000
778 .quad 0x0002000004200800, 0x2002000000200002
779 .quad 0x0002000004200000, 0x0000000000000800
780 .quad 0x2000000000000802, 0x2002000004200802
781 .quad 0x0002000000200800, 0x2000000000000002
782 .quad 0x0000000004000000, 0x0002000000200800
783 .quad 0x0000000004000000, 0x0002000000200800
784 .quad 0x0002000000200000, 0x2000000004000802
785 .quad 0x2000000004000802, 0x2002000004200002
786 .quad 0x2002000004200002, 0x2000000000000002
787 .quad 0x2002000000200002, 0x0000000004000000
788 .quad 0x0000000004000800, 0x0002000000200000
789 .quad 0x0002000004200800, 0x2000000000000802
790 .quad 0x2002000000200802, 0x0002000004200800
791 .quad 0x2000000000000802, 0x2000000004000002
792 .quad 0x2002000004200802, 0x0002000004200000
793 .quad 0x0002000000200800, 0x0000000000000000
794 .quad 0x2000000000000002, 0x2002000004200802
795 .quad 0x0000000000000000, 0x2002000000200802
796 .quad 0x0002000004200000, 0x0000000000000800
797 .quad 0x2000000004000002, 0x0000000004000800
798 .quad 0x0000000000000800, 0x2002000000200002
799.L_s8:
800 .quad 0x0100010410001000, 0x0000010000001000
801 .quad 0x0000000000040000, 0x0100010410041000
802 .quad 0x0100000010000000, 0x0100010410001000
803 .quad 0x0000000400000000, 0x0100000010000000
804 .quad 0x0000000400040000, 0x0100000010040000
805 .quad 0x0100010410041000, 0x0000010000041000
806 .quad 0x0100010010041000, 0x0000010400041000
807 .quad 0x0000010000001000, 0x0000000400000000
808 .quad 0x0100000010040000, 0x0100000410000000
809 .quad 0x0100010010001000, 0x0000010400001000
810 .quad 0x0000010000041000, 0x0000000400040000
811 .quad 0x0100000410040000, 0x0100010010041000
812 .quad 0x0000010400001000, 0x0000000000000000
813 .quad 0x0000000000000000, 0x0100000410040000
814 .quad 0x0100000410000000, 0x0100010010001000
815 .quad 0x0000010400041000, 0x0000000000040000
816 .quad 0x0000010400041000, 0x0000000000040000
817 .quad 0x0100010010041000, 0x0000010000001000
818 .quad 0x0000000400000000, 0x0100000410040000
819 .quad 0x0000010000001000, 0x0000010400041000
820 .quad 0x0100010010001000, 0x0000000400000000
821 .quad 0x0100000410000000, 0x0100000010040000
822 .quad 0x0100000410040000, 0x0100000010000000
823 .quad 0x0000000000040000, 0x0100010410001000
824 .quad 0x0000000000000000, 0x0100010410041000
825 .quad 0x0000000400040000, 0x0100000410000000
826 .quad 0x0100000010040000, 0x0100010010001000
827 .quad 0x0100010410001000, 0x0000000000000000
828 .quad 0x0100010410041000, 0x0000010000041000
829 .quad 0x0000010000041000, 0x0000010400001000
830 .quad 0x0000010400001000, 0x0000000400040000
831 .quad 0x0100000010000000, 0x0100010010041000
/*
 * des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher
 *
 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
16
17#include <linux/linkage.h>
18
19.file "des3_ede-asm_64.S"
20.text
21
/*
 * Addresses of the eight lookup tables used by the round macros.
 *
 * Only .L_s1 is referenced by name.  s2..s8 are computed from it on the
 * assumption that the tables are laid out back to back starting at .L_s1
 * and that every table occupies 64 quadwords (64 * 8 bytes).
 */
#define s1 .L_s1
#define s2 ((s1) + (64*8))
#define s3 ((s2) + (64*8))
#define s4 ((s3) + (64*8))
#define s5 ((s4) + (64*8))
#define s6 ((s5) + (64*8))
#define s7 ((s6) + (64*8))
#define s8 ((s7) + (64*8))
30
/*
 * Register macros.
 *
 * A trailing "d" names the 32-bit view of a register, "bl"/"bh" the low
 * byte and the second-lowest byte.  The digit is a block number (0..2).
 */

/* Pointer to the round keys (first function argument). */
#define CTX %rdi

/* Left halves. */
#define RL0 %r8
#define RL1 %r9
#define RL2 %r10

#define RL0d %r8d
#define RL1d %r9d
#define RL2d %r10d

/* Right halves. */
#define RR0 %r11
#define RR1 %r12
#define RR2 %r13

#define RR0d %r11d
#define RR1d %r12d
#define RR2d %r13d

/*
 * Work registers.  These are the legacy registers whose two low bytes are
 * individually addressable (%al/%ah, %bl/%bh, %cl/%ch).
 */
#define RW0 %rax
#define RW1 %rbx
#define RW2 %rcx

#define RW0d %eax
#define RW1d %ebx
#define RW2d %ecx

#define RW0bl %al
#define RW1bl %bl
#define RW2bl %cl

#define RW0bh %ah
#define RW1bh %bh
#define RW2bh %ch

/*
 * Temporaries.  RT1 is %rsi and RT3 is %rdx, which carry the dst and src
 * arguments of the function below; both are overwritten by the macros
 * that use these temporaries.
 */
#define RT0 %r15
#define RT1 %rsi
#define RT2 %r14
#define RT3 %rdx

#define RT0d %r15d
#define RT1d %esi
#define RT2d %r14d
#define RT3d %edx
75
/***********************************************************************
 * 1-way 3DES
 ***********************************************************************/
/*
 * do_permutation(a, b, offset, mask) - masked bit exchange between two
 * 32-bit registers:
 *
 *	t  = ((a >> offset) ^ b) & mask;
 *	b ^= t;
 *	a ^= t << offset;
 *
 * a and b must be 32-bit register names.  Clobbers RT0 and the flags.
 */
#define do_permutation(a, b, offset, mask) \
	movl a, RT0d; \
	shrl $(offset), RT0d; \
	xorl b, RT0d; \
	andl $(mask), RT0d; \
	xorl RT0d, b; \
	shll $(offset), RT0d; \
	xorl RT0d, a;
87
/*
 * expand_to_64bits(val, mask) - widen a 32-bit half-block to 64 bits:
 *
 *	val = (val | ((u64)ror32((u32)val, 4) << 32)) & mask;
 *
 * val is a 64-bit register name without the "d" suffix; mask is a 64-bit
 * register.  The upper 32 bits of val are expected to be zero on entry.
 * Clobbers RT0 and the flags.
 */
#define expand_to_64bits(val, mask) \
	movl val##d, RT0d; \
	rorl $4, RT0d; \
	shlq $32, RT0; \
	orq RT0, val; \
	andq mask, val;
94
/*
 * compress_to_64bits(val) - fold the upper half of val back into the
 * lower half:
 *
 *	(u32)val |= rol32(val >> 32, 4);
 *
 * Despite the name the result is 32 bits wide; the 32-bit OR leaves the
 * upper half of val zero.  Clobbers RT0 and the flags.
 */
#define compress_to_64bits(val) \
	movq val, RT0; \
	shrq $32, RT0; \
	roll $4, RT0d; \
	orl RT0d, val##d;
100
/*
 * initial_permutation(left, right) - input permutation for one block:
 * four masked bit exchanges, a rotate-by-one with an exchange of the
 * 0xaaaaaaaa bits, then widening of both halves to 64 bits.
 *
 * left/right are 64-bit register names without the "d" suffix.
 * Clobbers RT0, RT3 (holds the expand mask afterwards), RW0 and the flags.
 */
#define initial_permutation(left, right) \
	do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \
	do_permutation(left##d, right##d, 16, 0x0000ffff); \
	do_permutation(right##d, left##d, 2, 0x33333333); \
	do_permutation(right##d, left##d, 8, 0x00ff00ff); \
	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
	movl left##d, RW0d; \
	roll $1, right##d; \
	xorl right##d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, left##d; \
	xorl RW0d, right##d; \
	roll $1, left##d; \
	expand_to_64bits(right, RT3); \
	expand_to_64bits(left, RT3);
116
/*
 * final_permutation(left, right) - output permutation for one block:
 * both halves are folded back to 32 bits, the rotate/0xaaaaaaaa exchange
 * is undone with right rotates, and the four masked bit exchanges of
 * initial_permutation() are applied in reverse order.
 *
 * left/right are 64-bit register names without the "d" suffix.
 * Clobbers RT0, RW0 and the flags.
 */
#define final_permutation(left, right) \
	compress_to_64bits(right); \
	compress_to_64bits(left); \
	movl right##d, RW0d; \
	rorl $1, left##d; \
	xorl left##d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, right##d; \
	xorl RW0d, left##d; \
	rorl $1, right##d; \
	do_permutation(right##d, left##d, 8, 0x00ff00ff); \
	do_permutation(right##d, left##d, 2, 0x33333333); \
	do_permutation(left##d, right##d, 16, 0x0000ffff); \
	do_permutation(left##d, right##d, 4, 0x0f0f0f0f);
131
/*
 * round1(n, from, to, load_next_key)
 *
 * One round for a single block. On entry RW0 holds round key n.
 *
 *	RW0 ^= from;
 *	to  ^= s8[byte0] ^ s6[byte1] ^ s4[byte2] ^ s2[byte3] ^
 *	       s7[byte4] ^ s5[byte5] ^ s3[byte6] ^ s1[byte7];
 *
 * where byteK is byte K of RW0 (byte0 = least significant). Half of the
 * table values are first collected in RT0 and folded into `to` at the end.
 *
 * load_next_key(n, RW0) is expanded once all eight bytes have been pulled
 * out of RW0, so the caller can have the key for round n + 1 loaded there
 * (or pass a no-op for the last round).
 *
 * The tables are addressed absolutely (sN(, index, 8)), not RIP-relative.
 *
 * Clobbers RT0-RT3, RL1 (used as an extra index register) and the flags.
 */
#define round1(n, from, to, load_next_key) \
	xorq from, RW0; \
	\
	movzbl RW0bl, RT0d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	movzbl RW0bl, RT2d; \
	movzbl RW0bh, RT3d; \
	shrq $16, RW0; \
	movq s8(, RT0, 8), RT0; \
	xorq s6(, RT1, 8), to; \
	movzbl RW0bl, RL1d; \
	movzbl RW0bh, RT1d; \
	shrl $16, RW0d; \
	xorq s4(, RT2, 8), RT0; \
	xorq s2(, RT3, 8), to; \
	movzbl RW0bl, RT2d; \
	movzbl RW0bh, RT3d; \
	xorq s7(, RL1, 8), RT0; \
	xorq s5(, RT1, 8), to; \
	xorq s3(, RT2, 8), RT0; \
	load_next_key(n, RW0); \
	xorq RT0, to; \
	xorq s1(, RT3, 8), to;
/*
 * Hooks passed to the round macros.
 *
 * load_next_key(n, RWx) loads the 8-byte round key with index n + 1 from
 * the array at CTX into RWx.
 *
 * dummy2(a, b) expands to nothing; it is passed in place of a two-argument
 * hook when no action is wanted (e.g. no key load after the last round).
 */
#define load_next_key(n, RWx) \
	movq (((n) + 1) * 8)(CTX), RWx;

#define dummy2(a, b) /*_*/
/*
 * read_block(io, left, right)
 *
 * Loads two consecutive 32-bit words from the address in io into left##d
 * and right##d (offsets 0 and 4) and byte-swaps each of them.
 */
#define read_block(io, left, right) \
	movl (io), left##d; \
	movl 4(io), right##d; \
	bswapl left##d; \
	bswapl right##d;
/*
 * write_block(io, left, right)
 *
 * Byte-swaps left##d and right##d in place and stores them as two
 * consecutive 32-bit words at the address in io (offsets 0 and 4).
 * The registers keep the swapped values afterwards.
 */
#define write_block(io, left, right) \
	bswapl left##d; \
	bswapl right##d; \
	movl left##d, (io); \
	movl right##d, 4(io);
/*
 * des3_ede_x86_64_crypt_blk - process one 8-byte block
 *
 * input:
 *	%rdi: round keys, CTX
 *	%rsi: dst
 *	%rdx: src
 *
 * Reads 8 bytes from src, runs them through initial_permutation, 48
 * invocations of round1 and final_permutation, and writes 8 bytes to dst.
 * Round keys are consumed as 48 consecutive quadwords starting at CTX
 * (offsets 0 .. 47 * 8).
 *
 * The rounds are issued in three groups of 16. Inside a group the
 * from/to registers alternate; at each group boundary the same pair is
 * used twice in a row, and the final permutation/store take the halves as
 * (RR0, RL0).
 *
 * dst is kept on the stack across the rounds because %rsi doubles as RT1.
 * %rbx and %r12-%r15 are saved and restored around the body.
 */
ENTRY(des3_ede_x86_64_crypt_blk)
	pushq %rbx;
	pushq %r12;
	pushq %r13;
	pushq %r14;
	pushq %r15;

	pushq %rsi; /* dst */

	read_block(%rdx, RL0, RR0);
	initial_permutation(RL0, RR0);

	/* round1 expects the key for the upcoming round in RW0 */
	movq (CTX), RW0;

	round1(0, RR0, RL0, load_next_key);
	round1(1, RL0, RR0, load_next_key);
	round1(2, RR0, RL0, load_next_key);
	round1(3, RL0, RR0, load_next_key);
	round1(4, RR0, RL0, load_next_key);
	round1(5, RL0, RR0, load_next_key);
	round1(6, RR0, RL0, load_next_key);
	round1(7, RL0, RR0, load_next_key);
	round1(8, RR0, RL0, load_next_key);
	round1(9, RL0, RR0, load_next_key);
	round1(10, RR0, RL0, load_next_key);
	round1(11, RL0, RR0, load_next_key);
	round1(12, RR0, RL0, load_next_key);
	round1(13, RL0, RR0, load_next_key);
	round1(14, RR0, RL0, load_next_key);
	round1(15, RL0, RR0, load_next_key);

	round1(16+0, RL0, RR0, load_next_key);
	round1(16+1, RR0, RL0, load_next_key);
	round1(16+2, RL0, RR0, load_next_key);
	round1(16+3, RR0, RL0, load_next_key);
	round1(16+4, RL0, RR0, load_next_key);
	round1(16+5, RR0, RL0, load_next_key);
	round1(16+6, RL0, RR0, load_next_key);
	round1(16+7, RR0, RL0, load_next_key);
	round1(16+8, RL0, RR0, load_next_key);
	round1(16+9, RR0, RL0, load_next_key);
	round1(16+10, RL0, RR0, load_next_key);
	round1(16+11, RR0, RL0, load_next_key);
	round1(16+12, RL0, RR0, load_next_key);
	round1(16+13, RR0, RL0, load_next_key);
	round1(16+14, RL0, RR0, load_next_key);
	round1(16+15, RR0, RL0, load_next_key);

	round1(32+0, RR0, RL0, load_next_key);
	round1(32+1, RL0, RR0, load_next_key);
	round1(32+2, RR0, RL0, load_next_key);
	round1(32+3, RL0, RR0, load_next_key);
	round1(32+4, RR0, RL0, load_next_key);
	round1(32+5, RL0, RR0, load_next_key);
	round1(32+6, RR0, RL0, load_next_key);
	round1(32+7, RL0, RR0, load_next_key);
	round1(32+8, RR0, RL0, load_next_key);
	round1(32+9, RL0, RR0, load_next_key);
	round1(32+10, RR0, RL0, load_next_key);
	round1(32+11, RL0, RR0, load_next_key);
	round1(32+12, RR0, RL0, load_next_key);
	round1(32+13, RL0, RR0, load_next_key);
	round1(32+14, RR0, RL0, load_next_key);
	/* last round: no further key to load */
	round1(32+15, RL0, RR0, dummy2);

	final_permutation(RR0, RL0);

	popq %rsi; /* dst */
	write_block(%rsi, RR0, RL0);

	popq %r15;
	popq %r14;
	popq %r13;
	popq %r12;
	popq %rbx;

	ret;
ENDPROC(des3_ede_x86_64_crypt_blk)
/***********************************************************************
 * 3-way 3DES
 ***********************************************************************/

/*
 * expand_to_64bits / compress_to_64bits are repeated here with the same
 * replacement text as in the 1-way section; the preprocessor accepts the
 * redefinition only as long as both copies stay identical.
 *
 * expand_to_64bits(val, mask): OR a copy of val##d rotated right by 4 into
 * bits 32..63 of val, then AND val with mask.
 * compress_to_64bits(val): rotate bits 32..63 of val left by 4 (as a
 * 32-bit value) and OR the result into val##d.
 * Both clobber RT0 and the flags.
 */
#define expand_to_64bits(val, mask) \
	movl val##d, RT0d; \
	rorl $4, RT0d; \
	shlq $32, RT0; \
	orq RT0, val; \
	andq mask, val;

#define compress_to_64bits(val) \
	movq val, RT0; \
	shrq $32, RT0; \
	roll $4, RT0d; \
	orl RT0d, val##d;
/*
 * initial_permutation3(left, right)
 *
 * Three-block version of the initial permutation. left and right are
 * register-name prefixes: left##0 .. left##2 / right##0 .. right##2 name
 * the 64-bit registers of blocks 0..2 and the ...d suffix their 32-bit
 * views (e.g. RL -> RL0, RL0d).
 *
 * For every block: four do_permutation bit swaps, a rotate-by-one /
 * 0xaaaaaaaa exchange step, then expand_to_64bits on both halves with the
 * 0x3f-per-byte mask.
 *
 * Clobbers RT0, RW0-RW2 and the flags; leaves the mask in RT3.
 */
#define initial_permutation3(left, right) \
	do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
	do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
	do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
	do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \
	do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
	\
	do_permutation(right##0d, left##0d, 2, 0x33333333); \
	do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
	do_permutation(right##1d, left##1d, 2, 0x33333333); \
	do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
	do_permutation(right##2d, left##2d, 2, 0x33333333); \
	do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
	\
	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
	\
	/* block 0 */ \
	movl left##0d, RW0d; \
	roll $1, right##0d; \
	xorl right##0d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, left##0d; \
	xorl RW0d, right##0d; \
	roll $1, left##0d; \
	expand_to_64bits(right##0, RT3); \
	expand_to_64bits(left##0, RT3); \
	/* block 1 */ \
	movl left##1d, RW1d; \
	roll $1, right##1d; \
	xorl right##1d, RW1d; \
	andl $0xaaaaaaaa, RW1d; \
	xorl RW1d, left##1d; \
	xorl RW1d, right##1d; \
	roll $1, left##1d; \
	expand_to_64bits(right##1, RT3); \
	expand_to_64bits(left##1, RT3); \
	/* block 2 */ \
	movl left##2d, RW2d; \
	roll $1, right##2d; \
	xorl right##2d, RW2d; \
	andl $0xaaaaaaaa, RW2d; \
	xorl RW2d, left##2d; \
	xorl RW2d, right##2d; \
	roll $1, left##2d; \
	expand_to_64bits(right##2, RT3); \
	expand_to_64bits(left##2, RT3);
/*
 * final_permutation3(left, right)
 *
 * Three-block version of the final permutation; left and right are
 * register-name prefixes as in initial_permutation3.
 *
 * For every block: compress_to_64bits on both halves, the rotate-by-one /
 * 0xaaaaaaaa exchange undone with right rotations, then the four
 * do_permutation swaps in the reverse order of initial_permutation3.
 *
 * Clobbers RT0, RW0-RW2 and the flags.
 */
#define final_permutation3(left, right) \
	/* block 0 */ \
	compress_to_64bits(right##0); \
	compress_to_64bits(left##0); \
	movl right##0d, RW0d; \
	rorl $1, left##0d; \
	xorl left##0d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, right##0d; \
	xorl RW0d, left##0d; \
	rorl $1, right##0d; \
	/* block 1 */ \
	compress_to_64bits(right##1); \
	compress_to_64bits(left##1); \
	movl right##1d, RW1d; \
	rorl $1, left##1d; \
	xorl left##1d, RW1d; \
	andl $0xaaaaaaaa, RW1d; \
	xorl RW1d, right##1d; \
	xorl RW1d, left##1d; \
	rorl $1, right##1d; \
	/* block 2 */ \
	compress_to_64bits(right##2); \
	compress_to_64bits(left##2); \
	movl right##2d, RW2d; \
	rorl $1, left##2d; \
	xorl left##2d, RW2d; \
	andl $0xaaaaaaaa, RW2d; \
	xorl RW2d, right##2d; \
	xorl RW2d, left##2d; \
	rorl $1, right##2d; \
	\
	do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
	do_permutation(right##0d, left##0d, 2, 0x33333333); \
	do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
	do_permutation(right##1d, left##1d, 2, 0x33333333); \
	do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
	do_permutation(right##2d, left##2d, 2, 0x33333333); \
	\
	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
	do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
	do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
	do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
	do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
	do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f);
/*
 * round3(n, from, to, load_next_key, do_movq)
 *
 * One round for three blocks. from and to are register-name prefixes
 * (from##0 .. from##2, to##0 .. to##2). On entry RW0, RW1 and RW2 each
 * hold round key n. For block k:
 *
 *	RWk  ^= from##k;
 *	to##k ^= s8[byte0] ^ s6[byte1] ^ s4[byte2] ^ s2[byte3] ^
 *	         s7[byte4] ^ s5[byte5] ^ s3[byte6] ^ s1[byte7];
 *
 * where byteK is byte K of RWk (byte0 = least significant).
 *
 * Key hand-over: once block 0 has consumed RW0, load_next_key(n, RW0) is
 * expanded so RW0 can receive the key for round n + 1; do_movq(RW0, RW1)
 * and do_movq(RW0, RW2) are expanded after blocks 1 and 2 have consumed
 * their work registers, so the new key can be copied into them.
 *
 * The tables are addressed absolutely (sN(, index, 8)), not RIP-relative.
 *
 * Clobbers RT1, RT3 and the flags.
 */
#define round3(n, from, to, load_next_key, do_movq) \
	/* block 0 */ \
	xorq from##0, RW0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	xorq s8(, RT3, 8), to##0; \
	xorq s6(, RT1, 8), to##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	xorq s4(, RT3, 8), to##0; \
	xorq s2(, RT1, 8), to##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrl $16, RW0d; \
	xorq s7(, RT3, 8), to##0; \
	xorq s5(, RT1, 8), to##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	load_next_key(n, RW0); \
	xorq s3(, RT3, 8), to##0; \
	xorq s1(, RT1, 8), to##0; \
	/* block 1 */ \
	xorq from##1, RW1; \
	movzbl RW1bl, RT3d; \
	movzbl RW1bh, RT1d; \
	shrq $16, RW1; \
	xorq s8(, RT3, 8), to##1; \
	xorq s6(, RT1, 8), to##1; \
	movzbl RW1bl, RT3d; \
	movzbl RW1bh, RT1d; \
	shrq $16, RW1; \
	xorq s4(, RT3, 8), to##1; \
	xorq s2(, RT1, 8), to##1; \
	movzbl RW1bl, RT3d; \
	movzbl RW1bh, RT1d; \
	shrl $16, RW1d; \
	xorq s7(, RT3, 8), to##1; \
	xorq s5(, RT1, 8), to##1; \
	movzbl RW1bl, RT3d; \
	movzbl RW1bh, RT1d; \
	do_movq(RW0, RW1); \
	xorq s3(, RT3, 8), to##1; \
	xorq s1(, RT1, 8), to##1; \
	/* block 2 */ \
	xorq from##2, RW2; \
	movzbl RW2bl, RT3d; \
	movzbl RW2bh, RT1d; \
	shrq $16, RW2; \
	xorq s8(, RT3, 8), to##2; \
	xorq s6(, RT1, 8), to##2; \
	movzbl RW2bl, RT3d; \
	movzbl RW2bh, RT1d; \
	shrq $16, RW2; \
	xorq s4(, RT3, 8), to##2; \
	xorq s2(, RT1, 8), to##2; \
	movzbl RW2bl, RT3d; \
	movzbl RW2bh, RT1d; \
	shrl $16, RW2d; \
	xorq s7(, RT3, 8), to##2; \
	xorq s5(, RT1, 8), to##2; \
	movzbl RW2bl, RT3d; \
	movzbl RW2bh, RT1d; \
	do_movq(RW0, RW2); \
	xorq s3(, RT3, 8), to##2; \
	xorq s1(, RT1, 8), to##2;
/*
 * __movq(src, dst) - plain 64-bit register/memory move, wrapped in a macro
 * so that it can be passed as the do_movq hook of round3 (dummy2 is passed
 * instead when no copy is wanted).
 */
#define __movq(src, dst) \
	movq src, dst;
/*
 * des3_ede_x86_64_crypt_blk_3way - process three 8-byte blocks at once
 *
 * input:
 *	%rdi: ctx, round keys
 *	%rsi: dst (3 blocks)
 *	%rdx: src (3 blocks)
 *
 * Reads 24 bytes from src as six 32-bit words (byte-swapped after the
 * load), runs initial_permutation3, 48 invocations of round3 and
 * final_permutation3, then byte-swaps and stores six 32-bit words to dst.
 * Round keys are consumed as 48 consecutive quadwords starting at CTX.
 *
 * The rounds are issued in three groups of 16. Inside a group the
 * from/to prefixes alternate; at each group boundary the same pair is
 * used twice in a row, and the final permutation/store take the halves in
 * (RR, RL) order.
 *
 * dst is kept on the stack across the rounds because %rsi doubles as RT1.
 * %rbx and %r12-%r15 are saved and restored around the body.
 */
ENTRY(des3_ede_x86_64_crypt_blk_3way)
	pushq %rbx;
	pushq %r12;
	pushq %r13;
	pushq %r14;
	pushq %r15;

	pushq %rsi; /* dst */

	/* load input */
	movl 0 * 4(%rdx), RL0d;
	movl 1 * 4(%rdx), RR0d;
	movl 2 * 4(%rdx), RL1d;
	movl 3 * 4(%rdx), RR1d;
	movl 4 * 4(%rdx), RL2d;
	movl 5 * 4(%rdx), RR2d;

	bswapl RL0d;
	bswapl RR0d;
	bswapl RL1d;
	bswapl RR1d;
	bswapl RL2d;
	bswapl RR2d;

	initial_permutation3(RL, RR);

	/* round3 expects the key for the upcoming round in RW0, RW1 and RW2 */
	movq 0(CTX), RW0;
	movq RW0, RW1;
	movq RW0, RW2;

	round3(0, RR, RL, load_next_key, __movq);
	round3(1, RL, RR, load_next_key, __movq);
	round3(2, RR, RL, load_next_key, __movq);
	round3(3, RL, RR, load_next_key, __movq);
	round3(4, RR, RL, load_next_key, __movq);
	round3(5, RL, RR, load_next_key, __movq);
	round3(6, RR, RL, load_next_key, __movq);
	round3(7, RL, RR, load_next_key, __movq);
	round3(8, RR, RL, load_next_key, __movq);
	round3(9, RL, RR, load_next_key, __movq);
	round3(10, RR, RL, load_next_key, __movq);
	round3(11, RL, RR, load_next_key, __movq);
	round3(12, RR, RL, load_next_key, __movq);
	round3(13, RL, RR, load_next_key, __movq);
	round3(14, RR, RL, load_next_key, __movq);
	round3(15, RL, RR, load_next_key, __movq);

	round3(16+0, RL, RR, load_next_key, __movq);
	round3(16+1, RR, RL, load_next_key, __movq);
	round3(16+2, RL, RR, load_next_key, __movq);
	round3(16+3, RR, RL, load_next_key, __movq);
	round3(16+4, RL, RR, load_next_key, __movq);
	round3(16+5, RR, RL, load_next_key, __movq);
	round3(16+6, RL, RR, load_next_key, __movq);
	round3(16+7, RR, RL, load_next_key, __movq);
	round3(16+8, RL, RR, load_next_key, __movq);
	round3(16+9, RR, RL, load_next_key, __movq);
	round3(16+10, RL, RR, load_next_key, __movq);
	round3(16+11, RR, RL, load_next_key, __movq);
	round3(16+12, RL, RR, load_next_key, __movq);
	round3(16+13, RR, RL, load_next_key, __movq);
	round3(16+14, RL, RR, load_next_key, __movq);
	round3(16+15, RR, RL, load_next_key, __movq);

	round3(32+0, RR, RL, load_next_key, __movq);
	round3(32+1, RL, RR, load_next_key, __movq);
	round3(32+2, RR, RL, load_next_key, __movq);
	round3(32+3, RL, RR, load_next_key, __movq);
	round3(32+4, RR, RL, load_next_key, __movq);
	round3(32+5, RL, RR, load_next_key, __movq);
	round3(32+6, RR, RL, load_next_key, __movq);
	round3(32+7, RL, RR, load_next_key, __movq);
	round3(32+8, RR, RL, load_next_key, __movq);
	round3(32+9, RL, RR, load_next_key, __movq);
	round3(32+10, RR, RL, load_next_key, __movq);
	round3(32+11, RL, RR, load_next_key, __movq);
	round3(32+12, RR, RL, load_next_key, __movq);
	round3(32+13, RL, RR, load_next_key, __movq);
	round3(32+14, RR, RL, load_next_key, __movq);
	/* last round: no further key to load or copy */
	round3(32+15, RL, RR, dummy2, dummy2);

	final_permutation3(RR, RL);

	bswapl RR0d;
	bswapl RL0d;
	bswapl RR1d;
	bswapl RL1d;
	bswapl RR2d;
	bswapl RL2d;

	popq %rsi; /* dst */
	movl RR0d, 0 * 4(%rsi);
	movl RL0d, 1 * 4(%rsi);
	movl RR1d, 2 * 4(%rsi);
	movl RL1d, 3 * 4(%rsi);
	movl RR2d, 4 * 4(%rsi);
	movl RL2d, 5 * 4(%rsi);

	popq %r15;
	popq %r14;
	popq %r13;
	popq %r12;
	popq %rbx;

	ret;
ENDPROC(des3_ede_x86_64_crypt_blk_3way)
/*
 * Lookup tables used by round1/round3.
 *
 * Eight tables of 64 quadwords (512 bytes) each, laid out back to back.
 * The s2..s8 macros at the top of the file are computed as fixed 64*8-byte
 * steps from .L_s1, so the order and size of the tables below must not
 * change.
 */
.section	.rodata, "a", @progbits
.align 16
/* s1: indexed by byte 7 (most significant) of the round work register */
.L_s1:
	.quad 0x0010100001010400, 0x0000000000000000
	.quad 0x0000100000010000, 0x0010100001010404
	.quad 0x0010100001010004, 0x0000100000010404
	.quad 0x0000000000000004, 0x0000100000010000
	.quad 0x0000000000000400, 0x0010100001010400
	.quad 0x0010100001010404, 0x0000000000000400
	.quad 0x0010000001000404, 0x0010100001010004
	.quad 0x0010000001000000, 0x0000000000000004
	.quad 0x0000000000000404, 0x0010000001000400
	.quad 0x0010000001000400, 0x0000100000010400
	.quad 0x0000100000010400, 0x0010100001010000
	.quad 0x0010100001010000, 0x0010000001000404
	.quad 0x0000100000010004, 0x0010000001000004
	.quad 0x0010000001000004, 0x0000100000010004
	.quad 0x0000000000000000, 0x0000000000000404
	.quad 0x0000100000010404, 0x0010000001000000
	.quad 0x0000100000010000, 0x0010100001010404
	.quad 0x0000000000000004, 0x0010100001010000
	.quad 0x0010100001010400, 0x0010000001000000
	.quad 0x0010000001000000, 0x0000000000000400
	.quad 0x0010100001010004, 0x0000100000010000
	.quad 0x0000100000010400, 0x0010000001000004
	.quad 0x0000000000000400, 0x0000000000000004
	.quad 0x0010000001000404, 0x0000100000010404
	.quad 0x0010100001010404, 0x0000100000010004
	.quad 0x0010100001010000, 0x0010000001000404
	.quad 0x0010000001000004, 0x0000000000000404
	.quad 0x0000100000010404, 0x0010100001010400
	.quad 0x0000000000000404, 0x0010000001000400
	.quad 0x0010000001000400, 0x0000000000000000
	.quad 0x0000100000010004, 0x0000100000010400
	.quad 0x0000000000000000, 0x0010100001010004
/*
 * s2: 64 quadwords, indexed by byte 3 of the round work register.
 * Reached through the s2 macro (s1 + 64*8), so it must stay directly
 * after the s1 table.
 */
.L_s2:
	.quad 0x0801080200100020, 0x0800080000000000
	.quad 0x0000080000000000, 0x0001080200100020
	.quad 0x0001000000100000, 0x0000000200000020
	.quad 0x0801000200100020, 0x0800080200000020
	.quad 0x0800000200000020, 0x0801080200100020
	.quad 0x0801080000100000, 0x0800000000000000
	.quad 0x0800080000000000, 0x0001000000100000
	.quad 0x0000000200000020, 0x0801000200100020
	.quad 0x0001080000100000, 0x0001000200100020
	.quad 0x0800080200000020, 0x0000000000000000
	.quad 0x0800000000000000, 0x0000080000000000
	.quad 0x0001080200100020, 0x0801000000100000
	.quad 0x0001000200100020, 0x0800000200000020
	.quad 0x0000000000000000, 0x0001080000100000
	.quad 0x0000080200000020, 0x0801080000100000
	.quad 0x0801000000100000, 0x0000080200000020
	.quad 0x0000000000000000, 0x0001080200100020
	.quad 0x0801000200100020, 0x0001000000100000
	.quad 0x0800080200000020, 0x0801000000100000
	.quad 0x0801080000100000, 0x0000080000000000
	.quad 0x0801000000100000, 0x0800080000000000
	.quad 0x0000000200000020, 0x0801080200100020
	.quad 0x0001080200100020, 0x0000000200000020
	.quad 0x0000080000000000, 0x0800000000000000
	.quad 0x0000080200000020, 0x0801080000100000
	.quad 0x0001000000100000, 0x0800000200000020
	.quad 0x0001000200100020, 0x0800080200000020
	.quad 0x0800000200000020, 0x0001000200100020
	.quad 0x0001080000100000, 0x0000000000000000
	.quad 0x0800080000000000, 0x0000080200000020
	.quad 0x0800000000000000, 0x0801000200100020
	.quad 0x0801080200100020, 0x0001080000100000
/*
 * s3: 64 quadwords, indexed by byte 6 of the round work register.
 * Reached through the s3 macro (s2 + 64*8), so it must stay directly
 * after the s2 table.
 */
.L_s3:
	.quad 0x0000002000000208, 0x0000202008020200
	.quad 0x0000000000000000, 0x0000200008020008
	.quad 0x0000002008000200, 0x0000000000000000
	.quad 0x0000202000020208, 0x0000002008000200
	.quad 0x0000200000020008, 0x0000000008000008
	.quad 0x0000000008000008, 0x0000200000020000
	.quad 0x0000202008020208, 0x0000200000020008
	.quad 0x0000200008020000, 0x0000002000000208
	.quad 0x0000000008000000, 0x0000000000000008
	.quad 0x0000202008020200, 0x0000002000000200
	.quad 0x0000202000020200, 0x0000200008020000
	.quad 0x0000200008020008, 0x0000202000020208
	.quad 0x0000002008000208, 0x0000202000020200
	.quad 0x0000200000020000, 0x0000002008000208
	.quad 0x0000000000000008, 0x0000202008020208
	.quad 0x0000002000000200, 0x0000000008000000
	.quad 0x0000202008020200, 0x0000000008000000
	.quad 0x0000200000020008, 0x0000002000000208
	.quad 0x0000200000020000, 0x0000202008020200
	.quad 0x0000002008000200, 0x0000000000000000
	.quad 0x0000002000000200, 0x0000200000020008
	.quad 0x0000202008020208, 0x0000002008000200
	.quad 0x0000000008000008, 0x0000002000000200
	.quad 0x0000000000000000, 0x0000200008020008
	.quad 0x0000002008000208, 0x0000200000020000
	.quad 0x0000000008000000, 0x0000202008020208
	.quad 0x0000000000000008, 0x0000202000020208
	.quad 0x0000202000020200, 0x0000000008000008
	.quad 0x0000200008020000, 0x0000002008000208
	.quad 0x0000002000000208, 0x0000200008020000
	.quad 0x0000202000020208, 0x0000000000000008
	.quad 0x0000200008020008, 0x0000202000020200
/*
 * s4: 64 quadwords, indexed by byte 2 of the round work register.
 * Reached through the s4 macro (s3 + 64*8), so it must stay directly
 * after the s3 table.
 */
.L_s4:
	.quad 0x1008020000002001, 0x1000020800002001
	.quad 0x1000020800002001, 0x0000000800000000
	.quad 0x0008020800002000, 0x1008000800000001
	.quad 0x1008000000000001, 0x1000020000002001
	.quad 0x0000000000000000, 0x0008020000002000
	.quad 0x0008020000002000, 0x1008020800002001
	.quad 0x1000000800000001, 0x0000000000000000
	.quad 0x0008000800000000, 0x1008000000000001
	.quad 0x1000000000000001, 0x0000020000002000
	.quad 0x0008000000000000, 0x1008020000002001
	.quad 0x0000000800000000, 0x0008000000000000
	.quad 0x1000020000002001, 0x0000020800002000
	.quad 0x1008000800000001, 0x1000000000000001
	.quad 0x0000020800002000, 0x0008000800000000
	.quad 0x0000020000002000, 0x0008020800002000
	.quad 0x1008020800002001, 0x1000000800000001
	.quad 0x0008000800000000, 0x1008000000000001
	.quad 0x0008020000002000, 0x1008020800002001
	.quad 0x1000000800000001, 0x0000000000000000
	.quad 0x0000000000000000, 0x0008020000002000
	.quad 0x0000020800002000, 0x0008000800000000
	.quad 0x1008000800000001, 0x1000000000000001
	.quad 0x1008020000002001, 0x1000020800002001
	.quad 0x1000020800002001, 0x0000000800000000
	.quad 0x1008020800002001, 0x1000000800000001
	.quad 0x1000000000000001, 0x0000020000002000
	.quad 0x1008000000000001, 0x1000020000002001
	.quad 0x0008020800002000, 0x1008000800000001
	.quad 0x1000020000002001, 0x0000020800002000
	.quad 0x0008000000000000, 0x1008020000002001
	.quad 0x0000000800000000, 0x0008000000000000
	.quad 0x0000020000002000, 0x0008020800002000
/*
 * s5: 64 quadwords, indexed by byte 5 of the round work register.
 * Reached through the s5 macro (s4 + 64*8), so it must stay directly
 * after the s4 table.
 */
.L_s5:
	.quad 0x0000001000000100, 0x0020001002080100
	.quad 0x0020000002080000, 0x0420001002000100
	.quad 0x0000000000080000, 0x0000001000000100
	.quad 0x0400000000000000, 0x0020000002080000
	.quad 0x0400001000080100, 0x0000000000080000
	.quad 0x0020001002000100, 0x0400001000080100
	.quad 0x0420001002000100, 0x0420000002080000
	.quad 0x0000001000080100, 0x0400000000000000
	.quad 0x0020000002000000, 0x0400000000080000
	.quad 0x0400000000080000, 0x0000000000000000
	.quad 0x0400001000000100, 0x0420001002080100
	.quad 0x0420001002080100, 0x0020001002000100
	.quad 0x0420000002080000, 0x0400001000000100
	.quad 0x0000000000000000, 0x0420000002000000
	.quad 0x0020001002080100, 0x0020000002000000
	.quad 0x0420000002000000, 0x0000001000080100
	.quad 0x0000000000080000, 0x0420001002000100
	.quad 0x0000001000000100, 0x0020000002000000
	.quad 0x0400000000000000, 0x0020000002080000
	.quad 0x0420001002000100, 0x0400001000080100
	.quad 0x0020001002000100, 0x0400000000000000
	.quad 0x0420000002080000, 0x0020001002080100
	.quad 0x0400001000080100, 0x0000001000000100
	.quad 0x0020000002000000, 0x0420000002080000
	.quad 0x0420001002080100, 0x0000001000080100
	.quad 0x0420000002000000, 0x0420001002080100
	.quad 0x0020000002080000, 0x0000000000000000
	.quad 0x0400000000080000, 0x0420000002000000
	.quad 0x0000001000080100, 0x0020001002000100
	.quad 0x0400001000000100, 0x0000000000080000
	.quad 0x0000000000000000, 0x0400000000080000
	.quad 0x0020001002080100, 0x0400001000000100
/*
 * s6: 64 quadwords, indexed by byte 1 of the round work register.
 * Reached through the s6 macro (s5 + 64*8), so it must stay directly
 * after the s5 table.
 */
.L_s6:
	.quad 0x0200000120000010, 0x0204000020000000
	.quad 0x0000040000000000, 0x0204040120000010
	.quad 0x0204000020000000, 0x0000000100000010
	.quad 0x0204040120000010, 0x0004000000000000
	.quad 0x0200040020000000, 0x0004040100000010
	.quad 0x0004000000000000, 0x0200000120000010
	.quad 0x0004000100000010, 0x0200040020000000
	.quad 0x0200000020000000, 0x0000040100000010
	.quad 0x0000000000000000, 0x0004000100000010
	.quad 0x0200040120000010, 0x0000040000000000
	.quad 0x0004040000000000, 0x0200040120000010
	.quad 0x0000000100000010, 0x0204000120000010
	.quad 0x0204000120000010, 0x0000000000000000
	.quad 0x0004040100000010, 0x0204040020000000
	.quad 0x0000040100000010, 0x0004040000000000
	.quad 0x0204040020000000, 0x0200000020000000
	.quad 0x0200040020000000, 0x0000000100000010
	.quad 0x0204000120000010, 0x0004040000000000
	.quad 0x0204040120000010, 0x0004000000000000
	.quad 0x0000040100000010, 0x0200000120000010
	.quad 0x0004000000000000, 0x0200040020000000
	.quad 0x0200000020000000, 0x0000040100000010
	.quad 0x0200000120000010, 0x0204040120000010
	.quad 0x0004040000000000, 0x0204000020000000
	.quad 0x0004040100000010, 0x0204040020000000
	.quad 0x0000000000000000, 0x0204000120000010
	.quad 0x0000000100000010, 0x0000040000000000
	.quad 0x0204000020000000, 0x0004040100000010
	.quad 0x0000040000000000, 0x0004000100000010
	.quad 0x0200040120000010, 0x0000000000000000
	.quad 0x0204040020000000, 0x0200000020000000
	.quad 0x0004000100000010, 0x0200040120000010
/*
 * s7: 64 quadwords, indexed by byte 4 of the round work register.
 * Reached through the s7 macro (s6 + 64*8), so it must stay directly
 * after the s6 table.
 */
.L_s7:
	.quad 0x0002000000200000, 0x2002000004200002
	.quad 0x2000000004000802, 0x0000000000000000
	.quad 0x0000000000000800, 0x2000000004000802
	.quad 0x2002000000200802, 0x0002000004200800
	.quad 0x2002000004200802, 0x0002000000200000
	.quad 0x0000000000000000, 0x2000000004000002
	.quad 0x2000000000000002, 0x0000000004000000
	.quad 0x2002000004200002, 0x2000000000000802
	.quad 0x0000000004000800, 0x2002000000200802
	.quad 0x2002000000200002, 0x0000000004000800
	.quad 0x2000000004000002, 0x0002000004200000
	.quad 0x0002000004200800, 0x2002000000200002
	.quad 0x0002000004200000, 0x0000000000000800
	.quad 0x2000000000000802, 0x2002000004200802
	.quad 0x0002000000200800, 0x2000000000000002
	.quad 0x0000000004000000, 0x0002000000200800
	.quad 0x0000000004000000, 0x0002000000200800
	.quad 0x0002000000200000, 0x2000000004000802
	.quad 0x2000000004000802, 0x2002000004200002
	.quad 0x2002000004200002, 0x2000000000000002
	.quad 0x2002000000200002, 0x0000000004000000
	.quad 0x0000000004000800, 0x0002000000200000
	.quad 0x0002000004200800, 0x2000000000000802
	.quad 0x2002000000200802, 0x0002000004200800
	.quad 0x2000000000000802, 0x2000000004000002
	.quad 0x2002000004200802, 0x0002000004200000
	.quad 0x0002000000200800, 0x0000000000000000
	.quad 0x2000000000000002, 0x2002000004200802
	.quad 0x0000000000000000, 0x2002000000200802
	.quad 0x0002000004200000, 0x0000000000000800
	.quad 0x2000000004000002, 0x0000000004000800
	.quad 0x0000000000000800, 0x2002000000200002
/*
 * s8: 64 quadwords, indexed by byte 0 (least significant) of the round
 * work register. Reached through the s8 macro (s7 + 64*8), so it must
 * stay directly after the s7 table.
 */
.L_s8:
	.quad 0x0100010410001000, 0x0000010000001000
	.quad 0x0000000000040000, 0x0100010410041000
	.quad 0x0100000010000000, 0x0100010410001000
	.quad 0x0000000400000000, 0x0100000010000000
	.quad 0x0000000400040000, 0x0100000010040000
	.quad 0x0100010410041000, 0x0000010000041000
	.quad 0x0100010010041000, 0x0000010400041000
	.quad 0x0000010000001000, 0x0000000400000000
	.quad 0x0100000010040000, 0x0100000410000000
	.quad 0x0100010010001000, 0x0000010400001000
	.quad 0x0000010000041000, 0x0000000400040000
	.quad 0x0100000410040000, 0x0100010010041000
	.quad 0x0000010400001000, 0x0000000000000000
	.quad 0x0000000000000000, 0x0100000410040000
	.quad 0x0100000410000000, 0x0100010010001000
	.quad 0x0000010400041000, 0x0000000000040000
	.quad 0x0000010400041000, 0x0000000000040000
	.quad 0x0100010010041000, 0x0000010000001000
	.quad 0x0000000400000000, 0x0100000410040000
	.quad 0x0000010000001000, 0x0000010400041000
	.quad 0x0100010010001000, 0x0000000400000000
	.quad 0x0100000410000000, 0x0100000010040000
	.quad 0x0100000410040000, 0x0100000010000000
	.quad 0x0000000000040000, 0x0100010410001000
	.quad 0x0000000000000000, 0x0100010410041000
	.quad 0x0000000400040000, 0x0100000410000000
	.quad 0x0100000010040000, 0x0100010010001000
	.quad 0x0100010410001000, 0x0000000000000000
	.quad 0x0100010410041000, 0x0000010000041000
	.quad 0x0000010000041000, 0x0000010400001000
	.quad 0x0000010400001000, 0x0000000400040000
	.quad 0x0100000010000000, 0x0100010010041000