/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher
 *
 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 */

#include <linux/linkage.h>

/* Code is emitted into .text; the lookup tables are placed in .rodata. */
.file "des3_ede-asm_64.S"
.text

/*
 * Lookup-table addresses.  Only s1 names a label; s2..s8 are computed as
 * fixed offsets from it, one table being 64 entries of 8 bytes.  The eight
 * tables must therefore be laid out contiguously and in order after .L_s1.
 */
#define s1 .L_s1
#define s2 ((s1) + (64*8))
#define s3 ((s2) + (64*8))
#define s4 ((s3) + (64*8))
#define s5 ((s4) + (64*8))
#define s6 ((s5) + (64*8))
#define s7 ((s6) + (64*8))
#define s8 ((s7) + (64*8))

/*
 * Register macros.
 *
 * Each family comes in a 64-bit form and a 'd' (32-bit) form; the RW
 * family additionally has 'bl'/'bh' forms for bits 0-7 and bits 8-15.
 * The numeric suffix 0..2 selects the block in the 3-way code, where the
 * macros paste it onto the RL/RR prefix (e.g. left##0d).
 */

/* pointer to the round-key array (first function argument) */
#define CTX %rdi

/* left data halves, blocks 0..2 */
#define RL0 %r8
#define RL1 %r9
#define RL2 %r10

#define RL0d %r8d
#define RL1d %r9d
#define RL2d %r10d

/* right data halves, blocks 0..2 (r12/r13 are callee-saved) */
#define RR0 %r11
#define RR1 %r12
#define RR2 %r13

#define RR0d %r11d
#define RR1d %r12d
#define RR2d %r13d

/*
 * Working registers holding the current round key / table index bytes.
 * These must be legacy registers because the rounds read their second
 * byte through %ah/%bh/%ch.  %rbx is callee-saved.
 */
#define RW0 %rax
#define RW1 %rbx
#define RW2 %rcx

#define RW0d %eax
#define RW1d %ebx
#define RW2d %ecx

#define RW0bl %al
#define RW1bl %bl
#define RW2bl %cl

#define RW0bh %ah
#define RW1bh %bh
#define RW2bh %ch

/*
 * Temporaries.  RT1 (%rsi) and RT3 (%rdx) alias the dst and src argument
 * registers, so a function must consume or save those arguments before
 * expanding any macro that uses RT1/RT3.  r14/r15 are callee-saved.
 */
#define RT0 %r15
#define RT1 %rsi
#define RT2 %r14
#define RT3 %rdx

#define RT0d %r15d
#define RT1d %esi
#define RT2d %r14d
#define RT3d %edx

/***********************************************************************
 * 1-way 3DES
 ***********************************************************************/

/*
 * do_permutation(a, b, offset, mask)
 *
 * Bit-swap step on two 32-bit registers: the bits of b selected by mask
 * are exchanged with the bits of a selected by (mask << offset).
 *
 * Clobbers: RT0, flags.
 */
#define do_permutation(a, b, offset, mask) \
	movl a, RT0d; \
	shrl $(offset), RT0d; \
	xorl b, RT0d; \
	andl $(mask), RT0d;	/* RT0d = ((a >> offset) ^ b) & mask */ \
	xorl RT0d, b; \
	shll $(offset), RT0d; \
	xorl RT0d, a;

/*
 * expand_to_64bits(val, mask)
 *
 * Widens the 32-bit value in val##d to 64 bits and masks it:
 *   val = (val | ((u64)ror32(val##d, 4) << 32)) & mask
 * The rotated copy is OR-ed into the upper half of val, so that half is
 * expected to be zero on entry (it is when val##d was last written by a
 * 32-bit instruction).
 *
 * val must be a register macro that has a 'd' form; mask is a 64-bit
 * register.  Clobbers: RT0, flags.
 */
#define expand_to_64bits(val, mask) \
	movl val##d, RT0d; \
	rorl $4, RT0d; \
	shlq $32, RT0; \
	orq RT0, val; \
	andq mask, val;

/*
 * compress_to_64bits(val)
 *
 * Folds the 64-bit value in val back into its low 32 bits:
 *   val##d |= rol32(val >> 32, 4)
 * Despite the name the result is 32 bits wide; the 32-bit OR also clears
 * the upper half of val.
 *
 * Clobbers: RT0, flags.
 */
#define compress_to_64bits(val) \
	movq val, RT0; \
	shrq $32, RT0; \
	roll $4, RT0d; \
	orl RT0d, val##d;

/*
 * initial_permutation(left, right)
 *
 * Applies the initial bit permutation to the 32-bit halves left##d and
 * right##d (four do_permutation steps followed by a rotate-by-one swap
 * step), then expands both halves to the 64-bit form consumed by the
 * rounds, keeping 6 bits per byte (mask 0x3f in every byte).
 *
 * left/right are register macros with a 'd' form (e.g. RL0, RR0).
 * Clobbers: RT0, RT3 (left holding the 0x3f.. mask), RW0, flags.
 */
#define initial_permutation(left, right) \
	do_permutation(left##d, right##d,  4, 0x0f0f0f0f); \
	do_permutation(left##d, right##d, 16, 0x0000ffff); \
	do_permutation(right##d, left##d,  2, 0x33333333); \
	do_permutation(right##d, left##d,  8, 0x00ff00ff); \
	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
	movl left##d, RW0d; \
	roll $1, right##d; \
	xorl right##d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, left##d; \
	xorl RW0d, right##d; \
	roll $1, left##d; \
	expand_to_64bits(right, RT3); \
	expand_to_64bits(left, RT3);

/*
 * final_permutation(left, right)
 *
 * Mirror image of initial_permutation: compresses both halves back to
 * 32 bits, undoes the rotate-by-one swap step and then runs the four
 * do_permutation steps in reverse order.  Results are left in left##d
 * and right##d.
 *
 * Clobbers: RT0, RW0, flags.
 */
#define final_permutation(left, right) \
	compress_to_64bits(right); \
	compress_to_64bits(left); \
	movl right##d, RW0d; \
	rorl $1, left##d; \
	xorl left##d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, right##d; \
	xorl RW0d, left##d; \
	rorl $1, right##d; \
	do_permutation(right##d, left##d,  8, 0x00ff00ff); \
	do_permutation(right##d, left##d,  2, 0x33333333); \
	do_permutation(left##d, right##d, 16, 0x0000ffff); \
	do_permutation(left##d, right##d,  4, 0x0f0f0f0f);

/*
 * round1(n, from, to, load_next_key)
 *
 * One round for a single block.  On entry RW0 holds round key n.
 *   RW0 ^= from
 *   to  ^= s8[byte0] ^ s6[byte1] ^ s4[byte2] ^ s2[byte3]
 *        ^ s7[byte4] ^ s5[byte5] ^ s3[byte6] ^ s1[byte7]
 * where byteK is byte K of RW0 (byte0 = least significant).  Half of the
 * lookups are accumulated in RT0 and half directly in 'to'.
 *
 * load_next_key is a macro invoked as load_next_key(n, RW0) once all
 * bytes of RW0 have been extracted; pass dummy2 to skip the load.
 *
 * The tables are addressed absolutely (no base register, not
 * RIP-relative) and each has 64 entries, so every byte of (key ^ from)
 * must be < 64.  'from' is masked to 6 bits per byte by
 * expand_to_64bits; NOTE(review): presumably the round keys are stored
 * in the same 6-bits-per-byte layout - confirm against the key setup.
 *
 * Clobbers: RW0, RT0, RT1, RT2, RT3, RL1 (used as an extra index
 * temporary), flags.
 */
#define round1(n, from, to, load_next_key) \
	xorq from, RW0; \
	\
	movzbl RW0bl, RT0d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	movzbl RW0bl, RT2d; \
	movzbl RW0bh, RT3d; \
	shrq $16, RW0; \
	movq s8(, RT0, 8), RT0; \
	xorq s6(, RT1, 8), to; \
	movzbl RW0bl, RL1d; \
	movzbl RW0bh, RT1d; \
	shrl $16, RW0d; \
	xorq s4(, RT2, 8), RT0; \
	xorq s2(, RT3, 8), to; \
	movzbl RW0bl, RT2d; \
	movzbl RW0bh, RT3d; \
	xorq s7(, RL1, 8), RT0; \
	xorq s5(, RT1, 8), to; \
	xorq s3(, RT2, 8), RT0; \
	load_next_key(n, RW0); \
	xorq RT0, to; \
	xorq s1(, RT3, 8), to;

/* Load the 64-bit round key with index n + 1 from the CTX array into RWx. */
#define load_next_key(n, RWx) \
	movq (((n) + 1) * 8)(CTX), RWx;

/*
 * Two-argument macro that expands to nothing.  Passed in place of
 * load_next_key (or of a move macro) when that step must be omitted.
 */
#define dummy2(a, b) /*_*/

/*
 * read_block(io, left, right)
 *
 * Loads two consecutive 32-bit words from io and io + 4 into left##d and
 * right##d and byte-swaps each of them.
 */
#define read_block(io, left, right) \
	movl (io), left##d; \
	movl 4(io), right##d; \
	bswapl left##d; \
	bswapl right##d;

/*
 * write_block(io, left, right)
 *
 * Byte-swaps left##d and right##d in place, then stores them to io and
 * io + 4.  The registers hold the swapped values afterwards.
 */
#define write_block(io, left, right) \
	bswapl left##d; \
	bswapl right##d; \
	movl left##d, (io); \
	movl right##d, 4(io);

/*
 * des3_ede_x86_64_crypt_blk - process one 8-byte block
 *
 * input:
 *	%rdi: round keys, CTX (48 quadwords are read, indices 0..47)
 *	%rsi: dst (8 bytes written)
 *	%rdx: src (8 bytes read)
 *
 * The block is run through three groups of 16 rounds.  The from/to
 * roles of RL0/RR0 are not swapped between round 15 and round 16, nor
 * between round 31 and round 32, and the final permutation and store
 * take the halves in (RR0, RL0) order.
 *
 * %rsi doubles as RT1 and %rdx as RT3, so dst is parked on the stack and
 * src is fully read before the first macro that touches RT3.
 *
 * NOTE(review): of the callee-saved registers pushed below, only
 * r14/r15 are written by the macros expanded in this function; rbx, r12
 * and r13 appear to be saved only for symmetry with the 3-way routine.
 */
SYM_FUNC_START(des3_ede_x86_64_crypt_blk)
	pushq %rbx;
	pushq %r12;
	pushq %r13;
	pushq %r14;
	pushq %r15;

	pushq %rsi; /* dst */

	read_block(%rdx, RL0, RR0);
	initial_permutation(RL0, RR0);	/* clobbers RT3 == %rdx (src) */

	movq (CTX), RW0;		/* round key 0 */

	round1(0, RR0, RL0, load_next_key);
	round1(1, RL0, RR0, load_next_key);
	round1(2, RR0, RL0, load_next_key);
	round1(3, RL0, RR0, load_next_key);
	round1(4, RR0, RL0, load_next_key);
	round1(5, RL0, RR0, load_next_key);
	round1(6, RR0, RL0, load_next_key);
	round1(7, RL0, RR0, load_next_key);
	round1(8, RR0, RL0, load_next_key);
	round1(9, RL0, RR0, load_next_key);
	round1(10, RR0, RL0, load_next_key);
	round1(11, RL0, RR0, load_next_key);
	round1(12, RR0, RL0, load_next_key);
	round1(13, RL0, RR0, load_next_key);
	round1(14, RR0, RL0, load_next_key);
	round1(15, RL0, RR0, load_next_key);

	round1(16+0, RL0, RR0, load_next_key);
	round1(16+1, RR0, RL0, load_next_key);
	round1(16+2, RL0, RR0, load_next_key);
	round1(16+3, RR0, RL0, load_next_key);
	round1(16+4, RL0, RR0, load_next_key);
	round1(16+5, RR0, RL0, load_next_key);
	round1(16+6, RL0, RR0, load_next_key);
	round1(16+7, RR0, RL0, load_next_key);
	round1(16+8, RL0, RR0, load_next_key);
	round1(16+9, RR0, RL0, load_next_key);
	round1(16+10, RL0, RR0, load_next_key);
	round1(16+11, RR0, RL0, load_next_key);
	round1(16+12, RL0, RR0, load_next_key);
	round1(16+13, RR0, RL0, load_next_key);
	round1(16+14, RL0, RR0, load_next_key);
	round1(16+15, RR0, RL0, load_next_key);

	round1(32+0, RR0, RL0, load_next_key);
	round1(32+1, RL0, RR0, load_next_key);
	round1(32+2, RR0, RL0, load_next_key);
	round1(32+3, RL0, RR0, load_next_key);
	round1(32+4, RR0, RL0, load_next_key);
	round1(32+5, RL0, RR0, load_next_key);
	round1(32+6, RR0, RL0, load_next_key);
	round1(32+7, RL0, RR0, load_next_key);
	round1(32+8, RR0, RL0, load_next_key);
	round1(32+9, RL0, RR0, load_next_key);
	round1(32+10, RR0, RL0, load_next_key);
	round1(32+11, RL0, RR0, load_next_key);
	round1(32+12, RR0, RL0, load_next_key);
	round1(32+13, RL0, RR0, load_next_key);
	round1(32+14, RR0, RL0, load_next_key);
	round1(32+15, RL0, RR0, dummy2);	/* last round: no key 48 to load */

	final_permutation(RR0, RL0);

	popq %rsi; /* dst */
	write_block(%rsi, RR0, RL0);

	popq %r15;
	popq %r14;
	popq %r13;
	popq %r12;
	popq %rbx;

	RET;
SYM_FUNC_END(des3_ede_x86_64_crypt_blk)

/***********************************************************************
 * 3-way 3DES
 ***********************************************************************/

/*
 * expand_to_64bits(val, mask)
 *
 * Token-for-token the same definition as in the 1-way section (an
 * identical redefinition is accepted by the preprocessor):
 *   val = (val | ((u64)ror32(val##d, 4) << 32)) & mask
 * The upper half of val is expected to be zero on entry.
 *
 * Clobbers: RT0, flags.
 */
#define expand_to_64bits(val, mask) \
	movl val##d, RT0d; \
	rorl $4, RT0d; \
	shlq $32, RT0; \
	orq RT0, val; \
	andq mask, val;

/*
 * compress_to_64bits(val)
 *
 * Token-for-token the same definition as in the 1-way section:
 *   val##d |= rol32(val >> 32, 4)
 * The result is 32 bits wide; the upper half of val ends up zero.
 *
 * Clobbers: RT0, flags.
 */
#define compress_to_64bits(val) \
	movq val, RT0; \
	shrq $32, RT0; \
	roll $4, RT0d; \
	orl RT0d, val##d;

/*
 * initial_permutation3(left, right)
 *
 * Three-block version of initial_permutation.  left/right are register
 * name prefixes (RL, RR); the block index 0..2 and the 'd' suffix are
 * pasted on here, e.g. left##0d -> RL0d.  The same steps are applied to
 * each block: four do_permutation steps, a rotate-by-one swap step and
 * the expansion to 64 bits with 6 bits kept per byte.
 *
 * Clobbers: RT0, RT3 (left holding the 0x3f.. mask), RW0, RW1, RW2,
 * flags.
 */
#define initial_permutation3(left, right) \
	do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
	do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
	do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
	do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f); \
	do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
	\
	do_permutation(right##0d, left##0d,  2, 0x33333333); \
	do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
	do_permutation(right##1d, left##1d,  2, 0x33333333); \
	do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
	do_permutation(right##2d, left##2d,  2, 0x33333333); \
	do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
	\
	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
	\
	/* block 0 */ \
	movl left##0d, RW0d; \
	roll $1, right##0d; \
	xorl right##0d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, left##0d; \
	xorl RW0d, right##0d; \
	roll $1, left##0d; \
	expand_to_64bits(right##0, RT3); \
	expand_to_64bits(left##0, RT3); \
	/* block 1 */ \
	movl left##1d, RW1d; \
	roll $1, right##1d; \
	xorl right##1d, RW1d; \
	andl $0xaaaaaaaa, RW1d; \
	xorl RW1d, left##1d; \
	xorl RW1d, right##1d; \
	roll $1, left##1d; \
	expand_to_64bits(right##1, RT3); \
	expand_to_64bits(left##1, RT3); \
	/* block 2 */ \
	movl left##2d, RW2d; \
	roll $1, right##2d; \
	xorl right##2d, RW2d; \
	andl $0xaaaaaaaa, RW2d; \
	xorl RW2d, left##2d; \
	xorl RW2d, right##2d; \
	roll $1, left##2d; \
	expand_to_64bits(right##2, RT3); \
	expand_to_64bits(left##2, RT3);

/*
 * final_permutation3(left, right)
 *
 * Three-block version of final_permutation.  left/right are register
 * name prefixes; the block index and 'd' suffix are pasted on here.  For
 * each block the halves are compressed back to 32 bits, the
 * rotate-by-one swap step is undone, and the four do_permutation steps
 * are run in the reverse of the order used by initial_permutation3.
 *
 * Clobbers: RT0, RW0, RW1, RW2, flags.
 */
#define final_permutation3(left, right) \
	/* block 0 */ \
	compress_to_64bits(right##0); \
	compress_to_64bits(left##0); \
	movl right##0d, RW0d; \
	rorl $1, left##0d; \
	xorl left##0d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, right##0d; \
	xorl RW0d, left##0d; \
	rorl $1, right##0d; \
	/* block 1 */ \
	compress_to_64bits(right##1); \
	compress_to_64bits(left##1); \
	movl right##1d, RW1d; \
	rorl $1, left##1d; \
	xorl left##1d, RW1d; \
	andl $0xaaaaaaaa, RW1d; \
	xorl RW1d, right##1d; \
	xorl RW1d, left##1d; \
	rorl $1, right##1d; \
	/* block 2 */ \
	compress_to_64bits(right##2); \
	compress_to_64bits(left##2); \
	movl right##2d, RW2d; \
	rorl $1, left##2d; \
	xorl left##2d, RW2d; \
	andl $0xaaaaaaaa, RW2d; \
	xorl RW2d, right##2d; \
	xorl RW2d, left##2d; \
	rorl $1, right##2d; \
	\
	do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
	do_permutation(right##0d, left##0d,  2, 0x33333333); \
	do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
	do_permutation(right##1d, left##1d,  2, 0x33333333); \
	do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
	do_permutation(right##2d, left##2d,  2, 0x33333333); \
	\
	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
	do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
	do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
	do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
	do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
	do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f);

/*
 * round3(n, from, to, load_next_key, do_movq)
 *
 * One round applied to three blocks.  from/to are register name
 * prefixes (RL, RR); the block index 0..2 is pasted on here.  On entry
 * RW0, RW1 and RW2 must all hold round key n.  For each block i:
 *   RWi   ^= from##i
 *   to##i ^= s8[byte0] ^ s6[byte1] ^ s4[byte2] ^ s2[byte3]
 *          ^ s7[byte4] ^ s5[byte5] ^ s3[byte6] ^ s1[byte7]
 * where byteK is byte K of RWi (byte0 = least significant).
 *
 * Key hand-over: once block 0 has consumed RW0, load_next_key(n, RW0)
 * fetches the next key; do_movq(RW0, RW1) and do_movq(RW0, RW2) then
 * copy it as blocks 1 and 2 finish with their working registers.  Pass
 * dummy2 for both macro arguments to skip this.
 *
 * The tables are addressed absolutely (no base register, not
 * RIP-relative) and each has 64 entries, so every index byte must be
 * < 64.
 *
 * Clobbers: RW0, RW1, RW2, RT1, RT3, flags.
 */
#define round3(n, from, to, load_next_key, do_movq) \
	/* block 0 */ \
	xorq from##0, RW0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	xorq s8(, RT3, 8), to##0; \
	xorq s6(, RT1, 8), to##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	xorq s4(, RT3, 8), to##0; \
	xorq s2(, RT1, 8), to##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrl $16, RW0d; \
	xorq s7(, RT3, 8), to##0; \
	xorq s5(, RT1, 8), to##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	load_next_key(n, RW0); \
	xorq s3(, RT3, 8), to##0; \
	xorq s1(, RT1, 8), to##0; \
	/* block 1 */ \
	xorq from##1, RW1; \
	movzbl RW1bl, RT3d; \
	movzbl RW1bh, RT1d; \
	shrq $16, RW1; \
	xorq s8(, RT3, 8), to##1; \
	xorq s6(, RT1, 8), to##1; \
	movzbl RW1bl, RT3d; \
	movzbl RW1bh, RT1d; \
	shrq $16, RW1; \
	xorq s4(, RT3, 8), to##1; \
	xorq s2(, RT1, 8), to##1; \
	movzbl RW1bl, RT3d; \
	movzbl RW1bh, RT1d; \
	shrl $16, RW1d; \
	xorq s7(, RT3, 8), to##1; \
	xorq s5(, RT1, 8), to##1; \
	movzbl RW1bl, RT3d; \
	movzbl RW1bh, RT1d; \
	do_movq(RW0, RW1); \
	xorq s3(, RT3, 8), to##1; \
	xorq s1(, RT1, 8), to##1; \
	/* block 2 */ \
	xorq from##2, RW2; \
	movzbl RW2bl, RT3d; \
	movzbl RW2bh, RT1d; \
	shrq $16, RW2; \
	xorq s8(, RT3, 8), to##2; \
	xorq s6(, RT1, 8), to##2; \
	movzbl RW2bl, RT3d; \
	movzbl RW2bh, RT1d; \
	shrq $16, RW2; \
	xorq s4(, RT3, 8), to##2; \
	xorq s2(, RT1, 8), to##2; \
	movzbl RW2bl, RT3d; \
	movzbl RW2bh, RT1d; \
	shrl $16, RW2d; \
	xorq s7(, RT3, 8), to##2; \
	xorq s5(, RT1, 8), to##2; \
	movzbl RW2bl, RT3d; \
	movzbl RW2bh, RT1d; \
	do_movq(RW0, RW2); \
	xorq s3(, RT3, 8), to##2; \
	xorq s1(, RT1, 8), to##2;

/* 64-bit register/memory move in macro form, so it can be passed as an argument. */
#define __movq(src, dst) \
	movq src, dst;

/*
 * des3_ede_x86_64_crypt_blk_3way - process three 8-byte blocks at once
 *
 * input:
 *	%rdi: ctx, round keys (48 quadwords are read, indices 0..47)
 *	%rsi: dst (3 blocks, 24 bytes written)
 *	%rdx: src (3 blocks, 24 bytes read)
 *
 * Block i is held in RLi/RRi.  All three blocks use the same round keys
 * and go through the same three groups of 16 rounds; the from/to roles
 * are not swapped between round 15 and 16, nor between round 31 and 32,
 * and the final permutation and store take the halves in (RR, RL)
 * order.
 *
 * %rsi doubles as RT1 and %rdx as RT3, so dst is parked on the stack and
 * src is fully read before the first macro that touches RT3.
 */
SYM_FUNC_START(des3_ede_x86_64_crypt_blk_3way)
	pushq %rbx;
	pushq %r12;
	pushq %r13;
	pushq %r14;
	pushq %r15;

	pushq %rsi; /* dst */

	/* load input */
	movl 0 * 4(%rdx), RL0d;
	movl 1 * 4(%rdx), RR0d;
	movl 2 * 4(%rdx), RL1d;
	movl 3 * 4(%rdx), RR1d;
	movl 4 * 4(%rdx), RL2d;
	movl 5 * 4(%rdx), RR2d;

	bswapl RL0d;
	bswapl RR0d;
	bswapl RL1d;
	bswapl RR1d;
	bswapl RL2d;
	bswapl RR2d;

	initial_permutation3(RL, RR);	/* clobbers RT3 == %rdx (src) */

	/* round key 0 into all three working registers */
	movq 0(CTX), RW0;
	movq RW0, RW1;
	movq RW0, RW2;

	round3(0, RR, RL, load_next_key, __movq);
	round3(1, RL, RR, load_next_key, __movq);
	round3(2, RR, RL, load_next_key, __movq);
	round3(3, RL, RR, load_next_key, __movq);
	round3(4, RR, RL, load_next_key, __movq);
	round3(5, RL, RR, load_next_key, __movq);
	round3(6, RR, RL, load_next_key, __movq);
	round3(7, RL, RR, load_next_key, __movq);
	round3(8, RR, RL, load_next_key, __movq);
	round3(9, RL, RR, load_next_key, __movq);
	round3(10, RR, RL, load_next_key, __movq);
	round3(11, RL, RR, load_next_key, __movq);
	round3(12, RR, RL, load_next_key, __movq);
	round3(13, RL, RR, load_next_key, __movq);
	round3(14, RR, RL, load_next_key, __movq);
	round3(15, RL, RR, load_next_key, __movq);

	round3(16+0, RL, RR, load_next_key, __movq);
	round3(16+1, RR, RL, load_next_key, __movq);
	round3(16+2, RL, RR, load_next_key, __movq);
	round3(16+3, RR, RL, load_next_key, __movq);
	round3(16+4, RL, RR, load_next_key, __movq);
	round3(16+5, RR, RL, load_next_key, __movq);
	round3(16+6, RL, RR, load_next_key, __movq);
	round3(16+7, RR, RL, load_next_key, __movq);
	round3(16+8, RL, RR, load_next_key, __movq);
	round3(16+9, RR, RL, load_next_key, __movq);
	round3(16+10, RL, RR, load_next_key, __movq);
	round3(16+11, RR, RL, load_next_key, __movq);
	round3(16+12, RL, RR, load_next_key, __movq);
	round3(16+13, RR, RL, load_next_key, __movq);
	round3(16+14, RL, RR, load_next_key, __movq);
	round3(16+15, RR, RL, load_next_key, __movq);

	round3(32+0, RR, RL, load_next_key, __movq);
	round3(32+1, RL, RR, load_next_key, __movq);
	round3(32+2, RR, RL, load_next_key, __movq);
	round3(32+3, RL, RR, load_next_key, __movq);
	round3(32+4, RR, RL, load_next_key, __movq);
	round3(32+5, RL, RR, load_next_key, __movq);
	round3(32+6, RR, RL, load_next_key, __movq);
	round3(32+7, RL, RR, load_next_key, __movq);
	round3(32+8, RR, RL, load_next_key, __movq);
	round3(32+9, RL, RR, load_next_key, __movq);
	round3(32+10, RR, RL, load_next_key, __movq);
	round3(32+11, RL, RR, load_next_key, __movq);
	round3(32+12, RR, RL, load_next_key, __movq);
	round3(32+13, RL, RR, load_next_key, __movq);
	round3(32+14, RR, RL, load_next_key, __movq);
	round3(32+15, RL, RR, dummy2, dummy2);	/* last round: no key 48 */

	final_permutation3(RR, RL);

	bswapl RR0d;
	bswapl RL0d;
	bswapl RR1d;
	bswapl RL1d;
	bswapl RR2d;
	bswapl RL2d;

	popq %rsi; /* dst */
	movl RR0d, 0 * 4(%rsi);
	movl RL0d, 1 * 4(%rsi);
	movl RR1d, 2 * 4(%rsi);
	movl RL1d, 3 * 4(%rsi);
	movl RR2d, 4 * 4(%rsi);
	movl RL2d, 5 * 4(%rsi);

	popq %r15;
	popq %r14;
	popq %r13;
	popq %r12;
	popq %rbx;

	RET;
SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way)

/*
 * Round lookup tables.  Eight tables of 64 quadwords (512 bytes) each.
 * The code reaches tables 2..8 as fixed offsets from .L_s1 (see the
 * s1..s8 defines), so the tables must remain contiguous and in this
 * order; nothing may be inserted between them.
 */
.section	.rodata, "a", @progbits
.align 16
.L_s1:	/* indexed by byte 7 (most significant) of the key ^ data word */
	.quad 0x0010100001010400, 0x0000000000000000
	.quad 0x0000100000010000, 0x0010100001010404
	.quad 0x0010100001010004, 0x0000100000010404
	.quad 0x0000000000000004, 0x0000100000010000
	.quad 0x0000000000000400, 0x0010100001010400
	.quad 0x0010100001010404, 0x0000000000000400
	.quad 0x0010000001000404, 0x0010100001010004
	.quad 0x0010000001000000, 0x0000000000000004
	.quad 0x0000000000000404, 0x0010000001000400
	.quad 0x0010000001000400, 0x0000100000010400
	.quad 0x0000100000010400, 0x0010100001010000
	.quad 0x0010100001010000, 0x0010000001000404
	.quad 0x0000100000010004, 0x0010000001000004
	.quad 0x0010000001000004, 0x0000100000010004
	.quad 0x0000000000000000, 0x0000000000000404
	.quad 0x0000100000010404, 0x0010000001000000
	.quad 0x0000100000010000, 0x0010100001010404
	.quad 0x0000000000000004, 0x0010100001010000
	.quad 0x0010100001010400, 0x0010000001000000
	.quad 0x0010000001000000, 0x0000000000000400
	.quad 0x0010100001010004, 0x0000100000010000
	.quad 0x0000100000010400, 0x0010000001000004
	.quad 0x0000000000000400, 0x0000000000000004
	.quad 0x0010000001000404, 0x0000100000010404
	.quad 0x0010100001010404, 0x0000100000010004
	.quad 0x0010100001010000, 0x0010000001000404
	.quad 0x0010000001000004, 0x0000000000000404
	.quad 0x0000100000010404, 0x0010100001010400
	.quad 0x0000000000000404, 0x0010000001000400
	.quad 0x0010000001000400, 0x0000000000000000
	.quad 0x0000100000010004, 0x0000100000010400
	.quad 0x0000000000000000, 0x0010100001010004
/*
 * Table 2 (64 quadwords), indexed by byte 3 of the key ^ data word.
 * Addressed by the code as .L_s1 + 1 * 64 * 8, not through this label,
 * so it must directly follow table 1.
 */
.L_s2:
	.quad 0x0801080200100020, 0x0800080000000000
	.quad 0x0000080000000000, 0x0001080200100020
	.quad 0x0001000000100000, 0x0000000200000020
	.quad 0x0801000200100020, 0x0800080200000020
	.quad 0x0800000200000020, 0x0801080200100020
	.quad 0x0801080000100000, 0x0800000000000000
	.quad 0x0800080000000000, 0x0001000000100000
	.quad 0x0000000200000020, 0x0801000200100020
	.quad 0x0001080000100000, 0x0001000200100020
	.quad 0x0800080200000020, 0x0000000000000000
	.quad 0x0800000000000000, 0x0000080000000000
	.quad 0x0001080200100020, 0x0801000000100000
	.quad 0x0001000200100020, 0x0800000200000020
	.quad 0x0000000000000000, 0x0001080000100000
	.quad 0x0000080200000020, 0x0801080000100000
	.quad 0x0801000000100000, 0x0000080200000020
	.quad 0x0000000000000000, 0x0001080200100020
	.quad 0x0801000200100020, 0x0001000000100000
	.quad 0x0800080200000020, 0x0801000000100000
	.quad 0x0801080000100000, 0x0000080000000000
	.quad 0x0801000000100000, 0x0800080000000000
	.quad 0x0000000200000020, 0x0801080200100020
	.quad 0x0001080200100020, 0x0000000200000020
	.quad 0x0000080000000000, 0x0800000000000000
	.quad 0x0000080200000020, 0x0801080000100000
	.quad 0x0001000000100000, 0x0800000200000020
	.quad 0x0001000200100020, 0x0800080200000020
	.quad 0x0800000200000020, 0x0001000200100020
	.quad 0x0001080000100000, 0x0000000000000000
	.quad 0x0800080000000000, 0x0000080200000020
	.quad 0x0800000000000000, 0x0801000200100020
	.quad 0x0801080200100020, 0x0001080000100000
/*
 * Table 3 (64 quadwords), indexed by byte 6 of the key ^ data word.
 * Addressed by the code as .L_s1 + 2 * 64 * 8, not through this label,
 * so it must directly follow table 2.
 */
.L_s3:
	.quad 0x0000002000000208, 0x0000202008020200
	.quad 0x0000000000000000, 0x0000200008020008
	.quad 0x0000002008000200, 0x0000000000000000
	.quad 0x0000202000020208, 0x0000002008000200
	.quad 0x0000200000020008, 0x0000000008000008
	.quad 0x0000000008000008, 0x0000200000020000
	.quad 0x0000202008020208, 0x0000200000020008
	.quad 0x0000200008020000, 0x0000002000000208
	.quad 0x0000000008000000, 0x0000000000000008
	.quad 0x0000202008020200, 0x0000002000000200
	.quad 0x0000202000020200, 0x0000200008020000
	.quad 0x0000200008020008, 0x0000202000020208
	.quad 0x0000002008000208, 0x0000202000020200
	.quad 0x0000200000020000, 0x0000002008000208
	.quad 0x0000000000000008, 0x0000202008020208
	.quad 0x0000002000000200, 0x0000000008000000
	.quad 0x0000202008020200, 0x0000000008000000
	.quad 0x0000200000020008, 0x0000002000000208
	.quad 0x0000200000020000, 0x0000202008020200
	.quad 0x0000002008000200, 0x0000000000000000
	.quad 0x0000002000000200, 0x0000200000020008
	.quad 0x0000202008020208, 0x0000002008000200
	.quad 0x0000000008000008, 0x0000002000000200
	.quad 0x0000000000000000, 0x0000200008020008
	.quad 0x0000002008000208, 0x0000200000020000
	.quad 0x0000000008000000, 0x0000202008020208
	.quad 0x0000000000000008, 0x0000202000020208
	.quad 0x0000202000020200, 0x0000000008000008
	.quad 0x0000200008020000, 0x0000002008000208
	.quad 0x0000002000000208, 0x0000200008020000
	.quad 0x0000202000020208, 0x0000000000000008
	.quad 0x0000200008020008, 0x0000202000020200
/*
 * Table 4 (64 quadwords), indexed by byte 2 of the key ^ data word.
 * Addressed by the code as .L_s1 + 3 * 64 * 8, not through this label,
 * so it must directly follow table 3.
 */
.L_s4:
	.quad 0x1008020000002001, 0x1000020800002001
	.quad 0x1000020800002001, 0x0000000800000000
	.quad 0x0008020800002000, 0x1008000800000001
	.quad 0x1008000000000001, 0x1000020000002001
	.quad 0x0000000000000000, 0x0008020000002000
	.quad 0x0008020000002000, 0x1008020800002001
	.quad 0x1000000800000001, 0x0000000000000000
	.quad 0x0008000800000000, 0x1008000000000001
	.quad 0x1000000000000001, 0x0000020000002000
	.quad 0x0008000000000000, 0x1008020000002001
	.quad 0x0000000800000000, 0x0008000000000000
	.quad 0x1000020000002001, 0x0000020800002000
	.quad 0x1008000800000001, 0x1000000000000001
	.quad 0x0000020800002000, 0x0008000800000000
	.quad 0x0000020000002000, 0x0008020800002000
	.quad 0x1008020800002001, 0x1000000800000001
	.quad 0x0008000800000000, 0x1008000000000001
	.quad 0x0008020000002000, 0x1008020800002001
	.quad 0x1000000800000001, 0x0000000000000000
	.quad 0x0000000000000000, 0x0008020000002000
	.quad 0x0000020800002000, 0x0008000800000000
	.quad 0x1008000800000001, 0x1000000000000001
	.quad 0x1008020000002001, 0x1000020800002001
	.quad 0x1000020800002001, 0x0000000800000000
	.quad 0x1008020800002001, 0x1000000800000001
	.quad 0x1000000000000001, 0x0000020000002000
	.quad 0x1008000000000001, 0x1000020000002001
	.quad 0x0008020800002000, 0x1008000800000001
	.quad 0x1000020000002001, 0x0000020800002000
	.quad 0x0008000000000000, 0x1008020000002001
	.quad 0x0000000800000000, 0x0008000000000000
	.quad 0x0000020000002000, 0x0008020800002000
/*
 * Table 5 (64 quadwords), indexed by byte 5 of the key ^ data word.
 * Addressed by the code as .L_s1 + 4 * 64 * 8, not through this label,
 * so it must directly follow table 4.
 */
.L_s5:
	.quad 0x0000001000000100, 0x0020001002080100
	.quad 0x0020000002080000, 0x0420001002000100
	.quad 0x0000000000080000, 0x0000001000000100
	.quad 0x0400000000000000, 0x0020000002080000
	.quad 0x0400001000080100, 0x0000000000080000
	.quad 0x0020001002000100, 0x0400001000080100
	.quad 0x0420001002000100, 0x0420000002080000
	.quad 0x0000001000080100, 0x0400000000000000
	.quad 0x0020000002000000, 0x0400000000080000
	.quad 0x0400000000080000, 0x0000000000000000
	.quad 0x0400001000000100, 0x0420001002080100
	.quad 0x0420001002080100, 0x0020001002000100
	.quad 0x0420000002080000, 0x0400001000000100
	.quad 0x0000000000000000, 0x0420000002000000
	.quad 0x0020001002080100, 0x0020000002000000
	.quad 0x0420000002000000, 0x0000001000080100
	.quad 0x0000000000080000, 0x0420001002000100
	.quad 0x0000001000000100, 0x0020000002000000
	.quad 0x0400000000000000, 0x0020000002080000
	.quad 0x0420001002000100, 0x0400001000080100
	.quad 0x0020001002000100, 0x0400000000000000
	.quad 0x0420000002080000, 0x0020001002080100
	.quad 0x0400001000080100, 0x0000001000000100
	.quad 0x0020000002000000, 0x0420000002080000
	.quad 0x0420001002080100, 0x0000001000080100
	.quad 0x0420000002000000, 0x0420001002080100
	.quad 0x0020000002080000, 0x0000000000000000
	.quad 0x0400000000080000, 0x0420000002000000
	.quad 0x0000001000080100, 0x0020001002000100
	.quad 0x0400001000000100, 0x0000000000080000
	.quad 0x0000000000000000, 0x0400000000080000
	.quad 0x0020001002080100, 0x0400001000000100
/*
 * Table 6 (64 quadwords), indexed by byte 1 of the key ^ data word.
 * Addressed by the code as .L_s1 + 5 * 64 * 8, not through this label,
 * so it must directly follow table 5.
 */
.L_s6:
	.quad 0x0200000120000010, 0x0204000020000000
	.quad 0x0000040000000000, 0x0204040120000010
	.quad 0x0204000020000000, 0x0000000100000010
	.quad 0x0204040120000010, 0x0004000000000000
	.quad 0x0200040020000000, 0x0004040100000010
	.quad 0x0004000000000000, 0x0200000120000010
	.quad 0x0004000100000010, 0x0200040020000000
	.quad 0x0200000020000000, 0x0000040100000010
	.quad 0x0000000000000000, 0x0004000100000010
	.quad 0x0200040120000010, 0x0000040000000000
	.quad 0x0004040000000000, 0x0200040120000010
	.quad 0x0000000100000010, 0x0204000120000010
	.quad 0x0204000120000010, 0x0000000000000000
	.quad 0x0004040100000010, 0x0204040020000000
	.quad 0x0000040100000010, 0x0004040000000000
	.quad 0x0204040020000000, 0x0200000020000000
	.quad 0x0200040020000000, 0x0000000100000010
	.quad 0x0204000120000010, 0x0004040000000000
	.quad 0x0204040120000010, 0x0004000000000000
	.quad 0x0000040100000010, 0x0200000120000010
	.quad 0x0004000000000000, 0x0200040020000000
	.quad 0x0200000020000000, 0x0000040100000010
	.quad 0x0200000120000010, 0x0204040120000010
	.quad 0x0004040000000000, 0x0204000020000000
	.quad 0x0004040100000010, 0x0204040020000000
	.quad 0x0000000000000000, 0x0204000120000010
	.quad 0x0000000100000010, 0x0000040000000000
	.quad 0x0204000020000000, 0x0004040100000010
	.quad 0x0000040000000000, 0x0004000100000010
	.quad 0x0200040120000010, 0x0000000000000000
	.quad 0x0204040020000000, 0x0200000020000000
	.quad 0x0004000100000010, 0x0200040120000010
/*
 * Table 7 (64 quadwords), indexed by byte 4 of the key ^ data word.
 * Addressed by the code as .L_s1 + 6 * 64 * 8, not through this label,
 * so it must directly follow table 6.
 */
.L_s7:
	.quad 0x0002000000200000, 0x2002000004200002
	.quad 0x2000000004000802, 0x0000000000000000
	.quad 0x0000000000000800, 0x2000000004000802
	.quad 0x2002000000200802, 0x0002000004200800
	.quad 0x2002000004200802, 0x0002000000200000
	.quad 0x0000000000000000, 0x2000000004000002
	.quad 0x2000000000000002, 0x0000000004000000
	.quad 0x2002000004200002, 0x2000000000000802
	.quad 0x0000000004000800, 0x2002000000200802
	.quad 0x2002000000200002, 0x0000000004000800
	.quad 0x2000000004000002, 0x0002000004200000
	.quad 0x0002000004200800, 0x2002000000200002
	.quad 0x0002000004200000, 0x0000000000000800
	.quad 0x2000000000000802, 0x2002000004200802
	.quad 0x0002000000200800, 0x2000000000000002
	.quad 0x0000000004000000, 0x0002000000200800
	.quad 0x0000000004000000, 0x0002000000200800
	.quad 0x0002000000200000, 0x2000000004000802
	.quad 0x2000000004000802, 0x2002000004200002
	.quad 0x2002000004200002, 0x2000000000000002
	.quad 0x2002000000200002, 0x0000000004000000
	.quad 0x0000000004000800, 0x0002000000200000
	.quad 0x0002000004200800, 0x2000000000000802
	.quad 0x2002000000200802, 0x0002000004200800
	.quad 0x2000000000000802, 0x2000000004000002
	.quad 0x2002000004200802, 0x0002000004200000
	.quad 0x0002000000200800, 0x0000000000000000
	.quad 0x2000000000000002, 0x2002000004200802
	.quad 0x0000000000000000, 0x2002000000200802
	.quad 0x0002000004200000, 0x0000000000000800
	.quad 0x2000000004000002, 0x0000000004000800
	.quad 0x0000000000000800, 0x2002000000200002
/*
 * Table 8 (64 quadwords), indexed by byte 0 (least significant) of the
 * key ^ data word.  Addressed by the code as .L_s1 + 7 * 64 * 8, not
 * through this label, so it must directly follow table 7.
 */
.L_s8:
	.quad 0x0100010410001000, 0x0000010000001000
	.quad 0x0000000000040000, 0x0100010410041000
	.quad 0x0100000010000000, 0x0100010410001000
	.quad 0x0000000400000000, 0x0100000010000000
	.quad 0x0000000400040000, 0x0100000010040000
	.quad 0x0100010410041000, 0x0000010000041000
	.quad 0x0100010010041000, 0x0000010400041000
	.quad 0x0000010000001000, 0x0000000400000000
	.quad 0x0100000010040000, 0x0100000410000000
	.quad 0x0100010010001000, 0x0000010400001000
	.quad 0x0000010000041000, 0x0000000400040000
	.quad 0x0100000410040000, 0x0100010010041000
	.quad 0x0000010400001000, 0x0000000000000000
	.quad 0x0000000000000000, 0x0100000410040000
	.quad 0x0100000410000000, 0x0100010010001000
	.quad 0x0000010400041000, 0x0000000000040000
	.quad 0x0000010400041000, 0x0000000000040000
	.quad 0x0100010010041000, 0x0000010000001000
	.quad 0x0000000400000000, 0x0100000410040000
	.quad 0x0000010000001000, 0x0000010400041000
	.quad 0x0100010010001000, 0x0000000400000000
	.quad 0x0100000410000000, 0x0100000010040000
	.quad 0x0100000410040000, 0x0100000010000000
	.quad 0x0000000000040000, 0x0100010410001000
	.quad 0x0000000000000000, 0x0100010410041000
	.quad 0x0000000400040000, 0x0100000410000000
	.quad 0x0100000010040000, 0x0100010010001000
	.quad 0x0100010410001000, 0x0000000000000000
	.quad 0x0100010410041000, 0x0000010000041000
	.quad 0x0000010000041000, 0x0000010400001000
	.quad 0x0000010400001000, 0x0000000400040000
	.quad 0x0100000010000000, 0x0100010010041000
/*
 * des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher
 *
 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <linux/linkage.h>

/* Everything that follows is emitted into the .text section. */
.file "des3_ede-asm_64.S"
.text

/*
 * Lookup-table addresses.  Only s1 names a label; s2..s8 are computed as
 * fixed offsets from it, one table being 64 entries of 8 bytes.  The eight
 * tables must therefore be laid out contiguously and in order after .L_s1.
 */
#define s1 .L_s1
#define s2 ((s1) + (64*8))
#define s3 ((s2) + (64*8))
#define s4 ((s3) + (64*8))
#define s5 ((s4) + (64*8))
#define s6 ((s5) + (64*8))
#define s7 ((s6) + (64*8))
#define s8 ((s7) + (64*8))

/*
 * Register macros.
 *
 * Each family comes in a 64-bit form and a 'd' (32-bit) form; the RW
 * family additionally has 'bl'/'bh' forms for bits 0-7 and bits 8-15.
 * The numeric suffix 0..2 distinguishes up to three register sets.
 */

/* pointer to the round-key array (first function argument) */
#define CTX %rdi

/* left data halves */
#define RL0 %r8
#define RL1 %r9
#define RL2 %r10

#define RL0d %r8d
#define RL1d %r9d
#define RL2d %r10d

/* right data halves (r12/r13 are callee-saved) */
#define RR0 %r11
#define RR1 %r12
#define RR2 %r13

#define RR0d %r11d
#define RR1d %r12d
#define RR2d %r13d

/*
 * Working registers holding the current round key / table index bytes.
 * These must be legacy registers because the rounds read their second
 * byte through %ah/%bh/%ch.  %rbx is callee-saved.
 */
#define RW0 %rax
#define RW1 %rbx
#define RW2 %rcx

#define RW0d %eax
#define RW1d %ebx
#define RW2d %ecx

#define RW0bl %al
#define RW1bl %bl
#define RW2bl %cl

#define RW0bh %ah
#define RW1bh %bh
#define RW2bh %ch

/*
 * Temporaries.  RT1 is %rbp, so code using it has no frame pointer while
 * it runs and must save/restore %rbp (callee-saved, like r14/r15).
 * RT3 (%rdx) aliases the third argument register.
 */
#define RT0 %r15
#define RT1 %rbp
#define RT2 %r14
#define RT3 %rdx

#define RT0d %r15d
#define RT1d %ebp
#define RT2d %r14d
#define RT3d %edx

/***********************************************************************
 * 1-way 3DES
 ***********************************************************************/

/*
 * do_permutation(a, b, offset, mask)
 *
 * Bit-swap step on two 32-bit registers: the bits of b selected by mask
 * are exchanged with the bits of a selected by (mask << offset).
 *
 * Clobbers: RT0, flags.
 */
#define do_permutation(a, b, offset, mask) \
	movl a, RT0d; \
	shrl $(offset), RT0d; \
	xorl b, RT0d; \
	andl $(mask), RT0d;	/* RT0d = ((a >> offset) ^ b) & mask */ \
	xorl RT0d, b; \
	shll $(offset), RT0d; \
	xorl RT0d, a;

/*
 * expand_to_64bits(val, mask)
 *
 * Widens the 32-bit value in val##d to 64 bits and masks it:
 *   val = (val | ((u64)ror32(val##d, 4) << 32)) & mask
 * The rotated copy is OR-ed into the upper half of val, so that half is
 * expected to be zero on entry.
 *
 * Clobbers: RT0, flags.
 */
#define expand_to_64bits(val, mask) \
	movl val##d, RT0d; \
	rorl $4, RT0d; \
	shlq $32, RT0; \
	orq RT0, val; \
	andq mask, val;

/*
 * compress_to_64bits(val)
 *
 * Folds the 64-bit value in val back into its low 32 bits:
 *   val##d |= rol32(val >> 32, 4)
 * Despite the name the result is 32 bits wide; the 32-bit OR also clears
 * the upper half of val.
 *
 * Clobbers: RT0, flags.
 */
#define compress_to_64bits(val) \
	movq val, RT0; \
	shrq $32, RT0; \
	roll $4, RT0d; \
	orl RT0d, val##d;

/*
 * initial_permutation(left, right)
 *
 * Applies the initial bit permutation to the 32-bit halves left##d and
 * right##d (four do_permutation steps followed by a rotate-by-one swap
 * step), then expands both halves to the 64-bit form consumed by the
 * rounds, keeping 6 bits per byte (mask 0x3f in every byte).
 *
 * Clobbers: RT0, RT3 (left holding the 0x3f.. mask), RW0, flags.
 */
#define initial_permutation(left, right) \
	do_permutation(left##d, right##d,  4, 0x0f0f0f0f); \
	do_permutation(left##d, right##d, 16, 0x0000ffff); \
	do_permutation(right##d, left##d,  2, 0x33333333); \
	do_permutation(right##d, left##d,  8, 0x00ff00ff); \
	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
	movl left##d, RW0d; \
	roll $1, right##d; \
	xorl right##d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, left##d; \
	xorl RW0d, right##d; \
	roll $1, left##d; \
	expand_to_64bits(right, RT3); \
	expand_to_64bits(left, RT3);

/*
 * final_permutation(left, right)
 *
 * Mirror image of initial_permutation: compresses both halves back to
 * 32 bits, undoes the rotate-by-one swap step and then runs the four
 * do_permutation steps in reverse order.  Results are left in left##d
 * and right##d.
 *
 * Clobbers: RT0, RW0, flags.
 */
#define final_permutation(left, right) \
	compress_to_64bits(right); \
	compress_to_64bits(left); \
	movl right##d, RW0d; \
	rorl $1, left##d; \
	xorl left##d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, right##d; \
	xorl RW0d, left##d; \
	rorl $1, right##d; \
	do_permutation(right##d, left##d,  8, 0x00ff00ff); \
	do_permutation(right##d, left##d,  2, 0x33333333); \
	do_permutation(left##d, right##d, 16, 0x0000ffff); \
	do_permutation(left##d, right##d,  4, 0x0f0f0f0f);

/*
 * round1(n, from, to, load_next_key)
 *
 * One round for a single block.  On entry RW0 holds round key n.
 *   RW0 ^= from
 *   to  ^= s8[byte0] ^ s6[byte1] ^ s4[byte2] ^ s2[byte3]
 *        ^ s7[byte4] ^ s5[byte5] ^ s3[byte6] ^ s1[byte7]
 * where byteK is byte K of RW0 (byte0 = least significant).  Half of the
 * lookups are accumulated in RT0 and half directly in 'to'.
 *
 * load_next_key is a macro invoked as load_next_key(n, RW0) once all
 * bytes of RW0 have been extracted; pass dummy2 to skip the load.
 *
 * The tables are addressed absolutely (no base register, not
 * RIP-relative) and each has 64 entries, so every byte of (key ^ from)
 * must be < 64.
 *
 * Clobbers: RW0, RT0, RT1, RT2, RT3, RL1 (used as an extra index
 * temporary), flags.
 */
#define round1(n, from, to, load_next_key) \
	xorq from, RW0; \
	\
	movzbl RW0bl, RT0d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	movzbl RW0bl, RT2d; \
	movzbl RW0bh, RT3d; \
	shrq $16, RW0; \
	movq s8(, RT0, 8), RT0; \
	xorq s6(, RT1, 8), to; \
	movzbl RW0bl, RL1d; \
	movzbl RW0bh, RT1d; \
	shrl $16, RW0d; \
	xorq s4(, RT2, 8), RT0; \
	xorq s2(, RT3, 8), to; \
	movzbl RW0bl, RT2d; \
	movzbl RW0bh, RT3d; \
	xorq s7(, RL1, 8), RT0; \
	xorq s5(, RT1, 8), to; \
	xorq s3(, RT2, 8), RT0; \
	load_next_key(n, RW0); \
	xorq RT0, to; \
	xorq s1(, RT3, 8), to;

/*
 * Hooks passed to round1/round3 to refill the round-key register.
 *
 * load_next_key(n, RWx): load round key n + 1 (8 bytes per key, array
 * based at CTX) into RWx.
 */
#define load_next_key(n, RWx) \
	movq	(((n) + 1) * 8)(CTX), RWx;

/* dummy2(a, b): two-argument hook that expands to nothing. */
#define dummy2(a, b) /* nothing */
161
/*
 * read_block(io, left, right)
 *
 * Load one 8-byte block from the address in 'io': the first 32-bit word
 * goes to left##d, the second to right##d, each byte-swapped after the
 * load (i.e. the words are read as big-endian values).
 */
#define read_block(io, left, right) \
	movl	0(io), left##d; \
	movl	4(io), right##d; \
	bswapl	left##d; \
	bswapl	right##d;
167
/*
 * write_block(io, left, right)
 *
 * Store one 8-byte block to the address in 'io': left##d becomes the
 * first 32-bit word, right##d the second.  Both registers are
 * byte-swapped in place before the store, so their previous contents
 * are not preserved.
 */
#define write_block(io, left, right) \
	bswapl	left##d; \
	bswapl	right##d; \
	movl	left##d, 0(io); \
	movl	right##d, 4(io);
173
/*
 * des3_ede_x86_64_crypt_blk: process one 8-byte block.
 *
 * Flow: load and byte-swap the block, apply initial_permutation, run 48
 * rounds in three groups of 16, apply final_permutation, byte-swap and
 * store.  Each round consumes one 8-byte round key from CTX; keys are
 * prefetched one round ahead, and the last round passes dummy2 so that
 * nothing beyond key 47 is read.  At each group boundary the from/to
 * roles are not swapped (round 16 repeats the roles of round 15, round 32
 * those of round 31).
 *
 * The dst pointer is parked on the stack for the duration of the rounds:
 * the round macros use RT1 as scratch, and the register macros at the top
 * of the file map RT1 onto %rsi, so dst would not survive until the
 * final store otherwise.
 */
ENTRY(des3_ede_x86_64_crypt_blk)
	/* input:
	 *	%rdi: round keys, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */
	pushq %rbp;
	pushq %rbx;
	pushq %r12;
	pushq %r13;
	pushq %r14;
	pushq %r15;

	pushq %rsi; /* dst: clobbered as scratch by the round macros */

	/* src (%rdx) is dead after this load; RT3 may reuse the register */
	read_block(%rdx, RL0, RR0);
	initial_permutation(RL0, RR0);

	/* prime RW0 with round key 0; every round loads the next one */
	movq (CTX), RW0;

	round1(0, RR0, RL0, load_next_key);
	round1(1, RL0, RR0, load_next_key);
	round1(2, RR0, RL0, load_next_key);
	round1(3, RL0, RR0, load_next_key);
	round1(4, RR0, RL0, load_next_key);
	round1(5, RL0, RR0, load_next_key);
	round1(6, RR0, RL0, load_next_key);
	round1(7, RL0, RR0, load_next_key);
	round1(8, RR0, RL0, load_next_key);
	round1(9, RL0, RR0, load_next_key);
	round1(10, RR0, RL0, load_next_key);
	round1(11, RL0, RR0, load_next_key);
	round1(12, RR0, RL0, load_next_key);
	round1(13, RL0, RR0, load_next_key);
	round1(14, RR0, RL0, load_next_key);
	round1(15, RL0, RR0, load_next_key);

	round1(16+0, RL0, RR0, load_next_key);
	round1(16+1, RR0, RL0, load_next_key);
	round1(16+2, RL0, RR0, load_next_key);
	round1(16+3, RR0, RL0, load_next_key);
	round1(16+4, RL0, RR0, load_next_key);
	round1(16+5, RR0, RL0, load_next_key);
	round1(16+6, RL0, RR0, load_next_key);
	round1(16+7, RR0, RL0, load_next_key);
	round1(16+8, RL0, RR0, load_next_key);
	round1(16+9, RR0, RL0, load_next_key);
	round1(16+10, RL0, RR0, load_next_key);
	round1(16+11, RR0, RL0, load_next_key);
	round1(16+12, RL0, RR0, load_next_key);
	round1(16+13, RR0, RL0, load_next_key);
	round1(16+14, RL0, RR0, load_next_key);
	round1(16+15, RR0, RL0, load_next_key);

	round1(32+0, RR0, RL0, load_next_key);
	round1(32+1, RL0, RR0, load_next_key);
	round1(32+2, RR0, RL0, load_next_key);
	round1(32+3, RL0, RR0, load_next_key);
	round1(32+4, RR0, RL0, load_next_key);
	round1(32+5, RL0, RR0, load_next_key);
	round1(32+6, RR0, RL0, load_next_key);
	round1(32+7, RL0, RR0, load_next_key);
	round1(32+8, RR0, RL0, load_next_key);
	round1(32+9, RL0, RR0, load_next_key);
	round1(32+10, RR0, RL0, load_next_key);
	round1(32+11, RL0, RR0, load_next_key);
	round1(32+12, RR0, RL0, load_next_key);
	round1(32+13, RL0, RR0, load_next_key);
	round1(32+14, RR0, RL0, load_next_key);
	round1(32+15, RL0, RR0, dummy2); /* last round: no key left to load */

	final_permutation(RR0, RL0);

	popq %rsi; /* dst */
	write_block(%rsi, RR0, RL0);

	popq %r15;
	popq %r14;
	popq %r13;
	popq %r12;
	popq %rbx;
	popq %rbp;

	ret;
ENDPROC(des3_ede_x86_64_crypt_blk)
255
/***********************************************************************
 * 3-way 3DES: three blocks processed per call
 ***********************************************************************/
/*
 * expand_to_64bits(val, mask) -- repeated here for the 3-way code; the
 * replacement list is identical to the definition in the 1-way section.
 *
 *	val = (val | ((u64)ror32(val##d, 4) << 32)) & mask
 *
 * Clobbers: RT0, flags.
 */
#define expand_to_64bits(val, mask) \
	movl	val##d, RT0d; \
	rorl	$4, RT0d; \
	shlq	$32, RT0; \
	orq	RT0, val; \
	andq	mask, val;
265
/*
 * compress_to_64bits(val) -- repeated here for the 3-way code; the
 * replacement list is identical to the definition in the 1-way section.
 *
 *	val##d |= rol32(val >> 32, 4)
 *
 * Clobbers: RT0, flags.
 */
#define compress_to_64bits(val) \
	movq	val, RT0; \
	shrq	$32, RT0; \
	roll	$4, RT0d; \
	orl	RT0d, val##d;
271
/*
 * Per-lane building blocks of initial_permutation3.  'left'/'right' are
 * the 64-bit register macro names of one lane (e.g. RL0/RR0); 'tmp' is a
 * 32-bit scratch register.
 */

/* 4-bit and 16-bit exchanges of one lane. */
#define __ip3_swap_4_16(left, right) \
	do_permutation(left##d, right##d,  4, 0x0f0f0f0f); \
	do_permutation(left##d, right##d, 16, 0x0000ffff);

/* 2-bit and 8-bit exchanges of one lane (operands swapped). */
#define __ip3_swap_2_8(left, right) \
	do_permutation(right##d, left##d,  2, 0x33333333); \
	do_permutation(right##d, left##d,  8, 0x00ff00ff);

/* Rotate both halves left by one, exchanging their 0xaaaaaaaa bits. */
#define __ip3_rot_swap(left, right, tmp) \
	movl	left##d, tmp; \
	roll	$1, right##d; \
	xorl	right##d, tmp; \
	andl	$0xaaaaaaaa, tmp; \
	xorl	tmp, left##d; \
	xorl	tmp, right##d; \
	roll	$1, left##d;

/*
 * initial_permutation3(left, right)
 *
 * Input-side bit permutation for three blocks at once.  'left' and
 * 'right' are register-name prefixes: lane i uses left##i / right##i
 * (i = 0, 1, 2).  Each lane undergoes the same steps as in
 * initial_permutation; the first two stages are issued for all lanes
 * before moving on, the last stage is completed lane by lane using
 * RW0d, RW1d and RW2d as scratch.
 *
 * Clobbers: RT0, RT3 (holds the expand mask), RW0, RW1, RW2, flags.
 */
#define initial_permutation3(left, right) \
	__ip3_swap_4_16(left##0, right##0) \
	__ip3_swap_4_16(left##1, right##1) \
	__ip3_swap_4_16(left##2, right##2) \
	\
	__ip3_swap_2_8(left##0, right##0) \
	__ip3_swap_2_8(left##1, right##1) \
	__ip3_swap_2_8(left##2, right##2) \
	\
	movabs	$0x3f3f3f3f3f3f3f3f, RT3;	/* mask for expand_to_64bits */ \
	\
	__ip3_rot_swap(left##0, right##0, RW0d) \
	expand_to_64bits(right##0, RT3); \
	expand_to_64bits(left##0, RT3); \
	__ip3_rot_swap(left##1, right##1, RW1d) \
	expand_to_64bits(right##1, RT3); \
	expand_to_64bits(left##1, RT3); \
	__ip3_rot_swap(left##2, right##2, RW2d) \
	expand_to_64bits(right##2, RT3); \
	expand_to_64bits(left##2, RT3);
316
/*
 * Per-lane building blocks of final_permutation3.  'left'/'right' are
 * the 64-bit register macro names of one lane; 'tmp' is a 32-bit scratch
 * register.
 */

/* Rotate both halves right by one, exchanging their 0xaaaaaaaa bits. */
#define __fp3_rot_swap(left, right, tmp) \
	movl	right##d, tmp; \
	rorl	$1, left##d; \
	xorl	left##d, tmp; \
	andl	$0xaaaaaaaa, tmp; \
	xorl	tmp, right##d; \
	xorl	tmp, left##d; \
	rorl	$1, right##d;

/* 8-bit and 2-bit exchanges of one lane (operands swapped). */
#define __fp3_swap_8_2(left, right) \
	do_permutation(right##d, left##d,  8, 0x00ff00ff); \
	do_permutation(right##d, left##d,  2, 0x33333333);

/* 16-bit and 4-bit exchanges of one lane. */
#define __fp3_swap_16_4(left, right) \
	do_permutation(left##d, right##d, 16, 0x0000ffff); \
	do_permutation(left##d, right##d,  4, 0x0f0f0f0f);

/*
 * final_permutation3(left, right)
 *
 * Output-side bit permutation for three blocks at once.  'left' and
 * 'right' are register-name prefixes: lane i uses left##i / right##i
 * (i = 0, 1, 2).  Each lane undergoes the same steps as in
 * final_permutation: the compress + rotate/exchange stage is completed
 * lane by lane (scratch RW0d, RW1d, RW2d), then the two do_permutation
 * stages are each issued for all lanes.
 *
 * Clobbers: RT0, RW0, RW1, RW2, flags.
 */
#define final_permutation3(left, right) \
	compress_to_64bits(right##0); \
	compress_to_64bits(left##0); \
	__fp3_rot_swap(left##0, right##0, RW0d) \
	compress_to_64bits(right##1); \
	compress_to_64bits(left##1); \
	__fp3_rot_swap(left##1, right##1, RW1d) \
	compress_to_64bits(right##2); \
	compress_to_64bits(left##2); \
	__fp3_rot_swap(left##2, right##2, RW2d) \
	\
	__fp3_swap_8_2(left##0, right##0) \
	__fp3_swap_8_2(left##1, right##1) \
	__fp3_swap_8_2(left##2, right##2) \
	\
	__fp3_swap_16_4(left##0, right##0) \
	__fp3_swap_16_4(left##1, right##1) \
	__fp3_swap_16_4(left##2, right##2)
359
/*
 * __round3_lane(from, to, rw, refill)
 *
 * One cipher round on one lane.  'rw' is the name of the lane's round-key
 * register macro (RW0, RW1 or RW2; its bl/bh/d views are derived by
 * token pasting) and must hold the current round key on entry.
 *
 * rw ^= from, then each byte of rw indexes one table of 8-byte entries
 * and the entry is XOR-ed straight into 'to':
 *
 *	byte 0 -> s8	byte 1 -> s6	byte 2 -> s4	byte 3 -> s2
 *	byte 4 -> s7	byte 5 -> s5	byte 6 -> s3	byte 7 -> s1
 *
 * 'refill' is an instruction sequence (possibly empty) emitted once rw
 * has been fully consumed, just before the last two table lookups; it is
 * where the key for the next round is put back into rw.
 *
 * Only RT3 (low-byte index) and RT1 (high-byte index) are used as
 * scratch.  RT1 receives the high-byte register, which cannot be encoded
 * together with a REX-prefixed register, so RT1 must be a legacy
 * register.
 */
#define __round3_lane(from, to, rw, refill) \
	xorq	from, rw;			/* mix input with round key */ \
	movzbl	rw##bl, RT3d;			/* byte 0 */ \
	movzbl	rw##bh, RT1d;			/* byte 1 */ \
	shrq	$16, rw; \
	xorq	s8(, RT3, 8), to; \
	xorq	s6(, RT1, 8), to; \
	movzbl	rw##bl, RT3d;			/* byte 2 */ \
	movzbl	rw##bh, RT1d;			/* byte 3 */ \
	shrq	$16, rw; \
	xorq	s4(, RT3, 8), to; \
	xorq	s2(, RT1, 8), to; \
	movzbl	rw##bl, RT3d;			/* byte 4 */ \
	movzbl	rw##bh, RT1d;			/* byte 5 */ \
	shrl	$16, rw##d; \
	xorq	s7(, RT3, 8), to; \
	xorq	s5(, RT1, 8), to; \
	movzbl	rw##bl, RT3d;			/* byte 6 */ \
	movzbl	rw##bh, RT1d;			/* byte 7 */ \
	refill; \
	xorq	s3(, RT3, 8), to; \
	xorq	s1(, RT1, 8), to;

/*
 * round3(n, from, to, next_key, copy_key)
 *
 * One cipher round on three blocks.  'from' and 'to' are register-name
 * prefixes; lane i uses from##i / to##i together with round-key register
 * RWi (i = 0, 1, 2).  On entry RW0, RW1 and RW2 must all hold round key n.
 *
 *   next_key  hook invoked as next_key(n, RW0) after lane 0 has consumed
 *             its key (load_next_key, or dummy2 in the last round)
 *   copy_key  hook invoked as copy_key(RW0, RWi) after lane i has consumed
 *             its key, to duplicate the freshly loaded key (__movq, or
 *             dummy2 in the last round)
 *
 * Clobbers: RW0, RW1, RW2, RT1, RT3, flags.
 */
#define round3(n, from, to, next_key, copy_key) \
	__round3_lane(from##0, to##0, RW0, next_key(n, RW0)) \
	__round3_lane(from##1, to##1, RW1, copy_key(RW0, RW1)) \
	__round3_lane(from##2, to##2, RW2, copy_key(RW0, RW2))
424
/*
 * __movq(src, dst): plain 64-bit register copy, wrapped in a macro so it
 * can be handed to round3 as its key-copy hook.
 */
#define __movq(src, dst) movq src, dst;
427
/*
 * des3_ede_x86_64_crypt_blk_3way: process three 8-byte blocks per call.
 *
 * Block i is held in RLi/RRi (i = 0, 1, 2).  Flow: load and byte-swap
 * the six 32-bit words, apply initial_permutation3, run 48 rounds in
 * three groups of 16, apply final_permutation3, byte-swap and store.
 * All three lanes use the same round key: RW0 is loaded from CTX and
 * copied into RW1/RW2, and every round refreshes the three registers for
 * the next one.  The last round passes dummy2 for both hooks so that
 * nothing beyond key 47 is read.
 *
 * The dst pointer is parked on the stack for the duration of the rounds:
 * the round macros use RT1 as scratch, and the register macros at the top
 * of the file map RT1 onto %rsi, so dst would not survive until the
 * final stores otherwise.
 */
ENTRY(des3_ede_x86_64_crypt_blk_3way)
	/* input:
	 *	%rdi: ctx, round keys
	 *	%rsi: dst (3 blocks)
	 *	%rdx: src (3 blocks)
	 */

	pushq %rbp;
	pushq %rbx;
	pushq %r12;
	pushq %r13;
	pushq %r14;
	pushq %r15;

	pushq %rsi; /* dst: clobbered as scratch by the round macros */

	/* load input; src (%rdx) is not needed after these six loads */
	movl 0 * 4(%rdx), RL0d;
	movl 1 * 4(%rdx), RR0d;
	movl 2 * 4(%rdx), RL1d;
	movl 3 * 4(%rdx), RR1d;
	movl 4 * 4(%rdx), RL2d;
	movl 5 * 4(%rdx), RR2d;

	bswapl RL0d;
	bswapl RR0d;
	bswapl RL1d;
	bswapl RR1d;
	bswapl RL2d;
	bswapl RR2d;

	initial_permutation3(RL, RR);

	/* prime all three lanes with round key 0 */
	movq 0(CTX), RW0;
	movq RW0, RW1;
	movq RW0, RW2;

	round3(0, RR, RL, load_next_key, __movq);
	round3(1, RL, RR, load_next_key, __movq);
	round3(2, RR, RL, load_next_key, __movq);
	round3(3, RL, RR, load_next_key, __movq);
	round3(4, RR, RL, load_next_key, __movq);
	round3(5, RL, RR, load_next_key, __movq);
	round3(6, RR, RL, load_next_key, __movq);
	round3(7, RL, RR, load_next_key, __movq);
	round3(8, RR, RL, load_next_key, __movq);
	round3(9, RL, RR, load_next_key, __movq);
	round3(10, RR, RL, load_next_key, __movq);
	round3(11, RL, RR, load_next_key, __movq);
	round3(12, RR, RL, load_next_key, __movq);
	round3(13, RL, RR, load_next_key, __movq);
	round3(14, RR, RL, load_next_key, __movq);
	round3(15, RL, RR, load_next_key, __movq);

	round3(16+0, RL, RR, load_next_key, __movq);
	round3(16+1, RR, RL, load_next_key, __movq);
	round3(16+2, RL, RR, load_next_key, __movq);
	round3(16+3, RR, RL, load_next_key, __movq);
	round3(16+4, RL, RR, load_next_key, __movq);
	round3(16+5, RR, RL, load_next_key, __movq);
	round3(16+6, RL, RR, load_next_key, __movq);
	round3(16+7, RR, RL, load_next_key, __movq);
	round3(16+8, RL, RR, load_next_key, __movq);
	round3(16+9, RR, RL, load_next_key, __movq);
	round3(16+10, RL, RR, load_next_key, __movq);
	round3(16+11, RR, RL, load_next_key, __movq);
	round3(16+12, RL, RR, load_next_key, __movq);
	round3(16+13, RR, RL, load_next_key, __movq);
	round3(16+14, RL, RR, load_next_key, __movq);
	round3(16+15, RR, RL, load_next_key, __movq);

	round3(32+0, RR, RL, load_next_key, __movq);
	round3(32+1, RL, RR, load_next_key, __movq);
	round3(32+2, RR, RL, load_next_key, __movq);
	round3(32+3, RL, RR, load_next_key, __movq);
	round3(32+4, RR, RL, load_next_key, __movq);
	round3(32+5, RL, RR, load_next_key, __movq);
	round3(32+6, RR, RL, load_next_key, __movq);
	round3(32+7, RL, RR, load_next_key, __movq);
	round3(32+8, RR, RL, load_next_key, __movq);
	round3(32+9, RL, RR, load_next_key, __movq);
	round3(32+10, RR, RL, load_next_key, __movq);
	round3(32+11, RL, RR, load_next_key, __movq);
	round3(32+12, RR, RL, load_next_key, __movq);
	round3(32+13, RL, RR, load_next_key, __movq);
	round3(32+14, RR, RL, load_next_key, __movq);
	round3(32+15, RL, RR, dummy2, dummy2); /* last round: no key left to load */

	final_permutation3(RR, RL);

	bswapl RR0d;
	bswapl RL0d;
	bswapl RR1d;
	bswapl RL1d;
	bswapl RR2d;
	bswapl RL2d;

	popq %rsi; /* dst */
	movl RR0d, 0 * 4(%rsi);
	movl RL0d, 1 * 4(%rsi);
	movl RR1d, 2 * 4(%rsi);
	movl RL1d, 3 * 4(%rsi);
	movl RR2d, 4 * 4(%rsi);
	movl RL2d, 5 * 4(%rsi);

	popq %r15;
	popq %r14;
	popq %r13;
	popq %r12;
	popq %rbx;
	popq %rbp;

	ret;
ENDPROC(des3_ede_x86_64_crypt_blk_3way)
539
/*
 * Lookup tables for the round macros.
 *
 * Eight tables of 64 quadword entries each (512 bytes per table).  They
 * must stay back to back and in this order: the round macros address
 * s2..s8 as fixed offsets of 64*8 bytes from .L_s1, not through the
 * individual .L_sN labels.
 *
 * The tables are only ever read, so they live in .rodata rather than in
 * writable .data.
 */
.section	.rodata, "a", @progbits
.align 16
.L_s1:
	.quad 0x0010100001010400, 0x0000000000000000
	.quad 0x0000100000010000, 0x0010100001010404
	.quad 0x0010100001010004, 0x0000100000010404
	.quad 0x0000000000000004, 0x0000100000010000
	.quad 0x0000000000000400, 0x0010100001010400
	.quad 0x0010100001010404, 0x0000000000000400
	.quad 0x0010000001000404, 0x0010100001010004
	.quad 0x0010000001000000, 0x0000000000000004
	.quad 0x0000000000000404, 0x0010000001000400
	.quad 0x0010000001000400, 0x0000100000010400
	.quad 0x0000100000010400, 0x0010100001010000
	.quad 0x0010100001010000, 0x0010000001000404
	.quad 0x0000100000010004, 0x0010000001000004
	.quad 0x0010000001000004, 0x0000100000010004
	.quad 0x0000000000000000, 0x0000000000000404
	.quad 0x0000100000010404, 0x0010000001000000
	.quad 0x0000100000010000, 0x0010100001010404
	.quad 0x0000000000000004, 0x0010100001010000
	.quad 0x0010100001010400, 0x0010000001000000
	.quad 0x0010000001000000, 0x0000000000000400
	.quad 0x0010100001010004, 0x0000100000010000
	.quad 0x0000100000010400, 0x0010000001000004
	.quad 0x0000000000000400, 0x0000000000000004
	.quad 0x0010000001000404, 0x0000100000010404
	.quad 0x0010100001010404, 0x0000100000010004
	.quad 0x0010100001010000, 0x0010000001000404
	.quad 0x0010000001000004, 0x0000000000000404
	.quad 0x0000100000010404, 0x0010100001010400
	.quad 0x0000000000000404, 0x0010000001000400
	.quad 0x0010000001000400, 0x0000000000000000
	.quad 0x0000100000010004, 0x0000100000010400
	.quad 0x0000000000000000, 0x0010100001010004
.L_s2:
	.quad 0x0801080200100020, 0x0800080000000000
	.quad 0x0000080000000000, 0x0001080200100020
	.quad 0x0001000000100000, 0x0000000200000020
	.quad 0x0801000200100020, 0x0800080200000020
	.quad 0x0800000200000020, 0x0801080200100020
	.quad 0x0801080000100000, 0x0800000000000000
	.quad 0x0800080000000000, 0x0001000000100000
	.quad 0x0000000200000020, 0x0801000200100020
	.quad 0x0001080000100000, 0x0001000200100020
	.quad 0x0800080200000020, 0x0000000000000000
	.quad 0x0800000000000000, 0x0000080000000000
	.quad 0x0001080200100020, 0x0801000000100000
	.quad 0x0001000200100020, 0x0800000200000020
	.quad 0x0000000000000000, 0x0001080000100000
	.quad 0x0000080200000020, 0x0801080000100000
	.quad 0x0801000000100000, 0x0000080200000020
	.quad 0x0000000000000000, 0x0001080200100020
	.quad 0x0801000200100020, 0x0001000000100000
	.quad 0x0800080200000020, 0x0801000000100000
	.quad 0x0801080000100000, 0x0000080000000000
	.quad 0x0801000000100000, 0x0800080000000000
	.quad 0x0000000200000020, 0x0801080200100020
	.quad 0x0001080200100020, 0x0000000200000020
	.quad 0x0000080000000000, 0x0800000000000000
	.quad 0x0000080200000020, 0x0801080000100000
	.quad 0x0001000000100000, 0x0800000200000020
	.quad 0x0001000200100020, 0x0800080200000020
	.quad 0x0800000200000020, 0x0001000200100020
	.quad 0x0001080000100000, 0x0000000000000000
	.quad 0x0800080000000000, 0x0000080200000020
	.quad 0x0800000000000000, 0x0801000200100020
	.quad 0x0801080200100020, 0x0001080000100000
.L_s3:
	.quad 0x0000002000000208, 0x0000202008020200
	.quad 0x0000000000000000, 0x0000200008020008
	.quad 0x0000002008000200, 0x0000000000000000
	.quad 0x0000202000020208, 0x0000002008000200
	.quad 0x0000200000020008, 0x0000000008000008
	.quad 0x0000000008000008, 0x0000200000020000
	.quad 0x0000202008020208, 0x0000200000020008
	.quad 0x0000200008020000, 0x0000002000000208
	.quad 0x0000000008000000, 0x0000000000000008
	.quad 0x0000202008020200, 0x0000002000000200
	.quad 0x0000202000020200, 0x0000200008020000
	.quad 0x0000200008020008, 0x0000202000020208
	.quad 0x0000002008000208, 0x0000202000020200
	.quad 0x0000200000020000, 0x0000002008000208
	.quad 0x0000000000000008, 0x0000202008020208
	.quad 0x0000002000000200, 0x0000000008000000
	.quad 0x0000202008020200, 0x0000000008000000
	.quad 0x0000200000020008, 0x0000002000000208
	.quad 0x0000200000020000, 0x0000202008020200
	.quad 0x0000002008000200, 0x0000000000000000
	.quad 0x0000002000000200, 0x0000200000020008
	.quad 0x0000202008020208, 0x0000002008000200
	.quad 0x0000000008000008, 0x0000002000000200
	.quad 0x0000000000000000, 0x0000200008020008
	.quad 0x0000002008000208, 0x0000200000020000
	.quad 0x0000000008000000, 0x0000202008020208
	.quad 0x0000000000000008, 0x0000202000020208
	.quad 0x0000202000020200, 0x0000000008000008
	.quad 0x0000200008020000, 0x0000002008000208
	.quad 0x0000002000000208, 0x0000200008020000
	.quad 0x0000202000020208, 0x0000000000000008
	.quad 0x0000200008020008, 0x0000202000020200
.L_s4:
	.quad 0x1008020000002001, 0x1000020800002001
	.quad 0x1000020800002001, 0x0000000800000000
	.quad 0x0008020800002000, 0x1008000800000001
	.quad 0x1008000000000001, 0x1000020000002001
	.quad 0x0000000000000000, 0x0008020000002000
	.quad 0x0008020000002000, 0x1008020800002001
	.quad 0x1000000800000001, 0x0000000000000000
	.quad 0x0008000800000000, 0x1008000000000001
	.quad 0x1000000000000001, 0x0000020000002000
	.quad 0x0008000000000000, 0x1008020000002001
	.quad 0x0000000800000000, 0x0008000000000000
	.quad 0x1000020000002001, 0x0000020800002000
	.quad 0x1008000800000001, 0x1000000000000001
	.quad 0x0000020800002000, 0x0008000800000000
	.quad 0x0000020000002000, 0x0008020800002000
	.quad 0x1008020800002001, 0x1000000800000001
	.quad 0x0008000800000000, 0x1008000000000001
	.quad 0x0008020000002000, 0x1008020800002001
	.quad 0x1000000800000001, 0x0000000000000000
	.quad 0x0000000000000000, 0x0008020000002000
	.quad 0x0000020800002000, 0x0008000800000000
	.quad 0x1008000800000001, 0x1000000000000001
	.quad 0x1008020000002001, 0x1000020800002001
	.quad 0x1000020800002001, 0x0000000800000000
	.quad 0x1008020800002001, 0x1000000800000001
	.quad 0x1000000000000001, 0x0000020000002000
	.quad 0x1008000000000001, 0x1000020000002001
	.quad 0x0008020800002000, 0x1008000800000001
	.quad 0x1000020000002001, 0x0000020800002000
	.quad 0x0008000000000000, 0x1008020000002001
	.quad 0x0000000800000000, 0x0008000000000000
	.quad 0x0000020000002000, 0x0008020800002000
.L_s5:
	.quad 0x0000001000000100, 0x0020001002080100
	.quad 0x0020000002080000, 0x0420001002000100
	.quad 0x0000000000080000, 0x0000001000000100
	.quad 0x0400000000000000, 0x0020000002080000
	.quad 0x0400001000080100, 0x0000000000080000
	.quad 0x0020001002000100, 0x0400001000080100
	.quad 0x0420001002000100, 0x0420000002080000
	.quad 0x0000001000080100, 0x0400000000000000
	.quad 0x0020000002000000, 0x0400000000080000
	.quad 0x0400000000080000, 0x0000000000000000
	.quad 0x0400001000000100, 0x0420001002080100
	.quad 0x0420001002080100, 0x0020001002000100
	.quad 0x0420000002080000, 0x0400001000000100
	.quad 0x0000000000000000, 0x0420000002000000
	.quad 0x0020001002080100, 0x0020000002000000
	.quad 0x0420000002000000, 0x0000001000080100
	.quad 0x0000000000080000, 0x0420001002000100
	.quad 0x0000001000000100, 0x0020000002000000
	.quad 0x0400000000000000, 0x0020000002080000
	.quad 0x0420001002000100, 0x0400001000080100
	.quad 0x0020001002000100, 0x0400000000000000
	.quad 0x0420000002080000, 0x0020001002080100
	.quad 0x0400001000080100, 0x0000001000000100
	.quad 0x0020000002000000, 0x0420000002080000
	.quad 0x0420001002080100, 0x0000001000080100
	.quad 0x0420000002000000, 0x0420001002080100
	.quad 0x0020000002080000, 0x0000000000000000
	.quad 0x0400000000080000, 0x0420000002000000
	.quad 0x0000001000080100, 0x0020001002000100
	.quad 0x0400001000000100, 0x0000000000080000
	.quad 0x0000000000000000, 0x0400000000080000
	.quad 0x0020001002080100, 0x0400001000000100
.L_s6:
	.quad 0x0200000120000010, 0x0204000020000000
	.quad 0x0000040000000000, 0x0204040120000010
	.quad 0x0204000020000000, 0x0000000100000010
	.quad 0x0204040120000010, 0x0004000000000000
	.quad 0x0200040020000000, 0x0004040100000010
	.quad 0x0004000000000000, 0x0200000120000010
	.quad 0x0004000100000010, 0x0200040020000000
	.quad 0x0200000020000000, 0x0000040100000010
	.quad 0x0000000000000000, 0x0004000100000010
	.quad 0x0200040120000010, 0x0000040000000000
	.quad 0x0004040000000000, 0x0200040120000010
	.quad 0x0000000100000010, 0x0204000120000010
	.quad 0x0204000120000010, 0x0000000000000000
	.quad 0x0004040100000010, 0x0204040020000000
	.quad 0x0000040100000010, 0x0004040000000000
	.quad 0x0204040020000000, 0x0200000020000000
	.quad 0x0200040020000000, 0x0000000100000010
	.quad 0x0204000120000010, 0x0004040000000000
	.quad 0x0204040120000010, 0x0004000000000000
	.quad 0x0000040100000010, 0x0200000120000010
	.quad 0x0004000000000000, 0x0200040020000000
	.quad 0x0200000020000000, 0x0000040100000010
	.quad 0x0200000120000010, 0x0204040120000010
	.quad 0x0004040000000000, 0x0204000020000000
	.quad 0x0004040100000010, 0x0204040020000000
	.quad 0x0000000000000000, 0x0204000120000010
	.quad 0x0000000100000010, 0x0000040000000000
	.quad 0x0204000020000000, 0x0004040100000010
	.quad 0x0000040000000000, 0x0004000100000010
	.quad 0x0200040120000010, 0x0000000000000000
	.quad 0x0204040020000000, 0x0200000020000000
	.quad 0x0004000100000010, 0x0200040120000010
.L_s7:
	.quad 0x0002000000200000, 0x2002000004200002
	.quad 0x2000000004000802, 0x0000000000000000
	.quad 0x0000000000000800, 0x2000000004000802
	.quad 0x2002000000200802, 0x0002000004200800
	.quad 0x2002000004200802, 0x0002000000200000
	.quad 0x0000000000000000, 0x2000000004000002
	.quad 0x2000000000000002, 0x0000000004000000
	.quad 0x2002000004200002, 0x2000000000000802
	.quad 0x0000000004000800, 0x2002000000200802
	.quad 0x2002000000200002, 0x0000000004000800
	.quad 0x2000000004000002, 0x0002000004200000
	.quad 0x0002000004200800, 0x2002000000200002
	.quad 0x0002000004200000, 0x0000000000000800
	.quad 0x2000000000000802, 0x2002000004200802
	.quad 0x0002000000200800, 0x2000000000000002
	.quad 0x0000000004000000, 0x0002000000200800
	.quad 0x0000000004000000, 0x0002000000200800
	.quad 0x0002000000200000, 0x2000000004000802
	.quad 0x2000000004000802, 0x2002000004200002
	.quad 0x2002000004200002, 0x2000000000000002
	.quad 0x2002000000200002, 0x0000000004000000
	.quad 0x0000000004000800, 0x0002000000200000
	.quad 0x0002000004200800, 0x2000000000000802
	.quad 0x2002000000200802, 0x0002000004200800
	.quad 0x2000000000000802, 0x2000000004000002
	.quad 0x2002000004200802, 0x0002000004200000
	.quad 0x0002000000200800, 0x0000000000000000
	.quad 0x2000000000000002, 0x2002000004200802
	.quad 0x0000000000000000, 0x2002000000200802
	.quad 0x0002000004200000, 0x0000000000000800
	.quad 0x2000000004000002, 0x0000000004000800
	.quad 0x0000000000000800, 0x2002000000200002
.L_s8:
	.quad 0x0100010410001000, 0x0000010000001000
	.quad 0x0000000000040000, 0x0100010410041000
	.quad 0x0100000010000000, 0x0100010410001000
	.quad 0x0000000400000000, 0x0100000010000000
	.quad 0x0000000400040000, 0x0100000010040000
	.quad 0x0100010410041000, 0x0000010000041000
	.quad 0x0100010010041000, 0x0000010400041000
	.quad 0x0000010000001000, 0x0000000400000000
	.quad 0x0100000010040000, 0x0100000410000000
	.quad 0x0100010010001000, 0x0000010400001000
	.quad 0x0000010000041000, 0x0000000400040000
	.quad 0x0100000410040000, 0x0100010010041000
	.quad 0x0000010400001000, 0x0000000000000000
	.quad 0x0000000000000000, 0x0100000410040000
	.quad 0x0100000410000000, 0x0100010010001000
	.quad 0x0000010400041000, 0x0000000000040000
	.quad 0x0000010400041000, 0x0000000000040000
	.quad 0x0100010010041000, 0x0000010000001000
	.quad 0x0000000400000000, 0x0100000410040000
	.quad 0x0000010000001000, 0x0000010400041000
	.quad 0x0100010010001000, 0x0000000400000000
	.quad 0x0100000410000000, 0x0100000010040000
	.quad 0x0100000410040000, 0x0100000010000000
	.quad 0x0000000000040000, 0x0100010410001000
	.quad 0x0000000000000000, 0x0100010410041000
	.quad 0x0000000400040000, 0x0100000410000000
	.quad 0x0100000010040000, 0x0100010010001000
	.quad 0x0100010410001000, 0x0000000000000000
	.quad 0x0100010410041000, 0x0000010000041000
	.quad 0x0000010000041000, 0x0000010400001000
	.quad 0x0000010400001000, 0x0000000400040000
	.quad 0x0100000010000000, 0x0100010010041000