Linux Audio

Check our new training course

Loading...
v6.13.7
  1/* SPDX-License-Identifier: GPL-2.0-or-later */
  2/*
  3 * des3_ede-asm_64.S  -  x86-64 assembly implementation of 3DES cipher
  4 *
  5 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  6 */
  7
  8#include <linux/linkage.h>
  9
 10.file "des3_ede-asm_64.S"
 11.text
 12
 /*
  * Addresses of the eight lookup tables used by the round macros.
  * s1 is the .L_s1 label; every following sN is the previous table plus
  * 64*8 bytes (64 quadword entries).  The tables emitted in .rodata must
  * therefore stay contiguous and in s1..s8 order.
  */
 13#define s1 .L_s1
 14#define s2 ((s1) + (64*8))
 15#define s3 ((s2) + (64*8))
 16#define s4 ((s3) + (64*8))
 17#define s5 ((s4) + (64*8))
 18#define s6 ((s5) + (64*8))
 19#define s7 ((s6) + (64*8))
 20#define s8 ((s7) + (64*8))
 21
 /*
  * Register allocation shared by the 1-way and 3-way code paths.
  *   CTX      - pointer to the round keys (first function argument).
  *   RLn/RRn  - left/right data half of block n (n = 0..2).  The 1-way
  *              path keeps its data in RL0/RR0 and uses RL1 as scratch.
  *   RWn      - per-block working register (round key XOR data); the
  *              bl/bh aliases name its two lowest bytes.
  *   RT0..RT3 - temporaries.  RT1 and RT3 alias %rsi and %rdx, i.e. the
  *              dst and src argument registers, which is why both entry
  *              points push dst before running any macro.
  * %rbx and %r12-%r15 are pushed on entry and popped before RET by both
  * entry points.
  */
 22/* register macros */
 23#define CTX %rdi
 24
 25#define RL0 %r8
 26#define RL1 %r9
 27#define RL2 %r10
 28
 29#define RL0d %r8d
 30#define RL1d %r9d
 31#define RL2d %r10d
 32
 33#define RR0 %r11
 34#define RR1 %r12
 35#define RR2 %r13
 36
 37#define RR0d %r11d
 38#define RR1d %r12d
 39#define RR2d %r13d
 40
 41#define RW0 %rax
 42#define RW1 %rbx
 43#define RW2 %rcx
 44
 45#define RW0d %eax
 46#define RW1d %ebx
 47#define RW2d %ecx
 48
 49#define RW0bl %al
 50#define RW1bl %bl
 51#define RW2bl %cl
 52
 53#define RW0bh %ah
 54#define RW1bh %bh
 55#define RW2bh %ch
 56
 57#define RT0 %r15
 58#define RT1 %rsi
 59#define RT2 %r14
 60#define RT3 %rdx
 61
 62#define RT0d %r15d
 63#define RT1d %esi
 64#define RT2d %r14d
 65#define RT3d %edx
 66
 67/***********************************************************************
 68 * 1-way 3DES
 69 ***********************************************************************/
 /*
  * do_permutation(a, b, offset, mask)
  * Bit swap between two 32-bit operands: the bits of b selected by mask
  * are exchanged with the bits of a selected by (mask << offset).
  * Clobbers RT0 and the flags.
  */
 70#define do_permutation(a, b, offset, mask) \
 71	movl a, RT0d; /* t = a */ \
 72	shrl $(offset), RT0d; /* t >>= offset */ \
 73	xorl b, RT0d; /* t ^= b */ \
 74	andl $(mask), RT0d; /* t = differing bits under mask */ \
 75	xorl RT0d, b; \
 76	shll $(offset), RT0d; \
 77	xorl RT0d, a;
 78
 /*
  * expand_to_64bits(val, mask)
  * val = (val | (ror32(low32(val), 4) << 32)) & mask.
  * The low half rotated right by 4 is ORed into bits 63:32; the callers
  * in this file invoke it right after 32-bit writes to val##d, which
  * leave those upper bits zero.  mask is a 64-bit register operand.
  * Clobbers RT0 and the flags.
  */
 79#define expand_to_64bits(val, mask) \
 80	movl val##d, RT0d; \
 81	rorl $4, RT0d; \
 82	shlq $32, RT0; \
 83	orq RT0, val; \
 84	andq mask, val;
 85
 /*
  * compress_to_64bits(val)
  * val##d |= rol32(val >> 32, 4).  The final OR is a 32-bit operation,
  * so bits 63:32 of val are zero afterwards.
  * Clobbers RT0 and the flags.
  */
 86#define compress_to_64bits(val) \
 87	movq val, RT0; \
 88	shrq $32, RT0; \
 89	roll $4, RT0d; /* undo the 4-bit rotate applied by expand_to_64bits */ \
 90	orl RT0d, val##d;
 91
 /*
  * initial_permutation(left, right)
  * Takes the two 32-bit halves of one block (64-bit register names whose
  * 'd' alias is the 32-bit view) and
  *   1. applies four do_permutation bit swaps between them,
  *   2. rotates right left by one, swaps the bits selected by 0xaaaaaaaa
  *      between left and the rotated right, then rotates left by one,
  *   3. widens each half to 64 bits with expand_to_64bits, masking every
  *      byte to its low 6 bits.
  * Clobbers RT0, RT3 (left holding the 0x3f.. mask), RW0 and the flags.
  */
 92#define initial_permutation(left, right) \
 93	do_permutation(left##d, right##d,  4, 0x0f0f0f0f); \
 94	do_permutation(left##d, right##d, 16, 0x0000ffff); \
 95	do_permutation(right##d, left##d,  2, 0x33333333); \
 96	do_permutation(right##d, left##d,  8, 0x00ff00ff); \
 97	movabs $0x3f3f3f3f3f3f3f3f, RT3; /* 6-bit-per-byte mask for expand_to_64bits */ \
 98	movl left##d, RW0d; \
 99	roll $1, right##d; \
100	xorl right##d, RW0d; \
101	andl $0xaaaaaaaa, RW0d; \
102	xorl RW0d, left##d; \
103	xorl RW0d, right##d; \
104	roll $1, left##d; \
105	expand_to_64bits(right, RT3); \
106	expand_to_64bits(left, RT3);
107
/*
 * final_permutation(left, right)
 * Mirror image of initial_permutation: folds each 64-bit half back to
 * 32 bits with compress_to_64bits, undoes the rotate-by-one/0xaaaaaaaa
 * swap step with right rotations, then applies the four do_permutation
 * bit swaps in the reverse order.
 * Clobbers RT0, RW0 and the flags.
 */
108#define final_permutation(left, right) \
109	compress_to_64bits(right); \
110	compress_to_64bits(left); \
111	movl right##d, RW0d; \
112	rorl $1, left##d; \
113	xorl left##d, RW0d; \
114	andl $0xaaaaaaaa, RW0d; \
115	xorl RW0d, right##d; \
116	xorl RW0d, left##d; \
117	rorl $1, right##d; \
118	do_permutation(right##d, left##d,  8, 0x00ff00ff); \
119	do_permutation(right##d, left##d,  2, 0x33333333); \
120	do_permutation(left##d, right##d, 16, 0x0000ffff); \
121	do_permutation(left##d, right##d,  4, 0x0f0f0f0f);
122
/*
 * round1(n, from, to, load_next_key)
 * One round for a single block.  On entry RW0 must hold round key n.
 * RW0 ^= from, then each of the eight bytes of RW0 indexes one quadword
 * table and the eight results are XORed into 'to'
 * (byte 0..7 -> s8, s6, s4, s2, s7, s5, s3, s1).  Half of the lookups
 * are accumulated in RT0 first and folded into 'to' near the end.
 * load_next_key(n, RW0) is expanded once all bytes have been extracted.
 * Each table base is loaded into RW1 first because a %rip-relative
 * operand cannot also take an index register.
 * Clobbers RW0, RW1, RL1 (scratch index), RT0-RT3 and the flags.
 */
123#define round1(n, from, to, load_next_key) \
124	xorq from, RW0; /* RW0 = key n ^ from */ \
125	\
126	movzbl RW0bl, RT0d; /* byte 0 */ \
127	movzbl RW0bh, RT1d; /* byte 1 */ \
128	shrq $16, RW0; \
129	movzbl RW0bl, RT2d; /* byte 2 */ \
130	movzbl RW0bh, RT3d; /* byte 3 */ \
131	shrq $16, RW0; \
132	leaq s8(%rip), RW1; \
133	movq (RW1, RT0, 8), RT0; /* RT0 = s8[byte 0] */ \
134	leaq s6(%rip), RW1; \
135	xorq (RW1, RT1, 8), to; /* to ^= s6[byte 1] */ \
136	movzbl RW0bl, RL1d; /* byte 4 */ \
137	movzbl RW0bh, RT1d; /* byte 5 */ \
138	shrl $16, RW0d; \
139	leaq s4(%rip), RW1; \
140	xorq (RW1, RT2, 8), RT0; /* RT0 ^= s4[byte 2] */ \
141	leaq s2(%rip), RW1; \
142	xorq (RW1, RT3, 8), to; /* to ^= s2[byte 3] */ \
143	movzbl RW0bl, RT2d; /* byte 6 */ \
144	movzbl RW0bh, RT3d; /* byte 7 */ \
145	leaq s7(%rip), RW1; \
146	xorq (RW1, RL1, 8), RT0; /* RT0 ^= s7[byte 4] */ \
147	leaq s5(%rip), RW1; \
148	xorq (RW1, RT1, 8), to; /* to ^= s5[byte 5] */ \
149	leaq s3(%rip), RW1; \
150	xorq (RW1, RT2, 8), RT0; /* RT0 ^= s3[byte 6] */ \
151	load_next_key(n, RW0); /* RW0 is free again: fetch key n+1 */ \
152	xorq RT0, to; \
153	leaq s1(%rip), RW1; \
154	xorq (RW1, RT3, 8), to; /* to ^= s1[byte 7] */ \
155
/*
 * load_next_key(n, RWx)
 * Loads the 8-byte round key with index n+1 from the key array at CTX
 * into RWx.
 */
156#define load_next_key(n, RWx) \
157	movq (((n) + 1) * 8)(CTX), RWx;
158
/*
 * dummy2(a, b)
 * Expands to nothing.  Passed in place of load_next_key / __movq for the
 * last round, so that no key beyond index 47 is read.
 */
159#define dummy2(a, b) /*_*/
160
/*
 * read_block(io, left, right)
 * Loads two consecutive 32-bit words from the address in io and byte
 * swaps each, i.e. reads them as big-endian values.  left/right are
 * 64-bit register names; their upper halves end up zero.
 */
161#define read_block(io, left, right) \
162	movl    (io), left##d; \
163	movl   4(io), right##d; \
164	bswapl left##d; \
165	bswapl right##d;
166
/*
 * write_block(io, left, right)
 * Byte swaps the low 32 bits of left and right in place and stores them
 * as two consecutive 32-bit words at the address in io (left first).
 */
167#define write_block(io, left, right) \
168	bswapl left##d; \
169	bswapl right##d; \
170	movl   left##d,   (io); \
171	movl   right##d, 4(io);
172
/*
 * des3_ede_x86_64_crypt_blk - run one 8-byte block through 48 rounds.
 *
 * Reads the block from src as two big-endian 32-bit words, applies
 * initial_permutation, 48 round1 steps with the 8-byte round keys at
 * CTX + 8*n (n = 0..47), then final_permutation, and stores the result
 * to dst.  The from/to halves alternate every round except across the
 * 15->16 and 31->32 boundaries, where the previous order is repeated.
 * Whether the call encrypts or decrypts is not decided here; presumably
 * it depends on the key schedule the caller passes in CTX.
 *
 * No return value is set up.  Leaf function: no calls are made.
 */
173SYM_FUNC_START(des3_ede_x86_64_crypt_blk)
174	/* input:
175	 *	%rdi: round keys, CTX
176	 *	%rsi: dst
177	 *	%rdx: src
178	 */
179	pushq %rbx;
180	pushq %r12;
181	pushq %r13;
182	pushq %r14;
183	pushq %r15;
184
185	pushq %rsi; /* dst; saved because RT1 aliases %rsi */
186
187	read_block(%rdx, RL0, RR0);
188	initial_permutation(RL0, RR0);
189
190	movq (CTX), RW0; /* round key 0; each round1 loads the next one */
191
192	round1(0, RR0, RL0, load_next_key);
193	round1(1, RL0, RR0, load_next_key);
194	round1(2, RR0, RL0, load_next_key);
195	round1(3, RL0, RR0, load_next_key);
196	round1(4, RR0, RL0, load_next_key);
197	round1(5, RL0, RR0, load_next_key);
198	round1(6, RR0, RL0, load_next_key);
199	round1(7, RL0, RR0, load_next_key);
200	round1(8, RR0, RL0, load_next_key);
201	round1(9, RL0, RR0, load_next_key);
202	round1(10, RR0, RL0, load_next_key);
203	round1(11, RL0, RR0, load_next_key);
204	round1(12, RR0, RL0, load_next_key);
205	round1(13, RL0, RR0, load_next_key);
206	round1(14, RR0, RL0, load_next_key);
207	round1(15, RL0, RR0, load_next_key);
208
209	round1(16+0, RL0, RR0, load_next_key);
210	round1(16+1, RR0, RL0, load_next_key);
211	round1(16+2, RL0, RR0, load_next_key);
212	round1(16+3, RR0, RL0, load_next_key);
213	round1(16+4, RL0, RR0, load_next_key);
214	round1(16+5, RR0, RL0, load_next_key);
215	round1(16+6, RL0, RR0, load_next_key);
216	round1(16+7, RR0, RL0, load_next_key);
217	round1(16+8, RL0, RR0, load_next_key);
218	round1(16+9, RR0, RL0, load_next_key);
219	round1(16+10, RL0, RR0, load_next_key);
220	round1(16+11, RR0, RL0, load_next_key);
221	round1(16+12, RL0, RR0, load_next_key);
222	round1(16+13, RR0, RL0, load_next_key);
223	round1(16+14, RL0, RR0, load_next_key);
224	round1(16+15, RR0, RL0, load_next_key);
225
226	round1(32+0, RR0, RL0, load_next_key);
227	round1(32+1, RL0, RR0, load_next_key);
228	round1(32+2, RR0, RL0, load_next_key);
229	round1(32+3, RL0, RR0, load_next_key);
230	round1(32+4, RR0, RL0, load_next_key);
231	round1(32+5, RL0, RR0, load_next_key);
232	round1(32+6, RR0, RL0, load_next_key);
233	round1(32+7, RL0, RR0, load_next_key);
234	round1(32+8, RR0, RL0, load_next_key);
235	round1(32+9, RL0, RR0, load_next_key);
236	round1(32+10, RR0, RL0, load_next_key);
237	round1(32+11, RL0, RR0, load_next_key);
238	round1(32+12, RR0, RL0, load_next_key);
239	round1(32+13, RL0, RR0, load_next_key);
240	round1(32+14, RR0, RL0, load_next_key);
241	round1(32+15, RL0, RR0, dummy2); /* last round: no further key to load */
242
243	final_permutation(RR0, RL0);
244
245	popq %rsi /* dst */
246	write_block(%rsi, RR0, RL0);
247
248	popq %r15;
249	popq %r14;
250	popq %r13;
251	popq %r12;
252	popq %rbx;
253
254	RET;
255SYM_FUNC_END(des3_ede_x86_64_crypt_blk)
256
257/***********************************************************************
258 * 3-way 3DES
259 ***********************************************************************/
/*
 * expand_to_64bits(val, mask)
 * Token-for-token the same as the definition in the 1-way section above,
 * so this redefinition does not change the macro.
 * val = (val | (ror32(low32(val), 4) << 32)) & mask.
 * Clobbers RT0 and the flags.
 */
260#define expand_to_64bits(val, mask) \
261	movl val##d, RT0d; \
262	rorl $4, RT0d; \
263	shlq $32, RT0; \
264	orq RT0, val; \
265	andq mask, val;
266
/*
 * compress_to_64bits(val)
 * Token-for-token the same as the definition in the 1-way section above,
 * so this redefinition does not change the macro.
 * val##d |= rol32(val >> 32, 4); bits 63:32 of val are zero afterwards.
 * Clobbers RT0 and the flags.
 */
267#define compress_to_64bits(val) \
268	movq val, RT0; \
269	shrq $32, RT0; \
270	roll $4, RT0d; \
271	orl RT0d, val##d;
272
/*
 * initial_permutation3(left, right)
 * Three-block version of initial_permutation.  left/right are register
 * name prefixes; the suffixes 0, 1, 2 (plus 'd' for the 32-bit view) are
 * pasted on to address the halves of each block.  The same sequence of
 * bit swaps, rotate-by-one/0xaaaaaaaa step and expand_to_64bits is
 * applied to every block; the extra indentation marks blocks 1 and 2.
 * Clobbers RT0, RT3 (left holding the 0x3f.. mask), RW0-RW2 and flags.
 */
273#define initial_permutation3(left, right) \
274	do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
275	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
276	  do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
277	  do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
278	    do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f); \
279	    do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
280	    \
281	do_permutation(right##0d, left##0d,  2, 0x33333333); \
282	do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
283	  do_permutation(right##1d, left##1d,  2, 0x33333333); \
284	  do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
285	    do_permutation(right##2d, left##2d,  2, 0x33333333); \
286	    do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
287	    \
288	movabs $0x3f3f3f3f3f3f3f3f, RT3; /* mask shared by all expand_to_64bits below */ \
289	    \
290	movl left##0d, RW0d; \
291	roll $1, right##0d; \
292	xorl right##0d, RW0d; \
293	andl $0xaaaaaaaa, RW0d; \
294	xorl RW0d, left##0d; \
295	xorl RW0d, right##0d; \
296	roll $1, left##0d; \
297	expand_to_64bits(right##0, RT3); \
298	expand_to_64bits(left##0, RT3); \
299	  movl left##1d, RW1d; \
300	  roll $1, right##1d; \
301	  xorl right##1d, RW1d; \
302	  andl $0xaaaaaaaa, RW1d; \
303	  xorl RW1d, left##1d; \
304	  xorl RW1d, right##1d; \
305	  roll $1, left##1d; \
306	  expand_to_64bits(right##1, RT3); \
307	  expand_to_64bits(left##1, RT3); \
308	    movl left##2d, RW2d; \
309	    roll $1, right##2d; \
310	    xorl right##2d, RW2d; \
311	    andl $0xaaaaaaaa, RW2d; \
312	    xorl RW2d, left##2d; \
313	    xorl RW2d, right##2d; \
314	    roll $1, left##2d; \
315	    expand_to_64bits(right##2, RT3); \
316	    expand_to_64bits(left##2, RT3);
317
/*
 * final_permutation3(left, right)
 * Three-block version of final_permutation.  left/right are register
 * name prefixes that get the suffixes 0, 1, 2 pasted on.  For every
 * block: compress both halves to 32 bits, undo the
 * rotate-by-one/0xaaaaaaaa swap step, then apply the four
 * do_permutation bit swaps in the reverse order of
 * initial_permutation3.
 * Clobbers RT0, RW0-RW2 and the flags.
 */
318#define final_permutation3(left, right) \
319	compress_to_64bits(right##0); \
320	compress_to_64bits(left##0); \
321	movl right##0d, RW0d; \
322	rorl $1, left##0d; \
323	xorl left##0d, RW0d; \
324	andl $0xaaaaaaaa, RW0d; \
325	xorl RW0d, right##0d; \
326	xorl RW0d, left##0d; \
327	rorl $1, right##0d; \
328	  compress_to_64bits(right##1); \
329	  compress_to_64bits(left##1); \
330	  movl right##1d, RW1d; \
331	  rorl $1, left##1d; \
332	  xorl left##1d, RW1d; \
333	  andl $0xaaaaaaaa, RW1d; \
334	  xorl RW1d, right##1d; \
335	  xorl RW1d, left##1d; \
336	  rorl $1, right##1d; \
337	    compress_to_64bits(right##2); \
338	    compress_to_64bits(left##2); \
339	    movl right##2d, RW2d; \
340	    rorl $1, left##2d; \
341	    xorl left##2d, RW2d; \
342	    andl $0xaaaaaaaa, RW2d; \
343	    xorl RW2d, right##2d; \
344	    xorl RW2d, left##2d; \
345	    rorl $1, right##2d; \
346	    \
347	do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
348	do_permutation(right##0d, left##0d,  2, 0x33333333); \
349	  do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
350	  do_permutation(right##1d, left##1d,  2, 0x33333333); \
351	    do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
352	    do_permutation(right##2d, left##2d,  2, 0x33333333); \
353	    \
354	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
355	do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
356	  do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
357	  do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
358	    do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
359	    do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f);
360
/*
 * round3(n, from, to, load_next_key, do_movq)
 * One round for three blocks.  from/to are register name prefixes (the
 * block index 0..2 is pasted on).  On entry RW0, RW1 and RW2 must all
 * hold round key n.  For each block i: RWi ^= from##i, then every byte
 * of RWi indexes one quadword table and the result is XORed straight
 * into to##i (byte 0..7 -> s8, s6, s4, s2, s7, s5, s3, s1).
 * load_next_key(n, RW0) fetches key n+1 once block 0 has extracted its
 * last bytes; do_movq(RW0, RW1) and do_movq(RW0, RW2) then copy it to
 * the other working registers once they are free.
 * RT2 carries the table base (a %rip-relative operand cannot take an
 * index register); RT3 and RT1 carry the byte indices.
 * Clobbers RW0-RW2, RT1-RT3 and the flags.
 */
361#define round3(n, from, to, load_next_key, do_movq) \
362	xorq from##0, RW0; /* block 0 */ \
363	movzbl RW0bl, RT3d; \
364	movzbl RW0bh, RT1d; \
365	shrq $16, RW0; \
366	leaq s8(%rip), RT2; \
367	xorq (RT2, RT3, 8), to##0; \
368	leaq s6(%rip), RT2; \
369	xorq (RT2, RT1, 8), to##0; \
370	movzbl RW0bl, RT3d; \
371	movzbl RW0bh, RT1d; \
372	shrq $16, RW0; \
373	leaq s4(%rip), RT2; \
374	xorq (RT2, RT3, 8), to##0; \
375	leaq s2(%rip), RT2; \
376	xorq (RT2, RT1, 8), to##0; \
377	movzbl RW0bl, RT3d; \
378	movzbl RW0bh, RT1d; \
379	shrl $16, RW0d; \
380	leaq s7(%rip), RT2; \
381	xorq (RT2, RT3, 8), to##0; \
382	leaq s5(%rip), RT2; \
383	xorq (RT2, RT1, 8), to##0; \
384	movzbl RW0bl, RT3d; \
385	movzbl RW0bh, RT1d; \
386	load_next_key(n, RW0); /* RW0 fully consumed: fetch key n+1 */ \
387	leaq s3(%rip), RT2; \
388	xorq (RT2, RT3, 8), to##0; \
389	leaq s1(%rip), RT2; \
390	xorq (RT2, RT1, 8), to##0; \
391		xorq from##1, RW1; /* block 1 */ \
392		movzbl RW1bl, RT3d; \
393		movzbl RW1bh, RT1d; \
394		shrq $16, RW1; \
395		leaq s8(%rip), RT2; \
396		xorq (RT2, RT3, 8), to##1; \
397		leaq s6(%rip), RT2; \
398		xorq (RT2, RT1, 8), to##1; \
399		movzbl RW1bl, RT3d; \
400		movzbl RW1bh, RT1d; \
401		shrq $16, RW1; \
402		leaq s4(%rip), RT2; \
403		xorq (RT2, RT3, 8), to##1; \
404		leaq s2(%rip), RT2; \
405		xorq (RT2, RT1, 8), to##1; \
406		movzbl RW1bl, RT3d; \
407		movzbl RW1bh, RT1d; \
408		shrl $16, RW1d; \
409		leaq s7(%rip), RT2; \
410		xorq (RT2, RT3, 8), to##1; \
411		leaq s5(%rip), RT2; \
412		xorq (RT2, RT1, 8), to##1; \
413		movzbl RW1bl, RT3d; \
414		movzbl RW1bh, RT1d; \
415		do_movq(RW0, RW1); /* RW1 = key n+1 for the next round */ \
416		leaq s3(%rip), RT2; \
417		xorq (RT2, RT3, 8), to##1; \
418		leaq s1(%rip), RT2; \
419		xorq (RT2, RT1, 8), to##1; \
420			xorq from##2, RW2; /* block 2 */ \
421			movzbl RW2bl, RT3d; \
422			movzbl RW2bh, RT1d; \
423			shrq $16, RW2; \
424			leaq s8(%rip), RT2; \
425			xorq (RT2, RT3, 8), to##2; \
426			leaq s6(%rip), RT2; \
427			xorq (RT2, RT1, 8), to##2; \
428			movzbl RW2bl, RT3d; \
429			movzbl RW2bh, RT1d; \
430			shrq $16, RW2; \
431			leaq s4(%rip), RT2; \
432			xorq (RT2, RT3, 8), to##2; \
433			leaq s2(%rip), RT2; \
434			xorq (RT2, RT1, 8), to##2; \
435			movzbl RW2bl, RT3d; \
436			movzbl RW2bh, RT1d; \
437			shrl $16, RW2d; \
438			leaq s7(%rip), RT2; \
439			xorq (RT2, RT3, 8), to##2; \
440			leaq s5(%rip), RT2; \
441			xorq (RT2, RT1, 8), to##2; \
442			movzbl RW2bl, RT3d; \
443			movzbl RW2bh, RT1d; \
444			do_movq(RW0, RW2); /* RW2 = key n+1 for the next round */ \
445			leaq s3(%rip), RT2; \
446			xorq (RT2, RT3, 8), to##2; \
447			leaq s1(%rip), RT2; \
448			xorq (RT2, RT1, 8), to##2;
449
/*
 * __movq(src, dst)
 * Plain 64-bit move wrapped in a macro so that round3 can be handed
 * either this or dummy2 as its do_movq argument.
 */
450#define __movq(src, dst) \
451	movq src, dst;
452
/*
 * des3_ede_x86_64_crypt_blk_3way - run three 8-byte blocks through 48
 * rounds with one shared set of round keys.
 *
 * Loads six 32-bit words from src (block i -> RLi, RRi), byte swaps
 * them, applies initial_permutation3, 48 round3 steps with the 8-byte
 * round keys at CTX + 8*n (n = 0..47), then final_permutation3, byte
 * swaps again and stores six words to dst.  The from/to prefixes
 * alternate every round except across the 15->16 and 31->32 boundaries,
 * where the previous order is repeated.
 *
 * No return value is set up.  Leaf function: no calls are made.
 */
453SYM_FUNC_START(des3_ede_x86_64_crypt_blk_3way)
454	/* input:
455	 *	%rdi: ctx, round keys
456	 *	%rsi: dst (3 blocks)
457	 *	%rdx: src (3 blocks)
458	 */
459
460	pushq %rbx;
461	pushq %r12;
462	pushq %r13;
463	pushq %r14;
464	pushq %r15;
465
466	pushq %rsi /* dst; saved because RT1 aliases %rsi */
467
468	/* load input */
469	movl 0 * 4(%rdx), RL0d;
470	movl 1 * 4(%rdx), RR0d;
471	movl 2 * 4(%rdx), RL1d;
472	movl 3 * 4(%rdx), RR1d;
473	movl 4 * 4(%rdx), RL2d;
474	movl 5 * 4(%rdx), RR2d;
475
476	bswapl RL0d;
477	bswapl RR0d;
478	bswapl RL1d;
479	bswapl RR1d;
480	bswapl RL2d;
481	bswapl RR2d;
482
483	initial_permutation3(RL, RR);
484
485	movq 0(CTX), RW0; /* round key 0, replicated for all three blocks */
486	movq RW0, RW1;
487	movq RW0, RW2;
488
489	round3(0, RR, RL, load_next_key, __movq);
490	round3(1, RL, RR, load_next_key, __movq);
491	round3(2, RR, RL, load_next_key, __movq);
492	round3(3, RL, RR, load_next_key, __movq);
493	round3(4, RR, RL, load_next_key, __movq);
494	round3(5, RL, RR, load_next_key, __movq);
495	round3(6, RR, RL, load_next_key, __movq);
496	round3(7, RL, RR, load_next_key, __movq);
497	round3(8, RR, RL, load_next_key, __movq);
498	round3(9, RL, RR, load_next_key, __movq);
499	round3(10, RR, RL, load_next_key, __movq);
500	round3(11, RL, RR, load_next_key, __movq);
501	round3(12, RR, RL, load_next_key, __movq);
502	round3(13, RL, RR, load_next_key, __movq);
503	round3(14, RR, RL, load_next_key, __movq);
504	round3(15, RL, RR, load_next_key, __movq);
505
506	round3(16+0, RL, RR, load_next_key, __movq);
507	round3(16+1, RR, RL, load_next_key, __movq);
508	round3(16+2, RL, RR, load_next_key, __movq);
509	round3(16+3, RR, RL, load_next_key, __movq);
510	round3(16+4, RL, RR, load_next_key, __movq);
511	round3(16+5, RR, RL, load_next_key, __movq);
512	round3(16+6, RL, RR, load_next_key, __movq);
513	round3(16+7, RR, RL, load_next_key, __movq);
514	round3(16+8, RL, RR, load_next_key, __movq);
515	round3(16+9, RR, RL, load_next_key, __movq);
516	round3(16+10, RL, RR, load_next_key, __movq);
517	round3(16+11, RR, RL, load_next_key, __movq);
518	round3(16+12, RL, RR, load_next_key, __movq);
519	round3(16+13, RR, RL, load_next_key, __movq);
520	round3(16+14, RL, RR, load_next_key, __movq);
521	round3(16+15, RR, RL, load_next_key, __movq);
522
523	round3(32+0, RR, RL, load_next_key, __movq);
524	round3(32+1, RL, RR, load_next_key, __movq);
525	round3(32+2, RR, RL, load_next_key, __movq);
526	round3(32+3, RL, RR, load_next_key, __movq);
527	round3(32+4, RR, RL, load_next_key, __movq);
528	round3(32+5, RL, RR, load_next_key, __movq);
529	round3(32+6, RR, RL, load_next_key, __movq);
530	round3(32+7, RL, RR, load_next_key, __movq);
531	round3(32+8, RR, RL, load_next_key, __movq);
532	round3(32+9, RL, RR, load_next_key, __movq);
533	round3(32+10, RR, RL, load_next_key, __movq);
534	round3(32+11, RL, RR, load_next_key, __movq);
535	round3(32+12, RR, RL, load_next_key, __movq);
536	round3(32+13, RL, RR, load_next_key, __movq);
537	round3(32+14, RR, RL, load_next_key, __movq);
538	round3(32+15, RL, RR, dummy2, dummy2); /* last round: no key load, no copies */
539
540	final_permutation3(RR, RL);
541
542	bswapl RR0d;
543	bswapl RL0d;
544	bswapl RR1d;
545	bswapl RL1d;
546	bswapl RR2d;
547	bswapl RL2d;
548
549	popq %rsi /* dst */
550	movl RR0d, 0 * 4(%rsi);
551	movl RL0d, 1 * 4(%rsi);
552	movl RR1d, 2 * 4(%rsi);
553	movl RL1d, 3 * 4(%rsi);
554	movl RR2d, 4 * 4(%rsi);
555	movl RL2d, 5 * 4(%rsi);
556
557	popq %r15;
558	popq %r14;
559	popq %r13;
560	popq %r12;
561	popq %rbx;
562
563	RET;
564SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way)
565
/*
 * Lookup tables .L_s1 ... .L_s8: eight consecutive tables of 64
 * quadwords each.  The round macros do not use the .L_s2 ... .L_s8
 * labels; they compute every table address as .L_s1 + k * 64*8, so no
 * padding or reordering may be introduced between the tables.
 */
566.section	.rodata, "a", @progbits
567.align 16
568.L_s1:
569	.quad 0x0010100001010400, 0x0000000000000000
570	.quad 0x0000100000010000, 0x0010100001010404
571	.quad 0x0010100001010004, 0x0000100000010404
572	.quad 0x0000000000000004, 0x0000100000010000
573	.quad 0x0000000000000400, 0x0010100001010400
574	.quad 0x0010100001010404, 0x0000000000000400
575	.quad 0x0010000001000404, 0x0010100001010004
576	.quad 0x0010000001000000, 0x0000000000000004
577	.quad 0x0000000000000404, 0x0010000001000400
578	.quad 0x0010000001000400, 0x0000100000010400
579	.quad 0x0000100000010400, 0x0010100001010000
580	.quad 0x0010100001010000, 0x0010000001000404
581	.quad 0x0000100000010004, 0x0010000001000004
582	.quad 0x0010000001000004, 0x0000100000010004
583	.quad 0x0000000000000000, 0x0000000000000404
584	.quad 0x0000100000010404, 0x0010000001000000
585	.quad 0x0000100000010000, 0x0010100001010404
586	.quad 0x0000000000000004, 0x0010100001010000
587	.quad 0x0010100001010400, 0x0010000001000000
588	.quad 0x0010000001000000, 0x0000000000000400
589	.quad 0x0010100001010004, 0x0000100000010000
590	.quad 0x0000100000010400, 0x0010000001000004
591	.quad 0x0000000000000400, 0x0000000000000004
592	.quad 0x0010000001000404, 0x0000100000010404
593	.quad 0x0010100001010404, 0x0000100000010004
594	.quad 0x0010100001010000, 0x0010000001000404
595	.quad 0x0010000001000004, 0x0000000000000404
596	.quad 0x0000100000010404, 0x0010100001010400
597	.quad 0x0000000000000404, 0x0010000001000400
598	.quad 0x0010000001000400, 0x0000000000000000
599	.quad 0x0000100000010004, 0x0000100000010400
600	.quad 0x0000000000000000, 0x0010100001010004
601.L_s2:
602	.quad 0x0801080200100020, 0x0800080000000000
603	.quad 0x0000080000000000, 0x0001080200100020
604	.quad 0x0001000000100000, 0x0000000200000020
605	.quad 0x0801000200100020, 0x0800080200000020
606	.quad 0x0800000200000020, 0x0801080200100020
607	.quad 0x0801080000100000, 0x0800000000000000
608	.quad 0x0800080000000000, 0x0001000000100000
609	.quad 0x0000000200000020, 0x0801000200100020
610	.quad 0x0001080000100000, 0x0001000200100020
611	.quad 0x0800080200000020, 0x0000000000000000
612	.quad 0x0800000000000000, 0x0000080000000000
613	.quad 0x0001080200100020, 0x0801000000100000
614	.quad 0x0001000200100020, 0x0800000200000020
615	.quad 0x0000000000000000, 0x0001080000100000
616	.quad 0x0000080200000020, 0x0801080000100000
617	.quad 0x0801000000100000, 0x0000080200000020
618	.quad 0x0000000000000000, 0x0001080200100020
619	.quad 0x0801000200100020, 0x0001000000100000
620	.quad 0x0800080200000020, 0x0801000000100000
621	.quad 0x0801080000100000, 0x0000080000000000
622	.quad 0x0801000000100000, 0x0800080000000000
623	.quad 0x0000000200000020, 0x0801080200100020
624	.quad 0x0001080200100020, 0x0000000200000020
625	.quad 0x0000080000000000, 0x0800000000000000
626	.quad 0x0000080200000020, 0x0801080000100000
627	.quad 0x0001000000100000, 0x0800000200000020
628	.quad 0x0001000200100020, 0x0800080200000020
629	.quad 0x0800000200000020, 0x0001000200100020
630	.quad 0x0001080000100000, 0x0000000000000000
631	.quad 0x0800080000000000, 0x0000080200000020
632	.quad 0x0800000000000000, 0x0801000200100020
633	.quad 0x0801080200100020, 0x0001080000100000
634.L_s3:
635	.quad 0x0000002000000208, 0x0000202008020200
636	.quad 0x0000000000000000, 0x0000200008020008
637	.quad 0x0000002008000200, 0x0000000000000000
638	.quad 0x0000202000020208, 0x0000002008000200
639	.quad 0x0000200000020008, 0x0000000008000008
640	.quad 0x0000000008000008, 0x0000200000020000
641	.quad 0x0000202008020208, 0x0000200000020008
642	.quad 0x0000200008020000, 0x0000002000000208
643	.quad 0x0000000008000000, 0x0000000000000008
644	.quad 0x0000202008020200, 0x0000002000000200
645	.quad 0x0000202000020200, 0x0000200008020000
646	.quad 0x0000200008020008, 0x0000202000020208
647	.quad 0x0000002008000208, 0x0000202000020200
648	.quad 0x0000200000020000, 0x0000002008000208
649	.quad 0x0000000000000008, 0x0000202008020208
650	.quad 0x0000002000000200, 0x0000000008000000
651	.quad 0x0000202008020200, 0x0000000008000000
652	.quad 0x0000200000020008, 0x0000002000000208
653	.quad 0x0000200000020000, 0x0000202008020200
654	.quad 0x0000002008000200, 0x0000000000000000
655	.quad 0x0000002000000200, 0x0000200000020008
656	.quad 0x0000202008020208, 0x0000002008000200
657	.quad 0x0000000008000008, 0x0000002000000200
658	.quad 0x0000000000000000, 0x0000200008020008
659	.quad 0x0000002008000208, 0x0000200000020000
660	.quad 0x0000000008000000, 0x0000202008020208
661	.quad 0x0000000000000008, 0x0000202000020208
662	.quad 0x0000202000020200, 0x0000000008000008
663	.quad 0x0000200008020000, 0x0000002008000208
664	.quad 0x0000002000000208, 0x0000200008020000
665	.quad 0x0000202000020208, 0x0000000000000008
666	.quad 0x0000200008020008, 0x0000202000020200
667.L_s4:
668	.quad 0x1008020000002001, 0x1000020800002001
669	.quad 0x1000020800002001, 0x0000000800000000
670	.quad 0x0008020800002000, 0x1008000800000001
671	.quad 0x1008000000000001, 0x1000020000002001
672	.quad 0x0000000000000000, 0x0008020000002000
673	.quad 0x0008020000002000, 0x1008020800002001
674	.quad 0x1000000800000001, 0x0000000000000000
675	.quad 0x0008000800000000, 0x1008000000000001
676	.quad 0x1000000000000001, 0x0000020000002000
677	.quad 0x0008000000000000, 0x1008020000002001
678	.quad 0x0000000800000000, 0x0008000000000000
679	.quad 0x1000020000002001, 0x0000020800002000
680	.quad 0x1008000800000001, 0x1000000000000001
681	.quad 0x0000020800002000, 0x0008000800000000
682	.quad 0x0000020000002000, 0x0008020800002000
683	.quad 0x1008020800002001, 0x1000000800000001
684	.quad 0x0008000800000000, 0x1008000000000001
685	.quad 0x0008020000002000, 0x1008020800002001
686	.quad 0x1000000800000001, 0x0000000000000000
687	.quad 0x0000000000000000, 0x0008020000002000
688	.quad 0x0000020800002000, 0x0008000800000000
689	.quad 0x1008000800000001, 0x1000000000000001
690	.quad 0x1008020000002001, 0x1000020800002001
691	.quad 0x1000020800002001, 0x0000000800000000
692	.quad 0x1008020800002001, 0x1000000800000001
693	.quad 0x1000000000000001, 0x0000020000002000
694	.quad 0x1008000000000001, 0x1000020000002001
695	.quad 0x0008020800002000, 0x1008000800000001
696	.quad 0x1000020000002001, 0x0000020800002000
697	.quad 0x0008000000000000, 0x1008020000002001
698	.quad 0x0000000800000000, 0x0008000000000000
699	.quad 0x0000020000002000, 0x0008020800002000
700.L_s5:
701	.quad 0x0000001000000100, 0x0020001002080100
702	.quad 0x0020000002080000, 0x0420001002000100
703	.quad 0x0000000000080000, 0x0000001000000100
704	.quad 0x0400000000000000, 0x0020000002080000
705	.quad 0x0400001000080100, 0x0000000000080000
706	.quad 0x0020001002000100, 0x0400001000080100
707	.quad 0x0420001002000100, 0x0420000002080000
708	.quad 0x0000001000080100, 0x0400000000000000
709	.quad 0x0020000002000000, 0x0400000000080000
710	.quad 0x0400000000080000, 0x0000000000000000
711	.quad 0x0400001000000100, 0x0420001002080100
712	.quad 0x0420001002080100, 0x0020001002000100
713	.quad 0x0420000002080000, 0x0400001000000100
714	.quad 0x0000000000000000, 0x0420000002000000
715	.quad 0x0020001002080100, 0x0020000002000000
716	.quad 0x0420000002000000, 0x0000001000080100
717	.quad 0x0000000000080000, 0x0420001002000100
718	.quad 0x0000001000000100, 0x0020000002000000
719	.quad 0x0400000000000000, 0x0020000002080000
720	.quad 0x0420001002000100, 0x0400001000080100
721	.quad 0x0020001002000100, 0x0400000000000000
722	.quad 0x0420000002080000, 0x0020001002080100
723	.quad 0x0400001000080100, 0x0000001000000100
724	.quad 0x0020000002000000, 0x0420000002080000
725	.quad 0x0420001002080100, 0x0000001000080100
726	.quad 0x0420000002000000, 0x0420001002080100
727	.quad 0x0020000002080000, 0x0000000000000000
728	.quad 0x0400000000080000, 0x0420000002000000
729	.quad 0x0000001000080100, 0x0020001002000100
730	.quad 0x0400001000000100, 0x0000000000080000
731	.quad 0x0000000000000000, 0x0400000000080000
732	.quad 0x0020001002080100, 0x0400001000000100
733.L_s6:
734	.quad 0x0200000120000010, 0x0204000020000000
735	.quad 0x0000040000000000, 0x0204040120000010
736	.quad 0x0204000020000000, 0x0000000100000010
737	.quad 0x0204040120000010, 0x0004000000000000
738	.quad 0x0200040020000000, 0x0004040100000010
739	.quad 0x0004000000000000, 0x0200000120000010
740	.quad 0x0004000100000010, 0x0200040020000000
741	.quad 0x0200000020000000, 0x0000040100000010
742	.quad 0x0000000000000000, 0x0004000100000010
743	.quad 0x0200040120000010, 0x0000040000000000
744	.quad 0x0004040000000000, 0x0200040120000010
745	.quad 0x0000000100000010, 0x0204000120000010
746	.quad 0x0204000120000010, 0x0000000000000000
747	.quad 0x0004040100000010, 0x0204040020000000
748	.quad 0x0000040100000010, 0x0004040000000000
749	.quad 0x0204040020000000, 0x0200000020000000
750	.quad 0x0200040020000000, 0x0000000100000010
751	.quad 0x0204000120000010, 0x0004040000000000
752	.quad 0x0204040120000010, 0x0004000000000000
753	.quad 0x0000040100000010, 0x0200000120000010
754	.quad 0x0004000000000000, 0x0200040020000000
755	.quad 0x0200000020000000, 0x0000040100000010
756	.quad 0x0200000120000010, 0x0204040120000010
757	.quad 0x0004040000000000, 0x0204000020000000
758	.quad 0x0004040100000010, 0x0204040020000000
759	.quad 0x0000000000000000, 0x0204000120000010
760	.quad 0x0000000100000010, 0x0000040000000000
761	.quad 0x0204000020000000, 0x0004040100000010
762	.quad 0x0000040000000000, 0x0004000100000010
763	.quad 0x0200040120000010, 0x0000000000000000
764	.quad 0x0204040020000000, 0x0200000020000000
765	.quad 0x0004000100000010, 0x0200040120000010
766.L_s7:
767	.quad 0x0002000000200000, 0x2002000004200002
768	.quad 0x2000000004000802, 0x0000000000000000
769	.quad 0x0000000000000800, 0x2000000004000802
770	.quad 0x2002000000200802, 0x0002000004200800
771	.quad 0x2002000004200802, 0x0002000000200000
772	.quad 0x0000000000000000, 0x2000000004000002
773	.quad 0x2000000000000002, 0x0000000004000000
774	.quad 0x2002000004200002, 0x2000000000000802
775	.quad 0x0000000004000800, 0x2002000000200802
776	.quad 0x2002000000200002, 0x0000000004000800
777	.quad 0x2000000004000002, 0x0002000004200000
778	.quad 0x0002000004200800, 0x2002000000200002
779	.quad 0x0002000004200000, 0x0000000000000800
780	.quad 0x2000000000000802, 0x2002000004200802
781	.quad 0x0002000000200800, 0x2000000000000002
782	.quad 0x0000000004000000, 0x0002000000200800
783	.quad 0x0000000004000000, 0x0002000000200800
784	.quad 0x0002000000200000, 0x2000000004000802
785	.quad 0x2000000004000802, 0x2002000004200002
786	.quad 0x2002000004200002, 0x2000000000000002
787	.quad 0x2002000000200002, 0x0000000004000000
788	.quad 0x0000000004000800, 0x0002000000200000
789	.quad 0x0002000004200800, 0x2000000000000802
790	.quad 0x2002000000200802, 0x0002000004200800
791	.quad 0x2000000000000802, 0x2000000004000002
792	.quad 0x2002000004200802, 0x0002000004200000
793	.quad 0x0002000000200800, 0x0000000000000000
794	.quad 0x2000000000000002, 0x2002000004200802
795	.quad 0x0000000000000000, 0x2002000000200802
796	.quad 0x0002000004200000, 0x0000000000000800
797	.quad 0x2000000004000002, 0x0000000004000800
798	.quad 0x0000000000000800, 0x2002000000200002
799.L_s8:
800	.quad 0x0100010410001000, 0x0000010000001000
801	.quad 0x0000000000040000, 0x0100010410041000
802	.quad 0x0100000010000000, 0x0100010410001000
803	.quad 0x0000000400000000, 0x0100000010000000
804	.quad 0x0000000400040000, 0x0100000010040000
805	.quad 0x0100010410041000, 0x0000010000041000
806	.quad 0x0100010010041000, 0x0000010400041000
807	.quad 0x0000010000001000, 0x0000000400000000
808	.quad 0x0100000010040000, 0x0100000410000000
809	.quad 0x0100010010001000, 0x0000010400001000
810	.quad 0x0000010000041000, 0x0000000400040000
811	.quad 0x0100000410040000, 0x0100010010041000
812	.quad 0x0000010400001000, 0x0000000000000000
813	.quad 0x0000000000000000, 0x0100000410040000
814	.quad 0x0100000410000000, 0x0100010010001000
815	.quad 0x0000010400041000, 0x0000000000040000
816	.quad 0x0000010400041000, 0x0000000000040000
817	.quad 0x0100010010041000, 0x0000010000001000
818	.quad 0x0000000400000000, 0x0100000410040000
819	.quad 0x0000010000001000, 0x0000010400041000
820	.quad 0x0100010010001000, 0x0000000400000000
821	.quad 0x0100000410000000, 0x0100000010040000
822	.quad 0x0100000410040000, 0x0100000010000000
823	.quad 0x0000000000040000, 0x0100010410001000
824	.quad 0x0000000000000000, 0x0100010410041000
825	.quad 0x0000000400040000, 0x0100000410000000
826	.quad 0x0100000010040000, 0x0100010010001000
827	.quad 0x0100010410001000, 0x0000000000000000
828	.quad 0x0100010410041000, 0x0000010000041000
829	.quad 0x0000010000041000, 0x0000010400001000
830	.quad 0x0000010400001000, 0x0000000400040000
831	.quad 0x0100000010000000, 0x0100010010041000
v5.14.15
  1/* SPDX-License-Identifier: GPL-2.0-or-later */
  2/*
  3 * des3_ede-asm_64.S  -  x86-64 assembly implementation of 3DES cipher
  4 *
  5 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  6 */
  7
  8#include <linux/linkage.h>
  9
 10.file "des3_ede-asm_64.S"
 11.text
 12
 /*
  * Addresses of the eight lookup tables used by round1.
  * s1 is the .L_s1 label; every following sN is the previous table plus
  * 64*8 bytes, so the tables are expected to be laid out back to back
  * in s1..s8 order.
  */
 13#define s1 .L_s1
 14#define s2 ((s1) + (64*8))
 15#define s3 ((s2) + (64*8))
 16#define s4 ((s3) + (64*8))
 17#define s5 ((s4) + (64*8))
 18#define s6 ((s5) + (64*8))
 19#define s7 ((s6) + (64*8))
 20#define s8 ((s7) + (64*8))
 21
 /*
  * Register allocation used by the macros below.
  *   CTX      - pointer to the round keys (first function argument).
  *   RLn/RRn  - data registers; the 1-way entry point passes RL0/RR0 as
  *              the left/right halves, and round1 uses RL1 as scratch.
  *   RWn      - working registers; the bl/bh aliases name the two
  *              lowest bytes.  round1 only touches RW0.
  *   RT0..RT3 - temporaries.  RT1 and RT3 alias %rsi and %rdx, which
  *              the 1-way entry point documents as dst and src; it
  *              pushes %rsi before running any macro.
  */
 22/* register macros */
 23#define CTX %rdi
 24
 25#define RL0 %r8
 26#define RL1 %r9
 27#define RL2 %r10
 28
 29#define RL0d %r8d
 30#define RL1d %r9d
 31#define RL2d %r10d
 32
 33#define RR0 %r11
 34#define RR1 %r12
 35#define RR2 %r13
 36
 37#define RR0d %r11d
 38#define RR1d %r12d
 39#define RR2d %r13d
 40
 41#define RW0 %rax
 42#define RW1 %rbx
 43#define RW2 %rcx
 44
 45#define RW0d %eax
 46#define RW1d %ebx
 47#define RW2d %ecx
 48
 49#define RW0bl %al
 50#define RW1bl %bl
 51#define RW2bl %cl
 52
 53#define RW0bh %ah
 54#define RW1bh %bh
 55#define RW2bh %ch
 56
 57#define RT0 %r15
 58#define RT1 %rsi
 59#define RT2 %r14
 60#define RT3 %rdx
 61
 62#define RT0d %r15d
 63#define RT1d %esi
 64#define RT2d %r14d
 65#define RT3d %edx
 66
 67/***********************************************************************
 68 * 1-way 3DES
 69 ***********************************************************************/
 /*
  * do_permutation(a, b, offset, mask)
  * Bit swap between two 32-bit operands: the bits of b selected by mask
  * are exchanged with the bits of a selected by (mask << offset).
  * Clobbers RT0 and the flags.
  */
 70#define do_permutation(a, b, offset, mask) \
 71	movl a, RT0d; /* t = a */ \
 72	shrl $(offset), RT0d; /* t >>= offset */ \
 73	xorl b, RT0d; /* t ^= b */ \
 74	andl $(mask), RT0d; /* t = differing bits under mask */ \
 75	xorl RT0d, b; \
 76	shll $(offset), RT0d; \
 77	xorl RT0d, a;
 78
 /*
  * expand_to_64bits(val, mask)
  * val = (val | (ror32(low32(val), 4) << 32)) & mask.
  * initial_permutation invokes it right after 32-bit writes to val##d,
  * which leave bits 63:32 zero.  mask is a 64-bit register operand.
  * Clobbers RT0 and the flags.
  */
 79#define expand_to_64bits(val, mask) \
 80	movl val##d, RT0d; \
 81	rorl $4, RT0d; \
 82	shlq $32, RT0; \
 83	orq RT0, val; \
 84	andq mask, val;
 85
 /*
  * compress_to_64bits(val)
  * val##d |= rol32(val >> 32, 4).  The final OR is a 32-bit operation,
  * so bits 63:32 of val are zero afterwards.
  * Clobbers RT0 and the flags.
  */
 86#define compress_to_64bits(val) \
 87	movq val, RT0; \
 88	shrq $32, RT0; \
 89	roll $4, RT0d; /* undo the 4-bit rotate applied by expand_to_64bits */ \
 90	orl RT0d, val##d;
 91
 /*
  * initial_permutation(left, right)
  * Takes the two 32-bit halves of one block (64-bit register names whose
  * 'd' alias is the 32-bit view) and
  *   1. applies four do_permutation bit swaps between them,
  *   2. rotates right left by one, swaps the bits selected by 0xaaaaaaaa
  *      between left and the rotated right, then rotates left by one,
  *   3. widens each half to 64 bits with expand_to_64bits, masking every
  *      byte to its low 6 bits.
  * Clobbers RT0, RT3 (left holding the 0x3f.. mask), RW0 and the flags.
  */
 92#define initial_permutation(left, right) \
 93	do_permutation(left##d, right##d,  4, 0x0f0f0f0f); \
 94	do_permutation(left##d, right##d, 16, 0x0000ffff); \
 95	do_permutation(right##d, left##d,  2, 0x33333333); \
 96	do_permutation(right##d, left##d,  8, 0x00ff00ff); \
 97	movabs $0x3f3f3f3f3f3f3f3f, RT3; /* 6-bit-per-byte mask for expand_to_64bits */ \
 98	movl left##d, RW0d; \
 99	roll $1, right##d; \
100	xorl right##d, RW0d; \
101	andl $0xaaaaaaaa, RW0d; \
102	xorl RW0d, left##d; \
103	xorl RW0d, right##d; \
104	roll $1, left##d; \
105	expand_to_64bits(right, RT3); \
106	expand_to_64bits(left, RT3);
107
 /*
  * final_permutation(left, right): output permutation for one block.
  *
  * Mirrors initial_permutation(): both halves are first folded back to
  * 32 bits with compress_to_64bits(), the 0xaaaaaaaa exchange is applied
  * with right rotations instead of left rotations, and the four
  * do_permutation() steps run in the opposite order.
  *
  * Clobbers RT0 and RW0d.
  */
 108#define final_permutation(left, right) \
 109	compress_to_64bits(right); \
 110	compress_to_64bits(left); \
 111	movl right##d, RW0d; \
 112	rorl $1, left##d; \
 113	xorl left##d, RW0d; \
 114	andl $0xaaaaaaaa, RW0d; \
 115	xorl RW0d, right##d; \
 116	xorl RW0d, left##d; \
 117	rorl $1, right##d; \
 118	do_permutation(right##d, left##d,  8, 0x00ff00ff); \
 119	do_permutation(right##d, left##d,  2, 0x33333333); \
 120	do_permutation(left##d, right##d, 16, 0x0000ffff); \
 121	do_permutation(left##d, right##d,  4, 0x0f0f0f0f);
122
123#define round1(n, from, to, load_next_key) \
124	xorq from, RW0; \
125	\
126	movzbl RW0bl, RT0d; \
127	movzbl RW0bh, RT1d; \
128	shrq $16, RW0; \
129	movzbl RW0bl, RT2d; \
130	movzbl RW0bh, RT3d; \
131	shrq $16, RW0; \
132	movq s8(, RT0, 8), RT0; \
133	xorq s6(, RT1, 8), to; \
 
 
134	movzbl RW0bl, RL1d; \
135	movzbl RW0bh, RT1d; \
136	shrl $16, RW0d; \
137	xorq s4(, RT2, 8), RT0; \
138	xorq s2(, RT3, 8), to; \
 
 
139	movzbl RW0bl, RT2d; \
140	movzbl RW0bh, RT3d; \
141	xorq s7(, RL1, 8), RT0; \
142	xorq s5(, RT1, 8), to; \
143	xorq s3(, RT2, 8), RT0; \
 
 
 
144	load_next_key(n, RW0); \
145	xorq RT0, to; \
146	xorq s1(, RT3, 8), to; \
 
147
 /*
  * load_next_key(n, RWx): load the 8-byte round key with index n + 1 from
  * the key array at CTX into RWx.
  */
 148#define load_next_key(n, RWx) \
 149	movq (((n) + 1) * 8)(CTX), RWx;
150
 /*
  * dummy2(a, b): two-argument macro that expands to nothing.  It is passed
  * in place of load_next_key / the do_movq argument in the last round,
  * where no further key has to be loaded or copied.
  */
 151#define dummy2(a, b) /*_*/
152
 /*
  * read_block(io, left, right): load two consecutive 32-bit words from the
  * address in io into left##d and right##d and byte-swap each of them.
  */
 153#define read_block(io, left, right) \
 154	movl    (io), left##d; \
 155	movl   4(io), right##d; \
 156	bswapl left##d; \
 157	bswapl right##d;
158
 /*
  * write_block(io, left, right): byte-swap left##d and right##d in place
  * and store them as two consecutive 32-bit words at the address in io.
  * The registers are left in their byte-swapped state.
  */
 159#define write_block(io, left, right) \
 160	bswapl left##d; \
 161	bswapl right##d; \
 162	movl   left##d,   (io); \
 163	movl   right##d, 4(io);
164
165SYM_FUNC_START(des3_ede_x86_64_crypt_blk)
166	/* input:
167	 *	%rdi: round keys, CTX
168	 *	%rsi: dst
169	 *	%rdx: src
170	 */
171	pushq %rbx;
172	pushq %r12;
173	pushq %r13;
174	pushq %r14;
175	pushq %r15;
176
177	pushq %rsi; /* dst */
178
179	read_block(%rdx, RL0, RR0);
180	initial_permutation(RL0, RR0);
181
182	movq (CTX), RW0;
183
184	round1(0, RR0, RL0, load_next_key);
185	round1(1, RL0, RR0, load_next_key);
186	round1(2, RR0, RL0, load_next_key);
187	round1(3, RL0, RR0, load_next_key);
188	round1(4, RR0, RL0, load_next_key);
189	round1(5, RL0, RR0, load_next_key);
190	round1(6, RR0, RL0, load_next_key);
191	round1(7, RL0, RR0, load_next_key);
192	round1(8, RR0, RL0, load_next_key);
193	round1(9, RL0, RR0, load_next_key);
194	round1(10, RR0, RL0, load_next_key);
195	round1(11, RL0, RR0, load_next_key);
196	round1(12, RR0, RL0, load_next_key);
197	round1(13, RL0, RR0, load_next_key);
198	round1(14, RR0, RL0, load_next_key);
199	round1(15, RL0, RR0, load_next_key);
200
201	round1(16+0, RL0, RR0, load_next_key);
202	round1(16+1, RR0, RL0, load_next_key);
203	round1(16+2, RL0, RR0, load_next_key);
204	round1(16+3, RR0, RL0, load_next_key);
205	round1(16+4, RL0, RR0, load_next_key);
206	round1(16+5, RR0, RL0, load_next_key);
207	round1(16+6, RL0, RR0, load_next_key);
208	round1(16+7, RR0, RL0, load_next_key);
209	round1(16+8, RL0, RR0, load_next_key);
210	round1(16+9, RR0, RL0, load_next_key);
211	round1(16+10, RL0, RR0, load_next_key);
212	round1(16+11, RR0, RL0, load_next_key);
213	round1(16+12, RL0, RR0, load_next_key);
214	round1(16+13, RR0, RL0, load_next_key);
215	round1(16+14, RL0, RR0, load_next_key);
216	round1(16+15, RR0, RL0, load_next_key);
217
218	round1(32+0, RR0, RL0, load_next_key);
219	round1(32+1, RL0, RR0, load_next_key);
220	round1(32+2, RR0, RL0, load_next_key);
221	round1(32+3, RL0, RR0, load_next_key);
222	round1(32+4, RR0, RL0, load_next_key);
223	round1(32+5, RL0, RR0, load_next_key);
224	round1(32+6, RR0, RL0, load_next_key);
225	round1(32+7, RL0, RR0, load_next_key);
226	round1(32+8, RR0, RL0, load_next_key);
227	round1(32+9, RL0, RR0, load_next_key);
228	round1(32+10, RR0, RL0, load_next_key);
229	round1(32+11, RL0, RR0, load_next_key);
230	round1(32+12, RR0, RL0, load_next_key);
231	round1(32+13, RL0, RR0, load_next_key);
232	round1(32+14, RR0, RL0, load_next_key);
233	round1(32+15, RL0, RR0, dummy2);
234
235	final_permutation(RR0, RL0);
236
237	popq %rsi /* dst */
238	write_block(%rsi, RR0, RL0);
239
240	popq %r15;
241	popq %r14;
242	popq %r13;
243	popq %r12;
244	popq %rbx;
245
246	ret;
247SYM_FUNC_END(des3_ede_x86_64_crypt_blk)
248
249/***********************************************************************
250 * 3-way 3DES
251 ***********************************************************************/
 /*
  * expand_to_64bits(val, mask): same definition as in the 1-way section
  * above, repeated token for token (an identical redefinition is accepted
  * by the preprocessor).  Places a copy of val##d rotated right by 4 in the
  * upper half of val and ANDs the result with mask.  Clobbers RT0.
  */
 252#define expand_to_64bits(val, mask) \
 253	movl val##d, RT0d; \
 254	rorl $4, RT0d; \
 255	shlq $32, RT0; \
 256	orq RT0, val; \
 257	andq mask, val;
258
 /*
  * compress_to_64bits(val): same definition as in the 1-way section above,
  * repeated token for token.  Rotates the upper half of val left by 4 and
  * ORs it into val##d; the upper half of val is zero afterwards.
  * Clobbers RT0.
  */
 259#define compress_to_64bits(val) \
 260	movq val, RT0; \
 261	shrq $32, RT0; \
 262	roll $4, RT0d; \
 263	orl RT0d, val##d;
264
 /*
  * initial_permutation3(left, right): input permutation for three blocks.
  *
  * left/right are register name prefixes; the macro appends 0, 1 and 2
  * (e.g. RL -> RL0, RL1, RL2) and applies to each block the same sequence
  * as initial_permutation().  The extra indentation marks which of the
  * three blocks a line belongs to.
  *
  * Clobbers RT0, RT3 (holds the 0x3f3f3f3f3f3f3f3f mask) and RW0d-RW2d.
  */
 265#define initial_permutation3(left, right) \
 266	do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
 267	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
 268	  do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
 269	  do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
 270	    do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f); \
 271	    do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
 272	    \
 273	do_permutation(right##0d, left##0d,  2, 0x33333333); \
 274	do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
 275	  do_permutation(right##1d, left##1d,  2, 0x33333333); \
 276	  do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
 277	    do_permutation(right##2d, left##2d,  2, 0x33333333); \
 278	    do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
 279	    \
 280	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
 281	    \
 282	movl left##0d, RW0d; \
 283	roll $1, right##0d; \
 284	xorl right##0d, RW0d; \
 285	andl $0xaaaaaaaa, RW0d; \
 286	xorl RW0d, left##0d; \
 287	xorl RW0d, right##0d; \
 288	roll $1, left##0d; \
 289	expand_to_64bits(right##0, RT3); \
 290	expand_to_64bits(left##0, RT3); \
 291	  movl left##1d, RW1d; \
 292	  roll $1, right##1d; \
 293	  xorl right##1d, RW1d; \
 294	  andl $0xaaaaaaaa, RW1d; \
 295	  xorl RW1d, left##1d; \
 296	  xorl RW1d, right##1d; \
 297	  roll $1, left##1d; \
 298	  expand_to_64bits(right##1, RT3); \
 299	  expand_to_64bits(left##1, RT3); \
 300	    movl left##2d, RW2d; \
 301	    roll $1, right##2d; \
 302	    xorl right##2d, RW2d; \
 303	    andl $0xaaaaaaaa, RW2d; \
 304	    xorl RW2d, left##2d; \
 305	    xorl RW2d, right##2d; \
 306	    roll $1, left##2d; \
 307	    expand_to_64bits(right##2, RT3); \
 308	    expand_to_64bits(left##2, RT3);
309
 /*
  * final_permutation3(left, right): output permutation for three blocks.
  *
  * left/right are register name prefixes; the macro appends 0, 1 and 2 and
  * applies to each block the same sequence as final_permutation(): fold
  * both halves back to 32 bits, run the 0xaaaaaaaa exchange with right
  * rotations, then the four do_permutation() steps in the reverse order of
  * initial_permutation3().
  *
  * Clobbers RT0 and RW0d-RW2d.
  */
 310#define final_permutation3(left, right) \
 311	compress_to_64bits(right##0); \
 312	compress_to_64bits(left##0); \
 313	movl right##0d, RW0d; \
 314	rorl $1, left##0d; \
 315	xorl left##0d, RW0d; \
 316	andl $0xaaaaaaaa, RW0d; \
 317	xorl RW0d, right##0d; \
 318	xorl RW0d, left##0d; \
 319	rorl $1, right##0d; \
 320	  compress_to_64bits(right##1); \
 321	  compress_to_64bits(left##1); \
 322	  movl right##1d, RW1d; \
 323	  rorl $1, left##1d; \
 324	  xorl left##1d, RW1d; \
 325	  andl $0xaaaaaaaa, RW1d; \
 326	  xorl RW1d, right##1d; \
 327	  xorl RW1d, left##1d; \
 328	  rorl $1, right##1d; \
 329	    compress_to_64bits(right##2); \
 330	    compress_to_64bits(left##2); \
 331	    movl right##2d, RW2d; \
 332	    rorl $1, left##2d; \
 333	    xorl left##2d, RW2d; \
 334	    andl $0xaaaaaaaa, RW2d; \
 335	    xorl RW2d, right##2d; \
 336	    xorl RW2d, left##2d; \
 337	    rorl $1, right##2d; \
 338	    \
 339	do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
 340	do_permutation(right##0d, left##0d,  2, 0x33333333); \
 341	  do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
 342	  do_permutation(right##1d, left##1d,  2, 0x33333333); \
 343	    do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
 344	    do_permutation(right##2d, left##2d,  2, 0x33333333); \
 345	    \
 346	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
 347	do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
 348	  do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
 349	  do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
 350	    do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
 351	    do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f);
352
353#define round3(n, from, to, load_next_key, do_movq) \
354	xorq from##0, RW0; \
355	movzbl RW0bl, RT3d; \
356	movzbl RW0bh, RT1d; \
357	shrq $16, RW0; \
358	xorq s8(, RT3, 8), to##0; \
359	xorq s6(, RT1, 8), to##0; \
 
 
360	movzbl RW0bl, RT3d; \
361	movzbl RW0bh, RT1d; \
362	shrq $16, RW0; \
363	xorq s4(, RT3, 8), to##0; \
364	xorq s2(, RT1, 8), to##0; \
 
 
365	movzbl RW0bl, RT3d; \
366	movzbl RW0bh, RT1d; \
367	shrl $16, RW0d; \
368	xorq s7(, RT3, 8), to##0; \
369	xorq s5(, RT1, 8), to##0; \
 
 
370	movzbl RW0bl, RT3d; \
371	movzbl RW0bh, RT1d; \
372	load_next_key(n, RW0); \
373	xorq s3(, RT3, 8), to##0; \
374	xorq s1(, RT1, 8), to##0; \
 
 
375		xorq from##1, RW1; \
376		movzbl RW1bl, RT3d; \
377		movzbl RW1bh, RT1d; \
378		shrq $16, RW1; \
379		xorq s8(, RT3, 8), to##1; \
380		xorq s6(, RT1, 8), to##1; \
 
 
381		movzbl RW1bl, RT3d; \
382		movzbl RW1bh, RT1d; \
383		shrq $16, RW1; \
384		xorq s4(, RT3, 8), to##1; \
385		xorq s2(, RT1, 8), to##1; \
 
 
386		movzbl RW1bl, RT3d; \
387		movzbl RW1bh, RT1d; \
388		shrl $16, RW1d; \
389		xorq s7(, RT3, 8), to##1; \
390		xorq s5(, RT1, 8), to##1; \
 
 
391		movzbl RW1bl, RT3d; \
392		movzbl RW1bh, RT1d; \
393		do_movq(RW0, RW1); \
394		xorq s3(, RT3, 8), to##1; \
395		xorq s1(, RT1, 8), to##1; \
 
 
396			xorq from##2, RW2; \
397			movzbl RW2bl, RT3d; \
398			movzbl RW2bh, RT1d; \
399			shrq $16, RW2; \
400			xorq s8(, RT3, 8), to##2; \
401			xorq s6(, RT1, 8), to##2; \
 
 
402			movzbl RW2bl, RT3d; \
403			movzbl RW2bh, RT1d; \
404			shrq $16, RW2; \
405			xorq s4(, RT3, 8), to##2; \
406			xorq s2(, RT1, 8), to##2; \
 
 
407			movzbl RW2bl, RT3d; \
408			movzbl RW2bh, RT1d; \
409			shrl $16, RW2d; \
410			xorq s7(, RT3, 8), to##2; \
411			xorq s5(, RT1, 8), to##2; \
 
 
412			movzbl RW2bl, RT3d; \
413			movzbl RW2bh, RT1d; \
414			do_movq(RW0, RW2); \
415			xorq s3(, RT3, 8), to##2; \
416			xorq s1(, RT1, 8), to##2;
 
 
417
 /*
  * __movq(src, dst): plain 64-bit register move.  Passed to round3() as its
  * do_movq argument so the key loaded into RW0 is copied to RW1 and RW2.
  */
 418#define __movq(src, dst) \
 419	movq src, dst;
420
421SYM_FUNC_START(des3_ede_x86_64_crypt_blk_3way)
422	/* input:
423	 *	%rdi: ctx, round keys
424	 *	%rsi: dst (3 blocks)
425	 *	%rdx: src (3 blocks)
426	 */
427
428	pushq %rbx;
429	pushq %r12;
430	pushq %r13;
431	pushq %r14;
432	pushq %r15;
433
434	pushq %rsi /* dst */
435
436	/* load input */
437	movl 0 * 4(%rdx), RL0d;
438	movl 1 * 4(%rdx), RR0d;
439	movl 2 * 4(%rdx), RL1d;
440	movl 3 * 4(%rdx), RR1d;
441	movl 4 * 4(%rdx), RL2d;
442	movl 5 * 4(%rdx), RR2d;
443
444	bswapl RL0d;
445	bswapl RR0d;
446	bswapl RL1d;
447	bswapl RR1d;
448	bswapl RL2d;
449	bswapl RR2d;
450
451	initial_permutation3(RL, RR);
452
453	movq 0(CTX), RW0;
454	movq RW0, RW1;
455	movq RW0, RW2;
456
457	round3(0, RR, RL, load_next_key, __movq);
458	round3(1, RL, RR, load_next_key, __movq);
459	round3(2, RR, RL, load_next_key, __movq);
460	round3(3, RL, RR, load_next_key, __movq);
461	round3(4, RR, RL, load_next_key, __movq);
462	round3(5, RL, RR, load_next_key, __movq);
463	round3(6, RR, RL, load_next_key, __movq);
464	round3(7, RL, RR, load_next_key, __movq);
465	round3(8, RR, RL, load_next_key, __movq);
466	round3(9, RL, RR, load_next_key, __movq);
467	round3(10, RR, RL, load_next_key, __movq);
468	round3(11, RL, RR, load_next_key, __movq);
469	round3(12, RR, RL, load_next_key, __movq);
470	round3(13, RL, RR, load_next_key, __movq);
471	round3(14, RR, RL, load_next_key, __movq);
472	round3(15, RL, RR, load_next_key, __movq);
473
474	round3(16+0, RL, RR, load_next_key, __movq);
475	round3(16+1, RR, RL, load_next_key, __movq);
476	round3(16+2, RL, RR, load_next_key, __movq);
477	round3(16+3, RR, RL, load_next_key, __movq);
478	round3(16+4, RL, RR, load_next_key, __movq);
479	round3(16+5, RR, RL, load_next_key, __movq);
480	round3(16+6, RL, RR, load_next_key, __movq);
481	round3(16+7, RR, RL, load_next_key, __movq);
482	round3(16+8, RL, RR, load_next_key, __movq);
483	round3(16+9, RR, RL, load_next_key, __movq);
484	round3(16+10, RL, RR, load_next_key, __movq);
485	round3(16+11, RR, RL, load_next_key, __movq);
486	round3(16+12, RL, RR, load_next_key, __movq);
487	round3(16+13, RR, RL, load_next_key, __movq);
488	round3(16+14, RL, RR, load_next_key, __movq);
489	round3(16+15, RR, RL, load_next_key, __movq);
490
491	round3(32+0, RR, RL, load_next_key, __movq);
492	round3(32+1, RL, RR, load_next_key, __movq);
493	round3(32+2, RR, RL, load_next_key, __movq);
494	round3(32+3, RL, RR, load_next_key, __movq);
495	round3(32+4, RR, RL, load_next_key, __movq);
496	round3(32+5, RL, RR, load_next_key, __movq);
497	round3(32+6, RR, RL, load_next_key, __movq);
498	round3(32+7, RL, RR, load_next_key, __movq);
499	round3(32+8, RR, RL, load_next_key, __movq);
500	round3(32+9, RL, RR, load_next_key, __movq);
501	round3(32+10, RR, RL, load_next_key, __movq);
502	round3(32+11, RL, RR, load_next_key, __movq);
503	round3(32+12, RR, RL, load_next_key, __movq);
504	round3(32+13, RL, RR, load_next_key, __movq);
505	round3(32+14, RR, RL, load_next_key, __movq);
506	round3(32+15, RL, RR, dummy2, dummy2);
507
508	final_permutation3(RR, RL);
509
510	bswapl RR0d;
511	bswapl RL0d;
512	bswapl RR1d;
513	bswapl RL1d;
514	bswapl RR2d;
515	bswapl RL2d;
516
517	popq %rsi /* dst */
518	movl RR0d, 0 * 4(%rsi);
519	movl RL0d, 1 * 4(%rsi);
520	movl RR1d, 2 * 4(%rsi);
521	movl RL1d, 3 * 4(%rsi);
522	movl RR2d, 4 * 4(%rsi);
523	movl RL2d, 5 * 4(%rsi);
524
525	popq %r15;
526	popq %r14;
527	popq %r13;
528	popq %r12;
529	popq %rbx;
530
531	ret;
532SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way)
533
 /*
  * Read-only lookup tables used by the round macros through the s1..s8
  * defines.  Each of the eight tables that follow (.L_s1 .. .L_s8) holds
  * 64 quadwords (512 bytes).  The s2..s8 defines locate their table by
  * adding multiples of (64*8) to .L_s1, so the tables must stay contiguous
  * and in this order.
  */
 534.section	.rodata, "a", @progbits
 535.align 16
536.L_s1:
537	.quad 0x0010100001010400, 0x0000000000000000
538	.quad 0x0000100000010000, 0x0010100001010404
539	.quad 0x0010100001010004, 0x0000100000010404
540	.quad 0x0000000000000004, 0x0000100000010000
541	.quad 0x0000000000000400, 0x0010100001010400
542	.quad 0x0010100001010404, 0x0000000000000400
543	.quad 0x0010000001000404, 0x0010100001010004
544	.quad 0x0010000001000000, 0x0000000000000004
545	.quad 0x0000000000000404, 0x0010000001000400
546	.quad 0x0010000001000400, 0x0000100000010400
547	.quad 0x0000100000010400, 0x0010100001010000
548	.quad 0x0010100001010000, 0x0010000001000404
549	.quad 0x0000100000010004, 0x0010000001000004
550	.quad 0x0010000001000004, 0x0000100000010004
551	.quad 0x0000000000000000, 0x0000000000000404
552	.quad 0x0000100000010404, 0x0010000001000000
553	.quad 0x0000100000010000, 0x0010100001010404
554	.quad 0x0000000000000004, 0x0010100001010000
555	.quad 0x0010100001010400, 0x0010000001000000
556	.quad 0x0010000001000000, 0x0000000000000400
557	.quad 0x0010100001010004, 0x0000100000010000
558	.quad 0x0000100000010400, 0x0010000001000004
559	.quad 0x0000000000000400, 0x0000000000000004
560	.quad 0x0010000001000404, 0x0000100000010404
561	.quad 0x0010100001010404, 0x0000100000010004
562	.quad 0x0010100001010000, 0x0010000001000404
563	.quad 0x0010000001000004, 0x0000000000000404
564	.quad 0x0000100000010404, 0x0010100001010400
565	.quad 0x0000000000000404, 0x0010000001000400
566	.quad 0x0010000001000400, 0x0000000000000000
567	.quad 0x0000100000010004, 0x0000100000010400
568	.quad 0x0000000000000000, 0x0010100001010004
569.L_s2:
570	.quad 0x0801080200100020, 0x0800080000000000
571	.quad 0x0000080000000000, 0x0001080200100020
572	.quad 0x0001000000100000, 0x0000000200000020
573	.quad 0x0801000200100020, 0x0800080200000020
574	.quad 0x0800000200000020, 0x0801080200100020
575	.quad 0x0801080000100000, 0x0800000000000000
576	.quad 0x0800080000000000, 0x0001000000100000
577	.quad 0x0000000200000020, 0x0801000200100020
578	.quad 0x0001080000100000, 0x0001000200100020
579	.quad 0x0800080200000020, 0x0000000000000000
580	.quad 0x0800000000000000, 0x0000080000000000
581	.quad 0x0001080200100020, 0x0801000000100000
582	.quad 0x0001000200100020, 0x0800000200000020
583	.quad 0x0000000000000000, 0x0001080000100000
584	.quad 0x0000080200000020, 0x0801080000100000
585	.quad 0x0801000000100000, 0x0000080200000020
586	.quad 0x0000000000000000, 0x0001080200100020
587	.quad 0x0801000200100020, 0x0001000000100000
588	.quad 0x0800080200000020, 0x0801000000100000
589	.quad 0x0801080000100000, 0x0000080000000000
590	.quad 0x0801000000100000, 0x0800080000000000
591	.quad 0x0000000200000020, 0x0801080200100020
592	.quad 0x0001080200100020, 0x0000000200000020
593	.quad 0x0000080000000000, 0x0800000000000000
594	.quad 0x0000080200000020, 0x0801080000100000
595	.quad 0x0001000000100000, 0x0800000200000020
596	.quad 0x0001000200100020, 0x0800080200000020
597	.quad 0x0800000200000020, 0x0001000200100020
598	.quad 0x0001080000100000, 0x0000000000000000
599	.quad 0x0800080000000000, 0x0000080200000020
600	.quad 0x0800000000000000, 0x0801000200100020
601	.quad 0x0801080200100020, 0x0001080000100000
602.L_s3:
603	.quad 0x0000002000000208, 0x0000202008020200
604	.quad 0x0000000000000000, 0x0000200008020008
605	.quad 0x0000002008000200, 0x0000000000000000
606	.quad 0x0000202000020208, 0x0000002008000200
607	.quad 0x0000200000020008, 0x0000000008000008
608	.quad 0x0000000008000008, 0x0000200000020000
609	.quad 0x0000202008020208, 0x0000200000020008
610	.quad 0x0000200008020000, 0x0000002000000208
611	.quad 0x0000000008000000, 0x0000000000000008
612	.quad 0x0000202008020200, 0x0000002000000200
613	.quad 0x0000202000020200, 0x0000200008020000
614	.quad 0x0000200008020008, 0x0000202000020208
615	.quad 0x0000002008000208, 0x0000202000020200
616	.quad 0x0000200000020000, 0x0000002008000208
617	.quad 0x0000000000000008, 0x0000202008020208
618	.quad 0x0000002000000200, 0x0000000008000000
619	.quad 0x0000202008020200, 0x0000000008000000
620	.quad 0x0000200000020008, 0x0000002000000208
621	.quad 0x0000200000020000, 0x0000202008020200
622	.quad 0x0000002008000200, 0x0000000000000000
623	.quad 0x0000002000000200, 0x0000200000020008
624	.quad 0x0000202008020208, 0x0000002008000200
625	.quad 0x0000000008000008, 0x0000002000000200
626	.quad 0x0000000000000000, 0x0000200008020008
627	.quad 0x0000002008000208, 0x0000200000020000
628	.quad 0x0000000008000000, 0x0000202008020208
629	.quad 0x0000000000000008, 0x0000202000020208
630	.quad 0x0000202000020200, 0x0000000008000008
631	.quad 0x0000200008020000, 0x0000002008000208
632	.quad 0x0000002000000208, 0x0000200008020000
633	.quad 0x0000202000020208, 0x0000000000000008
634	.quad 0x0000200008020008, 0x0000202000020200
635.L_s4:
636	.quad 0x1008020000002001, 0x1000020800002001
637	.quad 0x1000020800002001, 0x0000000800000000
638	.quad 0x0008020800002000, 0x1008000800000001
639	.quad 0x1008000000000001, 0x1000020000002001
640	.quad 0x0000000000000000, 0x0008020000002000
641	.quad 0x0008020000002000, 0x1008020800002001
642	.quad 0x1000000800000001, 0x0000000000000000
643	.quad 0x0008000800000000, 0x1008000000000001
644	.quad 0x1000000000000001, 0x0000020000002000
645	.quad 0x0008000000000000, 0x1008020000002001
646	.quad 0x0000000800000000, 0x0008000000000000
647	.quad 0x1000020000002001, 0x0000020800002000
648	.quad 0x1008000800000001, 0x1000000000000001
649	.quad 0x0000020800002000, 0x0008000800000000
650	.quad 0x0000020000002000, 0x0008020800002000
651	.quad 0x1008020800002001, 0x1000000800000001
652	.quad 0x0008000800000000, 0x1008000000000001
653	.quad 0x0008020000002000, 0x1008020800002001
654	.quad 0x1000000800000001, 0x0000000000000000
655	.quad 0x0000000000000000, 0x0008020000002000
656	.quad 0x0000020800002000, 0x0008000800000000
657	.quad 0x1008000800000001, 0x1000000000000001
658	.quad 0x1008020000002001, 0x1000020800002001
659	.quad 0x1000020800002001, 0x0000000800000000
660	.quad 0x1008020800002001, 0x1000000800000001
661	.quad 0x1000000000000001, 0x0000020000002000
662	.quad 0x1008000000000001, 0x1000020000002001
663	.quad 0x0008020800002000, 0x1008000800000001
664	.quad 0x1000020000002001, 0x0000020800002000
665	.quad 0x0008000000000000, 0x1008020000002001
666	.quad 0x0000000800000000, 0x0008000000000000
667	.quad 0x0000020000002000, 0x0008020800002000
668.L_s5:
669	.quad 0x0000001000000100, 0x0020001002080100
670	.quad 0x0020000002080000, 0x0420001002000100
671	.quad 0x0000000000080000, 0x0000001000000100
672	.quad 0x0400000000000000, 0x0020000002080000
673	.quad 0x0400001000080100, 0x0000000000080000
674	.quad 0x0020001002000100, 0x0400001000080100
675	.quad 0x0420001002000100, 0x0420000002080000
676	.quad 0x0000001000080100, 0x0400000000000000
677	.quad 0x0020000002000000, 0x0400000000080000
678	.quad 0x0400000000080000, 0x0000000000000000
679	.quad 0x0400001000000100, 0x0420001002080100
680	.quad 0x0420001002080100, 0x0020001002000100
681	.quad 0x0420000002080000, 0x0400001000000100
682	.quad 0x0000000000000000, 0x0420000002000000
683	.quad 0x0020001002080100, 0x0020000002000000
684	.quad 0x0420000002000000, 0x0000001000080100
685	.quad 0x0000000000080000, 0x0420001002000100
686	.quad 0x0000001000000100, 0x0020000002000000
687	.quad 0x0400000000000000, 0x0020000002080000
688	.quad 0x0420001002000100, 0x0400001000080100
689	.quad 0x0020001002000100, 0x0400000000000000
690	.quad 0x0420000002080000, 0x0020001002080100
691	.quad 0x0400001000080100, 0x0000001000000100
692	.quad 0x0020000002000000, 0x0420000002080000
693	.quad 0x0420001002080100, 0x0000001000080100
694	.quad 0x0420000002000000, 0x0420001002080100
695	.quad 0x0020000002080000, 0x0000000000000000
696	.quad 0x0400000000080000, 0x0420000002000000
697	.quad 0x0000001000080100, 0x0020001002000100
698	.quad 0x0400001000000100, 0x0000000000080000
699	.quad 0x0000000000000000, 0x0400000000080000
700	.quad 0x0020001002080100, 0x0400001000000100
701.L_s6:
702	.quad 0x0200000120000010, 0x0204000020000000
703	.quad 0x0000040000000000, 0x0204040120000010
704	.quad 0x0204000020000000, 0x0000000100000010
705	.quad 0x0204040120000010, 0x0004000000000000
706	.quad 0x0200040020000000, 0x0004040100000010
707	.quad 0x0004000000000000, 0x0200000120000010
708	.quad 0x0004000100000010, 0x0200040020000000
709	.quad 0x0200000020000000, 0x0000040100000010
710	.quad 0x0000000000000000, 0x0004000100000010
711	.quad 0x0200040120000010, 0x0000040000000000
712	.quad 0x0004040000000000, 0x0200040120000010
713	.quad 0x0000000100000010, 0x0204000120000010
714	.quad 0x0204000120000010, 0x0000000000000000
715	.quad 0x0004040100000010, 0x0204040020000000
716	.quad 0x0000040100000010, 0x0004040000000000
717	.quad 0x0204040020000000, 0x0200000020000000
718	.quad 0x0200040020000000, 0x0000000100000010
719	.quad 0x0204000120000010, 0x0004040000000000
720	.quad 0x0204040120000010, 0x0004000000000000
721	.quad 0x0000040100000010, 0x0200000120000010
722	.quad 0x0004000000000000, 0x0200040020000000
723	.quad 0x0200000020000000, 0x0000040100000010
724	.quad 0x0200000120000010, 0x0204040120000010
725	.quad 0x0004040000000000, 0x0204000020000000
726	.quad 0x0004040100000010, 0x0204040020000000
727	.quad 0x0000000000000000, 0x0204000120000010
728	.quad 0x0000000100000010, 0x0000040000000000
729	.quad 0x0204000020000000, 0x0004040100000010
730	.quad 0x0000040000000000, 0x0004000100000010
731	.quad 0x0200040120000010, 0x0000000000000000
732	.quad 0x0204040020000000, 0x0200000020000000
733	.quad 0x0004000100000010, 0x0200040120000010
734.L_s7:
735	.quad 0x0002000000200000, 0x2002000004200002
736	.quad 0x2000000004000802, 0x0000000000000000
737	.quad 0x0000000000000800, 0x2000000004000802
738	.quad 0x2002000000200802, 0x0002000004200800
739	.quad 0x2002000004200802, 0x0002000000200000
740	.quad 0x0000000000000000, 0x2000000004000002
741	.quad 0x2000000000000002, 0x0000000004000000
742	.quad 0x2002000004200002, 0x2000000000000802
743	.quad 0x0000000004000800, 0x2002000000200802
744	.quad 0x2002000000200002, 0x0000000004000800
745	.quad 0x2000000004000002, 0x0002000004200000
746	.quad 0x0002000004200800, 0x2002000000200002
747	.quad 0x0002000004200000, 0x0000000000000800
748	.quad 0x2000000000000802, 0x2002000004200802
749	.quad 0x0002000000200800, 0x2000000000000002
750	.quad 0x0000000004000000, 0x0002000000200800
751	.quad 0x0000000004000000, 0x0002000000200800
752	.quad 0x0002000000200000, 0x2000000004000802
753	.quad 0x2000000004000802, 0x2002000004200002
754	.quad 0x2002000004200002, 0x2000000000000002
755	.quad 0x2002000000200002, 0x0000000004000000
756	.quad 0x0000000004000800, 0x0002000000200000
757	.quad 0x0002000004200800, 0x2000000000000802
758	.quad 0x2002000000200802, 0x0002000004200800
759	.quad 0x2000000000000802, 0x2000000004000002
760	.quad 0x2002000004200802, 0x0002000004200000
761	.quad 0x0002000000200800, 0x0000000000000000
762	.quad 0x2000000000000002, 0x2002000004200802
763	.quad 0x0000000000000000, 0x2002000000200802
764	.quad 0x0002000004200000, 0x0000000000000800
765	.quad 0x2000000004000002, 0x0000000004000800
766	.quad 0x0000000000000800, 0x2002000000200002
767.L_s8:
768	.quad 0x0100010410001000, 0x0000010000001000
769	.quad 0x0000000000040000, 0x0100010410041000
770	.quad 0x0100000010000000, 0x0100010410001000
771	.quad 0x0000000400000000, 0x0100000010000000
772	.quad 0x0000000400040000, 0x0100000010040000
773	.quad 0x0100010410041000, 0x0000010000041000
774	.quad 0x0100010010041000, 0x0000010400041000
775	.quad 0x0000010000001000, 0x0000000400000000
776	.quad 0x0100000010040000, 0x0100000410000000
777	.quad 0x0100010010001000, 0x0000010400001000
778	.quad 0x0000010000041000, 0x0000000400040000
779	.quad 0x0100000410040000, 0x0100010010041000
780	.quad 0x0000010400001000, 0x0000000000000000
781	.quad 0x0000000000000000, 0x0100000410040000
782	.quad 0x0100000410000000, 0x0100010010001000
783	.quad 0x0000010400041000, 0x0000000000040000
784	.quad 0x0000010400041000, 0x0000000000040000
785	.quad 0x0100010010041000, 0x0000010000001000
786	.quad 0x0000000400000000, 0x0100000410040000
787	.quad 0x0000010000001000, 0x0000010400041000
788	.quad 0x0100010010001000, 0x0000000400000000
789	.quad 0x0100000410000000, 0x0100000010040000
790	.quad 0x0100000410040000, 0x0100000010000000
791	.quad 0x0000000000040000, 0x0100010410001000
792	.quad 0x0000000000000000, 0x0100010410041000
793	.quad 0x0000000400040000, 0x0100000410000000
794	.quad 0x0100000010040000, 0x0100010010001000
795	.quad 0x0100010410001000, 0x0000000000000000
796	.quad 0x0100010410041000, 0x0000010000041000
797	.quad 0x0000010000041000, 0x0000010400001000
798	.quad 0x0000010400001000, 0x0000000400040000
799	.quad 0x0100000010000000, 0x0100010010041000