Linux Audio

Check our new training course

Loading...
v6.13.7
  1/* SPDX-License-Identifier: GPL-2.0-only */
  2/*
  3 * AES-NI + SSE4.1 implementation of AEGIS-128
  4 *
  5 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
  6 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
  7 * Copyright 2024 Google LLC
  8 */
  9
 10#include <linux/linkage.h>
 
 
 11
 /*
 * Register roles.  KEY and MSG are two names for %xmm5, so a key block and
 * a message block cannot be live at the same time.
 */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4

#define KEY	%xmm5
#define MSG	%xmm5

/* Scratch registers */
#define T0	%xmm6
#define T1	%xmm7
 21
 
 
 
 
 
 .section .rodata.cst16.aegis128_const, "aM", @progbits, 32
.p2align 4
/* Terms 0..15 of the Fibonacci sequence, each reduced mod 256 */
.Laegis128_const_0:
	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
/* Terms 16..31 of the same sequence */
.Laegis128_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

.section .rodata.cst32.zeropad_mask, "aM", @progbits, 32
.p2align 5
/*
 * 16 bytes of 0xff followed by 16 bytes of 0x00.  A 16-byte load starting
 * at .Lzeropad_mask + 16 - n yields a mask whose first n bytes are set.
 */
.Lzeropad_mask:
	.octa 0xffffffffffffffffffffffffffffffff
	.octa 0
 36
 37.text
 38
 /*
 * aegis128_update - run one state-update round (message XOR not included)
 *
 * In/out:  STATE0..STATE4
 * Clobber: T0
 *
 * Every state register goes through one AESENC round keyed by another
 * state register.  The results are left where they were computed, so the
 * logical block order is rotated by one register afterwards; the callers
 * compensate by rotating the register names they use.
 *
 * The instruction order matters: each AESENC must read its round-key
 * operand before that register is itself overwritten, and the original
 * STATE4 is parked in T0 for the last step.
 */
.macro aegis128_update
	movdqa STATE4, T0		/* keep the old STATE4 */
	aesenc STATE0, STATE4
	aesenc STATE1, STATE0
	aesenc STATE2, STATE1
	aesenc STATE3, STATE2
	aesenc T0,     STATE3		/* keyed by the old STATE4 */
.endm
 56
 /*
 * load_partial - read a short tail into MSG, zero-padded
 *
 * In:      SRC  - source pointer
 *          %ecx - LEN, with 1 <= LEN <= 15
 * Out:     MSG  - the LEN source bytes; all remaining bytes are zero
 * Clobber: %rax, %rcx, %r8
 *
 * Only bytes inside [SRC, SRC + LEN) are read.  Lengths that are not a
 * power of two are handled with two overlapping loads whose overlap is
 * shifted away before the halves are combined.
 */
.macro load_partial
	sub $8, %ecx			/* %ecx = LEN - 8 */
	jle .Lload_le8_\@

	/* 9 <= LEN <= 15 */
	movq (SRC), MSG			/* bytes 0..7 */
	mov (SRC, %rcx), %rax		/* bytes LEN-8..LEN-1 */
	neg %ecx
	shl $3, %ecx			/* %cl mod 64 = 8 * (16 - LEN) */
	shr %cl, %rax			/* drop the bytes already in MSG */
	pinsrq $1, %rax, MSG
	jmp .Lload_done_\@

.Lload_le8_\@:
	add $4, %ecx			/* %ecx = LEN - 4 */
	jl .Lload_lt4_\@

	/* 4 <= LEN <= 8 */
	mov (SRC), %eax			/* bytes 0..3 */
	mov (SRC, %rcx), %r8d		/* bytes LEN-4..LEN-1 */
	jmp .Lload_merge_\@

.Lload_lt4_\@:
	/* 1 <= LEN <= 3 */
	add $2, %ecx			/* %ecx = LEN - 2; sets flags for jl */
	movzbl (SRC), %eax		/* byte 0 (flags untouched) */
	jl .Lload_movq_\@		/* LEN == 1: nothing more to read */
	movzwl (SRC, %rcx), %r8d	/* bytes LEN-2..LEN-1 */
.Lload_merge_\@:
	shl $3, %ecx			/* byte offset -> bit offset */
	shl %cl, %r8
	or %r8, %rax			/* overlapping bytes are identical */
.Lload_movq_\@:
	movq %rax, MSG			/* also clears the upper 64 bits */
.Lload_done_\@:
.endm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 97
 /*
 * store_partial - write the low LEN bytes of an xmm register to DST
 *
 * In:      \msg - xmm register holding the data
 *          DST  - destination pointer
 *          %ecx - LEN, with 1 <= LEN <= 15
 * Clobber: %rax, %rcx, %r8
 *
 * Only bytes inside [DST, DST + LEN) are written.  For LEN >= 4 the tail is
 * written first with a rotated, overlapping store, and the head store that
 * follows puts the correct bytes over the overlap.
 */
.macro store_partial msg
	sub $8, %ecx			/* %ecx = LEN - 8 */
	jl .Lstore_lt8_\@

	/* 8 <= LEN <= 15 */
	pextrq $1, \msg, %rax		/* bytes 8..15 */
	mov %ecx, %r8d			/* %r8 = LEN - 8 */
	shl $3, %ecx
	ror %cl, %rax			/* line the tail up with DST + LEN - 8 */
	mov %rax, (DST, %r8)		/* tail, overlapping the head */
	movq \msg, (DST)		/* bytes 0..7 */
	jmp .Lstore_done_\@

.Lstore_lt8_\@:
	add $4, %ecx			/* %ecx = LEN - 4 */
	jl .Lstore_lt4_\@

	/* 4 <= LEN <= 7 */
	pextrd $1, \msg, %eax		/* bytes 4..7 */
	mov %ecx, %r8d			/* %r8 = LEN - 4 */
	shl $3, %ecx
	ror %cl, %eax			/* line the tail up with DST + LEN - 4 */
	mov %eax, (DST, %r8)		/* tail, overlapping the head */
	movd \msg, (DST)		/* bytes 0..3 */
	jmp .Lstore_done_\@

.Lstore_lt4_\@:
	/* 1 <= LEN <= 3; %ecx = LEN - 4 here */
	pextrb $0, \msg, 0(DST)
	cmp $-2, %ecx			/* compare LEN with 2 */
	jl .Lstore_done_\@		/* LEN == 1 */
	pextrb $1, \msg, 1(DST)		/* pextrb leaves the flags alone */
	je .Lstore_done_\@		/* LEN == 2 */
	pextrb $2, \msg, 2(DST)
.Lstore_done_\@:
.endm
 
 
 
 
 
 
 
 
 
138
/*
 * void aegis128_aesni_init(struct aegis_state *state,
 *			    const struct aegis_block *key,
 *			    const u8 iv[AEGIS128_NONCE_SIZE]);
 *
 * Build the initial state from the key and IV, run ten update rounds and
 * write the result to *state.  The key is read with an aligned load, the
 * IV and the state with unaligned ones.
 */
SYM_FUNC_START(aegis128_aesni_init)
	.set STATEP, %rdi
	.set KEYP, %rsi
	.set IVP, %rdx

	/* KEY = key, T1 = key ^ iv */
	movdqa (KEYP), KEY
	movdqu (IVP), T1
	pxor KEY, T1

	/* STATE1 / STATE2 start out as the two constants */
	movdqa .Laegis128_const_1(%rip), STATE1
	movdqa .Laegis128_const_0(%rip), STATE2

	/* STATE0 = key ^ iv, STATE3 = key ^ const_0, STATE4 = key ^ const_1 */
	movdqa T1, STATE0
	movdqa KEY, STATE3
	movdqa KEY, STATE4
	pxor STATE2, STATE3
	pxor STATE1, STATE4

	/*
	 * Ten rounds, alternately absorbing KEY and KEY ^ IV.  The target of
	 * the pxor moves back by one register each round because
	 * aegis128_update leaves the state rotated.
	 */
	aegis128_update; pxor KEY, STATE4
	aegis128_update; pxor T1,  STATE3
	aegis128_update; pxor KEY, STATE2
	aegis128_update; pxor T1,  STATE1
	aegis128_update; pxor KEY, STATE0
	aegis128_update; pxor T1,  STATE4
	aegis128_update; pxor KEY, STATE3
	aegis128_update; pxor T1,  STATE2
	aegis128_update; pxor KEY, STATE1
	aegis128_update; pxor T1,  STATE0

	/* Ten rounds = two full rotations: registers are back in order */
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	RET
SYM_FUNC_END(aegis128_aesni_init)
185
/*
 * void aegis128_aesni_ad(struct aegis_state *state, const u8 *data,
 *			  unsigned int len);
 *
 * Absorb len bytes of associated data into *state.  len must be a multiple
 * of 16; len == 0 returns at once without touching the state.
 *
 * The loop is unrolled five times so that the register rotation done by
 * aegis128_update comes back to its starting point once per iteration.
 * Exit label .Lad_out_N is taken when the block count is N mod 5 and
 * stores the registers in the rotation that applies at that point.
 */
SYM_FUNC_START(aegis128_aesni_ad)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set LEN, %edx

	test LEN, LEN
	jz .Lad_out

	/* Fetch the five state blocks */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

.p2align 3
.Lad_loop:
	/* block 0 of this group */
	movdqu 0x00(SRC), MSG
	aegis128_update
	pxor MSG, STATE4
	sub $0x10, LEN
	jz .Lad_out_1

	/* block 1 */
	movdqu 0x10(SRC), MSG
	aegis128_update
	pxor MSG, STATE3
	sub $0x10, LEN
	jz .Lad_out_2

	/* block 2 */
	movdqu 0x20(SRC), MSG
	aegis128_update
	pxor MSG, STATE2
	sub $0x10, LEN
	jz .Lad_out_3

	/* block 3 */
	movdqu 0x30(SRC), MSG
	aegis128_update
	pxor MSG, STATE1
	sub $0x10, LEN
	jz .Lad_out_4

	/* block 4 */
	movdqu 0x40(SRC), MSG
	aegis128_update
	pxor MSG, STATE0
	sub $0x10, LEN
	jz .Lad_out_0

	add $0x50, SRC
	jmp .Lad_loop

	/* Write the state back, undoing the current rotation */
.Lad_out_0:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	RET

.Lad_out_1:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	RET

.Lad_out_2:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	RET

.Lad_out_3:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	RET

.Lad_out_4:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	/* fall through */
.Lad_out:
	RET
SYM_FUNC_END(aegis128_aesni_ad)
284
/*
 * encrypt_block - encrypt block number \i of the current five-block group
 *
 * \s0..\s4 are the state registers in their current logical order (\s0 is
 * accepted for symmetry but not referenced).  The ciphertext written to
 * DST is MSG ^ \s1 ^ \s4 ^ (\s2 & \s3); the plaintext is then absorbed
 * into the state.  Leaves through .Lenc_out_\i once LEN reaches zero.
 * Clobbers MSG, T0, T1.
 */
.macro encrypt_block s0 s1 s2 s3 s4 i
	movdqu (\i * 0x10)(SRC), MSG
	movdqa \s2, T1
	pand \s3, T1			/* T1 = s2 & s3 */
	movdqa MSG, T0
	pxor \s1, T0
	pxor \s4, T0
	pxor T1, T0			/* T0 = ciphertext */
	movdqu T0, (\i * 0x10)(DST)

	aegis128_update
	pxor MSG, \s4			/* absorb the plaintext */

	sub $0x10, LEN
	jz .Lenc_out_\i
.endm
301
/*
 * void aegis128_aesni_enc(struct aegis_state *state, const u8 *src, u8 *dst,
 *			   unsigned int len);
 *
 * Encrypt len bytes from src to dst and update *state.  len must be
 * nonzero and a multiple of 16: the loop only terminates when LEN hits
 * exactly zero.
 */
SYM_FUNC_START(aegis128_aesni_enc)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx

	/* Fetch the five state blocks */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

.p2align 3
.Lenc_loop:
	/* Five blocks per pass; register names rotate with the state */
	encrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Lenc_loop

	/* .Lenc_out_N: the input ended after block N of a group */
.Lenc_out_0:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	RET

.Lenc_out_1:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	RET

.Lenc_out_2:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	RET

.Lenc_out_3:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	RET

.Lenc_out_4:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	/* fall through */
.Lenc_out:
	RET
SYM_FUNC_END(aegis128_aesni_enc)
375
/*
 * void aegis128_aesni_enc_tail(struct aegis_state *state, const u8 *src,
 *				u8 *dst, unsigned int len);
 *
 * Encrypt a final partial block.  len is handed to load_partial and
 * store_partial, both of which require 1 <= len <= 15.
 */
SYM_FUNC_START(aegis128_aesni_enc_tail)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx	/* load_partial / store_partial expect LEN in %ecx */

	/* Fetch the five state blocks */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* load_partial destroys %ecx, so park LEN in %r9d */
	mov LEN, %r9d
	load_partial			/* MSG = zero-padded plaintext */

	/* T0 = MSG ^ STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	movdqa STATE2, T1
	pand STATE3, T1
	movdqa MSG, T0
	pxor STATE1, T0
	pxor STATE4, T0
	pxor T1, T0

	mov %r9d, LEN
	store_partial T0		/* only LEN ciphertext bytes are written */

	/* Absorb the zero-padded plaintext */
	aegis128_update
	pxor MSG, STATE4

	/* One round done: the state is rotated by one register */
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	RET
SYM_FUNC_END(aegis128_aesni_enc_tail)
418
/*
 * decrypt_block - decrypt block number \i of the current five-block group
 *
 * \s0..\s4 are the state registers in their current logical order (\s0 is
 * accepted for symmetry but not referenced).  The plaintext is
 * C ^ \s1 ^ \s4 ^ (\s2 & \s3); it is written to DST and then absorbed into
 * the state.  Leaves through .Ldec_out_\i once LEN reaches zero.
 * Clobbers MSG, T0 (via aegis128_update) and T1.
 */
.macro decrypt_block s0 s1 s2 s3 s4 i
	movdqu (\i * 0x10)(SRC), MSG
	movdqa \s2, T1
	pand \s3, T1			/* T1 = s2 & s3 */
	pxor \s1, MSG
	pxor \s4, MSG
	pxor T1, MSG			/* MSG = plaintext */
	movdqu MSG, (\i * 0x10)(DST)

	aegis128_update
	pxor MSG, \s4			/* absorb the plaintext */

	sub $0x10, LEN
	jz .Ldec_out_\i
.endm
434
/*
 * void aegis128_aesni_dec(struct aegis_state *state, const u8 *src, u8 *dst,
 *			   unsigned int len);
 *
 * Decrypt len bytes from src to dst and update *state.  len must be
 * nonzero and a multiple of 16: the loop only terminates when LEN hits
 * exactly zero.
 */
SYM_FUNC_START(aegis128_aesni_dec)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx

	/* Fetch the five state blocks */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

.p2align 3
.Ldec_loop:
	/* Five blocks per pass; register names rotate with the state */
	decrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Ldec_loop

	/* .Ldec_out_N: the input ended after block N of a group */
.Ldec_out_0:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	RET

.Ldec_out_1:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	RET

.Ldec_out_2:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	RET

.Ldec_out_3:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	RET

.Ldec_out_4:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	/* fall through */
.Ldec_out:
	RET
SYM_FUNC_END(aegis128_aesni_dec)
508
/*
 * void aegis128_aesni_dec_tail(struct aegis_state *state, const u8 *src,
 *				u8 *dst, unsigned int len);
 *
 * Decrypt a final partial block.  len is handed to load_partial and
 * store_partial, both of which require 1 <= len <= 15.
 */
SYM_FUNC_START(aegis128_aesni_dec_tail)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx	/* load_partial / store_partial expect LEN in %ecx */

	/* Fetch the five state blocks */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* load_partial destroys %ecx, so park LEN in %r9 (zero-extended) */
	mov LEN, %r9d
	load_partial			/* MSG = zero-padded ciphertext */

	/* MSG ^= STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	movdqa STATE2, T1
	pand STATE3, T1
	pxor STATE1, MSG
	pxor STATE4, MSG
	pxor T1, MSG

	mov %r9d, LEN
	store_partial MSG		/* only LEN plaintext bytes are written */

	/*
	 * Bytes LEN..15 of MSG are keystream rather than plaintext because
	 * the input was zero-padded.  Clear them with a mask whose first LEN
	 * bytes are 0xff, read from .Lzeropad_mask + 16 - LEN.
	 */
	lea .Lzeropad_mask+16(%rip), %rax
	sub %r9, %rax
	movdqu (%rax), T0
	pand T0, MSG

	/* Absorb the zero-padded plaintext */
	aegis128_update
	pxor MSG, STATE4

	/* One round done: the state is rotated by one register */
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	RET
SYM_FUNC_END(aegis128_aesni_dec_tail)
556
/*
 * void aegis128_aesni_final(struct aegis_state *state,
 *			     struct aegis_block *tag_xor,
 *			     unsigned int assoclen, unsigned int cryptlen);
 *
 * Absorb the length block for seven rounds, then XOR all five state
 * blocks into *tag_xor.  *state itself is not written back.
 */
SYM_FUNC_START(aegis128_aesni_final)
	.set STATEP, %rdi
	.set TAG_XOR, %rsi
	.set ASSOCLEN, %edx
	.set CRYPTLEN, %ecx

	/* Fetch the five state blocks */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/*
	 * Length block: assoclen in the low qword, cryptlen in the high
	 * qword (movd zeroes everything above bit 31, pinsrd fills dword 2),
	 * both converted from bytes to bits by the shift.
	 */
	movd ASSOCLEN, MSG
	pinsrd $2, CRYPTLEN, MSG
	psllq $3, MSG

	pxor STATE3, MSG

	/* Seven rounds absorbing the same block */
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3
	aegis128_update; pxor MSG, STATE2
	aegis128_update; pxor MSG, STATE1
	aegis128_update; pxor MSG, STATE0
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3

	/* *tag_xor ^= STATE0 ^ STATE1 ^ STATE2 ^ STATE3 ^ STATE4 */
	movdqu (TAG_XOR), MSG
	pxor STATE0, MSG
	pxor STATE1, MSG
	pxor STATE2, MSG
	pxor STATE3, MSG
	pxor STATE4, MSG
	movdqu MSG, (TAG_XOR)
	RET
SYM_FUNC_END(aegis128_aesni_final)
v6.2
  1/* SPDX-License-Identifier: GPL-2.0-only */
  2/*
  3 * AES-NI + SSE2 implementation of AEGIS-128
  4 *
  5 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
  6 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
 
  7 */
  8
  9#include <linux/linkage.h>
 10#include <linux/cfi_types.h>
 11#include <asm/frame.h>
 12
 /*
 * Register roles.  KEY and MSG are two names for %xmm5, so a key block and
 * a message block cannot be live at the same time.
 */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4

#define KEY	%xmm5
#define MSG	%xmm5

/* Scratch registers */
#define T0	%xmm6
#define T1	%xmm7

/* Arguments shared by the ad / enc / dec entry points, as 64-bit registers */
#define STATEP	%rdi
#define LEN	%rsi
#define SRC	%rdx
#define DST	%rcx
 27
 .section .rodata.cst16.aegis128_const, "aM", @progbits, 32
.p2align 4
/* Terms 0..15 of the Fibonacci sequence, each reduced mod 256 */
.Laegis128_const_0:
	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
/* Terms 16..31 of the same sequence */
.Laegis128_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
.p2align 4
/* Byte i holds the value i; compared against a length to build a mask */
.Laegis128_counter:
	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
 42
 43.text
 44
 /*
 * aegis128_update - run one state-update round (message XOR not included)
 *
 * In/out:  STATE0..STATE4
 * Clobber: T0
 *
 * Every state register goes through one AESENC round keyed by another
 * state register.  The results are left where they were computed, so the
 * logical block order is rotated by one register afterwards; the callers
 * compensate by rotating the register names they use.
 *
 * The instruction order matters: each AESENC must read its round-key
 * operand before that register is itself overwritten, and the original
 * STATE4 is parked in T0 for the last step.
 */
.macro aegis128_update
	movdqa STATE4, T0		/* keep the old STATE4 */
	aesenc STATE0, STATE4
	aesenc STATE1, STATE0
	aesenc STATE2, STATE1
	aesenc STATE3, STATE2
	aesenc T0,     STATE3		/* keyed by the old STATE4 */
.endm
 62
 /*
 * __load_partial: internal ABI
 *
 * Read the final LEN mod 16 bytes of a buffer into MSG, zero-padded.
 *
 * In:      LEN - byte count; only bits 0..4 are looked at
 *          SRC - source pointer
 * Out:     MSG - message block
 * Clobber: T0, %r8, %r9
 *
 * The tail is gathered from its end towards its start: one byte, word,
 * dword and qword, each read only if the matching bit of LEN is set, and
 * each located at SRC plus LEN with that bit and all lower bits cleared.
 */
SYM_FUNC_START_LOCAL(__load_partial)
	xor %r9d, %r9d			/* %r9 collects the low 8 bytes */
	pxor MSG, MSG

	/* bit 0: one byte */
	test $0x1, LEN
	jz .Lld_partial_1

	mov LEN, %r8
	and $0x1E, %r8
	mov (SRC, %r8), %r9b

.Lld_partial_1:
	/* bit 1: one word, placed below what was collected so far */
	test $0x2, LEN
	jz .Lld_partial_2

	mov LEN, %r8
	and $0x1C, %r8
	shl $0x10, %r9
	mov (SRC, %r8), %r9w

.Lld_partial_2:
	/* bit 2: one dword */
	test $0x4, LEN
	jz .Lld_partial_4

	mov LEN, %r8
	and $0x18, %r8
	shl $32, %r9
	mov (SRC, %r8), %r8d		/* zero-extends into %r8 */
	xor %r8, %r9

.Lld_partial_4:
	movq %r9, MSG

	/* bit 3: one qword; what was collected moves to the high half */
	test $0x8, LEN
	jz .Lld_partial_8

	mov LEN, %r8
	and $0x10, %r8
	pslldq $8, MSG
	movq (SRC, %r8), T0
	pxor T0, MSG

.Lld_partial_8:
	RET
SYM_FUNC_END(__load_partial)
128
/*
 * __store_partial: internal ABI
 *
 * Write the low LEN bytes of T0 to DST, as an 8-, 4-, 2- and 1-byte store
 * for whichever of those sizes still fits in the remaining length.
 *
 * In:      LEN - byte count (only the low 32 bits are used), LEN <= 15
 *          DST - destination pointer
 *          T0  - message block
 * Clobber: T0 (shifted right by 8 bytes when LEN >= 8), %r8, %r9, %r10
 */
SYM_FUNC_START_LOCAL(__store_partial)
	/*
	 * LEN is %rsi, but the public entry points receive the length as a
	 * 32-bit 'unsigned int', so the upper half of the register is not
	 * guaranteed to be zero.  Read it as %esi and compare unsigned.
	 */
	mov %esi, %r8d			/* %r8d = bytes left to write */
	mov DST, %r9			/* %r9  = write cursor */

	movq T0, %r10			/* %r10 = next 8 bytes to write */

	cmp $8, %r8d
	jb .Lst_partial_8

	mov %r10, (%r9)
	psrldq $8, T0
	movq T0, %r10			/* continue with bytes 8..15 */

	sub $8, %r8d
	add $8, %r9

.Lst_partial_8:
	cmp $4, %r8d
	jb .Lst_partial_4

	mov %r10d, (%r9)
	shr $32, %r10

	sub $4, %r8d
	add $4, %r9

.Lst_partial_4:
	cmp $2, %r8d
	jb .Lst_partial_2

	mov %r10w, (%r9)
	shr $0x10, %r10

	sub $2, %r8d
	add $2, %r9

.Lst_partial_2:
	cmp $1, %r8d
	jb .Lst_partial_1

	mov %r10b, (%r9)

.Lst_partial_1:
	RET
SYM_FUNC_END(__store_partial)
186
/*
 * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
 *
 * Build the initial state from the key (%rsi) and IV (%rdx), run ten
 * update rounds and write the result to *state (STATEP).  The key is read
 * with an aligned load, so it has to be 16-byte aligned.
 */
SYM_FUNC_START(crypto_aegis128_aesni_init)
	FRAME_BEGIN

	/* load IV: */
	movdqu (%rdx), T1

	/* load key: */
	movdqa (%rsi), KEY
	pxor KEY, T1			/* T1 = key ^ iv */
	movdqa T1, STATE0
	movdqa KEY, STATE3
	movdqa KEY, STATE4

	/*
	 * load the constants; RIP-relative so that no absolute-address
	 * relocation is needed:
	 */
	movdqa .Laegis128_const_0(%rip), STATE2
	movdqa .Laegis128_const_1(%rip), STATE1
	pxor STATE2, STATE3
	pxor STATE1, STATE4

	/*
	 * update 10 times with KEY / KEY xor IV; the pxor target moves back
	 * by one register per round because aegis128_update leaves the state
	 * rotated:
	 */
	aegis128_update; pxor KEY, STATE4
	aegis128_update; pxor T1,  STATE3
	aegis128_update; pxor KEY, STATE2
	aegis128_update; pxor T1,  STATE1
	aegis128_update; pxor KEY, STATE0
	aegis128_update; pxor T1,  STATE4
	aegis128_update; pxor KEY, STATE3
	aegis128_update; pxor T1,  STATE2
	aegis128_update; pxor KEY, STATE1
	aegis128_update; pxor T1,  STATE0

	/* store the state (ten rounds = two full rotations): */
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_init)
231
/*
 * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
 *                               const void *data);
 *
 * Absorb the whole 16-byte blocks of the associated data into *state.  A
 * trailing partial block (length mod 16 bytes) is ignored, and for
 * length < 16 the state is left untouched.
 *
 * The loop is unrolled five times so that the register rotation done by
 * aegis128_update comes back to its starting point once per iteration.
 * Exit label .Lad_out_N is taken when the number of blocks processed is
 * N mod 5.
 */
SYM_FUNC_START(crypto_aegis128_aesni_ad)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of %rsi is not
	 * guaranteed to be zero.  Zero-extend it once; LEN can then be used
	 * as a 64-bit register with unsigned compares.
	 */
	mov %esi, %esi

	cmp $0x10, LEN
	jb .Lad_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/*
	 * One loop covers aligned and unaligned input alike: movdqu accepts
	 * any address, so a separate movdqa copy of the loop is not needed.
	 */
.align 8
.Lad_u_loop:
	movdqu 0x00(SRC), MSG
	aegis128_update
	pxor MSG, STATE4
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_1

	movdqu 0x10(SRC), MSG
	aegis128_update
	pxor MSG, STATE3
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_2

	movdqu 0x20(SRC), MSG
	aegis128_update
	pxor MSG, STATE2
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_3

	movdqu 0x30(SRC), MSG
	aegis128_update
	pxor MSG, STATE1
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_4

	movdqu 0x40(SRC), MSG
	aegis128_update
	pxor MSG, STATE0
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_0

	add $0x50, SRC
	jmp .Lad_u_loop

	/* store the state, undoing the current rotation: */
.Lad_out_0:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_1:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_2:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_3:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_4:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_ad)
383
/*
 * encrypt_block - encrypt block number \i of the current five-block group
 *
 * \a selects the load/store flavour: 'a' expands to movdqa (SRC and DST
 * must be 16-byte aligned), 'u' to movdqu.  \s0..\s4 are the state
 * registers in their current logical order (\s0 is accepted for symmetry
 * but not referenced).  The ciphertext written to DST is
 * MSG ^ \s1 ^ \s4 ^ (\s2 & \s3); the plaintext is then absorbed into the
 * state.  Leaves through .Lenc_out_\i when fewer than 16 bytes remain.
 * Clobbers MSG, T0, T1.
 */
.macro encrypt_block a s0 s1 s2 s3 s4 i
	movdq\a (\i * 0x10)(SRC), MSG
	movdqa \s2, T1
	pand \s3, T1			/* T1 = s2 & s3 */
	movdqa MSG, T0
	pxor \s1, T0
	pxor \s4, T0
	pxor T1, T0			/* T0 = ciphertext */
	movdq\a T0, (\i * 0x10)(DST)

	aegis128_update
	pxor MSG, \s4			/* absorb the plaintext */

	sub $0x10, LEN
	cmp $0x10, LEN
	jl .Lenc_out_\i
.endm
401
/*
 * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Encrypt the whole 16-byte blocks of src into dst and update *state.  A
 * trailing partial block (length mod 16 bytes) is not processed, and for
 * length < 16 nothing is read or written.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of %rsi is not
	 * guaranteed to be zero.  Zero-extend it once so that the 64-bit
	 * compares on LEN below (and in encrypt_block) see the real value.
	 */
	mov %esi, %esi

	cmp $0x10, LEN
	jb .Lenc_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/*
	 * One loop covers aligned and unaligned buffers alike: movdqu accepts
	 * any address, so a separate movdqa copy of the loop is not needed.
	 * The register names rotate with the state from block to block.
	 */
.align 8
.Lenc_u_loop:
	encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Lenc_u_loop

	/* store the state; .Lenc_out_N = input ended after block N: */
.Lenc_out_0:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_1:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_2:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_3:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_4:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_enc)
498
/*
 * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Encrypt the final partial block: the trailing length mod 16 bytes of
 * src, as gathered by __load_partial.  __store_partial is handed the same
 * length and is written for values below 16.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of %rsi is not
	 * guaranteed to be zero.  Zero-extend it before the helpers, which
	 * take LEN as a 64-bit register, get to see it.
	 */
	mov %esi, %esi

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* encrypt message: */
	call __load_partial		/* MSG = zero-padded plaintext */

	/* T0 = MSG ^ STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	movdqa MSG, T0
	pxor STATE1, T0
	pxor STATE4, T0
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, T0

	call __store_partial		/* writes the low LEN bytes of T0 */

	/* absorb the zero-padded plaintext: */
	aegis128_update
	pxor MSG, STATE4

	/* store the state (rotated by one register after one round): */
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
538
/*
 * decrypt_block - decrypt block number \i of the current five-block group
 *
 * \a selects the load/store flavour: 'a' expands to movdqa (SRC and DST
 * must be 16-byte aligned), 'u' to movdqu.  \s0..\s4 are the state
 * registers in their current logical order (\s0 is accepted for symmetry
 * but not referenced).  The plaintext is C ^ \s1 ^ \s4 ^ (\s2 & \s3); it
 * is written to DST and then absorbed into the state.  Leaves through
 * .Ldec_out_\i when fewer than 16 bytes remain.
 * Clobbers MSG, T0 (via aegis128_update) and T1.
 */
.macro decrypt_block a s0 s1 s2 s3 s4 i
	movdq\a (\i * 0x10)(SRC), MSG
	movdqa \s2, T1
	pand \s3, T1			/* T1 = s2 & s3 */
	pxor \s1, MSG
	pxor \s4, MSG
	pxor T1, MSG			/* MSG = plaintext */
	movdq\a MSG, (\i * 0x10)(DST)

	aegis128_update
	pxor MSG, \s4			/* absorb the plaintext */

	sub $0x10, LEN
	cmp $0x10, LEN
	jl .Ldec_out_\i
.endm
555
/*
 * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Decrypt the whole 16-byte blocks of src into dst and update *state.  A
 * trailing partial block (length mod 16 bytes) is not processed, and for
 * length < 16 nothing is read or written.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of %rsi is not
	 * guaranteed to be zero.  Zero-extend it once so that the 64-bit
	 * compares on LEN below (and in decrypt_block) see the real value.
	 */
	mov %esi, %esi

	cmp $0x10, LEN
	jb .Ldec_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/*
	 * One loop covers aligned and unaligned buffers alike: movdqu accepts
	 * any address, so a separate movdqa copy of the loop is not needed.
	 * The register names rotate with the state from block to block.
	 */
.align 8
.Ldec_u_loop:
	decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Ldec_u_loop

	/* store the state; .Ldec_out_N = input ended after block N: */
.Ldec_out_0:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_1:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_2:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_3:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_4:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_dec)
652
/*
 * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Decrypt the final partial block: the trailing length mod 16 bytes of
 * src, as gathered by __load_partial.  __store_partial and the byte mask
 * below are handed the same length and are written for values below 16.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of %rsi is not
	 * guaranteed to be zero.  Zero-extend it before the helpers, which
	 * take LEN as a 64-bit register, get to see it.
	 */
	mov %esi, %esi

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* decrypt message: */
	call __load_partial		/* MSG = zero-padded ciphertext */

	/* MSG ^= STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	pxor STATE1, MSG
	pxor STATE4, MSG
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, MSG

	movdqa MSG, T0
	call __store_partial		/* writes the low LEN bytes of T0 */

	/*
	 * mask with byte count: broadcast the low byte of LEN to all 16
	 * bytes of T0, then compare against 0, 1, ..., 15 so that exactly the
	 * first LEN bytes of the mask are set.  The bytes of MSG beyond LEN
	 * are keystream, not plaintext, and must not enter the state.
	 */
	movq LEN, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	movdqa .Laegis128_counter(%rip), T1	/* RIP-relative, not absolute */
	pcmpgtb T1, T0
	pand T0, MSG

	/* absorb the zero-padded plaintext: */
	aegis128_update
	pxor MSG, STATE4

	/* store the state (rotated by one register after one round): */
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)
702
/*
 * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
 *                                  u64 assoclen, u64 cryptlen);
 *
 * Absorb the length block for seven rounds, then XOR all five state
 * blocks into the 16 bytes at tag_xor (%rsi).  *state itself is not
 * written back.  %rdx and %rcx are read as full 64-bit values.
 */
SYM_FUNC_START(crypto_aegis128_aesni_final)
	FRAME_BEGIN

	/* Fetch the five state blocks */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/*
	 * Length block: %rdx in the low qword, %rcx in the high qword, both
	 * converted from bytes to bits by the shift.
	 */
	movq %rcx, T0
	pslldq $8, T0			/* move to the high qword */
	movq %rdx, MSG
	pxor T0, MSG
	psllq $3, MSG

	pxor STATE3, MSG

	/* Seven rounds absorbing the same block */
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3
	aegis128_update; pxor MSG, STATE2
	aegis128_update; pxor MSG, STATE1
	aegis128_update; pxor MSG, STATE0
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3

	/* tag_xor ^= STATE0 ^ STATE1 ^ STATE2 ^ STATE3 ^ STATE4 */
	movdqu (%rsi), MSG
	pxor STATE0, MSG
	pxor STATE1, MSG
	pxor STATE2, MSG
	pxor STATE3, MSG
	pxor STATE4, MSG
	movdqu MSG, (%rsi)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_final)