Loading...
Note: File does not exist in v3.1.
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * AES-NI + SSE4.1 implementation of AEGIS-128
4 *
5 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
6 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
7 * Copyright 2024 Google LLC
8 */
9
10#include <linux/linkage.h>
11
/*
 * XMM register allocation used by every routine and macro in this file.
 *
 * KEY and MSG are two names for the same register (%xmm5): KEY is used by
 * aegis128_aesni_init(), MSG by the message-processing routines, so the two
 * uses never overlap.  T0 is overwritten by every aegis128_update.
 */

/* The five 128-bit words of the AEGIS-128 state */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4

/* Key (init only) / current message block (everywhere else) */
#define KEY	%xmm5
#define MSG	%xmm5

/* Scratch */
#define T0	%xmm6
#define T1	%xmm7
21
/*
 * The two 16-byte constants mixed into the initial state by
 * aegis128_aesni_init().  Read in order, the 32 bytes are consecutive
 * Fibonacci numbers reduced modulo 256.  Both constants are emitted as one
 * 32-byte mergeable entity, 16-byte aligned so each can be loaded with movdqa.
 */
.section .rodata.cst16.aegis128_const, "aM", @progbits, 32
.balign 16
.Laegis128_const_0:
	.byte	0x00, 0x01, 0x01, 0x02
	.byte	0x03, 0x05, 0x08, 0x0d
	.byte	0x15, 0x22, 0x37, 0x59
	.byte	0x90, 0xe9, 0x79, 0x62
.Laegis128_const_1:
	.byte	0xdb, 0x3d, 0x18, 0x55
	.byte	0x6d, 0xc2, 0x2f, 0xf1
	.byte	0x20, 0x11, 0x31, 0x42
	.byte	0x73, 0xb5, 0x28, 0xdd
30
/*
 * Sixteen 0xff bytes followed by sixteen 0x00 bytes.  A 16-byte load from
 * .Lzeropad_mask + 16 - n, for 0 <= n <= 16, yields a mask whose first n bytes
 * are 0xff and whose remaining bytes are zero.
 */
.section .rodata.cst32.zeropad_mask, "aM", @progbits, 32
.balign 32
.Lzeropad_mask:
	.fill	16, 1, 0xff
	.fill	16, 1, 0x00

.text
38
/*
 * aegis128_update - one state update round, without the message XOR
 *
 * Writing AESRound(x) for what aesenc applies to its destination before XORing
 * in its source operand, and S0..S4 for the values of STATE0..STATE4 on entry:
 *
 *	STATE4 = AESRound(S4) ^ S0	(new state word 0)
 *	STATE0 = AESRound(S0) ^ S1	(new state word 1)
 *	STATE1 = AESRound(S1) ^ S2	(new state word 2)
 *	STATE2 = AESRound(S2) ^ S3	(new state word 3)
 *	STATE3 = AESRound(S3) ^ S4	(new state word 4)
 *
 * The new state therefore sits one register "to the left" of the old one.
 * Callers account for the rotation themselves and XOR the message block into
 * whichever register now holds state word 0.
 *
 * Clobbers T0.
 */
.macro aegis128_update
	movdqa	STATE4, T0		/* keep S4: STATE4 is overwritten first */
	aesenc	STATE0, STATE4
	aesenc	STATE1, STATE0
	aesenc	STATE2, STATE1
	aesenc	STATE3, STATE2
	aesenc	T0, STATE3		/* uses the saved S4 */
.endm
56
/*
 * load_partial - load a short message fragment, zero-padded to 16 bytes
 *
 * In:   SRC   - pointer to the fragment
 *       %ecx  - LEN, with 1 <= LEN <= 15
 * Out:  MSG   - the LEN bytes at SRC followed by 16 - LEN zero bytes
 * Clobbers %rax, %rcx, and %r8.
 *
 * No byte outside SRC[0 .. LEN-1] is read: each size class combines a load of
 * the head of the fragment with an overlapping load that ends exactly at
 * SRC + LEN.
 */
.macro load_partial
	sub	$8, %ecx		/* %ecx = LEN - 8 */
	jle	.Lload_le8_\@

	/* 9 <= LEN <= 15: head qword plus the qword ending at SRC + LEN */
	mov	(SRC, %rcx), %rax	/* bytes LEN-8 .. LEN-1 */
	movq	(SRC), MSG		/* bytes 0 .. 7, upper half zeroed */
	neg	%ecx
	shl	$3, %ecx		/* shift count, taken mod 64 by shr */
	shr	%cl, %rax		/* drop the bytes already held in MSG */
	pinsrq	$1, %rax, MSG
	jmp	.Lload_done_\@

.Lload_le8_\@:
	add	$4, %ecx		/* %ecx = LEN - 4 */
	jl	.Lload_lt4_\@

	/* 4 <= LEN <= 8: head dword plus the dword ending at SRC + LEN */
	mov	(SRC, %rcx), %r8d	/* bytes LEN-4 .. LEN-1 */
	mov	(SRC), %eax		/* bytes 0 .. 3 */
	jmp	.Lload_merge_\@

.Lload_lt4_\@:
	/* 1 <= LEN <= 3: head byte, plus the word ending at SRC + LEN if any */
	movzbl	(SRC), %eax		/* byte 0 */
	add	$2, %ecx		/* %ecx = LEN - 2 */
	jl	.Lload_movq_\@		/* LEN == 1: the head byte is all */
	movzwl	(SRC, %rcx), %r8d	/* bytes LEN-2 .. LEN-1 */
.Lload_merge_\@:
	shl	$3, %ecx		/* byte offset of the tail -> bit offset */
	shl	%cl, %r8
	or	%r8, %rax		/* overlapping bytes agree, so OR merges */
.Lload_movq_\@:
	movq	%rax, MSG		/* also zeroes the upper half of MSG */
.Lload_done_\@:
.endm
97
/*
 * store_partial - store the first LEN bytes of an xmm register
 *
 * In:   \msg  - xmm register holding the data
 *       DST   - destination pointer
 *       %ecx  - LEN, with 1 <= LEN <= 15
 * Clobbers %rax, %rcx, and %r8.
 *
 * No byte outside DST[0 .. LEN-1] is written.  For LEN >= 4 the tail is
 * written first, as a full-width store that ends exactly at DST + LEN and
 * carries rotated-in filler in its low bytes; the store of the head that
 * follows overwrites that filler.  The order of the two stores must therefore
 * not be changed.
 */
.macro store_partial msg
	sub	$8, %ecx		/* %ecx = LEN - 8 */
	jl	.Lstore_lt8_\@

	/* 8 <= LEN <= 15 */
	mov	%ecx, %r8d		/* offset of the tail store */
	pextrq	$1, \msg, %rax		/* bytes 8 .. 15 */
	shl	$3, %ecx
	ror	%cl, %rax		/* wanted bytes move to the top */
	mov	%rax, (DST, %r8)	/* tail: ends at DST + LEN */
	movq	\msg, (DST)		/* head: bytes 0 .. 7 */
	jmp	.Lstore_done_\@

.Lstore_lt8_\@:
	add	$4, %ecx		/* %ecx = LEN - 4 */
	jl	.Lstore_lt4_\@

	/* 4 <= LEN <= 7 */
	mov	%ecx, %r8d		/* offset of the tail store */
	pextrd	$1, \msg, %eax		/* bytes 4 .. 7 */
	shl	$3, %ecx
	ror	%cl, %eax		/* wanted bytes move to the top */
	mov	%eax, (DST, %r8)	/* tail: ends at DST + LEN */
	movd	\msg, (DST)		/* head: bytes 0 .. 3 */
	jmp	.Lstore_done_\@

.Lstore_lt4_\@:
	/* 1 <= LEN <= 3: byte stores; %ecx is LEN - 4 here */
	pextrb	$0, \msg, 0(DST)
	cmp	$-2, %ecx		/* compare LEN with 2 */
	jl	.Lstore_done_\@		/* LEN == 1 */
	pextrb	$1, \msg, 1(DST)
	je	.Lstore_done_\@		/* LEN == 2, flags still from the cmp */
	pextrb	$2, \msg, 2(DST)
.Lstore_done_\@:
.endm
138
/*
 * void aegis128_aesni_init(struct aegis_state *state,
 *			    const struct aegis_block *key,
 *			    const u8 iv[AEGIS128_NONCE_SIZE]);
 *
 * Initialize the state from a key and an IV, 16 bytes of each being read.
 * The starting state is
 *
 *	word 0 = key ^ iv
 *	word 1 = .Laegis128_const_1
 *	word 2 = .Laegis128_const_0
 *	word 3 = key ^ .Laegis128_const_0
 *	word 4 = key ^ .Laegis128_const_1
 *
 * followed by ten update rounds that absorb key and key ^ iv alternately.
 * The resulting 80-byte state is written to *state.
 *
 * None of the three pointers needs to be 16-byte aligned.
 */
SYM_FUNC_START(aegis128_aesni_init)
	.set STATEP, %rdi
	.set KEYP, %rsi
	.set IVP, %rdx

	/*
	 * Load key and IV.  Unaligned loads are used for both, as for all other
	 * caller-provided memory in this file; an aligned load (movdqa) would
	 * fault on a key pointer that is not 16-byte aligned.
	 */
	movdqu	(KEYP), KEY
	movdqu	(IVP), T1
	pxor	KEY, T1			/* T1 = key ^ iv */

	/* Build the starting state (the constants are 16-byte aligned): */
	movdqa	T1, STATE0
	movdqa	.Laegis128_const_1(%rip), STATE1
	movdqa	.Laegis128_const_0(%rip), STATE2
	movdqa	KEY, STATE3
	pxor	STATE2, STATE3
	movdqa	KEY, STATE4
	pxor	STATE1, STATE4

	/*
	 * Ten rounds.  Each update leaves the new state word 0 one register
	 * further down (STATE4, STATE3, ..., STATE0, STATE4, ...), which is
	 * where the round input is XORed in.  After ten rounds the words are
	 * back in STATE0..STATE4 order.
	 */
	aegis128_update; pxor KEY, STATE4
	aegis128_update; pxor T1,  STATE3
	aegis128_update; pxor KEY, STATE2
	aegis128_update; pxor T1,  STATE1
	aegis128_update; pxor KEY, STATE0
	aegis128_update; pxor T1,  STATE4
	aegis128_update; pxor KEY, STATE3
	aegis128_update; pxor T1,  STATE2
	aegis128_update; pxor KEY, STATE1
	aegis128_update; pxor T1,  STATE0

	/* Write the state out: */
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)
	RET
SYM_FUNC_END(aegis128_aesni_init)
185
/*
 * Absorb one 16-byte block of associated data.
 *
 *	\s0 - register that holds state word 0 after the update
 *	\i  - index (0..4) of the block within the current group of five
 *
 * Branches to .Lad_done_\i when this was the last block.
 */
.macro ad_block s0 i
	movdqu	(\i * 0x10)(SRC), MSG
	aegis128_update
	pxor	MSG, \s0

	sub	$0x10, LEN
	jz	.Lad_done_\i
.endm

/*
 * void aegis128_aesni_ad(struct aegis_state *state, const u8 *data,
 *			  unsigned int len);
 *
 * Absorb len bytes of associated data into the state.
 *
 * len must be a multiple of 16.  A len of 0 returns at once and leaves *state
 * untouched.
 */
SYM_FUNC_START(aegis128_aesni_ad)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set LEN, %edx

	test	LEN, LEN
	jz	.Lad_out

	/* Bring the state into registers: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/*
	 * Five blocks per iteration: every update moves the state one register
	 * along, so after five of them the words are back in STATE0..STATE4.
	 */
.align 8
.Lad_loop:
	ad_block STATE4 0
	ad_block STATE3 1
	ad_block STATE2 2
	ad_block STATE1 3
	ad_block STATE0 4

	add	$0x50, SRC
	jmp	.Lad_loop

	/*
	 * Exits, one per position in the group.  Each writes the registers back
	 * in logical word order for the rotation reached at that point.
	 */
.Lad_done_0:
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	RET

.Lad_done_1:
	movdqu	STATE3, 0x00(STATEP)
	movdqu	STATE4, 0x10(STATEP)
	movdqu	STATE0, 0x20(STATEP)
	movdqu	STATE1, 0x30(STATEP)
	movdqu	STATE2, 0x40(STATEP)
	RET

.Lad_done_2:
	movdqu	STATE2, 0x00(STATEP)
	movdqu	STATE3, 0x10(STATEP)
	movdqu	STATE4, 0x20(STATEP)
	movdqu	STATE0, 0x30(STATEP)
	movdqu	STATE1, 0x40(STATEP)
	RET

.Lad_done_3:
	movdqu	STATE1, 0x00(STATEP)
	movdqu	STATE2, 0x10(STATEP)
	movdqu	STATE3, 0x20(STATEP)
	movdqu	STATE4, 0x30(STATEP)
	movdqu	STATE0, 0x40(STATEP)
	RET

.Lad_done_4:
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)
.Lad_out:
	RET
SYM_FUNC_END(aegis128_aesni_ad)
284
/*
 * encrypt_block - encrypt block number \i of the current group of five
 *
 *	\s0..\s4 - the registers currently holding state words 0..4.  \s0 is
 *		   accepted for symmetry only; the macro body does not use it.
 *	\i	 - block index, selecting the 16 bytes at offset \i * 16 from
 *		   SRC and DST
 *
 * Stores src ^ \s1 ^ \s4 ^ (\s2 & \s3) to DST, runs the state update, and XORs
 * the plaintext into \s4, the register that holds state word 0 after the
 * update.  Subtracts 16 from LEN and branches to .Lenc_out_\i when it reaches
 * zero.
 *
 * Clobbers MSG, T0, T1.
 */
.macro encrypt_block s0 s1 s2 s3 s4 i
	/* T1 = s2 & s3 */
	movdqa	\s2, T1
	pand	\s3, T1

	/* T0 = plaintext ^ s1 ^ s4 ^ T1; MSG keeps the plaintext */
	movdqu	(\i * 0x10)(SRC), MSG
	movdqa	MSG, T0
	pxor	\s1, T0
	pxor	\s4, T0
	pxor	T1, T0
	movdqu	T0, (\i * 0x10)(DST)

	aegis128_update
	pxor	MSG, \s4

	sub	$0x10, LEN
	jz	.Lenc_out_\i
.endm
301
/*
 * void aegis128_aesni_enc(struct aegis_state *state, const u8 *src, u8 *dst,
 *			   unsigned int len);
 *
 * Encrypt len bytes from src to dst in whole 16-byte blocks and absorb the
 * plaintext into the state.
 *
 * len must be a multiple of 16.  A len of 0 returns at once without touching
 * *state, src or dst.  Without that check the first block would be processed
 * regardless, and because the loop only stops when the counter hits exactly
 * zero it would keep going until the 32-bit counter wrapped around.
 */
SYM_FUNC_START(aegis128_aesni_enc)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx

	test	LEN, LEN
	jz	.Lenc_out

	/* Bring the state into registers: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/*
	 * Five blocks per iteration.  The register list handed to each
	 * encrypt_block is rotated by one to follow the state, which is back
	 * in STATE0..STATE4 order after the fifth block.
	 */
.align 8
.Lenc_loop:
	encrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Lenc_loop

	/*
	 * Exits: .Lenc_out_N is taken after block N of a group and writes the
	 * registers back in logical word order for that rotation.
	 */
.Lenc_out_0:
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	RET

.Lenc_out_1:
	movdqu	STATE3, 0x00(STATEP)
	movdqu	STATE4, 0x10(STATEP)
	movdqu	STATE0, 0x20(STATEP)
	movdqu	STATE1, 0x30(STATEP)
	movdqu	STATE2, 0x40(STATEP)
	RET

.Lenc_out_2:
	movdqu	STATE2, 0x00(STATEP)
	movdqu	STATE3, 0x10(STATEP)
	movdqu	STATE4, 0x20(STATEP)
	movdqu	STATE0, 0x30(STATEP)
	movdqu	STATE1, 0x40(STATEP)
	RET

.Lenc_out_3:
	movdqu	STATE1, 0x00(STATEP)
	movdqu	STATE2, 0x10(STATEP)
	movdqu	STATE3, 0x20(STATEP)
	movdqu	STATE4, 0x30(STATEP)
	movdqu	STATE0, 0x40(STATEP)
	RET

.Lenc_out_4:
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)
.Lenc_out:
	RET
SYM_FUNC_END(aegis128_aesni_enc)
375
/*
 * void aegis128_aesni_enc_tail(struct aegis_state *state, const u8 *src,
 *				u8 *dst, unsigned int len);
 *
 * Encrypt a final partial block of len bytes, 1 <= len <= 15 (the range
 * load_partial and store_partial are written for), and absorb the plaintext,
 * zero-padded to 16 bytes, into the state.  Exactly len bytes are read from
 * src and written to dst.
 *
 * A len of 0 returns at once without touching *state, src or dst.  Without
 * that check the partial-block macros would still read src[0] and write
 * dst[0], and a block would be absorbed into the state.
 */
SYM_FUNC_START(aegis128_aesni_enc_tail)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx	/* {load,store}_partial rely on this being %ecx */

	test	LEN, LEN
	jz	.Lenc_tail_out

	/* Bring the state into registers: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* MSG = plaintext, zero-padded.  load_partial destroys %ecx. */
	mov	LEN, %r9d
	load_partial

	/* T0 = MSG ^ STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	movdqa	MSG, T0
	pxor	STATE1, T0
	pxor	STATE4, T0
	movdqa	STATE2, T1
	pand	STATE3, T1
	pxor	T1, T0

	/* Write out len bytes of it, before the update overwrites T0. */
	mov	%r9d, LEN
	store_partial T0

	/* Absorb the padded plaintext. */
	aegis128_update
	pxor	MSG, STATE4

	/* Write the state back; one update moved word 0 into STATE4. */
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
.Lenc_tail_out:
	RET
SYM_FUNC_END(aegis128_aesni_enc_tail)
418
/*
 * decrypt_block - decrypt block number \i of the current group of five
 *
 *	\s0..\s4 - the registers currently holding state words 0..4.  \s0 is
 *		   accepted for symmetry only; the macro body does not use it.
 *	\i	 - block index, selecting the 16 bytes at offset \i * 16 from
 *		   SRC and DST
 *
 * Stores src ^ \s1 ^ \s4 ^ (\s2 & \s3) to DST, runs the state update, and XORs
 * that same value (the plaintext) into \s4, the register that holds state
 * word 0 after the update.  Subtracts 16 from LEN and branches to
 * .Ldec_out_\i when it reaches zero.
 *
 * Clobbers MSG, T0 (through aegis128_update), T1.
 */
.macro decrypt_block s0 s1 s2 s3 s4 i
	/* T1 = s2 & s3 */
	movdqa	\s2, T1
	pand	\s3, T1

	/* MSG = ciphertext ^ s1 ^ s4 ^ T1 */
	movdqu	(\i * 0x10)(SRC), MSG
	pxor	\s1, MSG
	pxor	\s4, MSG
	pxor	T1, MSG
	movdqu	MSG, (\i * 0x10)(DST)

	aegis128_update
	pxor	MSG, \s4

	sub	$0x10, LEN
	jz	.Ldec_out_\i
.endm
434
/*
 * void aegis128_aesni_dec(struct aegis_state *state, const u8 *src, u8 *dst,
 *			   unsigned int len);
 *
 * Decrypt len bytes from src to dst in whole 16-byte blocks and absorb the
 * recovered plaintext into the state.
 *
 * len must be a multiple of 16.  A len of 0 returns at once without touching
 * *state, src or dst.  Without that check the first block would be processed
 * regardless, and because the loop only stops when the counter hits exactly
 * zero it would keep going until the 32-bit counter wrapped around.
 */
SYM_FUNC_START(aegis128_aesni_dec)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx

	test	LEN, LEN
	jz	.Ldec_out

	/* Bring the state into registers: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/*
	 * Five blocks per iteration.  The register list handed to each
	 * decrypt_block is rotated by one to follow the state, which is back
	 * in STATE0..STATE4 order after the fifth block.
	 */
.align 8
.Ldec_loop:
	decrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Ldec_loop

	/*
	 * Exits: .Ldec_out_N is taken after block N of a group and writes the
	 * registers back in logical word order for that rotation.
	 */
.Ldec_out_0:
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	RET

.Ldec_out_1:
	movdqu	STATE3, 0x00(STATEP)
	movdqu	STATE4, 0x10(STATEP)
	movdqu	STATE0, 0x20(STATEP)
	movdqu	STATE1, 0x30(STATEP)
	movdqu	STATE2, 0x40(STATEP)
	RET

.Ldec_out_2:
	movdqu	STATE2, 0x00(STATEP)
	movdqu	STATE3, 0x10(STATEP)
	movdqu	STATE4, 0x20(STATEP)
	movdqu	STATE0, 0x30(STATEP)
	movdqu	STATE1, 0x40(STATEP)
	RET

.Ldec_out_3:
	movdqu	STATE1, 0x00(STATEP)
	movdqu	STATE2, 0x10(STATEP)
	movdqu	STATE3, 0x20(STATEP)
	movdqu	STATE4, 0x30(STATEP)
	movdqu	STATE0, 0x40(STATEP)
	RET

.Ldec_out_4:
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)
.Ldec_out:
	RET
SYM_FUNC_END(aegis128_aesni_dec)
508
/*
 * void aegis128_aesni_dec_tail(struct aegis_state *state, const u8 *src,
 *				u8 *dst, unsigned int len);
 *
 * Decrypt a final partial block of len bytes, 1 <= len <= 15 (the range
 * load_partial and store_partial are written for), and absorb the recovered
 * plaintext, zero-padded to 16 bytes, into the state.  Exactly len bytes are
 * read from src and written to dst.
 *
 * A len of 0 returns at once without touching *state, src or dst.  Without
 * that check the partial-block macros would still read src[0] and write
 * dst[0], and a block would be absorbed into the state.
 */
SYM_FUNC_START(aegis128_aesni_dec_tail)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx	/* {load,store}_partial rely on this being %ecx */

	test	LEN, LEN
	jz	.Ldec_tail_out

	/* Bring the state into registers: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* MSG = ciphertext, zero-padded.  load_partial destroys %ecx. */
	mov	LEN, %r9d
	load_partial

	/* MSG ^= STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	pxor	STATE1, MSG
	pxor	STATE4, MSG
	movdqa	STATE2, T1
	pand	STATE3, T1
	pxor	T1, MSG

	/* The first len bytes of MSG are the plaintext; write them out. */
	mov	%r9d, LEN
	store_partial MSG

	/*
	 * Bytes len..15 of MSG now hold keystream rather than zero padding.
	 * Clear them with a mask of len 0xff bytes followed by zeros, read
	 * from .Lzeropad_mask + 16 - len.
	 */
	lea	.Lzeropad_mask+16(%rip), %rax
	sub	%r9, %rax
	movdqu	(%rax), T0
	pand	T0, MSG

	/* Absorb the padded plaintext. */
	aegis128_update
	pxor	MSG, STATE4

	/* Write the state back; one update moved word 0 into STATE4. */
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
.Ldec_tail_out:
	RET
SYM_FUNC_END(aegis128_aesni_dec_tail)
556
/*
 * void aegis128_aesni_final(struct aegis_state *state,
 *			     struct aegis_block *tag_xor,
 *			     unsigned int assoclen, unsigned int cryptlen);
 *
 * Finish the computation: run seven update rounds that each absorb
 * (state word 3) ^ (length block), then XOR all five state words into the 16
 * bytes at tag_xor.
 *
 * The length block holds assoclen * 8 in its low 64 bits and cryptlen * 8 in
 * its high 64 bits.  *state is only read; the final state is not written
 * back.
 */
SYM_FUNC_START(aegis128_aesni_final)
	.set STATEP, %rdi
	.set TAG_XOR, %rsi
	.set ASSOCLEN, %edx
	.set CRYPTLEN, %ecx

	/* Bring the state into registers: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/*
	 * Length block: each 32-bit length lands zero-extended in its own
	 * 64-bit lane, and the per-lane shift turns bytes into bits.
	 */
	movd	ASSOCLEN, MSG		/* low lane; the rest of MSG is zeroed */
	pinsrd	$2, CRYPTLEN, MSG	/* low dword of the high lane */
	psllq	$3, MSG

	pxor	STATE3, MSG

	/* Seven rounds, each absorbing the same MSG into the new word 0: */
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3
	aegis128_update; pxor MSG, STATE2
	aegis128_update; pxor MSG, STATE1
	aegis128_update; pxor MSG, STATE0
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3

	/*
	 * Fold the whole state into *tag_xor.  All five words are XORed
	 * together, so the register rotation does not matter here.
	 */
	movdqu	(TAG_XOR), MSG

	pxor	STATE0, MSG
	pxor	STATE1, MSG
	pxor	STATE2, MSG
	pxor	STATE3, MSG
	pxor	STATE4, MSG

	movdqu	MSG, (TAG_XOR)
	RET
SYM_FUNC_END(aegis128_aesni_final)