Linux Audio

Check our new training course

Loading...
  1/* SPDX-License-Identifier: GPL-2.0-only */
  2/*
  3 * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
  4 *
  5 * Copyright (C) 2013 - 2017 Linaro Ltd.
  6 * Copyright (C) 2024 Google LLC
  7 *
  8 * Author: Ard Biesheuvel <ardb@kernel.org>
  9 */
 10
 11#include <linux/linkage.h>
 12#include <asm/assembler.h>
 13
 14	.text
 15	.arch	armv8-a+crypto
 16
  17	.macro	load_round_keys, rk, nr, tmp
	/*
	 * Load the AES round keys for a \nr-round schedule from [\rk].
	 *
	 * \tmp is biased by (\nr - 10) * 16 bytes so that, regardless of
	 * key size (10/12/14 rounds), the FINAL ten round keys always end
	 * up in v14-v21 and v3-v5.  v10-v13 hold the first four keys and
	 * are only consumed by aes_encrypt when \nr > 10.
	 */
  18	sub	w\tmp, \nr, #10
  19	add	\tmp, \rk, w\tmp, sxtw #4	/* \tmp = \rk + (\nr - 10) * 16 */
  20	ld1	{v10.4s-v13.4s}, [\rk]
  21	ld1	{v14.4s-v17.4s}, [\tmp], #64
  22	ld1	{v18.4s-v21.4s}, [\tmp], #64
  23	ld1	{v3.4s-v5.4s}, [\tmp]
  24	.endm
 25
  26	.macro	dround, va, vb, vk
	/*
	 * One full AES round (aese = SubBytes+ShiftRows+AddRoundKey,
	 * aesmc = MixColumns) applied to two blocks \va and \vb that
	 * share the round key \vk.  The two streams are interleaved so
	 * the aese/aesmc pairs can fuse and execute in parallel.
	 */
  27	aese	\va\().16b, \vk\().16b
  28	aesmc	\va\().16b, \va\().16b
  29	aese	\vb\().16b, \vk\().16b
  30	aesmc	\vb\().16b, \vb\().16b
  31	.endm
 32
  33	.macro	aes_encrypt, va, vb, nr
	/*
	 * Encrypt two blocks \va and \vb in parallel using the round keys
	 * pre-loaded by load_round_keys, for a \nr-round schedule.
	 *
	 * The round count is decoded via bit tests rather than compares:
	 * bit #2 of \nr is set for 12 (0b1100) and 14 (0b1110) rounds but
	 * clear for 10 (0b1010), and bit #1 then separates 14 from 12.
	 * v10/v11 are consumed only for 12/14 rounds, v12/v13 only for 14.
	 *
	 * NOTE: the last round key (v5) is deliberately NOT applied here;
	 * the final aese uses v4 with no aesmc, and callers fold v5 in via
	 * eor (or rely on it cancelling out, as in ce_aes_ccm_final).
	 */
  34	tbz	\nr, #2, .L\@			/* 10 rounds: skip to common part */
  35	dround	\va, \vb, v10
  36	dround	\va, \vb, v11
  37	tbz	\nr, #1, .L\@			/* 12 rounds: skip v12/v13 */
  38	dround	\va, \vb, v12
  39	dround	\va, \vb, v13
  40.L\@:	.irp	v, v14, v15, v16, v17, v18, v19, v20, v21, v3
  41	dround	\va, \vb, \v
  42	.endr
  43	aese	\va\().16b, v4.16b		/* final round: no MixColumns */
  44	aese	\vb\().16b, v4.16b
  45	.endm
 46
  47	.macro	aes_ccm_do_crypt,enc
	/*
	 * Shared body of ce_aes_ccm_encrypt/ce_aes_ccm_decrypt (\enc == 1
	 * selects encryption).  Register usage follows the C prototype:
	 *
	 *   x0 - out         x1 - in          w2 - cbytes  x3 - rk
	 *   w4 - rounds      x5 - mac         x6 - ctr     x7 - final_iv
	 *
	 * The CBC-MAC (v0) and the CTR keystream block (v1) are encrypted
	 * in parallel by aes_encrypt, which leaves out the final round-key
	 * XOR with v5; the eor sequence below folds v5 into the data path
	 * so that one XOR completes both the MAC round and the CTR crypt.
	 *
	 * Falls through to ce_aes_ccm_crypt_tail for a trailing partial
	 * block, and to ce_aes_ccm_final when a final IV is supplied.
	 */
  48	load_round_keys	x3, w4, x10
  49
  50	ld1	{v0.16b}, [x5]			/* load mac */
  51	cbz	x2, ce_aes_ccm_final		/* no data: just finalize mac */
  52	ldr	x8, [x6, #8]			/* load lower ctr */
  53CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
  540:	/* outer loop */
  55	ld1	{v1.8b}, [x6]			/* load upper ctr */
  56	prfm	pldl1strm, [x1]
  57	add	x8, x8, #1			/* increment in native order */
  58	rev	x9, x8				/* back to big-endian for the block */
  59	ins	v1.d[1], x9			/* no carry in lower ctr */
  60
  61	aes_encrypt	v0, v1, w4		/* v0 = MAC, v1 = keystream (both sans v5) */
  62
  63	subs	w2, w2, #16
  64	bmi	ce_aes_ccm_crypt_tail		/* < 16 bytes left: partial block */
  65	ld1	{v2.16b}, [x1], #16		/* load next input block */
  66	.if	\enc == 1
  67	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
  68	eor	v6.16b, v1.16b, v2.16b		/* xor with crypted ctr */
  69	.else
  70	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
  71	eor	v6.16b, v2.16b, v5.16b		/* final round enc */
  72	.endif
  73	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
  74	st1	{v6.16b}, [x0], #16		/* write output block */
  75	bne	0b				/* loop while bytes remain (Z clear) */
  76CPU_LE(	rev	x8, x8			)
  77	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
  78	cbnz	x7, ce_aes_ccm_final		/* final IV given: compute tag */
  79	st1	{v0.16b}, [x5]			/* store mac */
  80	ret
  81	.endm
 82
  83SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
	/*
	 * Handle a trailing partial block.  Entered from aes_ccm_do_crypt
	 * with w2 negative: -(number of bytes the last block fell short).
	 * v0 = MAC state, v1 = keystream block (both still missing the
	 * final round-key XOR with v5), v6 = previous full output block,
	 * v22 = all-ones (encrypt) or all-zeroes (decrypt) select mask.
	 *
	 * The input pointer is rewound and a full (over-reading) 16-byte
	 * load is done; tbl/tbx with vectors taken from the 0xff-guarded
	 * .Lpermute table shift the valid bytes into place, with the 0xff
	 * guard indices yielding zero lanes.
	 */
  84	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
  85	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
  86
  87	add	x1, x1, w2, sxtw		/* rewind the input pointer (w2 < 0) */
  88	add	x0, x0, w2, sxtw		/* rewind the output pointer */
  89
  90	adr_l	x8, .Lpermute			/* load permute vectors */
  91	add	x9, x8, w2, sxtw
  92	sub	x8, x8, w2, sxtw
  93	ld1	{v7.16b-v8.16b}, [x9]
  94	ld1	{v9.16b}, [x8]
  95
  96	ld1	{v2.16b}, [x1]			/* load a full block of input */
  97	tbl	v1.16b, {v1.16b}, v7.16b	/* move keystream to end of register */
  98	eor	v7.16b, v2.16b, v1.16b		/* encrypt partial input block */
  99	bif	v2.16b, v7.16b, v22.16b		/* select plaintext */
 100	tbx	v7.16b, {v6.16b}, v8.16b	/* insert output from previous iteration */
 101	tbl	v2.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v2 */
 102	eor	v0.16b, v0.16b, v2.16b		/* fold plaintext into mac */
 103
 104	st1	{v7.16b}, [x0]			/* store output block */
 105	cbz	x7, 0f				/* no final IV: just store the mac */
 106
	/*
	 * Compute the final tag: encrypt the MAC and the supplied final
	 * counter IV in parallel, then XOR them.  Both results are missing
	 * the final round-key XOR with v5, which cancels out in the eor.
	 */
SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
 108	ld1	{v1.16b}, [x7]			/* load 1st ctriv */
 109
 110	aes_encrypt	v0, v1, w4
 111
 112	/* final round key cancels out */
 113	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
 1140:	st1	{v0.16b}, [x5]			/* store result */
 115	ret
 116SYM_FUNC_END(ce_aes_ccm_crypt_tail)
117
118	/*
119	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
120	 * 			   u8 const rk[], u32 rounds, u8 mac[],
121	 * 			   u8 ctr[], u8 const final_iv[]);
122	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
123	 * 			   u8 const rk[], u32 rounds, u8 mac[],
124	 * 			   u8 ctr[], u8 const final_iv[]);
125	 */
 126SYM_FUNC_START(ce_aes_ccm_encrypt)
	/*
	 * v22 = all-ones: in the tail path, "bif v2, v7, v22" then keeps
	 * the loaded input (the plaintext) for folding into the MAC.
	 */
 127	movi	v22.16b, #255
 128	aes_ccm_do_crypt	1
 129SYM_FUNC_END(ce_aes_ccm_encrypt)
130
 131SYM_FUNC_START(ce_aes_ccm_decrypt)
	/*
	 * v22 = all-zeroes: in the tail path, "bif v2, v7, v22" then takes
	 * the decrypted output (the plaintext) for folding into the MAC.
	 */
 132	movi	v22.16b, #0
 133	aes_ccm_do_crypt	0
 134SYM_FUNC_END(ce_aes_ccm_decrypt)
135
 136	.section ".rodata", "a"
 137	.align	6
	/*
	 * Identity byte permutation (0..15) flanked by 15 bytes of 0xff on
	 * either side.  ce_aes_ccm_crypt_tail indexes this table at
	 * .Lpermute +/- the shortfall to obtain shifted permute vectors
	 * for tbl/tbx; the 0xff guard indices are out of range for tbl and
	 * therefore produce zero bytes in the result.
	 */
 138	.fill	15, 1, 0xff
.Lpermute:
 140	.byte	0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
 141	.byte	0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
 142	.fill	15, 1, 0xff