/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

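	/*
	 * Register usage (function arguments per AAPCS, aliased below):
	 *   r0: round key pointer
	 *   r1: number of rounds
	 *   r2: input block
	 *   r3: output block
	 */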
	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

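	/*
	 * Select byte \idx of word \in into \out. Prior to ARMv7 the byte is
	 * only masked, not shifted down; __load compensates for that in its
	 * addressing mode.
	 */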
	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

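	/*
	 * Load a table entry: index ttab by the byte that __select left in
	 * \in, scaled by 1 << \sz bytes. \op is appended to the mnemonic, so
	 * passing 'b' turns this into a byte load for the final round.
	 */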
	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm

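	/*
	 * Half a table-based AES round: compute two output columns \out0 and
	 * \out1 from the input columns \in0-\in3, then xor in the next two
	 * round key words. \enc selects the byte routing for encryption
	 * (ShiftRows) or decryption (InvShiftRows); \t3 and \t4 are scratch
	 * registers.
	 */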
	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round and we're done with all data-dependent table
	 * lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

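	/*
	 * A full AES round: two __hround invocations produce all four output
	 * columns. fround is the encryption round, iround the decryption
	 * round; the final round overrides \sz and \op to do byte-wide S-box
	 * loads instead of 32-bit table loads.
	 */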
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm

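	/*
	 * Byte-swap a 32-bit word: use the rev instruction on ARMv6+, or an
	 * open-coded shift/mask sequence (clobbering t0-t2) on older cores.
	 */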
	.macro		__rev, out, in
	.if		__LINUX_ARM_ARCH__ < 6
	lsl		t0, \in, #24
	and		t1, \in, #0xff00
	and		t2, \in, #0xff0000
	orr		\out, t0, \in, lsr #24
	orr		\out, \out, t1, lsl #8
	orr		\out, \out, t2, lsr #8
	.else
	rev		\out, \in
	.endif
	.endm

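	/*
	 * Load the address of symbol \sym into \out, optionally predicated on
	 * condition \c: a movw/movt pair on ARMv7+, a literal pool load
	 * otherwise.
	 */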
	.macro		__adrl, out, sym, c
	.if		__LINUX_ARM_ARCH__ < 7
	ldr\c		\out, =\sym
	.else
	movw\c		\out, #:lower16:\sym
	movt\c		\out, #:upper16:\sym
	.endif
	.endm

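	/*
	 * Shared body of the encryption and decryption routines. \round is the
	 * per-round macro (fround or iround), \ttab the main lookup table,
	 * \ltab the table for the final round (left blank for encryption), and
	 * \bsz the load scale passed to that final round.
	 */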
	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

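	// Initial AddRoundKey: xor the first four round key words into the state.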
	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	__adrl		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
	 * L1 cache, assuming cacheline size >= 32. This is a hardening measure
	 * intended to make cache-timing attacks more difficult. They may not
	 * be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr

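	/*
	 * Each pass through the loop below performs four rounds, alternating
	 * the state between r4-r7 and r8-r11. If bit 1 of the round count is
	 * set, enter at 1: so that the first pass only performs two rounds;
	 * the final round is handled separately at 2:.
	 */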
	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

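	/*
	 * Final round. With no \ltab (encryption), reuse the ft table at an
	 * offset of one byte so that the byte-wide loads below pick up the
	 * plain S-box value stored in each 32-bit entry; for decryption,
	 * switch to the inverse S-box table instead.
	 */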
2:	.ifb		\ltab
	add		ttab, ttab, #1
	.else
	__adrl		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)