/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

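	/*
	 * Arguments arrive per the AAPCS: rk = round key array, rounds =
	 * number of rounds, in/out = 16-byte blocks. Note that t1/t2 alias
	 * in/out (r2/r3), which is why 'out' is saved on the stack and
	 * reloaded from there before the result is stored (see do_crypt).
	 */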
	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

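	/*
	 * __select: extract byte \idx of word \in into \out. Pre-v7 cores
	 * lack ubfx, so the byte is only masked in place there; __load
	 * compensates by folding the right shift into its addressing mode.
	 */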
	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

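	/*
	 * __load: look up index \in (possibly still shifted left by 8 * \idx
	 * on pre-v7, see __select) in the table at ttab, whose entries are
	 * (1 << \sz) bytes wide. \op is empty for word loads, or 'b' for the
	 * byte loads used in the final round.
	 */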
	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm

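	/*
	 * __hround: compute two output columns of one AES (inverse) round:
	 * four table lookups per column, combined with rotations and XORs,
	 * then XORed with the next two round key words from rk. \enc selects
	 * the byte permutation for encryption vs decryption.
	 */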
	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round and we're done with all data-dependent table
	 * lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

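	/*
	 * fround/iround: one full forward/inverse round, i.e. two __hround
	 * invocations covering all four columns. The scratch registers
	 * passed into the second half deliberately alias input registers
	 * that have already been consumed by the time they are written.
	 */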
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm

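	/*
	 * do_crypt: shared body of the encrypt and decrypt entry points.
	 * \round is fround or iround, \ttab the main lookup table, \ltab the
	 * table for the last round (may be empty, see below), and \bsz the
	 * log2 entry size passed to the final \round.
	 */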
	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

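	// XOR in the first round key (the initial AddRoundKey step)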
	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	mov_l		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
	 * intended to make cache-timing attacks more difficult.  They may not
	 * be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr

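	/*
	 * The loop below executes rounds - 1 full rounds, two per iteration;
	 * 'rounds' is 10, 12 or 14, so the tst/bne picks the entry point that
	 * makes the odd round count come out right before the final round.
	 */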
	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

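	/*
	 * Final round: encryption passes no \ltab and instead reuses the
	 * already-prefetched ft table at byte offset 1, on the assumption
	 * (from the crypto_ft_tab entry layout) that byte 1 of each 32-bit
	 * entry holds the plain S-box value; decryption loads from a
	 * separate inverse S-box byte table.
	 */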
2:	.ifb		\ltab
	add		ttab, ttab, #1
	.else
	mov_l		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

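	// 'out' (r3) was clobbered as t2 above, so reload it from the stack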
	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

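	/*
	 * Callable from C as (prototype inferred from the register usage
	 * above; likewise for __aes_arm_decrypt):
	 *   void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
	 */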
ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)