/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text

	rk		.req	x0
	out		.req	x1
	in		.req	x2
	rounds		.req	x3
	tt		.req	x2
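	/*
	 * Note that tt aliases in (x2): do_crypt below loads the input
	 * block in full before setting up the table pointer, so reusing
	 * the register is safe.
	 */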
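	/*
	 * __pair1 - perform a pair of table lookups for the encryption path:
	 * extract the byte at bit offset \shift from \in0 and \in1e and load
	 * the corresponding entries from the table pointed to by tt. For the
	 * final round (\op == b) only a single byte is loaded per entry, with
	 * the index scaled by 4 to match the 32-bit wide table.
	 */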
	.macro		__pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
	.ifc		\op\shift, b0
	ubfiz		\reg0, \in0, #2, #8
	ubfiz		\reg1, \in1e, #2, #8
	.else
	ubfx		\reg0, \in0, #\shift, #8
	ubfx		\reg1, \in1e, #\shift, #8
	.endif

	/*
	 * AArch64 cannot do byte size indexed loads from a table containing
	 * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
	 * valid instruction. So perform the shift explicitly first for the
	 * high bytes (the low byte is shifted implicitly by using ubfiz rather
	 * than ubfx above)
	 */
	.ifnc		\op, b
	ldr		\reg0, [tt, \reg0, uxtw #2]
	ldr		\reg1, [tt, \reg1, uxtw #2]
	.else
	.if		\shift > 0
	lsl		\reg0, \reg0, #2
	lsl		\reg1, \reg1, #2
	.endif
	ldrb		\reg0, [tt, \reg0, uxtw]
	ldrb		\reg1, [tt, \reg1, uxtw]
	.endif
	.endm
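	/*
	 * __pair0 - decryption counterpart of __pair1: the second lookup
	 * takes its byte from \in1d rather than \in1e, matching the byte
	 * permutation of the inverse cipher.
	 */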
	.macro		__pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
	ubfx		\reg0, \in0, #\shift, #8
	ubfx		\reg1, \in1d, #\shift, #8
	ldr\op		\reg0, [tt, \reg0, uxtw #\sz]
	ldr\op		\reg1, [tt, \reg1, uxtw #\sz]
	.endm
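	/*
	 * __hround - compute half a round, i.e., two output columns: each
	 * output word is the next round key word XORed with four rotated
	 * table lookups, one for each byte of the corresponding inputs.
	 */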
	.macro		__hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
	ldp		\out0, \out1, [rk], #8

	__pair\enc	\sz, \op, w12, w13, \in0, \in1, \in3, 0
	__pair\enc	\sz, \op, w14, w15, \in1, \in2, \in0, 8
	__pair\enc	\sz, \op, w16, w17, \in2, \in3, \in1, 16
	__pair\enc	\sz, \op, \t0, \t1, \in3, \in0, \in2, 24

	eor		\out0, \out0, w12
	eor		\out1, \out1, w13
	eor		\out0, \out0, w14, ror #24
	eor		\out1, \out1, w15, ror #24
	eor		\out0, \out0, w16, ror #16
	eor		\out1, \out1, w17, ror #16
	eor		\out0, \out0, \t0, ror #8
	eor		\out1, \out1, \t1, ror #8
	.endm
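	/*
	 * fround/iround - one full forward/inverse round producing all four
	 * output columns; \sz and \op are overridden for the final round,
	 * which uses byte loads from the last round table passed to do_crypt.
	 */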
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
	.endm
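	/*
	 * do_crypt - load one 16 byte block, XOR in the first round key and
	 * run the round loop. The CPU_BE byte swaps keep the words in the
	 * little-endian layout the tables assume on big-endian kernels. The
	 * loop performs four rounds per iteration; rounds is 10, 12 or 14,
	 * so the tbnz on bit 1 skips the first two rounds when the count is
	 * not a multiple of four, and the final round is done separately
	 * using byte loads from \ltab.
	 */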
	.macro		do_crypt, round, ttab, ltab, bsz
	ldp		w4, w5, [in]
	ldp		w6, w7, [in, #8]
	ldp		w8, w9, [rk], #16
	ldp		w10, w11, [rk, #-8]

CPU_BE(	rev		w4, w4		)
CPU_BE(	rev		w5, w5		)
CPU_BE(	rev		w6, w6		)
CPU_BE(	rev		w7, w7		)

	eor		w4, w4, w8
	eor		w5, w5, w9
	eor		w6, w6, w10
	eor		w7, w7, w11

	adr_l		tt, \ttab

	tbnz		rounds, #1, 1f

0:	\round		w8, w9, w10, w11, w4, w5, w6, w7
	\round		w4, w5, w6, w7, w8, w9, w10, w11

1:	subs		rounds, rounds, #4
	\round		w8, w9, w10, w11, w4, w5, w6, w7
	b.ls		3f
2:	\round		w4, w5, w6, w7, w8, w9, w10, w11
	b		0b
3:	adr_l		tt, \ltab
	\round		w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b

CPU_BE(	rev		w4, w4		)
CPU_BE(	rev		w5, w5		)
CPU_BE(	rev		w6, w6		)
CPU_BE(	rev		w7, w7		)

	stp		w4, w5, [out]
	stp		w6, w7, [out, #8]
	ret
	.endm
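	/*
	 * __aes_arm64_encrypt(rk, out, in, rounds): rk points to the expanded
	 * key schedule, out/in to the 16 byte output/input blocks, and rounds
	 * is the number of rounds (10, 12 or 14).
	 *
	 * The final round indexes crypto_ft_tab + 1 using byte loads: this
	 * relies on the fact that each 32-bit table entry carries the plain
	 * S-box value in its two middle byte lanes, so a separate forward
	 * S-box table is not needed.
	 */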
SYM_FUNC_START(__aes_arm64_encrypt)
	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
SYM_FUNC_END(__aes_arm64_encrypt)
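	/*
	 * __aes_arm64_decrypt(rk, out, in, rounds): same calling convention
	 * as the encryption routine above.
	 *
	 * The inverse table has no byte lane holding the unmultiplied inverse
	 * S-box value, so the final round loads from the separate
	 * crypto_aes_inv_sbox byte table (hence bsz == 0, i.e., an unscaled
	 * byte index).
	 */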
	.align		5
SYM_FUNC_START(__aes_arm64_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
SYM_FUNC_END(__aes_arm64_decrypt)