Loading...
Note: File does not exist in v5.4.
1/* SPDX-License-Identifier: GPL-2.0 */
2
3#include <linux/stringify.h>
4#include <linux/linkage.h>
5#include <asm/alternative.h>
6#include <asm/dwarf.h>
7#include <asm/fpu-insn.h>
8
/* Working state: one 4x32-bit row of the ChaCha matrix per vector register */
#define STATE0	%v0
#define STATE1	%v1
#define STATE2	%v2
#define STATE3	%v3

/* Input state, kept across blocks and added back after the rounds */
#define COPY0	%v4
#define COPY1	%v5
#define COPY2	%v6
#define COPY3	%v7

/* Byte selectors for VPERM (little-endian conversion of the output) */
#define BEPERM	%v19

/* Byte-swapped output rows, used only on the VPERM store path */
#define TMP0	%v20
#define TMP1	%v21
#define TMP2	%v22
#define TMP3	%v23
22
23 .section .rodata
24
25 .balign 32
SYM_DATA_START_LOCAL(chacha20_constants)
	/* Offset 0: "expand 32-byte k", emitted as words (endian-neutral) */
	.long	0x61707865
	.long	0x3320646e
	.long	0x79622d32
	.long	0x6b206574
	/* Offset 16: VPERM byte selectors that swap the bytes of each word */
	.long	0x03020100
	.long	0x07060504
	.long	0x0b0a0908
	.long	0x0f0e0d0c
SYM_DATA_END(chacha20_constants)
30
31 .text
/*
 * CHACHA20_QROUNDS - four ChaCha quarter-rounds computed in parallel.
 *
 * Every operand is a vector register holding one row of the state, so
 * each instruction works on four 32-bit words at once:
 *
 *	ra += rb; rd = rotl32(rd ^ ra, 16);
 *	rc += rd; rb = rotl32(rb ^ rc, 12);
 *	ra += rb; rd = rotl32(rd ^ ra,  8);
 *	rc += rd; rb = rotl32(rb ^ rc,  7);
 */
.macro	CHACHA20_QROUNDS ra, rb, rc, rd
	VAF	\ra,\ra,\rb
	VX	\rd,\rd,\ra
	VERLLF	\rd,\rd,16

	VAF	\rc,\rc,\rd
	VX	\rb,\rb,\rc
	VERLLF	\rb,\rb,12

	VAF	\ra,\ra,\rb
	VX	\rd,\rd,\ra
	VERLLF	\rd,\rd,8

	VAF	\rc,\rc,\rd
	VX	\rb,\rb,\rc
	VERLLF	\rb,\rb,7
.endm

/*
 * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
 *				       const uint8_t *key,
 *				       uint32_t *counter,
 *				       size_t nblocks)
 *
 * ChaCha20 block generator for the s390 vDSO. Writes nblocks 64-byte
 * blocks of output to dst_bytes, using the given key, a zero nonce and
 * the 8-byte counter, and stores the advanced counter back on return.
 * nblocks must be positive. Nothing is spilled to the stack.
 *
 * General register usage:
 *	%r0	double-round loop counter
 *	%r1	address of chacha20_constants, afterwards zero
 *	%r2	dst_bytes, advanced by 64 after every block
 *	%r3	key pointer on entry, afterwards the 64-bit counter value
 *	%r4	counter pointer
 *	%r5	nblocks, counted down
 */
SYM_FUNC_START(__arch_chacha20_blocks_nostack)
	CFI_STARTPROC
	larl	%r1,chacha20_constants

	/* Row 0 of the input state: "expand 32-byte k" */
	VL	COPY0,0,,%r1

	/*
	 * Load the VPERM byte selectors into BEPERM. With facility 148 the
	 * load is replaced by a never-taken branch, as VPERM is not used.
	 */
	ALTERNATIVE __stringify(VL BEPERM,16,,%r1), "brcl 0,0", ALT_FACILITY(148)

	/* Rows 1 and 2 of the input state: the key */
	VLM	COPY1,COPY2,0,%r3

	/* Row 3 of the input state: counter followed by the zero nonce */
	lg	%r3,0(%r4)
	VZERO	COPY3
	VLVGG	COPY3,%r3,0

	/* %r1 = 0, the addend for the add-with-carry in the counter update */
	lghi	%r1,0
.Lnext_block:
	/* Start each block from the pristine input state */
	VLR	STATE0,COPY0
	VLR	STATE1,COPY1
	VLR	STATE2,COPY2
	VLR	STATE3,COPY3

	/* 10 double rounds */
	lghi	%r0,10
.Ldouble_round:
	/* First half of the double round */
	CHACHA20_QROUNDS STATE0,STATE1,STATE2,STATE3

	/* STATE1[0,1,2,3] = STATE1[1,2,3,0] */
	VSLDB	STATE1,STATE1,STATE1,4
	/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
	VSLDB	STATE2,STATE2,STATE2,8
	/* STATE3[0,1,2,3] = STATE3[3,0,1,2] */
	VSLDB	STATE3,STATE3,STATE3,12

	/* Second half, on the rotated rows */
	CHACHA20_QROUNDS STATE0,STATE1,STATE2,STATE3

	/* Undo the rotation: STATE1[0,1,2,3] = STATE1[3,0,1,2] */
	VSLDB	STATE1,STATE1,STATE1,12
	/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
	VSLDB	STATE2,STATE2,STATE2,8
	/* STATE3[0,1,2,3] = STATE3[1,2,3,0] */
	VSLDB	STATE3,STATE3,STATE3,4
	brctg	%r0,.Ldouble_round

	/* Output row i = STATE row i + COPY row i */
	VAF	STATE0,STATE0,COPY0
	VAF	STATE1,STATE1,COPY1
	VAF	STATE2,STATE2,COPY2
	VAF	STATE3,STATE3,COPY3

	/*
	 * Store the 64 output bytes with little-endian 32-bit words. The
	 * default variant byte-swaps through VPERM into TMP0-TMP3; the
	 * facility 148 variant uses VSTBRF on STATE0-STATE3 directly and
	 * ends in a never-taken branch.
	 */
	ALTERNATIVE \
		__stringify( \
		/* Swap bytes of every word, then store all four rows */ \
		VPERM	TMP0,STATE0,STATE0,BEPERM; \
		VPERM	TMP1,STATE1,STATE1,BEPERM; \
		VPERM	TMP2,STATE2,STATE2,BEPERM; \
		VPERM	TMP3,STATE3,STATE3,BEPERM; \
		VSTM	TMP0,TMP3,0,%r2), \
		__stringify( \
		/* Byte-reversed word stores, one row at a time */ \
		VSTBRF	STATE0,0,,%r2; \
		VSTBRF	STATE1,16,,%r2; \
		VSTBRF	STATE2,32,,%r2; \
		VSTBRF	STATE3,48,,%r2; \
		brcl	0,0), \
		ALT_FACILITY(148)

	/*
	 * Advance the counter held in %r3: add 1 to its high word, then
	 * add the resulting carry (plus %r1 = 0) to its low word.
	 */
	/* alsih %r3,1 */
	.insn	rilu,0xcc0a00000000,%r3,1
	alcr	%r3,%r1
	VLVGG	COPY3,%r3,0

	/* Move on to the next 64 output bytes; loop while blocks remain */
	aghi	%r2,64
	brctg	%r5,.Lnext_block

	/* Write the advanced counter back to the caller */
	stg	%r3,0(%r4)

	/* Wipe the registers that held key material or derived state */
	VZERO	STATE0
	VZERO	STATE1
	VZERO	STATE2
	VZERO	STATE3
	VZERO	COPY1
	VZERO	COPY2

	/* With facility 148 TMP0-TMP3 were never written: return here */
	ALTERNATIVE "nopr", "br %r14", ALT_FACILITY(148)

	/* VPERM path only: the temporaries hold output bytes, wipe them */
	VZERO	TMP0
	VZERO	TMP1
	VZERO	TMP2
	VZERO	TMP3

	br	%r14
	CFI_ENDPROC
SYM_FUNC_END(__arch_chacha20_blocks_nostack)