Linux Audio

Check our new training course

Embedded Linux training

Mar 10-20, 2025, special US time zones
Register
Loading...
Note: File does not exist in v5.4.
  1/* SPDX-License-Identifier: GPL-2.0 */
  2
  3#include <linux/stringify.h>
  4#include <linux/linkage.h>
  5#include <asm/alternative.h>
  6#include <asm/dwarf.h>
  7#include <asm/fpu-insn.h>
  8
/*
 * Register aliases: the working ChaCha state lives in STATE0-STATE3, the
 * pristine input block in COPY0-COPY3 (added back after the rounds).
 * BEPERM holds the VPERM byte selectors used for the big- to
 * little-endian conversion on machines without VSTBRF; TMP0-TMP3 are
 * scratch for the byte-swapped output on that same path.
 */
#define STATE0	%v0
#define STATE1	%v1
#define STATE2	%v2
#define STATE3	%v3
#define COPY0	%v4
#define COPY1	%v5
#define COPY2	%v6
#define COPY3	%v7
#define BEPERM	%v19
#define TMP0	%v20
#define TMP1	%v21
#define TMP2	%v22
#define TMP3	%v23
 22
	.section .rodata

	.balign 32
/*
 * Two 16-byte rows, addressed relative to chacha20_constants:
 *   offset 0:  the ChaCha "expand 32-byte k" constant as four 32-bit words
 *   offset 16: VPERM byte selectors performing a per-word byte swap
 *              (loaded into BEPERM when VSTBRF is not available)
 */
SYM_DATA_START_LOCAL(chacha20_constants)
	.long	0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
	.long	0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
SYM_DATA_END(chacha20_constants)
 30
 31	.text
/*
 * s390 ChaCha20 implementation meant for vDSO. Produces a given positive
 * number of blocks of output with nonce 0, taking an input key and an
 * 8-byte counter. Does not spill to the stack.
 *
 * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
 *				       const uint8_t *key,
 *				       uint32_t *counter,
 *				       size_t nblocks)
 */
SYM_FUNC_START(__arch_chacha20_blocks_nostack)
	CFI_STARTPROC
	/*
	 * Register usage:
	 *   %r2 = dst_bytes (advanced by 64 per block)
	 *   %r3 = key pointer, then reused for the 64-bit counter value
	 *   %r4 = counter pointer
	 *   %r5 = nblocks (block-loop counter)
	 *   %r1 = &chacha20_constants, then constant 0 (carry source for alcr)
	 *   %r0 = double-round counter
	 */
	larl	%r1,chacha20_constants

	/* COPY0 = "expand 32-byte k" */
	VL	COPY0,0,,%r1

	/*
	 * BEPERM = byte selectors for VPERM. Patched to a nop when
	 * facility 148 is installed: the VSTBRF store path used in that
	 * case never reads BEPERM.
	 */
	ALTERNATIVE __stringify(VL BEPERM,16,,%r1), "brcl 0,0", ALT_FACILITY(148)

	/* COPY1,COPY2 = key */
	VLM	COPY1,COPY2,0,%r3

	/* COPY3 = counter || zero nonce  */
	lg	%r3,0(%r4)
	VZERO	COPY3
	VLVGG	COPY3,%r3,0

	/* %r1 = 0 from here on: used as the zero addend for alcr below */
	lghi	%r1,0
.Lblock:
	/* Working state = input block */
	VLR	STATE0,COPY0
	VLR	STATE1,COPY1
	VLR	STATE2,COPY2
	VLR	STATE3,COPY3

	/* 10 double rounds = 20 ChaCha rounds */
	lghi	%r0,10
.Ldoubleround:
	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
	VAF	STATE0,STATE0,STATE1
	VX	STATE3,STATE3,STATE0
	VERLLF	STATE3,STATE3,16

	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
	VAF	STATE2,STATE2,STATE3
	VX	STATE1,STATE1,STATE2
	VERLLF	STATE1,STATE1,12

	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
	VAF	STATE0,STATE0,STATE1
	VX	STATE3,STATE3,STATE0
	VERLLF	STATE3,STATE3,8

	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
	VAF	STATE2,STATE2,STATE3
	VX	STATE1,STATE1,STATE2
	VERLLF	STATE1,STATE1,7

	/* Diagonalize for the odd round: rotate rows 1-3 left by 1,2,3 words */
	/* STATE1[0,1,2,3] = STATE1[1,2,3,0] */
	VSLDB	STATE1,STATE1,STATE1,4
	/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
	VSLDB	STATE2,STATE2,STATE2,8
	/* STATE3[0,1,2,3] = STATE3[3,0,1,2] */
	VSLDB	STATE3,STATE3,STATE3,12

	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
	VAF	STATE0,STATE0,STATE1
	VX	STATE3,STATE3,STATE0
	VERLLF	STATE3,STATE3,16

	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
	VAF	STATE2,STATE2,STATE3
	VX	STATE1,STATE1,STATE2
	VERLLF	STATE1,STATE1,12

	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
	VAF	STATE0,STATE0,STATE1
	VX	STATE3,STATE3,STATE0
	VERLLF	STATE3,STATE3,8

	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
	VAF	STATE2,STATE2,STATE3
	VX	STATE1,STATE1,STATE2
	VERLLF	STATE1,STATE1,7

	/* Undo the diagonalization: rotate rows 1-3 back */
	/* STATE1[0,1,2,3] = STATE1[3,0,1,2] */
	VSLDB	STATE1,STATE1,STATE1,12
	/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
	VSLDB	STATE2,STATE2,STATE2,8
	/* STATE3[0,1,2,3] = STATE3[1,2,3,0] */
	VSLDB	STATE3,STATE3,STATE3,4
	brctg	%r0,.Ldoubleround

	/* Feed-forward: add the input block back into the state */
	/* OUTPUT0 = STATE0 + COPY0 */
	VAF	STATE0,STATE0,COPY0
	/* OUTPUT1 = STATE1 + COPY1 */
	VAF	STATE1,STATE1,COPY1
	/* OUTPUT2 = STATE2 + COPY2 */
	VAF	STATE2,STATE2,COPY2
	/* OUTPUT3 = STATE3 + COPY3 */
	VAF	STATE3,STATE3,COPY3

	/*
	 * Store the 64-byte block little-endian. Without facility 148 the
	 * words are byte-swapped via VPERM/BEPERM into TMP0-TMP3 and stored
	 * with VSTM; with it, VSTBRF stores each register byte-reversed per
	 * 32-bit element directly (TMP0-TMP3 stay untouched).
	 */
	ALTERNATIVE							\
		__stringify(						\
		/* Convert STATE to little endian and store to OUTPUT */\
		VPERM	TMP0,STATE0,STATE0,BEPERM;			\
		VPERM	TMP1,STATE1,STATE1,BEPERM;			\
		VPERM	TMP2,STATE2,STATE2,BEPERM;			\
		VPERM	TMP3,STATE3,STATE3,BEPERM;			\
		VSTM	TMP0,TMP3,0,%r2),				\
		__stringify(						\
		/* 32 bit wise little endian store to OUTPUT */		\
		VSTBRF	STATE0,0,,%r2;					\
		VSTBRF	STATE1,16,,%r2;					\
		VSTBRF	STATE2,32,,%r2;					\
		VSTBRF	STATE3,48,,%r2;					\
		brcl	0,0),						\
		ALT_FACILITY(148)

	/*
	 * ++COPY3.COUNTER: after the lg above, the counter word stored
	 * first in memory sits in bits 0-31 of %r3. alsih adds 1 to those
	 * high bits and sets the carry; alcr (with %r1 == 0) folds the
	 * carry into bits 32-63, completing a 64-bit increment across the
	 * two 32-bit counter words.
	 * NOTE(review): alsih is hand-encoded via .insn, presumably
	 * because the minimum supported assembler lacks the mnemonic —
	 * TODO confirm against the toolchain requirements.
	 */
	/* alsih %r3,1 */
	.insn	rilu,0xcc0a00000000,%r3,1
	alcr	%r3,%r1
	VLVGG	COPY3,%r3,0

	/* OUTPUT += 64, --NBLOCKS */
	aghi	%r2,64
	brctg	%r5,.Lblock

	/* COUNTER = COPY3.COUNTER */
	stg	%r3,0(%r4)

	/*
	 * Zero out potentially sensitive regs. COPY0 (public constant)
	 * and BEPERM (public selectors) need no clearing; COPY3 holds
	 * only the counter, which is written back above anyway.
	 */
	VZERO	STATE0
	VZERO	STATE1
	VZERO	STATE2
	VZERO	STATE3
	VZERO	COPY1
	VZERO	COPY2

	/*
	 * Early exit if TMP0-TMP3 have not been used: with facility 148
	 * the VSTBRF store path never wrote them, so the nopr is patched
	 * to an immediate return and the clears below are skipped.
	 */
	ALTERNATIVE "nopr", "br %r14", ALT_FACILITY(148)

	VZERO	TMP0
	VZERO	TMP1
	VZERO	TMP2
	VZERO	TMP3

	br	%r14
	CFI_ENDPROC
SYM_FUNC_END(__arch_chacha20_blocks_nostack)