// SPDX-License-Identifier: GPL-2.0
/*
 * Checksum library
 *
 * Influenced by arch/arm64/lib/csum.c
 * Copyright (C) 2023 Rivos Inc.
 */
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/jump_label.h>
#include <linux/kasan-checks.h>
#include <linux/kernel.h>

#include <asm/cpufeature.h>

#include <net/checksum.h>

/* The default version is sufficient for 32-bit */
#ifndef CONFIG_32BIT
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
			const struct in6_addr *daddr,
			__u32 len, __u8 proto, __wsum csum)
{
	unsigned int ulen, uproto;
	unsigned long sum = (__force unsigned long)csum;

	sum += (__force unsigned long)saddr->s6_addr32[0];
	sum += (__force unsigned long)saddr->s6_addr32[1];
	sum += (__force unsigned long)saddr->s6_addr32[2];
	sum += (__force unsigned long)saddr->s6_addr32[3];

	sum += (__force unsigned long)daddr->s6_addr32[0];
	sum += (__force unsigned long)daddr->s6_addr32[1];
	sum += (__force unsigned long)daddr->s6_addr32[2];
	sum += (__force unsigned long)daddr->s6_addr32[3];

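	/*
	 * len and proto are added in network byte order, matching their
	 * position in the IPv6 pseudo-header.
	 */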
	ulen = (__force unsigned int)htonl((unsigned int)len);
	sum += ulen;

	uproto = (__force unsigned int)htonl(proto);
	sum += uproto;

	/*
	 * Zbb only saves 4 instructions here, so it is not worth a runtime
	 * check unless alternatives are enabled to make that check free.
	 */
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
	    IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
		unsigned long fold_temp;

		/*
		 * Zbb is likely available when the kernel is compiled with Zbb
		 * support, so nop when Zbb is available and jump when Zbb is
		 * not available.
		 */
		asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
				     RISCV_ISA_EXT_ZBB, 1)
			 :
			 :
			 :
			 : no_zbb);
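		/*
		 * Fold the 64-bit sum to 32 bits (rori/add/srli), then combine
		 * the 16-bit halves and complement them (not/roriw/subw); the
		 * finished checksum ends up in bits 31:16 of sum.
		 */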
		asm(".option push					\n\
		.option arch,+zbb					\n\
			rori	%[fold_temp], %[sum], 32		\n\
			add	%[sum], %[fold_temp], %[sum]		\n\
			srli	%[sum], %[sum], 32			\n\
			not	%[fold_temp], %[sum]			\n\
			roriw	%[sum], %[sum], 16			\n\
			subw	%[sum], %[fold_temp], %[sum]		\n\
		.option pop"
		: [sum] "+r" (sum), [fold_temp] "=&r" (fold_temp));
		return (__force __sum16)(sum >> 16);
	}
no_zbb:
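	/*
	 * Generic fallback: the high half of sum + ror64(sum, 32) is the
	 * 32-bit fold with the end-around carry included; csum_fold() then
	 * reduces it to 16 bits.
	 */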
	sum += ror64(sum, 32);
	sum >>= 32;
	return csum_fold((__force __wsum)sum);
}
EXPORT_SYMBOL(csum_ipv6_magic);
#endif /* !CONFIG_32BIT */

#ifdef CONFIG_32BIT
#define OFFSET_MASK 3
#elif defined(CONFIG_64BIT)
#define OFFSET_MASK 7
#endif

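/*
 * Sum native words from *ptr up to end. The caller preloads the first word
 * into data and passes ptr pointing at the next one; each iteration adds the
 * previously loaded word before loading the next, so the final (possibly
 * over-reading) word is masked and added after the loop.
 */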
static inline __no_sanitize_address unsigned long
do_csum_common(const unsigned long *ptr, const unsigned long *end,
	       unsigned long data)
{
	unsigned int shift;
	unsigned long csum = 0, carry = 0;

	/*
	 * Do 32-bit reads on RV32 and 64-bit reads otherwise. This should be
	 * faster than doing 32-bit reads on architectures that support larger
	 * reads.
	 */
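	/*
	 * "csum < data" after the add is 1 exactly when the addition wrapped,
	 * so carry accumulates the end-around carries to fold back in below.
	 */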
	while (ptr < end) {
		csum += data;
		carry += csum < data;
		data = *(ptr++);
	}

	/*
	 * Add the tail word: the final load may have read past end, so mask
	 * off any over-read bytes before adding the leftover data.
	 */
	shift = ((long)ptr - (long)end) * 8;
#ifdef __LITTLE_ENDIAN
	data = (data << shift) >> shift;
#else
	data = (data >> shift) << shift;
#endif
	csum += data;
	carry += csum < data;
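	/* Fold the accumulated carries back into the one's complement sum. */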
	csum += carry;
	csum += csum < carry;

	return csum;
}

/*
 * The algorithm accounts for buff being misaligned: if buff is not aligned it
 * over-reads bytes but masks off the ones it should not use. The same happens
 * on the tail end of the read.
 */
static inline __no_sanitize_address unsigned int
do_csum_with_alignment(const unsigned char *buff, int len)
{
	unsigned int offset, shift;
	unsigned long csum, data;
	const unsigned long *ptr, *end;

	/*
	 * Align the address down to the closest word (double word on rv64)
	 * boundary before buff. This should always be in the same page and
	 * cache line. The function is not instrumented
	 * (__no_sanitize_address), so report the [buff, buff + len) read to
	 * KASAN directly.
	 */
	offset = (unsigned long)buff & OFFSET_MASK;
	kasan_check_read(buff, len);
	ptr = (const unsigned long *)(buff - offset);

	/*
	 * Mask off the bytes that precede buff and were over-read because of
	 * the alignment; they sit at the low end of the word on little-endian
	 * and at the high end on big-endian.
	 */
	shift = offset * 8;
	data = *(ptr++);
#ifdef __LITTLE_ENDIAN
	data = (data >> shift) << shift;
#else
	data = (data << shift) >> shift;
#endif
	end = (const unsigned long *)(buff + len);
	csum = do_csum_common(ptr, end, data);

#ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT
	/*
	 * Zbb only saves 6 instructions here, so it is not worth a runtime
	 * check unless alternatives are enabled to make that check free.
	 */
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
	    IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
		unsigned long fold_temp;

		/*
		 * Zbb is likely available when the kernel is compiled with Zbb
		 * support, so nop when Zbb is available and jump when Zbb is
		 * not available.
		 */
		asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
				     RISCV_ISA_EXT_ZBB, 1)
			 :
			 :
			 :
			 : no_zbb);

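		/*
		 * Fold csum so the 16-bit result lands in bits 31:16. If buff
		 * started at an odd address the bytes were summed at swapped
		 * positions, so rev8 byte-swaps the result first (mirroring
		 * the swab32() in the C fallback below).
		 */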
#ifdef CONFIG_32BIT
		asm_goto_output(".option push			\n\
		.option arch,+zbb				\n\
			rori	%[fold_temp], %[csum], 16	\n\
			andi	%[offset], %[offset], 1		\n\
			add	%[csum], %[fold_temp], %[csum]	\n\
			beq	%[offset], zero, %l[end]	\n\
			rev8	%[csum], %[csum]		\n\
		.option pop"
			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
			: [offset] "r" (offset)
			:
			: end);

		return (unsigned short)csum;
#else /* !CONFIG_32BIT */
		asm_goto_output(".option push			\n\
		.option arch,+zbb				\n\
			rori	%[fold_temp], %[csum], 32	\n\
			add	%[csum], %[fold_temp], %[csum]	\n\
			srli	%[csum], %[csum], 32		\n\
			roriw	%[fold_temp], %[csum], 16	\n\
			addw	%[csum], %[fold_temp], %[csum]	\n\
			andi	%[offset], %[offset], 1		\n\
			beq	%[offset], zero, %l[end]	\n\
			rev8	%[csum], %[csum]		\n\
		.option pop"
			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
			: [offset] "r" (offset)
			:
			: end);

		return (csum << 16) >> 48;
#endif /* !CONFIG_32BIT */
end:
		return csum >> 16;
	}
no_zbb:
#endif /* CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT */
#ifndef CONFIG_32BIT
	csum += ror64(csum, 32);
	csum >>= 32;
#endif
	csum = (u32)csum + ror32((u32)csum, 16);
	if (offset & 1)
		return (u16)swab32(csum);
	return csum >> 16;
}

/*
 * Does not perform alignment; should only be used if the machine has fast
 * misaligned accesses, or when buff is known to be aligned.
 */
static inline __no_sanitize_address unsigned int
do_csum_no_alignment(const unsigned char *buff, int len)
{
	unsigned long csum, data;
	const unsigned long *ptr, *end;

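	/*
	 * Reads start exactly at buff; preload the first word for
	 * do_csum_common(), which adds it before loading the rest.
	 */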
	ptr = (const unsigned long *)(buff);
	data = *(ptr++);

	kasan_check_read(buff, len);

	end = (const unsigned long *)(buff + len);
	csum = do_csum_common(ptr, end, data);

	/*
	 * Zbb only saves 6 instructions here, so it is not worth a runtime
	 * check unless alternatives are enabled to make that check free.
	 */
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
	    IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
		unsigned long fold_temp;

		/*
		 * Zbb is likely available when the kernel is compiled with Zbb
		 * support, so nop when Zbb is available and jump when Zbb is
		 * not available.
		 */
		asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
				     RISCV_ISA_EXT_ZBB, 1)
			 :
			 :
			 :
			 : no_zbb);

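		/*
		 * Fold csum so the 16-bit result lands in bits 31:16. Unlike
		 * the aligned-down path, reads started exactly at buff, so no
		 * byte swap is needed here.
		 */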
#ifdef CONFIG_32BIT
		asm (".option push				\n\
		.option arch,+zbb				\n\
			rori	%[fold_temp], %[csum], 16	\n\
			add	%[csum], %[fold_temp], %[csum]	\n\
		.option pop"
			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
			:
			: );

#else /* !CONFIG_32BIT */
		asm (".option push				\n\
		.option arch,+zbb				\n\
			rori	%[fold_temp], %[csum], 32	\n\
			add	%[csum], %[fold_temp], %[csum]	\n\
			srli	%[csum], %[csum], 32		\n\
			roriw	%[fold_temp], %[csum], 16	\n\
			addw	%[csum], %[fold_temp], %[csum]	\n\
		.option pop"
			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
			:
			: );
#endif /* !CONFIG_32BIT */
		return csum >> 16;
	}
no_zbb:
#ifndef CONFIG_32BIT
	csum += ror64(csum, 32);
	csum >>= 32;
#endif
	csum = (u32)csum + ror32((u32)csum, 16);
	return csum >> 16;
}

/*
 * Compute a checksum over an arbitrary memory region.
 * Does a light-weight address alignment if buff is misaligned, unless the
 * CPU supports fast misaligned accesses.
 */
unsigned int do_csum(const unsigned char *buff, int len)
{
	if (unlikely(len <= 0))
		return 0;

	/*
	 * Significant performance gains can be seen by not doing alignment
	 * on machines with fast misaligned accesses.
	 *
	 * There is some duplicate code between the "with_alignment" and
	 * "no_alignment" implementations, but the overlap is too awkward to be
	 * able to fit in one function without introducing multiple static
	 * branches. The largest chunk of overlap was delegated into the
	 * do_csum_common function.
	 */
	if (static_branch_likely(&fast_misaligned_access_speed_key))
		return do_csum_no_alignment(buff, len);

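	/* An already-aligned buffer can use the no-alignment path as well. */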
	if (((unsigned long)buff & OFFSET_MASK) == 0)
		return do_csum_no_alignment(buff, len);

	return do_csum_with_alignment(buff, len);
}