Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * Accelerated CRC32 implementation with Zbc extension.
  4 *
  5 * Copyright (C) 2024 Intel Corporation
  6 */
  7
  8#include <asm/hwcap.h>
  9#include <asm/alternative-macros.h>
 10#include <asm/byteorder.h>
 11
 12#include <linux/types.h>
 13#include <linux/minmax.h>
 14#include <linux/crc32poly.h>
 15#include <linux/crc32.h>
 16#include <linux/byteorder/generic.h>
 17
 18/*
 19 * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for
 20 * better understanding of how this math works.
 21 *
 22 * let "+" denotes polynomial add (XOR)
 23 * let "-" denotes polynomial sub (XOR)
 24 * let "*" denotes polynomial multiplication
 25 * let "/" denotes polynomial floor division
 26 * let "S" denotes source data, XLEN bit wide
 27 * let "P" denotes CRC32 polynomial
 28 * let "T" denotes 2^(XLEN+32)
 29 * let "QT" denotes quotient of T/P, with the bit for 2^XLEN being implicit
 30 *
 31 * crc32(S, P)
 32 * => S * (2^32) - S * (2^32) / P * P
 33 * => lowest 32 bits of: S * (2^32) / P * P
 34 * => lowest 32 bits of: S * (2^32) * (T / P) / T * P
 35 * => lowest 32 bits of: S * (2^32) * quotient / T * P
 36 * => lowest 32 bits of: S * quotient / 2^XLEN * P
 37 * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P
 38 * => clmul_low_part(clmul_high_part(S, QT) + S, P)
 39 *
 40 * In terms of below implementations, the BE case is more intuitive, since the
 41 * higher order bit sits at more significant position.
 42 */
 43
 44#if __riscv_xlen == 64
 45/* Slide by XLEN bits per iteration */
 46# define STEP_ORDER 3
 47
 48/* Each below polynomial quotient has an implicit bit for 2^XLEN */
 49
 50/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */
 51# define CRC32_POLY_QT_LE	0x5a72d812fb808b20
 52
 53/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */
 54# define CRC32C_POLY_QT_LE	0xa434f61c6f5389f8
 55
 56/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format, it should be
 57 * the same as the bit-reversed version of CRC32_POLY_QT_LE
 58 */
 59# define CRC32_POLY_QT_BE	0x04d101df481b4e5a
 60
 61static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr)
 62{
 63	return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr);
 64}
 65
/*
 * Barrett-reduce one XLEN-bit chunk "s" down to a 32-bit CRC, LE bit
 * order.  "poly" is the bit-reversed CRC polynomial and "poly_qt" the
 * precomputed quotient 2^(XLEN+32)/poly with its bit for 2^XLEN kept
 * implicit -- see the derivation in the comment at the top of this file.
 *
 * The clmul + slli + xor sequence forms the bit-reversed equivalent of
 * clmul_high_part(s, poly_qt) + s (the xor with %1 supplies the term
 * for the quotient's implicit top bit); clmulr against poly << 32 then
 * leaves the 32-bit remainder in the upper half, extracted by the srli.
 */
static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
{
	u32 crc;

	/* We don't have a "clmulrh" insn, so use clmul + slli instead. */
	asm volatile (".option push\n"
		      ".option arch,+zbc\n"
		      "clmul	%0, %1, %2\n"
		      "slli	%0, %0, 1\n"
		      "xor	%0, %0, %1\n"
		      "clmulr	%0, %0, %3\n"
		      "srli	%0, %0, 32\n"
		      ".option pop\n"
		      : "=&r" (crc)
		      : "r" (s),
			"r" (poly_qt),
			"r" ((u64)poly << 32)
		      :);
	return crc;
}
 86
 87static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr)
 88{
 89	return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr);
 90}
 91
 92#elif __riscv_xlen == 32
 93# define STEP_ORDER 2
 94/* Each quotient should match the upper half of its analog in RV64 */
 95# define CRC32_POLY_QT_LE	0xfb808b20
 96# define CRC32C_POLY_QT_LE	0x6f5389f8
 97# define CRC32_POLY_QT_BE	0x04d101df
 98
 99static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr)
100{
101	return crc ^ (__force u32)__cpu_to_le32(*ptr);
102}
103
/*
 * RV32 variant of the LE Barrett reduction.  Same sequence as the RV64
 * version, except that "poly" already occupies the full register so no
 * pre-shift of the polynomial and no trailing srli are needed: clmulr
 * leaves the 32-bit remainder directly in the destination register.
 */
static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
{
	u32 crc;

	/* We don't have a "clmulrh" insn, so use clmul + slli instead. */
	asm volatile (".option push\n"
		      ".option arch,+zbc\n"
		      "clmul	%0, %1, %2\n"
		      "slli	%0, %0, 1\n"
		      "xor	%0, %0, %1\n"
		      "clmulr	%0, %0, %3\n"
		      ".option pop\n"
		      : "=&r" (crc)
		      : "r" (s),
			"r" (poly_qt),
			"r" (poly)
		      :);
	return crc;
}
123
124static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr)
125{
126	return crc ^ (__force u32)__cpu_to_be32(*ptr);
127}
128
129#else
130# error "Unexpected __riscv_xlen"
131#endif
132
/*
 * Barrett-reduce one XLEN-bit chunk "s" down to a 32-bit CRC, BE bit
 * order, against the standard CRC32 polynomial.  This follows the file
 * header derivation directly: clmulh gives clmul_high_part(s, QT), the
 * xor with %1 supplies the term for QT's implicit 2^XLEN bit, and the
 * final clmul by the polynomial yields the remainder in the low 32 bits.
 */
static inline u32 crc32_be_zbc(unsigned long s)
{
	u32 crc;

	asm volatile (".option push\n"
		      ".option arch,+zbc\n"
		      "clmulh	%0, %1, %2\n"
		      "xor	%0, %0, %1\n"
		      "clmul	%0, %0, %3\n"
		      ".option pop\n"
		      : "=&r" (crc)
		      : "r" (s),
			"r" (CRC32_POLY_QT_BE),
			"r" (CRC32_POLY_BE)
		      :);
	return crc;
}
150
151#define STEP		(1 << STEP_ORDER)
152#define OFFSET_MASK	(STEP - 1)
153
154typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len);
155
/*
 * CRC a partial chunk of 1 to STEP-1 bytes (callers pass the unaligned
 * head before a word boundary, or the sub-STEP tail, here).
 *
 * The bytes are gathered into the high end of "s" in LE order and the
 * current CRC is folded in above them before the Zbc reduction.  When
 * the chunk is too short to absorb the whole 32-bit CRC (always on
 * RV32, or len < 4 on RV64), the unconsumed high CRC bits are saved in
 * crc_low and XORed back into the result afterwards.
 */
static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p,
				     size_t len, u32 poly,
				     unsigned long poly_qt)
{
	size_t bits = len * 8;
	unsigned long s = 0;
	u32 crc_low = 0;

	/* Shift each byte in from the top so earlier bytes end up lower. */
	for (int i = 0; i < len; i++)
		s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8);

	/* Align the CRC with the least significant data byte. */
	s ^= (unsigned long)crc << (__riscv_xlen - bits);
	if (__riscv_xlen == 32 || len < sizeof(u32))
		crc_low = crc >> bits;	/* CRC bits this chunk didn't cover */

	crc = crc32_le_zbc(s, poly, poly_qt);
	crc ^= crc_low;

	return crc;
}
176
/*
 * Compute a little-endian CRC32 variant over p[0..len) using the Zbc
 * carry-less-multiply extension.
 *
 * @crc:	input CRC value
 * @p:		data buffer
 * @len:	number of bytes
 * @poly:	bit-reversed CRC polynomial
 * @poly_qt:	Barrett quotient for @poly (see file header)
 * @crc_fb:	table-based fallback used when Zbc is unavailable
 *
 * The buffer is processed as an unaligned head (up to the next XLEN-word
 * boundary), a body of whole XLEN words, and a sub-word tail.
 */
static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
					  size_t len, u32 poly,
					  unsigned long poly_qt,
					  fallback crc_fb)
{
	size_t offset, head_len, tail_len;
	unsigned long const *p_ul;
	unsigned long s;

	/* Runtime-patched to a nop when the CPU provides Zbc. */
	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
			     RISCV_ISA_EXT_ZBC, 1)
		 : : : : legacy);

	/* Handle the unaligned head. */
	offset = (unsigned long)p & OFFSET_MASK;
	if (offset && len) {
		head_len = min(STEP - offset, len);
		crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt);
		p += head_len;
		len -= head_len;
	}

	tail_len = len & OFFSET_MASK;
	len = len >> STEP_ORDER;	/* len is now a count of XLEN words */
	p_ul = (unsigned long const *)p;

	for (int i = 0; i < len; i++) {
		s = crc32_le_prep(crc, p_ul);
		crc = crc32_le_zbc(s, poly, poly_qt);
		p_ul++;
	}

	/* Handle the tail bytes. */
	p = (unsigned char const *)p_ul;
	if (tail_len)
		crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt);

	return crc;

legacy:
	return crc_fb(crc, p, len);
}
219
220u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
221{
222	return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
223				crc32_le_base);
224}
225
226u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
227{
228	return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
229				CRC32C_POLY_QT_LE, __crc32c_le_base);
230}
231
232static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
233				     size_t len)
234{
235	size_t bits = len * 8;
236	unsigned long s = 0;
237	u32 crc_low = 0;
238
239	s = 0;
240	for (int i = 0; i < len; i++)
241		s = *p++ | (s << 8);
242
243	if (__riscv_xlen == 32 || len < sizeof(u32)) {
244		s ^= crc >> (32 - bits);
245		crc_low = crc << bits;
246	} else {
247		s ^= (unsigned long)crc << (bits - 32);
248	}
249
250	crc = crc32_be_zbc(s);
251	crc ^= crc_low;
252
253	return crc;
254}
255
/*
 * Compute a big-endian CRC32 over p[0..len) using the Zbc extension.
 *
 * Same structure as crc32_le_generic(): the runtime-patched ALTERNATIVE
 * branches to the table-based crc32_be_base() on CPUs without Zbc;
 * otherwise process an unaligned head up to an XLEN-word boundary, a
 * body of whole XLEN words, then the remaining tail bytes.
 */
u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
{
	size_t offset, head_len, tail_len;
	unsigned long const *p_ul;
	unsigned long s;

	/* Runtime-patched to a nop when the CPU provides Zbc. */
	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
			     RISCV_ISA_EXT_ZBC, 1)
		 : : : : legacy);

	/* Handle the unaligned head. */
	offset = (unsigned long)p & OFFSET_MASK;
	if (offset && len) {
		head_len = min(STEP - offset, len);
		crc = crc32_be_unaligned(crc, p, head_len);
		p += head_len;
		len -= head_len;
	}

	tail_len = len & OFFSET_MASK;
	len = len >> STEP_ORDER;	/* len is now a count of XLEN words */
	p_ul = (unsigned long const *)p;

	for (int i = 0; i < len; i++) {
		s = crc32_be_prep(crc, p_ul);
		crc = crc32_be_zbc(s);
		p_ul++;
	}

	/* Handle the tail bytes. */
	p = (unsigned char const *)p_ul;
	if (tail_len)
		crc = crc32_be_unaligned(crc, p, tail_len);

	return crc;

legacy:
	return crc32_be_base(crc, p, len);
}
294}