// SPDX-License-Identifier: GPL-2.0-only
/*
 * Accelerated CRC32 implementation with Zbc extension.
 *
 * Copyright (C) 2024 Intel Corporation
 */

#include <asm/hwcap.h>
#include <asm/alternative-macros.h>
#include <asm/byteorder.h>

#include <linux/types.h>
#include <linux/minmax.h>
#include <linux/crc32poly.h>
#include <linux/crc32.h>
#include <linux/byteorder/generic.h>

/*
 * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for
 * a better understanding of how this math works.
 *
 * let "+" denote polynomial addition (XOR)
 * let "-" denote polynomial subtraction (XOR)
 * let "*" denote polynomial multiplication
 * let "/" denote polynomial floor division
 * let "S" denote the source data, XLEN bits wide
 * let "P" denote the CRC32 polynomial
 * let "T" denote 2^(XLEN+32)
 * let "QT" denote the quotient of T/P, with the bit for 2^XLEN being implicit
 *
 * crc32(S, P)
 * => S * (2^32) - S * (2^32) / P * P
 * => lowest 32 bits of: S * (2^32) / P * P
 * => lowest 32 bits of: S * (2^32) * (T / P) / T * P
 * => lowest 32 bits of: S * (2^32) * quotient / T * P
 * => lowest 32 bits of: S * quotient / 2^XLEN * P
 * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P
 * => clmul_low_part(clmul_high_part(S, QT) + S, P)
 *
 * In the implementations below, the BE case is the more intuitive one, since
 * the higher-order bits sit at the more significant positions.
 */
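
/*
 * Roughly, in the helpers below the BE case maps directly onto the Zbc
 * instructions: clmul_high_part(S, QT) is clmulh and clmul_low_part(.., P)
 * is clmul.  For the LE (bit-reversed) case there is no "clmulrh"
 * instruction, so the high part is emulated with clmul + slli, and the
 * final multiply by P uses clmulr (followed on RV64 by srli to extract the
 * 32-bit result).
 */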

#if __riscv_xlen == 64
/* Slide by XLEN bits per iteration */
# define STEP_ORDER 3

/* Each polynomial quotient below has an implicit bit for 2^XLEN */

/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */
# define CRC32_POLY_QT_LE 0x5a72d812fb808b20

/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */
# define CRC32C_POLY_QT_LE 0xa434f61c6f5389f8

/*
 * Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format; it should
 * be the same as the bit-reversed version of CRC32_POLY_QT_LE.
 */
# define CRC32_POLY_QT_BE 0x04d101df481b4e5a

static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr)
{
	return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr);
}

static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
{
	u32 crc;

	/* We don't have a "clmulrh" insn, so use clmul + slli instead. */
	asm volatile (".option push\n"
		      ".option arch,+zbc\n"
		      "clmul %0, %1, %2\n"
		      "slli %0, %0, 1\n"
		      "xor %0, %0, %1\n"
		      "clmulr %0, %0, %3\n"
		      "srli %0, %0, 32\n"
		      ".option pop\n"
		      : "=&r" (crc)
		      : "r" (s),
			"r" (poly_qt),
			"r" ((u64)poly << 32)
		      :);
	return crc;
}

static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr)
{
	return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr);
}

#elif __riscv_xlen == 32
# define STEP_ORDER 2
/* Each quotient should match the upper half of its analog in RV64 */
# define CRC32_POLY_QT_LE 0xfb808b20
# define CRC32C_POLY_QT_LE 0x6f5389f8
# define CRC32_POLY_QT_BE 0x04d101df

static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr)
{
	return crc ^ (__force u32)__cpu_to_le32(*ptr);
}

static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
{
	u32 crc;

	/* We don't have a "clmulrh" insn, so use clmul + slli instead. */
	asm volatile (".option push\n"
		      ".option arch,+zbc\n"
		      "clmul %0, %1, %2\n"
		      "slli %0, %0, 1\n"
		      "xor %0, %0, %1\n"
		      "clmulr %0, %0, %3\n"
		      ".option pop\n"
		      : "=&r" (crc)
		      : "r" (s),
			"r" (poly_qt),
			"r" (poly)
		      :);
	return crc;
}

static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr)
{
	return crc ^ (__force u32)__cpu_to_be32(*ptr);
}

#else
# error "Unexpected __riscv_xlen"
#endif

static inline u32 crc32_be_zbc(unsigned long s)
{
	u32 crc;

	asm volatile (".option push\n"
		      ".option arch,+zbc\n"
		      "clmulh %0, %1, %2\n"
		      "xor %0, %0, %1\n"
		      "clmul %0, %0, %3\n"
		      ".option pop\n"
		      : "=&r" (crc)
		      : "r" (s),
			"r" (CRC32_POLY_QT_BE),
			"r" (CRC32_POLY_BE)
		      :);
	return crc;
}

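/*
 * STEP is the number of bytes consumed per main-loop iteration (one native
 * word) and OFFSET_MASK extracts a byte offset within such a word; the
 * helpers below use them to split a buffer into an unaligned head, a
 * word-aligned body and a tail of leftover bytes.
 */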
#define STEP (1 << STEP_ORDER)
#define OFFSET_MASK (STEP - 1)

typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len);

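/*
 * Process a chunk of fewer than STEP bytes: pack the bytes into the most
 * significant end of a machine word, fold in the low bits of the current
 * CRC that the chunk covers, run one Zbc reduction, and XOR back the part
 * of the old CRC (crc_low) that the chunk did not consume.
 */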
static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p,
				     size_t len, u32 poly,
				     unsigned long poly_qt)
{
	size_t bits = len * 8;
	unsigned long s = 0;
	u32 crc_low = 0;

	for (int i = 0; i < len; i++)
		s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8);

	s ^= (unsigned long)crc << (__riscv_xlen - bits);
	if (__riscv_xlen == 32 || len < sizeof(u32))
		crc_low = crc >> bits;

	crc = crc32_le_zbc(s, poly, poly_qt);
	crc ^= crc_low;

	return crc;
}

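/*
 * Common little-endian driver: when Zbc is not available, the ALTERNATIVE
 * branches straight to the supplied fallback; otherwise the buffer is
 * processed as an unaligned head, a word-per-iteration body and a byte
 * tail.
 */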
static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
					  size_t len, u32 poly,
					  unsigned long poly_qt,
					  fallback crc_fb)
{
	size_t offset, head_len, tail_len;
	unsigned long const *p_ul;
	unsigned long s;

	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
			     RISCV_ISA_EXT_ZBC, 1)
		 : : : : legacy);

	/* Handle the unaligned head. */
	offset = (unsigned long)p & OFFSET_MASK;
	if (offset && len) {
		head_len = min(STEP - offset, len);
		crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt);
		p += head_len;
		len -= head_len;
	}

	tail_len = len & OFFSET_MASK;
	len = len >> STEP_ORDER;
	p_ul = (unsigned long const *)p;

	for (int i = 0; i < len; i++) {
		s = crc32_le_prep(crc, p_ul);
		crc = crc32_le_zbc(s, poly, poly_qt);
		p_ul++;
	}

	/* Handle the tail bytes. */
	p = (unsigned char const *)p_ul;
	if (tail_len)
		crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt);

	return crc;

legacy:
	return crc_fb(crc, p, len);
}

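/*
 * crc32_le() and __crc32c_le() differ only in the polynomial, its
 * precomputed quotient and the fallback routine; both share
 * crc32_le_generic() above.  A minimal usage sketch, assuming the usual
 * all-ones seed and final inversion for the standard CRC forms:
 *
 *	u32 crc  = crc32_le(~0U, buf, len) ^ ~0U;	 // CRC-32 (IEEE)
 *	u32 crcc = __crc32c_le(~0U, buf, len) ^ ~0U;	 // CRC-32C
 */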
u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
{
	return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
				crc32_le_base);
}

u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
	return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
				CRC32C_POLY_QT_LE, __crc32c_le_base);
}

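/*
 * Big-endian counterpart of crc32_le_unaligned(): the chunk is packed into
 * the low end of a machine word, the covered part of the current CRC is
 * folded in, and the uncovered part is carried through via crc_low.
 */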
static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
				     size_t len)
{
	size_t bits = len * 8;
	unsigned long s = 0;
	u32 crc_low = 0;

	for (int i = 0; i < len; i++)
		s = *p++ | (s << 8);

	if (__riscv_xlen == 32 || len < sizeof(u32)) {
		s ^= crc >> (32 - bits);
		crc_low = crc << bits;
	} else {
		s ^= (unsigned long)crc << (bits - 32);
	}

	crc = crc32_be_zbc(s);
	crc ^= crc_low;

	return crc;
}

u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
{
	size_t offset, head_len, tail_len;
	unsigned long const *p_ul;
	unsigned long s;

	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
			     RISCV_ISA_EXT_ZBC, 1)
		 : : : : legacy);

	/* Handle the unaligned head. */
	offset = (unsigned long)p & OFFSET_MASK;
	if (offset && len) {
		head_len = min(STEP - offset, len);
		crc = crc32_be_unaligned(crc, p, head_len);
		p += head_len;
		len -= head_len;
	}

	tail_len = len & OFFSET_MASK;
	len = len >> STEP_ORDER;
	p_ul = (unsigned long const *)p;

	for (int i = 0; i < len; i++) {
		s = crc32_be_prep(crc, p_ul);
		crc = crc32_be_zbc(s);
		p_ul++;
	}

	/* Handle the tail bytes. */
	p = (unsigned char const *)p_ul;
	if (tail_len)
		crc = crc32_be_unaligned(crc, p, tail_len);

	return crc;

legacy:
	return crc32_be_base(crc, p, len);
}