Loading...
1/*
2 * Generic binary BCH encoding/decoding library
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License version 2 as published by
6 * the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 51
15 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
16 *
17 * Copyright © 2011 Parrot S.A.
18 *
19 * Author: Ivan Djelic <ivan.djelic@parrot.com>
20 *
21 * Description:
22 *
23 * This library provides runtime configurable encoding/decoding of binary
24 * Bose-Chaudhuri-Hocquenghem (BCH) codes.
25 *
26 * Call bch_init to get a pointer to a newly allocated bch_control structure for
27 * the given m (Galois field order), t (error correction capability) and
28 * (optional) primitive polynomial parameters.
29 *
30 * Call bch_encode to compute and store ecc parity bytes to a given buffer.
31 * Call bch_decode to detect and locate errors in received data.
32 *
33 * On systems supporting hw BCH features, intermediate results may be provided
34 * to bch_decode in order to skip certain steps. See bch_decode() documentation
35 * for details.
36 *
37 * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
38 * parameters m and t; thus allowing extra compiler optimizations and providing
39 * better (up to 2x) encoding performance. Using this option makes sense when
40 * (m,t) are fixed and known in advance, e.g. when using BCH error correction
41 * on a particular NAND flash device.
42 *
43 * Algorithmic details:
44 *
45 * Encoding is performed by processing 32 input bits in parallel, using 4
46 * remainder lookup tables.
47 *
48 * The final stage of decoding involves the following internal steps:
49 * a. Syndrome computation
50 * b. Error locator polynomial computation using Berlekamp-Massey algorithm
51 * c. Error locator root finding (by far the most expensive step)
52 *
53 * In this implementation, step c is not performed using the usual Chien search.
54 * Instead, an alternative approach described in [1] is used. It consists in
55 * factoring the error locator polynomial using the Berlekamp Trace algorithm
56 * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
57 * solving techniques [2] are used. The resulting algorithm, called BTZ, yields
58 * much better performance than Chien search for usual (m,t) values (typically
59 * m >= 13, t < 32, see [1]).
60 *
61 * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
62 * of characteristic 2, in: Western European Workshop on Research in Cryptology
63 * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
64 * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
65 * finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
66 */
67
68#include <linux/kernel.h>
69#include <linux/errno.h>
70#include <linux/init.h>
71#include <linux/module.h>
72#include <linux/slab.h>
73#include <linux/bitops.h>
74#include <asm/byteorder.h>
75#include <linux/bch.h>
76
77#if defined(CONFIG_BCH_CONST_PARAMS)
78#define GF_M(_p) (CONFIG_BCH_CONST_M)
79#define GF_T(_p) (CONFIG_BCH_CONST_T)
80#define GF_N(_p) ((1 << (CONFIG_BCH_CONST_M))-1)
81#define BCH_MAX_M (CONFIG_BCH_CONST_M)
82#define BCH_MAX_T (CONFIG_BCH_CONST_T)
83#else
84#define GF_M(_p) ((_p)->m)
85#define GF_T(_p) ((_p)->t)
86#define GF_N(_p) ((_p)->n)
87#define BCH_MAX_M 15 /* 2KB */
88#define BCH_MAX_T 64 /* 64 bit correction */
89#endif
90
91#define BCH_ECC_WORDS(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
92#define BCH_ECC_BYTES(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
93
94#define BCH_ECC_MAX_WORDS DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32)
95
96#ifndef dbg
97#define dbg(_fmt, args...) do {} while (0)
98#endif
99
100/*
101 * represent a polynomial over GF(2^m)
102 */
103struct gf_poly {
104 unsigned int deg; /* polynomial degree */
105 unsigned int c[]; /* polynomial terms */
106};
107
108/* given its degree, compute a polynomial size in bytes */
109#define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))
110
111/* polynomial of degree 1 */
112struct gf_poly_deg1 {
113 struct gf_poly poly;
114 unsigned int c[2];
115};
116
117static u8 swap_bits_table[] = {
118 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
119 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
120 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
121 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
122 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
123 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
124 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
125 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
126 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
127 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
128 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
129 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
130 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
131 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
132 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
133 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
134 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
135 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
136 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
137 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
138 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
139 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
140 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
141 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
142 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
143 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
144 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
145 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
146 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
147 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
148 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
149 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
150};
151
152static u8 swap_bits(struct bch_control *bch, u8 in)
153{
154 if (!bch->swap_bits)
155 return in;
156
157 return swap_bits_table[in];
158}
159
160/*
161 * same as bch_encode(), but process input data one byte at a time
162 */
163static void bch_encode_unaligned(struct bch_control *bch,
164 const unsigned char *data, unsigned int len,
165 uint32_t *ecc)
166{
167 int i;
168 const uint32_t *p;
169 const int l = BCH_ECC_WORDS(bch)-1;
170
171 while (len--) {
172 u8 tmp = swap_bits(bch, *data++);
173
174 p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(tmp)) & 0xff);
175
176 for (i = 0; i < l; i++)
177 ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++);
178
179 ecc[l] = (ecc[l] << 8)^(*p);
180 }
181}
182
183/*
184 * convert ecc bytes to aligned, zero-padded 32-bit ecc words
185 */
186static void load_ecc8(struct bch_control *bch, uint32_t *dst,
187 const uint8_t *src)
188{
189 uint8_t pad[4] = {0, 0, 0, 0};
190 unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
191
192 for (i = 0; i < nwords; i++, src += 4)
193 dst[i] = ((u32)swap_bits(bch, src[0]) << 24) |
194 ((u32)swap_bits(bch, src[1]) << 16) |
195 ((u32)swap_bits(bch, src[2]) << 8) |
196 swap_bits(bch, src[3]);
197
198 memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords);
199 dst[nwords] = ((u32)swap_bits(bch, pad[0]) << 24) |
200 ((u32)swap_bits(bch, pad[1]) << 16) |
201 ((u32)swap_bits(bch, pad[2]) << 8) |
202 swap_bits(bch, pad[3]);
203}
204
205/*
206 * convert 32-bit ecc words to ecc bytes
207 */
208static void store_ecc8(struct bch_control *bch, uint8_t *dst,
209 const uint32_t *src)
210{
211 uint8_t pad[4];
212 unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
213
214 for (i = 0; i < nwords; i++) {
215 *dst++ = swap_bits(bch, src[i] >> 24);
216 *dst++ = swap_bits(bch, src[i] >> 16);
217 *dst++ = swap_bits(bch, src[i] >> 8);
218 *dst++ = swap_bits(bch, src[i]);
219 }
220 pad[0] = swap_bits(bch, src[nwords] >> 24);
221 pad[1] = swap_bits(bch, src[nwords] >> 16);
222 pad[2] = swap_bits(bch, src[nwords] >> 8);
223 pad[3] = swap_bits(bch, src[nwords]);
224 memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
225}
226
227/**
228 * bch_encode - calculate BCH ecc parity of data
229 * @bch: BCH control structure
230 * @data: data to encode
231 * @len: data length in bytes
232 * @ecc: ecc parity data, must be initialized by caller
233 *
234 * The @ecc parity array is used both as input and output parameter, in order to
235 * allow incremental computations. It should be of the size indicated by member
236 * @ecc_bytes of @bch, and should be initialized to 0 before the first call.
237 *
238 * The exact number of computed ecc parity bits is given by member @ecc_bits of
239 * @bch; it may be less than m*t for large values of t.
240 */
241void bch_encode(struct bch_control *bch, const uint8_t *data,
242 unsigned int len, uint8_t *ecc)
243{
244 const unsigned int l = BCH_ECC_WORDS(bch)-1;
245 unsigned int i, mlen;
246 unsigned long m;
247 uint32_t w, r[BCH_ECC_MAX_WORDS];
248 const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r);
249 const uint32_t * const tab0 = bch->mod8_tab;
250 const uint32_t * const tab1 = tab0 + 256*(l+1);
251 const uint32_t * const tab2 = tab1 + 256*(l+1);
252 const uint32_t * const tab3 = tab2 + 256*(l+1);
253 const uint32_t *pdata, *p0, *p1, *p2, *p3;
254
255 if (WARN_ON(r_bytes > sizeof(r)))
256 return;
257
258 if (ecc) {
259 /* load ecc parity bytes into internal 32-bit buffer */
260 load_ecc8(bch, bch->ecc_buf, ecc);
261 } else {
262 memset(bch->ecc_buf, 0, r_bytes);
263 }
264
265 /* process first unaligned data bytes */
266 m = ((unsigned long)data) & 3;
267 if (m) {
268 mlen = (len < (4-m)) ? len : 4-m;
269 bch_encode_unaligned(bch, data, mlen, bch->ecc_buf);
270 data += mlen;
271 len -= mlen;
272 }
273
274 /* process 32-bit aligned data words */
275 pdata = (uint32_t *)data;
276 mlen = len/4;
277 data += 4*mlen;
278 len -= 4*mlen;
279 memcpy(r, bch->ecc_buf, r_bytes);
280
281 /*
282 * split each 32-bit word into 4 polynomials of weight 8 as follows:
283 *
284 * 31 ...24 23 ...16 15 ... 8 7 ... 0
285 * xxxxxxxx yyyyyyyy zzzzzzzz tttttttt
286 * tttttttt mod g = r0 (precomputed)
287 * zzzzzzzz 00000000 mod g = r1 (precomputed)
288 * yyyyyyyy 00000000 00000000 mod g = r2 (precomputed)
289 * xxxxxxxx 00000000 00000000 00000000 mod g = r3 (precomputed)
290 * xxxxxxxx yyyyyyyy zzzzzzzz tttttttt mod g = r0^r1^r2^r3
291 */
292 while (mlen--) {
293 /* input data is read in big-endian format */
294 w = cpu_to_be32(*pdata++);
295 if (bch->swap_bits)
296 w = (u32)swap_bits(bch, w) |
297 ((u32)swap_bits(bch, w >> 8) << 8) |
298 ((u32)swap_bits(bch, w >> 16) << 16) |
299 ((u32)swap_bits(bch, w >> 24) << 24);
300 w ^= r[0];
301 p0 = tab0 + (l+1)*((w >> 0) & 0xff);
302 p1 = tab1 + (l+1)*((w >> 8) & 0xff);
303 p2 = tab2 + (l+1)*((w >> 16) & 0xff);
304 p3 = tab3 + (l+1)*((w >> 24) & 0xff);
305
306 for (i = 0; i < l; i++)
307 r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i];
308
309 r[l] = p0[l]^p1[l]^p2[l]^p3[l];
310 }
311 memcpy(bch->ecc_buf, r, r_bytes);
312
313 /* process last unaligned bytes */
314 if (len)
315 bch_encode_unaligned(bch, data, len, bch->ecc_buf);
316
317 /* store ecc parity bytes into original parity buffer */
318 if (ecc)
319 store_ecc8(bch, ecc, bch->ecc_buf);
320}
321EXPORT_SYMBOL_GPL(bch_encode);
322
323static inline int modulo(struct bch_control *bch, unsigned int v)
324{
325 const unsigned int n = GF_N(bch);
326 while (v >= n) {
327 v -= n;
328 v = (v & n) + (v >> GF_M(bch));
329 }
330 return v;
331}
332
333/*
334 * shorter and faster modulo function, only works when v < 2N.
335 */
336static inline int mod_s(struct bch_control *bch, unsigned int v)
337{
338 const unsigned int n = GF_N(bch);
339 return (v < n) ? v : v-n;
340}
341
342static inline int deg(unsigned int poly)
343{
344 /* polynomial degree is the most-significant bit index */
345 return fls(poly)-1;
346}
347
348static inline int parity(unsigned int x)
349{
350 /*
351 * public domain code snippet, lifted from
352 * http://www-graphics.stanford.edu/~seander/bithacks.html
353 */
354 x ^= x >> 1;
355 x ^= x >> 2;
356 x = (x & 0x11111111U) * 0x11111111U;
357 return (x >> 28) & 1;
358}
359
360/* Galois field basic operations: multiply, divide, inverse, etc. */
361
362static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a,
363 unsigned int b)
364{
365 return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
366 bch->a_log_tab[b])] : 0;
367}
368
369static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a)
370{
371 return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0;
372}
373
374static inline unsigned int gf_div(struct bch_control *bch, unsigned int a,
375 unsigned int b)
376{
377 return a ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
378 GF_N(bch)-bch->a_log_tab[b])] : 0;
379}
380
381static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a)
382{
383 return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
384}
385
386static inline unsigned int a_pow(struct bch_control *bch, int i)
387{
388 return bch->a_pow_tab[modulo(bch, i)];
389}
390
391static inline int a_log(struct bch_control *bch, unsigned int x)
392{
393 return bch->a_log_tab[x];
394}
395
396static inline int a_ilog(struct bch_control *bch, unsigned int x)
397{
398 return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
399}
400
401/*
402 * compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
403 */
404static void compute_syndromes(struct bch_control *bch, uint32_t *ecc,
405 unsigned int *syn)
406{
407 int i, j, s;
408 unsigned int m;
409 uint32_t poly;
410 const int t = GF_T(bch);
411
412 s = bch->ecc_bits;
413
414 /* make sure extra bits in last ecc word are cleared */
415 m = ((unsigned int)s) & 31;
416 if (m)
417 ecc[s/32] &= ~((1u << (32-m))-1);
418 memset(syn, 0, 2*t*sizeof(*syn));
419
420 /* compute v(a^j) for j=1 .. 2t-1 */
421 do {
422 poly = *ecc++;
423 s -= 32;
424 while (poly) {
425 i = deg(poly);
426 for (j = 0; j < 2*t; j += 2)
427 syn[j] ^= a_pow(bch, (j+1)*(i+s));
428
429 poly ^= (1 << i);
430 }
431 } while (s > 0);
432
433 /* v(a^(2j)) = v(a^j)^2 */
434 for (j = 0; j < t; j++)
435 syn[2*j+1] = gf_sqr(bch, syn[j]);
436}
437
438static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src)
439{
440 memcpy(dst, src, GF_POLY_SZ(src->deg));
441}
442
443static int compute_error_locator_polynomial(struct bch_control *bch,
444 const unsigned int *syn)
445{
446 const unsigned int t = GF_T(bch);
447 const unsigned int n = GF_N(bch);
448 unsigned int i, j, tmp, l, pd = 1, d = syn[0];
449 struct gf_poly *elp = bch->elp;
450 struct gf_poly *pelp = bch->poly_2t[0];
451 struct gf_poly *elp_copy = bch->poly_2t[1];
452 int k, pp = -1;
453
454 memset(pelp, 0, GF_POLY_SZ(2*t));
455 memset(elp, 0, GF_POLY_SZ(2*t));
456
457 pelp->deg = 0;
458 pelp->c[0] = 1;
459 elp->deg = 0;
460 elp->c[0] = 1;
461
462 /* use simplified binary Berlekamp-Massey algorithm */
463 for (i = 0; (i < t) && (elp->deg <= t); i++) {
464 if (d) {
465 k = 2*i-pp;
466 gf_poly_copy(elp_copy, elp);
467 /* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */
468 tmp = a_log(bch, d)+n-a_log(bch, pd);
469 for (j = 0; j <= pelp->deg; j++) {
470 if (pelp->c[j]) {
471 l = a_log(bch, pelp->c[j]);
472 elp->c[j+k] ^= a_pow(bch, tmp+l);
473 }
474 }
475 /* compute l[i+1] = max(l[i]->c[l[p]+2*(i-p]) */
476 tmp = pelp->deg+k;
477 if (tmp > elp->deg) {
478 elp->deg = tmp;
479 gf_poly_copy(pelp, elp_copy);
480 pd = d;
481 pp = 2*i;
482 }
483 }
484 /* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */
485 if (i < t-1) {
486 d = syn[2*i+2];
487 for (j = 1; j <= elp->deg; j++)
488 d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]);
489 }
490 }
491 dbg("elp=%s\n", gf_poly_str(elp));
492 return (elp->deg > t) ? -1 : (int)elp->deg;
493}
494
495/*
496 * solve a m x m linear system in GF(2) with an expected number of solutions,
497 * and return the number of found solutions
498 */
499static int solve_linear_system(struct bch_control *bch, unsigned int *rows,
500 unsigned int *sol, int nsol)
501{
502 const int m = GF_M(bch);
503 unsigned int tmp, mask;
504 int rem, c, r, p, k, param[BCH_MAX_M];
505
506 k = 0;
507 mask = 1 << m;
508
509 /* Gaussian elimination */
510 for (c = 0; c < m; c++) {
511 rem = 0;
512 p = c-k;
513 /* find suitable row for elimination */
514 for (r = p; r < m; r++) {
515 if (rows[r] & mask) {
516 if (r != p) {
517 tmp = rows[r];
518 rows[r] = rows[p];
519 rows[p] = tmp;
520 }
521 rem = r+1;
522 break;
523 }
524 }
525 if (rem) {
526 /* perform elimination on remaining rows */
527 tmp = rows[p];
528 for (r = rem; r < m; r++) {
529 if (rows[r] & mask)
530 rows[r] ^= tmp;
531 }
532 } else {
533 /* elimination not needed, store defective row index */
534 param[k++] = c;
535 }
536 mask >>= 1;
537 }
538 /* rewrite system, inserting fake parameter rows */
539 if (k > 0) {
540 p = k;
541 for (r = m-1; r >= 0; r--) {
542 if ((r > m-1-k) && rows[r])
543 /* system has no solution */
544 return 0;
545
546 rows[r] = (p && (r == param[p-1])) ?
547 p--, 1u << (m-r) : rows[r-p];
548 }
549 }
550
551 if (nsol != (1 << k))
552 /* unexpected number of solutions */
553 return 0;
554
555 for (p = 0; p < nsol; p++) {
556 /* set parameters for p-th solution */
557 for (c = 0; c < k; c++)
558 rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1);
559
560 /* compute unique solution */
561 tmp = 0;
562 for (r = m-1; r >= 0; r--) {
563 mask = rows[r] & (tmp|1);
564 tmp |= parity(mask) << (m-r);
565 }
566 sol[p] = tmp >> 1;
567 }
568 return nsol;
569}
570
571/*
572 * this function builds and solves a linear system for finding roots of a degree
573 * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
574 */
575static int find_affine4_roots(struct bch_control *bch, unsigned int a,
576 unsigned int b, unsigned int c,
577 unsigned int *roots)
578{
579 int i, j, k;
580 const int m = GF_M(bch);
581 unsigned int mask = 0xff, t, rows[16] = {0,};
582
583 j = a_log(bch, b);
584 k = a_log(bch, a);
585 rows[0] = c;
586
587 /* buid linear system to solve X^4+aX^2+bX+c = 0 */
588 for (i = 0; i < m; i++) {
589 rows[i+1] = bch->a_pow_tab[4*i]^
590 (a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^
591 (b ? bch->a_pow_tab[mod_s(bch, j)] : 0);
592 j++;
593 k += 2;
594 }
595 /*
596 * transpose 16x16 matrix before passing it to linear solver
597 * warning: this code assumes m < 16
598 */
599 for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) {
600 for (k = 0; k < 16; k = (k+j+1) & ~j) {
601 t = ((rows[k] >> j)^rows[k+j]) & mask;
602 rows[k] ^= (t << j);
603 rows[k+j] ^= t;
604 }
605 }
606 return solve_linear_system(bch, rows, roots, 4);
607}
608
609/*
610 * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
611 */
612static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly,
613 unsigned int *roots)
614{
615 int n = 0;
616
617 if (poly->c[0])
618 /* poly[X] = bX+c with c!=0, root=c/b */
619 roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+
620 bch->a_log_tab[poly->c[1]]);
621 return n;
622}
623
624/*
625 * compute roots of a degree 2 polynomial over GF(2^m)
626 */
627static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly,
628 unsigned int *roots)
629{
630 int n = 0, i, l0, l1, l2;
631 unsigned int u, v, r;
632
633 if (poly->c[0] && poly->c[1]) {
634
635 l0 = bch->a_log_tab[poly->c[0]];
636 l1 = bch->a_log_tab[poly->c[1]];
637 l2 = bch->a_log_tab[poly->c[2]];
638
639 /* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */
640 u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1));
641 /*
642 * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
643 * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
644 * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
645 * i.e. r and r+1 are roots iff Tr(u)=0
646 */
647 r = 0;
648 v = u;
649 while (v) {
650 i = deg(v);
651 r ^= bch->xi_tab[i];
652 v ^= (1 << i);
653 }
654 /* verify root */
655 if ((gf_sqr(bch, r)^r) == u) {
656 /* reverse z=a/bX transformation and compute log(1/r) */
657 roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
658 bch->a_log_tab[r]+l2);
659 roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
660 bch->a_log_tab[r^1]+l2);
661 }
662 }
663 return n;
664}
665
666/*
667 * compute roots of a degree 3 polynomial over GF(2^m)
668 */
669static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly,
670 unsigned int *roots)
671{
672 int i, n = 0;
673 unsigned int a, b, c, a2, b2, c2, e3, tmp[4];
674
675 if (poly->c[0]) {
676 /* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */
677 e3 = poly->c[3];
678 c2 = gf_div(bch, poly->c[0], e3);
679 b2 = gf_div(bch, poly->c[1], e3);
680 a2 = gf_div(bch, poly->c[2], e3);
681
682 /* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */
683 c = gf_mul(bch, a2, c2); /* c = a2c2 */
684 b = gf_mul(bch, a2, b2)^c2; /* b = a2b2 + c2 */
685 a = gf_sqr(bch, a2)^b2; /* a = a2^2 + b2 */
686
687 /* find the 4 roots of this affine polynomial */
688 if (find_affine4_roots(bch, a, b, c, tmp) == 4) {
689 /* remove a2 from final list of roots */
690 for (i = 0; i < 4; i++) {
691 if (tmp[i] != a2)
692 roots[n++] = a_ilog(bch, tmp[i]);
693 }
694 }
695 }
696 return n;
697}
698
699/*
700 * compute roots of a degree 4 polynomial over GF(2^m)
701 */
702static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly,
703 unsigned int *roots)
704{
705 int i, l, n = 0;
706 unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4;
707
708 if (poly->c[0] == 0)
709 return 0;
710
711 /* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */
712 e4 = poly->c[4];
713 d = gf_div(bch, poly->c[0], e4);
714 c = gf_div(bch, poly->c[1], e4);
715 b = gf_div(bch, poly->c[2], e4);
716 a = gf_div(bch, poly->c[3], e4);
717
718 /* use Y=1/X transformation to get an affine polynomial */
719 if (a) {
720 /* first, eliminate cX by using z=X+e with ae^2+c=0 */
721 if (c) {
722 /* compute e such that e^2 = c/a */
723 f = gf_div(bch, c, a);
724 l = a_log(bch, f);
725 l += (l & 1) ? GF_N(bch) : 0;
726 e = a_pow(bch, l/2);
727 /*
728 * use transformation z=X+e:
729 * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
730 * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
731 * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
732 * z^4 + az^3 + b'z^2 + d'
733 */
734 d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d;
735 b = gf_mul(bch, a, e)^b;
736 }
737 /* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */
738 if (d == 0)
739 /* assume all roots have multiplicity 1 */
740 return 0;
741
742 c2 = gf_inv(bch, d);
743 b2 = gf_div(bch, a, d);
744 a2 = gf_div(bch, b, d);
745 } else {
746 /* polynomial is already affine */
747 c2 = d;
748 b2 = c;
749 a2 = b;
750 }
751 /* find the 4 roots of this affine polynomial */
752 if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) {
753 for (i = 0; i < 4; i++) {
754 /* post-process roots (reverse transformations) */
755 f = a ? gf_inv(bch, roots[i]) : roots[i];
756 roots[i] = a_ilog(bch, f^e);
757 }
758 n = 4;
759 }
760 return n;
761}
762
763/*
764 * build monic, log-based representation of a polynomial
765 */
766static void gf_poly_logrep(struct bch_control *bch,
767 const struct gf_poly *a, int *rep)
768{
769 int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]);
770
771 /* represent 0 values with -1; warning, rep[d] is not set to 1 */
772 for (i = 0; i < d; i++)
773 rep[i] = a->c[i] ? mod_s(bch, a_log(bch, a->c[i])+l) : -1;
774}
775
776/*
777 * compute polynomial Euclidean division remainder in GF(2^m)[X]
778 */
779static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a,
780 const struct gf_poly *b, int *rep)
781{
782 int la, p, m;
783 unsigned int i, j, *c = a->c;
784 const unsigned int d = b->deg;
785
786 if (a->deg < d)
787 return;
788
789 /* reuse or compute log representation of denominator */
790 if (!rep) {
791 rep = bch->cache;
792 gf_poly_logrep(bch, b, rep);
793 }
794
795 for (j = a->deg; j >= d; j--) {
796 if (c[j]) {
797 la = a_log(bch, c[j]);
798 p = j-d;
799 for (i = 0; i < d; i++, p++) {
800 m = rep[i];
801 if (m >= 0)
802 c[p] ^= bch->a_pow_tab[mod_s(bch,
803 m+la)];
804 }
805 }
806 }
807 a->deg = d-1;
808 while (!c[a->deg] && a->deg)
809 a->deg--;
810}
811
812/*
813 * compute polynomial Euclidean division quotient in GF(2^m)[X]
814 */
815static void gf_poly_div(struct bch_control *bch, struct gf_poly *a,
816 const struct gf_poly *b, struct gf_poly *q)
817{
818 if (a->deg >= b->deg) {
819 q->deg = a->deg-b->deg;
820 /* compute a mod b (modifies a) */
821 gf_poly_mod(bch, a, b, NULL);
822 /* quotient is stored in upper part of polynomial a */
823 memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int));
824 } else {
825 q->deg = 0;
826 q->c[0] = 0;
827 }
828}
829
830/*
831 * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
832 */
833static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a,
834 struct gf_poly *b)
835{
836 struct gf_poly *tmp;
837
838 dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
839
840 if (a->deg < b->deg) {
841 tmp = b;
842 b = a;
843 a = tmp;
844 }
845
846 while (b->deg > 0) {
847 gf_poly_mod(bch, a, b, NULL);
848 tmp = b;
849 b = a;
850 a = tmp;
851 }
852
853 dbg("%s\n", gf_poly_str(a));
854
855 return a;
856}
857
858/*
859 * Given a polynomial f and an integer k, compute Tr(a^kX) mod f
860 * This is used in Berlekamp Trace algorithm for splitting polynomials
861 */
862static void compute_trace_bk_mod(struct bch_control *bch, int k,
863 const struct gf_poly *f, struct gf_poly *z,
864 struct gf_poly *out)
865{
866 const int m = GF_M(bch);
867 int i, j;
868
869 /* z contains z^2j mod f */
870 z->deg = 1;
871 z->c[0] = 0;
872 z->c[1] = bch->a_pow_tab[k];
873
874 out->deg = 0;
875 memset(out, 0, GF_POLY_SZ(f->deg));
876
877 /* compute f log representation only once */
878 gf_poly_logrep(bch, f, bch->cache);
879
880 for (i = 0; i < m; i++) {
881 /* add a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */
882 for (j = z->deg; j >= 0; j--) {
883 out->c[j] ^= z->c[j];
884 z->c[2*j] = gf_sqr(bch, z->c[j]);
885 z->c[2*j+1] = 0;
886 }
887 if (z->deg > out->deg)
888 out->deg = z->deg;
889
890 if (i < m-1) {
891 z->deg *= 2;
892 /* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */
893 gf_poly_mod(bch, z, f, bch->cache);
894 }
895 }
896 while (!out->c[out->deg] && out->deg)
897 out->deg--;
898
899 dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
900}
901
902/*
903 * factor a polynomial using Berlekamp Trace algorithm (BTA)
904 */
905static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f,
906 struct gf_poly **g, struct gf_poly **h)
907{
908 struct gf_poly *f2 = bch->poly_2t[0];
909 struct gf_poly *q = bch->poly_2t[1];
910 struct gf_poly *tk = bch->poly_2t[2];
911 struct gf_poly *z = bch->poly_2t[3];
912 struct gf_poly *gcd;
913
914 dbg("factoring %s...\n", gf_poly_str(f));
915
916 *g = f;
917 *h = NULL;
918
919 /* tk = Tr(a^k.X) mod f */
920 compute_trace_bk_mod(bch, k, f, z, tk);
921
922 if (tk->deg > 0) {
923 /* compute g = gcd(f, tk) (destructive operation) */
924 gf_poly_copy(f2, f);
925 gcd = gf_poly_gcd(bch, f2, tk);
926 if (gcd->deg < f->deg) {
927 /* compute h=f/gcd(f,tk); this will modify f and q */
928 gf_poly_div(bch, f, gcd, q);
929 /* store g and h in-place (clobbering f) */
930 *h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly;
931 gf_poly_copy(*g, gcd);
932 gf_poly_copy(*h, q);
933 }
934 }
935}
936
937/*
938 * find roots of a polynomial, using BTZ algorithm; see the beginning of this
939 * file for details
940 */
941static int find_poly_roots(struct bch_control *bch, unsigned int k,
942 struct gf_poly *poly, unsigned int *roots)
943{
944 int cnt;
945 struct gf_poly *f1, *f2;
946
947 switch (poly->deg) {
948 /* handle low degree polynomials with ad hoc techniques */
949 case 1:
950 cnt = find_poly_deg1_roots(bch, poly, roots);
951 break;
952 case 2:
953 cnt = find_poly_deg2_roots(bch, poly, roots);
954 break;
955 case 3:
956 cnt = find_poly_deg3_roots(bch, poly, roots);
957 break;
958 case 4:
959 cnt = find_poly_deg4_roots(bch, poly, roots);
960 break;
961 default:
962 /* factor polynomial using Berlekamp Trace Algorithm (BTA) */
963 cnt = 0;
964 if (poly->deg && (k <= GF_M(bch))) {
965 factor_polynomial(bch, k, poly, &f1, &f2);
966 if (f1)
967 cnt += find_poly_roots(bch, k+1, f1, roots);
968 if (f2)
969 cnt += find_poly_roots(bch, k+1, f2, roots+cnt);
970 }
971 break;
972 }
973 return cnt;
974}
975
976#if defined(USE_CHIEN_SEARCH)
977/*
978 * exhaustive root search (Chien) implementation - not used, included only for
979 * reference/comparison tests
980 */
981static int chien_search(struct bch_control *bch, unsigned int len,
982 struct gf_poly *p, unsigned int *roots)
983{
984 int m;
985 unsigned int i, j, syn, syn0, count = 0;
986 const unsigned int k = 8*len+bch->ecc_bits;
987
988 /* use a log-based representation of polynomial */
989 gf_poly_logrep(bch, p, bch->cache);
990 bch->cache[p->deg] = 0;
991 syn0 = gf_div(bch, p->c[0], p->c[p->deg]);
992
993 for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
994 /* compute elp(a^i) */
995 for (j = 1, syn = syn0; j <= p->deg; j++) {
996 m = bch->cache[j];
997 if (m >= 0)
998 syn ^= a_pow(bch, m+j*i);
999 }
1000 if (syn == 0) {
1001 roots[count++] = GF_N(bch)-i;
1002 if (count == p->deg)
1003 break;
1004 }
1005 }
1006 return (count == p->deg) ? count : 0;
1007}
1008#define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
1009#endif /* USE_CHIEN_SEARCH */
1010
1011/**
1012 * bch_decode - decode received codeword and find bit error locations
1013 * @bch: BCH control structure
1014 * @data: received data, ignored if @calc_ecc is provided
1015 * @len: data length in bytes, must always be provided
1016 * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
1017 * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
1018 * @syn: hw computed syndrome data (if NULL, syndrome is calculated)
1019 * @errloc: output array of error locations
1020 *
1021 * Returns:
1022 * The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
1023 * invalid parameters were provided
1024 *
1025 * Depending on the available hw BCH support and the need to compute @calc_ecc
1026 * separately (using bch_encode()), this function should be called with one of
1027 * the following parameter configurations -
1028 *
1029 * by providing @data and @recv_ecc only:
1030 * bch_decode(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
1031 *
1032 * by providing @recv_ecc and @calc_ecc:
1033 * bch_decode(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
1034 *
1035 * by providing ecc = recv_ecc XOR calc_ecc:
1036 * bch_decode(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
1037 *
1038 * by providing syndrome results @syn:
1039 * bch_decode(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
1040 *
1041 * Once bch_decode() has successfully returned with a positive value, error
1042 * locations returned in array @errloc should be interpreted as follows -
1043 *
1044 * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
1045 * data correction)
1046 *
1047 * if (errloc[n] < 8*len), then n-th error is located in data and can be
1048 * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
1049 *
1050 * Note that this function does not perform any data correction by itself, it
1051 * merely indicates error locations.
1052 */
1053int bch_decode(struct bch_control *bch, const uint8_t *data, unsigned int len,
1054 const uint8_t *recv_ecc, const uint8_t *calc_ecc,
1055 const unsigned int *syn, unsigned int *errloc)
1056{
1057 const unsigned int ecc_words = BCH_ECC_WORDS(bch);
1058 unsigned int nbits;
1059 int i, err, nroots;
1060 uint32_t sum;
1061
1062 /* sanity check: make sure data length can be handled */
1063 if (8*len > (bch->n-bch->ecc_bits))
1064 return -EINVAL;
1065
1066 /* if caller does not provide syndromes, compute them */
1067 if (!syn) {
1068 if (!calc_ecc) {
1069 /* compute received data ecc into an internal buffer */
1070 if (!data || !recv_ecc)
1071 return -EINVAL;
1072 bch_encode(bch, data, len, NULL);
1073 } else {
1074 /* load provided calculated ecc */
1075 load_ecc8(bch, bch->ecc_buf, calc_ecc);
1076 }
1077 /* load received ecc or assume it was XORed in calc_ecc */
1078 if (recv_ecc) {
1079 load_ecc8(bch, bch->ecc_buf2, recv_ecc);
1080 /* XOR received and calculated ecc */
1081 for (i = 0, sum = 0; i < (int)ecc_words; i++) {
1082 bch->ecc_buf[i] ^= bch->ecc_buf2[i];
1083 sum |= bch->ecc_buf[i];
1084 }
1085 if (!sum)
1086 /* no error found */
1087 return 0;
1088 }
1089 compute_syndromes(bch, bch->ecc_buf, bch->syn);
1090 syn = bch->syn;
1091 }
1092
1093 err = compute_error_locator_polynomial(bch, syn);
1094 if (err > 0) {
1095 nroots = find_poly_roots(bch, 1, bch->elp, errloc);
1096 if (err != nroots)
1097 err = -1;
1098 }
1099 if (err > 0) {
1100 /* post-process raw error locations for easier correction */
1101 nbits = (len*8)+bch->ecc_bits;
1102 for (i = 0; i < err; i++) {
1103 if (errloc[i] >= nbits) {
1104 err = -1;
1105 break;
1106 }
1107 errloc[i] = nbits-1-errloc[i];
1108 if (!bch->swap_bits)
1109 errloc[i] = (errloc[i] & ~7) |
1110 (7-(errloc[i] & 7));
1111 }
1112 }
1113 return (err >= 0) ? err : -EBADMSG;
1114}
1115EXPORT_SYMBOL_GPL(bch_decode);
1116
1117/*
1118 * generate Galois field lookup tables
1119 */
1120static int build_gf_tables(struct bch_control *bch, unsigned int poly)
1121{
1122 unsigned int i, x = 1;
1123 const unsigned int k = 1 << deg(poly);
1124
1125 /* primitive polynomial must be of degree m */
1126 if (k != (1u << GF_M(bch)))
1127 return -1;
1128
1129 for (i = 0; i < GF_N(bch); i++) {
1130 bch->a_pow_tab[i] = x;
1131 bch->a_log_tab[x] = i;
1132 if (i && (x == 1))
1133 /* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
1134 return -1;
1135 x <<= 1;
1136 if (x & k)
1137 x ^= poly;
1138 }
1139 bch->a_pow_tab[GF_N(bch)] = 1;
1140 bch->a_log_tab[0] = 0;
1141
1142 return 0;
1143}
1144
1145/*
1146 * compute generator polynomial remainder tables for fast encoding
1147 */
1148static void build_mod8_tables(struct bch_control *bch, const uint32_t *g)
1149{
1150 int i, j, b, d;
1151 uint32_t data, hi, lo, *tab;
1152 const int l = BCH_ECC_WORDS(bch);
1153 const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32);
1154 const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32);
1155
1156 memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab));
1157
1158 for (i = 0; i < 256; i++) {
1159 /* p(X)=i is a small polynomial of weight <= 8 */
1160 for (b = 0; b < 4; b++) {
1161 /* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */
1162 tab = bch->mod8_tab + (b*256+i)*l;
1163 data = i << (8*b);
1164 while (data) {
1165 d = deg(data);
1166 /* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */
1167 data ^= g[0] >> (31-d);
1168 for (j = 0; j < ecclen; j++) {
1169 hi = (d < 31) ? g[j] << (d+1) : 0;
1170 lo = (j+1 < plen) ?
1171 g[j+1] >> (31-d) : 0;
1172 tab[j] ^= hi|lo;
1173 }
1174 }
1175 }
1176 }
1177}
1178
1179/*
1180 * build a base for factoring degree 2 polynomials
1181 */
1182static int build_deg2_base(struct bch_control *bch)
1183{
1184 const int m = GF_M(bch);
1185 int i, j, r;
1186 unsigned int sum, x, y, remaining, ak = 0, xi[BCH_MAX_M];
1187
1188 /* find k s.t. Tr(a^k) = 1 and 0 <= k < m */
1189 for (i = 0; i < m; i++) {
1190 for (j = 0, sum = 0; j < m; j++)
1191 sum ^= a_pow(bch, i*(1 << j));
1192
1193 if (sum) {
1194 ak = bch->a_pow_tab[i];
1195 break;
1196 }
1197 }
1198 /* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */
1199 remaining = m;
1200 memset(xi, 0, sizeof(xi));
1201
1202 for (x = 0; (x <= GF_N(bch)) && remaining; x++) {
1203 y = gf_sqr(bch, x)^x;
1204 for (i = 0; i < 2; i++) {
1205 r = a_log(bch, y);
1206 if (y && (r < m) && !xi[r]) {
1207 bch->xi_tab[r] = x;
1208 xi[r] = 1;
1209 remaining--;
1210 dbg("x%d = %x\n", r, x);
1211 break;
1212 }
1213 y ^= ak;
1214 }
1215 }
1216 /* should not happen but check anyway */
1217 return remaining ? -1 : 0;
1218}
1219
1220static void *bch_alloc(size_t size, int *err)
1221{
1222 void *ptr;
1223
1224 ptr = kmalloc(size, GFP_KERNEL);
1225 if (ptr == NULL)
1226 *err = 1;
1227 return ptr;
1228}
1229
1230/*
1231 * compute generator polynomial for given (m,t) parameters.
1232 */
1233static uint32_t *compute_generator_polynomial(struct bch_control *bch)
1234{
1235 const unsigned int m = GF_M(bch);
1236 const unsigned int t = GF_T(bch);
1237 int n, err = 0;
1238 unsigned int i, j, nbits, r, word, *roots;
1239 struct gf_poly *g;
1240 uint32_t *genpoly;
1241
1242 g = bch_alloc(GF_POLY_SZ(m*t), &err);
1243 roots = bch_alloc((bch->n+1)*sizeof(*roots), &err);
1244 genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err);
1245
1246 if (err) {
1247 kfree(genpoly);
1248 genpoly = NULL;
1249 goto finish;
1250 }
1251
1252 /* enumerate all roots of g(X) */
1253 memset(roots , 0, (bch->n+1)*sizeof(*roots));
1254 for (i = 0; i < t; i++) {
1255 for (j = 0, r = 2*i+1; j < m; j++) {
1256 roots[r] = 1;
1257 r = mod_s(bch, 2*r);
1258 }
1259 }
1260 /* build generator polynomial g(X) */
1261 g->deg = 0;
1262 g->c[0] = 1;
1263 for (i = 0; i < GF_N(bch); i++) {
1264 if (roots[i]) {
1265 /* multiply g(X) by (X+root) */
1266 r = bch->a_pow_tab[i];
1267 g->c[g->deg+1] = 1;
1268 for (j = g->deg; j > 0; j--)
1269 g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1];
1270
1271 g->c[0] = gf_mul(bch, g->c[0], r);
1272 g->deg++;
1273 }
1274 }
1275 /* store left-justified binary representation of g(X) */
1276 n = g->deg+1;
1277 i = 0;
1278
1279 while (n > 0) {
1280 nbits = (n > 32) ? 32 : n;
1281 for (j = 0, word = 0; j < nbits; j++) {
1282 if (g->c[n-1-j])
1283 word |= 1u << (31-j);
1284 }
1285 genpoly[i++] = word;
1286 n -= nbits;
1287 }
1288 bch->ecc_bits = g->deg;
1289
1290finish:
1291 kfree(g);
1292 kfree(roots);
1293
1294 return genpoly;
1295}
1296
1297/**
1298 * bch_init - initialize a BCH encoder/decoder
1299 * @m: Galois field order, should be in the range 5-15
1300 * @t: maximum error correction capability, in bits
1301 * @prim_poly: user-provided primitive polynomial (or 0 to use default)
1302 * @swap_bits: swap bits within data and syndrome bytes
1303 *
1304 * Returns:
1305 * a newly allocated BCH control structure if successful, NULL otherwise
1306 *
1307 * This initialization can take some time, as lookup tables are built for fast
1308 * encoding/decoding; make sure not to call this function from a time critical
1309 * path. Usually, bch_init() should be called on module/driver init and
1310 * bch_free() should be called to release memory on exit.
1311 *
1312 * You may provide your own primitive polynomial of degree @m in argument
1313 * @prim_poly, or let bch_init() use its default polynomial.
1314 *
1315 * Once bch_init() has successfully returned a pointer to a newly allocated
1316 * BCH control structure, ecc length in bytes is given by member @ecc_bytes of
1317 * the structure.
1318 */
1319struct bch_control *bch_init(int m, int t, unsigned int prim_poly,
1320 bool swap_bits)
1321{
1322 int err = 0;
1323 unsigned int i, words;
1324 uint32_t *genpoly;
1325 struct bch_control *bch = NULL;
1326
1327 const int min_m = 5;
1328
1329 /* default primitive polynomials */
1330 static const unsigned int prim_poly_tab[] = {
1331 0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b,
1332 0x402b, 0x8003,
1333 };
1334
1335#if defined(CONFIG_BCH_CONST_PARAMS)
1336 if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) {
1337 printk(KERN_ERR "bch encoder/decoder was configured to support "
1338 "parameters m=%d, t=%d only!\n",
1339 CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
1340 goto fail;
1341 }
1342#endif
1343 if ((m < min_m) || (m > BCH_MAX_M))
1344 /*
1345 * values of m greater than 15 are not currently supported;
1346 * supporting m > 15 would require changing table base type
1347 * (uint16_t) and a small patch in matrix transposition
1348 */
1349 goto fail;
1350
1351 if (t > BCH_MAX_T)
1352 /*
1353 * we can support larger than 64 bits if necessary, at the
1354 * cost of higher stack usage.
1355 */
1356 goto fail;
1357
1358 /* sanity checks */
1359 if ((t < 1) || (m*t >= ((1 << m)-1)))
1360 /* invalid t value */
1361 goto fail;
1362
1363 /* select a primitive polynomial for generating GF(2^m) */
1364 if (prim_poly == 0)
1365 prim_poly = prim_poly_tab[m-min_m];
1366
1367 bch = kzalloc(sizeof(*bch), GFP_KERNEL);
1368 if (bch == NULL)
1369 goto fail;
1370
1371 bch->m = m;
1372 bch->t = t;
1373 bch->n = (1 << m)-1;
1374 words = DIV_ROUND_UP(m*t, 32);
1375 bch->ecc_bytes = DIV_ROUND_UP(m*t, 8);
1376 bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err);
1377 bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err);
1378 bch->mod8_tab = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err);
1379 bch->ecc_buf = bch_alloc(words*sizeof(*bch->ecc_buf), &err);
1380 bch->ecc_buf2 = bch_alloc(words*sizeof(*bch->ecc_buf2), &err);
1381 bch->xi_tab = bch_alloc(m*sizeof(*bch->xi_tab), &err);
1382 bch->syn = bch_alloc(2*t*sizeof(*bch->syn), &err);
1383 bch->cache = bch_alloc(2*t*sizeof(*bch->cache), &err);
1384 bch->elp = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err);
1385 bch->swap_bits = swap_bits;
1386
1387 for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1388 bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err);
1389
1390 if (err)
1391 goto fail;
1392
1393 err = build_gf_tables(bch, prim_poly);
1394 if (err)
1395 goto fail;
1396
1397 /* use generator polynomial for computing encoding tables */
1398 genpoly = compute_generator_polynomial(bch);
1399 if (genpoly == NULL)
1400 goto fail;
1401
1402 build_mod8_tables(bch, genpoly);
1403 kfree(genpoly);
1404
1405 err = build_deg2_base(bch);
1406 if (err)
1407 goto fail;
1408
1409 return bch;
1410
1411fail:
1412 bch_free(bch);
1413 return NULL;
1414}
1415EXPORT_SYMBOL_GPL(bch_init);
1416
1417/**
1418 * bch_free - free the BCH control structure
1419 * @bch: BCH control structure to release
1420 */
1421void bch_free(struct bch_control *bch)
1422{
1423 unsigned int i;
1424
1425 if (bch) {
1426 kfree(bch->a_pow_tab);
1427 kfree(bch->a_log_tab);
1428 kfree(bch->mod8_tab);
1429 kfree(bch->ecc_buf);
1430 kfree(bch->ecc_buf2);
1431 kfree(bch->xi_tab);
1432 kfree(bch->syn);
1433 kfree(bch->cache);
1434 kfree(bch->elp);
1435
1436 for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1437 kfree(bch->poly_2t[i]);
1438
1439 kfree(bch);
1440 }
1441}
1442EXPORT_SYMBOL_GPL(bch_free);
1443
1444MODULE_LICENSE("GPL");
1445MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
1446MODULE_DESCRIPTION("Binary BCH encoder/decoder");
1/*
2 * Generic binary BCH encoding/decoding library
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License version 2 as published by
6 * the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 51
15 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
16 *
17 * Copyright © 2011 Parrot S.A.
18 *
19 * Author: Ivan Djelic <ivan.djelic@parrot.com>
20 *
21 * Description:
22 *
23 * This library provides runtime configurable encoding/decoding of binary
24 * Bose-Chaudhuri-Hocquenghem (BCH) codes.
25 *
26 * Call bch_init to get a pointer to a newly allocated bch_control structure for
27 * the given m (Galois field order), t (error correction capability) and
28 * (optional) primitive polynomial parameters.
29 *
30 * Call bch_encode to compute and store ecc parity bytes to a given buffer.
31 * Call bch_decode to detect and locate errors in received data.
32 *
33 * On systems supporting hw BCH features, intermediate results may be provided
34 * to bch_decode in order to skip certain steps. See bch_decode() documentation
35 * for details.
36 *
37 * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
38 * parameters m and t; thus allowing extra compiler optimizations and providing
39 * better (up to 2x) encoding performance. Using this option makes sense when
40 * (m,t) are fixed and known in advance, e.g. when using BCH error correction
41 * on a particular NAND flash device.
42 *
43 * Algorithmic details:
44 *
45 * Encoding is performed by processing 32 input bits in parallel, using 4
46 * remainder lookup tables.
47 *
48 * The final stage of decoding involves the following internal steps:
49 * a. Syndrome computation
50 * b. Error locator polynomial computation using Berlekamp-Massey algorithm
51 * c. Error locator root finding (by far the most expensive step)
52 *
53 * In this implementation, step c is not performed using the usual Chien search.
54 * Instead, an alternative approach described in [1] is used. It consists in
55 * factoring the error locator polynomial using the Berlekamp Trace algorithm
56 * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
57 * solving techniques [2] are used. The resulting algorithm, called BTZ, yields
58 * much better performance than Chien search for usual (m,t) values (typically
59 * m >= 13, t < 32, see [1]).
60 *
61 * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
62 * of characteristic 2, in: Western European Workshop on Research in Cryptology
63 * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
64 * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
65 * finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
66 */
67
68#include <linux/kernel.h>
69#include <linux/errno.h>
70#include <linux/init.h>
71#include <linux/module.h>
72#include <linux/slab.h>
73#include <linux/bitops.h>
74#include <linux/bitrev.h>
75#include <asm/byteorder.h>
76#include <linux/bch.h>
77
78#if defined(CONFIG_BCH_CONST_PARAMS)
79#define GF_M(_p) (CONFIG_BCH_CONST_M)
80#define GF_T(_p) (CONFIG_BCH_CONST_T)
81#define GF_N(_p) ((1 << (CONFIG_BCH_CONST_M))-1)
82#define BCH_MAX_M (CONFIG_BCH_CONST_M)
83#define BCH_MAX_T (CONFIG_BCH_CONST_T)
84#else
85#define GF_M(_p) ((_p)->m)
86#define GF_T(_p) ((_p)->t)
87#define GF_N(_p) ((_p)->n)
88#define BCH_MAX_M 15 /* 2KB */
89#define BCH_MAX_T 64 /* 64 bit correction */
90#endif
91
92#define BCH_ECC_WORDS(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
93#define BCH_ECC_BYTES(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
94
95#define BCH_ECC_MAX_WORDS DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32)
96
97#ifndef dbg
98#define dbg(_fmt, args...) do {} while (0)
99#endif
100
101/*
102 * represent a polynomial over GF(2^m)
103 */
104struct gf_poly {
105 unsigned int deg; /* polynomial degree */
106 unsigned int c[]; /* polynomial terms */
107};
108
109/* given its degree, compute a polynomial size in bytes */
110#define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))
111
112/* polynomial of degree 1 */
113struct gf_poly_deg1 {
114 struct gf_poly poly;
115 unsigned int c[2];
116};
117
118static u8 swap_bits(struct bch_control *bch, u8 in)
119{
120 if (!bch->swap_bits)
121 return in;
122
123 return bitrev8(in);
124}
125
126/*
127 * same as bch_encode(), but process input data one byte at a time
128 */
129static void bch_encode_unaligned(struct bch_control *bch,
130 const unsigned char *data, unsigned int len,
131 uint32_t *ecc)
132{
133 int i;
134 const uint32_t *p;
135 const int l = BCH_ECC_WORDS(bch)-1;
136
137 while (len--) {
138 u8 tmp = swap_bits(bch, *data++);
139
140 p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(tmp)) & 0xff);
141
142 for (i = 0; i < l; i++)
143 ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++);
144
145 ecc[l] = (ecc[l] << 8)^(*p);
146 }
147}
148
149/*
150 * convert ecc bytes to aligned, zero-padded 32-bit ecc words
151 */
152static void load_ecc8(struct bch_control *bch, uint32_t *dst,
153 const uint8_t *src)
154{
155 uint8_t pad[4] = {0, 0, 0, 0};
156 unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
157
158 for (i = 0; i < nwords; i++, src += 4)
159 dst[i] = ((u32)swap_bits(bch, src[0]) << 24) |
160 ((u32)swap_bits(bch, src[1]) << 16) |
161 ((u32)swap_bits(bch, src[2]) << 8) |
162 swap_bits(bch, src[3]);
163
164 memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords);
165 dst[nwords] = ((u32)swap_bits(bch, pad[0]) << 24) |
166 ((u32)swap_bits(bch, pad[1]) << 16) |
167 ((u32)swap_bits(bch, pad[2]) << 8) |
168 swap_bits(bch, pad[3]);
169}
170
171/*
172 * convert 32-bit ecc words to ecc bytes
173 */
174static void store_ecc8(struct bch_control *bch, uint8_t *dst,
175 const uint32_t *src)
176{
177 uint8_t pad[4];
178 unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
179
180 for (i = 0; i < nwords; i++) {
181 *dst++ = swap_bits(bch, src[i] >> 24);
182 *dst++ = swap_bits(bch, src[i] >> 16);
183 *dst++ = swap_bits(bch, src[i] >> 8);
184 *dst++ = swap_bits(bch, src[i]);
185 }
186 pad[0] = swap_bits(bch, src[nwords] >> 24);
187 pad[1] = swap_bits(bch, src[nwords] >> 16);
188 pad[2] = swap_bits(bch, src[nwords] >> 8);
189 pad[3] = swap_bits(bch, src[nwords]);
190 memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
191}
192
193/**
194 * bch_encode - calculate BCH ecc parity of data
195 * @bch: BCH control structure
196 * @data: data to encode
197 * @len: data length in bytes
198 * @ecc: ecc parity data, must be initialized by caller
199 *
200 * The @ecc parity array is used both as input and output parameter, in order to
201 * allow incremental computations. It should be of the size indicated by member
202 * @ecc_bytes of @bch, and should be initialized to 0 before the first call.
203 *
204 * The exact number of computed ecc parity bits is given by member @ecc_bits of
205 * @bch; it may be less than m*t for large values of t.
206 */
207void bch_encode(struct bch_control *bch, const uint8_t *data,
208 unsigned int len, uint8_t *ecc)
209{
210 const unsigned int l = BCH_ECC_WORDS(bch)-1;
211 unsigned int i, mlen;
212 unsigned long m;
213 uint32_t w, r[BCH_ECC_MAX_WORDS];
214 const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r);
215 const uint32_t * const tab0 = bch->mod8_tab;
216 const uint32_t * const tab1 = tab0 + 256*(l+1);
217 const uint32_t * const tab2 = tab1 + 256*(l+1);
218 const uint32_t * const tab3 = tab2 + 256*(l+1);
219 const uint32_t *pdata, *p0, *p1, *p2, *p3;
220
221 if (WARN_ON(r_bytes > sizeof(r)))
222 return;
223
224 if (ecc) {
225 /* load ecc parity bytes into internal 32-bit buffer */
226 load_ecc8(bch, bch->ecc_buf, ecc);
227 } else {
228 memset(bch->ecc_buf, 0, r_bytes);
229 }
230
231 /* process first unaligned data bytes */
232 m = ((unsigned long)data) & 3;
233 if (m) {
234 mlen = (len < (4-m)) ? len : 4-m;
235 bch_encode_unaligned(bch, data, mlen, bch->ecc_buf);
236 data += mlen;
237 len -= mlen;
238 }
239
240 /* process 32-bit aligned data words */
241 pdata = (uint32_t *)data;
242 mlen = len/4;
243 data += 4*mlen;
244 len -= 4*mlen;
245 memcpy(r, bch->ecc_buf, r_bytes);
246
247 /*
248 * split each 32-bit word into 4 polynomials of weight 8 as follows:
249 *
250 * 31 ...24 23 ...16 15 ... 8 7 ... 0
251 * xxxxxxxx yyyyyyyy zzzzzzzz tttttttt
252 * tttttttt mod g = r0 (precomputed)
253 * zzzzzzzz 00000000 mod g = r1 (precomputed)
254 * yyyyyyyy 00000000 00000000 mod g = r2 (precomputed)
255 * xxxxxxxx 00000000 00000000 00000000 mod g = r3 (precomputed)
256 * xxxxxxxx yyyyyyyy zzzzzzzz tttttttt mod g = r0^r1^r2^r3
257 */
258 while (mlen--) {
259 /* input data is read in big-endian format */
260 w = cpu_to_be32(*pdata++);
261 if (bch->swap_bits)
262 w = (u32)swap_bits(bch, w) |
263 ((u32)swap_bits(bch, w >> 8) << 8) |
264 ((u32)swap_bits(bch, w >> 16) << 16) |
265 ((u32)swap_bits(bch, w >> 24) << 24);
266 w ^= r[0];
267 p0 = tab0 + (l+1)*((w >> 0) & 0xff);
268 p1 = tab1 + (l+1)*((w >> 8) & 0xff);
269 p2 = tab2 + (l+1)*((w >> 16) & 0xff);
270 p3 = tab3 + (l+1)*((w >> 24) & 0xff);
271
272 for (i = 0; i < l; i++)
273 r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i];
274
275 r[l] = p0[l]^p1[l]^p2[l]^p3[l];
276 }
277 memcpy(bch->ecc_buf, r, r_bytes);
278
279 /* process last unaligned bytes */
280 if (len)
281 bch_encode_unaligned(bch, data, len, bch->ecc_buf);
282
283 /* store ecc parity bytes into original parity buffer */
284 if (ecc)
285 store_ecc8(bch, ecc, bch->ecc_buf);
286}
287EXPORT_SYMBOL_GPL(bch_encode);
288
289static inline int modulo(struct bch_control *bch, unsigned int v)
290{
291 const unsigned int n = GF_N(bch);
292 while (v >= n) {
293 v -= n;
294 v = (v & n) + (v >> GF_M(bch));
295 }
296 return v;
297}
298
299/*
300 * shorter and faster modulo function, only works when v < 2N.
301 */
302static inline int mod_s(struct bch_control *bch, unsigned int v)
303{
304 const unsigned int n = GF_N(bch);
305 return (v < n) ? v : v-n;
306}
307
308static inline int deg(unsigned int poly)
309{
310 /* polynomial degree is the most-significant bit index */
311 return fls(poly)-1;
312}
313
314static inline int parity(unsigned int x)
315{
316 /*
317 * public domain code snippet, lifted from
318 * http://www-graphics.stanford.edu/~seander/bithacks.html
319 */
320 x ^= x >> 1;
321 x ^= x >> 2;
322 x = (x & 0x11111111U) * 0x11111111U;
323 return (x >> 28) & 1;
324}
325
326/* Galois field basic operations: multiply, divide, inverse, etc. */
327
328static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a,
329 unsigned int b)
330{
331 return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
332 bch->a_log_tab[b])] : 0;
333}
334
335static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a)
336{
337 return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0;
338}
339
340static inline unsigned int gf_div(struct bch_control *bch, unsigned int a,
341 unsigned int b)
342{
343 return a ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
344 GF_N(bch)-bch->a_log_tab[b])] : 0;
345}
346
347static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a)
348{
349 return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
350}
351
352static inline unsigned int a_pow(struct bch_control *bch, int i)
353{
354 return bch->a_pow_tab[modulo(bch, i)];
355}
356
357static inline int a_log(struct bch_control *bch, unsigned int x)
358{
359 return bch->a_log_tab[x];
360}
361
362static inline int a_ilog(struct bch_control *bch, unsigned int x)
363{
364 return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
365}
366
367/*
368 * compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
369 */
370static void compute_syndromes(struct bch_control *bch, uint32_t *ecc,
371 unsigned int *syn)
372{
373 int i, j, s;
374 unsigned int m;
375 uint32_t poly;
376 const int t = GF_T(bch);
377
378 s = bch->ecc_bits;
379
380 /* make sure extra bits in last ecc word are cleared */
381 m = ((unsigned int)s) & 31;
382 if (m)
383 ecc[s/32] &= ~((1u << (32-m))-1);
384 memset(syn, 0, 2*t*sizeof(*syn));
385
386 /* compute v(a^j) for j=1 .. 2t-1 */
387 do {
388 poly = *ecc++;
389 s -= 32;
390 while (poly) {
391 i = deg(poly);
392 for (j = 0; j < 2*t; j += 2)
393 syn[j] ^= a_pow(bch, (j+1)*(i+s));
394
395 poly ^= (1 << i);
396 }
397 } while (s > 0);
398
399 /* v(a^(2j)) = v(a^j)^2 */
400 for (j = 0; j < t; j++)
401 syn[2*j+1] = gf_sqr(bch, syn[j]);
402}
403
404static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src)
405{
406 memcpy(dst, src, GF_POLY_SZ(src->deg));
407}
408
409static int compute_error_locator_polynomial(struct bch_control *bch,
410 const unsigned int *syn)
411{
412 const unsigned int t = GF_T(bch);
413 const unsigned int n = GF_N(bch);
414 unsigned int i, j, tmp, l, pd = 1, d = syn[0];
415 struct gf_poly *elp = bch->elp;
416 struct gf_poly *pelp = bch->poly_2t[0];
417 struct gf_poly *elp_copy = bch->poly_2t[1];
418 int k, pp = -1;
419
420 memset(pelp, 0, GF_POLY_SZ(2*t));
421 memset(elp, 0, GF_POLY_SZ(2*t));
422
423 pelp->deg = 0;
424 pelp->c[0] = 1;
425 elp->deg = 0;
426 elp->c[0] = 1;
427
428 /* use simplified binary Berlekamp-Massey algorithm */
429 for (i = 0; (i < t) && (elp->deg <= t); i++) {
430 if (d) {
431 k = 2*i-pp;
432 gf_poly_copy(elp_copy, elp);
433 /* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */
434 tmp = a_log(bch, d)+n-a_log(bch, pd);
435 for (j = 0; j <= pelp->deg; j++) {
436 if (pelp->c[j]) {
437 l = a_log(bch, pelp->c[j]);
438 elp->c[j+k] ^= a_pow(bch, tmp+l);
439 }
440 }
441 /* compute l[i+1] = max(l[i]->c[l[p]+2*(i-p]) */
442 tmp = pelp->deg+k;
443 if (tmp > elp->deg) {
444 elp->deg = tmp;
445 gf_poly_copy(pelp, elp_copy);
446 pd = d;
447 pp = 2*i;
448 }
449 }
450 /* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */
451 if (i < t-1) {
452 d = syn[2*i+2];
453 for (j = 1; j <= elp->deg; j++)
454 d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]);
455 }
456 }
457 dbg("elp=%s\n", gf_poly_str(elp));
458 return (elp->deg > t) ? -1 : (int)elp->deg;
459}
460
461/*
462 * solve a m x m linear system in GF(2) with an expected number of solutions,
463 * and return the number of found solutions
464 */
465static int solve_linear_system(struct bch_control *bch, unsigned int *rows,
466 unsigned int *sol, int nsol)
467{
468 const int m = GF_M(bch);
469 unsigned int tmp, mask;
470 int rem, c, r, p, k, param[BCH_MAX_M];
471
472 k = 0;
473 mask = 1 << m;
474
475 /* Gaussian elimination */
476 for (c = 0; c < m; c++) {
477 rem = 0;
478 p = c-k;
479 /* find suitable row for elimination */
480 for (r = p; r < m; r++) {
481 if (rows[r] & mask) {
482 if (r != p) {
483 tmp = rows[r];
484 rows[r] = rows[p];
485 rows[p] = tmp;
486 }
487 rem = r+1;
488 break;
489 }
490 }
491 if (rem) {
492 /* perform elimination on remaining rows */
493 tmp = rows[p];
494 for (r = rem; r < m; r++) {
495 if (rows[r] & mask)
496 rows[r] ^= tmp;
497 }
498 } else {
499 /* elimination not needed, store defective row index */
500 param[k++] = c;
501 }
502 mask >>= 1;
503 }
504 /* rewrite system, inserting fake parameter rows */
505 if (k > 0) {
506 p = k;
507 for (r = m-1; r >= 0; r--) {
508 if ((r > m-1-k) && rows[r])
509 /* system has no solution */
510 return 0;
511
512 rows[r] = (p && (r == param[p-1])) ?
513 p--, 1u << (m-r) : rows[r-p];
514 }
515 }
516
517 if (nsol != (1 << k))
518 /* unexpected number of solutions */
519 return 0;
520
521 for (p = 0; p < nsol; p++) {
522 /* set parameters for p-th solution */
523 for (c = 0; c < k; c++)
524 rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1);
525
526 /* compute unique solution */
527 tmp = 0;
528 for (r = m-1; r >= 0; r--) {
529 mask = rows[r] & (tmp|1);
530 tmp |= parity(mask) << (m-r);
531 }
532 sol[p] = tmp >> 1;
533 }
534 return nsol;
535}
536
537/*
538 * this function builds and solves a linear system for finding roots of a degree
539 * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
540 */
541static int find_affine4_roots(struct bch_control *bch, unsigned int a,
542 unsigned int b, unsigned int c,
543 unsigned int *roots)
544{
545 int i, j, k;
546 const int m = GF_M(bch);
547 unsigned int mask = 0xff, t, rows[16] = {0,};
548
549 j = a_log(bch, b);
550 k = a_log(bch, a);
551 rows[0] = c;
552
553 /* build linear system to solve X^4+aX^2+bX+c = 0 */
554 for (i = 0; i < m; i++) {
555 rows[i+1] = bch->a_pow_tab[4*i]^
556 (a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^
557 (b ? bch->a_pow_tab[mod_s(bch, j)] : 0);
558 j++;
559 k += 2;
560 }
561 /*
562 * transpose 16x16 matrix before passing it to linear solver
563 * warning: this code assumes m < 16
564 */
565 for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) {
566 for (k = 0; k < 16; k = (k+j+1) & ~j) {
567 t = ((rows[k] >> j)^rows[k+j]) & mask;
568 rows[k] ^= (t << j);
569 rows[k+j] ^= t;
570 }
571 }
572 return solve_linear_system(bch, rows, roots, 4);
573}
574
575/*
576 * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
577 */
578static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly,
579 unsigned int *roots)
580{
581 int n = 0;
582
583 if (poly->c[0])
584 /* poly[X] = bX+c with c!=0, root=c/b */
585 roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+
586 bch->a_log_tab[poly->c[1]]);
587 return n;
588}
589
590/*
591 * compute roots of a degree 2 polynomial over GF(2^m)
592 */
593static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly,
594 unsigned int *roots)
595{
596 int n = 0, i, l0, l1, l2;
597 unsigned int u, v, r;
598
599 if (poly->c[0] && poly->c[1]) {
600
601 l0 = bch->a_log_tab[poly->c[0]];
602 l1 = bch->a_log_tab[poly->c[1]];
603 l2 = bch->a_log_tab[poly->c[2]];
604
605 /* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */
606 u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1));
607 /*
608 * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
609 * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
610 * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
611 * i.e. r and r+1 are roots iff Tr(u)=0
612 */
613 r = 0;
614 v = u;
615 while (v) {
616 i = deg(v);
617 r ^= bch->xi_tab[i];
618 v ^= (1 << i);
619 }
620 /* verify root */
621 if ((gf_sqr(bch, r)^r) == u) {
622 /* reverse z=a/bX transformation and compute log(1/r) */
623 roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
624 bch->a_log_tab[r]+l2);
625 roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
626 bch->a_log_tab[r^1]+l2);
627 }
628 }
629 return n;
630}
631
632/*
633 * compute roots of a degree 3 polynomial over GF(2^m)
634 */
635static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly,
636 unsigned int *roots)
637{
638 int i, n = 0;
639 unsigned int a, b, c, a2, b2, c2, e3, tmp[4];
640
641 if (poly->c[0]) {
642 /* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */
643 e3 = poly->c[3];
644 c2 = gf_div(bch, poly->c[0], e3);
645 b2 = gf_div(bch, poly->c[1], e3);
646 a2 = gf_div(bch, poly->c[2], e3);
647
648 /* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */
649 c = gf_mul(bch, a2, c2); /* c = a2c2 */
650 b = gf_mul(bch, a2, b2)^c2; /* b = a2b2 + c2 */
651 a = gf_sqr(bch, a2)^b2; /* a = a2^2 + b2 */
652
653 /* find the 4 roots of this affine polynomial */
654 if (find_affine4_roots(bch, a, b, c, tmp) == 4) {
655 /* remove a2 from final list of roots */
656 for (i = 0; i < 4; i++) {
657 if (tmp[i] != a2)
658 roots[n++] = a_ilog(bch, tmp[i]);
659 }
660 }
661 }
662 return n;
663}
664
665/*
666 * compute roots of a degree 4 polynomial over GF(2^m)
667 */
668static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly,
669 unsigned int *roots)
670{
671 int i, l, n = 0;
672 unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4;
673
674 if (poly->c[0] == 0)
675 return 0;
676
677 /* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */
678 e4 = poly->c[4];
679 d = gf_div(bch, poly->c[0], e4);
680 c = gf_div(bch, poly->c[1], e4);
681 b = gf_div(bch, poly->c[2], e4);
682 a = gf_div(bch, poly->c[3], e4);
683
684 /* use Y=1/X transformation to get an affine polynomial */
685 if (a) {
686 /* first, eliminate cX by using z=X+e with ae^2+c=0 */
687 if (c) {
688 /* compute e such that e^2 = c/a */
689 f = gf_div(bch, c, a);
690 l = a_log(bch, f);
691 l += (l & 1) ? GF_N(bch) : 0;
692 e = a_pow(bch, l/2);
693 /*
694 * use transformation z=X+e:
695 * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
696 * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
697 * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
698 * z^4 + az^3 + b'z^2 + d'
699 */
700 d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d;
701 b = gf_mul(bch, a, e)^b;
702 }
703 /* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */
704 if (d == 0)
705 /* assume all roots have multiplicity 1 */
706 return 0;
707
708 c2 = gf_inv(bch, d);
709 b2 = gf_div(bch, a, d);
710 a2 = gf_div(bch, b, d);
711 } else {
712 /* polynomial is already affine */
713 c2 = d;
714 b2 = c;
715 a2 = b;
716 }
717 /* find the 4 roots of this affine polynomial */
718 if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) {
719 for (i = 0; i < 4; i++) {
720 /* post-process roots (reverse transformations) */
721 f = a ? gf_inv(bch, roots[i]) : roots[i];
722 roots[i] = a_ilog(bch, f^e);
723 }
724 n = 4;
725 }
726 return n;
727}
728
729/*
730 * build monic, log-based representation of a polynomial
731 */
732static void gf_poly_logrep(struct bch_control *bch,
733 const struct gf_poly *a, int *rep)
734{
735 int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]);
736
737 /* represent 0 values with -1; warning, rep[d] is not set to 1 */
738 for (i = 0; i < d; i++)
739 rep[i] = a->c[i] ? mod_s(bch, a_log(bch, a->c[i])+l) : -1;
740}
741
742/*
743 * compute polynomial Euclidean division remainder in GF(2^m)[X]
744 */
745static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a,
746 const struct gf_poly *b, int *rep)
747{
748 int la, p, m;
749 unsigned int i, j, *c = a->c;
750 const unsigned int d = b->deg;
751
752 if (a->deg < d)
753 return;
754
755 /* reuse or compute log representation of denominator */
756 if (!rep) {
757 rep = bch->cache;
758 gf_poly_logrep(bch, b, rep);
759 }
760
761 for (j = a->deg; j >= d; j--) {
762 if (c[j]) {
763 la = a_log(bch, c[j]);
764 p = j-d;
765 for (i = 0; i < d; i++, p++) {
766 m = rep[i];
767 if (m >= 0)
768 c[p] ^= bch->a_pow_tab[mod_s(bch,
769 m+la)];
770 }
771 }
772 }
773 a->deg = d-1;
774 while (!c[a->deg] && a->deg)
775 a->deg--;
776}
777
778/*
779 * compute polynomial Euclidean division quotient in GF(2^m)[X]
780 */
781static void gf_poly_div(struct bch_control *bch, struct gf_poly *a,
782 const struct gf_poly *b, struct gf_poly *q)
783{
784 if (a->deg >= b->deg) {
785 q->deg = a->deg-b->deg;
786 /* compute a mod b (modifies a) */
787 gf_poly_mod(bch, a, b, NULL);
788 /* quotient is stored in upper part of polynomial a */
789 memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int));
790 } else {
791 q->deg = 0;
792 q->c[0] = 0;
793 }
794}
795
796/*
797 * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
798 */
799static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a,
800 struct gf_poly *b)
801{
802 struct gf_poly *tmp;
803
804 dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
805
806 if (a->deg < b->deg) {
807 tmp = b;
808 b = a;
809 a = tmp;
810 }
811
812 while (b->deg > 0) {
813 gf_poly_mod(bch, a, b, NULL);
814 tmp = b;
815 b = a;
816 a = tmp;
817 }
818
819 dbg("%s\n", gf_poly_str(a));
820
821 return a;
822}
823
824/*
825 * Given a polynomial f and an integer k, compute Tr(a^kX) mod f
826 * This is used in Berlekamp Trace algorithm for splitting polynomials
827 */
828static void compute_trace_bk_mod(struct bch_control *bch, int k,
829 const struct gf_poly *f, struct gf_poly *z,
830 struct gf_poly *out)
831{
832 const int m = GF_M(bch);
833 int i, j;
834
835 /* z contains z^2j mod f */
836 z->deg = 1;
837 z->c[0] = 0;
838 z->c[1] = bch->a_pow_tab[k];
839
840 out->deg = 0;
841 memset(out, 0, GF_POLY_SZ(f->deg));
842
843 /* compute f log representation only once */
844 gf_poly_logrep(bch, f, bch->cache);
845
846 for (i = 0; i < m; i++) {
847 /* add a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */
848 for (j = z->deg; j >= 0; j--) {
849 out->c[j] ^= z->c[j];
850 z->c[2*j] = gf_sqr(bch, z->c[j]);
851 z->c[2*j+1] = 0;
852 }
853 if (z->deg > out->deg)
854 out->deg = z->deg;
855
856 if (i < m-1) {
857 z->deg *= 2;
858 /* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */
859 gf_poly_mod(bch, z, f, bch->cache);
860 }
861 }
862 while (!out->c[out->deg] && out->deg)
863 out->deg--;
864
865 dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
866}
867
868/*
869 * factor a polynomial using Berlekamp Trace algorithm (BTA)
870 */
871static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f,
872 struct gf_poly **g, struct gf_poly **h)
873{
874 struct gf_poly *f2 = bch->poly_2t[0];
875 struct gf_poly *q = bch->poly_2t[1];
876 struct gf_poly *tk = bch->poly_2t[2];
877 struct gf_poly *z = bch->poly_2t[3];
878 struct gf_poly *gcd;
879
880 dbg("factoring %s...\n", gf_poly_str(f));
881
882 *g = f;
883 *h = NULL;
884
885 /* tk = Tr(a^k.X) mod f */
886 compute_trace_bk_mod(bch, k, f, z, tk);
887
888 if (tk->deg > 0) {
889 /* compute g = gcd(f, tk) (destructive operation) */
890 gf_poly_copy(f2, f);
891 gcd = gf_poly_gcd(bch, f2, tk);
892 if (gcd->deg < f->deg) {
893 /* compute h=f/gcd(f,tk); this will modify f and q */
894 gf_poly_div(bch, f, gcd, q);
895 /* store g and h in-place (clobbering f) */
896 *h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly;
897 gf_poly_copy(*g, gcd);
898 gf_poly_copy(*h, q);
899 }
900 }
901}
902
903/*
904 * find roots of a polynomial, using BTZ algorithm; see the beginning of this
905 * file for details
906 */
907static int find_poly_roots(struct bch_control *bch, unsigned int k,
908 struct gf_poly *poly, unsigned int *roots)
909{
910 int cnt;
911 struct gf_poly *f1, *f2;
912
913 switch (poly->deg) {
914 /* handle low degree polynomials with ad hoc techniques */
915 case 1:
916 cnt = find_poly_deg1_roots(bch, poly, roots);
917 break;
918 case 2:
919 cnt = find_poly_deg2_roots(bch, poly, roots);
920 break;
921 case 3:
922 cnt = find_poly_deg3_roots(bch, poly, roots);
923 break;
924 case 4:
925 cnt = find_poly_deg4_roots(bch, poly, roots);
926 break;
927 default:
928 /* factor polynomial using Berlekamp Trace Algorithm (BTA) */
929 cnt = 0;
930 if (poly->deg && (k <= GF_M(bch))) {
931 factor_polynomial(bch, k, poly, &f1, &f2);
932 if (f1)
933 cnt += find_poly_roots(bch, k+1, f1, roots);
934 if (f2)
935 cnt += find_poly_roots(bch, k+1, f2, roots+cnt);
936 }
937 break;
938 }
939 return cnt;
940}
941
942#if defined(USE_CHIEN_SEARCH)
943/*
944 * exhaustive root search (Chien) implementation - not used, included only for
945 * reference/comparison tests
946 */
947static int chien_search(struct bch_control *bch, unsigned int len,
948 struct gf_poly *p, unsigned int *roots)
949{
950 int m;
951 unsigned int i, j, syn, syn0, count = 0;
952 const unsigned int k = 8*len+bch->ecc_bits;
953
954 /* use a log-based representation of polynomial */
955 gf_poly_logrep(bch, p, bch->cache);
956 bch->cache[p->deg] = 0;
957 syn0 = gf_div(bch, p->c[0], p->c[p->deg]);
958
959 for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
960 /* compute elp(a^i) */
961 for (j = 1, syn = syn0; j <= p->deg; j++) {
962 m = bch->cache[j];
963 if (m >= 0)
964 syn ^= a_pow(bch, m+j*i);
965 }
966 if (syn == 0) {
967 roots[count++] = GF_N(bch)-i;
968 if (count == p->deg)
969 break;
970 }
971 }
972 return (count == p->deg) ? count : 0;
973}
974#define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
975#endif /* USE_CHIEN_SEARCH */
976
977/**
978 * bch_decode - decode received codeword and find bit error locations
979 * @bch: BCH control structure
980 * @data: received data, ignored if @calc_ecc is provided
981 * @len: data length in bytes, must always be provided
982 * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
983 * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
984 * @syn: hw computed syndrome data (if NULL, syndrome is calculated)
985 * @errloc: output array of error locations
986 *
987 * Returns:
988 * The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
989 * invalid parameters were provided
990 *
991 * Depending on the available hw BCH support and the need to compute @calc_ecc
992 * separately (using bch_encode()), this function should be called with one of
993 * the following parameter configurations -
994 *
995 * by providing @data and @recv_ecc only:
996 * bch_decode(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
997 *
998 * by providing @recv_ecc and @calc_ecc:
999 * bch_decode(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
1000 *
1001 * by providing ecc = recv_ecc XOR calc_ecc:
1002 * bch_decode(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
1003 *
1004 * by providing syndrome results @syn:
1005 * bch_decode(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
1006 *
1007 * Once bch_decode() has successfully returned with a positive value, error
1008 * locations returned in array @errloc should be interpreted as follows -
1009 *
1010 * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
1011 * data correction)
1012 *
1013 * if (errloc[n] < 8*len), then n-th error is located in data and can be
1014 * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
1015 *
1016 * Note that this function does not perform any data correction by itself, it
1017 * merely indicates error locations.
1018 */
1019int bch_decode(struct bch_control *bch, const uint8_t *data, unsigned int len,
1020 const uint8_t *recv_ecc, const uint8_t *calc_ecc,
1021 const unsigned int *syn, unsigned int *errloc)
1022{
1023 const unsigned int ecc_words = BCH_ECC_WORDS(bch);
1024 unsigned int nbits;
1025 int i, err, nroots;
1026 uint32_t sum;
1027
1028 /* sanity check: make sure data length can be handled */
1029 if (8*len > (bch->n-bch->ecc_bits))
1030 return -EINVAL;
1031
1032 /* if caller does not provide syndromes, compute them */
1033 if (!syn) {
1034 if (!calc_ecc) {
1035 /* compute received data ecc into an internal buffer */
1036 if (!data || !recv_ecc)
1037 return -EINVAL;
1038 bch_encode(bch, data, len, NULL);
1039 } else {
1040 /* load provided calculated ecc */
1041 load_ecc8(bch, bch->ecc_buf, calc_ecc);
1042 }
1043 /* load received ecc or assume it was XORed in calc_ecc */
1044 if (recv_ecc) {
1045 load_ecc8(bch, bch->ecc_buf2, recv_ecc);
1046 /* XOR received and calculated ecc */
1047 for (i = 0, sum = 0; i < (int)ecc_words; i++) {
1048 bch->ecc_buf[i] ^= bch->ecc_buf2[i];
1049 sum |= bch->ecc_buf[i];
1050 }
1051 if (!sum)
1052 /* no error found */
1053 return 0;
1054 }
1055 compute_syndromes(bch, bch->ecc_buf, bch->syn);
1056 syn = bch->syn;
1057 }
1058
1059 err = compute_error_locator_polynomial(bch, syn);
1060 if (err > 0) {
1061 nroots = find_poly_roots(bch, 1, bch->elp, errloc);
1062 if (err != nroots)
1063 err = -1;
1064 }
1065 if (err > 0) {
1066 /* post-process raw error locations for easier correction */
1067 nbits = (len*8)+bch->ecc_bits;
1068 for (i = 0; i < err; i++) {
1069 if (errloc[i] >= nbits) {
1070 err = -1;
1071 break;
1072 }
1073 errloc[i] = nbits-1-errloc[i];
1074 if (!bch->swap_bits)
1075 errloc[i] = (errloc[i] & ~7) |
1076 (7-(errloc[i] & 7));
1077 }
1078 }
1079 return (err >= 0) ? err : -EBADMSG;
1080}
1081EXPORT_SYMBOL_GPL(bch_decode);
1082
1083/*
1084 * generate Galois field lookup tables
1085 */
1086static int build_gf_tables(struct bch_control *bch, unsigned int poly)
1087{
1088 unsigned int i, x = 1;
1089 const unsigned int k = 1 << deg(poly);
1090
1091 /* primitive polynomial must be of degree m */
1092 if (k != (1u << GF_M(bch)))
1093 return -1;
1094
1095 for (i = 0; i < GF_N(bch); i++) {
1096 bch->a_pow_tab[i] = x;
1097 bch->a_log_tab[x] = i;
1098 if (i && (x == 1))
1099 /* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
1100 return -1;
1101 x <<= 1;
1102 if (x & k)
1103 x ^= poly;
1104 }
1105 bch->a_pow_tab[GF_N(bch)] = 1;
1106 bch->a_log_tab[0] = 0;
1107
1108 return 0;
1109}
1110
1111/*
1112 * compute generator polynomial remainder tables for fast encoding
1113 */
1114static void build_mod8_tables(struct bch_control *bch, const uint32_t *g)
1115{
1116 int i, j, b, d;
1117 uint32_t data, hi, lo, *tab;
1118 const int l = BCH_ECC_WORDS(bch);
1119 const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32);
1120 const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32);
1121
1122 memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab));
1123
1124 for (i = 0; i < 256; i++) {
1125 /* p(X)=i is a small polynomial of weight <= 8 */
1126 for (b = 0; b < 4; b++) {
1127 /* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */
1128 tab = bch->mod8_tab + (b*256+i)*l;
1129 data = i << (8*b);
1130 while (data) {
1131 d = deg(data);
1132 /* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */
1133 data ^= g[0] >> (31-d);
1134 for (j = 0; j < ecclen; j++) {
1135 hi = (d < 31) ? g[j] << (d+1) : 0;
1136 lo = (j+1 < plen) ?
1137 g[j+1] >> (31-d) : 0;
1138 tab[j] ^= hi|lo;
1139 }
1140 }
1141 }
1142 }
1143}
1144
1145/*
1146 * build a base for factoring degree 2 polynomials
1147 */
1148static int build_deg2_base(struct bch_control *bch)
1149{
1150 const int m = GF_M(bch);
1151 int i, j, r;
1152 unsigned int sum, x, y, remaining, ak = 0, xi[BCH_MAX_M];
1153
1154 /* find k s.t. Tr(a^k) = 1 and 0 <= k < m */
1155 for (i = 0; i < m; i++) {
1156 for (j = 0, sum = 0; j < m; j++)
1157 sum ^= a_pow(bch, i*(1 << j));
1158
1159 if (sum) {
1160 ak = bch->a_pow_tab[i];
1161 break;
1162 }
1163 }
1164 /* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */
1165 remaining = m;
1166 memset(xi, 0, sizeof(xi));
1167
1168 for (x = 0; (x <= GF_N(bch)) && remaining; x++) {
1169 y = gf_sqr(bch, x)^x;
1170 for (i = 0; i < 2; i++) {
1171 r = a_log(bch, y);
1172 if (y && (r < m) && !xi[r]) {
1173 bch->xi_tab[r] = x;
1174 xi[r] = 1;
1175 remaining--;
1176 dbg("x%d = %x\n", r, x);
1177 break;
1178 }
1179 y ^= ak;
1180 }
1181 }
1182 /* should not happen but check anyway */
1183 return remaining ? -1 : 0;
1184}
1185
1186static void *bch_alloc(size_t size, int *err)
1187{
1188 void *ptr;
1189
1190 ptr = kmalloc(size, GFP_KERNEL);
1191 if (ptr == NULL)
1192 *err = 1;
1193 return ptr;
1194}
1195
1196/*
1197 * compute generator polynomial for given (m,t) parameters.
1198 */
1199static uint32_t *compute_generator_polynomial(struct bch_control *bch)
1200{
1201 const unsigned int m = GF_M(bch);
1202 const unsigned int t = GF_T(bch);
1203 int n, err = 0;
1204 unsigned int i, j, nbits, r, word, *roots;
1205 struct gf_poly *g;
1206 uint32_t *genpoly;
1207
1208 g = bch_alloc(GF_POLY_SZ(m*t), &err);
1209 roots = bch_alloc((bch->n+1)*sizeof(*roots), &err);
1210 genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err);
1211
1212 if (err) {
1213 kfree(genpoly);
1214 genpoly = NULL;
1215 goto finish;
1216 }
1217
1218 /* enumerate all roots of g(X) */
1219 memset(roots , 0, (bch->n+1)*sizeof(*roots));
1220 for (i = 0; i < t; i++) {
1221 for (j = 0, r = 2*i+1; j < m; j++) {
1222 roots[r] = 1;
1223 r = mod_s(bch, 2*r);
1224 }
1225 }
1226 /* build generator polynomial g(X) */
1227 g->deg = 0;
1228 g->c[0] = 1;
1229 for (i = 0; i < GF_N(bch); i++) {
1230 if (roots[i]) {
1231 /* multiply g(X) by (X+root) */
1232 r = bch->a_pow_tab[i];
1233 g->c[g->deg+1] = 1;
1234 for (j = g->deg; j > 0; j--)
1235 g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1];
1236
1237 g->c[0] = gf_mul(bch, g->c[0], r);
1238 g->deg++;
1239 }
1240 }
1241 /* store left-justified binary representation of g(X) */
1242 n = g->deg+1;
1243 i = 0;
1244
1245 while (n > 0) {
1246 nbits = (n > 32) ? 32 : n;
1247 for (j = 0, word = 0; j < nbits; j++) {
1248 if (g->c[n-1-j])
1249 word |= 1u << (31-j);
1250 }
1251 genpoly[i++] = word;
1252 n -= nbits;
1253 }
1254 bch->ecc_bits = g->deg;
1255
1256finish:
1257 kfree(g);
1258 kfree(roots);
1259
1260 return genpoly;
1261}
1262
1263/**
1264 * bch_init - initialize a BCH encoder/decoder
1265 * @m: Galois field order, should be in the range 5-15
1266 * @t: maximum error correction capability, in bits
1267 * @prim_poly: user-provided primitive polynomial (or 0 to use default)
1268 * @swap_bits: swap bits within data and syndrome bytes
1269 *
1270 * Returns:
1271 * a newly allocated BCH control structure if successful, NULL otherwise
1272 *
1273 * This initialization can take some time, as lookup tables are built for fast
1274 * encoding/decoding; make sure not to call this function from a time critical
1275 * path. Usually, bch_init() should be called on module/driver init and
1276 * bch_free() should be called to release memory on exit.
1277 *
1278 * You may provide your own primitive polynomial of degree @m in argument
1279 * @prim_poly, or let bch_init() use its default polynomial.
1280 *
1281 * Once bch_init() has successfully returned a pointer to a newly allocated
1282 * BCH control structure, ecc length in bytes is given by member @ecc_bytes of
1283 * the structure.
1284 */
1285struct bch_control *bch_init(int m, int t, unsigned int prim_poly,
1286 bool swap_bits)
1287{
1288 int err = 0;
1289 unsigned int i, words;
1290 uint32_t *genpoly;
1291 struct bch_control *bch = NULL;
1292
1293 const int min_m = 5;
1294
1295 /* default primitive polynomials */
1296 static const unsigned int prim_poly_tab[] = {
1297 0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b,
1298 0x402b, 0x8003,
1299 };
1300
1301#if defined(CONFIG_BCH_CONST_PARAMS)
1302 if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) {
1303 printk(KERN_ERR "bch encoder/decoder was configured to support "
1304 "parameters m=%d, t=%d only!\n",
1305 CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
1306 goto fail;
1307 }
1308#endif
1309 if ((m < min_m) || (m > BCH_MAX_M))
1310 /*
1311 * values of m greater than 15 are not currently supported;
1312 * supporting m > 15 would require changing table base type
1313 * (uint16_t) and a small patch in matrix transposition
1314 */
1315 goto fail;
1316
1317 if (t > BCH_MAX_T)
1318 /*
1319 * we can support larger than 64 bits if necessary, at the
1320 * cost of higher stack usage.
1321 */
1322 goto fail;
1323
1324 /* sanity checks */
1325 if ((t < 1) || (m*t >= ((1 << m)-1)))
1326 /* invalid t value */
1327 goto fail;
1328
1329 /* select a primitive polynomial for generating GF(2^m) */
1330 if (prim_poly == 0)
1331 prim_poly = prim_poly_tab[m-min_m];
1332
1333 bch = kzalloc(sizeof(*bch), GFP_KERNEL);
1334 if (bch == NULL)
1335 goto fail;
1336
1337 bch->m = m;
1338 bch->t = t;
1339 bch->n = (1 << m)-1;
1340 words = DIV_ROUND_UP(m*t, 32);
1341 bch->ecc_bytes = DIV_ROUND_UP(m*t, 8);
1342 bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err);
1343 bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err);
1344 bch->mod8_tab = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err);
1345 bch->ecc_buf = bch_alloc(words*sizeof(*bch->ecc_buf), &err);
1346 bch->ecc_buf2 = bch_alloc(words*sizeof(*bch->ecc_buf2), &err);
1347 bch->xi_tab = bch_alloc(m*sizeof(*bch->xi_tab), &err);
1348 bch->syn = bch_alloc(2*t*sizeof(*bch->syn), &err);
1349 bch->cache = bch_alloc(2*t*sizeof(*bch->cache), &err);
1350 bch->elp = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err);
1351 bch->swap_bits = swap_bits;
1352
1353 for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1354 bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err);
1355
1356 if (err)
1357 goto fail;
1358
1359 err = build_gf_tables(bch, prim_poly);
1360 if (err)
1361 goto fail;
1362
1363 /* use generator polynomial for computing encoding tables */
1364 genpoly = compute_generator_polynomial(bch);
1365 if (genpoly == NULL)
1366 goto fail;
1367
1368 build_mod8_tables(bch, genpoly);
1369 kfree(genpoly);
1370
1371 err = build_deg2_base(bch);
1372 if (err)
1373 goto fail;
1374
1375 return bch;
1376
1377fail:
1378 bch_free(bch);
1379 return NULL;
1380}
1381EXPORT_SYMBOL_GPL(bch_init);
1382
1383/**
1384 * bch_free - free the BCH control structure
1385 * @bch: BCH control structure to release
1386 */
1387void bch_free(struct bch_control *bch)
1388{
1389 unsigned int i;
1390
1391 if (bch) {
1392 kfree(bch->a_pow_tab);
1393 kfree(bch->a_log_tab);
1394 kfree(bch->mod8_tab);
1395 kfree(bch->ecc_buf);
1396 kfree(bch->ecc_buf2);
1397 kfree(bch->xi_tab);
1398 kfree(bch->syn);
1399 kfree(bch->cache);
1400 kfree(bch->elp);
1401
1402 for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1403 kfree(bch->poly_2t[i]);
1404
1405 kfree(bch);
1406 }
1407}
1408EXPORT_SYMBOL_GPL(bch_free);
1409
1410MODULE_LICENSE("GPL");
1411MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
1412MODULE_DESCRIPTION("Binary BCH encoder/decoder");