Linux Audio

Check our new training course

Loading...
v5.4
   1/*
   2 * Generic binary BCH encoding/decoding library
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms of the GNU General Public License version 2 as published by
   6 * the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 *
  13 * You should have received a copy of the GNU General Public License along with
  14 * this program; if not, write to the Free Software Foundation, Inc., 51
  15 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  16 *
  17 * Copyright © 2011 Parrot S.A.
  18 *
  19 * Author: Ivan Djelic <ivan.djelic@parrot.com>
  20 *
  21 * Description:
  22 *
  23 * This library provides runtime configurable encoding/decoding of binary
  24 * Bose-Chaudhuri-Hocquenghem (BCH) codes.
  25 *
  26 * Call init_bch to get a pointer to a newly allocated bch_control structure for
  27 * the given m (Galois field order), t (error correction capability) and
  28 * (optional) primitive polynomial parameters.
  29 *
  30 * Call encode_bch to compute and store ecc parity bytes to a given buffer.
  31 * Call decode_bch to detect and locate errors in received data.
  32 *
  33 * On systems supporting hw BCH features, intermediate results may be provided
  34 * to decode_bch in order to skip certain steps. See decode_bch() documentation
  35 * for details.
  36 *
  37 * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
  38 * parameters m and t; thus allowing extra compiler optimizations and providing
  39 * better (up to 2x) encoding performance. Using this option makes sense when
  40 * (m,t) are fixed and known in advance, e.g. when using BCH error correction
  41 * on a particular NAND flash device.
  42 *
  43 * Algorithmic details:
  44 *
  45 * Encoding is performed by processing 32 input bits in parallel, using 4
  46 * remainder lookup tables.
  47 *
  48 * The final stage of decoding involves the following internal steps:
  49 * a. Syndrome computation
  50 * b. Error locator polynomial computation using Berlekamp-Massey algorithm
  51 * c. Error locator root finding (by far the most expensive step)
  52 *
  53 * In this implementation, step c is not performed using the usual Chien search.
  54 * Instead, an alternative approach described in [1] is used. It consists in
  55 * factoring the error locator polynomial using the Berlekamp Trace algorithm
  56 * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
  57 * solving techniques [2] are used. The resulting algorithm, called BTZ, yields
  58 * much better performance than Chien search for usual (m,t) values (typically
  59 * m >= 13, t < 32, see [1]).
  60 *
  61 * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
  62 * of characteristic 2, in: Western European Workshop on Research in Cryptology
  63 * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
  64 * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
  65 * finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
  66 */
  67
  68#include <linux/kernel.h>
  69#include <linux/errno.h>
  70#include <linux/init.h>
  71#include <linux/module.h>
  72#include <linux/slab.h>
  73#include <linux/bitops.h>
  74#include <asm/byteorder.h>
  75#include <linux/bch.h>
  76
  77#if defined(CONFIG_BCH_CONST_PARAMS)
  78#define GF_M(_p)               (CONFIG_BCH_CONST_M)
  79#define GF_T(_p)               (CONFIG_BCH_CONST_T)
  80#define GF_N(_p)               ((1 << (CONFIG_BCH_CONST_M))-1)
  81#define BCH_MAX_M              (CONFIG_BCH_CONST_M)
  82#define BCH_MAX_T	       (CONFIG_BCH_CONST_T)
  83#else
  84#define GF_M(_p)               ((_p)->m)
  85#define GF_T(_p)               ((_p)->t)
  86#define GF_N(_p)               ((_p)->n)
  87#define BCH_MAX_M              15 /* 2KB */
  88#define BCH_MAX_T              64 /* 64 bit correction */
  89#endif
  90
  91#define BCH_ECC_WORDS(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
  92#define BCH_ECC_BYTES(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
  93
  94#define BCH_ECC_MAX_WORDS      DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32)
  95
  96#ifndef dbg
  97#define dbg(_fmt, args...)     do {} while (0)
  98#endif
  99
 100/*
 101 * represent a polynomial over GF(2^m)
 102 */
 103struct gf_poly {
 104	unsigned int deg;    /* polynomial degree */
 105	unsigned int c[0];   /* polynomial terms */
 106};
 107
 108/* given its degree, compute a polynomial size in bytes */
 109#define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))
 110
 111/* polynomial of degree 1 */
 112struct gf_poly_deg1 {
 113	struct gf_poly poly;
 114	unsigned int   c[2];
 115};
 116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 117/*
 118 * same as encode_bch(), but process input data one byte at a time
 119 */
 120static void encode_bch_unaligned(struct bch_control *bch,
 121				 const unsigned char *data, unsigned int len,
 122				 uint32_t *ecc)
 123{
 124	int i;
 125	const uint32_t *p;
 126	const int l = BCH_ECC_WORDS(bch)-1;
 127
 128	while (len--) {
 129		p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(*data++)) & 0xff);
 
 
 130
 131		for (i = 0; i < l; i++)
 132			ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++);
 133
 134		ecc[l] = (ecc[l] << 8)^(*p);
 135	}
 136}
 137
 138/*
 139 * convert ecc bytes to aligned, zero-padded 32-bit ecc words
 140 */
 141static void load_ecc8(struct bch_control *bch, uint32_t *dst,
 142		      const uint8_t *src)
 143{
 144	uint8_t pad[4] = {0, 0, 0, 0};
 145	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
 146
 147	for (i = 0; i < nwords; i++, src += 4)
 148		dst[i] = (src[0] << 24)|(src[1] << 16)|(src[2] << 8)|src[3];
 
 
 
 149
 150	memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords);
 151	dst[nwords] = (pad[0] << 24)|(pad[1] << 16)|(pad[2] << 8)|pad[3];
 
 
 
 152}
 153
 154/*
 155 * convert 32-bit ecc words to ecc bytes
 156 */
 157static void store_ecc8(struct bch_control *bch, uint8_t *dst,
 158		       const uint32_t *src)
 159{
 160	uint8_t pad[4];
 161	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
 162
 163	for (i = 0; i < nwords; i++) {
 164		*dst++ = (src[i] >> 24);
 165		*dst++ = (src[i] >> 16) & 0xff;
 166		*dst++ = (src[i] >>  8) & 0xff;
 167		*dst++ = (src[i] >>  0) & 0xff;
 168	}
 169	pad[0] = (src[nwords] >> 24);
 170	pad[1] = (src[nwords] >> 16) & 0xff;
 171	pad[2] = (src[nwords] >>  8) & 0xff;
 172	pad[3] = (src[nwords] >>  0) & 0xff;
 173	memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
 174}
 175
 176/**
 177 * encode_bch - calculate BCH ecc parity of data
 178 * @bch:   BCH control structure
 179 * @data:  data to encode
 180 * @len:   data length in bytes
 181 * @ecc:   ecc parity data, must be initialized by caller
 182 *
 183 * The @ecc parity array is used both as input and output parameter, in order to
 184 * allow incremental computations. It should be of the size indicated by member
 185 * @ecc_bytes of @bch, and should be initialized to 0 before the first call.
 186 *
 187 * The exact number of computed ecc parity bits is given by member @ecc_bits of
 188 * @bch; it may be less than m*t for large values of t.
 189 */
 190void encode_bch(struct bch_control *bch, const uint8_t *data,
 191		unsigned int len, uint8_t *ecc)
 192{
 193	const unsigned int l = BCH_ECC_WORDS(bch)-1;
 194	unsigned int i, mlen;
 195	unsigned long m;
 196	uint32_t w, r[BCH_ECC_MAX_WORDS];
 197	const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r);
 198	const uint32_t * const tab0 = bch->mod8_tab;
 199	const uint32_t * const tab1 = tab0 + 256*(l+1);
 200	const uint32_t * const tab2 = tab1 + 256*(l+1);
 201	const uint32_t * const tab3 = tab2 + 256*(l+1);
 202	const uint32_t *pdata, *p0, *p1, *p2, *p3;
 203
 204	if (WARN_ON(r_bytes > sizeof(r)))
 205		return;
 206
 207	if (ecc) {
 208		/* load ecc parity bytes into internal 32-bit buffer */
 209		load_ecc8(bch, bch->ecc_buf, ecc);
 210	} else {
 211		memset(bch->ecc_buf, 0, r_bytes);
 212	}
 213
 214	/* process first unaligned data bytes */
 215	m = ((unsigned long)data) & 3;
 216	if (m) {
 217		mlen = (len < (4-m)) ? len : 4-m;
 218		encode_bch_unaligned(bch, data, mlen, bch->ecc_buf);
 219		data += mlen;
 220		len  -= mlen;
 221	}
 222
 223	/* process 32-bit aligned data words */
 224	pdata = (uint32_t *)data;
 225	mlen  = len/4;
 226	data += 4*mlen;
 227	len  -= 4*mlen;
 228	memcpy(r, bch->ecc_buf, r_bytes);
 229
 230	/*
 231	 * split each 32-bit word into 4 polynomials of weight 8 as follows:
 232	 *
 233	 * 31 ...24  23 ...16  15 ... 8  7 ... 0
 234	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt
 235	 *                               tttttttt  mod g = r0 (precomputed)
 236	 *                     zzzzzzzz  00000000  mod g = r1 (precomputed)
 237	 *           yyyyyyyy  00000000  00000000  mod g = r2 (precomputed)
 238	 * xxxxxxxx  00000000  00000000  00000000  mod g = r3 (precomputed)
 239	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt  mod g = r0^r1^r2^r3
 240	 */
 241	while (mlen--) {
 242		/* input data is read in big-endian format */
 243		w = r[0]^cpu_to_be32(*pdata++);
 
 
 
 
 
 
 244		p0 = tab0 + (l+1)*((w >>  0) & 0xff);
 245		p1 = tab1 + (l+1)*((w >>  8) & 0xff);
 246		p2 = tab2 + (l+1)*((w >> 16) & 0xff);
 247		p3 = tab3 + (l+1)*((w >> 24) & 0xff);
 248
 249		for (i = 0; i < l; i++)
 250			r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i];
 251
 252		r[l] = p0[l]^p1[l]^p2[l]^p3[l];
 253	}
 254	memcpy(bch->ecc_buf, r, r_bytes);
 255
 256	/* process last unaligned bytes */
 257	if (len)
 258		encode_bch_unaligned(bch, data, len, bch->ecc_buf);
 259
 260	/* store ecc parity bytes into original parity buffer */
 261	if (ecc)
 262		store_ecc8(bch, ecc, bch->ecc_buf);
 263}
 264EXPORT_SYMBOL_GPL(encode_bch);
 265
 266static inline int modulo(struct bch_control *bch, unsigned int v)
 267{
 268	const unsigned int n = GF_N(bch);
 269	while (v >= n) {
 270		v -= n;
 271		v = (v & n) + (v >> GF_M(bch));
 272	}
 273	return v;
 274}
 275
 276/*
 277 * shorter and faster modulo function, only works when v < 2N.
 278 */
 279static inline int mod_s(struct bch_control *bch, unsigned int v)
 280{
 281	const unsigned int n = GF_N(bch);
 282	return (v < n) ? v : v-n;
 283}
 284
 285static inline int deg(unsigned int poly)
 286{
 287	/* polynomial degree is the most-significant bit index */
 288	return fls(poly)-1;
 289}
 290
 291static inline int parity(unsigned int x)
 292{
 293	/*
 294	 * public domain code snippet, lifted from
 295	 * http://www-graphics.stanford.edu/~seander/bithacks.html
 296	 */
 297	x ^= x >> 1;
 298	x ^= x >> 2;
 299	x = (x & 0x11111111U) * 0x11111111U;
 300	return (x >> 28) & 1;
 301}
 302
 303/* Galois field basic operations: multiply, divide, inverse, etc. */
 304
 305static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a,
 306				  unsigned int b)
 307{
 308	return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
 309					       bch->a_log_tab[b])] : 0;
 310}
 311
 312static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a)
 313{
 314	return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0;
 315}
 316
 317static inline unsigned int gf_div(struct bch_control *bch, unsigned int a,
 318				  unsigned int b)
 319{
 320	return a ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
 321					GF_N(bch)-bch->a_log_tab[b])] : 0;
 322}
 323
 324static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a)
 325{
 326	return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
 327}
 328
 329static inline unsigned int a_pow(struct bch_control *bch, int i)
 330{
 331	return bch->a_pow_tab[modulo(bch, i)];
 332}
 333
 334static inline int a_log(struct bch_control *bch, unsigned int x)
 335{
 336	return bch->a_log_tab[x];
 337}
 338
 339static inline int a_ilog(struct bch_control *bch, unsigned int x)
 340{
 341	return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
 342}
 343
 344/*
 345 * compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
 346 */
 347static void compute_syndromes(struct bch_control *bch, uint32_t *ecc,
 348			      unsigned int *syn)
 349{
 350	int i, j, s;
 351	unsigned int m;
 352	uint32_t poly;
 353	const int t = GF_T(bch);
 354
 355	s = bch->ecc_bits;
 356
 357	/* make sure extra bits in last ecc word are cleared */
 358	m = ((unsigned int)s) & 31;
 359	if (m)
 360		ecc[s/32] &= ~((1u << (32-m))-1);
 361	memset(syn, 0, 2*t*sizeof(*syn));
 362
 363	/* compute v(a^j) for j=1 .. 2t-1 */
 364	do {
 365		poly = *ecc++;
 366		s -= 32;
 367		while (poly) {
 368			i = deg(poly);
 369			for (j = 0; j < 2*t; j += 2)
 370				syn[j] ^= a_pow(bch, (j+1)*(i+s));
 371
 372			poly ^= (1 << i);
 373		}
 374	} while (s > 0);
 375
 376	/* v(a^(2j)) = v(a^j)^2 */
 377	for (j = 0; j < t; j++)
 378		syn[2*j+1] = gf_sqr(bch, syn[j]);
 379}
 380
 381static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src)
 382{
 383	memcpy(dst, src, GF_POLY_SZ(src->deg));
 384}
 385
 386static int compute_error_locator_polynomial(struct bch_control *bch,
 387					    const unsigned int *syn)
 388{
 389	const unsigned int t = GF_T(bch);
 390	const unsigned int n = GF_N(bch);
 391	unsigned int i, j, tmp, l, pd = 1, d = syn[0];
 392	struct gf_poly *elp = bch->elp;
 393	struct gf_poly *pelp = bch->poly_2t[0];
 394	struct gf_poly *elp_copy = bch->poly_2t[1];
 395	int k, pp = -1;
 396
 397	memset(pelp, 0, GF_POLY_SZ(2*t));
 398	memset(elp, 0, GF_POLY_SZ(2*t));
 399
 400	pelp->deg = 0;
 401	pelp->c[0] = 1;
 402	elp->deg = 0;
 403	elp->c[0] = 1;
 404
 405	/* use simplified binary Berlekamp-Massey algorithm */
 406	for (i = 0; (i < t) && (elp->deg <= t); i++) {
 407		if (d) {
 408			k = 2*i-pp;
 409			gf_poly_copy(elp_copy, elp);
 410			/* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */
 411			tmp = a_log(bch, d)+n-a_log(bch, pd);
 412			for (j = 0; j <= pelp->deg; j++) {
 413				if (pelp->c[j]) {
 414					l = a_log(bch, pelp->c[j]);
 415					elp->c[j+k] ^= a_pow(bch, tmp+l);
 416				}
 417			}
 418			/* compute l[i+1] = max(l[i]->c[l[p]+2*(i-p]) */
 419			tmp = pelp->deg+k;
 420			if (tmp > elp->deg) {
 421				elp->deg = tmp;
 422				gf_poly_copy(pelp, elp_copy);
 423				pd = d;
 424				pp = 2*i;
 425			}
 426		}
 427		/* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */
 428		if (i < t-1) {
 429			d = syn[2*i+2];
 430			for (j = 1; j <= elp->deg; j++)
 431				d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]);
 432		}
 433	}
 434	dbg("elp=%s\n", gf_poly_str(elp));
 435	return (elp->deg > t) ? -1 : (int)elp->deg;
 436}
 437
 438/*
 439 * solve a m x m linear system in GF(2) with an expected number of solutions,
 440 * and return the number of found solutions
 441 */
 442static int solve_linear_system(struct bch_control *bch, unsigned int *rows,
 443			       unsigned int *sol, int nsol)
 444{
 445	const int m = GF_M(bch);
 446	unsigned int tmp, mask;
 447	int rem, c, r, p, k, param[BCH_MAX_M];
 448
 449	k = 0;
 450	mask = 1 << m;
 451
 452	/* Gaussian elimination */
 453	for (c = 0; c < m; c++) {
 454		rem = 0;
 455		p = c-k;
 456		/* find suitable row for elimination */
 457		for (r = p; r < m; r++) {
 458			if (rows[r] & mask) {
 459				if (r != p) {
 460					tmp = rows[r];
 461					rows[r] = rows[p];
 462					rows[p] = tmp;
 463				}
 464				rem = r+1;
 465				break;
 466			}
 467		}
 468		if (rem) {
 469			/* perform elimination on remaining rows */
 470			tmp = rows[p];
 471			for (r = rem; r < m; r++) {
 472				if (rows[r] & mask)
 473					rows[r] ^= tmp;
 474			}
 475		} else {
 476			/* elimination not needed, store defective row index */
 477			param[k++] = c;
 478		}
 479		mask >>= 1;
 480	}
 481	/* rewrite system, inserting fake parameter rows */
 482	if (k > 0) {
 483		p = k;
 484		for (r = m-1; r >= 0; r--) {
 485			if ((r > m-1-k) && rows[r])
 486				/* system has no solution */
 487				return 0;
 488
 489			rows[r] = (p && (r == param[p-1])) ?
 490				p--, 1u << (m-r) : rows[r-p];
 491		}
 492	}
 493
 494	if (nsol != (1 << k))
 495		/* unexpected number of solutions */
 496		return 0;
 497
 498	for (p = 0; p < nsol; p++) {
 499		/* set parameters for p-th solution */
 500		for (c = 0; c < k; c++)
 501			rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1);
 502
 503		/* compute unique solution */
 504		tmp = 0;
 505		for (r = m-1; r >= 0; r--) {
 506			mask = rows[r] & (tmp|1);
 507			tmp |= parity(mask) << (m-r);
 508		}
 509		sol[p] = tmp >> 1;
 510	}
 511	return nsol;
 512}
 513
 514/*
 515 * this function builds and solves a linear system for finding roots of a degree
 516 * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
 517 */
 518static int find_affine4_roots(struct bch_control *bch, unsigned int a,
 519			      unsigned int b, unsigned int c,
 520			      unsigned int *roots)
 521{
 522	int i, j, k;
 523	const int m = GF_M(bch);
 524	unsigned int mask = 0xff, t, rows[16] = {0,};
 525
 526	j = a_log(bch, b);
 527	k = a_log(bch, a);
 528	rows[0] = c;
 529
 530	/* buid linear system to solve X^4+aX^2+bX+c = 0 */
 531	for (i = 0; i < m; i++) {
 532		rows[i+1] = bch->a_pow_tab[4*i]^
 533			(a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^
 534			(b ? bch->a_pow_tab[mod_s(bch, j)] : 0);
 535		j++;
 536		k += 2;
 537	}
 538	/*
 539	 * transpose 16x16 matrix before passing it to linear solver
 540	 * warning: this code assumes m < 16
 541	 */
 542	for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) {
 543		for (k = 0; k < 16; k = (k+j+1) & ~j) {
 544			t = ((rows[k] >> j)^rows[k+j]) & mask;
 545			rows[k] ^= (t << j);
 546			rows[k+j] ^= t;
 547		}
 548	}
 549	return solve_linear_system(bch, rows, roots, 4);
 550}
 551
 552/*
 553 * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
 554 */
 555static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly,
 556				unsigned int *roots)
 557{
 558	int n = 0;
 559
 560	if (poly->c[0])
 561		/* poly[X] = bX+c with c!=0, root=c/b */
 562		roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+
 563				   bch->a_log_tab[poly->c[1]]);
 564	return n;
 565}
 566
 567/*
 568 * compute roots of a degree 2 polynomial over GF(2^m)
 569 */
 570static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly,
 571				unsigned int *roots)
 572{
 573	int n = 0, i, l0, l1, l2;
 574	unsigned int u, v, r;
 575
 576	if (poly->c[0] && poly->c[1]) {
 577
 578		l0 = bch->a_log_tab[poly->c[0]];
 579		l1 = bch->a_log_tab[poly->c[1]];
 580		l2 = bch->a_log_tab[poly->c[2]];
 581
 582		/* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */
 583		u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1));
 584		/*
 585		 * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
 586		 * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
 587		 * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
 588		 * i.e. r and r+1 are roots iff Tr(u)=0
 589		 */
 590		r = 0;
 591		v = u;
 592		while (v) {
 593			i = deg(v);
 594			r ^= bch->xi_tab[i];
 595			v ^= (1 << i);
 596		}
 597		/* verify root */
 598		if ((gf_sqr(bch, r)^r) == u) {
 599			/* reverse z=a/bX transformation and compute log(1/r) */
 600			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
 601					    bch->a_log_tab[r]+l2);
 602			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
 603					    bch->a_log_tab[r^1]+l2);
 604		}
 605	}
 606	return n;
 607}
 608
 609/*
 610 * compute roots of a degree 3 polynomial over GF(2^m)
 611 */
 612static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly,
 613				unsigned int *roots)
 614{
 615	int i, n = 0;
 616	unsigned int a, b, c, a2, b2, c2, e3, tmp[4];
 617
 618	if (poly->c[0]) {
 619		/* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */
 620		e3 = poly->c[3];
 621		c2 = gf_div(bch, poly->c[0], e3);
 622		b2 = gf_div(bch, poly->c[1], e3);
 623		a2 = gf_div(bch, poly->c[2], e3);
 624
 625		/* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */
 626		c = gf_mul(bch, a2, c2);           /* c = a2c2      */
 627		b = gf_mul(bch, a2, b2)^c2;        /* b = a2b2 + c2 */
 628		a = gf_sqr(bch, a2)^b2;            /* a = a2^2 + b2 */
 629
 630		/* find the 4 roots of this affine polynomial */
 631		if (find_affine4_roots(bch, a, b, c, tmp) == 4) {
 632			/* remove a2 from final list of roots */
 633			for (i = 0; i < 4; i++) {
 634				if (tmp[i] != a2)
 635					roots[n++] = a_ilog(bch, tmp[i]);
 636			}
 637		}
 638	}
 639	return n;
 640}
 641
 642/*
 643 * compute roots of a degree 4 polynomial over GF(2^m)
 644 */
 645static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly,
 646				unsigned int *roots)
 647{
 648	int i, l, n = 0;
 649	unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4;
 650
 651	if (poly->c[0] == 0)
 652		return 0;
 653
 654	/* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */
 655	e4 = poly->c[4];
 656	d = gf_div(bch, poly->c[0], e4);
 657	c = gf_div(bch, poly->c[1], e4);
 658	b = gf_div(bch, poly->c[2], e4);
 659	a = gf_div(bch, poly->c[3], e4);
 660
 661	/* use Y=1/X transformation to get an affine polynomial */
 662	if (a) {
 663		/* first, eliminate cX by using z=X+e with ae^2+c=0 */
 664		if (c) {
 665			/* compute e such that e^2 = c/a */
 666			f = gf_div(bch, c, a);
 667			l = a_log(bch, f);
 668			l += (l & 1) ? GF_N(bch) : 0;
 669			e = a_pow(bch, l/2);
 670			/*
 671			 * use transformation z=X+e:
 672			 * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
 673			 * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
 674			 * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
 675			 * z^4 + az^3 +     b'z^2 + d'
 676			 */
 677			d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d;
 678			b = gf_mul(bch, a, e)^b;
 679		}
 680		/* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */
 681		if (d == 0)
 682			/* assume all roots have multiplicity 1 */
 683			return 0;
 684
 685		c2 = gf_inv(bch, d);
 686		b2 = gf_div(bch, a, d);
 687		a2 = gf_div(bch, b, d);
 688	} else {
 689		/* polynomial is already affine */
 690		c2 = d;
 691		b2 = c;
 692		a2 = b;
 693	}
 694	/* find the 4 roots of this affine polynomial */
 695	if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) {
 696		for (i = 0; i < 4; i++) {
 697			/* post-process roots (reverse transformations) */
 698			f = a ? gf_inv(bch, roots[i]) : roots[i];
 699			roots[i] = a_ilog(bch, f^e);
 700		}
 701		n = 4;
 702	}
 703	return n;
 704}
 705
 706/*
 707 * build monic, log-based representation of a polynomial
 708 */
 709static void gf_poly_logrep(struct bch_control *bch,
 710			   const struct gf_poly *a, int *rep)
 711{
 712	int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]);
 713
 714	/* represent 0 values with -1; warning, rep[d] is not set to 1 */
 715	for (i = 0; i < d; i++)
 716		rep[i] = a->c[i] ? mod_s(bch, a_log(bch, a->c[i])+l) : -1;
 717}
 718
 719/*
 720 * compute polynomial Euclidean division remainder in GF(2^m)[X]
 721 */
 722static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a,
 723			const struct gf_poly *b, int *rep)
 724{
 725	int la, p, m;
 726	unsigned int i, j, *c = a->c;
 727	const unsigned int d = b->deg;
 728
 729	if (a->deg < d)
 730		return;
 731
 732	/* reuse or compute log representation of denominator */
 733	if (!rep) {
 734		rep = bch->cache;
 735		gf_poly_logrep(bch, b, rep);
 736	}
 737
 738	for (j = a->deg; j >= d; j--) {
 739		if (c[j]) {
 740			la = a_log(bch, c[j]);
 741			p = j-d;
 742			for (i = 0; i < d; i++, p++) {
 743				m = rep[i];
 744				if (m >= 0)
 745					c[p] ^= bch->a_pow_tab[mod_s(bch,
 746								     m+la)];
 747			}
 748		}
 749	}
 750	a->deg = d-1;
 751	while (!c[a->deg] && a->deg)
 752		a->deg--;
 753}
 754
 755/*
 756 * compute polynomial Euclidean division quotient in GF(2^m)[X]
 757 */
 758static void gf_poly_div(struct bch_control *bch, struct gf_poly *a,
 759			const struct gf_poly *b, struct gf_poly *q)
 760{
 761	if (a->deg >= b->deg) {
 762		q->deg = a->deg-b->deg;
 763		/* compute a mod b (modifies a) */
 764		gf_poly_mod(bch, a, b, NULL);
 765		/* quotient is stored in upper part of polynomial a */
 766		memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int));
 767	} else {
 768		q->deg = 0;
 769		q->c[0] = 0;
 770	}
 771}
 772
 773/*
 774 * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
 775 */
 776static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a,
 777				   struct gf_poly *b)
 778{
 779	struct gf_poly *tmp;
 780
 781	dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
 782
 783	if (a->deg < b->deg) {
 784		tmp = b;
 785		b = a;
 786		a = tmp;
 787	}
 788
 789	while (b->deg > 0) {
 790		gf_poly_mod(bch, a, b, NULL);
 791		tmp = b;
 792		b = a;
 793		a = tmp;
 794	}
 795
 796	dbg("%s\n", gf_poly_str(a));
 797
 798	return a;
 799}
 800
 801/*
 802 * Given a polynomial f and an integer k, compute Tr(a^kX) mod f
 803 * This is used in Berlekamp Trace algorithm for splitting polynomials
 804 */
 805static void compute_trace_bk_mod(struct bch_control *bch, int k,
 806				 const struct gf_poly *f, struct gf_poly *z,
 807				 struct gf_poly *out)
 808{
 809	const int m = GF_M(bch);
 810	int i, j;
 811
 812	/* z contains z^2j mod f */
 813	z->deg = 1;
 814	z->c[0] = 0;
 815	z->c[1] = bch->a_pow_tab[k];
 816
 817	out->deg = 0;
 818	memset(out, 0, GF_POLY_SZ(f->deg));
 819
 820	/* compute f log representation only once */
 821	gf_poly_logrep(bch, f, bch->cache);
 822
 823	for (i = 0; i < m; i++) {
 824		/* add a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */
 825		for (j = z->deg; j >= 0; j--) {
 826			out->c[j] ^= z->c[j];
 827			z->c[2*j] = gf_sqr(bch, z->c[j]);
 828			z->c[2*j+1] = 0;
 829		}
 830		if (z->deg > out->deg)
 831			out->deg = z->deg;
 832
 833		if (i < m-1) {
 834			z->deg *= 2;
 835			/* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */
 836			gf_poly_mod(bch, z, f, bch->cache);
 837		}
 838	}
 839	while (!out->c[out->deg] && out->deg)
 840		out->deg--;
 841
 842	dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
 843}
 844
 845/*
 846 * factor a polynomial using Berlekamp Trace algorithm (BTA)
 847 */
 848static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f,
 849			      struct gf_poly **g, struct gf_poly **h)
 850{
 851	struct gf_poly *f2 = bch->poly_2t[0];
 852	struct gf_poly *q  = bch->poly_2t[1];
 853	struct gf_poly *tk = bch->poly_2t[2];
 854	struct gf_poly *z  = bch->poly_2t[3];
 855	struct gf_poly *gcd;
 856
 857	dbg("factoring %s...\n", gf_poly_str(f));
 858
 859	*g = f;
 860	*h = NULL;
 861
 862	/* tk = Tr(a^k.X) mod f */
 863	compute_trace_bk_mod(bch, k, f, z, tk);
 864
 865	if (tk->deg > 0) {
 866		/* compute g = gcd(f, tk) (destructive operation) */
 867		gf_poly_copy(f2, f);
 868		gcd = gf_poly_gcd(bch, f2, tk);
 869		if (gcd->deg < f->deg) {
 870			/* compute h=f/gcd(f,tk); this will modify f and q */
 871			gf_poly_div(bch, f, gcd, q);
 872			/* store g and h in-place (clobbering f) */
 873			*h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly;
 874			gf_poly_copy(*g, gcd);
 875			gf_poly_copy(*h, q);
 876		}
 877	}
 878}
 879
 880/*
 881 * find roots of a polynomial, using BTZ algorithm; see the beginning of this
 882 * file for details
 883 */
 884static int find_poly_roots(struct bch_control *bch, unsigned int k,
 885			   struct gf_poly *poly, unsigned int *roots)
 886{
 887	int cnt;
 888	struct gf_poly *f1, *f2;
 889
 890	switch (poly->deg) {
 891		/* handle low degree polynomials with ad hoc techniques */
 892	case 1:
 893		cnt = find_poly_deg1_roots(bch, poly, roots);
 894		break;
 895	case 2:
 896		cnt = find_poly_deg2_roots(bch, poly, roots);
 897		break;
 898	case 3:
 899		cnt = find_poly_deg3_roots(bch, poly, roots);
 900		break;
 901	case 4:
 902		cnt = find_poly_deg4_roots(bch, poly, roots);
 903		break;
 904	default:
 905		/* factor polynomial using Berlekamp Trace Algorithm (BTA) */
 906		cnt = 0;
 907		if (poly->deg && (k <= GF_M(bch))) {
 908			factor_polynomial(bch, k, poly, &f1, &f2);
 909			if (f1)
 910				cnt += find_poly_roots(bch, k+1, f1, roots);
 911			if (f2)
 912				cnt += find_poly_roots(bch, k+1, f2, roots+cnt);
 913		}
 914		break;
 915	}
 916	return cnt;
 917}
 918
 919#if defined(USE_CHIEN_SEARCH)
 920/*
 921 * exhaustive root search (Chien) implementation - not used, included only for
 922 * reference/comparison tests
 923 */
 924static int chien_search(struct bch_control *bch, unsigned int len,
 925			struct gf_poly *p, unsigned int *roots)
 926{
 927	int m;
 928	unsigned int i, j, syn, syn0, count = 0;
 929	const unsigned int k = 8*len+bch->ecc_bits;
 930
 931	/* use a log-based representation of polynomial */
 932	gf_poly_logrep(bch, p, bch->cache);
 933	bch->cache[p->deg] = 0;
 934	syn0 = gf_div(bch, p->c[0], p->c[p->deg]);
 935
 936	for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
 937		/* compute elp(a^i) */
 938		for (j = 1, syn = syn0; j <= p->deg; j++) {
 939			m = bch->cache[j];
 940			if (m >= 0)
 941				syn ^= a_pow(bch, m+j*i);
 942		}
 943		if (syn == 0) {
 944			roots[count++] = GF_N(bch)-i;
 945			if (count == p->deg)
 946				break;
 947		}
 948	}
 949	return (count == p->deg) ? count : 0;
 950}
 951#define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
 952#endif /* USE_CHIEN_SEARCH */
 953
 954/**
 955 * decode_bch - decode received codeword and find bit error locations
 956 * @bch:      BCH control structure
 957 * @data:     received data, ignored if @calc_ecc is provided
 958 * @len:      data length in bytes, must always be provided
 959 * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
 960 * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
 961 * @syn:      hw computed syndrome data (if NULL, syndrome is calculated)
 962 * @errloc:   output array of error locations
 963 *
 964 * Returns:
 965 *  The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
 966 *  invalid parameters were provided
 967 *
 968 * Depending on the available hw BCH support and the need to compute @calc_ecc
 969 * separately (using encode_bch()), this function should be called with one of
 970 * the following parameter configurations -
 971 *
 972 * by providing @data and @recv_ecc only:
 973 *   decode_bch(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
 974 *
 975 * by providing @recv_ecc and @calc_ecc:
 976 *   decode_bch(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
 977 *
 978 * by providing ecc = recv_ecc XOR calc_ecc:
 979 *   decode_bch(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
 980 *
 981 * by providing syndrome results @syn:
 982 *   decode_bch(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
 983 *
 984 * Once decode_bch() has successfully returned with a positive value, error
 985 * locations returned in array @errloc should be interpreted as follows -
 986 *
 987 * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
 988 * data correction)
 989 *
 990 * if (errloc[n] < 8*len), then n-th error is located in data and can be
 991 * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
 992 *
 993 * Note that this function does not perform any data correction by itself, it
 994 * merely indicates error locations.
 995 */
 996int decode_bch(struct bch_control *bch, const uint8_t *data, unsigned int len,
 997	       const uint8_t *recv_ecc, const uint8_t *calc_ecc,
 998	       const unsigned int *syn, unsigned int *errloc)
 999{
1000	const unsigned int ecc_words = BCH_ECC_WORDS(bch);
1001	unsigned int nbits;
1002	int i, err, nroots;
1003	uint32_t sum;
1004
1005	/* sanity check: make sure data length can be handled */
1006	if (8*len > (bch->n-bch->ecc_bits))
1007		return -EINVAL;
1008
1009	/* if caller does not provide syndromes, compute them */
1010	if (!syn) {
1011		if (!calc_ecc) {
1012			/* compute received data ecc into an internal buffer */
1013			if (!data || !recv_ecc)
1014				return -EINVAL;
1015			encode_bch(bch, data, len, NULL);
1016		} else {
1017			/* load provided calculated ecc */
1018			load_ecc8(bch, bch->ecc_buf, calc_ecc);
1019		}
1020		/* load received ecc or assume it was XORed in calc_ecc */
1021		if (recv_ecc) {
1022			load_ecc8(bch, bch->ecc_buf2, recv_ecc);
1023			/* XOR received and calculated ecc */
1024			for (i = 0, sum = 0; i < (int)ecc_words; i++) {
1025				bch->ecc_buf[i] ^= bch->ecc_buf2[i];
1026				sum |= bch->ecc_buf[i];
1027			}
1028			if (!sum)
1029				/* no error found */
1030				return 0;
1031		}
1032		compute_syndromes(bch, bch->ecc_buf, bch->syn);
1033		syn = bch->syn;
1034	}
1035
1036	err = compute_error_locator_polynomial(bch, syn);
1037	if (err > 0) {
1038		nroots = find_poly_roots(bch, 1, bch->elp, errloc);
1039		if (err != nroots)
1040			err = -1;
1041	}
1042	if (err > 0) {
1043		/* post-process raw error locations for easier correction */
1044		nbits = (len*8)+bch->ecc_bits;
1045		for (i = 0; i < err; i++) {
1046			if (errloc[i] >= nbits) {
1047				err = -1;
1048				break;
1049			}
1050			errloc[i] = nbits-1-errloc[i];
1051			errloc[i] = (errloc[i] & ~7)|(7-(errloc[i] & 7));
 
 
1052		}
1053	}
1054	return (err >= 0) ? err : -EBADMSG;
1055}
1056EXPORT_SYMBOL_GPL(decode_bch);
1057
1058/*
1059 * generate Galois field lookup tables
1060 */
1061static int build_gf_tables(struct bch_control *bch, unsigned int poly)
1062{
1063	unsigned int i, x = 1;
1064	const unsigned int k = 1 << deg(poly);
1065
1066	/* primitive polynomial must be of degree m */
1067	if (k != (1u << GF_M(bch)))
1068		return -1;
1069
1070	for (i = 0; i < GF_N(bch); i++) {
1071		bch->a_pow_tab[i] = x;
1072		bch->a_log_tab[x] = i;
1073		if (i && (x == 1))
1074			/* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
1075			return -1;
1076		x <<= 1;
1077		if (x & k)
1078			x ^= poly;
1079	}
1080	bch->a_pow_tab[GF_N(bch)] = 1;
1081	bch->a_log_tab[0] = 0;
1082
1083	return 0;
1084}
1085
1086/*
1087 * compute generator polynomial remainder tables for fast encoding
1088 */
1089static void build_mod8_tables(struct bch_control *bch, const uint32_t *g)
1090{
1091	int i, j, b, d;
1092	uint32_t data, hi, lo, *tab;
1093	const int l = BCH_ECC_WORDS(bch);
1094	const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32);
1095	const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32);
1096
1097	memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab));
1098
1099	for (i = 0; i < 256; i++) {
1100		/* p(X)=i is a small polynomial of weight <= 8 */
1101		for (b = 0; b < 4; b++) {
1102			/* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */
1103			tab = bch->mod8_tab + (b*256+i)*l;
1104			data = i << (8*b);
1105			while (data) {
1106				d = deg(data);
1107				/* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */
1108				data ^= g[0] >> (31-d);
1109				for (j = 0; j < ecclen; j++) {
1110					hi = (d < 31) ? g[j] << (d+1) : 0;
1111					lo = (j+1 < plen) ?
1112						g[j+1] >> (31-d) : 0;
1113					tab[j] ^= hi|lo;
1114				}
1115			}
1116		}
1117	}
1118}
1119
1120/*
1121 * build a base for factoring degree 2 polynomials
1122 */
1123static int build_deg2_base(struct bch_control *bch)
1124{
1125	const int m = GF_M(bch);
1126	int i, j, r;
1127	unsigned int sum, x, y, remaining, ak = 0, xi[BCH_MAX_M];
1128
1129	/* find k s.t. Tr(a^k) = 1 and 0 <= k < m */
1130	for (i = 0; i < m; i++) {
1131		for (j = 0, sum = 0; j < m; j++)
1132			sum ^= a_pow(bch, i*(1 << j));
1133
1134		if (sum) {
1135			ak = bch->a_pow_tab[i];
1136			break;
1137		}
1138	}
1139	/* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */
1140	remaining = m;
1141	memset(xi, 0, sizeof(xi));
1142
1143	for (x = 0; (x <= GF_N(bch)) && remaining; x++) {
1144		y = gf_sqr(bch, x)^x;
1145		for (i = 0; i < 2; i++) {
1146			r = a_log(bch, y);
1147			if (y && (r < m) && !xi[r]) {
1148				bch->xi_tab[r] = x;
1149				xi[r] = 1;
1150				remaining--;
1151				dbg("x%d = %x\n", r, x);
1152				break;
1153			}
1154			y ^= ak;
1155		}
1156	}
1157	/* should not happen but check anyway */
1158	return remaining ? -1 : 0;
1159}
1160
1161static void *bch_alloc(size_t size, int *err)
1162{
1163	void *ptr;
1164
1165	ptr = kmalloc(size, GFP_KERNEL);
1166	if (ptr == NULL)
1167		*err = 1;
1168	return ptr;
1169}
1170
1171/*
1172 * compute generator polynomial for given (m,t) parameters.
1173 */
1174static uint32_t *compute_generator_polynomial(struct bch_control *bch)
1175{
1176	const unsigned int m = GF_M(bch);
1177	const unsigned int t = GF_T(bch);
1178	int n, err = 0;
1179	unsigned int i, j, nbits, r, word, *roots;
1180	struct gf_poly *g;
1181	uint32_t *genpoly;
1182
1183	g = bch_alloc(GF_POLY_SZ(m*t), &err);
1184	roots = bch_alloc((bch->n+1)*sizeof(*roots), &err);
1185	genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err);
1186
1187	if (err) {
1188		kfree(genpoly);
1189		genpoly = NULL;
1190		goto finish;
1191	}
1192
1193	/* enumerate all roots of g(X) */
1194	memset(roots , 0, (bch->n+1)*sizeof(*roots));
1195	for (i = 0; i < t; i++) {
1196		for (j = 0, r = 2*i+1; j < m; j++) {
1197			roots[r] = 1;
1198			r = mod_s(bch, 2*r);
1199		}
1200	}
1201	/* build generator polynomial g(X) */
1202	g->deg = 0;
1203	g->c[0] = 1;
1204	for (i = 0; i < GF_N(bch); i++) {
1205		if (roots[i]) {
1206			/* multiply g(X) by (X+root) */
1207			r = bch->a_pow_tab[i];
1208			g->c[g->deg+1] = 1;
1209			for (j = g->deg; j > 0; j--)
1210				g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1];
1211
1212			g->c[0] = gf_mul(bch, g->c[0], r);
1213			g->deg++;
1214		}
1215	}
1216	/* store left-justified binary representation of g(X) */
1217	n = g->deg+1;
1218	i = 0;
1219
1220	while (n > 0) {
1221		nbits = (n > 32) ? 32 : n;
1222		for (j = 0, word = 0; j < nbits; j++) {
1223			if (g->c[n-1-j])
1224				word |= 1u << (31-j);
1225		}
1226		genpoly[i++] = word;
1227		n -= nbits;
1228	}
1229	bch->ecc_bits = g->deg;
1230
1231finish:
1232	kfree(g);
1233	kfree(roots);
1234
1235	return genpoly;
1236}
1237
1238/**
1239 * init_bch - initialize a BCH encoder/decoder
1240 * @m:          Galois field order, should be in the range 5-15
1241 * @t:          maximum error correction capability, in bits
1242 * @prim_poly:  user-provided primitive polynomial (or 0 to use default)
 
1243 *
1244 * Returns:
1245 *  a newly allocated BCH control structure if successful, NULL otherwise
1246 *
1247 * This initialization can take some time, as lookup tables are built for fast
1248 * encoding/decoding; make sure not to call this function from a time critical
1249 * path. Usually, init_bch() should be called on module/driver init and
1250 * free_bch() should be called to release memory on exit.
1251 *
1252 * You may provide your own primitive polynomial of degree @m in argument
1253 * @prim_poly, or let init_bch() use its default polynomial.
1254 *
1255 * Once init_bch() has successfully returned a pointer to a newly allocated
1256 * BCH control structure, ecc length in bytes is given by member @ecc_bytes of
1257 * the structure.
1258 */
1259struct bch_control *init_bch(int m, int t, unsigned int prim_poly)
 
1260{
1261	int err = 0;
1262	unsigned int i, words;
1263	uint32_t *genpoly;
1264	struct bch_control *bch = NULL;
1265
1266	const int min_m = 5;
1267
1268	/* default primitive polynomials */
1269	static const unsigned int prim_poly_tab[] = {
1270		0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b,
1271		0x402b, 0x8003,
1272	};
1273
1274#if defined(CONFIG_BCH_CONST_PARAMS)
1275	if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) {
1276		printk(KERN_ERR "bch encoder/decoder was configured to support "
1277		       "parameters m=%d, t=%d only!\n",
1278		       CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
1279		goto fail;
1280	}
1281#endif
1282	if ((m < min_m) || (m > BCH_MAX_M))
1283		/*
1284		 * values of m greater than 15 are not currently supported;
1285		 * supporting m > 15 would require changing table base type
1286		 * (uint16_t) and a small patch in matrix transposition
1287		 */
1288		goto fail;
1289
1290	if (t > BCH_MAX_T)
1291		/*
1292		 * we can support larger than 64 bits if necessary, at the
1293		 * cost of higher stack usage.
1294		 */
1295		goto fail;
1296
1297	/* sanity checks */
1298	if ((t < 1) || (m*t >= ((1 << m)-1)))
1299		/* invalid t value */
1300		goto fail;
1301
1302	/* select a primitive polynomial for generating GF(2^m) */
1303	if (prim_poly == 0)
1304		prim_poly = prim_poly_tab[m-min_m];
1305
1306	bch = kzalloc(sizeof(*bch), GFP_KERNEL);
1307	if (bch == NULL)
1308		goto fail;
1309
1310	bch->m = m;
1311	bch->t = t;
1312	bch->n = (1 << m)-1;
1313	words  = DIV_ROUND_UP(m*t, 32);
1314	bch->ecc_bytes = DIV_ROUND_UP(m*t, 8);
1315	bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err);
1316	bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err);
1317	bch->mod8_tab  = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err);
1318	bch->ecc_buf   = bch_alloc(words*sizeof(*bch->ecc_buf), &err);
1319	bch->ecc_buf2  = bch_alloc(words*sizeof(*bch->ecc_buf2), &err);
1320	bch->xi_tab    = bch_alloc(m*sizeof(*bch->xi_tab), &err);
1321	bch->syn       = bch_alloc(2*t*sizeof(*bch->syn), &err);
1322	bch->cache     = bch_alloc(2*t*sizeof(*bch->cache), &err);
1323	bch->elp       = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err);
 
1324
1325	for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1326		bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err);
1327
1328	if (err)
1329		goto fail;
1330
1331	err = build_gf_tables(bch, prim_poly);
1332	if (err)
1333		goto fail;
1334
1335	/* use generator polynomial for computing encoding tables */
1336	genpoly = compute_generator_polynomial(bch);
1337	if (genpoly == NULL)
1338		goto fail;
1339
1340	build_mod8_tables(bch, genpoly);
1341	kfree(genpoly);
1342
1343	err = build_deg2_base(bch);
1344	if (err)
1345		goto fail;
1346
1347	return bch;
1348
1349fail:
1350	free_bch(bch);
1351	return NULL;
1352}
1353EXPORT_SYMBOL_GPL(init_bch);
1354
1355/**
1356 *  free_bch - free the BCH control structure
1357 *  @bch:    BCH control structure to release
1358 */
1359void free_bch(struct bch_control *bch)
1360{
1361	unsigned int i;
1362
1363	if (bch) {
1364		kfree(bch->a_pow_tab);
1365		kfree(bch->a_log_tab);
1366		kfree(bch->mod8_tab);
1367		kfree(bch->ecc_buf);
1368		kfree(bch->ecc_buf2);
1369		kfree(bch->xi_tab);
1370		kfree(bch->syn);
1371		kfree(bch->cache);
1372		kfree(bch->elp);
1373
1374		for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1375			kfree(bch->poly_2t[i]);
1376
1377		kfree(bch);
1378	}
1379}
1380EXPORT_SYMBOL_GPL(free_bch);
1381
1382MODULE_LICENSE("GPL");
1383MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
1384MODULE_DESCRIPTION("Binary BCH encoder/decoder");
v6.2
   1/*
   2 * Generic binary BCH encoding/decoding library
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms of the GNU General Public License version 2 as published by
   6 * the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 *
  13 * You should have received a copy of the GNU General Public License along with
  14 * this program; if not, write to the Free Software Foundation, Inc., 51
  15 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  16 *
  17 * Copyright © 2011 Parrot S.A.
  18 *
  19 * Author: Ivan Djelic <ivan.djelic@parrot.com>
  20 *
  21 * Description:
  22 *
  23 * This library provides runtime configurable encoding/decoding of binary
  24 * Bose-Chaudhuri-Hocquenghem (BCH) codes.
  25 *
  26 * Call bch_init to get a pointer to a newly allocated bch_control structure for
  27 * the given m (Galois field order), t (error correction capability) and
  28 * (optional) primitive polynomial parameters.
  29 *
  30 * Call bch_encode to compute and store ecc parity bytes to a given buffer.
  31 * Call bch_decode to detect and locate errors in received data.
  32 *
  33 * On systems supporting hw BCH features, intermediate results may be provided
  34 * to bch_decode in order to skip certain steps. See bch_decode() documentation
  35 * for details.
  36 *
  37 * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
  38 * parameters m and t; thus allowing extra compiler optimizations and providing
  39 * better (up to 2x) encoding performance. Using this option makes sense when
  40 * (m,t) are fixed and known in advance, e.g. when using BCH error correction
  41 * on a particular NAND flash device.
  42 *
  43 * Algorithmic details:
  44 *
  45 * Encoding is performed by processing 32 input bits in parallel, using 4
  46 * remainder lookup tables.
  47 *
  48 * The final stage of decoding involves the following internal steps:
  49 * a. Syndrome computation
  50 * b. Error locator polynomial computation using Berlekamp-Massey algorithm
  51 * c. Error locator root finding (by far the most expensive step)
  52 *
  53 * In this implementation, step c is not performed using the usual Chien search.
  54 * Instead, an alternative approach described in [1] is used. It consists in
  55 * factoring the error locator polynomial using the Berlekamp Trace algorithm
  56 * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
  57 * solving techniques [2] are used. The resulting algorithm, called BTZ, yields
  58 * much better performance than Chien search for usual (m,t) values (typically
  59 * m >= 13, t < 32, see [1]).
  60 *
  61 * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
  62 * of characteristic 2, in: Western European Workshop on Research in Cryptology
  63 * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
  64 * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
  65 * finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
  66 */
  67
  68#include <linux/kernel.h>
  69#include <linux/errno.h>
  70#include <linux/init.h>
  71#include <linux/module.h>
  72#include <linux/slab.h>
  73#include <linux/bitops.h>
  74#include <asm/byteorder.h>
  75#include <linux/bch.h>
  76
  77#if defined(CONFIG_BCH_CONST_PARAMS)
  78#define GF_M(_p)               (CONFIG_BCH_CONST_M)
  79#define GF_T(_p)               (CONFIG_BCH_CONST_T)
  80#define GF_N(_p)               ((1 << (CONFIG_BCH_CONST_M))-1)
  81#define BCH_MAX_M              (CONFIG_BCH_CONST_M)
  82#define BCH_MAX_T	       (CONFIG_BCH_CONST_T)
  83#else
  84#define GF_M(_p)               ((_p)->m)
  85#define GF_T(_p)               ((_p)->t)
  86#define GF_N(_p)               ((_p)->n)
  87#define BCH_MAX_M              15 /* 2KB */
  88#define BCH_MAX_T              64 /* 64 bit correction */
  89#endif
  90
  91#define BCH_ECC_WORDS(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
  92#define BCH_ECC_BYTES(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
  93
  94#define BCH_ECC_MAX_WORDS      DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32)
  95
  96#ifndef dbg
  97#define dbg(_fmt, args...)     do {} while (0)
  98#endif
  99
 100/*
 101 * represent a polynomial over GF(2^m)
 102 */
 103struct gf_poly {
 104	unsigned int deg;    /* polynomial degree */
 105	unsigned int c[];   /* polynomial terms */
 106};
 107
 108/* given its degree, compute a polynomial size in bytes */
 109#define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))
 110
 111/* polynomial of degree 1 */
 112struct gf_poly_deg1 {
 113	struct gf_poly poly;
 114	unsigned int   c[2];
 115};
 116
 117static u8 swap_bits_table[] = {
 118	0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
 119	0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
 120	0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
 121	0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
 122	0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
 123	0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
 124	0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
 125	0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
 126	0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
 127	0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
 128	0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
 129	0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
 130	0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
 131	0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
 132	0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
 133	0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
 134	0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
 135	0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
 136	0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
 137	0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
 138	0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
 139	0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
 140	0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
 141	0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
 142	0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
 143	0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
 144	0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
 145	0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
 146	0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
 147	0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
 148	0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
 149	0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
 150};
 151
 152static u8 swap_bits(struct bch_control *bch, u8 in)
 153{
 154	if (!bch->swap_bits)
 155		return in;
 156
 157	return swap_bits_table[in];
 158}
 159
 160/*
 161 * same as bch_encode(), but process input data one byte at a time
 162 */
 163static void bch_encode_unaligned(struct bch_control *bch,
 164				 const unsigned char *data, unsigned int len,
 165				 uint32_t *ecc)
 166{
 167	int i;
 168	const uint32_t *p;
 169	const int l = BCH_ECC_WORDS(bch)-1;
 170
 171	while (len--) {
 172		u8 tmp = swap_bits(bch, *data++);
 173
 174		p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(tmp)) & 0xff);
 175
 176		for (i = 0; i < l; i++)
 177			ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++);
 178
 179		ecc[l] = (ecc[l] << 8)^(*p);
 180	}
 181}
 182
 183/*
 184 * convert ecc bytes to aligned, zero-padded 32-bit ecc words
 185 */
 186static void load_ecc8(struct bch_control *bch, uint32_t *dst,
 187		      const uint8_t *src)
 188{
 189	uint8_t pad[4] = {0, 0, 0, 0};
 190	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
 191
 192	for (i = 0; i < nwords; i++, src += 4)
 193		dst[i] = ((u32)swap_bits(bch, src[0]) << 24) |
 194			((u32)swap_bits(bch, src[1]) << 16) |
 195			((u32)swap_bits(bch, src[2]) << 8) |
 196			swap_bits(bch, src[3]);
 197
 198	memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords);
 199	dst[nwords] = ((u32)swap_bits(bch, pad[0]) << 24) |
 200		((u32)swap_bits(bch, pad[1]) << 16) |
 201		((u32)swap_bits(bch, pad[2]) << 8) |
 202		swap_bits(bch, pad[3]);
 203}
 204
 205/*
 206 * convert 32-bit ecc words to ecc bytes
 207 */
 208static void store_ecc8(struct bch_control *bch, uint8_t *dst,
 209		       const uint32_t *src)
 210{
 211	uint8_t pad[4];
 212	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
 213
 214	for (i = 0; i < nwords; i++) {
 215		*dst++ = swap_bits(bch, src[i] >> 24);
 216		*dst++ = swap_bits(bch, src[i] >> 16);
 217		*dst++ = swap_bits(bch, src[i] >> 8);
 218		*dst++ = swap_bits(bch, src[i]);
 219	}
 220	pad[0] = swap_bits(bch, src[nwords] >> 24);
 221	pad[1] = swap_bits(bch, src[nwords] >> 16);
 222	pad[2] = swap_bits(bch, src[nwords] >> 8);
 223	pad[3] = swap_bits(bch, src[nwords]);
 224	memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
 225}
 226
 227/**
 228 * bch_encode - calculate BCH ecc parity of data
 229 * @bch:   BCH control structure
 230 * @data:  data to encode
 231 * @len:   data length in bytes
 232 * @ecc:   ecc parity data, must be initialized by caller
 233 *
 234 * The @ecc parity array is used both as input and output parameter, in order to
 235 * allow incremental computations. It should be of the size indicated by member
 236 * @ecc_bytes of @bch, and should be initialized to 0 before the first call.
 237 *
 238 * The exact number of computed ecc parity bits is given by member @ecc_bits of
 239 * @bch; it may be less than m*t for large values of t.
 240 */
 241void bch_encode(struct bch_control *bch, const uint8_t *data,
 242		unsigned int len, uint8_t *ecc)
 243{
 244	const unsigned int l = BCH_ECC_WORDS(bch)-1;
 245	unsigned int i, mlen;
 246	unsigned long m;
 247	uint32_t w, r[BCH_ECC_MAX_WORDS];
 248	const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r);
 249	const uint32_t * const tab0 = bch->mod8_tab;
 250	const uint32_t * const tab1 = tab0 + 256*(l+1);
 251	const uint32_t * const tab2 = tab1 + 256*(l+1);
 252	const uint32_t * const tab3 = tab2 + 256*(l+1);
 253	const uint32_t *pdata, *p0, *p1, *p2, *p3;
 254
 255	if (WARN_ON(r_bytes > sizeof(r)))
 256		return;
 257
 258	if (ecc) {
 259		/* load ecc parity bytes into internal 32-bit buffer */
 260		load_ecc8(bch, bch->ecc_buf, ecc);
 261	} else {
 262		memset(bch->ecc_buf, 0, r_bytes);
 263	}
 264
 265	/* process first unaligned data bytes */
 266	m = ((unsigned long)data) & 3;
 267	if (m) {
 268		mlen = (len < (4-m)) ? len : 4-m;
 269		bch_encode_unaligned(bch, data, mlen, bch->ecc_buf);
 270		data += mlen;
 271		len  -= mlen;
 272	}
 273
 274	/* process 32-bit aligned data words */
 275	pdata = (uint32_t *)data;
 276	mlen  = len/4;
 277	data += 4*mlen;
 278	len  -= 4*mlen;
 279	memcpy(r, bch->ecc_buf, r_bytes);
 280
 281	/*
 282	 * split each 32-bit word into 4 polynomials of weight 8 as follows:
 283	 *
 284	 * 31 ...24  23 ...16  15 ... 8  7 ... 0
 285	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt
 286	 *                               tttttttt  mod g = r0 (precomputed)
 287	 *                     zzzzzzzz  00000000  mod g = r1 (precomputed)
 288	 *           yyyyyyyy  00000000  00000000  mod g = r2 (precomputed)
 289	 * xxxxxxxx  00000000  00000000  00000000  mod g = r3 (precomputed)
 290	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt  mod g = r0^r1^r2^r3
 291	 */
 292	while (mlen--) {
 293		/* input data is read in big-endian format */
 294		w = cpu_to_be32(*pdata++);
 295		if (bch->swap_bits)
 296			w = (u32)swap_bits(bch, w) |
 297			    ((u32)swap_bits(bch, w >> 8) << 8) |
 298			    ((u32)swap_bits(bch, w >> 16) << 16) |
 299			    ((u32)swap_bits(bch, w >> 24) << 24);
 300		w ^= r[0];
 301		p0 = tab0 + (l+1)*((w >>  0) & 0xff);
 302		p1 = tab1 + (l+1)*((w >>  8) & 0xff);
 303		p2 = tab2 + (l+1)*((w >> 16) & 0xff);
 304		p3 = tab3 + (l+1)*((w >> 24) & 0xff);
 305
 306		for (i = 0; i < l; i++)
 307			r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i];
 308
 309		r[l] = p0[l]^p1[l]^p2[l]^p3[l];
 310	}
 311	memcpy(bch->ecc_buf, r, r_bytes);
 312
 313	/* process last unaligned bytes */
 314	if (len)
 315		bch_encode_unaligned(bch, data, len, bch->ecc_buf);
 316
 317	/* store ecc parity bytes into original parity buffer */
 318	if (ecc)
 319		store_ecc8(bch, ecc, bch->ecc_buf);
 320}
 321EXPORT_SYMBOL_GPL(bch_encode);
 322
 323static inline int modulo(struct bch_control *bch, unsigned int v)
 324{
 325	const unsigned int n = GF_N(bch);
 326	while (v >= n) {
 327		v -= n;
 328		v = (v & n) + (v >> GF_M(bch));
 329	}
 330	return v;
 331}
 332
 333/*
 334 * shorter and faster modulo function, only works when v < 2N.
 335 */
 336static inline int mod_s(struct bch_control *bch, unsigned int v)
 337{
 338	const unsigned int n = GF_N(bch);
 339	return (v < n) ? v : v-n;
 340}
 341
 342static inline int deg(unsigned int poly)
 343{
 344	/* polynomial degree is the most-significant bit index */
 345	return fls(poly)-1;
 346}
 347
 348static inline int parity(unsigned int x)
 349{
 350	/*
 351	 * public domain code snippet, lifted from
 352	 * http://www-graphics.stanford.edu/~seander/bithacks.html
 353	 */
 354	x ^= x >> 1;
 355	x ^= x >> 2;
 356	x = (x & 0x11111111U) * 0x11111111U;
 357	return (x >> 28) & 1;
 358}
 359
 360/* Galois field basic operations: multiply, divide, inverse, etc. */
 361
 362static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a,
 363				  unsigned int b)
 364{
 365	return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
 366					       bch->a_log_tab[b])] : 0;
 367}
 368
 369static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a)
 370{
 371	return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0;
 372}
 373
 374static inline unsigned int gf_div(struct bch_control *bch, unsigned int a,
 375				  unsigned int b)
 376{
 377	return a ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
 378					GF_N(bch)-bch->a_log_tab[b])] : 0;
 379}
 380
 381static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a)
 382{
 383	return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
 384}
 385
 386static inline unsigned int a_pow(struct bch_control *bch, int i)
 387{
 388	return bch->a_pow_tab[modulo(bch, i)];
 389}
 390
 391static inline int a_log(struct bch_control *bch, unsigned int x)
 392{
 393	return bch->a_log_tab[x];
 394}
 395
 396static inline int a_ilog(struct bch_control *bch, unsigned int x)
 397{
 398	return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
 399}
 400
 401/*
 402 * compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
 403 */
 404static void compute_syndromes(struct bch_control *bch, uint32_t *ecc,
 405			      unsigned int *syn)
 406{
 407	int i, j, s;
 408	unsigned int m;
 409	uint32_t poly;
 410	const int t = GF_T(bch);
 411
 412	s = bch->ecc_bits;
 413
 414	/* make sure extra bits in last ecc word are cleared */
 415	m = ((unsigned int)s) & 31;
 416	if (m)
 417		ecc[s/32] &= ~((1u << (32-m))-1);
 418	memset(syn, 0, 2*t*sizeof(*syn));
 419
 420	/* compute v(a^j) for j=1 .. 2t-1 */
 421	do {
 422		poly = *ecc++;
 423		s -= 32;
 424		while (poly) {
 425			i = deg(poly);
 426			for (j = 0; j < 2*t; j += 2)
 427				syn[j] ^= a_pow(bch, (j+1)*(i+s));
 428
 429			poly ^= (1 << i);
 430		}
 431	} while (s > 0);
 432
 433	/* v(a^(2j)) = v(a^j)^2 */
 434	for (j = 0; j < t; j++)
 435		syn[2*j+1] = gf_sqr(bch, syn[j]);
 436}
 437
 438static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src)
 439{
 440	memcpy(dst, src, GF_POLY_SZ(src->deg));
 441}
 442
 443static int compute_error_locator_polynomial(struct bch_control *bch,
 444					    const unsigned int *syn)
 445{
 446	const unsigned int t = GF_T(bch);
 447	const unsigned int n = GF_N(bch);
 448	unsigned int i, j, tmp, l, pd = 1, d = syn[0];
 449	struct gf_poly *elp = bch->elp;
 450	struct gf_poly *pelp = bch->poly_2t[0];
 451	struct gf_poly *elp_copy = bch->poly_2t[1];
 452	int k, pp = -1;
 453
 454	memset(pelp, 0, GF_POLY_SZ(2*t));
 455	memset(elp, 0, GF_POLY_SZ(2*t));
 456
 457	pelp->deg = 0;
 458	pelp->c[0] = 1;
 459	elp->deg = 0;
 460	elp->c[0] = 1;
 461
 462	/* use simplified binary Berlekamp-Massey algorithm */
 463	for (i = 0; (i < t) && (elp->deg <= t); i++) {
 464		if (d) {
 465			k = 2*i-pp;
 466			gf_poly_copy(elp_copy, elp);
 467			/* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */
 468			tmp = a_log(bch, d)+n-a_log(bch, pd);
 469			for (j = 0; j <= pelp->deg; j++) {
 470				if (pelp->c[j]) {
 471					l = a_log(bch, pelp->c[j]);
 472					elp->c[j+k] ^= a_pow(bch, tmp+l);
 473				}
 474			}
 475			/* compute l[i+1] = max(l[i]->c[l[p]+2*(i-p]) */
 476			tmp = pelp->deg+k;
 477			if (tmp > elp->deg) {
 478				elp->deg = tmp;
 479				gf_poly_copy(pelp, elp_copy);
 480				pd = d;
 481				pp = 2*i;
 482			}
 483		}
 484		/* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */
 485		if (i < t-1) {
 486			d = syn[2*i+2];
 487			for (j = 1; j <= elp->deg; j++)
 488				d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]);
 489		}
 490	}
 491	dbg("elp=%s\n", gf_poly_str(elp));
 492	return (elp->deg > t) ? -1 : (int)elp->deg;
 493}
 494
 495/*
 496 * solve a m x m linear system in GF(2) with an expected number of solutions,
 497 * and return the number of found solutions
 498 */
 499static int solve_linear_system(struct bch_control *bch, unsigned int *rows,
 500			       unsigned int *sol, int nsol)
 501{
 502	const int m = GF_M(bch);
 503	unsigned int tmp, mask;
 504	int rem, c, r, p, k, param[BCH_MAX_M];
 505
 506	k = 0;
 507	mask = 1 << m;
 508
 509	/* Gaussian elimination */
 510	for (c = 0; c < m; c++) {
 511		rem = 0;
 512		p = c-k;
 513		/* find suitable row for elimination */
 514		for (r = p; r < m; r++) {
 515			if (rows[r] & mask) {
 516				if (r != p) {
 517					tmp = rows[r];
 518					rows[r] = rows[p];
 519					rows[p] = tmp;
 520				}
 521				rem = r+1;
 522				break;
 523			}
 524		}
 525		if (rem) {
 526			/* perform elimination on remaining rows */
 527			tmp = rows[p];
 528			for (r = rem; r < m; r++) {
 529				if (rows[r] & mask)
 530					rows[r] ^= tmp;
 531			}
 532		} else {
 533			/* elimination not needed, store defective row index */
 534			param[k++] = c;
 535		}
 536		mask >>= 1;
 537	}
 538	/* rewrite system, inserting fake parameter rows */
 539	if (k > 0) {
 540		p = k;
 541		for (r = m-1; r >= 0; r--) {
 542			if ((r > m-1-k) && rows[r])
 543				/* system has no solution */
 544				return 0;
 545
 546			rows[r] = (p && (r == param[p-1])) ?
 547				p--, 1u << (m-r) : rows[r-p];
 548		}
 549	}
 550
 551	if (nsol != (1 << k))
 552		/* unexpected number of solutions */
 553		return 0;
 554
 555	for (p = 0; p < nsol; p++) {
 556		/* set parameters for p-th solution */
 557		for (c = 0; c < k; c++)
 558			rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1);
 559
 560		/* compute unique solution */
 561		tmp = 0;
 562		for (r = m-1; r >= 0; r--) {
 563			mask = rows[r] & (tmp|1);
 564			tmp |= parity(mask) << (m-r);
 565		}
 566		sol[p] = tmp >> 1;
 567	}
 568	return nsol;
 569}
 570
 571/*
 572 * this function builds and solves a linear system for finding roots of a degree
 573 * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
 574 */
 575static int find_affine4_roots(struct bch_control *bch, unsigned int a,
 576			      unsigned int b, unsigned int c,
 577			      unsigned int *roots)
 578{
 579	int i, j, k;
 580	const int m = GF_M(bch);
 581	unsigned int mask = 0xff, t, rows[16] = {0,};
 582
 583	j = a_log(bch, b);
 584	k = a_log(bch, a);
 585	rows[0] = c;
 586
 587	/* build linear system to solve X^4+aX^2+bX+c = 0 */
 588	for (i = 0; i < m; i++) {
 589		rows[i+1] = bch->a_pow_tab[4*i]^
 590			(a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^
 591			(b ? bch->a_pow_tab[mod_s(bch, j)] : 0);
 592		j++;
 593		k += 2;
 594	}
 595	/*
 596	 * transpose 16x16 matrix before passing it to linear solver
 597	 * warning: this code assumes m < 16
 598	 */
 599	for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) {
 600		for (k = 0; k < 16; k = (k+j+1) & ~j) {
 601			t = ((rows[k] >> j)^rows[k+j]) & mask;
 602			rows[k] ^= (t << j);
 603			rows[k+j] ^= t;
 604		}
 605	}
 606	return solve_linear_system(bch, rows, roots, 4);
 607}
 608
 609/*
 610 * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
 611 */
 612static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly,
 613				unsigned int *roots)
 614{
 615	int n = 0;
 616
 617	if (poly->c[0])
 618		/* poly[X] = bX+c with c!=0, root=c/b */
 619		roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+
 620				   bch->a_log_tab[poly->c[1]]);
 621	return n;
 622}
 623
 624/*
 625 * compute roots of a degree 2 polynomial over GF(2^m)
 626 */
 627static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly,
 628				unsigned int *roots)
 629{
 630	int n = 0, i, l0, l1, l2;
 631	unsigned int u, v, r;
 632
 633	if (poly->c[0] && poly->c[1]) {
 634
 635		l0 = bch->a_log_tab[poly->c[0]];
 636		l1 = bch->a_log_tab[poly->c[1]];
 637		l2 = bch->a_log_tab[poly->c[2]];
 638
 639		/* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */
 640		u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1));
 641		/*
 642		 * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
 643		 * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
 644		 * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
 645		 * i.e. r and r+1 are roots iff Tr(u)=0
 646		 */
 647		r = 0;
 648		v = u;
 649		while (v) {
 650			i = deg(v);
 651			r ^= bch->xi_tab[i];
 652			v ^= (1 << i);
 653		}
 654		/* verify root */
 655		if ((gf_sqr(bch, r)^r) == u) {
 656			/* reverse z=a/bX transformation and compute log(1/r) */
 657			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
 658					    bch->a_log_tab[r]+l2);
 659			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
 660					    bch->a_log_tab[r^1]+l2);
 661		}
 662	}
 663	return n;
 664}
 665
 666/*
 667 * compute roots of a degree 3 polynomial over GF(2^m)
 668 */
 669static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly,
 670				unsigned int *roots)
 671{
 672	int i, n = 0;
 673	unsigned int a, b, c, a2, b2, c2, e3, tmp[4];
 674
 675	if (poly->c[0]) {
 676		/* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */
 677		e3 = poly->c[3];
 678		c2 = gf_div(bch, poly->c[0], e3);
 679		b2 = gf_div(bch, poly->c[1], e3);
 680		a2 = gf_div(bch, poly->c[2], e3);
 681
 682		/* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */
 683		c = gf_mul(bch, a2, c2);           /* c = a2c2      */
 684		b = gf_mul(bch, a2, b2)^c2;        /* b = a2b2 + c2 */
 685		a = gf_sqr(bch, a2)^b2;            /* a = a2^2 + b2 */
 686
 687		/* find the 4 roots of this affine polynomial */
 688		if (find_affine4_roots(bch, a, b, c, tmp) == 4) {
 689			/* remove a2 from final list of roots */
 690			for (i = 0; i < 4; i++) {
 691				if (tmp[i] != a2)
 692					roots[n++] = a_ilog(bch, tmp[i]);
 693			}
 694		}
 695	}
 696	return n;
 697}
 698
 699/*
 700 * compute roots of a degree 4 polynomial over GF(2^m)
 701 */
 702static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly,
 703				unsigned int *roots)
 704{
 705	int i, l, n = 0;
 706	unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4;
 707
 708	if (poly->c[0] == 0)
 709		return 0;
 710
 711	/* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */
 712	e4 = poly->c[4];
 713	d = gf_div(bch, poly->c[0], e4);
 714	c = gf_div(bch, poly->c[1], e4);
 715	b = gf_div(bch, poly->c[2], e4);
 716	a = gf_div(bch, poly->c[3], e4);
 717
 718	/* use Y=1/X transformation to get an affine polynomial */
 719	if (a) {
 720		/* first, eliminate cX by using z=X+e with ae^2+c=0 */
 721		if (c) {
 722			/* compute e such that e^2 = c/a */
 723			f = gf_div(bch, c, a);
 724			l = a_log(bch, f);
 725			l += (l & 1) ? GF_N(bch) : 0;
 726			e = a_pow(bch, l/2);
 727			/*
 728			 * use transformation z=X+e:
 729			 * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
 730			 * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
 731			 * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
 732			 * z^4 + az^3 +     b'z^2 + d'
 733			 */
 734			d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d;
 735			b = gf_mul(bch, a, e)^b;
 736		}
 737		/* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */
 738		if (d == 0)
 739			/* assume all roots have multiplicity 1 */
 740			return 0;
 741
 742		c2 = gf_inv(bch, d);
 743		b2 = gf_div(bch, a, d);
 744		a2 = gf_div(bch, b, d);
 745	} else {
 746		/* polynomial is already affine */
 747		c2 = d;
 748		b2 = c;
 749		a2 = b;
 750	}
 751	/* find the 4 roots of this affine polynomial */
 752	if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) {
 753		for (i = 0; i < 4; i++) {
 754			/* post-process roots (reverse transformations) */
 755			f = a ? gf_inv(bch, roots[i]) : roots[i];
 756			roots[i] = a_ilog(bch, f^e);
 757		}
 758		n = 4;
 759	}
 760	return n;
 761}
 762
 763/*
 764 * build monic, log-based representation of a polynomial
 765 */
 766static void gf_poly_logrep(struct bch_control *bch,
 767			   const struct gf_poly *a, int *rep)
 768{
 769	int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]);
 770
 771	/* represent 0 values with -1; warning, rep[d] is not set to 1 */
 772	for (i = 0; i < d; i++)
 773		rep[i] = a->c[i] ? mod_s(bch, a_log(bch, a->c[i])+l) : -1;
 774}
 775
 776/*
 777 * compute polynomial Euclidean division remainder in GF(2^m)[X]
 778 */
 779static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a,
 780			const struct gf_poly *b, int *rep)
 781{
 782	int la, p, m;
 783	unsigned int i, j, *c = a->c;
 784	const unsigned int d = b->deg;
 785
 786	if (a->deg < d)
 787		return;
 788
 789	/* reuse or compute log representation of denominator */
 790	if (!rep) {
 791		rep = bch->cache;
 792		gf_poly_logrep(bch, b, rep);
 793	}
 794
 795	for (j = a->deg; j >= d; j--) {
 796		if (c[j]) {
 797			la = a_log(bch, c[j]);
 798			p = j-d;
 799			for (i = 0; i < d; i++, p++) {
 800				m = rep[i];
 801				if (m >= 0)
 802					c[p] ^= bch->a_pow_tab[mod_s(bch,
 803								     m+la)];
 804			}
 805		}
 806	}
 807	a->deg = d-1;
 808	while (!c[a->deg] && a->deg)
 809		a->deg--;
 810}
 811
 812/*
 813 * compute polynomial Euclidean division quotient in GF(2^m)[X]
 814 */
 815static void gf_poly_div(struct bch_control *bch, struct gf_poly *a,
 816			const struct gf_poly *b, struct gf_poly *q)
 817{
 818	if (a->deg >= b->deg) {
 819		q->deg = a->deg-b->deg;
 820		/* compute a mod b (modifies a) */
 821		gf_poly_mod(bch, a, b, NULL);
 822		/* quotient is stored in upper part of polynomial a */
 823		memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int));
 824	} else {
 825		q->deg = 0;
 826		q->c[0] = 0;
 827	}
 828}
 829
 830/*
 831 * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
 832 */
 833static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a,
 834				   struct gf_poly *b)
 835{
 836	struct gf_poly *tmp;
 837
 838	dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
 839
 840	if (a->deg < b->deg) {
 841		tmp = b;
 842		b = a;
 843		a = tmp;
 844	}
 845
 846	while (b->deg > 0) {
 847		gf_poly_mod(bch, a, b, NULL);
 848		tmp = b;
 849		b = a;
 850		a = tmp;
 851	}
 852
 853	dbg("%s\n", gf_poly_str(a));
 854
 855	return a;
 856}
 857
 858/*
 859 * Given a polynomial f and an integer k, compute Tr(a^kX) mod f
 860 * This is used in Berlekamp Trace algorithm for splitting polynomials
 861 */
 862static void compute_trace_bk_mod(struct bch_control *bch, int k,
 863				 const struct gf_poly *f, struct gf_poly *z,
 864				 struct gf_poly *out)
 865{
 866	const int m = GF_M(bch);
 867	int i, j;
 868
 869	/* z contains z^2j mod f */
 870	z->deg = 1;
 871	z->c[0] = 0;
 872	z->c[1] = bch->a_pow_tab[k];
 873
 874	out->deg = 0;
 875	memset(out, 0, GF_POLY_SZ(f->deg));
 876
 877	/* compute f log representation only once */
 878	gf_poly_logrep(bch, f, bch->cache);
 879
 880	for (i = 0; i < m; i++) {
 881		/* add a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */
 882		for (j = z->deg; j >= 0; j--) {
 883			out->c[j] ^= z->c[j];
 884			z->c[2*j] = gf_sqr(bch, z->c[j]);
 885			z->c[2*j+1] = 0;
 886		}
 887		if (z->deg > out->deg)
 888			out->deg = z->deg;
 889
 890		if (i < m-1) {
 891			z->deg *= 2;
 892			/* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */
 893			gf_poly_mod(bch, z, f, bch->cache);
 894		}
 895	}
 896	while (!out->c[out->deg] && out->deg)
 897		out->deg--;
 898
 899	dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
 900}
 901
 902/*
 903 * factor a polynomial using Berlekamp Trace algorithm (BTA)
 904 */
 905static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f,
 906			      struct gf_poly **g, struct gf_poly **h)
 907{
 908	struct gf_poly *f2 = bch->poly_2t[0];
 909	struct gf_poly *q  = bch->poly_2t[1];
 910	struct gf_poly *tk = bch->poly_2t[2];
 911	struct gf_poly *z  = bch->poly_2t[3];
 912	struct gf_poly *gcd;
 913
 914	dbg("factoring %s...\n", gf_poly_str(f));
 915
 916	*g = f;
 917	*h = NULL;
 918
 919	/* tk = Tr(a^k.X) mod f */
 920	compute_trace_bk_mod(bch, k, f, z, tk);
 921
 922	if (tk->deg > 0) {
 923		/* compute g = gcd(f, tk) (destructive operation) */
 924		gf_poly_copy(f2, f);
 925		gcd = gf_poly_gcd(bch, f2, tk);
 926		if (gcd->deg < f->deg) {
 927			/* compute h=f/gcd(f,tk); this will modify f and q */
 928			gf_poly_div(bch, f, gcd, q);
 929			/* store g and h in-place (clobbering f) */
 930			*h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly;
 931			gf_poly_copy(*g, gcd);
 932			gf_poly_copy(*h, q);
 933		}
 934	}
 935}
 936
 937/*
 938 * find roots of a polynomial, using BTZ algorithm; see the beginning of this
 939 * file for details
 940 */
 941static int find_poly_roots(struct bch_control *bch, unsigned int k,
 942			   struct gf_poly *poly, unsigned int *roots)
 943{
 944	int cnt;
 945	struct gf_poly *f1, *f2;
 946
 947	switch (poly->deg) {
 948		/* handle low degree polynomials with ad hoc techniques */
 949	case 1:
 950		cnt = find_poly_deg1_roots(bch, poly, roots);
 951		break;
 952	case 2:
 953		cnt = find_poly_deg2_roots(bch, poly, roots);
 954		break;
 955	case 3:
 956		cnt = find_poly_deg3_roots(bch, poly, roots);
 957		break;
 958	case 4:
 959		cnt = find_poly_deg4_roots(bch, poly, roots);
 960		break;
 961	default:
 962		/* factor polynomial using Berlekamp Trace Algorithm (BTA) */
 963		cnt = 0;
 964		if (poly->deg && (k <= GF_M(bch))) {
 965			factor_polynomial(bch, k, poly, &f1, &f2);
 966			if (f1)
 967				cnt += find_poly_roots(bch, k+1, f1, roots);
 968			if (f2)
 969				cnt += find_poly_roots(bch, k+1, f2, roots+cnt);
 970		}
 971		break;
 972	}
 973	return cnt;
 974}
 975
 976#if defined(USE_CHIEN_SEARCH)
 977/*
 978 * exhaustive root search (Chien) implementation - not used, included only for
 979 * reference/comparison tests
 980 */
 981static int chien_search(struct bch_control *bch, unsigned int len,
 982			struct gf_poly *p, unsigned int *roots)
 983{
 984	int m;
 985	unsigned int i, j, syn, syn0, count = 0;
 986	const unsigned int k = 8*len+bch->ecc_bits;
 987
 988	/* use a log-based representation of polynomial */
 989	gf_poly_logrep(bch, p, bch->cache);
 990	bch->cache[p->deg] = 0;
 991	syn0 = gf_div(bch, p->c[0], p->c[p->deg]);
 992
 993	for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
 994		/* compute elp(a^i) */
 995		for (j = 1, syn = syn0; j <= p->deg; j++) {
 996			m = bch->cache[j];
 997			if (m >= 0)
 998				syn ^= a_pow(bch, m+j*i);
 999		}
1000		if (syn == 0) {
1001			roots[count++] = GF_N(bch)-i;
1002			if (count == p->deg)
1003				break;
1004		}
1005	}
1006	return (count == p->deg) ? count : 0;
1007}
1008#define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
1009#endif /* USE_CHIEN_SEARCH */
1010
1011/**
1012 * bch_decode - decode received codeword and find bit error locations
1013 * @bch:      BCH control structure
1014 * @data:     received data, ignored if @calc_ecc is provided
1015 * @len:      data length in bytes, must always be provided
1016 * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
1017 * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
1018 * @syn:      hw computed syndrome data (if NULL, syndrome is calculated)
1019 * @errloc:   output array of error locations
1020 *
1021 * Returns:
1022 *  The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
1023 *  invalid parameters were provided
1024 *
1025 * Depending on the available hw BCH support and the need to compute @calc_ecc
1026 * separately (using bch_encode()), this function should be called with one of
1027 * the following parameter configurations -
1028 *
1029 * by providing @data and @recv_ecc only:
1030 *   bch_decode(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
1031 *
1032 * by providing @recv_ecc and @calc_ecc:
1033 *   bch_decode(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
1034 *
1035 * by providing ecc = recv_ecc XOR calc_ecc:
1036 *   bch_decode(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
1037 *
1038 * by providing syndrome results @syn:
1039 *   bch_decode(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
1040 *
1041 * Once bch_decode() has successfully returned with a positive value, error
1042 * locations returned in array @errloc should be interpreted as follows -
1043 *
1044 * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
1045 * data correction)
1046 *
1047 * if (errloc[n] < 8*len), then n-th error is located in data and can be
1048 * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
1049 *
1050 * Note that this function does not perform any data correction by itself, it
1051 * merely indicates error locations.
1052 */
1053int bch_decode(struct bch_control *bch, const uint8_t *data, unsigned int len,
1054	       const uint8_t *recv_ecc, const uint8_t *calc_ecc,
1055	       const unsigned int *syn, unsigned int *errloc)
1056{
1057	const unsigned int ecc_words = BCH_ECC_WORDS(bch);
1058	unsigned int nbits;
1059	int i, err, nroots;
1060	uint32_t sum;
1061
1062	/* sanity check: make sure data length can be handled */
1063	if (8*len > (bch->n-bch->ecc_bits))
1064		return -EINVAL;
1065
1066	/* if caller does not provide syndromes, compute them */
1067	if (!syn) {
1068		if (!calc_ecc) {
1069			/* compute received data ecc into an internal buffer */
1070			if (!data || !recv_ecc)
1071				return -EINVAL;
1072			bch_encode(bch, data, len, NULL);
1073		} else {
1074			/* load provided calculated ecc */
1075			load_ecc8(bch, bch->ecc_buf, calc_ecc);
1076		}
1077		/* load received ecc or assume it was XORed in calc_ecc */
1078		if (recv_ecc) {
1079			load_ecc8(bch, bch->ecc_buf2, recv_ecc);
1080			/* XOR received and calculated ecc */
1081			for (i = 0, sum = 0; i < (int)ecc_words; i++) {
1082				bch->ecc_buf[i] ^= bch->ecc_buf2[i];
1083				sum |= bch->ecc_buf[i];
1084			}
1085			if (!sum)
1086				/* no error found */
1087				return 0;
1088		}
1089		compute_syndromes(bch, bch->ecc_buf, bch->syn);
1090		syn = bch->syn;
1091	}
1092
1093	err = compute_error_locator_polynomial(bch, syn);
1094	if (err > 0) {
1095		nroots = find_poly_roots(bch, 1, bch->elp, errloc);
1096		if (err != nroots)
1097			err = -1;
1098	}
1099	if (err > 0) {
1100		/* post-process raw error locations for easier correction */
1101		nbits = (len*8)+bch->ecc_bits;
1102		for (i = 0; i < err; i++) {
1103			if (errloc[i] >= nbits) {
1104				err = -1;
1105				break;
1106			}
1107			errloc[i] = nbits-1-errloc[i];
1108			if (!bch->swap_bits)
1109				errloc[i] = (errloc[i] & ~7) |
1110					    (7-(errloc[i] & 7));
1111		}
1112	}
1113	return (err >= 0) ? err : -EBADMSG;
1114}
1115EXPORT_SYMBOL_GPL(bch_decode);
1116
1117/*
1118 * generate Galois field lookup tables
1119 */
1120static int build_gf_tables(struct bch_control *bch, unsigned int poly)
1121{
1122	unsigned int i, x = 1;
1123	const unsigned int k = 1 << deg(poly);
1124
1125	/* primitive polynomial must be of degree m */
1126	if (k != (1u << GF_M(bch)))
1127		return -1;
1128
1129	for (i = 0; i < GF_N(bch); i++) {
1130		bch->a_pow_tab[i] = x;
1131		bch->a_log_tab[x] = i;
1132		if (i && (x == 1))
1133			/* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
1134			return -1;
1135		x <<= 1;
1136		if (x & k)
1137			x ^= poly;
1138	}
1139	bch->a_pow_tab[GF_N(bch)] = 1;
1140	bch->a_log_tab[0] = 0;
1141
1142	return 0;
1143}
1144
1145/*
1146 * compute generator polynomial remainder tables for fast encoding
1147 */
1148static void build_mod8_tables(struct bch_control *bch, const uint32_t *g)
1149{
1150	int i, j, b, d;
1151	uint32_t data, hi, lo, *tab;
1152	const int l = BCH_ECC_WORDS(bch);
1153	const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32);
1154	const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32);
1155
1156	memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab));
1157
1158	for (i = 0; i < 256; i++) {
1159		/* p(X)=i is a small polynomial of weight <= 8 */
1160		for (b = 0; b < 4; b++) {
1161			/* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */
1162			tab = bch->mod8_tab + (b*256+i)*l;
1163			data = i << (8*b);
1164			while (data) {
1165				d = deg(data);
1166				/* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */
1167				data ^= g[0] >> (31-d);
1168				for (j = 0; j < ecclen; j++) {
1169					hi = (d < 31) ? g[j] << (d+1) : 0;
1170					lo = (j+1 < plen) ?
1171						g[j+1] >> (31-d) : 0;
1172					tab[j] ^= hi|lo;
1173				}
1174			}
1175		}
1176	}
1177}
1178
1179/*
1180 * build a base for factoring degree 2 polynomials
1181 */
1182static int build_deg2_base(struct bch_control *bch)
1183{
1184	const int m = GF_M(bch);
1185	int i, j, r;
1186	unsigned int sum, x, y, remaining, ak = 0, xi[BCH_MAX_M];
1187
1188	/* find k s.t. Tr(a^k) = 1 and 0 <= k < m */
1189	for (i = 0; i < m; i++) {
1190		for (j = 0, sum = 0; j < m; j++)
1191			sum ^= a_pow(bch, i*(1 << j));
1192
1193		if (sum) {
1194			ak = bch->a_pow_tab[i];
1195			break;
1196		}
1197	}
1198	/* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */
1199	remaining = m;
1200	memset(xi, 0, sizeof(xi));
1201
1202	for (x = 0; (x <= GF_N(bch)) && remaining; x++) {
1203		y = gf_sqr(bch, x)^x;
1204		for (i = 0; i < 2; i++) {
1205			r = a_log(bch, y);
1206			if (y && (r < m) && !xi[r]) {
1207				bch->xi_tab[r] = x;
1208				xi[r] = 1;
1209				remaining--;
1210				dbg("x%d = %x\n", r, x);
1211				break;
1212			}
1213			y ^= ak;
1214		}
1215	}
1216	/* should not happen but check anyway */
1217	return remaining ? -1 : 0;
1218}
1219
1220static void *bch_alloc(size_t size, int *err)
1221{
1222	void *ptr;
1223
1224	ptr = kmalloc(size, GFP_KERNEL);
1225	if (ptr == NULL)
1226		*err = 1;
1227	return ptr;
1228}
1229
1230/*
1231 * compute generator polynomial for given (m,t) parameters.
1232 */
1233static uint32_t *compute_generator_polynomial(struct bch_control *bch)
1234{
1235	const unsigned int m = GF_M(bch);
1236	const unsigned int t = GF_T(bch);
1237	int n, err = 0;
1238	unsigned int i, j, nbits, r, word, *roots;
1239	struct gf_poly *g;
1240	uint32_t *genpoly;
1241
1242	g = bch_alloc(GF_POLY_SZ(m*t), &err);
1243	roots = bch_alloc((bch->n+1)*sizeof(*roots), &err);
1244	genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err);
1245
1246	if (err) {
1247		kfree(genpoly);
1248		genpoly = NULL;
1249		goto finish;
1250	}
1251
1252	/* enumerate all roots of g(X) */
1253	memset(roots , 0, (bch->n+1)*sizeof(*roots));
1254	for (i = 0; i < t; i++) {
1255		for (j = 0, r = 2*i+1; j < m; j++) {
1256			roots[r] = 1;
1257			r = mod_s(bch, 2*r);
1258		}
1259	}
1260	/* build generator polynomial g(X) */
1261	g->deg = 0;
1262	g->c[0] = 1;
1263	for (i = 0; i < GF_N(bch); i++) {
1264		if (roots[i]) {
1265			/* multiply g(X) by (X+root) */
1266			r = bch->a_pow_tab[i];
1267			g->c[g->deg+1] = 1;
1268			for (j = g->deg; j > 0; j--)
1269				g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1];
1270
1271			g->c[0] = gf_mul(bch, g->c[0], r);
1272			g->deg++;
1273		}
1274	}
1275	/* store left-justified binary representation of g(X) */
1276	n = g->deg+1;
1277	i = 0;
1278
1279	while (n > 0) {
1280		nbits = (n > 32) ? 32 : n;
1281		for (j = 0, word = 0; j < nbits; j++) {
1282			if (g->c[n-1-j])
1283				word |= 1u << (31-j);
1284		}
1285		genpoly[i++] = word;
1286		n -= nbits;
1287	}
1288	bch->ecc_bits = g->deg;
1289
1290finish:
1291	kfree(g);
1292	kfree(roots);
1293
1294	return genpoly;
1295}
1296
1297/**
1298 * bch_init - initialize a BCH encoder/decoder
1299 * @m:          Galois field order, should be in the range 5-15
1300 * @t:          maximum error correction capability, in bits
1301 * @prim_poly:  user-provided primitive polynomial (or 0 to use default)
1302 * @swap_bits:  swap bits within data and syndrome bytes
1303 *
1304 * Returns:
1305 *  a newly allocated BCH control structure if successful, NULL otherwise
1306 *
1307 * This initialization can take some time, as lookup tables are built for fast
1308 * encoding/decoding; make sure not to call this function from a time critical
1309 * path. Usually, bch_init() should be called on module/driver init and
1310 * bch_free() should be called to release memory on exit.
1311 *
1312 * You may provide your own primitive polynomial of degree @m in argument
1313 * @prim_poly, or let bch_init() use its default polynomial.
1314 *
1315 * Once bch_init() has successfully returned a pointer to a newly allocated
1316 * BCH control structure, ecc length in bytes is given by member @ecc_bytes of
1317 * the structure.
1318 */
1319struct bch_control *bch_init(int m, int t, unsigned int prim_poly,
1320			     bool swap_bits)
1321{
1322	int err = 0;
1323	unsigned int i, words;
1324	uint32_t *genpoly;
1325	struct bch_control *bch = NULL;
1326
1327	const int min_m = 5;
1328
1329	/* default primitive polynomials */
1330	static const unsigned int prim_poly_tab[] = {
1331		0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b,
1332		0x402b, 0x8003,
1333	};
1334
1335#if defined(CONFIG_BCH_CONST_PARAMS)
1336	if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) {
1337		printk(KERN_ERR "bch encoder/decoder was configured to support "
1338		       "parameters m=%d, t=%d only!\n",
1339		       CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
1340		goto fail;
1341	}
1342#endif
1343	if ((m < min_m) || (m > BCH_MAX_M))
1344		/*
1345		 * values of m greater than 15 are not currently supported;
1346		 * supporting m > 15 would require changing table base type
1347		 * (uint16_t) and a small patch in matrix transposition
1348		 */
1349		goto fail;
1350
1351	if (t > BCH_MAX_T)
1352		/*
1353		 * we can support larger than 64 bits if necessary, at the
1354		 * cost of higher stack usage.
1355		 */
1356		goto fail;
1357
1358	/* sanity checks */
1359	if ((t < 1) || (m*t >= ((1 << m)-1)))
1360		/* invalid t value */
1361		goto fail;
1362
1363	/* select a primitive polynomial for generating GF(2^m) */
1364	if (prim_poly == 0)
1365		prim_poly = prim_poly_tab[m-min_m];
1366
1367	bch = kzalloc(sizeof(*bch), GFP_KERNEL);
1368	if (bch == NULL)
1369		goto fail;
1370
1371	bch->m = m;
1372	bch->t = t;
1373	bch->n = (1 << m)-1;
1374	words  = DIV_ROUND_UP(m*t, 32);
1375	bch->ecc_bytes = DIV_ROUND_UP(m*t, 8);
1376	bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err);
1377	bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err);
1378	bch->mod8_tab  = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err);
1379	bch->ecc_buf   = bch_alloc(words*sizeof(*bch->ecc_buf), &err);
1380	bch->ecc_buf2  = bch_alloc(words*sizeof(*bch->ecc_buf2), &err);
1381	bch->xi_tab    = bch_alloc(m*sizeof(*bch->xi_tab), &err);
1382	bch->syn       = bch_alloc(2*t*sizeof(*bch->syn), &err);
1383	bch->cache     = bch_alloc(2*t*sizeof(*bch->cache), &err);
1384	bch->elp       = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err);
1385	bch->swap_bits = swap_bits;
1386
1387	for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1388		bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err);
1389
1390	if (err)
1391		goto fail;
1392
1393	err = build_gf_tables(bch, prim_poly);
1394	if (err)
1395		goto fail;
1396
1397	/* use generator polynomial for computing encoding tables */
1398	genpoly = compute_generator_polynomial(bch);
1399	if (genpoly == NULL)
1400		goto fail;
1401
1402	build_mod8_tables(bch, genpoly);
1403	kfree(genpoly);
1404
1405	err = build_deg2_base(bch);
1406	if (err)
1407		goto fail;
1408
1409	return bch;
1410
1411fail:
1412	bch_free(bch);
1413	return NULL;
1414}
1415EXPORT_SYMBOL_GPL(bch_init);
1416
1417/**
1418 *  bch_free - free the BCH control structure
1419 *  @bch:    BCH control structure to release
1420 */
1421void bch_free(struct bch_control *bch)
1422{
1423	unsigned int i;
1424
1425	if (bch) {
1426		kfree(bch->a_pow_tab);
1427		kfree(bch->a_log_tab);
1428		kfree(bch->mod8_tab);
1429		kfree(bch->ecc_buf);
1430		kfree(bch->ecc_buf2);
1431		kfree(bch->xi_tab);
1432		kfree(bch->syn);
1433		kfree(bch->cache);
1434		kfree(bch->elp);
1435
1436		for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1437			kfree(bch->poly_2t[i]);
1438
1439		kfree(bch);
1440	}
1441}
1442EXPORT_SYMBOL_GPL(bch_free);
1443
1444MODULE_LICENSE("GPL");
1445MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
1446MODULE_DESCRIPTION("Binary BCH encoder/decoder");