Linux Audio

Check our new training course

Loading...
v4.6
   1/*
   2 * Generic binary BCH encoding/decoding library
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms of the GNU General Public License version 2 as published by
   6 * the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 *
  13 * You should have received a copy of the GNU General Public License along with
  14 * this program; if not, write to the Free Software Foundation, Inc., 51
  15 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  16 *
  17 * Copyright © 2011 Parrot S.A.
  18 *
  19 * Author: Ivan Djelic <ivan.djelic@parrot.com>
  20 *
  21 * Description:
  22 *
  23 * This library provides runtime configurable encoding/decoding of binary
  24 * Bose-Chaudhuri-Hocquenghem (BCH) codes.
  25 *
  26 * Call init_bch to get a pointer to a newly allocated bch_control structure for
  27 * the given m (Galois field order), t (error correction capability) and
  28 * (optional) primitive polynomial parameters.
  29 *
  30 * Call encode_bch to compute and store ecc parity bytes to a given buffer.
  31 * Call decode_bch to detect and locate errors in received data.
  32 *
  33 * On systems supporting hw BCH features, intermediate results may be provided
  34 * to decode_bch in order to skip certain steps. See decode_bch() documentation
  35 * for details.
  36 *
  37 * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
  38 * parameters m and t; thus allowing extra compiler optimizations and providing
  39 * better (up to 2x) encoding performance. Using this option makes sense when
  40 * (m,t) are fixed and known in advance, e.g. when using BCH error correction
  41 * on a particular NAND flash device.
  42 *
  43 * Algorithmic details:
  44 *
  45 * Encoding is performed by processing 32 input bits in parallel, using 4
  46 * remainder lookup tables.
  47 *
  48 * The final stage of decoding involves the following internal steps:
  49 * a. Syndrome computation
  50 * b. Error locator polynomial computation using Berlekamp-Massey algorithm
  51 * c. Error locator root finding (by far the most expensive step)
  52 *
  53 * In this implementation, step c is not performed using the usual Chien search.
  54 * Instead, an alternative approach described in [1] is used. It consists in
  55 * factoring the error locator polynomial using the Berlekamp Trace algorithm
  56 * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
  57 * solving techniques [2] are used. The resulting algorithm, called BTZ, yields
  58 * much better performance than Chien search for usual (m,t) values (typically
  59 * m >= 13, t < 32, see [1]).
  60 *
  61 * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
  62 * of characteristic 2, in: Western European Workshop on Research in Cryptology
  63 * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
  64 * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
  65 * finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
  66 */
  67
  68#include <linux/kernel.h>
  69#include <linux/errno.h>
  70#include <linux/init.h>
  71#include <linux/module.h>
  72#include <linux/slab.h>
  73#include <linux/bitops.h>
  74#include <asm/byteorder.h>
  75#include <linux/bch.h>
  76
  77#if defined(CONFIG_BCH_CONST_PARAMS)
  78#define GF_M(_p)               (CONFIG_BCH_CONST_M)
  79#define GF_T(_p)               (CONFIG_BCH_CONST_T)
  80#define GF_N(_p)               ((1 << (CONFIG_BCH_CONST_M))-1)
 
 
  81#else
  82#define GF_M(_p)               ((_p)->m)
  83#define GF_T(_p)               ((_p)->t)
  84#define GF_N(_p)               ((_p)->n)
 
 
  85#endif
  86
  87#define BCH_ECC_WORDS(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
  88#define BCH_ECC_BYTES(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
  89
 
 
  90#ifndef dbg
  91#define dbg(_fmt, args...)     do {} while (0)
  92#endif
  93
  94/*
  95 * represent a polynomial over GF(2^m)
  96 */
  97struct gf_poly {
  98	unsigned int deg;    /* polynomial degree */
  99	unsigned int c[0];   /* polynomial terms */
 100};
 101
 102/* given its degree, compute a polynomial size in bytes */
 103#define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))
 104
 105/* polynomial of degree 1 */
 106struct gf_poly_deg1 {
 107	struct gf_poly poly;
 108	unsigned int   c[2];
 109};
 110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 111/*
 112 * same as encode_bch(), but process input data one byte at a time
 113 */
 114static void encode_bch_unaligned(struct bch_control *bch,
 115				 const unsigned char *data, unsigned int len,
 116				 uint32_t *ecc)
 117{
 118	int i;
 119	const uint32_t *p;
 120	const int l = BCH_ECC_WORDS(bch)-1;
 121
 122	while (len--) {
 123		p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(*data++)) & 0xff);
 
 
 124
 125		for (i = 0; i < l; i++)
 126			ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++);
 127
 128		ecc[l] = (ecc[l] << 8)^(*p);
 129	}
 130}
 131
 132/*
 133 * convert ecc bytes to aligned, zero-padded 32-bit ecc words
 134 */
 135static void load_ecc8(struct bch_control *bch, uint32_t *dst,
 136		      const uint8_t *src)
 137{
 138	uint8_t pad[4] = {0, 0, 0, 0};
 139	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
 140
 141	for (i = 0; i < nwords; i++, src += 4)
 142		dst[i] = (src[0] << 24)|(src[1] << 16)|(src[2] << 8)|src[3];
 
 
 
 143
 144	memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords);
 145	dst[nwords] = (pad[0] << 24)|(pad[1] << 16)|(pad[2] << 8)|pad[3];
 
 
 
 146}
 147
 148/*
 149 * convert 32-bit ecc words to ecc bytes
 150 */
 151static void store_ecc8(struct bch_control *bch, uint8_t *dst,
 152		       const uint32_t *src)
 153{
 154	uint8_t pad[4];
 155	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
 156
 157	for (i = 0; i < nwords; i++) {
 158		*dst++ = (src[i] >> 24);
 159		*dst++ = (src[i] >> 16) & 0xff;
 160		*dst++ = (src[i] >>  8) & 0xff;
 161		*dst++ = (src[i] >>  0) & 0xff;
 162	}
 163	pad[0] = (src[nwords] >> 24);
 164	pad[1] = (src[nwords] >> 16) & 0xff;
 165	pad[2] = (src[nwords] >>  8) & 0xff;
 166	pad[3] = (src[nwords] >>  0) & 0xff;
 167	memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
 168}
 169
 170/**
 171 * encode_bch - calculate BCH ecc parity of data
 172 * @bch:   BCH control structure
 173 * @data:  data to encode
 174 * @len:   data length in bytes
 175 * @ecc:   ecc parity data, must be initialized by caller
 176 *
 177 * The @ecc parity array is used both as input and output parameter, in order to
 178 * allow incremental computations. It should be of the size indicated by member
 179 * @ecc_bytes of @bch, and should be initialized to 0 before the first call.
 180 *
 181 * The exact number of computed ecc parity bits is given by member @ecc_bits of
 182 * @bch; it may be less than m*t for large values of t.
 183 */
 184void encode_bch(struct bch_control *bch, const uint8_t *data,
 185		unsigned int len, uint8_t *ecc)
 186{
 187	const unsigned int l = BCH_ECC_WORDS(bch)-1;
 188	unsigned int i, mlen;
 189	unsigned long m;
 190	uint32_t w, r[l+1];
 
 191	const uint32_t * const tab0 = bch->mod8_tab;
 192	const uint32_t * const tab1 = tab0 + 256*(l+1);
 193	const uint32_t * const tab2 = tab1 + 256*(l+1);
 194	const uint32_t * const tab3 = tab2 + 256*(l+1);
 195	const uint32_t *pdata, *p0, *p1, *p2, *p3;
 196
 
 
 
 197	if (ecc) {
 198		/* load ecc parity bytes into internal 32-bit buffer */
 199		load_ecc8(bch, bch->ecc_buf, ecc);
 200	} else {
 201		memset(bch->ecc_buf, 0, sizeof(r));
 202	}
 203
 204	/* process first unaligned data bytes */
 205	m = ((unsigned long)data) & 3;
 206	if (m) {
 207		mlen = (len < (4-m)) ? len : 4-m;
 208		encode_bch_unaligned(bch, data, mlen, bch->ecc_buf);
 209		data += mlen;
 210		len  -= mlen;
 211	}
 212
 213	/* process 32-bit aligned data words */
 214	pdata = (uint32_t *)data;
 215	mlen  = len/4;
 216	data += 4*mlen;
 217	len  -= 4*mlen;
 218	memcpy(r, bch->ecc_buf, sizeof(r));
 219
 220	/*
 221	 * split each 32-bit word into 4 polynomials of weight 8 as follows:
 222	 *
 223	 * 31 ...24  23 ...16  15 ... 8  7 ... 0
 224	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt
 225	 *                               tttttttt  mod g = r0 (precomputed)
 226	 *                     zzzzzzzz  00000000  mod g = r1 (precomputed)
 227	 *           yyyyyyyy  00000000  00000000  mod g = r2 (precomputed)
 228	 * xxxxxxxx  00000000  00000000  00000000  mod g = r3 (precomputed)
 229	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt  mod g = r0^r1^r2^r3
 230	 */
 231	while (mlen--) {
 232		/* input data is read in big-endian format */
 233		w = r[0]^cpu_to_be32(*pdata++);
 
 
 
 
 
 
 234		p0 = tab0 + (l+1)*((w >>  0) & 0xff);
 235		p1 = tab1 + (l+1)*((w >>  8) & 0xff);
 236		p2 = tab2 + (l+1)*((w >> 16) & 0xff);
 237		p3 = tab3 + (l+1)*((w >> 24) & 0xff);
 238
 239		for (i = 0; i < l; i++)
 240			r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i];
 241
 242		r[l] = p0[l]^p1[l]^p2[l]^p3[l];
 243	}
 244	memcpy(bch->ecc_buf, r, sizeof(r));
 245
 246	/* process last unaligned bytes */
 247	if (len)
 248		encode_bch_unaligned(bch, data, len, bch->ecc_buf);
 249
 250	/* store ecc parity bytes into original parity buffer */
 251	if (ecc)
 252		store_ecc8(bch, ecc, bch->ecc_buf);
 253}
 254EXPORT_SYMBOL_GPL(encode_bch);
 255
 256static inline int modulo(struct bch_control *bch, unsigned int v)
 257{
 258	const unsigned int n = GF_N(bch);
 259	while (v >= n) {
 260		v -= n;
 261		v = (v & n) + (v >> GF_M(bch));
 262	}
 263	return v;
 264}
 265
 266/*
 267 * shorter and faster modulo function, only works when v < 2N.
 268 */
 269static inline int mod_s(struct bch_control *bch, unsigned int v)
 270{
 271	const unsigned int n = GF_N(bch);
 272	return (v < n) ? v : v-n;
 273}
 274
 275static inline int deg(unsigned int poly)
 276{
 277	/* polynomial degree is the most-significant bit index */
 278	return fls(poly)-1;
 279}
 280
 281static inline int parity(unsigned int x)
 282{
 283	/*
 284	 * public domain code snippet, lifted from
 285	 * http://www-graphics.stanford.edu/~seander/bithacks.html
 286	 */
 287	x ^= x >> 1;
 288	x ^= x >> 2;
 289	x = (x & 0x11111111U) * 0x11111111U;
 290	return (x >> 28) & 1;
 291}
 292
 293/* Galois field basic operations: multiply, divide, inverse, etc. */
 294
 295static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a,
 296				  unsigned int b)
 297{
 298	return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
 299					       bch->a_log_tab[b])] : 0;
 300}
 301
 302static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a)
 303{
 304	return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0;
 305}
 306
 307static inline unsigned int gf_div(struct bch_control *bch, unsigned int a,
 308				  unsigned int b)
 309{
 310	return a ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
 311					GF_N(bch)-bch->a_log_tab[b])] : 0;
 312}
 313
 314static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a)
 315{
 316	return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
 317}
 318
 319static inline unsigned int a_pow(struct bch_control *bch, int i)
 320{
 321	return bch->a_pow_tab[modulo(bch, i)];
 322}
 323
 324static inline int a_log(struct bch_control *bch, unsigned int x)
 325{
 326	return bch->a_log_tab[x];
 327}
 328
 329static inline int a_ilog(struct bch_control *bch, unsigned int x)
 330{
 331	return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
 332}
 333
 334/*
 335 * compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
 336 */
 337static void compute_syndromes(struct bch_control *bch, uint32_t *ecc,
 338			      unsigned int *syn)
 339{
 340	int i, j, s;
 341	unsigned int m;
 342	uint32_t poly;
 343	const int t = GF_T(bch);
 344
 345	s = bch->ecc_bits;
 346
 347	/* make sure extra bits in last ecc word are cleared */
 348	m = ((unsigned int)s) & 31;
 349	if (m)
 350		ecc[s/32] &= ~((1u << (32-m))-1);
 351	memset(syn, 0, 2*t*sizeof(*syn));
 352
 353	/* compute v(a^j) for j=1 .. 2t-1 */
 354	do {
 355		poly = *ecc++;
 356		s -= 32;
 357		while (poly) {
 358			i = deg(poly);
 359			for (j = 0; j < 2*t; j += 2)
 360				syn[j] ^= a_pow(bch, (j+1)*(i+s));
 361
 362			poly ^= (1 << i);
 363		}
 364	} while (s > 0);
 365
 366	/* v(a^(2j)) = v(a^j)^2 */
 367	for (j = 0; j < t; j++)
 368		syn[2*j+1] = gf_sqr(bch, syn[j]);
 369}
 370
 371static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src)
 372{
 373	memcpy(dst, src, GF_POLY_SZ(src->deg));
 374}
 375
 376static int compute_error_locator_polynomial(struct bch_control *bch,
 377					    const unsigned int *syn)
 378{
 379	const unsigned int t = GF_T(bch);
 380	const unsigned int n = GF_N(bch);
 381	unsigned int i, j, tmp, l, pd = 1, d = syn[0];
 382	struct gf_poly *elp = bch->elp;
 383	struct gf_poly *pelp = bch->poly_2t[0];
 384	struct gf_poly *elp_copy = bch->poly_2t[1];
 385	int k, pp = -1;
 386
 387	memset(pelp, 0, GF_POLY_SZ(2*t));
 388	memset(elp, 0, GF_POLY_SZ(2*t));
 389
 390	pelp->deg = 0;
 391	pelp->c[0] = 1;
 392	elp->deg = 0;
 393	elp->c[0] = 1;
 394
 395	/* use simplified binary Berlekamp-Massey algorithm */
 396	for (i = 0; (i < t) && (elp->deg <= t); i++) {
 397		if (d) {
 398			k = 2*i-pp;
 399			gf_poly_copy(elp_copy, elp);
 400			/* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */
 401			tmp = a_log(bch, d)+n-a_log(bch, pd);
 402			for (j = 0; j <= pelp->deg; j++) {
 403				if (pelp->c[j]) {
 404					l = a_log(bch, pelp->c[j]);
 405					elp->c[j+k] ^= a_pow(bch, tmp+l);
 406				}
 407			}
 408			/* compute l[i+1] = max(l[i]->c[l[p]+2*(i-p]) */
 409			tmp = pelp->deg+k;
 410			if (tmp > elp->deg) {
 411				elp->deg = tmp;
 412				gf_poly_copy(pelp, elp_copy);
 413				pd = d;
 414				pp = 2*i;
 415			}
 416		}
 417		/* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */
 418		if (i < t-1) {
 419			d = syn[2*i+2];
 420			for (j = 1; j <= elp->deg; j++)
 421				d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]);
 422		}
 423	}
 424	dbg("elp=%s\n", gf_poly_str(elp));
 425	return (elp->deg > t) ? -1 : (int)elp->deg;
 426}
 427
 428/*
 429 * solve a m x m linear system in GF(2) with an expected number of solutions,
 430 * and return the number of found solutions
 431 */
 432static int solve_linear_system(struct bch_control *bch, unsigned int *rows,
 433			       unsigned int *sol, int nsol)
 434{
 435	const int m = GF_M(bch);
 436	unsigned int tmp, mask;
 437	int rem, c, r, p, k, param[m];
 438
 439	k = 0;
 440	mask = 1 << m;
 441
 442	/* Gaussian elimination */
 443	for (c = 0; c < m; c++) {
 444		rem = 0;
 445		p = c-k;
 446		/* find suitable row for elimination */
 447		for (r = p; r < m; r++) {
 448			if (rows[r] & mask) {
 449				if (r != p) {
 450					tmp = rows[r];
 451					rows[r] = rows[p];
 452					rows[p] = tmp;
 453				}
 454				rem = r+1;
 455				break;
 456			}
 457		}
 458		if (rem) {
 459			/* perform elimination on remaining rows */
 460			tmp = rows[p];
 461			for (r = rem; r < m; r++) {
 462				if (rows[r] & mask)
 463					rows[r] ^= tmp;
 464			}
 465		} else {
 466			/* elimination not needed, store defective row index */
 467			param[k++] = c;
 468		}
 469		mask >>= 1;
 470	}
 471	/* rewrite system, inserting fake parameter rows */
 472	if (k > 0) {
 473		p = k;
 474		for (r = m-1; r >= 0; r--) {
 475			if ((r > m-1-k) && rows[r])
 476				/* system has no solution */
 477				return 0;
 478
 479			rows[r] = (p && (r == param[p-1])) ?
 480				p--, 1u << (m-r) : rows[r-p];
 481		}
 482	}
 483
 484	if (nsol != (1 << k))
 485		/* unexpected number of solutions */
 486		return 0;
 487
 488	for (p = 0; p < nsol; p++) {
 489		/* set parameters for p-th solution */
 490		for (c = 0; c < k; c++)
 491			rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1);
 492
 493		/* compute unique solution */
 494		tmp = 0;
 495		for (r = m-1; r >= 0; r--) {
 496			mask = rows[r] & (tmp|1);
 497			tmp |= parity(mask) << (m-r);
 498		}
 499		sol[p] = tmp >> 1;
 500	}
 501	return nsol;
 502}
 503
 504/*
 505 * this function builds and solves a linear system for finding roots of a degree
 506 * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
 507 */
 508static int find_affine4_roots(struct bch_control *bch, unsigned int a,
 509			      unsigned int b, unsigned int c,
 510			      unsigned int *roots)
 511{
 512	int i, j, k;
 513	const int m = GF_M(bch);
 514	unsigned int mask = 0xff, t, rows[16] = {0,};
 515
 516	j = a_log(bch, b);
 517	k = a_log(bch, a);
 518	rows[0] = c;
 519
 520	/* buid linear system to solve X^4+aX^2+bX+c = 0 */
 521	for (i = 0; i < m; i++) {
 522		rows[i+1] = bch->a_pow_tab[4*i]^
 523			(a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^
 524			(b ? bch->a_pow_tab[mod_s(bch, j)] : 0);
 525		j++;
 526		k += 2;
 527	}
 528	/*
 529	 * transpose 16x16 matrix before passing it to linear solver
 530	 * warning: this code assumes m < 16
 531	 */
 532	for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) {
 533		for (k = 0; k < 16; k = (k+j+1) & ~j) {
 534			t = ((rows[k] >> j)^rows[k+j]) & mask;
 535			rows[k] ^= (t << j);
 536			rows[k+j] ^= t;
 537		}
 538	}
 539	return solve_linear_system(bch, rows, roots, 4);
 540}
 541
 542/*
 543 * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
 544 */
 545static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly,
 546				unsigned int *roots)
 547{
 548	int n = 0;
 549
 550	if (poly->c[0])
 551		/* poly[X] = bX+c with c!=0, root=c/b */
 552		roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+
 553				   bch->a_log_tab[poly->c[1]]);
 554	return n;
 555}
 556
 557/*
 558 * compute roots of a degree 2 polynomial over GF(2^m)
 559 */
 560static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly,
 561				unsigned int *roots)
 562{
 563	int n = 0, i, l0, l1, l2;
 564	unsigned int u, v, r;
 565
 566	if (poly->c[0] && poly->c[1]) {
 567
 568		l0 = bch->a_log_tab[poly->c[0]];
 569		l1 = bch->a_log_tab[poly->c[1]];
 570		l2 = bch->a_log_tab[poly->c[2]];
 571
 572		/* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */
 573		u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1));
 574		/*
 575		 * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
 576		 * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
 577		 * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
 578		 * i.e. r and r+1 are roots iff Tr(u)=0
 579		 */
 580		r = 0;
 581		v = u;
 582		while (v) {
 583			i = deg(v);
 584			r ^= bch->xi_tab[i];
 585			v ^= (1 << i);
 586		}
 587		/* verify root */
 588		if ((gf_sqr(bch, r)^r) == u) {
 589			/* reverse z=a/bX transformation and compute log(1/r) */
 590			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
 591					    bch->a_log_tab[r]+l2);
 592			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
 593					    bch->a_log_tab[r^1]+l2);
 594		}
 595	}
 596	return n;
 597}
 598
 599/*
 600 * compute roots of a degree 3 polynomial over GF(2^m)
 601 */
 602static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly,
 603				unsigned int *roots)
 604{
 605	int i, n = 0;
 606	unsigned int a, b, c, a2, b2, c2, e3, tmp[4];
 607
 608	if (poly->c[0]) {
 609		/* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */
 610		e3 = poly->c[3];
 611		c2 = gf_div(bch, poly->c[0], e3);
 612		b2 = gf_div(bch, poly->c[1], e3);
 613		a2 = gf_div(bch, poly->c[2], e3);
 614
 615		/* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */
 616		c = gf_mul(bch, a2, c2);           /* c = a2c2      */
 617		b = gf_mul(bch, a2, b2)^c2;        /* b = a2b2 + c2 */
 618		a = gf_sqr(bch, a2)^b2;            /* a = a2^2 + b2 */
 619
 620		/* find the 4 roots of this affine polynomial */
 621		if (find_affine4_roots(bch, a, b, c, tmp) == 4) {
 622			/* remove a2 from final list of roots */
 623			for (i = 0; i < 4; i++) {
 624				if (tmp[i] != a2)
 625					roots[n++] = a_ilog(bch, tmp[i]);
 626			}
 627		}
 628	}
 629	return n;
 630}
 631
 632/*
 633 * compute roots of a degree 4 polynomial over GF(2^m)
 634 */
 635static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly,
 636				unsigned int *roots)
 637{
 638	int i, l, n = 0;
 639	unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4;
 640
 641	if (poly->c[0] == 0)
 642		return 0;
 643
 644	/* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */
 645	e4 = poly->c[4];
 646	d = gf_div(bch, poly->c[0], e4);
 647	c = gf_div(bch, poly->c[1], e4);
 648	b = gf_div(bch, poly->c[2], e4);
 649	a = gf_div(bch, poly->c[3], e4);
 650
 651	/* use Y=1/X transformation to get an affine polynomial */
 652	if (a) {
 653		/* first, eliminate cX by using z=X+e with ae^2+c=0 */
 654		if (c) {
 655			/* compute e such that e^2 = c/a */
 656			f = gf_div(bch, c, a);
 657			l = a_log(bch, f);
 658			l += (l & 1) ? GF_N(bch) : 0;
 659			e = a_pow(bch, l/2);
 660			/*
 661			 * use transformation z=X+e:
 662			 * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
 663			 * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
 664			 * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
 665			 * z^4 + az^3 +     b'z^2 + d'
 666			 */
 667			d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d;
 668			b = gf_mul(bch, a, e)^b;
 669		}
 670		/* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */
 671		if (d == 0)
 672			/* assume all roots have multiplicity 1 */
 673			return 0;
 674
 675		c2 = gf_inv(bch, d);
 676		b2 = gf_div(bch, a, d);
 677		a2 = gf_div(bch, b, d);
 678	} else {
 679		/* polynomial is already affine */
 680		c2 = d;
 681		b2 = c;
 682		a2 = b;
 683	}
 684	/* find the 4 roots of this affine polynomial */
 685	if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) {
 686		for (i = 0; i < 4; i++) {
 687			/* post-process roots (reverse transformations) */
 688			f = a ? gf_inv(bch, roots[i]) : roots[i];
 689			roots[i] = a_ilog(bch, f^e);
 690		}
 691		n = 4;
 692	}
 693	return n;
 694}
 695
 696/*
 697 * build monic, log-based representation of a polynomial
 698 */
 699static void gf_poly_logrep(struct bch_control *bch,
 700			   const struct gf_poly *a, int *rep)
 701{
 702	int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]);
 703
 704	/* represent 0 values with -1; warning, rep[d] is not set to 1 */
 705	for (i = 0; i < d; i++)
 706		rep[i] = a->c[i] ? mod_s(bch, a_log(bch, a->c[i])+l) : -1;
 707}
 708
 709/*
 710 * compute polynomial Euclidean division remainder in GF(2^m)[X]
 711 */
 712static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a,
 713			const struct gf_poly *b, int *rep)
 714{
 715	int la, p, m;
 716	unsigned int i, j, *c = a->c;
 717	const unsigned int d = b->deg;
 718
 719	if (a->deg < d)
 720		return;
 721
 722	/* reuse or compute log representation of denominator */
 723	if (!rep) {
 724		rep = bch->cache;
 725		gf_poly_logrep(bch, b, rep);
 726	}
 727
 728	for (j = a->deg; j >= d; j--) {
 729		if (c[j]) {
 730			la = a_log(bch, c[j]);
 731			p = j-d;
 732			for (i = 0; i < d; i++, p++) {
 733				m = rep[i];
 734				if (m >= 0)
 735					c[p] ^= bch->a_pow_tab[mod_s(bch,
 736								     m+la)];
 737			}
 738		}
 739	}
 740	a->deg = d-1;
 741	while (!c[a->deg] && a->deg)
 742		a->deg--;
 743}
 744
 745/*
 746 * compute polynomial Euclidean division quotient in GF(2^m)[X]
 747 */
 748static void gf_poly_div(struct bch_control *bch, struct gf_poly *a,
 749			const struct gf_poly *b, struct gf_poly *q)
 750{
 751	if (a->deg >= b->deg) {
 752		q->deg = a->deg-b->deg;
 753		/* compute a mod b (modifies a) */
 754		gf_poly_mod(bch, a, b, NULL);
 755		/* quotient is stored in upper part of polynomial a */
 756		memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int));
 757	} else {
 758		q->deg = 0;
 759		q->c[0] = 0;
 760	}
 761}
 762
 763/*
 764 * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
 765 */
 766static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a,
 767				   struct gf_poly *b)
 768{
 769	struct gf_poly *tmp;
 770
 771	dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
 772
 773	if (a->deg < b->deg) {
 774		tmp = b;
 775		b = a;
 776		a = tmp;
 777	}
 778
 779	while (b->deg > 0) {
 780		gf_poly_mod(bch, a, b, NULL);
 781		tmp = b;
 782		b = a;
 783		a = tmp;
 784	}
 785
 786	dbg("%s\n", gf_poly_str(a));
 787
 788	return a;
 789}
 790
 791/*
 792 * Given a polynomial f and an integer k, compute Tr(a^kX) mod f
 793 * This is used in Berlekamp Trace algorithm for splitting polynomials
 794 */
 795static void compute_trace_bk_mod(struct bch_control *bch, int k,
 796				 const struct gf_poly *f, struct gf_poly *z,
 797				 struct gf_poly *out)
 798{
 799	const int m = GF_M(bch);
 800	int i, j;
 801
 802	/* z contains z^2j mod f */
 803	z->deg = 1;
 804	z->c[0] = 0;
 805	z->c[1] = bch->a_pow_tab[k];
 806
 807	out->deg = 0;
 808	memset(out, 0, GF_POLY_SZ(f->deg));
 809
 810	/* compute f log representation only once */
 811	gf_poly_logrep(bch, f, bch->cache);
 812
 813	for (i = 0; i < m; i++) {
 814		/* add a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */
 815		for (j = z->deg; j >= 0; j--) {
 816			out->c[j] ^= z->c[j];
 817			z->c[2*j] = gf_sqr(bch, z->c[j]);
 818			z->c[2*j+1] = 0;
 819		}
 820		if (z->deg > out->deg)
 821			out->deg = z->deg;
 822
 823		if (i < m-1) {
 824			z->deg *= 2;
 825			/* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */
 826			gf_poly_mod(bch, z, f, bch->cache);
 827		}
 828	}
 829	while (!out->c[out->deg] && out->deg)
 830		out->deg--;
 831
 832	dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
 833}
 834
 835/*
 836 * factor a polynomial using Berlekamp Trace algorithm (BTA)
 837 */
 838static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f,
 839			      struct gf_poly **g, struct gf_poly **h)
 840{
 841	struct gf_poly *f2 = bch->poly_2t[0];
 842	struct gf_poly *q  = bch->poly_2t[1];
 843	struct gf_poly *tk = bch->poly_2t[2];
 844	struct gf_poly *z  = bch->poly_2t[3];
 845	struct gf_poly *gcd;
 846
 847	dbg("factoring %s...\n", gf_poly_str(f));
 848
 849	*g = f;
 850	*h = NULL;
 851
 852	/* tk = Tr(a^k.X) mod f */
 853	compute_trace_bk_mod(bch, k, f, z, tk);
 854
 855	if (tk->deg > 0) {
 856		/* compute g = gcd(f, tk) (destructive operation) */
 857		gf_poly_copy(f2, f);
 858		gcd = gf_poly_gcd(bch, f2, tk);
 859		if (gcd->deg < f->deg) {
 860			/* compute h=f/gcd(f,tk); this will modify f and q */
 861			gf_poly_div(bch, f, gcd, q);
 862			/* store g and h in-place (clobbering f) */
 863			*h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly;
 864			gf_poly_copy(*g, gcd);
 865			gf_poly_copy(*h, q);
 866		}
 867	}
 868}
 869
 870/*
 871 * find roots of a polynomial, using BTZ algorithm; see the beginning of this
 872 * file for details
 873 */
 874static int find_poly_roots(struct bch_control *bch, unsigned int k,
 875			   struct gf_poly *poly, unsigned int *roots)
 876{
 877	int cnt;
 878	struct gf_poly *f1, *f2;
 879
 880	switch (poly->deg) {
 881		/* handle low degree polynomials with ad hoc techniques */
 882	case 1:
 883		cnt = find_poly_deg1_roots(bch, poly, roots);
 884		break;
 885	case 2:
 886		cnt = find_poly_deg2_roots(bch, poly, roots);
 887		break;
 888	case 3:
 889		cnt = find_poly_deg3_roots(bch, poly, roots);
 890		break;
 891	case 4:
 892		cnt = find_poly_deg4_roots(bch, poly, roots);
 893		break;
 894	default:
 895		/* factor polynomial using Berlekamp Trace Algorithm (BTA) */
 896		cnt = 0;
 897		if (poly->deg && (k <= GF_M(bch))) {
 898			factor_polynomial(bch, k, poly, &f1, &f2);
 899			if (f1)
 900				cnt += find_poly_roots(bch, k+1, f1, roots);
 901			if (f2)
 902				cnt += find_poly_roots(bch, k+1, f2, roots+cnt);
 903		}
 904		break;
 905	}
 906	return cnt;
 907}
 908
 909#if defined(USE_CHIEN_SEARCH)
 910/*
 911 * exhaustive root search (Chien) implementation - not used, included only for
 912 * reference/comparison tests
 913 */
 914static int chien_search(struct bch_control *bch, unsigned int len,
 915			struct gf_poly *p, unsigned int *roots)
 916{
 917	int m;
 918	unsigned int i, j, syn, syn0, count = 0;
 919	const unsigned int k = 8*len+bch->ecc_bits;
 920
 921	/* use a log-based representation of polynomial */
 922	gf_poly_logrep(bch, p, bch->cache);
 923	bch->cache[p->deg] = 0;
 924	syn0 = gf_div(bch, p->c[0], p->c[p->deg]);
 925
 926	for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
 927		/* compute elp(a^i) */
 928		for (j = 1, syn = syn0; j <= p->deg; j++) {
 929			m = bch->cache[j];
 930			if (m >= 0)
 931				syn ^= a_pow(bch, m+j*i);
 932		}
 933		if (syn == 0) {
 934			roots[count++] = GF_N(bch)-i;
 935			if (count == p->deg)
 936				break;
 937		}
 938	}
 939	return (count == p->deg) ? count : 0;
 940}
 941#define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
 942#endif /* USE_CHIEN_SEARCH */
 943
 944/**
 945 * decode_bch - decode received codeword and find bit error locations
 946 * @bch:      BCH control structure
 947 * @data:     received data, ignored if @calc_ecc is provided
 948 * @len:      data length in bytes, must always be provided
 949 * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
 950 * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
 951 * @syn:      hw computed syndrome data (if NULL, syndrome is calculated)
 952 * @errloc:   output array of error locations
 953 *
 954 * Returns:
 955 *  The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
 956 *  invalid parameters were provided
 957 *
 958 * Depending on the available hw BCH support and the need to compute @calc_ecc
 959 * separately (using encode_bch()), this function should be called with one of
 960 * the following parameter configurations -
 961 *
 962 * by providing @data and @recv_ecc only:
 963 *   decode_bch(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
 964 *
 965 * by providing @recv_ecc and @calc_ecc:
 966 *   decode_bch(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
 967 *
 968 * by providing ecc = recv_ecc XOR calc_ecc:
 969 *   decode_bch(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
 970 *
 971 * by providing syndrome results @syn:
 972 *   decode_bch(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
 973 *
 974 * Once decode_bch() has successfully returned with a positive value, error
 975 * locations returned in array @errloc should be interpreted as follows -
 976 *
 977 * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
 978 * data correction)
 979 *
 980 * if (errloc[n] < 8*len), then n-th error is located in data and can be
 981 * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
 982 *
 983 * Note that this function does not perform any data correction by itself, it
 984 * merely indicates error locations.
 985 */
 986int decode_bch(struct bch_control *bch, const uint8_t *data, unsigned int len,
 987	       const uint8_t *recv_ecc, const uint8_t *calc_ecc,
 988	       const unsigned int *syn, unsigned int *errloc)
 989{
 990	const unsigned int ecc_words = BCH_ECC_WORDS(bch);
 991	unsigned int nbits;
 992	int i, err, nroots;
 993	uint32_t sum;
 994
 995	/* sanity check: make sure data length can be handled */
 996	if (8*len > (bch->n-bch->ecc_bits))
 997		return -EINVAL;
 998
 999	/* if caller does not provide syndromes, compute them */
1000	if (!syn) {
1001		if (!calc_ecc) {
1002			/* compute received data ecc into an internal buffer */
1003			if (!data || !recv_ecc)
1004				return -EINVAL;
1005			encode_bch(bch, data, len, NULL);
1006		} else {
1007			/* load provided calculated ecc */
1008			load_ecc8(bch, bch->ecc_buf, calc_ecc);
1009		}
1010		/* load received ecc or assume it was XORed in calc_ecc */
1011		if (recv_ecc) {
1012			load_ecc8(bch, bch->ecc_buf2, recv_ecc);
1013			/* XOR received and calculated ecc */
1014			for (i = 0, sum = 0; i < (int)ecc_words; i++) {
1015				bch->ecc_buf[i] ^= bch->ecc_buf2[i];
1016				sum |= bch->ecc_buf[i];
1017			}
1018			if (!sum)
1019				/* no error found */
1020				return 0;
1021		}
1022		compute_syndromes(bch, bch->ecc_buf, bch->syn);
1023		syn = bch->syn;
1024	}
1025
1026	err = compute_error_locator_polynomial(bch, syn);
1027	if (err > 0) {
1028		nroots = find_poly_roots(bch, 1, bch->elp, errloc);
1029		if (err != nroots)
1030			err = -1;
1031	}
1032	if (err > 0) {
1033		/* post-process raw error locations for easier correction */
1034		nbits = (len*8)+bch->ecc_bits;
1035		for (i = 0; i < err; i++) {
1036			if (errloc[i] >= nbits) {
1037				err = -1;
1038				break;
1039			}
1040			errloc[i] = nbits-1-errloc[i];
1041			errloc[i] = (errloc[i] & ~7)|(7-(errloc[i] & 7));
 
 
1042		}
1043	}
1044	return (err >= 0) ? err : -EBADMSG;
1045}
1046EXPORT_SYMBOL_GPL(decode_bch);
1047
1048/*
1049 * generate Galois field lookup tables
1050 */
1051static int build_gf_tables(struct bch_control *bch, unsigned int poly)
1052{
1053	unsigned int i, x = 1;
1054	const unsigned int k = 1 << deg(poly);
1055
1056	/* primitive polynomial must be of degree m */
1057	if (k != (1u << GF_M(bch)))
1058		return -1;
1059
1060	for (i = 0; i < GF_N(bch); i++) {
1061		bch->a_pow_tab[i] = x;
1062		bch->a_log_tab[x] = i;
1063		if (i && (x == 1))
1064			/* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
1065			return -1;
1066		x <<= 1;
1067		if (x & k)
1068			x ^= poly;
1069	}
1070	bch->a_pow_tab[GF_N(bch)] = 1;
1071	bch->a_log_tab[0] = 0;
1072
1073	return 0;
1074}
1075
1076/*
1077 * compute generator polynomial remainder tables for fast encoding
1078 */
1079static void build_mod8_tables(struct bch_control *bch, const uint32_t *g)
1080{
1081	int i, j, b, d;
1082	uint32_t data, hi, lo, *tab;
1083	const int l = BCH_ECC_WORDS(bch);
1084	const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32);
1085	const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32);
1086
1087	memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab));
1088
1089	for (i = 0; i < 256; i++) {
1090		/* p(X)=i is a small polynomial of weight <= 8 */
1091		for (b = 0; b < 4; b++) {
1092			/* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */
1093			tab = bch->mod8_tab + (b*256+i)*l;
1094			data = i << (8*b);
1095			while (data) {
1096				d = deg(data);
1097				/* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */
1098				data ^= g[0] >> (31-d);
1099				for (j = 0; j < ecclen; j++) {
1100					hi = (d < 31) ? g[j] << (d+1) : 0;
1101					lo = (j+1 < plen) ?
1102						g[j+1] >> (31-d) : 0;
1103					tab[j] ^= hi|lo;
1104				}
1105			}
1106		}
1107	}
1108}
1109
1110/*
1111 * build a base for factoring degree 2 polynomials
1112 */
1113static int build_deg2_base(struct bch_control *bch)
1114{
1115	const int m = GF_M(bch);
1116	int i, j, r;
1117	unsigned int sum, x, y, remaining, ak = 0, xi[m];
1118
1119	/* find k s.t. Tr(a^k) = 1 and 0 <= k < m */
1120	for (i = 0; i < m; i++) {
1121		for (j = 0, sum = 0; j < m; j++)
1122			sum ^= a_pow(bch, i*(1 << j));
1123
1124		if (sum) {
1125			ak = bch->a_pow_tab[i];
1126			break;
1127		}
1128	}
1129	/* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */
1130	remaining = m;
1131	memset(xi, 0, sizeof(xi));
1132
1133	for (x = 0; (x <= GF_N(bch)) && remaining; x++) {
1134		y = gf_sqr(bch, x)^x;
1135		for (i = 0; i < 2; i++) {
1136			r = a_log(bch, y);
1137			if (y && (r < m) && !xi[r]) {
1138				bch->xi_tab[r] = x;
1139				xi[r] = 1;
1140				remaining--;
1141				dbg("x%d = %x\n", r, x);
1142				break;
1143			}
1144			y ^= ak;
1145		}
1146	}
1147	/* should not happen but check anyway */
1148	return remaining ? -1 : 0;
1149}
1150
1151static void *bch_alloc(size_t size, int *err)
1152{
1153	void *ptr;
1154
1155	ptr = kmalloc(size, GFP_KERNEL);
1156	if (ptr == NULL)
1157		*err = 1;
1158	return ptr;
1159}
1160
1161/*
1162 * compute generator polynomial for given (m,t) parameters.
1163 */
1164static uint32_t *compute_generator_polynomial(struct bch_control *bch)
1165{
1166	const unsigned int m = GF_M(bch);
1167	const unsigned int t = GF_T(bch);
1168	int n, err = 0;
1169	unsigned int i, j, nbits, r, word, *roots;
1170	struct gf_poly *g;
1171	uint32_t *genpoly;
1172
1173	g = bch_alloc(GF_POLY_SZ(m*t), &err);
1174	roots = bch_alloc((bch->n+1)*sizeof(*roots), &err);
1175	genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err);
1176
1177	if (err) {
1178		kfree(genpoly);
1179		genpoly = NULL;
1180		goto finish;
1181	}
1182
1183	/* enumerate all roots of g(X) */
1184	memset(roots , 0, (bch->n+1)*sizeof(*roots));
1185	for (i = 0; i < t; i++) {
1186		for (j = 0, r = 2*i+1; j < m; j++) {
1187			roots[r] = 1;
1188			r = mod_s(bch, 2*r);
1189		}
1190	}
1191	/* build generator polynomial g(X) */
1192	g->deg = 0;
1193	g->c[0] = 1;
1194	for (i = 0; i < GF_N(bch); i++) {
1195		if (roots[i]) {
1196			/* multiply g(X) by (X+root) */
1197			r = bch->a_pow_tab[i];
1198			g->c[g->deg+1] = 1;
1199			for (j = g->deg; j > 0; j--)
1200				g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1];
1201
1202			g->c[0] = gf_mul(bch, g->c[0], r);
1203			g->deg++;
1204		}
1205	}
1206	/* store left-justified binary representation of g(X) */
1207	n = g->deg+1;
1208	i = 0;
1209
1210	while (n > 0) {
1211		nbits = (n > 32) ? 32 : n;
1212		for (j = 0, word = 0; j < nbits; j++) {
1213			if (g->c[n-1-j])
1214				word |= 1u << (31-j);
1215		}
1216		genpoly[i++] = word;
1217		n -= nbits;
1218	}
1219	bch->ecc_bits = g->deg;
1220
1221finish:
1222	kfree(g);
1223	kfree(roots);
1224
1225	return genpoly;
1226}
1227
1228/**
1229 * init_bch - initialize a BCH encoder/decoder
1230 * @m:          Galois field order, should be in the range 5-15
1231 * @t:          maximum error correction capability, in bits
1232 * @prim_poly:  user-provided primitive polynomial (or 0 to use default)
 
1233 *
1234 * Returns:
1235 *  a newly allocated BCH control structure if successful, NULL otherwise
1236 *
1237 * This initialization can take some time, as lookup tables are built for fast
1238 * encoding/decoding; make sure not to call this function from a time critical
1239 * path. Usually, init_bch() should be called on module/driver init and
1240 * free_bch() should be called to release memory on exit.
1241 *
1242 * You may provide your own primitive polynomial of degree @m in argument
1243 * @prim_poly, or let init_bch() use its default polynomial.
1244 *
1245 * Once init_bch() has successfully returned a pointer to a newly allocated
1246 * BCH control structure, ecc length in bytes is given by member @ecc_bytes of
1247 * the structure.
1248 */
1249struct bch_control *init_bch(int m, int t, unsigned int prim_poly)
 
1250{
1251	int err = 0;
1252	unsigned int i, words;
1253	uint32_t *genpoly;
1254	struct bch_control *bch = NULL;
1255
1256	const int min_m = 5;
1257	const int max_m = 15;
1258
1259	/* default primitive polynomials */
1260	static const unsigned int prim_poly_tab[] = {
1261		0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b,
1262		0x402b, 0x8003,
1263	};
1264
1265#if defined(CONFIG_BCH_CONST_PARAMS)
1266	if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) {
1267		printk(KERN_ERR "bch encoder/decoder was configured to support "
1268		       "parameters m=%d, t=%d only!\n",
1269		       CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
1270		goto fail;
1271	}
1272#endif
1273	if ((m < min_m) || (m > max_m))
1274		/*
1275		 * values of m greater than 15 are not currently supported;
1276		 * supporting m > 15 would require changing table base type
1277		 * (uint16_t) and a small patch in matrix transposition
1278		 */
1279		goto fail;
1280
 
 
 
 
 
 
 
1281	/* sanity checks */
1282	if ((t < 1) || (m*t >= ((1 << m)-1)))
1283		/* invalid t value */
1284		goto fail;
1285
1286	/* select a primitive polynomial for generating GF(2^m) */
1287	if (prim_poly == 0)
1288		prim_poly = prim_poly_tab[m-min_m];
1289
1290	bch = kzalloc(sizeof(*bch), GFP_KERNEL);
1291	if (bch == NULL)
1292		goto fail;
1293
1294	bch->m = m;
1295	bch->t = t;
1296	bch->n = (1 << m)-1;
1297	words  = DIV_ROUND_UP(m*t, 32);
1298	bch->ecc_bytes = DIV_ROUND_UP(m*t, 8);
1299	bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err);
1300	bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err);
1301	bch->mod8_tab  = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err);
1302	bch->ecc_buf   = bch_alloc(words*sizeof(*bch->ecc_buf), &err);
1303	bch->ecc_buf2  = bch_alloc(words*sizeof(*bch->ecc_buf2), &err);
1304	bch->xi_tab    = bch_alloc(m*sizeof(*bch->xi_tab), &err);
1305	bch->syn       = bch_alloc(2*t*sizeof(*bch->syn), &err);
1306	bch->cache     = bch_alloc(2*t*sizeof(*bch->cache), &err);
1307	bch->elp       = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err);
 
1308
1309	for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1310		bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err);
1311
1312	if (err)
1313		goto fail;
1314
1315	err = build_gf_tables(bch, prim_poly);
1316	if (err)
1317		goto fail;
1318
1319	/* use generator polynomial for computing encoding tables */
1320	genpoly = compute_generator_polynomial(bch);
1321	if (genpoly == NULL)
1322		goto fail;
1323
1324	build_mod8_tables(bch, genpoly);
1325	kfree(genpoly);
1326
1327	err = build_deg2_base(bch);
1328	if (err)
1329		goto fail;
1330
1331	return bch;
1332
1333fail:
1334	free_bch(bch);
1335	return NULL;
1336}
1337EXPORT_SYMBOL_GPL(init_bch);
1338
1339/**
1340 *  free_bch - free the BCH control structure
1341 *  @bch:    BCH control structure to release
1342 */
1343void free_bch(struct bch_control *bch)
1344{
1345	unsigned int i;
1346
1347	if (bch) {
1348		kfree(bch->a_pow_tab);
1349		kfree(bch->a_log_tab);
1350		kfree(bch->mod8_tab);
1351		kfree(bch->ecc_buf);
1352		kfree(bch->ecc_buf2);
1353		kfree(bch->xi_tab);
1354		kfree(bch->syn);
1355		kfree(bch->cache);
1356		kfree(bch->elp);
1357
1358		for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1359			kfree(bch->poly_2t[i]);
1360
1361		kfree(bch);
1362	}
1363}
1364EXPORT_SYMBOL_GPL(free_bch);
1365
1366MODULE_LICENSE("GPL");
1367MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
1368MODULE_DESCRIPTION("Binary BCH encoder/decoder");
v5.9
   1/*
   2 * Generic binary BCH encoding/decoding library
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms of the GNU General Public License version 2 as published by
   6 * the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 *
  13 * You should have received a copy of the GNU General Public License along with
  14 * this program; if not, write to the Free Software Foundation, Inc., 51
  15 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  16 *
  17 * Copyright © 2011 Parrot S.A.
  18 *
  19 * Author: Ivan Djelic <ivan.djelic@parrot.com>
  20 *
  21 * Description:
  22 *
  23 * This library provides runtime configurable encoding/decoding of binary
  24 * Bose-Chaudhuri-Hocquenghem (BCH) codes.
  25 *
  26 * Call bch_init to get a pointer to a newly allocated bch_control structure for
  27 * the given m (Galois field order), t (error correction capability) and
  28 * (optional) primitive polynomial parameters.
  29 *
  30 * Call bch_encode to compute and store ecc parity bytes to a given buffer.
  31 * Call bch_decode to detect and locate errors in received data.
  32 *
  33 * On systems supporting hw BCH features, intermediate results may be provided
  34 * to bch_decode in order to skip certain steps. See bch_decode() documentation
  35 * for details.
  36 *
  37 * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
  38 * parameters m and t; thus allowing extra compiler optimizations and providing
  39 * better (up to 2x) encoding performance. Using this option makes sense when
  40 * (m,t) are fixed and known in advance, e.g. when using BCH error correction
  41 * on a particular NAND flash device.
  42 *
  43 * Algorithmic details:
  44 *
  45 * Encoding is performed by processing 32 input bits in parallel, using 4
  46 * remainder lookup tables.
  47 *
  48 * The final stage of decoding involves the following internal steps:
  49 * a. Syndrome computation
  50 * b. Error locator polynomial computation using Berlekamp-Massey algorithm
  51 * c. Error locator root finding (by far the most expensive step)
  52 *
  53 * In this implementation, step c is not performed using the usual Chien search.
  54 * Instead, an alternative approach described in [1] is used. It consists in
  55 * factoring the error locator polynomial using the Berlekamp Trace algorithm
  56 * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
  57 * solving techniques [2] are used. The resulting algorithm, called BTZ, yields
  58 * much better performance than Chien search for usual (m,t) values (typically
  59 * m >= 13, t < 32, see [1]).
  60 *
  61 * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
  62 * of characteristic 2, in: Western European Workshop on Research in Cryptology
  63 * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
  64 * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
  65 * finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
  66 */
  67
  68#include <linux/kernel.h>
  69#include <linux/errno.h>
  70#include <linux/init.h>
  71#include <linux/module.h>
  72#include <linux/slab.h>
  73#include <linux/bitops.h>
  74#include <asm/byteorder.h>
  75#include <linux/bch.h>
  76
  77#if defined(CONFIG_BCH_CONST_PARAMS)
  78#define GF_M(_p)               (CONFIG_BCH_CONST_M)
  79#define GF_T(_p)               (CONFIG_BCH_CONST_T)
  80#define GF_N(_p)               ((1 << (CONFIG_BCH_CONST_M))-1)
  81#define BCH_MAX_M              (CONFIG_BCH_CONST_M)
  82#define BCH_MAX_T	       (CONFIG_BCH_CONST_T)
  83#else
  84#define GF_M(_p)               ((_p)->m)
  85#define GF_T(_p)               ((_p)->t)
  86#define GF_N(_p)               ((_p)->n)
  87#define BCH_MAX_M              15 /* 2KB */
  88#define BCH_MAX_T              64 /* 64 bit correction */
  89#endif
  90
  91#define BCH_ECC_WORDS(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
  92#define BCH_ECC_BYTES(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
  93
  94#define BCH_ECC_MAX_WORDS      DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32)
  95
  96#ifndef dbg
  97#define dbg(_fmt, args...)     do {} while (0)
  98#endif
  99
 100/*
 101 * represent a polynomial over GF(2^m)
 102 */
 103struct gf_poly {
 104	unsigned int deg;    /* polynomial degree */
 105	unsigned int c[];   /* polynomial terms */
 106};
 107
 108/* given its degree, compute a polynomial size in bytes */
 109#define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))
 110
 111/* polynomial of degree 1 */
 112struct gf_poly_deg1 {
 113	struct gf_poly poly;
 114	unsigned int   c[2];
 115};
 116
 117static u8 swap_bits_table[] = {
 118	0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
 119	0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
 120	0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
 121	0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
 122	0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
 123	0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
 124	0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
 125	0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
 126	0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
 127	0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
 128	0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
 129	0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
 130	0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
 131	0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
 132	0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
 133	0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
 134	0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
 135	0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
 136	0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
 137	0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
 138	0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
 139	0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
 140	0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
 141	0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
 142	0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
 143	0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
 144	0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
 145	0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
 146	0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
 147	0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
 148	0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
 149	0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
 150};
 151
 152static u8 swap_bits(struct bch_control *bch, u8 in)
 153{
 154	if (!bch->swap_bits)
 155		return in;
 156
 157	return swap_bits_table[in];
 158}
 159
 160/*
 161 * same as bch_encode(), but process input data one byte at a time
 162 */
 163static void bch_encode_unaligned(struct bch_control *bch,
 164				 const unsigned char *data, unsigned int len,
 165				 uint32_t *ecc)
 166{
 167	int i;
 168	const uint32_t *p;
 169	const int l = BCH_ECC_WORDS(bch)-1;
 170
 171	while (len--) {
 172		u8 tmp = swap_bits(bch, *data++);
 173
 174		p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(tmp)) & 0xff);
 175
 176		for (i = 0; i < l; i++)
 177			ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++);
 178
 179		ecc[l] = (ecc[l] << 8)^(*p);
 180	}
 181}
 182
 183/*
 184 * convert ecc bytes to aligned, zero-padded 32-bit ecc words
 185 */
 186static void load_ecc8(struct bch_control *bch, uint32_t *dst,
 187		      const uint8_t *src)
 188{
 189	uint8_t pad[4] = {0, 0, 0, 0};
 190	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
 191
 192	for (i = 0; i < nwords; i++, src += 4)
 193		dst[i] = ((u32)swap_bits(bch, src[0]) << 24) |
 194			((u32)swap_bits(bch, src[1]) << 16) |
 195			((u32)swap_bits(bch, src[2]) << 8) |
 196			swap_bits(bch, src[3]);
 197
 198	memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords);
 199	dst[nwords] = ((u32)swap_bits(bch, pad[0]) << 24) |
 200		((u32)swap_bits(bch, pad[1]) << 16) |
 201		((u32)swap_bits(bch, pad[2]) << 8) |
 202		swap_bits(bch, pad[3]);
 203}
 204
 205/*
 206 * convert 32-bit ecc words to ecc bytes
 207 */
 208static void store_ecc8(struct bch_control *bch, uint8_t *dst,
 209		       const uint32_t *src)
 210{
 211	uint8_t pad[4];
 212	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
 213
 214	for (i = 0; i < nwords; i++) {
 215		*dst++ = swap_bits(bch, src[i] >> 24);
 216		*dst++ = swap_bits(bch, src[i] >> 16);
 217		*dst++ = swap_bits(bch, src[i] >> 8);
 218		*dst++ = swap_bits(bch, src[i]);
 219	}
 220	pad[0] = swap_bits(bch, src[nwords] >> 24);
 221	pad[1] = swap_bits(bch, src[nwords] >> 16);
 222	pad[2] = swap_bits(bch, src[nwords] >> 8);
 223	pad[3] = swap_bits(bch, src[nwords]);
 224	memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
 225}
 226
 227/**
 228 * bch_encode - calculate BCH ecc parity of data
 229 * @bch:   BCH control structure
 230 * @data:  data to encode
 231 * @len:   data length in bytes
 232 * @ecc:   ecc parity data, must be initialized by caller
 233 *
 234 * The @ecc parity array is used both as input and output parameter, in order to
 235 * allow incremental computations. It should be of the size indicated by member
 236 * @ecc_bytes of @bch, and should be initialized to 0 before the first call.
 237 *
 238 * The exact number of computed ecc parity bits is given by member @ecc_bits of
 239 * @bch; it may be less than m*t for large values of t.
 240 */
 241void bch_encode(struct bch_control *bch, const uint8_t *data,
 242		unsigned int len, uint8_t *ecc)
 243{
 244	const unsigned int l = BCH_ECC_WORDS(bch)-1;
 245	unsigned int i, mlen;
 246	unsigned long m;
 247	uint32_t w, r[BCH_ECC_MAX_WORDS];
 248	const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r);
 249	const uint32_t * const tab0 = bch->mod8_tab;
 250	const uint32_t * const tab1 = tab0 + 256*(l+1);
 251	const uint32_t * const tab2 = tab1 + 256*(l+1);
 252	const uint32_t * const tab3 = tab2 + 256*(l+1);
 253	const uint32_t *pdata, *p0, *p1, *p2, *p3;
 254
 255	if (WARN_ON(r_bytes > sizeof(r)))
 256		return;
 257
 258	if (ecc) {
 259		/* load ecc parity bytes into internal 32-bit buffer */
 260		load_ecc8(bch, bch->ecc_buf, ecc);
 261	} else {
 262		memset(bch->ecc_buf, 0, r_bytes);
 263	}
 264
 265	/* process first unaligned data bytes */
 266	m = ((unsigned long)data) & 3;
 267	if (m) {
 268		mlen = (len < (4-m)) ? len : 4-m;
 269		bch_encode_unaligned(bch, data, mlen, bch->ecc_buf);
 270		data += mlen;
 271		len  -= mlen;
 272	}
 273
 274	/* process 32-bit aligned data words */
 275	pdata = (uint32_t *)data;
 276	mlen  = len/4;
 277	data += 4*mlen;
 278	len  -= 4*mlen;
 279	memcpy(r, bch->ecc_buf, r_bytes);
 280
 281	/*
 282	 * split each 32-bit word into 4 polynomials of weight 8 as follows:
 283	 *
 284	 * 31 ...24  23 ...16  15 ... 8  7 ... 0
 285	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt
 286	 *                               tttttttt  mod g = r0 (precomputed)
 287	 *                     zzzzzzzz  00000000  mod g = r1 (precomputed)
 288	 *           yyyyyyyy  00000000  00000000  mod g = r2 (precomputed)
 289	 * xxxxxxxx  00000000  00000000  00000000  mod g = r3 (precomputed)
 290	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt  mod g = r0^r1^r2^r3
 291	 */
 292	while (mlen--) {
 293		/* input data is read in big-endian format */
 294		w = cpu_to_be32(*pdata++);
 295		if (bch->swap_bits)
 296			w = (u32)swap_bits(bch, w) |
 297			    ((u32)swap_bits(bch, w >> 8) << 8) |
 298			    ((u32)swap_bits(bch, w >> 16) << 16) |
 299			    ((u32)swap_bits(bch, w >> 24) << 24);
 300		w ^= r[0];
 301		p0 = tab0 + (l+1)*((w >>  0) & 0xff);
 302		p1 = tab1 + (l+1)*((w >>  8) & 0xff);
 303		p2 = tab2 + (l+1)*((w >> 16) & 0xff);
 304		p3 = tab3 + (l+1)*((w >> 24) & 0xff);
 305
 306		for (i = 0; i < l; i++)
 307			r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i];
 308
 309		r[l] = p0[l]^p1[l]^p2[l]^p3[l];
 310	}
 311	memcpy(bch->ecc_buf, r, r_bytes);
 312
 313	/* process last unaligned bytes */
 314	if (len)
 315		bch_encode_unaligned(bch, data, len, bch->ecc_buf);
 316
 317	/* store ecc parity bytes into original parity buffer */
 318	if (ecc)
 319		store_ecc8(bch, ecc, bch->ecc_buf);
 320}
 321EXPORT_SYMBOL_GPL(bch_encode);
 322
 323static inline int modulo(struct bch_control *bch, unsigned int v)
 324{
 325	const unsigned int n = GF_N(bch);
 326	while (v >= n) {
 327		v -= n;
 328		v = (v & n) + (v >> GF_M(bch));
 329	}
 330	return v;
 331}
 332
 333/*
 334 * shorter and faster modulo function, only works when v < 2N.
 335 */
 336static inline int mod_s(struct bch_control *bch, unsigned int v)
 337{
 338	const unsigned int n = GF_N(bch);
 339	return (v < n) ? v : v-n;
 340}
 341
 342static inline int deg(unsigned int poly)
 343{
 344	/* polynomial degree is the most-significant bit index */
 345	return fls(poly)-1;
 346}
 347
 348static inline int parity(unsigned int x)
 349{
 350	/*
 351	 * public domain code snippet, lifted from
 352	 * http://www-graphics.stanford.edu/~seander/bithacks.html
 353	 */
 354	x ^= x >> 1;
 355	x ^= x >> 2;
 356	x = (x & 0x11111111U) * 0x11111111U;
 357	return (x >> 28) & 1;
 358}
 359
 360/* Galois field basic operations: multiply, divide, inverse, etc. */
 361
 362static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a,
 363				  unsigned int b)
 364{
 365	return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
 366					       bch->a_log_tab[b])] : 0;
 367}
 368
 369static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a)
 370{
 371	return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0;
 372}
 373
 374static inline unsigned int gf_div(struct bch_control *bch, unsigned int a,
 375				  unsigned int b)
 376{
 377	return a ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
 378					GF_N(bch)-bch->a_log_tab[b])] : 0;
 379}
 380
 381static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a)
 382{
 383	return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
 384}
 385
 386static inline unsigned int a_pow(struct bch_control *bch, int i)
 387{
 388	return bch->a_pow_tab[modulo(bch, i)];
 389}
 390
 391static inline int a_log(struct bch_control *bch, unsigned int x)
 392{
 393	return bch->a_log_tab[x];
 394}
 395
 396static inline int a_ilog(struct bch_control *bch, unsigned int x)
 397{
 398	return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
 399}
 400
 401/*
 402 * compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
 403 */
 404static void compute_syndromes(struct bch_control *bch, uint32_t *ecc,
 405			      unsigned int *syn)
 406{
 407	int i, j, s;
 408	unsigned int m;
 409	uint32_t poly;
 410	const int t = GF_T(bch);
 411
 412	s = bch->ecc_bits;
 413
 414	/* make sure extra bits in last ecc word are cleared */
 415	m = ((unsigned int)s) & 31;
 416	if (m)
 417		ecc[s/32] &= ~((1u << (32-m))-1);
 418	memset(syn, 0, 2*t*sizeof(*syn));
 419
 420	/* compute v(a^j) for j=1 .. 2t-1 */
 421	do {
 422		poly = *ecc++;
 423		s -= 32;
 424		while (poly) {
 425			i = deg(poly);
 426			for (j = 0; j < 2*t; j += 2)
 427				syn[j] ^= a_pow(bch, (j+1)*(i+s));
 428
 429			poly ^= (1 << i);
 430		}
 431	} while (s > 0);
 432
 433	/* v(a^(2j)) = v(a^j)^2 */
 434	for (j = 0; j < t; j++)
 435		syn[2*j+1] = gf_sqr(bch, syn[j]);
 436}
 437
 438static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src)
 439{
 440	memcpy(dst, src, GF_POLY_SZ(src->deg));
 441}
 442
 443static int compute_error_locator_polynomial(struct bch_control *bch,
 444					    const unsigned int *syn)
 445{
 446	const unsigned int t = GF_T(bch);
 447	const unsigned int n = GF_N(bch);
 448	unsigned int i, j, tmp, l, pd = 1, d = syn[0];
 449	struct gf_poly *elp = bch->elp;
 450	struct gf_poly *pelp = bch->poly_2t[0];
 451	struct gf_poly *elp_copy = bch->poly_2t[1];
 452	int k, pp = -1;
 453
 454	memset(pelp, 0, GF_POLY_SZ(2*t));
 455	memset(elp, 0, GF_POLY_SZ(2*t));
 456
 457	pelp->deg = 0;
 458	pelp->c[0] = 1;
 459	elp->deg = 0;
 460	elp->c[0] = 1;
 461
 462	/* use simplified binary Berlekamp-Massey algorithm */
 463	for (i = 0; (i < t) && (elp->deg <= t); i++) {
 464		if (d) {
 465			k = 2*i-pp;
 466			gf_poly_copy(elp_copy, elp);
 467			/* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */
 468			tmp = a_log(bch, d)+n-a_log(bch, pd);
 469			for (j = 0; j <= pelp->deg; j++) {
 470				if (pelp->c[j]) {
 471					l = a_log(bch, pelp->c[j]);
 472					elp->c[j+k] ^= a_pow(bch, tmp+l);
 473				}
 474			}
 475			/* compute l[i+1] = max(l[i]->c[l[p]+2*(i-p]) */
 476			tmp = pelp->deg+k;
 477			if (tmp > elp->deg) {
 478				elp->deg = tmp;
 479				gf_poly_copy(pelp, elp_copy);
 480				pd = d;
 481				pp = 2*i;
 482			}
 483		}
 484		/* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */
 485		if (i < t-1) {
 486			d = syn[2*i+2];
 487			for (j = 1; j <= elp->deg; j++)
 488				d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]);
 489		}
 490	}
 491	dbg("elp=%s\n", gf_poly_str(elp));
 492	return (elp->deg > t) ? -1 : (int)elp->deg;
 493}
 494
 495/*
 496 * solve a m x m linear system in GF(2) with an expected number of solutions,
 497 * and return the number of found solutions
 498 */
 499static int solve_linear_system(struct bch_control *bch, unsigned int *rows,
 500			       unsigned int *sol, int nsol)
 501{
 502	const int m = GF_M(bch);
 503	unsigned int tmp, mask;
 504	int rem, c, r, p, k, param[BCH_MAX_M];
 505
 506	k = 0;
 507	mask = 1 << m;
 508
 509	/* Gaussian elimination */
 510	for (c = 0; c < m; c++) {
 511		rem = 0;
 512		p = c-k;
 513		/* find suitable row for elimination */
 514		for (r = p; r < m; r++) {
 515			if (rows[r] & mask) {
 516				if (r != p) {
 517					tmp = rows[r];
 518					rows[r] = rows[p];
 519					rows[p] = tmp;
 520				}
 521				rem = r+1;
 522				break;
 523			}
 524		}
 525		if (rem) {
 526			/* perform elimination on remaining rows */
 527			tmp = rows[p];
 528			for (r = rem; r < m; r++) {
 529				if (rows[r] & mask)
 530					rows[r] ^= tmp;
 531			}
 532		} else {
 533			/* elimination not needed, store defective row index */
 534			param[k++] = c;
 535		}
 536		mask >>= 1;
 537	}
 538	/* rewrite system, inserting fake parameter rows */
 539	if (k > 0) {
 540		p = k;
 541		for (r = m-1; r >= 0; r--) {
 542			if ((r > m-1-k) && rows[r])
 543				/* system has no solution */
 544				return 0;
 545
 546			rows[r] = (p && (r == param[p-1])) ?
 547				p--, 1u << (m-r) : rows[r-p];
 548		}
 549	}
 550
 551	if (nsol != (1 << k))
 552		/* unexpected number of solutions */
 553		return 0;
 554
 555	for (p = 0; p < nsol; p++) {
 556		/* set parameters for p-th solution */
 557		for (c = 0; c < k; c++)
 558			rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1);
 559
 560		/* compute unique solution */
 561		tmp = 0;
 562		for (r = m-1; r >= 0; r--) {
 563			mask = rows[r] & (tmp|1);
 564			tmp |= parity(mask) << (m-r);
 565		}
 566		sol[p] = tmp >> 1;
 567	}
 568	return nsol;
 569}
 570
 571/*
 572 * this function builds and solves a linear system for finding roots of a degree
 573 * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
 574 */
 575static int find_affine4_roots(struct bch_control *bch, unsigned int a,
 576			      unsigned int b, unsigned int c,
 577			      unsigned int *roots)
 578{
 579	int i, j, k;
 580	const int m = GF_M(bch);
 581	unsigned int mask = 0xff, t, rows[16] = {0,};
 582
 583	j = a_log(bch, b);
 584	k = a_log(bch, a);
 585	rows[0] = c;
 586
 587	/* buid linear system to solve X^4+aX^2+bX+c = 0 */
 588	for (i = 0; i < m; i++) {
 589		rows[i+1] = bch->a_pow_tab[4*i]^
 590			(a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^
 591			(b ? bch->a_pow_tab[mod_s(bch, j)] : 0);
 592		j++;
 593		k += 2;
 594	}
 595	/*
 596	 * transpose 16x16 matrix before passing it to linear solver
 597	 * warning: this code assumes m < 16
 598	 */
 599	for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) {
 600		for (k = 0; k < 16; k = (k+j+1) & ~j) {
 601			t = ((rows[k] >> j)^rows[k+j]) & mask;
 602			rows[k] ^= (t << j);
 603			rows[k+j] ^= t;
 604		}
 605	}
 606	return solve_linear_system(bch, rows, roots, 4);
 607}
 608
 609/*
 610 * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
 611 */
 612static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly,
 613				unsigned int *roots)
 614{
 615	int n = 0;
 616
 617	if (poly->c[0])
 618		/* poly[X] = bX+c with c!=0, root=c/b */
 619		roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+
 620				   bch->a_log_tab[poly->c[1]]);
 621	return n;
 622}
 623
 624/*
 625 * compute roots of a degree 2 polynomial over GF(2^m)
 626 */
 627static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly,
 628				unsigned int *roots)
 629{
 630	int n = 0, i, l0, l1, l2;
 631	unsigned int u, v, r;
 632
 633	if (poly->c[0] && poly->c[1]) {
 634
 635		l0 = bch->a_log_tab[poly->c[0]];
 636		l1 = bch->a_log_tab[poly->c[1]];
 637		l2 = bch->a_log_tab[poly->c[2]];
 638
 639		/* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */
 640		u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1));
 641		/*
 642		 * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
 643		 * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
 644		 * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
 645		 * i.e. r and r+1 are roots iff Tr(u)=0
 646		 */
 647		r = 0;
 648		v = u;
 649		while (v) {
 650			i = deg(v);
 651			r ^= bch->xi_tab[i];
 652			v ^= (1 << i);
 653		}
 654		/* verify root */
 655		if ((gf_sqr(bch, r)^r) == u) {
 656			/* reverse z=a/bX transformation and compute log(1/r) */
 657			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
 658					    bch->a_log_tab[r]+l2);
 659			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
 660					    bch->a_log_tab[r^1]+l2);
 661		}
 662	}
 663	return n;
 664}
 665
 666/*
 667 * compute roots of a degree 3 polynomial over GF(2^m)
 668 */
 669static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly,
 670				unsigned int *roots)
 671{
 672	int i, n = 0;
 673	unsigned int a, b, c, a2, b2, c2, e3, tmp[4];
 674
 675	if (poly->c[0]) {
 676		/* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */
 677		e3 = poly->c[3];
 678		c2 = gf_div(bch, poly->c[0], e3);
 679		b2 = gf_div(bch, poly->c[1], e3);
 680		a2 = gf_div(bch, poly->c[2], e3);
 681
 682		/* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */
 683		c = gf_mul(bch, a2, c2);           /* c = a2c2      */
 684		b = gf_mul(bch, a2, b2)^c2;        /* b = a2b2 + c2 */
 685		a = gf_sqr(bch, a2)^b2;            /* a = a2^2 + b2 */
 686
 687		/* find the 4 roots of this affine polynomial */
 688		if (find_affine4_roots(bch, a, b, c, tmp) == 4) {
 689			/* remove a2 from final list of roots */
 690			for (i = 0; i < 4; i++) {
 691				if (tmp[i] != a2)
 692					roots[n++] = a_ilog(bch, tmp[i]);
 693			}
 694		}
 695	}
 696	return n;
 697}
 698
 699/*
 700 * compute roots of a degree 4 polynomial over GF(2^m)
 701 */
 702static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly,
 703				unsigned int *roots)
 704{
 705	int i, l, n = 0;
 706	unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4;
 707
 708	if (poly->c[0] == 0)
 709		return 0;
 710
 711	/* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */
 712	e4 = poly->c[4];
 713	d = gf_div(bch, poly->c[0], e4);
 714	c = gf_div(bch, poly->c[1], e4);
 715	b = gf_div(bch, poly->c[2], e4);
 716	a = gf_div(bch, poly->c[3], e4);
 717
 718	/* use Y=1/X transformation to get an affine polynomial */
 719	if (a) {
 720		/* first, eliminate cX by using z=X+e with ae^2+c=0 */
 721		if (c) {
 722			/* compute e such that e^2 = c/a */
 723			f = gf_div(bch, c, a);
 724			l = a_log(bch, f);
 725			l += (l & 1) ? GF_N(bch) : 0;
 726			e = a_pow(bch, l/2);
 727			/*
 728			 * use transformation z=X+e:
 729			 * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
 730			 * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
 731			 * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
 732			 * z^4 + az^3 +     b'z^2 + d'
 733			 */
 734			d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d;
 735			b = gf_mul(bch, a, e)^b;
 736		}
 737		/* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */
 738		if (d == 0)
 739			/* assume all roots have multiplicity 1 */
 740			return 0;
 741
 742		c2 = gf_inv(bch, d);
 743		b2 = gf_div(bch, a, d);
 744		a2 = gf_div(bch, b, d);
 745	} else {
 746		/* polynomial is already affine */
 747		c2 = d;
 748		b2 = c;
 749		a2 = b;
 750	}
 751	/* find the 4 roots of this affine polynomial */
 752	if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) {
 753		for (i = 0; i < 4; i++) {
 754			/* post-process roots (reverse transformations) */
 755			f = a ? gf_inv(bch, roots[i]) : roots[i];
 756			roots[i] = a_ilog(bch, f^e);
 757		}
 758		n = 4;
 759	}
 760	return n;
 761}
 762
 763/*
 764 * build monic, log-based representation of a polynomial
 765 */
 766static void gf_poly_logrep(struct bch_control *bch,
 767			   const struct gf_poly *a, int *rep)
 768{
 769	int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]);
 770
 771	/* represent 0 values with -1; warning, rep[d] is not set to 1 */
 772	for (i = 0; i < d; i++)
 773		rep[i] = a->c[i] ? mod_s(bch, a_log(bch, a->c[i])+l) : -1;
 774}
 775
 776/*
 777 * compute polynomial Euclidean division remainder in GF(2^m)[X]
 778 */
 779static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a,
 780			const struct gf_poly *b, int *rep)
 781{
 782	int la, p, m;
 783	unsigned int i, j, *c = a->c;
 784	const unsigned int d = b->deg;
 785
 786	if (a->deg < d)
 787		return;
 788
 789	/* reuse or compute log representation of denominator */
 790	if (!rep) {
 791		rep = bch->cache;
 792		gf_poly_logrep(bch, b, rep);
 793	}
 794
 795	for (j = a->deg; j >= d; j--) {
 796		if (c[j]) {
 797			la = a_log(bch, c[j]);
 798			p = j-d;
 799			for (i = 0; i < d; i++, p++) {
 800				m = rep[i];
 801				if (m >= 0)
 802					c[p] ^= bch->a_pow_tab[mod_s(bch,
 803								     m+la)];
 804			}
 805		}
 806	}
 807	a->deg = d-1;
 808	while (!c[a->deg] && a->deg)
 809		a->deg--;
 810}
 811
 812/*
 813 * compute polynomial Euclidean division quotient in GF(2^m)[X]
 814 */
 815static void gf_poly_div(struct bch_control *bch, struct gf_poly *a,
 816			const struct gf_poly *b, struct gf_poly *q)
 817{
 818	if (a->deg >= b->deg) {
 819		q->deg = a->deg-b->deg;
 820		/* compute a mod b (modifies a) */
 821		gf_poly_mod(bch, a, b, NULL);
 822		/* quotient is stored in upper part of polynomial a */
 823		memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int));
 824	} else {
 825		q->deg = 0;
 826		q->c[0] = 0;
 827	}
 828}
 829
 830/*
 831 * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
 832 */
 833static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a,
 834				   struct gf_poly *b)
 835{
 836	struct gf_poly *tmp;
 837
 838	dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
 839
 840	if (a->deg < b->deg) {
 841		tmp = b;
 842		b = a;
 843		a = tmp;
 844	}
 845
 846	while (b->deg > 0) {
 847		gf_poly_mod(bch, a, b, NULL);
 848		tmp = b;
 849		b = a;
 850		a = tmp;
 851	}
 852
 853	dbg("%s\n", gf_poly_str(a));
 854
 855	return a;
 856}
 857
 858/*
 859 * Given a polynomial f and an integer k, compute Tr(a^kX) mod f
 860 * This is used in Berlekamp Trace algorithm for splitting polynomials
 861 */
 862static void compute_trace_bk_mod(struct bch_control *bch, int k,
 863				 const struct gf_poly *f, struct gf_poly *z,
 864				 struct gf_poly *out)
 865{
 866	const int m = GF_M(bch);
 867	int i, j;
 868
 869	/* z contains z^2j mod f */
 870	z->deg = 1;
 871	z->c[0] = 0;
 872	z->c[1] = bch->a_pow_tab[k];
 873
 874	out->deg = 0;
 875	memset(out, 0, GF_POLY_SZ(f->deg));
 876
 877	/* compute f log representation only once */
 878	gf_poly_logrep(bch, f, bch->cache);
 879
 880	for (i = 0; i < m; i++) {
 881		/* add a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */
 882		for (j = z->deg; j >= 0; j--) {
 883			out->c[j] ^= z->c[j];
 884			z->c[2*j] = gf_sqr(bch, z->c[j]);
 885			z->c[2*j+1] = 0;
 886		}
 887		if (z->deg > out->deg)
 888			out->deg = z->deg;
 889
 890		if (i < m-1) {
 891			z->deg *= 2;
 892			/* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */
 893			gf_poly_mod(bch, z, f, bch->cache);
 894		}
 895	}
 896	while (!out->c[out->deg] && out->deg)
 897		out->deg--;
 898
 899	dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
 900}
 901
 902/*
 903 * factor a polynomial using Berlekamp Trace algorithm (BTA)
 904 */
 905static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f,
 906			      struct gf_poly **g, struct gf_poly **h)
 907{
 908	struct gf_poly *f2 = bch->poly_2t[0];
 909	struct gf_poly *q  = bch->poly_2t[1];
 910	struct gf_poly *tk = bch->poly_2t[2];
 911	struct gf_poly *z  = bch->poly_2t[3];
 912	struct gf_poly *gcd;
 913
 914	dbg("factoring %s...\n", gf_poly_str(f));
 915
 916	*g = f;
 917	*h = NULL;
 918
 919	/* tk = Tr(a^k.X) mod f */
 920	compute_trace_bk_mod(bch, k, f, z, tk);
 921
 922	if (tk->deg > 0) {
 923		/* compute g = gcd(f, tk) (destructive operation) */
 924		gf_poly_copy(f2, f);
 925		gcd = gf_poly_gcd(bch, f2, tk);
 926		if (gcd->deg < f->deg) {
 927			/* compute h=f/gcd(f,tk); this will modify f and q */
 928			gf_poly_div(bch, f, gcd, q);
 929			/* store g and h in-place (clobbering f) */
 930			*h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly;
 931			gf_poly_copy(*g, gcd);
 932			gf_poly_copy(*h, q);
 933		}
 934	}
 935}
 936
 937/*
 938 * find roots of a polynomial, using BTZ algorithm; see the beginning of this
 939 * file for details
 940 */
 941static int find_poly_roots(struct bch_control *bch, unsigned int k,
 942			   struct gf_poly *poly, unsigned int *roots)
 943{
 944	int cnt;
 945	struct gf_poly *f1, *f2;
 946
 947	switch (poly->deg) {
 948		/* handle low degree polynomials with ad hoc techniques */
 949	case 1:
 950		cnt = find_poly_deg1_roots(bch, poly, roots);
 951		break;
 952	case 2:
 953		cnt = find_poly_deg2_roots(bch, poly, roots);
 954		break;
 955	case 3:
 956		cnt = find_poly_deg3_roots(bch, poly, roots);
 957		break;
 958	case 4:
 959		cnt = find_poly_deg4_roots(bch, poly, roots);
 960		break;
 961	default:
 962		/* factor polynomial using Berlekamp Trace Algorithm (BTA) */
 963		cnt = 0;
 964		if (poly->deg && (k <= GF_M(bch))) {
 965			factor_polynomial(bch, k, poly, &f1, &f2);
 966			if (f1)
 967				cnt += find_poly_roots(bch, k+1, f1, roots);
 968			if (f2)
 969				cnt += find_poly_roots(bch, k+1, f2, roots+cnt);
 970		}
 971		break;
 972	}
 973	return cnt;
 974}
 975
 976#if defined(USE_CHIEN_SEARCH)
 977/*
 978 * exhaustive root search (Chien) implementation - not used, included only for
 979 * reference/comparison tests
 980 */
 981static int chien_search(struct bch_control *bch, unsigned int len,
 982			struct gf_poly *p, unsigned int *roots)
 983{
 984	int m;
 985	unsigned int i, j, syn, syn0, count = 0;
 986	const unsigned int k = 8*len+bch->ecc_bits;
 987
 988	/* use a log-based representation of polynomial */
 989	gf_poly_logrep(bch, p, bch->cache);
 990	bch->cache[p->deg] = 0;
 991	syn0 = gf_div(bch, p->c[0], p->c[p->deg]);
 992
 993	for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
 994		/* compute elp(a^i) */
 995		for (j = 1, syn = syn0; j <= p->deg; j++) {
 996			m = bch->cache[j];
 997			if (m >= 0)
 998				syn ^= a_pow(bch, m+j*i);
 999		}
1000		if (syn == 0) {
1001			roots[count++] = GF_N(bch)-i;
1002			if (count == p->deg)
1003				break;
1004		}
1005	}
1006	return (count == p->deg) ? count : 0;
1007}
1008#define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
1009#endif /* USE_CHIEN_SEARCH */
1010
1011/**
1012 * bch_decode - decode received codeword and find bit error locations
1013 * @bch:      BCH control structure
1014 * @data:     received data, ignored if @calc_ecc is provided
1015 * @len:      data length in bytes, must always be provided
1016 * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
1017 * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
1018 * @syn:      hw computed syndrome data (if NULL, syndrome is calculated)
1019 * @errloc:   output array of error locations
1020 *
1021 * Returns:
1022 *  The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
1023 *  invalid parameters were provided
1024 *
1025 * Depending on the available hw BCH support and the need to compute @calc_ecc
1026 * separately (using bch_encode()), this function should be called with one of
1027 * the following parameter configurations -
1028 *
1029 * by providing @data and @recv_ecc only:
1030 *   bch_decode(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
1031 *
1032 * by providing @recv_ecc and @calc_ecc:
1033 *   bch_decode(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
1034 *
1035 * by providing ecc = recv_ecc XOR calc_ecc:
1036 *   bch_decode(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
1037 *
1038 * by providing syndrome results @syn:
1039 *   bch_decode(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
1040 *
1041 * Once bch_decode() has successfully returned with a positive value, error
1042 * locations returned in array @errloc should be interpreted as follows -
1043 *
1044 * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
1045 * data correction)
1046 *
1047 * if (errloc[n] < 8*len), then n-th error is located in data and can be
1048 * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
1049 *
1050 * Note that this function does not perform any data correction by itself, it
1051 * merely indicates error locations.
1052 */
1053int bch_decode(struct bch_control *bch, const uint8_t *data, unsigned int len,
1054	       const uint8_t *recv_ecc, const uint8_t *calc_ecc,
1055	       const unsigned int *syn, unsigned int *errloc)
1056{
1057	const unsigned int ecc_words = BCH_ECC_WORDS(bch);
1058	unsigned int nbits;
1059	int i, err, nroots;
1060	uint32_t sum;
1061
1062	/* sanity check: make sure data length can be handled */
1063	if (8*len > (bch->n-bch->ecc_bits))
1064		return -EINVAL;
1065
1066	/* if caller does not provide syndromes, compute them */
1067	if (!syn) {
1068		if (!calc_ecc) {
1069			/* compute received data ecc into an internal buffer */
1070			if (!data || !recv_ecc)
1071				return -EINVAL;
1072			bch_encode(bch, data, len, NULL);
1073		} else {
1074			/* load provided calculated ecc */
1075			load_ecc8(bch, bch->ecc_buf, calc_ecc);
1076		}
1077		/* load received ecc or assume it was XORed in calc_ecc */
1078		if (recv_ecc) {
1079			load_ecc8(bch, bch->ecc_buf2, recv_ecc);
1080			/* XOR received and calculated ecc */
1081			for (i = 0, sum = 0; i < (int)ecc_words; i++) {
1082				bch->ecc_buf[i] ^= bch->ecc_buf2[i];
1083				sum |= bch->ecc_buf[i];
1084			}
1085			if (!sum)
1086				/* no error found */
1087				return 0;
1088		}
1089		compute_syndromes(bch, bch->ecc_buf, bch->syn);
1090		syn = bch->syn;
1091	}
1092
1093	err = compute_error_locator_polynomial(bch, syn);
1094	if (err > 0) {
1095		nroots = find_poly_roots(bch, 1, bch->elp, errloc);
1096		if (err != nroots)
1097			err = -1;
1098	}
1099	if (err > 0) {
1100		/* post-process raw error locations for easier correction */
1101		nbits = (len*8)+bch->ecc_bits;
1102		for (i = 0; i < err; i++) {
1103			if (errloc[i] >= nbits) {
1104				err = -1;
1105				break;
1106			}
1107			errloc[i] = nbits-1-errloc[i];
1108			if (!bch->swap_bits)
1109				errloc[i] = (errloc[i] & ~7) |
1110					    (7-(errloc[i] & 7));
1111		}
1112	}
1113	return (err >= 0) ? err : -EBADMSG;
1114}
1115EXPORT_SYMBOL_GPL(bch_decode);
1116
1117/*
1118 * generate Galois field lookup tables
1119 */
1120static int build_gf_tables(struct bch_control *bch, unsigned int poly)
1121{
1122	unsigned int i, x = 1;
1123	const unsigned int k = 1 << deg(poly);
1124
1125	/* primitive polynomial must be of degree m */
1126	if (k != (1u << GF_M(bch)))
1127		return -1;
1128
1129	for (i = 0; i < GF_N(bch); i++) {
1130		bch->a_pow_tab[i] = x;
1131		bch->a_log_tab[x] = i;
1132		if (i && (x == 1))
1133			/* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
1134			return -1;
1135		x <<= 1;
1136		if (x & k)
1137			x ^= poly;
1138	}
1139	bch->a_pow_tab[GF_N(bch)] = 1;
1140	bch->a_log_tab[0] = 0;
1141
1142	return 0;
1143}
1144
1145/*
1146 * compute generator polynomial remainder tables for fast encoding
1147 */
1148static void build_mod8_tables(struct bch_control *bch, const uint32_t *g)
1149{
1150	int i, j, b, d;
1151	uint32_t data, hi, lo, *tab;
1152	const int l = BCH_ECC_WORDS(bch);
1153	const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32);
1154	const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32);
1155
1156	memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab));
1157
1158	for (i = 0; i < 256; i++) {
1159		/* p(X)=i is a small polynomial of weight <= 8 */
1160		for (b = 0; b < 4; b++) {
1161			/* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */
1162			tab = bch->mod8_tab + (b*256+i)*l;
1163			data = i << (8*b);
1164			while (data) {
1165				d = deg(data);
1166				/* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */
1167				data ^= g[0] >> (31-d);
1168				for (j = 0; j < ecclen; j++) {
1169					hi = (d < 31) ? g[j] << (d+1) : 0;
1170					lo = (j+1 < plen) ?
1171						g[j+1] >> (31-d) : 0;
1172					tab[j] ^= hi|lo;
1173				}
1174			}
1175		}
1176	}
1177}
1178
1179/*
1180 * build a base for factoring degree 2 polynomials
1181 */
1182static int build_deg2_base(struct bch_control *bch)
1183{
1184	const int m = GF_M(bch);
1185	int i, j, r;
1186	unsigned int sum, x, y, remaining, ak = 0, xi[BCH_MAX_M];
1187
1188	/* find k s.t. Tr(a^k) = 1 and 0 <= k < m */
1189	for (i = 0; i < m; i++) {
1190		for (j = 0, sum = 0; j < m; j++)
1191			sum ^= a_pow(bch, i*(1 << j));
1192
1193		if (sum) {
1194			ak = bch->a_pow_tab[i];
1195			break;
1196		}
1197	}
1198	/* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */
1199	remaining = m;
1200	memset(xi, 0, sizeof(xi));
1201
1202	for (x = 0; (x <= GF_N(bch)) && remaining; x++) {
1203		y = gf_sqr(bch, x)^x;
1204		for (i = 0; i < 2; i++) {
1205			r = a_log(bch, y);
1206			if (y && (r < m) && !xi[r]) {
1207				bch->xi_tab[r] = x;
1208				xi[r] = 1;
1209				remaining--;
1210				dbg("x%d = %x\n", r, x);
1211				break;
1212			}
1213			y ^= ak;
1214		}
1215	}
1216	/* should not happen but check anyway */
1217	return remaining ? -1 : 0;
1218}
1219
1220static void *bch_alloc(size_t size, int *err)
1221{
1222	void *ptr;
1223
1224	ptr = kmalloc(size, GFP_KERNEL);
1225	if (ptr == NULL)
1226		*err = 1;
1227	return ptr;
1228}
1229
1230/*
1231 * compute generator polynomial for given (m,t) parameters.
1232 */
1233static uint32_t *compute_generator_polynomial(struct bch_control *bch)
1234{
1235	const unsigned int m = GF_M(bch);
1236	const unsigned int t = GF_T(bch);
1237	int n, err = 0;
1238	unsigned int i, j, nbits, r, word, *roots;
1239	struct gf_poly *g;
1240	uint32_t *genpoly;
1241
1242	g = bch_alloc(GF_POLY_SZ(m*t), &err);
1243	roots = bch_alloc((bch->n+1)*sizeof(*roots), &err);
1244	genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err);
1245
1246	if (err) {
1247		kfree(genpoly);
1248		genpoly = NULL;
1249		goto finish;
1250	}
1251
1252	/* enumerate all roots of g(X) */
1253	memset(roots , 0, (bch->n+1)*sizeof(*roots));
1254	for (i = 0; i < t; i++) {
1255		for (j = 0, r = 2*i+1; j < m; j++) {
1256			roots[r] = 1;
1257			r = mod_s(bch, 2*r);
1258		}
1259	}
1260	/* build generator polynomial g(X) */
1261	g->deg = 0;
1262	g->c[0] = 1;
1263	for (i = 0; i < GF_N(bch); i++) {
1264		if (roots[i]) {
1265			/* multiply g(X) by (X+root) */
1266			r = bch->a_pow_tab[i];
1267			g->c[g->deg+1] = 1;
1268			for (j = g->deg; j > 0; j--)
1269				g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1];
1270
1271			g->c[0] = gf_mul(bch, g->c[0], r);
1272			g->deg++;
1273		}
1274	}
1275	/* store left-justified binary representation of g(X) */
1276	n = g->deg+1;
1277	i = 0;
1278
1279	while (n > 0) {
1280		nbits = (n > 32) ? 32 : n;
1281		for (j = 0, word = 0; j < nbits; j++) {
1282			if (g->c[n-1-j])
1283				word |= 1u << (31-j);
1284		}
1285		genpoly[i++] = word;
1286		n -= nbits;
1287	}
1288	bch->ecc_bits = g->deg;
1289
1290finish:
1291	kfree(g);
1292	kfree(roots);
1293
1294	return genpoly;
1295}
1296
1297/**
1298 * bch_init - initialize a BCH encoder/decoder
1299 * @m:          Galois field order, should be in the range 5-15
1300 * @t:          maximum error correction capability, in bits
1301 * @prim_poly:  user-provided primitive polynomial (or 0 to use default)
1302 * @swap_bits:  swap bits within data and syndrome bytes
1303 *
1304 * Returns:
1305 *  a newly allocated BCH control structure if successful, NULL otherwise
1306 *
1307 * This initialization can take some time, as lookup tables are built for fast
1308 * encoding/decoding; make sure not to call this function from a time critical
1309 * path. Usually, bch_init() should be called on module/driver init and
1310 * bch_free() should be called to release memory on exit.
1311 *
1312 * You may provide your own primitive polynomial of degree @m in argument
1313 * @prim_poly, or let bch_init() use its default polynomial.
1314 *
1315 * Once bch_init() has successfully returned a pointer to a newly allocated
1316 * BCH control structure, ecc length in bytes is given by member @ecc_bytes of
1317 * the structure.
1318 */
1319struct bch_control *bch_init(int m, int t, unsigned int prim_poly,
1320			     bool swap_bits)
1321{
1322	int err = 0;
1323	unsigned int i, words;
1324	uint32_t *genpoly;
1325	struct bch_control *bch = NULL;
1326
1327	const int min_m = 5;
 
1328
1329	/* default primitive polynomials */
1330	static const unsigned int prim_poly_tab[] = {
1331		0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b,
1332		0x402b, 0x8003,
1333	};
1334
1335#if defined(CONFIG_BCH_CONST_PARAMS)
1336	if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) {
1337		printk(KERN_ERR "bch encoder/decoder was configured to support "
1338		       "parameters m=%d, t=%d only!\n",
1339		       CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
1340		goto fail;
1341	}
1342#endif
1343	if ((m < min_m) || (m > BCH_MAX_M))
1344		/*
1345		 * values of m greater than 15 are not currently supported;
1346		 * supporting m > 15 would require changing table base type
1347		 * (uint16_t) and a small patch in matrix transposition
1348		 */
1349		goto fail;
1350
1351	if (t > BCH_MAX_T)
1352		/*
1353		 * we can support larger than 64 bits if necessary, at the
1354		 * cost of higher stack usage.
1355		 */
1356		goto fail;
1357
1358	/* sanity checks */
1359	if ((t < 1) || (m*t >= ((1 << m)-1)))
1360		/* invalid t value */
1361		goto fail;
1362
1363	/* select a primitive polynomial for generating GF(2^m) */
1364	if (prim_poly == 0)
1365		prim_poly = prim_poly_tab[m-min_m];
1366
1367	bch = kzalloc(sizeof(*bch), GFP_KERNEL);
1368	if (bch == NULL)
1369		goto fail;
1370
1371	bch->m = m;
1372	bch->t = t;
1373	bch->n = (1 << m)-1;
1374	words  = DIV_ROUND_UP(m*t, 32);
1375	bch->ecc_bytes = DIV_ROUND_UP(m*t, 8);
1376	bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err);
1377	bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err);
1378	bch->mod8_tab  = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err);
1379	bch->ecc_buf   = bch_alloc(words*sizeof(*bch->ecc_buf), &err);
1380	bch->ecc_buf2  = bch_alloc(words*sizeof(*bch->ecc_buf2), &err);
1381	bch->xi_tab    = bch_alloc(m*sizeof(*bch->xi_tab), &err);
1382	bch->syn       = bch_alloc(2*t*sizeof(*bch->syn), &err);
1383	bch->cache     = bch_alloc(2*t*sizeof(*bch->cache), &err);
1384	bch->elp       = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err);
1385	bch->swap_bits = swap_bits;
1386
1387	for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1388		bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err);
1389
1390	if (err)
1391		goto fail;
1392
1393	err = build_gf_tables(bch, prim_poly);
1394	if (err)
1395		goto fail;
1396
1397	/* use generator polynomial for computing encoding tables */
1398	genpoly = compute_generator_polynomial(bch);
1399	if (genpoly == NULL)
1400		goto fail;
1401
1402	build_mod8_tables(bch, genpoly);
1403	kfree(genpoly);
1404
1405	err = build_deg2_base(bch);
1406	if (err)
1407		goto fail;
1408
1409	return bch;
1410
1411fail:
1412	bch_free(bch);
1413	return NULL;
1414}
1415EXPORT_SYMBOL_GPL(bch_init);
1416
1417/**
1418 *  bch_free - free the BCH control structure
1419 *  @bch:    BCH control structure to release
1420 */
1421void bch_free(struct bch_control *bch)
1422{
1423	unsigned int i;
1424
1425	if (bch) {
1426		kfree(bch->a_pow_tab);
1427		kfree(bch->a_log_tab);
1428		kfree(bch->mod8_tab);
1429		kfree(bch->ecc_buf);
1430		kfree(bch->ecc_buf2);
1431		kfree(bch->xi_tab);
1432		kfree(bch->syn);
1433		kfree(bch->cache);
1434		kfree(bch->elp);
1435
1436		for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1437			kfree(bch->poly_2t[i]);
1438
1439		kfree(bch);
1440	}
1441}
1442EXPORT_SYMBOL_GPL(bch_free);
1443
1444MODULE_LICENSE("GPL");
1445MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
1446MODULE_DESCRIPTION("Binary BCH encoder/decoder");