bch.c - lib/bch.c - Linux diff v5.14.15 - Bootlin Elixir Cross Referencer

   1/*
   2 * Generic binary BCH encoding/decoding library
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms of the GNU General Public License version 2 as published by
   6 * the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 *
  13 * You should have received a copy of the GNU General Public License along with
  14 * this program; if not, write to the Free Software Foundation, Inc., 51
  15 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  16 *
  17 * Copyright © 2011 Parrot S.A.
  18 *
  19 * Author: Ivan Djelic <ivan.djelic@parrot.com>
  20 *
  21 * Description:
  22 *
  23 * This library provides runtime configurable encoding/decoding of binary
  24 * Bose-Chaudhuri-Hocquenghem (BCH) codes.
  25 *
  26 * Call bch_init to get a pointer to a newly allocated bch_control structure for
  27 * the given m (Galois field order), t (error correction capability) and
  28 * (optional) primitive polynomial parameters.
  29 *
  30 * Call bch_encode to compute and store ecc parity bytes to a given buffer.
  31 * Call bch_decode to detect and locate errors in received data.
  32 *
  33 * On systems supporting hw BCH features, intermediate results may be provided
  34 * to bch_decode in order to skip certain steps. See bch_decode() documentation
  35 * for details.
  36 *
  37 * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
  38 * parameters m and t; thus allowing extra compiler optimizations and providing
  39 * better (up to 2x) encoding performance. Using this option makes sense when
  40 * (m,t) are fixed and known in advance, e.g. when using BCH error correction
  41 * on a particular NAND flash device.
  42 *
  43 * Algorithmic details:
  44 *
  45 * Encoding is performed by processing 32 input bits in parallel, using 4
  46 * remainder lookup tables.
  47 *
  48 * The final stage of decoding involves the following internal steps:
  49 * a. Syndrome computation
  50 * b. Error locator polynomial computation using Berlekamp-Massey algorithm
  51 * c. Error locator root finding (by far the most expensive step)
  52 *
  53 * In this implementation, step c is not performed using the usual Chien search.
  54 * Instead, an alternative approach described in [1] is used. It consists in
  55 * factoring the error locator polynomial using the Berlekamp Trace algorithm
  56 * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
  57 * solving techniques [2] are used. The resulting algorithm, called BTZ, yields
  58 * much better performance than Chien search for usual (m,t) values (typically
  59 * m >= 13, t < 32, see [1]).
  60 *
  61 * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
  62 * of characteristic 2, in: Western European Workshop on Research in Cryptology
  63 * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
  64 * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
  65 * finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
  66 */
  67
  68#include <linux/kernel.h>
  69#include <linux/errno.h>
  70#include <linux/init.h>
  71#include <linux/module.h>
  72#include <linux/slab.h>
  73#include <linux/bitops.h>
 
  74#include <asm/byteorder.h>
  75#include <linux/bch.h>
  76
  77#if defined(CONFIG_BCH_CONST_PARAMS)
  78#define GF_M(_p)               (CONFIG_BCH_CONST_M)
  79#define GF_T(_p)               (CONFIG_BCH_CONST_T)
  80#define GF_N(_p)               ((1 << (CONFIG_BCH_CONST_M))-1)
  81#define BCH_MAX_M              (CONFIG_BCH_CONST_M)
  82#define BCH_MAX_T	       (CONFIG_BCH_CONST_T)
  83#else
  84#define GF_M(_p)               ((_p)->m)
  85#define GF_T(_p)               ((_p)->t)
  86#define GF_N(_p)               ((_p)->n)
  87#define BCH_MAX_M              15 /* 2KB */
  88#define BCH_MAX_T              64 /* 64 bit correction */
  89#endif
  90
  91#define BCH_ECC_WORDS(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
  92#define BCH_ECC_BYTES(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
  93
  94#define BCH_ECC_MAX_WORDS      DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32)
  95
  96#ifndef dbg
  97#define dbg(_fmt, args...)     do {} while (0)
  98#endif
  99
 100/*
 101 * represent a polynomial over GF(2^m)
 102 */
 103struct gf_poly {
 104	unsigned int deg;    /* polynomial degree */
 105	unsigned int c[];   /* polynomial terms */
 106};
 107
 108/* given its degree, compute a polynomial size in bytes */
 109#define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))
 110
 111/* polynomial of degree 1 */
 112struct gf_poly_deg1 {
 113	struct gf_poly poly;
 114	unsigned int   c[2];
 115};
 116
 117static u8 swap_bits_table[] = {
 118	0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
 119	0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
 120	0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
 121	0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
 122	0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
 123	0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
 124	0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
 125	0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
 126	0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
 127	0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
 128	0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
 129	0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
 130	0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
 131	0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
 132	0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
 133	0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
 134	0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
 135	0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
 136	0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
 137	0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
 138	0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
 139	0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
 140	0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
 141	0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
 142	0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
 143	0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
 144	0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
 145	0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
 146	0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
 147	0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
 148	0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
 149	0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
 150};
 151
 152static u8 swap_bits(struct bch_control *bch, u8 in)
 153{
 154	if (!bch->swap_bits)
 155		return in;
 156
 157	return swap_bits_table[in];
 158}
 159
 160/*
 161 * same as bch_encode(), but process input data one byte at a time
 162 */
 163static void bch_encode_unaligned(struct bch_control *bch,
 164				 const unsigned char *data, unsigned int len,
 165				 uint32_t *ecc)
 166{
 167	int i;
 168	const uint32_t *p;
 169	const int l = BCH_ECC_WORDS(bch)-1;
 170
 171	while (len--) {
 172		u8 tmp = swap_bits(bch, *data++);
 173
 174		p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(tmp)) & 0xff);
 175
 176		for (i = 0; i < l; i++)
 177			ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++);
 178
 179		ecc[l] = (ecc[l] << 8)^(*p);
 180	}
 181}
 182
 183/*
 184 * convert ecc bytes to aligned, zero-padded 32-bit ecc words
 185 */
 186static void load_ecc8(struct bch_control *bch, uint32_t *dst,
 187		      const uint8_t *src)
 188{
 189	uint8_t pad[4] = {0, 0, 0, 0};
 190	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
 191
 192	for (i = 0; i < nwords; i++, src += 4)
 193		dst[i] = ((u32)swap_bits(bch, src[0]) << 24) |
 194			((u32)swap_bits(bch, src[1]) << 16) |
 195			((u32)swap_bits(bch, src[2]) << 8) |
 196			swap_bits(bch, src[3]);
 197
 198	memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords);
 199	dst[nwords] = ((u32)swap_bits(bch, pad[0]) << 24) |
 200		((u32)swap_bits(bch, pad[1]) << 16) |
 201		((u32)swap_bits(bch, pad[2]) << 8) |
 202		swap_bits(bch, pad[3]);
 203}
 204
 205/*
 206 * convert 32-bit ecc words to ecc bytes
 207 */
 208static void store_ecc8(struct bch_control *bch, uint8_t *dst,
 209		       const uint32_t *src)
 210{
 211	uint8_t pad[4];
 212	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
 213
 214	for (i = 0; i < nwords; i++) {
 215		*dst++ = swap_bits(bch, src[i] >> 24);
 216		*dst++ = swap_bits(bch, src[i] >> 16);
 217		*dst++ = swap_bits(bch, src[i] >> 8);
 218		*dst++ = swap_bits(bch, src[i]);
 219	}
 220	pad[0] = swap_bits(bch, src[nwords] >> 24);
 221	pad[1] = swap_bits(bch, src[nwords] >> 16);
 222	pad[2] = swap_bits(bch, src[nwords] >> 8);
 223	pad[3] = swap_bits(bch, src[nwords]);
 224	memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
 225}
 226
 227/**
 228 * bch_encode - calculate BCH ecc parity of data
 229 * @bch:   BCH control structure
 230 * @data:  data to encode
 231 * @len:   data length in bytes
 232 * @ecc:   ecc parity data, must be initialized by caller
 233 *
 234 * The @ecc parity array is used both as input and output parameter, in order to
 235 * allow incremental computations. It should be of the size indicated by member
 236 * @ecc_bytes of @bch, and should be initialized to 0 before the first call.
 237 *
 238 * The exact number of computed ecc parity bits is given by member @ecc_bits of
 239 * @bch; it may be less than m*t for large values of t.
 240 */
 241void bch_encode(struct bch_control *bch, const uint8_t *data,
 242		unsigned int len, uint8_t *ecc)
 243{
 244	const unsigned int l = BCH_ECC_WORDS(bch)-1;
 245	unsigned int i, mlen;
 246	unsigned long m;
 247	uint32_t w, r[BCH_ECC_MAX_WORDS];
 248	const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r);
 249	const uint32_t * const tab0 = bch->mod8_tab;
 250	const uint32_t * const tab1 = tab0 + 256*(l+1);
 251	const uint32_t * const tab2 = tab1 + 256*(l+1);
 252	const uint32_t * const tab3 = tab2 + 256*(l+1);
 253	const uint32_t *pdata, *p0, *p1, *p2, *p3;
 254
 255	if (WARN_ON(r_bytes > sizeof(r)))
 256		return;
 257
 258	if (ecc) {
 259		/* load ecc parity bytes into internal 32-bit buffer */
 260		load_ecc8(bch, bch->ecc_buf, ecc);
 261	} else {
 262		memset(bch->ecc_buf, 0, r_bytes);
 263	}
 264
 265	/* process first unaligned data bytes */
 266	m = ((unsigned long)data) & 3;
 267	if (m) {
 268		mlen = (len < (4-m)) ? len : 4-m;
 269		bch_encode_unaligned(bch, data, mlen, bch->ecc_buf);
 270		data += mlen;
 271		len  -= mlen;
 272	}
 273
 274	/* process 32-bit aligned data words */
 275	pdata = (uint32_t *)data;
 276	mlen  = len/4;
 277	data += 4*mlen;
 278	len  -= 4*mlen;
 279	memcpy(r, bch->ecc_buf, r_bytes);
 280
 281	/*
 282	 * split each 32-bit word into 4 polynomials of weight 8 as follows:
 283	 *
 284	 * 31 ...24  23 ...16  15 ... 8  7 ... 0
 285	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt
 286	 *                               tttttttt  mod g = r0 (precomputed)
 287	 *                     zzzzzzzz  00000000  mod g = r1 (precomputed)
 288	 *           yyyyyyyy  00000000  00000000  mod g = r2 (precomputed)
 289	 * xxxxxxxx  00000000  00000000  00000000  mod g = r3 (precomputed)
 290	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt  mod g = r0^r1^r2^r3
 291	 */
 292	while (mlen--) {
 293		/* input data is read in big-endian format */
 294		w = cpu_to_be32(*pdata++);
 295		if (bch->swap_bits)
 296			w = (u32)swap_bits(bch, w) |
 297			    ((u32)swap_bits(bch, w >> 8) << 8) |
 298			    ((u32)swap_bits(bch, w >> 16) << 16) |
 299			    ((u32)swap_bits(bch, w >> 24) << 24);
 300		w ^= r[0];
 301		p0 = tab0 + (l+1)*((w >>  0) & 0xff);
 302		p1 = tab1 + (l+1)*((w >>  8) & 0xff);
 303		p2 = tab2 + (l+1)*((w >> 16) & 0xff);
 304		p3 = tab3 + (l+1)*((w >> 24) & 0xff);
 305
 306		for (i = 0; i < l; i++)
 307			r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i];
 308
 309		r[l] = p0[l]^p1[l]^p2[l]^p3[l];
 310	}
 311	memcpy(bch->ecc_buf, r, r_bytes);
 312
 313	/* process last unaligned bytes */
 314	if (len)
 315		bch_encode_unaligned(bch, data, len, bch->ecc_buf);
 316
 317	/* store ecc parity bytes into original parity buffer */
 318	if (ecc)
 319		store_ecc8(bch, ecc, bch->ecc_buf);
 320}
 321EXPORT_SYMBOL_GPL(bch_encode);
 322
 323static inline int modulo(struct bch_control *bch, unsigned int v)
 324{
 325	const unsigned int n = GF_N(bch);
 326	while (v >= n) {
 327		v -= n;
 328		v = (v & n) + (v >> GF_M(bch));
 329	}
 330	return v;
 331}
 332
 333/*
 334 * shorter and faster modulo function, only works when v < 2N.
 335 */
 336static inline int mod_s(struct bch_control *bch, unsigned int v)
 337{
 338	const unsigned int n = GF_N(bch);
 339	return (v < n) ? v : v-n;
 340}
 341
 342static inline int deg(unsigned int poly)
 343{
 344	/* polynomial degree is the most-significant bit index */
 345	return fls(poly)-1;
 346}
 347
 348static inline int parity(unsigned int x)
 349{
 350	/*
 351	 * public domain code snippet, lifted from
 352	 * http://www-graphics.stanford.edu/~seander/bithacks.html
 353	 */
 354	x ^= x >> 1;
 355	x ^= x >> 2;
 356	x = (x & 0x11111111U) * 0x11111111U;
 357	return (x >> 28) & 1;
 358}
 359
 360/* Galois field basic operations: multiply, divide, inverse, etc. */
 361
 362static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a,
 363				  unsigned int b)
 364{
 365	return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
 366					       bch->a_log_tab[b])] : 0;
 367}
 368
 369static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a)
 370{
 371	return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0;
 372}
 373
 374static inline unsigned int gf_div(struct bch_control *bch, unsigned int a,
 375				  unsigned int b)
 376{
 377	return a ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
 378					GF_N(bch)-bch->a_log_tab[b])] : 0;
 379}
 380
 381static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a)
 382{
 383	return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
 384}
 385
 386static inline unsigned int a_pow(struct bch_control *bch, int i)
 387{
 388	return bch->a_pow_tab[modulo(bch, i)];
 389}
 390
 391static inline int a_log(struct bch_control *bch, unsigned int x)
 392{
 393	return bch->a_log_tab[x];
 394}
 395
 396static inline int a_ilog(struct bch_control *bch, unsigned int x)
 397{
 398	return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
 399}
 400
 401/*
 402 * compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
 403 */
 404static void compute_syndromes(struct bch_control *bch, uint32_t *ecc,
 405			      unsigned int *syn)
 406{
 407	int i, j, s;
 408	unsigned int m;
 409	uint32_t poly;
 410	const int t = GF_T(bch);
 411
 412	s = bch->ecc_bits;
 413
 414	/* make sure extra bits in last ecc word are cleared */
 415	m = ((unsigned int)s) & 31;
 416	if (m)
 417		ecc[s/32] &= ~((1u << (32-m))-1);
 418	memset(syn, 0, 2*t*sizeof(*syn));
 419
 420	/* compute v(a^j) for j=1 .. 2t-1 */
 421	do {
 422		poly = *ecc++;
 423		s -= 32;
 424		while (poly) {
 425			i = deg(poly);
 426			for (j = 0; j < 2*t; j += 2)
 427				syn[j] ^= a_pow(bch, (j+1)*(i+s));
 428
 429			poly ^= (1 << i);
 430		}
 431	} while (s > 0);
 432
 433	/* v(a^(2j)) = v(a^j)^2 */
 434	for (j = 0; j < t; j++)
 435		syn[2*j+1] = gf_sqr(bch, syn[j]);
 436}
 437
 438static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src)
 439{
 440	memcpy(dst, src, GF_POLY_SZ(src->deg));
 441}
 442
 443static int compute_error_locator_polynomial(struct bch_control *bch,
 444					    const unsigned int *syn)
 445{
 446	const unsigned int t = GF_T(bch);
 447	const unsigned int n = GF_N(bch);
 448	unsigned int i, j, tmp, l, pd = 1, d = syn[0];
 449	struct gf_poly *elp = bch->elp;
 450	struct gf_poly *pelp = bch->poly_2t[0];
 451	struct gf_poly *elp_copy = bch->poly_2t[1];
 452	int k, pp = -1;
 453
 454	memset(pelp, 0, GF_POLY_SZ(2*t));
 455	memset(elp, 0, GF_POLY_SZ(2*t));
 456
 457	pelp->deg = 0;
 458	pelp->c[0] = 1;
 459	elp->deg = 0;
 460	elp->c[0] = 1;
 461
 462	/* use simplified binary Berlekamp-Massey algorithm */
 463	for (i = 0; (i < t) && (elp->deg <= t); i++) {
 464		if (d) {
 465			k = 2*i-pp;
 466			gf_poly_copy(elp_copy, elp);
 467			/* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */
 468			tmp = a_log(bch, d)+n-a_log(bch, pd);
 469			for (j = 0; j <= pelp->deg; j++) {
 470				if (pelp->c[j]) {
 471					l = a_log(bch, pelp->c[j]);
 472					elp->c[j+k] ^= a_pow(bch, tmp+l);
 473				}
 474			}
 475			/* compute l[i+1] = max(l[i]->c[l[p]+2*(i-p]) */
 476			tmp = pelp->deg+k;
 477			if (tmp > elp->deg) {
 478				elp->deg = tmp;
 479				gf_poly_copy(pelp, elp_copy);
 480				pd = d;
 481				pp = 2*i;
 482			}
 483		}
 484		/* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */
 485		if (i < t-1) {
 486			d = syn[2*i+2];
 487			for (j = 1; j <= elp->deg; j++)
 488				d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]);
 489		}
 490	}
 491	dbg("elp=%s\n", gf_poly_str(elp));
 492	return (elp->deg > t) ? -1 : (int)elp->deg;
 493}
 494
 495/*
 496 * solve a m x m linear system in GF(2) with an expected number of solutions,
 497 * and return the number of found solutions
 498 */
 499static int solve_linear_system(struct bch_control *bch, unsigned int *rows,
 500			       unsigned int *sol, int nsol)
 501{
 502	const int m = GF_M(bch);
 503	unsigned int tmp, mask;
 504	int rem, c, r, p, k, param[BCH_MAX_M];
 505
 506	k = 0;
 507	mask = 1 << m;
 508
 509	/* Gaussian elimination */
 510	for (c = 0; c < m; c++) {
 511		rem = 0;
 512		p = c-k;
 513		/* find suitable row for elimination */
 514		for (r = p; r < m; r++) {
 515			if (rows[r] & mask) {
 516				if (r != p) {
 517					tmp = rows[r];
 518					rows[r] = rows[p];
 519					rows[p] = tmp;
 520				}
 521				rem = r+1;
 522				break;
 523			}
 524		}
 525		if (rem) {
 526			/* perform elimination on remaining rows */
 527			tmp = rows[p];
 528			for (r = rem; r < m; r++) {
 529				if (rows[r] & mask)
 530					rows[r] ^= tmp;
 531			}
 532		} else {
 533			/* elimination not needed, store defective row index */
 534			param[k++] = c;
 535		}
 536		mask >>= 1;
 537	}
 538	/* rewrite system, inserting fake parameter rows */
 539	if (k > 0) {
 540		p = k;
 541		for (r = m-1; r >= 0; r--) {
 542			if ((r > m-1-k) && rows[r])
 543				/* system has no solution */
 544				return 0;
 545
 546			rows[r] = (p && (r == param[p-1])) ?
 547				p--, 1u << (m-r) : rows[r-p];
 548		}
 549	}
 550
 551	if (nsol != (1 << k))
 552		/* unexpected number of solutions */
 553		return 0;
 554
 555	for (p = 0; p < nsol; p++) {
 556		/* set parameters for p-th solution */
 557		for (c = 0; c < k; c++)
 558			rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1);
 559
 560		/* compute unique solution */
 561		tmp = 0;
 562		for (r = m-1; r >= 0; r--) {
 563			mask = rows[r] & (tmp|1);
 564			tmp |= parity(mask) << (m-r);
 565		}
 566		sol[p] = tmp >> 1;
 567	}
 568	return nsol;
 569}
 570
 571/*
 572 * this function builds and solves a linear system for finding roots of a degree
 573 * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
 574 */
 575static int find_affine4_roots(struct bch_control *bch, unsigned int a,
 576			      unsigned int b, unsigned int c,
 577			      unsigned int *roots)
 578{
 579	int i, j, k;
 580	const int m = GF_M(bch);
 581	unsigned int mask = 0xff, t, rows[16] = {0,};
 582
 583	j = a_log(bch, b);
 584	k = a_log(bch, a);
 585	rows[0] = c;
 586
 587	/* build linear system to solve X^4+aX^2+bX+c = 0 */
 588	for (i = 0; i < m; i++) {
 589		rows[i+1] = bch->a_pow_tab[4*i]^
 590			(a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^
 591			(b ? bch->a_pow_tab[mod_s(bch, j)] : 0);
 592		j++;
 593		k += 2;
 594	}
 595	/*
 596	 * transpose 16x16 matrix before passing it to linear solver
 597	 * warning: this code assumes m < 16
 598	 */
 599	for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) {
 600		for (k = 0; k < 16; k = (k+j+1) & ~j) {
 601			t = ((rows[k] >> j)^rows[k+j]) & mask;
 602			rows[k] ^= (t << j);
 603			rows[k+j] ^= t;
 604		}
 605	}
 606	return solve_linear_system(bch, rows, roots, 4);
 607}
 608
 609/*
 610 * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
 611 */
 612static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly,
 613				unsigned int *roots)
 614{
 615	int n = 0;
 616
 617	if (poly->c[0])
 618		/* poly[X] = bX+c with c!=0, root=c/b */
 619		roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+
 620				   bch->a_log_tab[poly->c[1]]);
 621	return n;
 622}
 623
 624/*
 625 * compute roots of a degree 2 polynomial over GF(2^m)
 626 */
 627static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly,
 628				unsigned int *roots)
 629{
 630	int n = 0, i, l0, l1, l2;
 631	unsigned int u, v, r;
 632
 633	if (poly->c[0] && poly->c[1]) {
 634
 635		l0 = bch->a_log_tab[poly->c[0]];
 636		l1 = bch->a_log_tab[poly->c[1]];
 637		l2 = bch->a_log_tab[poly->c[2]];
 638
 639		/* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */
 640		u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1));
 641		/*
 642		 * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
 643		 * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
 644		 * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
 645		 * i.e. r and r+1 are roots iff Tr(u)=0
 646		 */
 647		r = 0;
 648		v = u;
 649		while (v) {
 650			i = deg(v);
 651			r ^= bch->xi_tab[i];
 652			v ^= (1 << i);
 653		}
 654		/* verify root */
 655		if ((gf_sqr(bch, r)^r) == u) {
 656			/* reverse z=a/bX transformation and compute log(1/r) */
 657			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
 658					    bch->a_log_tab[r]+l2);
 659			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
 660					    bch->a_log_tab[r^1]+l2);
 661		}
 662	}
 663	return n;
 664}
 665
 666/*
 667 * compute roots of a degree 3 polynomial over GF(2^m)
 668 */
 669static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly,
 670				unsigned int *roots)
 671{
 672	int i, n = 0;
 673	unsigned int a, b, c, a2, b2, c2, e3, tmp[4];
 674
 675	if (poly->c[0]) {
 676		/* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */
 677		e3 = poly->c[3];
 678		c2 = gf_div(bch, poly->c[0], e3);
 679		b2 = gf_div(bch, poly->c[1], e3);
 680		a2 = gf_div(bch, poly->c[2], e3);
 681
 682		/* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */
 683		c = gf_mul(bch, a2, c2);           /* c = a2c2      */
 684		b = gf_mul(bch, a2, b2)^c2;        /* b = a2b2 + c2 */
 685		a = gf_sqr(bch, a2)^b2;            /* a = a2^2 + b2 */
 686
 687		/* find the 4 roots of this affine polynomial */
 688		if (find_affine4_roots(bch, a, b, c, tmp) == 4) {
 689			/* remove a2 from final list of roots */
 690			for (i = 0; i < 4; i++) {
 691				if (tmp[i] != a2)
 692					roots[n++] = a_ilog(bch, tmp[i]);
 693			}
 694		}
 695	}
 696	return n;
 697}
 698
 699/*
 700 * compute roots of a degree 4 polynomial over GF(2^m)
 701 */
 702static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly,
 703				unsigned int *roots)
 704{
 705	int i, l, n = 0;
 706	unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4;
 707
 708	if (poly->c[0] == 0)
 709		return 0;
 710
 711	/* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */
 712	e4 = poly->c[4];
 713	d = gf_div(bch, poly->c[0], e4);
 714	c = gf_div(bch, poly->c[1], e4);
 715	b = gf_div(bch, poly->c[2], e4);
 716	a = gf_div(bch, poly->c[3], e4);
 717
 718	/* use Y=1/X transformation to get an affine polynomial */
 719	if (a) {
 720		/* first, eliminate cX by using z=X+e with ae^2+c=0 */
 721		if (c) {
 722			/* compute e such that e^2 = c/a */
 723			f = gf_div(bch, c, a);
 724			l = a_log(bch, f);
 725			l += (l & 1) ? GF_N(bch) : 0;
 726			e = a_pow(bch, l/2);
 727			/*
 728			 * use transformation z=X+e:
 729			 * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
 730			 * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
 731			 * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
 732			 * z^4 + az^3 +     b'z^2 + d'
 733			 */
 734			d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d;
 735			b = gf_mul(bch, a, e)^b;
 736		}
 737		/* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */
 738		if (d == 0)
 739			/* assume all roots have multiplicity 1 */
 740			return 0;
 741
 742		c2 = gf_inv(bch, d);
 743		b2 = gf_div(bch, a, d);
 744		a2 = gf_div(bch, b, d);
 745	} else {
 746		/* polynomial is already affine */
 747		c2 = d;
 748		b2 = c;
 749		a2 = b;
 750	}
 751	/* find the 4 roots of this affine polynomial */
 752	if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) {
 753		for (i = 0; i < 4; i++) {
 754			/* post-process roots (reverse transformations) */
 755			f = a ? gf_inv(bch, roots[i]) : roots[i];
 756			roots[i] = a_ilog(bch, f^e);
 757		}
 758		n = 4;
 759	}
 760	return n;
 761}
 762
 763/*
 764 * build monic, log-based representation of a polynomial
 765 */
 766static void gf_poly_logrep(struct bch_control *bch,
 767			   const struct gf_poly *a, int *rep)
 768{
 769	int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]);
 770
 771	/* represent 0 values with -1; warning, rep[d] is not set to 1 */
 772	for (i = 0; i < d; i++)
 773		rep[i] = a->c[i] ? mod_s(bch, a_log(bch, a->c[i])+l) : -1;
 774}
 775
 776/*
 777 * compute polynomial Euclidean division remainder in GF(2^m)[X]
 778 */
 779static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a,
 780			const struct gf_poly *b, int *rep)
 781{
 782	int la, p, m;
 783	unsigned int i, j, *c = a->c;
 784	const unsigned int d = b->deg;
 785
 786	if (a->deg < d)
 787		return;
 788
 789	/* reuse or compute log representation of denominator */
 790	if (!rep) {
 791		rep = bch->cache;
 792		gf_poly_logrep(bch, b, rep);
 793	}
 794
 795	for (j = a->deg; j >= d; j--) {
 796		if (c[j]) {
 797			la = a_log(bch, c[j]);
 798			p = j-d;
 799			for (i = 0; i < d; i++, p++) {
 800				m = rep[i];
 801				if (m >= 0)
 802					c[p] ^= bch->a_pow_tab[mod_s(bch,
 803								     m+la)];
 804			}
 805		}
 806	}
 807	a->deg = d-1;
 808	while (!c[a->deg] && a->deg)
 809		a->deg--;
 810}
 811
 812/*
 813 * compute polynomial Euclidean division quotient in GF(2^m)[X]
 814 */
 815static void gf_poly_div(struct bch_control *bch, struct gf_poly *a,
 816			const struct gf_poly *b, struct gf_poly *q)
 817{
 818	if (a->deg >= b->deg) {
 819		q->deg = a->deg-b->deg;
 820		/* compute a mod b (modifies a) */
 821		gf_poly_mod(bch, a, b, NULL);
 822		/* quotient is stored in upper part of polynomial a */
 823		memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int));
 824	} else {
 825		q->deg = 0;
 826		q->c[0] = 0;
 827	}
 828}
 829
 830/*
 831 * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
 832 */
 833static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a,
 834				   struct gf_poly *b)
 835{
 836	struct gf_poly *tmp;
 837
 838	dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
 839
 840	if (a->deg < b->deg) {
 841		tmp = b;
 842		b = a;
 843		a = tmp;
 844	}
 845
 846	while (b->deg > 0) {
 847		gf_poly_mod(bch, a, b, NULL);
 848		tmp = b;
 849		b = a;
 850		a = tmp;
 851	}
 852
 853	dbg("%s\n", gf_poly_str(a));
 854
 855	return a;
 856}
 857
 858/*
 859 * Given a polynomial f and an integer k, compute Tr(a^kX) mod f
 860 * This is used in Berlekamp Trace algorithm for splitting polynomials
 861 */
 862static void compute_trace_bk_mod(struct bch_control *bch, int k,
 863				 const struct gf_poly *f, struct gf_poly *z,
 864				 struct gf_poly *out)
 865{
 866	const int m = GF_M(bch);
 867	int i, j;
 868
 869	/* z contains z^2j mod f */
 870	z->deg = 1;
 871	z->c[0] = 0;
 872	z->c[1] = bch->a_pow_tab[k];
 873
 874	out->deg = 0;
 875	memset(out, 0, GF_POLY_SZ(f->deg));
 876
 877	/* compute f log representation only once */
 878	gf_poly_logrep(bch, f, bch->cache);
 879
 880	for (i = 0; i < m; i++) {
 881		/* add a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */
 882		for (j = z->deg; j >= 0; j--) {
 883			out->c[j] ^= z->c[j];
 884			z->c[2*j] = gf_sqr(bch, z->c[j]);
 885			z->c[2*j+1] = 0;
 886		}
 887		if (z->deg > out->deg)
 888			out->deg = z->deg;
 889
 890		if (i < m-1) {
 891			z->deg *= 2;
 892			/* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */
 893			gf_poly_mod(bch, z, f, bch->cache);
 894		}
 895	}
 896	while (!out->c[out->deg] && out->deg)
 897		out->deg--;
 898
 899	dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
 900}
 901
 902/*
 903 * factor a polynomial using Berlekamp Trace algorithm (BTA)
 904 */
 905static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f,
 906			      struct gf_poly **g, struct gf_poly **h)
 907{
 908	struct gf_poly *f2 = bch->poly_2t[0];
 909	struct gf_poly *q  = bch->poly_2t[1];
 910	struct gf_poly *tk = bch->poly_2t[2];
 911	struct gf_poly *z  = bch->poly_2t[3];
 912	struct gf_poly *gcd;
 913
 914	dbg("factoring %s...\n", gf_poly_str(f));
 915
 916	*g = f;
 917	*h = NULL;
 918
 919	/* tk = Tr(a^k.X) mod f */
 920	compute_trace_bk_mod(bch, k, f, z, tk);
 921
 922	if (tk->deg > 0) {
 923		/* compute g = gcd(f, tk) (destructive operation) */
 924		gf_poly_copy(f2, f);
 925		gcd = gf_poly_gcd(bch, f2, tk);
 926		if (gcd->deg < f->deg) {
 927			/* compute h=f/gcd(f,tk); this will modify f and q */
 928			gf_poly_div(bch, f, gcd, q);
 929			/* store g and h in-place (clobbering f) */
 930			*h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly;
 931			gf_poly_copy(*g, gcd);
 932			gf_poly_copy(*h, q);
 933		}
 934	}
 935}
 936
 937/*
 938 * find roots of a polynomial, using BTZ algorithm; see the beginning of this
 939 * file for details
 940 */
 941static int find_poly_roots(struct bch_control *bch, unsigned int k,
 942			   struct gf_poly *poly, unsigned int *roots)
 943{
 944	int cnt;
 945	struct gf_poly *f1, *f2;
 946
 947	switch (poly->deg) {
 948		/* handle low degree polynomials with ad hoc techniques */
 949	case 1:
 950		cnt = find_poly_deg1_roots(bch, poly, roots);
 951		break;
 952	case 2:
 953		cnt = find_poly_deg2_roots(bch, poly, roots);
 954		break;
 955	case 3:
 956		cnt = find_poly_deg3_roots(bch, poly, roots);
 957		break;
 958	case 4:
 959		cnt = find_poly_deg4_roots(bch, poly, roots);
 960		break;
 961	default:
 962		/* factor polynomial using Berlekamp Trace Algorithm (BTA) */
 963		cnt = 0;
 964		if (poly->deg && (k <= GF_M(bch))) {
 965			factor_polynomial(bch, k, poly, &f1, &f2);
 966			if (f1)
 967				cnt += find_poly_roots(bch, k+1, f1, roots);
 968			if (f2)
 969				cnt += find_poly_roots(bch, k+1, f2, roots+cnt);
 970		}
 971		break;
 972	}
 973	return cnt;
 974}
 975
 976#if defined(USE_CHIEN_SEARCH)
 977/*
 978 * exhaustive root search (Chien) implementation - not used, included only for
 979 * reference/comparison tests
 980 */
 981static int chien_search(struct bch_control *bch, unsigned int len,
 982			struct gf_poly *p, unsigned int *roots)
 983{
 984	int m;
 985	unsigned int i, j, syn, syn0, count = 0;
 986	const unsigned int k = 8*len+bch->ecc_bits;
 987
 988	/* use a log-based representation of polynomial */
 989	gf_poly_logrep(bch, p, bch->cache);
 990	bch->cache[p->deg] = 0;
 991	syn0 = gf_div(bch, p->c[0], p->c[p->deg]);
 992
 993	for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
 994		/* compute elp(a^i) */
 995		for (j = 1, syn = syn0; j <= p->deg; j++) {
 996			m = bch->cache[j];
 997			if (m >= 0)
 998				syn ^= a_pow(bch, m+j*i);
 999		}
1000		if (syn == 0) {
1001			roots[count++] = GF_N(bch)-i;
1002			if (count == p->deg)
1003				break;
1004		}
1005	}
1006	return (count == p->deg) ? count : 0;
1007}
1008#define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
1009#endif /* USE_CHIEN_SEARCH */
1010
1011/**
1012 * bch_decode - decode received codeword and find bit error locations
1013 * @bch:      BCH control structure
1014 * @data:     received data, ignored if @calc_ecc is provided
1015 * @len:      data length in bytes, must always be provided
1016 * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
1017 * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
1018 * @syn:      hw computed syndrome data (if NULL, syndrome is calculated)
1019 * @errloc:   output array of error locations
1020 *
1021 * Returns:
1022 *  The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
1023 *  invalid parameters were provided
1024 *
1025 * Depending on the available hw BCH support and the need to compute @calc_ecc
1026 * separately (using bch_encode()), this function should be called with one of
1027 * the following parameter configurations -
1028 *
1029 * by providing @data and @recv_ecc only:
1030 *   bch_decode(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
1031 *
1032 * by providing @recv_ecc and @calc_ecc:
1033 *   bch_decode(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
1034 *
1035 * by providing ecc = recv_ecc XOR calc_ecc:
1036 *   bch_decode(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
1037 *
1038 * by providing syndrome results @syn:
1039 *   bch_decode(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
1040 *
1041 * Once bch_decode() has successfully returned with a positive value, error
1042 * locations returned in array @errloc should be interpreted as follows -
1043 *
1044 * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
1045 * data correction)
1046 *
1047 * if (errloc[n] < 8*len), then n-th error is located in data and can be
1048 * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
1049 *
1050 * Note that this function does not perform any data correction by itself, it
1051 * merely indicates error locations.
1052 */
1053int bch_decode(struct bch_control *bch, const uint8_t *data, unsigned int len,
1054	       const uint8_t *recv_ecc, const uint8_t *calc_ecc,
1055	       const unsigned int *syn, unsigned int *errloc)
1056{
1057	const unsigned int ecc_words = BCH_ECC_WORDS(bch);
1058	unsigned int nbits;
1059	int i, err, nroots;
1060	uint32_t sum;
1061
1062	/* sanity check: make sure data length can be handled */
1063	if (8*len > (bch->n-bch->ecc_bits))
1064		return -EINVAL;
1065
1066	/* if caller does not provide syndromes, compute them */
1067	if (!syn) {
1068		if (!calc_ecc) {
1069			/* compute received data ecc into an internal buffer */
1070			if (!data || !recv_ecc)
1071				return -EINVAL;
1072			bch_encode(bch, data, len, NULL);
1073		} else {
1074			/* load provided calculated ecc */
1075			load_ecc8(bch, bch->ecc_buf, calc_ecc);
1076		}
1077		/* load received ecc or assume it was XORed in calc_ecc */
1078		if (recv_ecc) {
1079			load_ecc8(bch, bch->ecc_buf2, recv_ecc);
1080			/* XOR received and calculated ecc */
1081			for (i = 0, sum = 0; i < (int)ecc_words; i++) {
1082				bch->ecc_buf[i] ^= bch->ecc_buf2[i];
1083				sum |= bch->ecc_buf[i];
1084			}
1085			if (!sum)
1086				/* no error found */
1087				return 0;
1088		}
1089		compute_syndromes(bch, bch->ecc_buf, bch->syn);
1090		syn = bch->syn;
1091	}
1092
1093	err = compute_error_locator_polynomial(bch, syn);
1094	if (err > 0) {
1095		nroots = find_poly_roots(bch, 1, bch->elp, errloc);
1096		if (err != nroots)
1097			err = -1;
1098	}
1099	if (err > 0) {
1100		/* post-process raw error locations for easier correction */
1101		nbits = (len*8)+bch->ecc_bits;
1102		for (i = 0; i < err; i++) {
1103			if (errloc[i] >= nbits) {
1104				err = -1;
1105				break;
1106			}
1107			errloc[i] = nbits-1-errloc[i];
1108			if (!bch->swap_bits)
1109				errloc[i] = (errloc[i] & ~7) |
1110					    (7-(errloc[i] & 7));
1111		}
1112	}
1113	return (err >= 0) ? err : -EBADMSG;
1114}
1115EXPORT_SYMBOL_GPL(bch_decode);
1116
1117/*
1118 * generate Galois field lookup tables
1119 */
1120static int build_gf_tables(struct bch_control *bch, unsigned int poly)
1121{
1122	unsigned int i, x = 1;
1123	const unsigned int k = 1 << deg(poly);
1124
1125	/* primitive polynomial must be of degree m */
1126	if (k != (1u << GF_M(bch)))
1127		return -1;
1128
1129	for (i = 0; i < GF_N(bch); i++) {
1130		bch->a_pow_tab[i] = x;
1131		bch->a_log_tab[x] = i;
1132		if (i && (x == 1))
1133			/* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
1134			return -1;
1135		x <<= 1;
1136		if (x & k)
1137			x ^= poly;
1138	}
1139	bch->a_pow_tab[GF_N(bch)] = 1;
1140	bch->a_log_tab[0] = 0;
1141
1142	return 0;
1143}
1144
1145/*
1146 * compute generator polynomial remainder tables for fast encoding
1147 */
1148static void build_mod8_tables(struct bch_control *bch, const uint32_t *g)
1149{
1150	int i, j, b, d;
1151	uint32_t data, hi, lo, *tab;
1152	const int l = BCH_ECC_WORDS(bch);
1153	const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32);
1154	const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32);
1155
1156	memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab));
1157
1158	for (i = 0; i < 256; i++) {
1159		/* p(X)=i is a small polynomial of weight <= 8 */
1160		for (b = 0; b < 4; b++) {
1161			/* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */
1162			tab = bch->mod8_tab + (b*256+i)*l;
1163			data = i << (8*b);
1164			while (data) {
1165				d = deg(data);
1166				/* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */
1167				data ^= g[0] >> (31-d);
1168				for (j = 0; j < ecclen; j++) {
1169					hi = (d < 31) ? g[j] << (d+1) : 0;
1170					lo = (j+1 < plen) ?
1171						g[j+1] >> (31-d) : 0;
1172					tab[j] ^= hi|lo;
1173				}
1174			}
1175		}
1176	}
1177}
1178
1179/*
1180 * build a base for factoring degree 2 polynomials
1181 */
1182static int build_deg2_base(struct bch_control *bch)
1183{
1184	const int m = GF_M(bch);
1185	int i, j, r;
1186	unsigned int sum, x, y, remaining, ak = 0, xi[BCH_MAX_M];
1187
1188	/* find k s.t. Tr(a^k) = 1 and 0 <= k < m */
1189	for (i = 0; i < m; i++) {
1190		for (j = 0, sum = 0; j < m; j++)
1191			sum ^= a_pow(bch, i*(1 << j));
1192
1193		if (sum) {
1194			ak = bch->a_pow_tab[i];
1195			break;
1196		}
1197	}
1198	/* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */
1199	remaining = m;
1200	memset(xi, 0, sizeof(xi));
1201
1202	for (x = 0; (x <= GF_N(bch)) && remaining; x++) {
1203		y = gf_sqr(bch, x)^x;
1204		for (i = 0; i < 2; i++) {
1205			r = a_log(bch, y);
1206			if (y && (r < m) && !xi[r]) {
1207				bch->xi_tab[r] = x;
1208				xi[r] = 1;
1209				remaining--;
1210				dbg("x%d = %x\n", r, x);
1211				break;
1212			}
1213			y ^= ak;
1214		}
1215	}
1216	/* should not happen but check anyway */
1217	return remaining ? -1 : 0;
1218}
1219
1220static void *bch_alloc(size_t size, int *err)
1221{
1222	void *ptr;
1223
1224	ptr = kmalloc(size, GFP_KERNEL);
1225	if (ptr == NULL)
1226		*err = 1;
1227	return ptr;
1228}
1229
1230/*
1231 * compute generator polynomial for given (m,t) parameters.
1232 */
1233static uint32_t *compute_generator_polynomial(struct bch_control *bch)
1234{
1235	const unsigned int m = GF_M(bch);
1236	const unsigned int t = GF_T(bch);
1237	int n, err = 0;
1238	unsigned int i, j, nbits, r, word, *roots;
1239	struct gf_poly *g;
1240	uint32_t *genpoly;
1241
1242	g = bch_alloc(GF_POLY_SZ(m*t), &err);
1243	roots = bch_alloc((bch->n+1)*sizeof(*roots), &err);
1244	genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err);
1245
1246	if (err) {
1247		kfree(genpoly);
1248		genpoly = NULL;
1249		goto finish;
1250	}
1251
1252	/* enumerate all roots of g(X) */
1253	memset(roots , 0, (bch->n+1)*sizeof(*roots));
1254	for (i = 0; i < t; i++) {
1255		for (j = 0, r = 2*i+1; j < m; j++) {
1256			roots[r] = 1;
1257			r = mod_s(bch, 2*r);
1258		}
1259	}
1260	/* build generator polynomial g(X) */
1261	g->deg = 0;
1262	g->c[0] = 1;
1263	for (i = 0; i < GF_N(bch); i++) {
1264		if (roots[i]) {
1265			/* multiply g(X) by (X+root) */
1266			r = bch->a_pow_tab[i];
1267			g->c[g->deg+1] = 1;
1268			for (j = g->deg; j > 0; j--)
1269				g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1];
1270
1271			g->c[0] = gf_mul(bch, g->c[0], r);
1272			g->deg++;
1273		}
1274	}
1275	/* store left-justified binary representation of g(X) */
1276	n = g->deg+1;
1277	i = 0;
1278
1279	while (n > 0) {
1280		nbits = (n > 32) ? 32 : n;
1281		for (j = 0, word = 0; j < nbits; j++) {
1282			if (g->c[n-1-j])
1283				word |= 1u << (31-j);
1284		}
1285		genpoly[i++] = word;
1286		n -= nbits;
1287	}
1288	bch->ecc_bits = g->deg;
1289
1290finish:
1291	kfree(g);
1292	kfree(roots);
1293
1294	return genpoly;
1295}
1296
1297/**
1298 * bch_init - initialize a BCH encoder/decoder
1299 * @m:          Galois field order, should be in the range 5-15
1300 * @t:          maximum error correction capability, in bits
1301 * @prim_poly:  user-provided primitive polynomial (or 0 to use default)
1302 * @swap_bits:  swap bits within data and syndrome bytes
1303 *
1304 * Returns:
1305 *  a newly allocated BCH control structure if successful, NULL otherwise
1306 *
1307 * This initialization can take some time, as lookup tables are built for fast
1308 * encoding/decoding; make sure not to call this function from a time critical
1309 * path. Usually, bch_init() should be called on module/driver init and
1310 * bch_free() should be called to release memory on exit.
1311 *
1312 * You may provide your own primitive polynomial of degree @m in argument
1313 * @prim_poly, or let bch_init() use its default polynomial.
1314 *
1315 * Once bch_init() has successfully returned a pointer to a newly allocated
1316 * BCH control structure, ecc length in bytes is given by member @ecc_bytes of
1317 * the structure.
1318 */
1319struct bch_control *bch_init(int m, int t, unsigned int prim_poly,
1320			     bool swap_bits)
1321{
1322	int err = 0;
1323	unsigned int i, words;
1324	uint32_t *genpoly;
1325	struct bch_control *bch = NULL;
1326
1327	const int min_m = 5;
1328
1329	/* default primitive polynomials */
1330	static const unsigned int prim_poly_tab[] = {
1331		0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b,
1332		0x402b, 0x8003,
1333	};
1334
1335#if defined(CONFIG_BCH_CONST_PARAMS)
1336	if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) {
1337		printk(KERN_ERR "bch encoder/decoder was configured to support "
1338		       "parameters m=%d, t=%d only!\n",
1339		       CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
1340		goto fail;
1341	}
1342#endif
1343	if ((m < min_m) || (m > BCH_MAX_M))
1344		/*
1345		 * values of m greater than 15 are not currently supported;
1346		 * supporting m > 15 would require changing table base type
1347		 * (uint16_t) and a small patch in matrix transposition
1348		 */
1349		goto fail;
1350
1351	if (t > BCH_MAX_T)
1352		/*
1353		 * we can support larger than 64 bits if necessary, at the
1354		 * cost of higher stack usage.
1355		 */
1356		goto fail;
1357
1358	/* sanity checks */
1359	if ((t < 1) || (m*t >= ((1 << m)-1)))
1360		/* invalid t value */
1361		goto fail;
1362
1363	/* select a primitive polynomial for generating GF(2^m) */
1364	if (prim_poly == 0)
1365		prim_poly = prim_poly_tab[m-min_m];
1366
1367	bch = kzalloc(sizeof(*bch), GFP_KERNEL);
1368	if (bch == NULL)
1369		goto fail;
1370
1371	bch->m = m;
1372	bch->t = t;
1373	bch->n = (1 << m)-1;
1374	words  = DIV_ROUND_UP(m*t, 32);
1375	bch->ecc_bytes = DIV_ROUND_UP(m*t, 8);
1376	bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err);
1377	bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err);
1378	bch->mod8_tab  = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err);
1379	bch->ecc_buf   = bch_alloc(words*sizeof(*bch->ecc_buf), &err);
1380	bch->ecc_buf2  = bch_alloc(words*sizeof(*bch->ecc_buf2), &err);
1381	bch->xi_tab    = bch_alloc(m*sizeof(*bch->xi_tab), &err);
1382	bch->syn       = bch_alloc(2*t*sizeof(*bch->syn), &err);
1383	bch->cache     = bch_alloc(2*t*sizeof(*bch->cache), &err);
1384	bch->elp       = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err);
1385	bch->swap_bits = swap_bits;
1386
1387	for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1388		bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err);
1389
1390	if (err)
1391		goto fail;
1392
1393	err = build_gf_tables(bch, prim_poly);
1394	if (err)
1395		goto fail;
1396
1397	/* use generator polynomial for computing encoding tables */
1398	genpoly = compute_generator_polynomial(bch);
1399	if (genpoly == NULL)
1400		goto fail;
1401
1402	build_mod8_tables(bch, genpoly);
1403	kfree(genpoly);
1404
1405	err = build_deg2_base(bch);
1406	if (err)
1407		goto fail;
1408
1409	return bch;
1410
1411fail:
1412	bch_free(bch);
1413	return NULL;
1414}
1415EXPORT_SYMBOL_GPL(bch_init);
1416
1417/**
1418 *  bch_free - free the BCH control structure
1419 *  @bch:    BCH control structure to release
1420 */
1421void bch_free(struct bch_control *bch)
1422{
1423	unsigned int i;
1424
1425	if (bch) {
1426		kfree(bch->a_pow_tab);
1427		kfree(bch->a_log_tab);
1428		kfree(bch->mod8_tab);
1429		kfree(bch->ecc_buf);
1430		kfree(bch->ecc_buf2);
1431		kfree(bch->xi_tab);
1432		kfree(bch->syn);
1433		kfree(bch->cache);
1434		kfree(bch->elp);
1435
1436		for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1437			kfree(bch->poly_2t[i]);
1438
1439		kfree(bch);
1440	}
1441}
1442EXPORT_SYMBOL_GPL(bch_free);
1443
1444MODULE_LICENSE("GPL");
1445MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
1446MODULE_DESCRIPTION("Binary BCH encoder/decoder");

   1/*
   2 * Generic binary BCH encoding/decoding library
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms of the GNU General Public License version 2 as published by
   6 * the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 *
  13 * You should have received a copy of the GNU General Public License along with
  14 * this program; if not, write to the Free Software Foundation, Inc., 51
  15 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  16 *
  17 * Copyright © 2011 Parrot S.A.
  18 *
  19 * Author: Ivan Djelic <ivan.djelic@parrot.com>
  20 *
  21 * Description:
  22 *
  23 * This library provides runtime configurable encoding/decoding of binary
  24 * Bose-Chaudhuri-Hocquenghem (BCH) codes.
  25 *
  26 * Call bch_init to get a pointer to a newly allocated bch_control structure for
  27 * the given m (Galois field order), t (error correction capability) and
  28 * (optional) primitive polynomial parameters.
  29 *
  30 * Call bch_encode to compute and store ecc parity bytes to a given buffer.
  31 * Call bch_decode to detect and locate errors in received data.
  32 *
  33 * On systems supporting hw BCH features, intermediate results may be provided
  34 * to bch_decode in order to skip certain steps. See bch_decode() documentation
  35 * for details.
  36 *
  37 * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
  38 * parameters m and t; thus allowing extra compiler optimizations and providing
  39 * better (up to 2x) encoding performance. Using this option makes sense when
  40 * (m,t) are fixed and known in advance, e.g. when using BCH error correction
  41 * on a particular NAND flash device.
  42 *
  43 * Algorithmic details:
  44 *
  45 * Encoding is performed by processing 32 input bits in parallel, using 4
  46 * remainder lookup tables.
  47 *
  48 * The final stage of decoding involves the following internal steps:
  49 * a. Syndrome computation
  50 * b. Error locator polynomial computation using Berlekamp-Massey algorithm
  51 * c. Error locator root finding (by far the most expensive step)
  52 *
  53 * In this implementation, step c is not performed using the usual Chien search.
  54 * Instead, an alternative approach described in [1] is used. It consists in
  55 * factoring the error locator polynomial using the Berlekamp Trace algorithm
  56 * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
  57 * solving techniques [2] are used. The resulting algorithm, called BTZ, yields
  58 * much better performance than Chien search for usual (m,t) values (typically
  59 * m >= 13, t < 32, see [1]).
  60 *
  61 * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
  62 * of characteristic 2, in: Western European Workshop on Research in Cryptology
  63 * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
  64 * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
  65 * finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
  66 */
  67
  68#include <linux/kernel.h>
  69#include <linux/errno.h>
  70#include <linux/init.h>
  71#include <linux/module.h>
  72#include <linux/slab.h>
  73#include <linux/bitops.h>
  74#include <linux/bitrev.h>
  75#include <asm/byteorder.h>
  76#include <linux/bch.h>
  77
  78#if defined(CONFIG_BCH_CONST_PARAMS)
  79#define GF_M(_p)               (CONFIG_BCH_CONST_M)
  80#define GF_T(_p)               (CONFIG_BCH_CONST_T)
  81#define GF_N(_p)               ((1 << (CONFIG_BCH_CONST_M))-1)
  82#define BCH_MAX_M              (CONFIG_BCH_CONST_M)
  83#define BCH_MAX_T	       (CONFIG_BCH_CONST_T)
  84#else
  85#define GF_M(_p)               ((_p)->m)
  86#define GF_T(_p)               ((_p)->t)
  87#define GF_N(_p)               ((_p)->n)
  88#define BCH_MAX_M              15 /* 2KB */
  89#define BCH_MAX_T              64 /* 64 bit correction */
  90#endif
  91
  92#define BCH_ECC_WORDS(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
  93#define BCH_ECC_BYTES(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
  94
  95#define BCH_ECC_MAX_WORDS      DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32)
  96
  97#ifndef dbg
  98#define dbg(_fmt, args...)     do {} while (0)
  99#endif
 100
 101/*
 102 * represent a polynomial over GF(2^m)
 103 */
 104struct gf_poly {
 105	unsigned int deg;    /* polynomial degree */
 106	unsigned int c[];   /* polynomial terms */
 107};
 108
 109/* given its degree, compute a polynomial size in bytes */
 110#define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))
 111
 112/* polynomial of degree 1 */
 113struct gf_poly_deg1 {
 114	struct gf_poly poly;
 115	unsigned int   c[2];
 116};
 117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 118static u8 swap_bits(struct bch_control *bch, u8 in)
 119{
 120	if (!bch->swap_bits)
 121		return in;
 122
 123	return bitrev8(in);
 124}
 125
 126/*
 127 * same as bch_encode(), but process input data one byte at a time
 128 */
 129static void bch_encode_unaligned(struct bch_control *bch,
 130				 const unsigned char *data, unsigned int len,
 131				 uint32_t *ecc)
 132{
 133	int i;
 134	const uint32_t *p;
 135	const int l = BCH_ECC_WORDS(bch)-1;
 136
 137	while (len--) {
 138		u8 tmp = swap_bits(bch, *data++);
 139
 140		p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(tmp)) & 0xff);
 141
 142		for (i = 0; i < l; i++)
 143			ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++);
 144
 145		ecc[l] = (ecc[l] << 8)^(*p);
 146	}
 147}
 148
 149/*
 150 * convert ecc bytes to aligned, zero-padded 32-bit ecc words
 151 */
 152static void load_ecc8(struct bch_control *bch, uint32_t *dst,
 153		      const uint8_t *src)
 154{
 155	uint8_t pad[4] = {0, 0, 0, 0};
 156	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
 157
 158	for (i = 0; i < nwords; i++, src += 4)
 159		dst[i] = ((u32)swap_bits(bch, src[0]) << 24) |
 160			((u32)swap_bits(bch, src[1]) << 16) |
 161			((u32)swap_bits(bch, src[2]) << 8) |
 162			swap_bits(bch, src[3]);
 163
 164	memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords);
 165	dst[nwords] = ((u32)swap_bits(bch, pad[0]) << 24) |
 166		((u32)swap_bits(bch, pad[1]) << 16) |
 167		((u32)swap_bits(bch, pad[2]) << 8) |
 168		swap_bits(bch, pad[3]);
 169}
 170
 171/*
 172 * convert 32-bit ecc words to ecc bytes
 173 */
 174static void store_ecc8(struct bch_control *bch, uint8_t *dst,
 175		       const uint32_t *src)
 176{
 177	uint8_t pad[4];
 178	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
 179
 180	for (i = 0; i < nwords; i++) {
 181		*dst++ = swap_bits(bch, src[i] >> 24);
 182		*dst++ = swap_bits(bch, src[i] >> 16);
 183		*dst++ = swap_bits(bch, src[i] >> 8);
 184		*dst++ = swap_bits(bch, src[i]);
 185	}
 186	pad[0] = swap_bits(bch, src[nwords] >> 24);
 187	pad[1] = swap_bits(bch, src[nwords] >> 16);
 188	pad[2] = swap_bits(bch, src[nwords] >> 8);
 189	pad[3] = swap_bits(bch, src[nwords]);
 190	memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
 191}
 192
 193/**
 194 * bch_encode - calculate BCH ecc parity of data
 195 * @bch:   BCH control structure
 196 * @data:  data to encode
 197 * @len:   data length in bytes
 198 * @ecc:   ecc parity data, must be initialized by caller
 199 *
 200 * The @ecc parity array is used both as input and output parameter, in order to
 201 * allow incremental computations. It should be of the size indicated by member
 202 * @ecc_bytes of @bch, and should be initialized to 0 before the first call.
 203 *
 204 * The exact number of computed ecc parity bits is given by member @ecc_bits of
 205 * @bch; it may be less than m*t for large values of t.
 206 */
 207void bch_encode(struct bch_control *bch, const uint8_t *data,
 208		unsigned int len, uint8_t *ecc)
 209{
 210	const unsigned int l = BCH_ECC_WORDS(bch)-1;
 211	unsigned int i, mlen;
 212	unsigned long m;
 213	uint32_t w, r[BCH_ECC_MAX_WORDS];
 214	const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r);
 215	const uint32_t * const tab0 = bch->mod8_tab;
 216	const uint32_t * const tab1 = tab0 + 256*(l+1);
 217	const uint32_t * const tab2 = tab1 + 256*(l+1);
 218	const uint32_t * const tab3 = tab2 + 256*(l+1);
 219	const uint32_t *pdata, *p0, *p1, *p2, *p3;
 220
 221	if (WARN_ON(r_bytes > sizeof(r)))
 222		return;
 223
 224	if (ecc) {
 225		/* load ecc parity bytes into internal 32-bit buffer */
 226		load_ecc8(bch, bch->ecc_buf, ecc);
 227	} else {
 228		memset(bch->ecc_buf, 0, r_bytes);
 229	}
 230
 231	/* process first unaligned data bytes */
 232	m = ((unsigned long)data) & 3;
 233	if (m) {
 234		mlen = (len < (4-m)) ? len : 4-m;
 235		bch_encode_unaligned(bch, data, mlen, bch->ecc_buf);
 236		data += mlen;
 237		len  -= mlen;
 238	}
 239
 240	/* process 32-bit aligned data words */
 241	pdata = (uint32_t *)data;
 242	mlen  = len/4;
 243	data += 4*mlen;
 244	len  -= 4*mlen;
 245	memcpy(r, bch->ecc_buf, r_bytes);
 246
 247	/*
 248	 * split each 32-bit word into 4 polynomials of weight 8 as follows:
 249	 *
 250	 * 31 ...24  23 ...16  15 ... 8  7 ... 0
 251	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt
 252	 *                               tttttttt  mod g = r0 (precomputed)
 253	 *                     zzzzzzzz  00000000  mod g = r1 (precomputed)
 254	 *           yyyyyyyy  00000000  00000000  mod g = r2 (precomputed)
 255	 * xxxxxxxx  00000000  00000000  00000000  mod g = r3 (precomputed)
 256	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt  mod g = r0^r1^r2^r3
 257	 */
 258	while (mlen--) {
 259		/* input data is read in big-endian format */
 260		w = cpu_to_be32(*pdata++);
 261		if (bch->swap_bits)
 262			w = (u32)swap_bits(bch, w) |
 263			    ((u32)swap_bits(bch, w >> 8) << 8) |
 264			    ((u32)swap_bits(bch, w >> 16) << 16) |
 265			    ((u32)swap_bits(bch, w >> 24) << 24);
 266		w ^= r[0];
 267		p0 = tab0 + (l+1)*((w >>  0) & 0xff);
 268		p1 = tab1 + (l+1)*((w >>  8) & 0xff);
 269		p2 = tab2 + (l+1)*((w >> 16) & 0xff);
 270		p3 = tab3 + (l+1)*((w >> 24) & 0xff);
 271
 272		for (i = 0; i < l; i++)
 273			r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i];
 274
 275		r[l] = p0[l]^p1[l]^p2[l]^p3[l];
 276	}
 277	memcpy(bch->ecc_buf, r, r_bytes);
 278
 279	/* process last unaligned bytes */
 280	if (len)
 281		bch_encode_unaligned(bch, data, len, bch->ecc_buf);
 282
 283	/* store ecc parity bytes into original parity buffer */
 284	if (ecc)
 285		store_ecc8(bch, ecc, bch->ecc_buf);
 286}
 287EXPORT_SYMBOL_GPL(bch_encode);
 288
 289static inline int modulo(struct bch_control *bch, unsigned int v)
 290{
 291	const unsigned int n = GF_N(bch);
 292	while (v >= n) {
 293		v -= n;
 294		v = (v & n) + (v >> GF_M(bch));
 295	}
 296	return v;
 297}
 298
 299/*
 300 * shorter and faster modulo function, only works when v < 2N.
 301 */
 302static inline int mod_s(struct bch_control *bch, unsigned int v)
 303{
 304	const unsigned int n = GF_N(bch);
 305	return (v < n) ? v : v-n;
 306}
 307
 308static inline int deg(unsigned int poly)
 309{
 310	/* polynomial degree is the most-significant bit index */
 311	return fls(poly)-1;
 312}
 313
 314static inline int parity(unsigned int x)
 315{
 316	/*
 317	 * public domain code snippet, lifted from
 318	 * http://www-graphics.stanford.edu/~seander/bithacks.html
 319	 */
 320	x ^= x >> 1;
 321	x ^= x >> 2;
 322	x = (x & 0x11111111U) * 0x11111111U;
 323	return (x >> 28) & 1;
 324}
 325
 326/* Galois field basic operations: multiply, divide, inverse, etc. */
 327
 328static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a,
 329				  unsigned int b)
 330{
 331	return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
 332					       bch->a_log_tab[b])] : 0;
 333}
 334
 335static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a)
 336{
 337	return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0;
 338}
 339
 340static inline unsigned int gf_div(struct bch_control *bch, unsigned int a,
 341				  unsigned int b)
 342{
 343	return a ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
 344					GF_N(bch)-bch->a_log_tab[b])] : 0;
 345}
 346
 347static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a)
 348{
 349	return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
 350}
 351
 352static inline unsigned int a_pow(struct bch_control *bch, int i)
 353{
 354	return bch->a_pow_tab[modulo(bch, i)];
 355}
 356
 357static inline int a_log(struct bch_control *bch, unsigned int x)
 358{
 359	return bch->a_log_tab[x];
 360}
 361
 362static inline int a_ilog(struct bch_control *bch, unsigned int x)
 363{
 364	return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
 365}
 366
 367/*
 368 * compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
 369 */
 370static void compute_syndromes(struct bch_control *bch, uint32_t *ecc,
 371			      unsigned int *syn)
 372{
 373	int i, j, s;
 374	unsigned int m;
 375	uint32_t poly;
 376	const int t = GF_T(bch);
 377
 378	s = bch->ecc_bits;
 379
 380	/* make sure extra bits in last ecc word are cleared */
 381	m = ((unsigned int)s) & 31;
 382	if (m)
 383		ecc[s/32] &= ~((1u << (32-m))-1);
 384	memset(syn, 0, 2*t*sizeof(*syn));
 385
 386	/* compute v(a^j) for j=1 .. 2t-1 */
 387	do {
 388		poly = *ecc++;
 389		s -= 32;
 390		while (poly) {
 391			i = deg(poly);
 392			for (j = 0; j < 2*t; j += 2)
 393				syn[j] ^= a_pow(bch, (j+1)*(i+s));
 394
 395			poly ^= (1 << i);
 396		}
 397	} while (s > 0);
 398
 399	/* v(a^(2j)) = v(a^j)^2 */
 400	for (j = 0; j < t; j++)
 401		syn[2*j+1] = gf_sqr(bch, syn[j]);
 402}
 403
 404static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src)
 405{
 406	memcpy(dst, src, GF_POLY_SZ(src->deg));
 407}
 408
 409static int compute_error_locator_polynomial(struct bch_control *bch,
 410					    const unsigned int *syn)
 411{
 412	const unsigned int t = GF_T(bch);
 413	const unsigned int n = GF_N(bch);
 414	unsigned int i, j, tmp, l, pd = 1, d = syn[0];
 415	struct gf_poly *elp = bch->elp;
 416	struct gf_poly *pelp = bch->poly_2t[0];
 417	struct gf_poly *elp_copy = bch->poly_2t[1];
 418	int k, pp = -1;
 419
 420	memset(pelp, 0, GF_POLY_SZ(2*t));
 421	memset(elp, 0, GF_POLY_SZ(2*t));
 422
 423	pelp->deg = 0;
 424	pelp->c[0] = 1;
 425	elp->deg = 0;
 426	elp->c[0] = 1;
 427
 428	/* use simplified binary Berlekamp-Massey algorithm */
 429	for (i = 0; (i < t) && (elp->deg <= t); i++) {
 430		if (d) {
 431			k = 2*i-pp;
 432			gf_poly_copy(elp_copy, elp);
 433			/* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */
 434			tmp = a_log(bch, d)+n-a_log(bch, pd);
 435			for (j = 0; j <= pelp->deg; j++) {
 436				if (pelp->c[j]) {
 437					l = a_log(bch, pelp->c[j]);
 438					elp->c[j+k] ^= a_pow(bch, tmp+l);
 439				}
 440			}
 441			/* compute l[i+1] = max(l[i]->c[l[p]+2*(i-p]) */
 442			tmp = pelp->deg+k;
 443			if (tmp > elp->deg) {
 444				elp->deg = tmp;
 445				gf_poly_copy(pelp, elp_copy);
 446				pd = d;
 447				pp = 2*i;
 448			}
 449		}
 450		/* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */
 451		if (i < t-1) {
 452			d = syn[2*i+2];
 453			for (j = 1; j <= elp->deg; j++)
 454				d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]);
 455		}
 456	}
 457	dbg("elp=%s\n", gf_poly_str(elp));
 458	return (elp->deg > t) ? -1 : (int)elp->deg;
 459}
 460
 461/*
 462 * solve a m x m linear system in GF(2) with an expected number of solutions,
 463 * and return the number of found solutions
 464 */
 465static int solve_linear_system(struct bch_control *bch, unsigned int *rows,
 466			       unsigned int *sol, int nsol)
 467{
 468	const int m = GF_M(bch);
 469	unsigned int tmp, mask;
 470	int rem, c, r, p, k, param[BCH_MAX_M];
 471
 472	k = 0;
 473	mask = 1 << m;
 474
 475	/* Gaussian elimination */
 476	for (c = 0; c < m; c++) {
 477		rem = 0;
 478		p = c-k;
 479		/* find suitable row for elimination */
 480		for (r = p; r < m; r++) {
 481			if (rows[r] & mask) {
 482				if (r != p)
 483					swap(rows[r], rows[p]);
 
 
 
 484				rem = r+1;
 485				break;
 486			}
 487		}
 488		if (rem) {
 489			/* perform elimination on remaining rows */
 490			tmp = rows[p];
 491			for (r = rem; r < m; r++) {
 492				if (rows[r] & mask)
 493					rows[r] ^= tmp;
 494			}
 495		} else {
 496			/* elimination not needed, store defective row index */
 497			param[k++] = c;
 498		}
 499		mask >>= 1;
 500	}
 501	/* rewrite system, inserting fake parameter rows */
 502	if (k > 0) {
 503		p = k;
 504		for (r = m-1; r >= 0; r--) {
 505			if ((r > m-1-k) && rows[r])
 506				/* system has no solution */
 507				return 0;
 508
 509			rows[r] = (p && (r == param[p-1])) ?
 510				p--, 1u << (m-r) : rows[r-p];
 511		}
 512	}
 513
 514	if (nsol != (1 << k))
 515		/* unexpected number of solutions */
 516		return 0;
 517
 518	for (p = 0; p < nsol; p++) {
 519		/* set parameters for p-th solution */
 520		for (c = 0; c < k; c++)
 521			rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1);
 522
 523		/* compute unique solution */
 524		tmp = 0;
 525		for (r = m-1; r >= 0; r--) {
 526			mask = rows[r] & (tmp|1);
 527			tmp |= parity(mask) << (m-r);
 528		}
 529		sol[p] = tmp >> 1;
 530	}
 531	return nsol;
 532}
 533
 534/*
 535 * this function builds and solves a linear system for finding roots of a degree
 536 * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
 537 */
 538static int find_affine4_roots(struct bch_control *bch, unsigned int a,
 539			      unsigned int b, unsigned int c,
 540			      unsigned int *roots)
 541{
 542	int i, j, k;
 543	const int m = GF_M(bch);
 544	unsigned int mask = 0xff, t, rows[16] = {0,};
 545
 546	j = a_log(bch, b);
 547	k = a_log(bch, a);
 548	rows[0] = c;
 549
 550	/* build linear system to solve X^4+aX^2+bX+c = 0 */
 551	for (i = 0; i < m; i++) {
 552		rows[i+1] = bch->a_pow_tab[4*i]^
 553			(a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^
 554			(b ? bch->a_pow_tab[mod_s(bch, j)] : 0);
 555		j++;
 556		k += 2;
 557	}
 558	/*
 559	 * transpose 16x16 matrix before passing it to linear solver
 560	 * warning: this code assumes m < 16
 561	 */
 562	for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) {
 563		for (k = 0; k < 16; k = (k+j+1) & ~j) {
 564			t = ((rows[k] >> j)^rows[k+j]) & mask;
 565			rows[k] ^= (t << j);
 566			rows[k+j] ^= t;
 567		}
 568	}
 569	return solve_linear_system(bch, rows, roots, 4);
 570}
 571
 572/*
 573 * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
 574 */
 575static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly,
 576				unsigned int *roots)
 577{
 578	int n = 0;
 579
 580	if (poly->c[0])
 581		/* poly[X] = bX+c with c!=0, root=c/b */
 582		roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+
 583				   bch->a_log_tab[poly->c[1]]);
 584	return n;
 585}
 586
 587/*
 588 * compute roots of a degree 2 polynomial over GF(2^m)
 589 */
 590static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly,
 591				unsigned int *roots)
 592{
 593	int n = 0, i, l0, l1, l2;
 594	unsigned int u, v, r;
 595
 596	if (poly->c[0] && poly->c[1]) {
 597
 598		l0 = bch->a_log_tab[poly->c[0]];
 599		l1 = bch->a_log_tab[poly->c[1]];
 600		l2 = bch->a_log_tab[poly->c[2]];
 601
 602		/* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */
 603		u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1));
 604		/*
 605		 * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
 606		 * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
 607		 * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
 608		 * i.e. r and r+1 are roots iff Tr(u)=0
 609		 */
 610		r = 0;
 611		v = u;
 612		while (v) {
 613			i = deg(v);
 614			r ^= bch->xi_tab[i];
 615			v ^= (1 << i);
 616		}
 617		/* verify root */
 618		if ((gf_sqr(bch, r)^r) == u) {
 619			/* reverse z=a/bX transformation and compute log(1/r) */
 620			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
 621					    bch->a_log_tab[r]+l2);
 622			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
 623					    bch->a_log_tab[r^1]+l2);
 624		}
 625	}
 626	return n;
 627}
 628
 629/*
 630 * compute roots of a degree 3 polynomial over GF(2^m)
 631 */
 632static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly,
 633				unsigned int *roots)
 634{
 635	int i, n = 0;
 636	unsigned int a, b, c, a2, b2, c2, e3, tmp[4];
 637
 638	if (poly->c[0]) {
 639		/* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */
 640		e3 = poly->c[3];
 641		c2 = gf_div(bch, poly->c[0], e3);
 642		b2 = gf_div(bch, poly->c[1], e3);
 643		a2 = gf_div(bch, poly->c[2], e3);
 644
 645		/* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */
 646		c = gf_mul(bch, a2, c2);           /* c = a2c2      */
 647		b = gf_mul(bch, a2, b2)^c2;        /* b = a2b2 + c2 */
 648		a = gf_sqr(bch, a2)^b2;            /* a = a2^2 + b2 */
 649
 650		/* find the 4 roots of this affine polynomial */
 651		if (find_affine4_roots(bch, a, b, c, tmp) == 4) {
 652			/* remove a2 from final list of roots */
 653			for (i = 0; i < 4; i++) {
 654				if (tmp[i] != a2)
 655					roots[n++] = a_ilog(bch, tmp[i]);
 656			}
 657		}
 658	}
 659	return n;
 660}
 661
 662/*
 663 * compute roots of a degree 4 polynomial over GF(2^m)
 664 */
 665static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly,
 666				unsigned int *roots)
 667{
 668	int i, l, n = 0;
 669	unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4;
 670
 671	if (poly->c[0] == 0)
 672		return 0;
 673
 674	/* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */
 675	e4 = poly->c[4];
 676	d = gf_div(bch, poly->c[0], e4);
 677	c = gf_div(bch, poly->c[1], e4);
 678	b = gf_div(bch, poly->c[2], e4);
 679	a = gf_div(bch, poly->c[3], e4);
 680
 681	/* use Y=1/X transformation to get an affine polynomial */
 682	if (a) {
 683		/* first, eliminate cX by using z=X+e with ae^2+c=0 */
 684		if (c) {
 685			/* compute e such that e^2 = c/a */
 686			f = gf_div(bch, c, a);
 687			l = a_log(bch, f);
 688			l += (l & 1) ? GF_N(bch) : 0;
 689			e = a_pow(bch, l/2);
 690			/*
 691			 * use transformation z=X+e:
 692			 * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
 693			 * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
 694			 * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
 695			 * z^4 + az^3 +     b'z^2 + d'
 696			 */
 697			d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d;
 698			b = gf_mul(bch, a, e)^b;
 699		}
 700		/* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */
 701		if (d == 0)
 702			/* assume all roots have multiplicity 1 */
 703			return 0;
 704
 705		c2 = gf_inv(bch, d);
 706		b2 = gf_div(bch, a, d);
 707		a2 = gf_div(bch, b, d);
 708	} else {
 709		/* polynomial is already affine */
 710		c2 = d;
 711		b2 = c;
 712		a2 = b;
 713	}
 714	/* find the 4 roots of this affine polynomial */
 715	if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) {
 716		for (i = 0; i < 4; i++) {
 717			/* post-process roots (reverse transformations) */
 718			f = a ? gf_inv(bch, roots[i]) : roots[i];
 719			roots[i] = a_ilog(bch, f^e);
 720		}
 721		n = 4;
 722	}
 723	return n;
 724}
 725
 726/*
 727 * build monic, log-based representation of a polynomial
 728 */
 729static void gf_poly_logrep(struct bch_control *bch,
 730			   const struct gf_poly *a, int *rep)
 731{
 732	int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]);
 733
 734	/* represent 0 values with -1; warning, rep[d] is not set to 1 */
 735	for (i = 0; i < d; i++)
 736		rep[i] = a->c[i] ? mod_s(bch, a_log(bch, a->c[i])+l) : -1;
 737}
 738
 739/*
 740 * compute polynomial Euclidean division remainder in GF(2^m)[X]
 741 */
 742static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a,
 743			const struct gf_poly *b, int *rep)
 744{
 745	int la, p, m;
 746	unsigned int i, j, *c = a->c;
 747	const unsigned int d = b->deg;
 748
 749	if (a->deg < d)
 750		return;
 751
 752	/* reuse or compute log representation of denominator */
 753	if (!rep) {
 754		rep = bch->cache;
 755		gf_poly_logrep(bch, b, rep);
 756	}
 757
 758	for (j = a->deg; j >= d; j--) {
 759		if (c[j]) {
 760			la = a_log(bch, c[j]);
 761			p = j-d;
 762			for (i = 0; i < d; i++, p++) {
 763				m = rep[i];
 764				if (m >= 0)
 765					c[p] ^= bch->a_pow_tab[mod_s(bch,
 766								     m+la)];
 767			}
 768		}
 769	}
 770	a->deg = d-1;
 771	while (!c[a->deg] && a->deg)
 772		a->deg--;
 773}
 774
 775/*
 776 * compute polynomial Euclidean division quotient in GF(2^m)[X]
 777 */
 778static void gf_poly_div(struct bch_control *bch, struct gf_poly *a,
 779			const struct gf_poly *b, struct gf_poly *q)
 780{
 781	if (a->deg >= b->deg) {
 782		q->deg = a->deg-b->deg;
 783		/* compute a mod b (modifies a) */
 784		gf_poly_mod(bch, a, b, NULL);
 785		/* quotient is stored in upper part of polynomial a */
 786		memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int));
 787	} else {
 788		q->deg = 0;
 789		q->c[0] = 0;
 790	}
 791}
 792
 793/*
 794 * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
 795 */
 796static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a,
 797				   struct gf_poly *b)
 798{
 
 
 799	dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
 800
 801	if (a->deg < b->deg)
 802		swap(a, b);
 
 
 
 803
 804	while (b->deg > 0) {
 805		gf_poly_mod(bch, a, b, NULL);
 806		swap(a, b);
 
 
 807	}
 808
 809	dbg("%s\n", gf_poly_str(a));
 810
 811	return a;
 812}
 813
 814/*
 815 * Given a polynomial f and an integer k, compute Tr(a^kX) mod f
 816 * This is used in Berlekamp Trace algorithm for splitting polynomials
 817 */
 818static void compute_trace_bk_mod(struct bch_control *bch, int k,
 819				 const struct gf_poly *f, struct gf_poly *z,
 820				 struct gf_poly *out)
 821{
 822	const int m = GF_M(bch);
 823	int i, j;
 824
 825	/* z contains z^2j mod f */
 826	z->deg = 1;
 827	z->c[0] = 0;
 828	z->c[1] = bch->a_pow_tab[k];
 829
 830	out->deg = 0;
 831	memset(out, 0, GF_POLY_SZ(f->deg));
 832
 833	/* compute f log representation only once */
 834	gf_poly_logrep(bch, f, bch->cache);
 835
 836	for (i = 0; i < m; i++) {
 837		/* add a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */
 838		for (j = z->deg; j >= 0; j--) {
 839			out->c[j] ^= z->c[j];
 840			z->c[2*j] = gf_sqr(bch, z->c[j]);
 841			z->c[2*j+1] = 0;
 842		}
 843		if (z->deg > out->deg)
 844			out->deg = z->deg;
 845
 846		if (i < m-1) {
 847			z->deg *= 2;
 848			/* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */
 849			gf_poly_mod(bch, z, f, bch->cache);
 850		}
 851	}
 852	while (!out->c[out->deg] && out->deg)
 853		out->deg--;
 854
 855	dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
 856}
 857
 858/*
 859 * factor a polynomial using Berlekamp Trace algorithm (BTA)
 860 */
 861static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f,
 862			      struct gf_poly **g, struct gf_poly **h)
 863{
 864	struct gf_poly *f2 = bch->poly_2t[0];
 865	struct gf_poly *q  = bch->poly_2t[1];
 866	struct gf_poly *tk = bch->poly_2t[2];
 867	struct gf_poly *z  = bch->poly_2t[3];
 868	struct gf_poly *gcd;
 869
 870	dbg("factoring %s...\n", gf_poly_str(f));
 871
 872	*g = f;
 873	*h = NULL;
 874
 875	/* tk = Tr(a^k.X) mod f */
 876	compute_trace_bk_mod(bch, k, f, z, tk);
 877
 878	if (tk->deg > 0) {
 879		/* compute g = gcd(f, tk) (destructive operation) */
 880		gf_poly_copy(f2, f);
 881		gcd = gf_poly_gcd(bch, f2, tk);
 882		if (gcd->deg < f->deg) {
 883			/* compute h=f/gcd(f,tk); this will modify f and q */
 884			gf_poly_div(bch, f, gcd, q);
 885			/* store g and h in-place (clobbering f) */
 886			*h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly;
 887			gf_poly_copy(*g, gcd);
 888			gf_poly_copy(*h, q);
 889		}
 890	}
 891}
 892
 893/*
 894 * find roots of a polynomial, using BTZ algorithm; see the beginning of this
 895 * file for details
 896 */
 897static int find_poly_roots(struct bch_control *bch, unsigned int k,
 898			   struct gf_poly *poly, unsigned int *roots)
 899{
 900	int cnt;
 901	struct gf_poly *f1, *f2;
 902
 903	switch (poly->deg) {
 904		/* handle low degree polynomials with ad hoc techniques */
 905	case 1:
 906		cnt = find_poly_deg1_roots(bch, poly, roots);
 907		break;
 908	case 2:
 909		cnt = find_poly_deg2_roots(bch, poly, roots);
 910		break;
 911	case 3:
 912		cnt = find_poly_deg3_roots(bch, poly, roots);
 913		break;
 914	case 4:
 915		cnt = find_poly_deg4_roots(bch, poly, roots);
 916		break;
 917	default:
 918		/* factor polynomial using Berlekamp Trace Algorithm (BTA) */
 919		cnt = 0;
 920		if (poly->deg && (k <= GF_M(bch))) {
 921			factor_polynomial(bch, k, poly, &f1, &f2);
 922			if (f1)
 923				cnt += find_poly_roots(bch, k+1, f1, roots);
 924			if (f2)
 925				cnt += find_poly_roots(bch, k+1, f2, roots+cnt);
 926		}
 927		break;
 928	}
 929	return cnt;
 930}
 931
 932#if defined(USE_CHIEN_SEARCH)
 933/*
 934 * exhaustive root search (Chien) implementation - not used, included only for
 935 * reference/comparison tests
 936 */
 937static int chien_search(struct bch_control *bch, unsigned int len,
 938			struct gf_poly *p, unsigned int *roots)
 939{
 940	int m;
 941	unsigned int i, j, syn, syn0, count = 0;
 942	const unsigned int k = 8*len+bch->ecc_bits;
 943
 944	/* use a log-based representation of polynomial */
 945	gf_poly_logrep(bch, p, bch->cache);
 946	bch->cache[p->deg] = 0;
 947	syn0 = gf_div(bch, p->c[0], p->c[p->deg]);
 948
 949	for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
 950		/* compute elp(a^i) */
 951		for (j = 1, syn = syn0; j <= p->deg; j++) {
 952			m = bch->cache[j];
 953			if (m >= 0)
 954				syn ^= a_pow(bch, m+j*i);
 955		}
 956		if (syn == 0) {
 957			roots[count++] = GF_N(bch)-i;
 958			if (count == p->deg)
 959				break;
 960		}
 961	}
 962	return (count == p->deg) ? count : 0;
 963}
 964#define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
 965#endif /* USE_CHIEN_SEARCH */
 966
 967/**
 968 * bch_decode - decode received codeword and find bit error locations
 969 * @bch:      BCH control structure
 970 * @data:     received data, ignored if @calc_ecc is provided
 971 * @len:      data length in bytes, must always be provided
 972 * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
 973 * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
 974 * @syn:      hw computed syndrome data (if NULL, syndrome is calculated)
 975 * @errloc:   output array of error locations
 976 *
 977 * Returns:
 978 *  The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
 979 *  invalid parameters were provided
 980 *
 981 * Depending on the available hw BCH support and the need to compute @calc_ecc
 982 * separately (using bch_encode()), this function should be called with one of
 983 * the following parameter configurations -
 984 *
 985 * by providing @data and @recv_ecc only:
 986 *   bch_decode(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
 987 *
 988 * by providing @recv_ecc and @calc_ecc:
 989 *   bch_decode(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
 990 *
 991 * by providing ecc = recv_ecc XOR calc_ecc:
 992 *   bch_decode(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
 993 *
 994 * by providing syndrome results @syn:
 995 *   bch_decode(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
 996 *
 997 * Once bch_decode() has successfully returned with a positive value, error
 998 * locations returned in array @errloc should be interpreted as follows -
 999 *
1000 * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
1001 * data correction)
1002 *
1003 * if (errloc[n] < 8*len), then n-th error is located in data and can be
1004 * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
1005 *
1006 * Note that this function does not perform any data correction by itself, it
1007 * merely indicates error locations.
1008 */
1009int bch_decode(struct bch_control *bch, const uint8_t *data, unsigned int len,
1010	       const uint8_t *recv_ecc, const uint8_t *calc_ecc,
1011	       const unsigned int *syn, unsigned int *errloc)
1012{
1013	const unsigned int ecc_words = BCH_ECC_WORDS(bch);
1014	unsigned int nbits;
1015	int i, err, nroots;
1016	uint32_t sum;
1017
1018	/* sanity check: make sure data length can be handled */
1019	if (8*len > (bch->n-bch->ecc_bits))
1020		return -EINVAL;
1021
1022	/* if caller does not provide syndromes, compute them */
1023	if (!syn) {
1024		if (!calc_ecc) {
1025			/* compute received data ecc into an internal buffer */
1026			if (!data || !recv_ecc)
1027				return -EINVAL;
1028			bch_encode(bch, data, len, NULL);
1029		} else {
1030			/* load provided calculated ecc */
1031			load_ecc8(bch, bch->ecc_buf, calc_ecc);
1032		}
1033		/* load received ecc or assume it was XORed in calc_ecc */
1034		if (recv_ecc) {
1035			load_ecc8(bch, bch->ecc_buf2, recv_ecc);
1036			/* XOR received and calculated ecc */
1037			for (i = 0, sum = 0; i < (int)ecc_words; i++) {
1038				bch->ecc_buf[i] ^= bch->ecc_buf2[i];
1039				sum |= bch->ecc_buf[i];
1040			}
1041			if (!sum)
1042				/* no error found */
1043				return 0;
1044		}
1045		compute_syndromes(bch, bch->ecc_buf, bch->syn);
1046		syn = bch->syn;
1047	}
1048
1049	err = compute_error_locator_polynomial(bch, syn);
1050	if (err > 0) {
1051		nroots = find_poly_roots(bch, 1, bch->elp, errloc);
1052		if (err != nroots)
1053			err = -1;
1054	}
1055	if (err > 0) {
1056		/* post-process raw error locations for easier correction */
1057		nbits = (len*8)+bch->ecc_bits;
1058		for (i = 0; i < err; i++) {
1059			if (errloc[i] >= nbits) {
1060				err = -1;
1061				break;
1062			}
1063			errloc[i] = nbits-1-errloc[i];
1064			if (!bch->swap_bits)
1065				errloc[i] = (errloc[i] & ~7) |
1066					    (7-(errloc[i] & 7));
1067		}
1068	}
1069	return (err >= 0) ? err : -EBADMSG;
1070}
1071EXPORT_SYMBOL_GPL(bch_decode);
1072
1073/*
1074 * generate Galois field lookup tables
1075 */
1076static int build_gf_tables(struct bch_control *bch, unsigned int poly)
1077{
1078	unsigned int i, x = 1;
1079	const unsigned int k = 1 << deg(poly);
1080
1081	/* primitive polynomial must be of degree m */
1082	if (k != (1u << GF_M(bch)))
1083		return -1;
1084
1085	for (i = 0; i < GF_N(bch); i++) {
1086		bch->a_pow_tab[i] = x;
1087		bch->a_log_tab[x] = i;
1088		if (i && (x == 1))
1089			/* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
1090			return -1;
1091		x <<= 1;
1092		if (x & k)
1093			x ^= poly;
1094	}
1095	bch->a_pow_tab[GF_N(bch)] = 1;
1096	bch->a_log_tab[0] = 0;
1097
1098	return 0;
1099}
1100
1101/*
1102 * compute generator polynomial remainder tables for fast encoding
1103 */
1104static void build_mod8_tables(struct bch_control *bch, const uint32_t *g)
1105{
1106	int i, j, b, d;
1107	uint32_t data, hi, lo, *tab;
1108	const int l = BCH_ECC_WORDS(bch);
1109	const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32);
1110	const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32);
1111
1112	memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab));
1113
1114	for (i = 0; i < 256; i++) {
1115		/* p(X)=i is a small polynomial of weight <= 8 */
1116		for (b = 0; b < 4; b++) {
1117			/* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */
1118			tab = bch->mod8_tab + (b*256+i)*l;
1119			data = i << (8*b);
1120			while (data) {
1121				d = deg(data);
1122				/* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */
1123				data ^= g[0] >> (31-d);
1124				for (j = 0; j < ecclen; j++) {
1125					hi = (d < 31) ? g[j] << (d+1) : 0;
1126					lo = (j+1 < plen) ?
1127						g[j+1] >> (31-d) : 0;
1128					tab[j] ^= hi|lo;
1129				}
1130			}
1131		}
1132	}
1133}
1134
1135/*
1136 * build a base for factoring degree 2 polynomials
1137 */
1138static int build_deg2_base(struct bch_control *bch)
1139{
1140	const int m = GF_M(bch);
1141	int i, j, r;
1142	unsigned int sum, x, y, remaining, ak = 0, xi[BCH_MAX_M];
1143
1144	/* find k s.t. Tr(a^k) = 1 and 0 <= k < m */
1145	for (i = 0; i < m; i++) {
1146		for (j = 0, sum = 0; j < m; j++)
1147			sum ^= a_pow(bch, i*(1 << j));
1148
1149		if (sum) {
1150			ak = bch->a_pow_tab[i];
1151			break;
1152		}
1153	}
1154	/* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */
1155	remaining = m;
1156	memset(xi, 0, sizeof(xi));
1157
1158	for (x = 0; (x <= GF_N(bch)) && remaining; x++) {
1159		y = gf_sqr(bch, x)^x;
1160		for (i = 0; i < 2; i++) {
1161			r = a_log(bch, y);
1162			if (y && (r < m) && !xi[r]) {
1163				bch->xi_tab[r] = x;
1164				xi[r] = 1;
1165				remaining--;
1166				dbg("x%d = %x\n", r, x);
1167				break;
1168			}
1169			y ^= ak;
1170		}
1171	}
1172	/* should not happen but check anyway */
1173	return remaining ? -1 : 0;
1174}
1175
1176static void *bch_alloc(size_t size, int *err)
1177{
1178	void *ptr;
1179
1180	ptr = kmalloc(size, GFP_KERNEL);
1181	if (ptr == NULL)
1182		*err = 1;
1183	return ptr;
1184}
1185
1186/*
1187 * compute generator polynomial for given (m,t) parameters.
1188 */
1189static uint32_t *compute_generator_polynomial(struct bch_control *bch)
1190{
1191	const unsigned int m = GF_M(bch);
1192	const unsigned int t = GF_T(bch);
1193	int n, err = 0;
1194	unsigned int i, j, nbits, r, word, *roots;
1195	struct gf_poly *g;
1196	uint32_t *genpoly;
1197
1198	g = bch_alloc(GF_POLY_SZ(m*t), &err);
1199	roots = bch_alloc((bch->n+1)*sizeof(*roots), &err);
1200	genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err);
1201
1202	if (err) {
1203		kfree(genpoly);
1204		genpoly = NULL;
1205		goto finish;
1206	}
1207
1208	/* enumerate all roots of g(X) */
1209	memset(roots , 0, (bch->n+1)*sizeof(*roots));
1210	for (i = 0; i < t; i++) {
1211		for (j = 0, r = 2*i+1; j < m; j++) {
1212			roots[r] = 1;
1213			r = mod_s(bch, 2*r);
1214		}
1215	}
1216	/* build generator polynomial g(X) */
1217	g->deg = 0;
1218	g->c[0] = 1;
1219	for (i = 0; i < GF_N(bch); i++) {
1220		if (roots[i]) {
1221			/* multiply g(X) by (X+root) */
1222			r = bch->a_pow_tab[i];
1223			g->c[g->deg+1] = 1;
1224			for (j = g->deg; j > 0; j--)
1225				g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1];
1226
1227			g->c[0] = gf_mul(bch, g->c[0], r);
1228			g->deg++;
1229		}
1230	}
1231	/* store left-justified binary representation of g(X) */
1232	n = g->deg+1;
1233	i = 0;
1234
1235	while (n > 0) {
1236		nbits = (n > 32) ? 32 : n;
1237		for (j = 0, word = 0; j < nbits; j++) {
1238			if (g->c[n-1-j])
1239				word |= 1u << (31-j);
1240		}
1241		genpoly[i++] = word;
1242		n -= nbits;
1243	}
1244	bch->ecc_bits = g->deg;
1245
1246finish:
1247	kfree(g);
1248	kfree(roots);
1249
1250	return genpoly;
1251}
1252
1253/**
1254 * bch_init - initialize a BCH encoder/decoder
1255 * @m:          Galois field order, should be in the range 5-15
1256 * @t:          maximum error correction capability, in bits
1257 * @prim_poly:  user-provided primitive polynomial (or 0 to use default)
1258 * @swap_bits:  swap bits within data and syndrome bytes
1259 *
1260 * Returns:
1261 *  a newly allocated BCH control structure if successful, NULL otherwise
1262 *
1263 * This initialization can take some time, as lookup tables are built for fast
1264 * encoding/decoding; make sure not to call this function from a time critical
1265 * path. Usually, bch_init() should be called on module/driver init and
1266 * bch_free() should be called to release memory on exit.
1267 *
1268 * You may provide your own primitive polynomial of degree @m in argument
1269 * @prim_poly, or let bch_init() use its default polynomial.
1270 *
1271 * Once bch_init() has successfully returned a pointer to a newly allocated
1272 * BCH control structure, ecc length in bytes is given by member @ecc_bytes of
1273 * the structure.
1274 */
1275struct bch_control *bch_init(int m, int t, unsigned int prim_poly,
1276			     bool swap_bits)
1277{
1278	int err = 0;
1279	unsigned int i, words;
1280	uint32_t *genpoly;
1281	struct bch_control *bch = NULL;
1282
1283	const int min_m = 5;
1284
1285	/* default primitive polynomials */
1286	static const unsigned int prim_poly_tab[] = {
1287		0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b,
1288		0x402b, 0x8003,
1289	};
1290
1291#if defined(CONFIG_BCH_CONST_PARAMS)
1292	if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) {
1293		printk(KERN_ERR "bch encoder/decoder was configured to support "
1294		       "parameters m=%d, t=%d only!\n",
1295		       CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
1296		goto fail;
1297	}
1298#endif
1299	if ((m < min_m) || (m > BCH_MAX_M))
1300		/*
1301		 * values of m greater than 15 are not currently supported;
1302		 * supporting m > 15 would require changing table base type
1303		 * (uint16_t) and a small patch in matrix transposition
1304		 */
1305		goto fail;
1306
1307	if (t > BCH_MAX_T)
1308		/*
1309		 * we can support larger than 64 bits if necessary, at the
1310		 * cost of higher stack usage.
1311		 */
1312		goto fail;
1313
1314	/* sanity checks */
1315	if ((t < 1) || (m*t >= ((1 << m)-1)))
1316		/* invalid t value */
1317		goto fail;
1318
1319	/* select a primitive polynomial for generating GF(2^m) */
1320	if (prim_poly == 0)
1321		prim_poly = prim_poly_tab[m-min_m];
1322
1323	bch = kzalloc(sizeof(*bch), GFP_KERNEL);
1324	if (bch == NULL)
1325		goto fail;
1326
1327	bch->m = m;
1328	bch->t = t;
1329	bch->n = (1 << m)-1;
1330	words  = DIV_ROUND_UP(m*t, 32);
1331	bch->ecc_bytes = DIV_ROUND_UP(m*t, 8);
1332	bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err);
1333	bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err);
1334	bch->mod8_tab  = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err);
1335	bch->ecc_buf   = bch_alloc(words*sizeof(*bch->ecc_buf), &err);
1336	bch->ecc_buf2  = bch_alloc(words*sizeof(*bch->ecc_buf2), &err);
1337	bch->xi_tab    = bch_alloc(m*sizeof(*bch->xi_tab), &err);
1338	bch->syn       = bch_alloc(2*t*sizeof(*bch->syn), &err);
1339	bch->cache     = bch_alloc(2*t*sizeof(*bch->cache), &err);
1340	bch->elp       = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err);
1341	bch->swap_bits = swap_bits;
1342
1343	for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1344		bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err);
1345
1346	if (err)
1347		goto fail;
1348
1349	err = build_gf_tables(bch, prim_poly);
1350	if (err)
1351		goto fail;
1352
1353	/* use generator polynomial for computing encoding tables */
1354	genpoly = compute_generator_polynomial(bch);
1355	if (genpoly == NULL)
1356		goto fail;
1357
1358	build_mod8_tables(bch, genpoly);
1359	kfree(genpoly);
1360
1361	err = build_deg2_base(bch);
1362	if (err)
1363		goto fail;
1364
1365	return bch;
1366
1367fail:
1368	bch_free(bch);
1369	return NULL;
1370}
1371EXPORT_SYMBOL_GPL(bch_init);
1372
1373/**
1374 *  bch_free - free the BCH control structure
1375 *  @bch:    BCH control structure to release
1376 */
1377void bch_free(struct bch_control *bch)
1378{
1379	unsigned int i;
1380
1381	if (bch) {
1382		kfree(bch->a_pow_tab);
1383		kfree(bch->a_log_tab);
1384		kfree(bch->mod8_tab);
1385		kfree(bch->ecc_buf);
1386		kfree(bch->ecc_buf2);
1387		kfree(bch->xi_tab);
1388		kfree(bch->syn);
1389		kfree(bch->cache);
1390		kfree(bch->elp);
1391
1392		for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1393			kfree(bch->poly_2t[i]);
1394
1395		kfree(bch);
1396	}
1397}
1398EXPORT_SYMBOL_GPL(bch_free);
1399
1400MODULE_LICENSE("GPL");
1401MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
1402MODULE_DESCRIPTION("Binary BCH encoder/decoder");