2 * This file contains an ECC algorithm that detects and corrects 1 bit
3 * errors in a 256 byte block of data.
5 * Copyright © 2008 Koninklijke Philips Electronics NV.
6 * Author: Frans Meulenbroeks
8 * Completely replaces the previous ECC implementation which was written by:
12 * Information on how this algorithm works and how it was developed
13 * can be found in Documentation/mtd/nand_ecc.txt
15 * This file is free software; you can redistribute it and/or modify it
16 * under the terms of the GNU General Public License as published by the
17 * Free Software Foundation; either version 2 or (at your option) any
20 * This file is distributed in the hope that it will be useful, but WITHOUT
21 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
22 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 * You should have received a copy of the GNU General Public License along
26 * with this file; if not, write to the Free Software Foundation, Inc.,
27 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
31 #include <linux/types.h>
32 #include <linux/kernel.h>
33 #include <linux/module.h>
34 #include <linux/mtd/mtd.h>
35 #include <linux/mtd/rawnand.h>
36 #include <linux/mtd/nand_ecc.h>
37 #include <asm/byteorder.h>
40 * invparity is a 256 byte table that contains the odd parity
41 * for each byte. So if the number of bits in a byte is even,
42 * the array element is 1, and when the number of bits is odd
43 * the array eleemnt is 0.
45 static const char invparity[256] = {
46 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
47 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
48 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
49 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
50 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
51 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
52 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
53 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
54 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
55 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
56 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
57 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
58 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
59 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
60 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
61 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
65 * bitsperbyte contains the number of bits per byte
66 * this is only used for testing and repairing parity
67 * (a precalculated value slightly improves performance)
69 static const char bitsperbyte[256] = {
70 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
71 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
72 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
73 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
74 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
75 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
76 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
77 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
78 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
79 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
80 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
81 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
82 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
83 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
84 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
85 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
89 * addressbits is a lookup table to filter out the bits from the xor-ed
90 * ECC data that identify the faulty location.
91 * this is only used for repairing parity
92 * see the comments in nand_correct_data for more details
94 static const char addressbits[256] = {
95 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
96 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
97 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
98 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
99 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
100 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
101 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
102 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
103 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
104 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
105 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
106 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
107 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
108 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
109 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
110 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
111 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
112 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
113 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
114 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
115 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
116 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
117 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
118 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
119 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
120 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
121 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
122 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
123 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
124 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
125 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
126 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f
130 * __nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
132 * @buf: input buffer with raw data
133 * @eccsize: data bytes per ECC step (256 or 512)
134 * @code: output buffer with ECC
135 * @sm_order: Smart Media byte ordering
137 void __nand_calculate_ecc(const unsigned char *buf, unsigned int eccsize,
138 unsigned char *code, bool sm_order)
141 const uint32_t *bp = (uint32_t *)buf;
142 /* 256 or 512 bytes/ecc */
143 const uint32_t eccsize_mult = eccsize >> 8;
144 uint32_t cur; /* current value in buffer */
145 /* rp0..rp15..rp17 are the various accumulated parities (per byte) */
146 uint32_t rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
147 uint32_t rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15, rp16;
148 uint32_t uninitialized_var(rp17); /* to make compiler happy */
149 uint32_t par; /* the cumulative parity for all data */
150 uint32_t tmppar; /* the cumulative parity for this iteration;
151 for rp12, rp14 and rp16 at the end of the
164 * The loop is unrolled a number of times;
165 * This avoids if statements to decide on which rp value to update
166 * Also we process the data by longwords.
167 * Note: passing unaligned data might give a performance penalty.
168 * It is assumed that the buffers are aligned.
169 * tmppar is the cumulative sum of this iteration.
170 * needed for calculating rp12, rp14, rp16 and par
171 * also used as a performance improvement for rp6, rp8 and rp10
173 for (i = 0; i < eccsize_mult << 2; i++) {
236 if (eccsize_mult == 2 && (i & 0x4) == 0)
241 * handle the fact that we use longword operations
242 * we'll bring rp4..rp14..rp16 back to single byte entities by
243 * shifting and xoring first fold the upper and lower 16 bits,
244 * then the upper and lower 8 bits.
255 rp10 ^= (rp10 >> 16);
258 rp12 ^= (rp12 >> 16);
261 rp14 ^= (rp14 >> 16);
264 if (eccsize_mult == 2) {
265 rp16 ^= (rp16 >> 16);
271 * we also need to calculate the row parity for rp0..rp3
272 * This is present in par, because par is now
273 * rp3 rp3 rp2 rp2 in little endian and
274 * rp2 rp2 rp3 rp3 in big endian
276 * rp1 rp0 rp1 rp0 in little endian and
277 * rp0 rp1 rp0 rp1 in big endian
278 * First calculate rp2 and rp3
296 /* reduce par to 16 bits then calculate rp1 and rp0 */
299 rp0 = (par >> 8) & 0xff;
302 rp1 = (par >> 8) & 0xff;
306 /* finally reduce par to 8 bits */
311 * and calculate rp5..rp15..rp17
312 * note that par = rp4 ^ rp5 and due to the commutative property
313 * of the ^ operator we can say:
315 * The & 0xff seems superfluous, but benchmarking learned that
316 * leaving it out gives slightly worse results. No idea why, probably
317 * it has to do with the way the pipeline in pentium is organized.
319 rp5 = (par ^ rp4) & 0xff;
320 rp7 = (par ^ rp6) & 0xff;
321 rp9 = (par ^ rp8) & 0xff;
322 rp11 = (par ^ rp10) & 0xff;
323 rp13 = (par ^ rp12) & 0xff;
324 rp15 = (par ^ rp14) & 0xff;
325 if (eccsize_mult == 2)
326 rp17 = (par ^ rp16) & 0xff;
329 * Finally calculate the ECC bits.
330 * Again here it might seem that there are performance optimisations
331 * possible, but benchmarks showed that on the system this is developed
332 * the code below is the fastest
335 code[0] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
336 (invparity[rp5] << 5) | (invparity[rp4] << 4) |
337 (invparity[rp3] << 3) | (invparity[rp2] << 2) |
338 (invparity[rp1] << 1) | (invparity[rp0]);
339 code[1] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
340 (invparity[rp13] << 5) | (invparity[rp12] << 4) |
341 (invparity[rp11] << 3) | (invparity[rp10] << 2) |
342 (invparity[rp9] << 1) | (invparity[rp8]);
344 code[1] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
345 (invparity[rp5] << 5) | (invparity[rp4] << 4) |
346 (invparity[rp3] << 3) | (invparity[rp2] << 2) |
347 (invparity[rp1] << 1) | (invparity[rp0]);
348 code[0] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
349 (invparity[rp13] << 5) | (invparity[rp12] << 4) |
350 (invparity[rp11] << 3) | (invparity[rp10] << 2) |
351 (invparity[rp9] << 1) | (invparity[rp8]);
354 if (eccsize_mult == 1)
356 (invparity[par & 0xf0] << 7) |
357 (invparity[par & 0x0f] << 6) |
358 (invparity[par & 0xcc] << 5) |
359 (invparity[par & 0x33] << 4) |
360 (invparity[par & 0xaa] << 3) |
361 (invparity[par & 0x55] << 2) |
365 (invparity[par & 0xf0] << 7) |
366 (invparity[par & 0x0f] << 6) |
367 (invparity[par & 0xcc] << 5) |
368 (invparity[par & 0x33] << 4) |
369 (invparity[par & 0xaa] << 3) |
370 (invparity[par & 0x55] << 2) |
371 (invparity[rp17] << 1) |
372 (invparity[rp16] << 0);
374 EXPORT_SYMBOL(__nand_calculate_ecc);
377 * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
379 * @chip: NAND chip object
380 * @buf: input buffer with raw data
381 * @code: output buffer with ECC
383 int nand_calculate_ecc(struct nand_chip *chip, const unsigned char *buf,
386 bool sm_order = chip->ecc.options & NAND_ECC_SOFT_HAMMING_SM_ORDER;
388 __nand_calculate_ecc(buf, chip->ecc.size, code, sm_order);
392 EXPORT_SYMBOL(nand_calculate_ecc);
395 * __nand_correct_data - [NAND Interface] Detect and correct bit error(s)
396 * @buf: raw data read from the chip
397 * @read_ecc: ECC from the chip
398 * @calc_ecc: the ECC calculated from raw data
399 * @eccsize: data bytes per ECC step (256 or 512)
400 * @sm_order: Smart Media byte order
402 * Detect and correct a 1 bit error for eccsize byte block
404 int __nand_correct_data(unsigned char *buf,
405 unsigned char *read_ecc, unsigned char *calc_ecc,
406 unsigned int eccsize, bool sm_order)
408 unsigned char b0, b1, b2, bit_addr;
409 unsigned int byte_addr;
410 /* 256 or 512 bytes/ecc */
411 const uint32_t eccsize_mult = eccsize >> 8;
414 * b0 to b2 indicate which bit is faulty (if any)
415 * we might need the xor result more than once,
416 * so keep them in a local var
419 b0 = read_ecc[0] ^ calc_ecc[0];
420 b1 = read_ecc[1] ^ calc_ecc[1];
422 b0 = read_ecc[1] ^ calc_ecc[1];
423 b1 = read_ecc[0] ^ calc_ecc[0];
426 b2 = read_ecc[2] ^ calc_ecc[2];
428 /* check if there are any bitfaults */
430 /* repeated if statements are slightly more efficient than switch ... */
431 /* ordered in order of likelihood */
433 if ((b0 | b1 | b2) == 0)
434 return 0; /* no error */
436 if ((((b0 ^ (b0 >> 1)) & 0x55) == 0x55) &&
437 (((b1 ^ (b1 >> 1)) & 0x55) == 0x55) &&
438 ((eccsize_mult == 1 && ((b2 ^ (b2 >> 1)) & 0x54) == 0x54) ||
439 (eccsize_mult == 2 && ((b2 ^ (b2 >> 1)) & 0x55) == 0x55))) {
440 /* single bit error */
442 * rp17/rp15/13/11/9/7/5/3/1 indicate which byte is the faulty
443 * byte, cp 5/3/1 indicate the faulty bit.
444 * A lookup table (called addressbits) is used to filter
445 * the bits from the byte they are in.
446 * A marginal optimisation is possible by having three
447 * different lookup tables.
448 * One as we have now (for b0), one for b2
449 * (that would avoid the >> 1), and one for b1 (with all values
450 * << 4). However it was felt that introducing two more tables
451 * hardly justify the gain.
453 * The b2 shift is there to get rid of the lowest two bits.
454 * We could also do addressbits[b2] >> 1 but for the
455 * performance it does not make any difference
457 if (eccsize_mult == 1)
458 byte_addr = (addressbits[b1] << 4) + addressbits[b0];
460 byte_addr = (addressbits[b2 & 0x3] << 8) +
461 (addressbits[b1] << 4) + addressbits[b0];
462 bit_addr = addressbits[b2 >> 2];
464 buf[byte_addr] ^= (1 << bit_addr);
468 /* count nr of bits; use table lookup, faster than calculating it */
469 if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
470 return 1; /* error in ECC data; no action needed */
472 pr_err("%s: uncorrectable ECC error\n", __func__);
475 EXPORT_SYMBOL(__nand_correct_data);
478 * nand_correct_data - [NAND Interface] Detect and correct bit error(s)
479 * @chip: NAND chip object
480 * @buf: raw data read from the chip
481 * @read_ecc: ECC from the chip
482 * @calc_ecc: the ECC calculated from raw data
484 * Detect and correct a 1 bit error for 256/512 byte block
486 int nand_correct_data(struct nand_chip *chip, unsigned char *buf,
487 unsigned char *read_ecc, unsigned char *calc_ecc)
489 bool sm_order = chip->ecc.options & NAND_ECC_SOFT_HAMMING_SM_ORDER;
491 return __nand_correct_data(buf, read_ecc, calc_ecc, chip->ecc.size,
494 EXPORT_SYMBOL(nand_correct_data);
496 MODULE_LICENSE("GPL");
498 MODULE_DESCRIPTION("Generic NAND ECC support");