Loading...
1/*
2 * Bad block management
3 *
4 * - Heavily based on MD badblocks code from Neil Brown
5 *
6 * Copyright (c) 2015, Intel Corporation.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#include <linux/badblocks.h>
19#include <linux/seqlock.h>
20#include <linux/device.h>
21#include <linux/kernel.h>
22#include <linux/module.h>
23#include <linux/stddef.h>
24#include <linux/types.h>
25#include <linux/slab.h>
26
27/**
28 * badblocks_check() - check a given range for bad sectors
29 * @bb: the badblocks structure that holds all badblock information
30 * @s: sector (start) at which to check for badblocks
31 * @sectors: number of sectors to check for badblocks
32 * @first_bad: pointer to store location of the first badblock
33 * @bad_sectors: pointer to store number of badblocks after @first_bad
34 *
35 * We can record which blocks on each device are 'bad' and so just
36 * fail those blocks, or that stripe, rather than the whole device.
37 * Entries in the bad-block table are 64bits wide. This comprises:
38 * Length of bad-range, in sectors: 0-511 for lengths 1-512
39 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
40 * A 'shift' can be set so that larger blocks are tracked and
41 * consequently larger devices can be covered.
42 * 'Acknowledged' flag - 1 bit. - the most significant bit.
43 *
44 * Locking of the bad-block table uses a seqlock so badblocks_check
45 * might need to retry if it is very unlucky.
46 * We will sometimes want to check for bad blocks in a bi_end_io function,
47 * so we use the write_seqlock_irq variant.
48 *
49 * When looking for a bad block we specify a range and want to
50 * know if any block in the range is bad. So we binary-search
51 * to the last range that starts at-or-before the given endpoint,
52 * (or "before the sector after the target range")
53 * then see if it ends after the given start.
54 *
55 * Return:
56 * 0: there are no known bad blocks in the range
57 * 1: there are known bad block which are all acknowledged
58 * -1: there are bad blocks which have not yet been acknowledged in metadata.
59 * plus the start/length of the first bad section we overlap.
60 */
61int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
62 sector_t *first_bad, int *bad_sectors)
63{
64 int hi;
65 int lo;
66 u64 *p = bb->page;
67 int rv;
68 sector_t target = s + sectors;
69 unsigned seq;
70
71 if (bb->shift > 0) {
72 /* round the start down, and the end up */
73 s >>= bb->shift;
74 target += (1<<bb->shift) - 1;
75 target >>= bb->shift;
76 sectors = target - s;
77 }
78 /* 'target' is now the first block after the bad range */
79
80retry:
81 seq = read_seqbegin(&bb->lock);
82 lo = 0;
83 rv = 0;
84 hi = bb->count;
85
86 /* Binary search between lo and hi for 'target'
87 * i.e. for the last range that starts before 'target'
88 */
89 /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
90 * are known not to be the last range before target.
91 * VARIANT: hi-lo is the number of possible
92 * ranges, and decreases until it reaches 1
93 */
94 while (hi - lo > 1) {
95 int mid = (lo + hi) / 2;
96 sector_t a = BB_OFFSET(p[mid]);
97
98 if (a < target)
99 /* This could still be the one, earlier ranges
100 * could not.
101 */
102 lo = mid;
103 else
104 /* This and later ranges are definitely out. */
105 hi = mid;
106 }
107 /* 'lo' might be the last that started before target, but 'hi' isn't */
108 if (hi > lo) {
109 /* need to check all range that end after 's' to see if
110 * any are unacknowledged.
111 */
112 while (lo >= 0 &&
113 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
114 if (BB_OFFSET(p[lo]) < target) {
115 /* starts before the end, and finishes after
116 * the start, so they must overlap
117 */
118 if (rv != -1 && BB_ACK(p[lo]))
119 rv = 1;
120 else
121 rv = -1;
122 *first_bad = BB_OFFSET(p[lo]);
123 *bad_sectors = BB_LEN(p[lo]);
124 }
125 lo--;
126 }
127 }
128
129 if (read_seqretry(&bb->lock, seq))
130 goto retry;
131
132 return rv;
133}
134EXPORT_SYMBOL_GPL(badblocks_check);
135
136static void badblocks_update_acked(struct badblocks *bb)
137{
138 u64 *p = bb->page;
139 int i;
140 bool unacked = false;
141
142 if (!bb->unacked_exist)
143 return;
144
145 for (i = 0; i < bb->count ; i++) {
146 if (!BB_ACK(p[i])) {
147 unacked = true;
148 break;
149 }
150 }
151
152 if (!unacked)
153 bb->unacked_exist = 0;
154}
155
156/**
157 * badblocks_set() - Add a range of bad blocks to the table.
158 * @bb: the badblocks structure that holds all badblock information
159 * @s: first sector to mark as bad
160 * @sectors: number of sectors to mark as bad
161 * @acknowledged: weather to mark the bad sectors as acknowledged
162 *
163 * This might extend the table, or might contract it if two adjacent ranges
164 * can be merged. We binary-search to find the 'insertion' point, then
165 * decide how best to handle it.
166 *
167 * Return:
168 * 0: success
169 * 1: failed to set badblocks (out of space)
170 */
171int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
172 int acknowledged)
173{
174 u64 *p;
175 int lo, hi;
176 int rv = 0;
177 unsigned long flags;
178
179 if (bb->shift < 0)
180 /* badblocks are disabled */
181 return 1;
182
183 if (bb->shift) {
184 /* round the start down, and the end up */
185 sector_t next = s + sectors;
186
187 s >>= bb->shift;
188 next += (1<<bb->shift) - 1;
189 next >>= bb->shift;
190 sectors = next - s;
191 }
192
193 write_seqlock_irqsave(&bb->lock, flags);
194
195 p = bb->page;
196 lo = 0;
197 hi = bb->count;
198 /* Find the last range that starts at-or-before 's' */
199 while (hi - lo > 1) {
200 int mid = (lo + hi) / 2;
201 sector_t a = BB_OFFSET(p[mid]);
202
203 if (a <= s)
204 lo = mid;
205 else
206 hi = mid;
207 }
208 if (hi > lo && BB_OFFSET(p[lo]) > s)
209 hi = lo;
210
211 if (hi > lo) {
212 /* we found a range that might merge with the start
213 * of our new range
214 */
215 sector_t a = BB_OFFSET(p[lo]);
216 sector_t e = a + BB_LEN(p[lo]);
217 int ack = BB_ACK(p[lo]);
218
219 if (e >= s) {
220 /* Yes, we can merge with a previous range */
221 if (s == a && s + sectors >= e)
222 /* new range covers old */
223 ack = acknowledged;
224 else
225 ack = ack && acknowledged;
226
227 if (e < s + sectors)
228 e = s + sectors;
229 if (e - a <= BB_MAX_LEN) {
230 p[lo] = BB_MAKE(a, e-a, ack);
231 s = e;
232 } else {
233 /* does not all fit in one range,
234 * make p[lo] maximal
235 */
236 if (BB_LEN(p[lo]) != BB_MAX_LEN)
237 p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
238 s = a + BB_MAX_LEN;
239 }
240 sectors = e - s;
241 }
242 }
243 if (sectors && hi < bb->count) {
244 /* 'hi' points to the first range that starts after 's'.
245 * Maybe we can merge with the start of that range
246 */
247 sector_t a = BB_OFFSET(p[hi]);
248 sector_t e = a + BB_LEN(p[hi]);
249 int ack = BB_ACK(p[hi]);
250
251 if (a <= s + sectors) {
252 /* merging is possible */
253 if (e <= s + sectors) {
254 /* full overlap */
255 e = s + sectors;
256 ack = acknowledged;
257 } else
258 ack = ack && acknowledged;
259
260 a = s;
261 if (e - a <= BB_MAX_LEN) {
262 p[hi] = BB_MAKE(a, e-a, ack);
263 s = e;
264 } else {
265 p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
266 s = a + BB_MAX_LEN;
267 }
268 sectors = e - s;
269 lo = hi;
270 hi++;
271 }
272 }
273 if (sectors == 0 && hi < bb->count) {
274 /* we might be able to combine lo and hi */
275 /* Note: 's' is at the end of 'lo' */
276 sector_t a = BB_OFFSET(p[hi]);
277 int lolen = BB_LEN(p[lo]);
278 int hilen = BB_LEN(p[hi]);
279 int newlen = lolen + hilen - (s - a);
280
281 if (s >= a && newlen < BB_MAX_LEN) {
282 /* yes, we can combine them */
283 int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
284
285 p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
286 memmove(p + hi, p + hi + 1,
287 (bb->count - hi - 1) * 8);
288 bb->count--;
289 }
290 }
291 while (sectors) {
292 /* didn't merge (it all).
293 * Need to add a range just before 'hi'
294 */
295 if (bb->count >= MAX_BADBLOCKS) {
296 /* No room for more */
297 rv = 1;
298 break;
299 } else {
300 int this_sectors = sectors;
301
302 memmove(p + hi + 1, p + hi,
303 (bb->count - hi) * 8);
304 bb->count++;
305
306 if (this_sectors > BB_MAX_LEN)
307 this_sectors = BB_MAX_LEN;
308 p[hi] = BB_MAKE(s, this_sectors, acknowledged);
309 sectors -= this_sectors;
310 s += this_sectors;
311 }
312 }
313
314 bb->changed = 1;
315 if (!acknowledged)
316 bb->unacked_exist = 1;
317 else
318 badblocks_update_acked(bb);
319 write_sequnlock_irqrestore(&bb->lock, flags);
320
321 return rv;
322}
323EXPORT_SYMBOL_GPL(badblocks_set);
324
325/**
326 * badblocks_clear() - Remove a range of bad blocks to the table.
327 * @bb: the badblocks structure that holds all badblock information
328 * @s: first sector to mark as bad
329 * @sectors: number of sectors to mark as bad
330 *
331 * This may involve extending the table if we spilt a region,
332 * but it must not fail. So if the table becomes full, we just
333 * drop the remove request.
334 *
335 * Return:
336 * 0: success
337 * 1: failed to clear badblocks
338 */
339int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
340{
341 u64 *p;
342 int lo, hi;
343 sector_t target = s + sectors;
344 int rv = 0;
345
346 if (bb->shift > 0) {
347 /* When clearing we round the start up and the end down.
348 * This should not matter as the shift should align with
349 * the block size and no rounding should ever be needed.
350 * However it is better the think a block is bad when it
351 * isn't than to think a block is not bad when it is.
352 */
353 s += (1<<bb->shift) - 1;
354 s >>= bb->shift;
355 target >>= bb->shift;
356 sectors = target - s;
357 }
358
359 write_seqlock_irq(&bb->lock);
360
361 p = bb->page;
362 lo = 0;
363 hi = bb->count;
364 /* Find the last range that starts before 'target' */
365 while (hi - lo > 1) {
366 int mid = (lo + hi) / 2;
367 sector_t a = BB_OFFSET(p[mid]);
368
369 if (a < target)
370 lo = mid;
371 else
372 hi = mid;
373 }
374 if (hi > lo) {
375 /* p[lo] is the last range that could overlap the
376 * current range. Earlier ranges could also overlap,
377 * but only this one can overlap the end of the range.
378 */
379 if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
380 (BB_OFFSET(p[lo]) < target)) {
381 /* Partial overlap, leave the tail of this range */
382 int ack = BB_ACK(p[lo]);
383 sector_t a = BB_OFFSET(p[lo]);
384 sector_t end = a + BB_LEN(p[lo]);
385
386 if (a < s) {
387 /* we need to split this range */
388 if (bb->count >= MAX_BADBLOCKS) {
389 rv = -ENOSPC;
390 goto out;
391 }
392 memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
393 bb->count++;
394 p[lo] = BB_MAKE(a, s-a, ack);
395 lo++;
396 }
397 p[lo] = BB_MAKE(target, end - target, ack);
398 /* there is no longer an overlap */
399 hi = lo;
400 lo--;
401 }
402 while (lo >= 0 &&
403 (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
404 (BB_OFFSET(p[lo]) < target)) {
405 /* This range does overlap */
406 if (BB_OFFSET(p[lo]) < s) {
407 /* Keep the early parts of this range. */
408 int ack = BB_ACK(p[lo]);
409 sector_t start = BB_OFFSET(p[lo]);
410
411 p[lo] = BB_MAKE(start, s - start, ack);
412 /* now low doesn't overlap, so.. */
413 break;
414 }
415 lo--;
416 }
417 /* 'lo' is strictly before, 'hi' is strictly after,
418 * anything between needs to be discarded
419 */
420 if (hi - lo > 1) {
421 memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
422 bb->count -= (hi - lo - 1);
423 }
424 }
425
426 badblocks_update_acked(bb);
427 bb->changed = 1;
428out:
429 write_sequnlock_irq(&bb->lock);
430 return rv;
431}
432EXPORT_SYMBOL_GPL(badblocks_clear);
433
434/**
435 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
436 * @bb: the badblocks structure that holds all badblock information
437 *
438 * This only succeeds if ->changed is clear. It is used by
439 * in-kernel metadata updates
440 */
441void ack_all_badblocks(struct badblocks *bb)
442{
443 if (bb->page == NULL || bb->changed)
444 /* no point even trying */
445 return;
446 write_seqlock_irq(&bb->lock);
447
448 if (bb->changed == 0 && bb->unacked_exist) {
449 u64 *p = bb->page;
450 int i;
451
452 for (i = 0; i < bb->count ; i++) {
453 if (!BB_ACK(p[i])) {
454 sector_t start = BB_OFFSET(p[i]);
455 int len = BB_LEN(p[i]);
456
457 p[i] = BB_MAKE(start, len, 1);
458 }
459 }
460 bb->unacked_exist = 0;
461 }
462 write_sequnlock_irq(&bb->lock);
463}
464EXPORT_SYMBOL_GPL(ack_all_badblocks);
465
466/**
467 * badblocks_show() - sysfs access to bad-blocks list
468 * @bb: the badblocks structure that holds all badblock information
469 * @page: buffer received from sysfs
470 * @unack: weather to show unacknowledged badblocks
471 *
472 * Return:
473 * Length of returned data
474 */
475ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
476{
477 size_t len;
478 int i;
479 u64 *p = bb->page;
480 unsigned seq;
481
482 if (bb->shift < 0)
483 return 0;
484
485retry:
486 seq = read_seqbegin(&bb->lock);
487
488 len = 0;
489 i = 0;
490
491 while (len < PAGE_SIZE && i < bb->count) {
492 sector_t s = BB_OFFSET(p[i]);
493 unsigned int length = BB_LEN(p[i]);
494 int ack = BB_ACK(p[i]);
495
496 i++;
497
498 if (unack && ack)
499 continue;
500
501 len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
502 (unsigned long long)s << bb->shift,
503 length << bb->shift);
504 }
505 if (unack && len == 0)
506 bb->unacked_exist = 0;
507
508 if (read_seqretry(&bb->lock, seq))
509 goto retry;
510
511 return len;
512}
513EXPORT_SYMBOL_GPL(badblocks_show);
514
515/**
516 * badblocks_store() - sysfs access to bad-blocks list
517 * @bb: the badblocks structure that holds all badblock information
518 * @page: buffer received from sysfs
519 * @len: length of data received from sysfs
520 * @unack: weather to show unacknowledged badblocks
521 *
522 * Return:
523 * Length of the buffer processed or -ve error.
524 */
525ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
526 int unack)
527{
528 unsigned long long sector;
529 int length;
530 char newline;
531
532 switch (sscanf(page, "%llu %d%c", §or, &length, &newline)) {
533 case 3:
534 if (newline != '\n')
535 return -EINVAL;
536 /* fall through */
537 case 2:
538 if (length <= 0)
539 return -EINVAL;
540 break;
541 default:
542 return -EINVAL;
543 }
544
545 if (badblocks_set(bb, sector, length, !unack))
546 return -ENOSPC;
547 else
548 return len;
549}
550EXPORT_SYMBOL_GPL(badblocks_store);
551
552static int __badblocks_init(struct device *dev, struct badblocks *bb,
553 int enable)
554{
555 bb->dev = dev;
556 bb->count = 0;
557 if (enable)
558 bb->shift = 0;
559 else
560 bb->shift = -1;
561 if (dev)
562 bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
563 else
564 bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
565 if (!bb->page) {
566 bb->shift = -1;
567 return -ENOMEM;
568 }
569 seqlock_init(&bb->lock);
570
571 return 0;
572}
573
574/**
575 * badblocks_init() - initialize the badblocks structure
576 * @bb: the badblocks structure that holds all badblock information
577 * @enable: weather to enable badblocks accounting
578 *
579 * Return:
580 * 0: success
581 * -ve errno: on error
582 */
583int badblocks_init(struct badblocks *bb, int enable)
584{
585 return __badblocks_init(NULL, bb, enable);
586}
587EXPORT_SYMBOL_GPL(badblocks_init);
588
589int devm_init_badblocks(struct device *dev, struct badblocks *bb)
590{
591 if (!bb)
592 return -EINVAL;
593 return __badblocks_init(dev, bb, 1);
594}
595EXPORT_SYMBOL_GPL(devm_init_badblocks);
596
597/**
598 * badblocks_exit() - free the badblocks structure
599 * @bb: the badblocks structure that holds all badblock information
600 */
601void badblocks_exit(struct badblocks *bb)
602{
603 if (!bb)
604 return;
605 if (bb->dev)
606 devm_kfree(bb->dev, bb->page);
607 else
608 kfree(bb->page);
609 bb->page = NULL;
610}
611EXPORT_SYMBOL_GPL(badblocks_exit);
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Bad block management
4 *
5 * - Heavily based on MD badblocks code from Neil Brown
6 *
7 * Copyright (c) 2015, Intel Corporation.
8 */
9
10#include <linux/badblocks.h>
11#include <linux/seqlock.h>
12#include <linux/device.h>
13#include <linux/kernel.h>
14#include <linux/module.h>
15#include <linux/stddef.h>
16#include <linux/types.h>
17#include <linux/slab.h>
18
19/**
20 * badblocks_check() - check a given range for bad sectors
21 * @bb: the badblocks structure that holds all badblock information
22 * @s: sector (start) at which to check for badblocks
23 * @sectors: number of sectors to check for badblocks
24 * @first_bad: pointer to store location of the first badblock
25 * @bad_sectors: pointer to store number of badblocks after @first_bad
26 *
27 * We can record which blocks on each device are 'bad' and so just
28 * fail those blocks, or that stripe, rather than the whole device.
29 * Entries in the bad-block table are 64bits wide. This comprises:
30 * Length of bad-range, in sectors: 0-511 for lengths 1-512
31 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
32 * A 'shift' can be set so that larger blocks are tracked and
33 * consequently larger devices can be covered.
34 * 'Acknowledged' flag - 1 bit. - the most significant bit.
35 *
36 * Locking of the bad-block table uses a seqlock so badblocks_check
37 * might need to retry if it is very unlucky.
38 * We will sometimes want to check for bad blocks in a bi_end_io function,
39 * so we use the write_seqlock_irq variant.
40 *
41 * When looking for a bad block we specify a range and want to
42 * know if any block in the range is bad. So we binary-search
43 * to the last range that starts at-or-before the given endpoint,
44 * (or "before the sector after the target range")
45 * then see if it ends after the given start.
46 *
47 * Return:
48 * 0: there are no known bad blocks in the range
49 * 1: there are known bad block which are all acknowledged
50 * -1: there are bad blocks which have not yet been acknowledged in metadata.
51 * plus the start/length of the first bad section we overlap.
52 */
53int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
54 sector_t *first_bad, int *bad_sectors)
55{
56 int hi;
57 int lo;
58 u64 *p = bb->page;
59 int rv;
60 sector_t target = s + sectors;
61 unsigned seq;
62
63 if (bb->shift > 0) {
64 /* round the start down, and the end up */
65 s >>= bb->shift;
66 target += (1<<bb->shift) - 1;
67 target >>= bb->shift;
68 sectors = target - s;
69 }
70 /* 'target' is now the first block after the bad range */
71
72retry:
73 seq = read_seqbegin(&bb->lock);
74 lo = 0;
75 rv = 0;
76 hi = bb->count;
77
78 /* Binary search between lo and hi for 'target'
79 * i.e. for the last range that starts before 'target'
80 */
81 /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
82 * are known not to be the last range before target.
83 * VARIANT: hi-lo is the number of possible
84 * ranges, and decreases until it reaches 1
85 */
86 while (hi - lo > 1) {
87 int mid = (lo + hi) / 2;
88 sector_t a = BB_OFFSET(p[mid]);
89
90 if (a < target)
91 /* This could still be the one, earlier ranges
92 * could not.
93 */
94 lo = mid;
95 else
96 /* This and later ranges are definitely out. */
97 hi = mid;
98 }
99 /* 'lo' might be the last that started before target, but 'hi' isn't */
100 if (hi > lo) {
101 /* need to check all range that end after 's' to see if
102 * any are unacknowledged.
103 */
104 while (lo >= 0 &&
105 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
106 if (BB_OFFSET(p[lo]) < target) {
107 /* starts before the end, and finishes after
108 * the start, so they must overlap
109 */
110 if (rv != -1 && BB_ACK(p[lo]))
111 rv = 1;
112 else
113 rv = -1;
114 *first_bad = BB_OFFSET(p[lo]);
115 *bad_sectors = BB_LEN(p[lo]);
116 }
117 lo--;
118 }
119 }
120
121 if (read_seqretry(&bb->lock, seq))
122 goto retry;
123
124 return rv;
125}
126EXPORT_SYMBOL_GPL(badblocks_check);
127
128static void badblocks_update_acked(struct badblocks *bb)
129{
130 u64 *p = bb->page;
131 int i;
132 bool unacked = false;
133
134 if (!bb->unacked_exist)
135 return;
136
137 for (i = 0; i < bb->count ; i++) {
138 if (!BB_ACK(p[i])) {
139 unacked = true;
140 break;
141 }
142 }
143
144 if (!unacked)
145 bb->unacked_exist = 0;
146}
147
148/**
149 * badblocks_set() - Add a range of bad blocks to the table.
150 * @bb: the badblocks structure that holds all badblock information
151 * @s: first sector to mark as bad
152 * @sectors: number of sectors to mark as bad
153 * @acknowledged: weather to mark the bad sectors as acknowledged
154 *
155 * This might extend the table, or might contract it if two adjacent ranges
156 * can be merged. We binary-search to find the 'insertion' point, then
157 * decide how best to handle it.
158 *
159 * Return:
160 * 0: success
161 * 1: failed to set badblocks (out of space)
162 */
163int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
164 int acknowledged)
165{
166 u64 *p;
167 int lo, hi;
168 int rv = 0;
169 unsigned long flags;
170
171 if (bb->shift < 0)
172 /* badblocks are disabled */
173 return 1;
174
175 if (bb->shift) {
176 /* round the start down, and the end up */
177 sector_t next = s + sectors;
178
179 s >>= bb->shift;
180 next += (1<<bb->shift) - 1;
181 next >>= bb->shift;
182 sectors = next - s;
183 }
184
185 write_seqlock_irqsave(&bb->lock, flags);
186
187 p = bb->page;
188 lo = 0;
189 hi = bb->count;
190 /* Find the last range that starts at-or-before 's' */
191 while (hi - lo > 1) {
192 int mid = (lo + hi) / 2;
193 sector_t a = BB_OFFSET(p[mid]);
194
195 if (a <= s)
196 lo = mid;
197 else
198 hi = mid;
199 }
200 if (hi > lo && BB_OFFSET(p[lo]) > s)
201 hi = lo;
202
203 if (hi > lo) {
204 /* we found a range that might merge with the start
205 * of our new range
206 */
207 sector_t a = BB_OFFSET(p[lo]);
208 sector_t e = a + BB_LEN(p[lo]);
209 int ack = BB_ACK(p[lo]);
210
211 if (e >= s) {
212 /* Yes, we can merge with a previous range */
213 if (s == a && s + sectors >= e)
214 /* new range covers old */
215 ack = acknowledged;
216 else
217 ack = ack && acknowledged;
218
219 if (e < s + sectors)
220 e = s + sectors;
221 if (e - a <= BB_MAX_LEN) {
222 p[lo] = BB_MAKE(a, e-a, ack);
223 s = e;
224 } else {
225 /* does not all fit in one range,
226 * make p[lo] maximal
227 */
228 if (BB_LEN(p[lo]) != BB_MAX_LEN)
229 p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
230 s = a + BB_MAX_LEN;
231 }
232 sectors = e - s;
233 }
234 }
235 if (sectors && hi < bb->count) {
236 /* 'hi' points to the first range that starts after 's'.
237 * Maybe we can merge with the start of that range
238 */
239 sector_t a = BB_OFFSET(p[hi]);
240 sector_t e = a + BB_LEN(p[hi]);
241 int ack = BB_ACK(p[hi]);
242
243 if (a <= s + sectors) {
244 /* merging is possible */
245 if (e <= s + sectors) {
246 /* full overlap */
247 e = s + sectors;
248 ack = acknowledged;
249 } else
250 ack = ack && acknowledged;
251
252 a = s;
253 if (e - a <= BB_MAX_LEN) {
254 p[hi] = BB_MAKE(a, e-a, ack);
255 s = e;
256 } else {
257 p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
258 s = a + BB_MAX_LEN;
259 }
260 sectors = e - s;
261 lo = hi;
262 hi++;
263 }
264 }
265 if (sectors == 0 && hi < bb->count) {
266 /* we might be able to combine lo and hi */
267 /* Note: 's' is at the end of 'lo' */
268 sector_t a = BB_OFFSET(p[hi]);
269 int lolen = BB_LEN(p[lo]);
270 int hilen = BB_LEN(p[hi]);
271 int newlen = lolen + hilen - (s - a);
272
273 if (s >= a && newlen < BB_MAX_LEN) {
274 /* yes, we can combine them */
275 int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
276
277 p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
278 memmove(p + hi, p + hi + 1,
279 (bb->count - hi - 1) * 8);
280 bb->count--;
281 }
282 }
283 while (sectors) {
284 /* didn't merge (it all).
285 * Need to add a range just before 'hi'
286 */
287 if (bb->count >= MAX_BADBLOCKS) {
288 /* No room for more */
289 rv = 1;
290 break;
291 } else {
292 int this_sectors = sectors;
293
294 memmove(p + hi + 1, p + hi,
295 (bb->count - hi) * 8);
296 bb->count++;
297
298 if (this_sectors > BB_MAX_LEN)
299 this_sectors = BB_MAX_LEN;
300 p[hi] = BB_MAKE(s, this_sectors, acknowledged);
301 sectors -= this_sectors;
302 s += this_sectors;
303 }
304 }
305
306 bb->changed = 1;
307 if (!acknowledged)
308 bb->unacked_exist = 1;
309 else
310 badblocks_update_acked(bb);
311 write_sequnlock_irqrestore(&bb->lock, flags);
312
313 return rv;
314}
315EXPORT_SYMBOL_GPL(badblocks_set);
316
317/**
318 * badblocks_clear() - Remove a range of bad blocks to the table.
319 * @bb: the badblocks structure that holds all badblock information
320 * @s: first sector to mark as bad
321 * @sectors: number of sectors to mark as bad
322 *
323 * This may involve extending the table if we spilt a region,
324 * but it must not fail. So if the table becomes full, we just
325 * drop the remove request.
326 *
327 * Return:
328 * 0: success
329 * 1: failed to clear badblocks
330 */
331int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
332{
333 u64 *p;
334 int lo, hi;
335 sector_t target = s + sectors;
336 int rv = 0;
337
338 if (bb->shift > 0) {
339 /* When clearing we round the start up and the end down.
340 * This should not matter as the shift should align with
341 * the block size and no rounding should ever be needed.
342 * However it is better the think a block is bad when it
343 * isn't than to think a block is not bad when it is.
344 */
345 s += (1<<bb->shift) - 1;
346 s >>= bb->shift;
347 target >>= bb->shift;
348 sectors = target - s;
349 }
350
351 write_seqlock_irq(&bb->lock);
352
353 p = bb->page;
354 lo = 0;
355 hi = bb->count;
356 /* Find the last range that starts before 'target' */
357 while (hi - lo > 1) {
358 int mid = (lo + hi) / 2;
359 sector_t a = BB_OFFSET(p[mid]);
360
361 if (a < target)
362 lo = mid;
363 else
364 hi = mid;
365 }
366 if (hi > lo) {
367 /* p[lo] is the last range that could overlap the
368 * current range. Earlier ranges could also overlap,
369 * but only this one can overlap the end of the range.
370 */
371 if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
372 (BB_OFFSET(p[lo]) < target)) {
373 /* Partial overlap, leave the tail of this range */
374 int ack = BB_ACK(p[lo]);
375 sector_t a = BB_OFFSET(p[lo]);
376 sector_t end = a + BB_LEN(p[lo]);
377
378 if (a < s) {
379 /* we need to split this range */
380 if (bb->count >= MAX_BADBLOCKS) {
381 rv = -ENOSPC;
382 goto out;
383 }
384 memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
385 bb->count++;
386 p[lo] = BB_MAKE(a, s-a, ack);
387 lo++;
388 }
389 p[lo] = BB_MAKE(target, end - target, ack);
390 /* there is no longer an overlap */
391 hi = lo;
392 lo--;
393 }
394 while (lo >= 0 &&
395 (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
396 (BB_OFFSET(p[lo]) < target)) {
397 /* This range does overlap */
398 if (BB_OFFSET(p[lo]) < s) {
399 /* Keep the early parts of this range. */
400 int ack = BB_ACK(p[lo]);
401 sector_t start = BB_OFFSET(p[lo]);
402
403 p[lo] = BB_MAKE(start, s - start, ack);
404 /* now low doesn't overlap, so.. */
405 break;
406 }
407 lo--;
408 }
409 /* 'lo' is strictly before, 'hi' is strictly after,
410 * anything between needs to be discarded
411 */
412 if (hi - lo > 1) {
413 memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
414 bb->count -= (hi - lo - 1);
415 }
416 }
417
418 badblocks_update_acked(bb);
419 bb->changed = 1;
420out:
421 write_sequnlock_irq(&bb->lock);
422 return rv;
423}
424EXPORT_SYMBOL_GPL(badblocks_clear);
425
426/**
427 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
428 * @bb: the badblocks structure that holds all badblock information
429 *
430 * This only succeeds if ->changed is clear. It is used by
431 * in-kernel metadata updates
432 */
433void ack_all_badblocks(struct badblocks *bb)
434{
435 if (bb->page == NULL || bb->changed)
436 /* no point even trying */
437 return;
438 write_seqlock_irq(&bb->lock);
439
440 if (bb->changed == 0 && bb->unacked_exist) {
441 u64 *p = bb->page;
442 int i;
443
444 for (i = 0; i < bb->count ; i++) {
445 if (!BB_ACK(p[i])) {
446 sector_t start = BB_OFFSET(p[i]);
447 int len = BB_LEN(p[i]);
448
449 p[i] = BB_MAKE(start, len, 1);
450 }
451 }
452 bb->unacked_exist = 0;
453 }
454 write_sequnlock_irq(&bb->lock);
455}
456EXPORT_SYMBOL_GPL(ack_all_badblocks);
457
458/**
459 * badblocks_show() - sysfs access to bad-blocks list
460 * @bb: the badblocks structure that holds all badblock information
461 * @page: buffer received from sysfs
462 * @unack: weather to show unacknowledged badblocks
463 *
464 * Return:
465 * Length of returned data
466 */
467ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
468{
469 size_t len;
470 int i;
471 u64 *p = bb->page;
472 unsigned seq;
473
474 if (bb->shift < 0)
475 return 0;
476
477retry:
478 seq = read_seqbegin(&bb->lock);
479
480 len = 0;
481 i = 0;
482
483 while (len < PAGE_SIZE && i < bb->count) {
484 sector_t s = BB_OFFSET(p[i]);
485 unsigned int length = BB_LEN(p[i]);
486 int ack = BB_ACK(p[i]);
487
488 i++;
489
490 if (unack && ack)
491 continue;
492
493 len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
494 (unsigned long long)s << bb->shift,
495 length << bb->shift);
496 }
497 if (unack && len == 0)
498 bb->unacked_exist = 0;
499
500 if (read_seqretry(&bb->lock, seq))
501 goto retry;
502
503 return len;
504}
505EXPORT_SYMBOL_GPL(badblocks_show);
506
507/**
508 * badblocks_store() - sysfs access to bad-blocks list
509 * @bb: the badblocks structure that holds all badblock information
510 * @page: buffer received from sysfs
511 * @len: length of data received from sysfs
512 * @unack: weather to show unacknowledged badblocks
513 *
514 * Return:
515 * Length of the buffer processed or -ve error.
516 */
517ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
518 int unack)
519{
520 unsigned long long sector;
521 int length;
522 char newline;
523
524 switch (sscanf(page, "%llu %d%c", §or, &length, &newline)) {
525 case 3:
526 if (newline != '\n')
527 return -EINVAL;
528 /* fall through */
529 case 2:
530 if (length <= 0)
531 return -EINVAL;
532 break;
533 default:
534 return -EINVAL;
535 }
536
537 if (badblocks_set(bb, sector, length, !unack))
538 return -ENOSPC;
539 else
540 return len;
541}
542EXPORT_SYMBOL_GPL(badblocks_store);
543
544static int __badblocks_init(struct device *dev, struct badblocks *bb,
545 int enable)
546{
547 bb->dev = dev;
548 bb->count = 0;
549 if (enable)
550 bb->shift = 0;
551 else
552 bb->shift = -1;
553 if (dev)
554 bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
555 else
556 bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
557 if (!bb->page) {
558 bb->shift = -1;
559 return -ENOMEM;
560 }
561 seqlock_init(&bb->lock);
562
563 return 0;
564}
565
566/**
567 * badblocks_init() - initialize the badblocks structure
568 * @bb: the badblocks structure that holds all badblock information
569 * @enable: weather to enable badblocks accounting
570 *
571 * Return:
572 * 0: success
573 * -ve errno: on error
574 */
575int badblocks_init(struct badblocks *bb, int enable)
576{
577 return __badblocks_init(NULL, bb, enable);
578}
579EXPORT_SYMBOL_GPL(badblocks_init);
580
581int devm_init_badblocks(struct device *dev, struct badblocks *bb)
582{
583 if (!bb)
584 return -EINVAL;
585 return __badblocks_init(dev, bb, 1);
586}
587EXPORT_SYMBOL_GPL(devm_init_badblocks);
588
589/**
590 * badblocks_exit() - free the badblocks structure
591 * @bb: the badblocks structure that holds all badblock information
592 */
593void badblocks_exit(struct badblocks *bb)
594{
595 if (!bb)
596 return;
597 if (bb->dev)
598 devm_kfree(bb->dev, bb->page);
599 else
600 kfree(bb->page);
601 bb->page = NULL;
602}
603EXPORT_SYMBOL_GPL(badblocks_exit);