Loading...
1/*
2 * Bad block management
3 *
4 * - Heavily based on MD badblocks code from Neil Brown
5 *
6 * Copyright (c) 2015, Intel Corporation.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#include <linux/badblocks.h>
19#include <linux/seqlock.h>
20#include <linux/device.h>
21#include <linux/kernel.h>
22#include <linux/module.h>
23#include <linux/stddef.h>
24#include <linux/types.h>
25#include <linux/slab.h>
26
27/**
28 * badblocks_check() - check a given range for bad sectors
29 * @bb: the badblocks structure that holds all badblock information
30 * @s: sector (start) at which to check for badblocks
31 * @sectors: number of sectors to check for badblocks
32 * @first_bad: pointer to store location of the first badblock
33 * @bad_sectors: pointer to store number of badblocks after @first_bad
34 *
35 * We can record which blocks on each device are 'bad' and so just
36 * fail those blocks, or that stripe, rather than the whole device.
37 * Entries in the bad-block table are 64bits wide. This comprises:
38 * Length of bad-range, in sectors: 0-511 for lengths 1-512
39 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
40 * A 'shift' can be set so that larger blocks are tracked and
41 * consequently larger devices can be covered.
42 * 'Acknowledged' flag - 1 bit. - the most significant bit.
43 *
44 * Locking of the bad-block table uses a seqlock so badblocks_check
45 * might need to retry if it is very unlucky.
46 * We will sometimes want to check for bad blocks in a bi_end_io function,
47 * so we use the write_seqlock_irq variant.
48 *
49 * When looking for a bad block we specify a range and want to
50 * know if any block in the range is bad. So we binary-search
51 * to the last range that starts at-or-before the given endpoint,
52 * (or "before the sector after the target range")
53 * then see if it ends after the given start.
54 *
55 * Return:
56 * 0: there are no known bad blocks in the range
57 * 1: there are known bad block which are all acknowledged
58 * -1: there are bad blocks which have not yet been acknowledged in metadata.
59 * plus the start/length of the first bad section we overlap.
60 */
61int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
62 sector_t *first_bad, int *bad_sectors)
63{
64 int hi;
65 int lo;
66 u64 *p = bb->page;
67 int rv;
68 sector_t target = s + sectors;
69 unsigned seq;
70
71 if (bb->shift > 0) {
72 /* round the start down, and the end up */
73 s >>= bb->shift;
74 target += (1<<bb->shift) - 1;
75 target >>= bb->shift;
76 sectors = target - s;
77 }
78 /* 'target' is now the first block after the bad range */
79
80retry:
81 seq = read_seqbegin(&bb->lock);
82 lo = 0;
83 rv = 0;
84 hi = bb->count;
85
86 /* Binary search between lo and hi for 'target'
87 * i.e. for the last range that starts before 'target'
88 */
89 /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
90 * are known not to be the last range before target.
91 * VARIANT: hi-lo is the number of possible
92 * ranges, and decreases until it reaches 1
93 */
94 while (hi - lo > 1) {
95 int mid = (lo + hi) / 2;
96 sector_t a = BB_OFFSET(p[mid]);
97
98 if (a < target)
99 /* This could still be the one, earlier ranges
100 * could not.
101 */
102 lo = mid;
103 else
104 /* This and later ranges are definitely out. */
105 hi = mid;
106 }
107 /* 'lo' might be the last that started before target, but 'hi' isn't */
108 if (hi > lo) {
109 /* need to check all range that end after 's' to see if
110 * any are unacknowledged.
111 */
112 while (lo >= 0 &&
113 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
114 if (BB_OFFSET(p[lo]) < target) {
115 /* starts before the end, and finishes after
116 * the start, so they must overlap
117 */
118 if (rv != -1 && BB_ACK(p[lo]))
119 rv = 1;
120 else
121 rv = -1;
122 *first_bad = BB_OFFSET(p[lo]);
123 *bad_sectors = BB_LEN(p[lo]);
124 }
125 lo--;
126 }
127 }
128
129 if (read_seqretry(&bb->lock, seq))
130 goto retry;
131
132 return rv;
133}
134EXPORT_SYMBOL_GPL(badblocks_check);
135
136/**
137 * badblocks_set() - Add a range of bad blocks to the table.
138 * @bb: the badblocks structure that holds all badblock information
139 * @s: first sector to mark as bad
140 * @sectors: number of sectors to mark as bad
141 * @acknowledged: weather to mark the bad sectors as acknowledged
142 *
143 * This might extend the table, or might contract it if two adjacent ranges
144 * can be merged. We binary-search to find the 'insertion' point, then
145 * decide how best to handle it.
146 *
147 * Return:
148 * 0: success
149 * 1: failed to set badblocks (out of space)
150 */
151int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
152 int acknowledged)
153{
154 u64 *p;
155 int lo, hi;
156 int rv = 0;
157 unsigned long flags;
158
159 if (bb->shift < 0)
160 /* badblocks are disabled */
161 return 0;
162
163 if (bb->shift) {
164 /* round the start down, and the end up */
165 sector_t next = s + sectors;
166
167 s >>= bb->shift;
168 next += (1<<bb->shift) - 1;
169 next >>= bb->shift;
170 sectors = next - s;
171 }
172
173 write_seqlock_irqsave(&bb->lock, flags);
174
175 p = bb->page;
176 lo = 0;
177 hi = bb->count;
178 /* Find the last range that starts at-or-before 's' */
179 while (hi - lo > 1) {
180 int mid = (lo + hi) / 2;
181 sector_t a = BB_OFFSET(p[mid]);
182
183 if (a <= s)
184 lo = mid;
185 else
186 hi = mid;
187 }
188 if (hi > lo && BB_OFFSET(p[lo]) > s)
189 hi = lo;
190
191 if (hi > lo) {
192 /* we found a range that might merge with the start
193 * of our new range
194 */
195 sector_t a = BB_OFFSET(p[lo]);
196 sector_t e = a + BB_LEN(p[lo]);
197 int ack = BB_ACK(p[lo]);
198
199 if (e >= s) {
200 /* Yes, we can merge with a previous range */
201 if (s == a && s + sectors >= e)
202 /* new range covers old */
203 ack = acknowledged;
204 else
205 ack = ack && acknowledged;
206
207 if (e < s + sectors)
208 e = s + sectors;
209 if (e - a <= BB_MAX_LEN) {
210 p[lo] = BB_MAKE(a, e-a, ack);
211 s = e;
212 } else {
213 /* does not all fit in one range,
214 * make p[lo] maximal
215 */
216 if (BB_LEN(p[lo]) != BB_MAX_LEN)
217 p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
218 s = a + BB_MAX_LEN;
219 }
220 sectors = e - s;
221 }
222 }
223 if (sectors && hi < bb->count) {
224 /* 'hi' points to the first range that starts after 's'.
225 * Maybe we can merge with the start of that range
226 */
227 sector_t a = BB_OFFSET(p[hi]);
228 sector_t e = a + BB_LEN(p[hi]);
229 int ack = BB_ACK(p[hi]);
230
231 if (a <= s + sectors) {
232 /* merging is possible */
233 if (e <= s + sectors) {
234 /* full overlap */
235 e = s + sectors;
236 ack = acknowledged;
237 } else
238 ack = ack && acknowledged;
239
240 a = s;
241 if (e - a <= BB_MAX_LEN) {
242 p[hi] = BB_MAKE(a, e-a, ack);
243 s = e;
244 } else {
245 p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
246 s = a + BB_MAX_LEN;
247 }
248 sectors = e - s;
249 lo = hi;
250 hi++;
251 }
252 }
253 if (sectors == 0 && hi < bb->count) {
254 /* we might be able to combine lo and hi */
255 /* Note: 's' is at the end of 'lo' */
256 sector_t a = BB_OFFSET(p[hi]);
257 int lolen = BB_LEN(p[lo]);
258 int hilen = BB_LEN(p[hi]);
259 int newlen = lolen + hilen - (s - a);
260
261 if (s >= a && newlen < BB_MAX_LEN) {
262 /* yes, we can combine them */
263 int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
264
265 p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
266 memmove(p + hi, p + hi + 1,
267 (bb->count - hi - 1) * 8);
268 bb->count--;
269 }
270 }
271 while (sectors) {
272 /* didn't merge (it all).
273 * Need to add a range just before 'hi'
274 */
275 if (bb->count >= MAX_BADBLOCKS) {
276 /* No room for more */
277 rv = 1;
278 break;
279 } else {
280 int this_sectors = sectors;
281
282 memmove(p + hi + 1, p + hi,
283 (bb->count - hi) * 8);
284 bb->count++;
285
286 if (this_sectors > BB_MAX_LEN)
287 this_sectors = BB_MAX_LEN;
288 p[hi] = BB_MAKE(s, this_sectors, acknowledged);
289 sectors -= this_sectors;
290 s += this_sectors;
291 }
292 }
293
294 bb->changed = 1;
295 if (!acknowledged)
296 bb->unacked_exist = 1;
297 write_sequnlock_irqrestore(&bb->lock, flags);
298
299 return rv;
300}
301EXPORT_SYMBOL_GPL(badblocks_set);
302
303/**
304 * badblocks_clear() - Remove a range of bad blocks to the table.
305 * @bb: the badblocks structure that holds all badblock information
306 * @s: first sector to mark as bad
307 * @sectors: number of sectors to mark as bad
308 *
309 * This may involve extending the table if we spilt a region,
310 * but it must not fail. So if the table becomes full, we just
311 * drop the remove request.
312 *
313 * Return:
314 * 0: success
315 * 1: failed to clear badblocks
316 */
317int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
318{
319 u64 *p;
320 int lo, hi;
321 sector_t target = s + sectors;
322 int rv = 0;
323
324 if (bb->shift > 0) {
325 /* When clearing we round the start up and the end down.
326 * This should not matter as the shift should align with
327 * the block size and no rounding should ever be needed.
328 * However it is better the think a block is bad when it
329 * isn't than to think a block is not bad when it is.
330 */
331 s += (1<<bb->shift) - 1;
332 s >>= bb->shift;
333 target >>= bb->shift;
334 sectors = target - s;
335 }
336
337 write_seqlock_irq(&bb->lock);
338
339 p = bb->page;
340 lo = 0;
341 hi = bb->count;
342 /* Find the last range that starts before 'target' */
343 while (hi - lo > 1) {
344 int mid = (lo + hi) / 2;
345 sector_t a = BB_OFFSET(p[mid]);
346
347 if (a < target)
348 lo = mid;
349 else
350 hi = mid;
351 }
352 if (hi > lo) {
353 /* p[lo] is the last range that could overlap the
354 * current range. Earlier ranges could also overlap,
355 * but only this one can overlap the end of the range.
356 */
357 if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
358 /* Partial overlap, leave the tail of this range */
359 int ack = BB_ACK(p[lo]);
360 sector_t a = BB_OFFSET(p[lo]);
361 sector_t end = a + BB_LEN(p[lo]);
362
363 if (a < s) {
364 /* we need to split this range */
365 if (bb->count >= MAX_BADBLOCKS) {
366 rv = -ENOSPC;
367 goto out;
368 }
369 memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
370 bb->count++;
371 p[lo] = BB_MAKE(a, s-a, ack);
372 lo++;
373 }
374 p[lo] = BB_MAKE(target, end - target, ack);
375 /* there is no longer an overlap */
376 hi = lo;
377 lo--;
378 }
379 while (lo >= 0 &&
380 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
381 /* This range does overlap */
382 if (BB_OFFSET(p[lo]) < s) {
383 /* Keep the early parts of this range. */
384 int ack = BB_ACK(p[lo]);
385 sector_t start = BB_OFFSET(p[lo]);
386
387 p[lo] = BB_MAKE(start, s - start, ack);
388 /* now low doesn't overlap, so.. */
389 break;
390 }
391 lo--;
392 }
393 /* 'lo' is strictly before, 'hi' is strictly after,
394 * anything between needs to be discarded
395 */
396 if (hi - lo > 1) {
397 memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
398 bb->count -= (hi - lo - 1);
399 }
400 }
401
402 bb->changed = 1;
403out:
404 write_sequnlock_irq(&bb->lock);
405 return rv;
406}
407EXPORT_SYMBOL_GPL(badblocks_clear);
408
409/**
410 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
411 * @bb: the badblocks structure that holds all badblock information
412 *
413 * This only succeeds if ->changed is clear. It is used by
414 * in-kernel metadata updates
415 */
416void ack_all_badblocks(struct badblocks *bb)
417{
418 if (bb->page == NULL || bb->changed)
419 /* no point even trying */
420 return;
421 write_seqlock_irq(&bb->lock);
422
423 if (bb->changed == 0 && bb->unacked_exist) {
424 u64 *p = bb->page;
425 int i;
426
427 for (i = 0; i < bb->count ; i++) {
428 if (!BB_ACK(p[i])) {
429 sector_t start = BB_OFFSET(p[i]);
430 int len = BB_LEN(p[i]);
431
432 p[i] = BB_MAKE(start, len, 1);
433 }
434 }
435 bb->unacked_exist = 0;
436 }
437 write_sequnlock_irq(&bb->lock);
438}
439EXPORT_SYMBOL_GPL(ack_all_badblocks);
440
441/**
442 * badblocks_show() - sysfs access to bad-blocks list
443 * @bb: the badblocks structure that holds all badblock information
444 * @page: buffer received from sysfs
445 * @unack: weather to show unacknowledged badblocks
446 *
447 * Return:
448 * Length of returned data
449 */
450ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
451{
452 size_t len;
453 int i;
454 u64 *p = bb->page;
455 unsigned seq;
456
457 if (bb->shift < 0)
458 return 0;
459
460retry:
461 seq = read_seqbegin(&bb->lock);
462
463 len = 0;
464 i = 0;
465
466 while (len < PAGE_SIZE && i < bb->count) {
467 sector_t s = BB_OFFSET(p[i]);
468 unsigned int length = BB_LEN(p[i]);
469 int ack = BB_ACK(p[i]);
470
471 i++;
472
473 if (unack && ack)
474 continue;
475
476 len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
477 (unsigned long long)s << bb->shift,
478 length << bb->shift);
479 }
480 if (unack && len == 0)
481 bb->unacked_exist = 0;
482
483 if (read_seqretry(&bb->lock, seq))
484 goto retry;
485
486 return len;
487}
488EXPORT_SYMBOL_GPL(badblocks_show);
489
490/**
491 * badblocks_store() - sysfs access to bad-blocks list
492 * @bb: the badblocks structure that holds all badblock information
493 * @page: buffer received from sysfs
494 * @len: length of data received from sysfs
495 * @unack: weather to show unacknowledged badblocks
496 *
497 * Return:
498 * Length of the buffer processed or -ve error.
499 */
500ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
501 int unack)
502{
503 unsigned long long sector;
504 int length;
505 char newline;
506
507 switch (sscanf(page, "%llu %d%c", §or, &length, &newline)) {
508 case 3:
509 if (newline != '\n')
510 return -EINVAL;
511 case 2:
512 if (length <= 0)
513 return -EINVAL;
514 break;
515 default:
516 return -EINVAL;
517 }
518
519 if (badblocks_set(bb, sector, length, !unack))
520 return -ENOSPC;
521 else
522 return len;
523}
524EXPORT_SYMBOL_GPL(badblocks_store);
525
526static int __badblocks_init(struct device *dev, struct badblocks *bb,
527 int enable)
528{
529 bb->dev = dev;
530 bb->count = 0;
531 if (enable)
532 bb->shift = 0;
533 else
534 bb->shift = -1;
535 if (dev)
536 bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
537 else
538 bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
539 if (!bb->page) {
540 bb->shift = -1;
541 return -ENOMEM;
542 }
543 seqlock_init(&bb->lock);
544
545 return 0;
546}
547
548/**
549 * badblocks_init() - initialize the badblocks structure
550 * @bb: the badblocks structure that holds all badblock information
551 * @enable: weather to enable badblocks accounting
552 *
553 * Return:
554 * 0: success
555 * -ve errno: on error
556 */
557int badblocks_init(struct badblocks *bb, int enable)
558{
559 return __badblocks_init(NULL, bb, enable);
560}
561EXPORT_SYMBOL_GPL(badblocks_init);
562
563int devm_init_badblocks(struct device *dev, struct badblocks *bb)
564{
565 if (!bb)
566 return -EINVAL;
567 return __badblocks_init(dev, bb, 1);
568}
569EXPORT_SYMBOL_GPL(devm_init_badblocks);
570
571/**
572 * badblocks_exit() - free the badblocks structure
573 * @bb: the badblocks structure that holds all badblock information
574 */
575void badblocks_exit(struct badblocks *bb)
576{
577 if (!bb)
578 return;
579 if (bb->dev)
580 devm_kfree(bb->dev, bb->page);
581 else
582 kfree(bb->page);
583 bb->page = NULL;
584}
585EXPORT_SYMBOL_GPL(badblocks_exit);
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Bad block management
4 *
5 * - Heavily based on MD badblocks code from Neil Brown
6 *
7 * Copyright (c) 2015, Intel Corporation.
8 */
9
10#include <linux/badblocks.h>
11#include <linux/seqlock.h>
12#include <linux/device.h>
13#include <linux/kernel.h>
14#include <linux/module.h>
15#include <linux/stddef.h>
16#include <linux/types.h>
17#include <linux/slab.h>
18
19/**
20 * badblocks_check() - check a given range for bad sectors
21 * @bb: the badblocks structure that holds all badblock information
22 * @s: sector (start) at which to check for badblocks
23 * @sectors: number of sectors to check for badblocks
24 * @first_bad: pointer to store location of the first badblock
25 * @bad_sectors: pointer to store number of badblocks after @first_bad
26 *
27 * We can record which blocks on each device are 'bad' and so just
28 * fail those blocks, or that stripe, rather than the whole device.
29 * Entries in the bad-block table are 64bits wide. This comprises:
30 * Length of bad-range, in sectors: 0-511 for lengths 1-512
31 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
32 * A 'shift' can be set so that larger blocks are tracked and
33 * consequently larger devices can be covered.
34 * 'Acknowledged' flag - 1 bit. - the most significant bit.
35 *
36 * Locking of the bad-block table uses a seqlock so badblocks_check
37 * might need to retry if it is very unlucky.
38 * We will sometimes want to check for bad blocks in a bi_end_io function,
39 * so we use the write_seqlock_irq variant.
40 *
41 * When looking for a bad block we specify a range and want to
42 * know if any block in the range is bad. So we binary-search
43 * to the last range that starts at-or-before the given endpoint,
44 * (or "before the sector after the target range")
45 * then see if it ends after the given start.
46 *
47 * Return:
48 * 0: there are no known bad blocks in the range
49 * 1: there are known bad block which are all acknowledged
50 * -1: there are bad blocks which have not yet been acknowledged in metadata.
51 * plus the start/length of the first bad section we overlap.
52 */
53int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
54 sector_t *first_bad, int *bad_sectors)
55{
56 int hi;
57 int lo;
58 u64 *p = bb->page;
59 int rv;
60 sector_t target = s + sectors;
61 unsigned seq;
62
63 if (bb->shift > 0) {
64 /* round the start down, and the end up */
65 s >>= bb->shift;
66 target += (1<<bb->shift) - 1;
67 target >>= bb->shift;
68 }
69 /* 'target' is now the first block after the bad range */
70
71retry:
72 seq = read_seqbegin(&bb->lock);
73 lo = 0;
74 rv = 0;
75 hi = bb->count;
76
77 /* Binary search between lo and hi for 'target'
78 * i.e. for the last range that starts before 'target'
79 */
80 /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
81 * are known not to be the last range before target.
82 * VARIANT: hi-lo is the number of possible
83 * ranges, and decreases until it reaches 1
84 */
85 while (hi - lo > 1) {
86 int mid = (lo + hi) / 2;
87 sector_t a = BB_OFFSET(p[mid]);
88
89 if (a < target)
90 /* This could still be the one, earlier ranges
91 * could not.
92 */
93 lo = mid;
94 else
95 /* This and later ranges are definitely out. */
96 hi = mid;
97 }
98 /* 'lo' might be the last that started before target, but 'hi' isn't */
99 if (hi > lo) {
100 /* need to check all range that end after 's' to see if
101 * any are unacknowledged.
102 */
103 while (lo >= 0 &&
104 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
105 if (BB_OFFSET(p[lo]) < target) {
106 /* starts before the end, and finishes after
107 * the start, so they must overlap
108 */
109 if (rv != -1 && BB_ACK(p[lo]))
110 rv = 1;
111 else
112 rv = -1;
113 *first_bad = BB_OFFSET(p[lo]);
114 *bad_sectors = BB_LEN(p[lo]);
115 }
116 lo--;
117 }
118 }
119
120 if (read_seqretry(&bb->lock, seq))
121 goto retry;
122
123 return rv;
124}
125EXPORT_SYMBOL_GPL(badblocks_check);
126
127static void badblocks_update_acked(struct badblocks *bb)
128{
129 u64 *p = bb->page;
130 int i;
131 bool unacked = false;
132
133 if (!bb->unacked_exist)
134 return;
135
136 for (i = 0; i < bb->count ; i++) {
137 if (!BB_ACK(p[i])) {
138 unacked = true;
139 break;
140 }
141 }
142
143 if (!unacked)
144 bb->unacked_exist = 0;
145}
146
147/**
148 * badblocks_set() - Add a range of bad blocks to the table.
149 * @bb: the badblocks structure that holds all badblock information
150 * @s: first sector to mark as bad
151 * @sectors: number of sectors to mark as bad
152 * @acknowledged: weather to mark the bad sectors as acknowledged
153 *
154 * This might extend the table, or might contract it if two adjacent ranges
155 * can be merged. We binary-search to find the 'insertion' point, then
156 * decide how best to handle it.
157 *
158 * Return:
159 * 0: success
160 * 1: failed to set badblocks (out of space)
161 */
162int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
163 int acknowledged)
164{
165 u64 *p;
166 int lo, hi;
167 int rv = 0;
168 unsigned long flags;
169
170 if (bb->shift < 0)
171 /* badblocks are disabled */
172 return 1;
173
174 if (bb->shift) {
175 /* round the start down, and the end up */
176 sector_t next = s + sectors;
177
178 s >>= bb->shift;
179 next += (1<<bb->shift) - 1;
180 next >>= bb->shift;
181 sectors = next - s;
182 }
183
184 write_seqlock_irqsave(&bb->lock, flags);
185
186 p = bb->page;
187 lo = 0;
188 hi = bb->count;
189 /* Find the last range that starts at-or-before 's' */
190 while (hi - lo > 1) {
191 int mid = (lo + hi) / 2;
192 sector_t a = BB_OFFSET(p[mid]);
193
194 if (a <= s)
195 lo = mid;
196 else
197 hi = mid;
198 }
199 if (hi > lo && BB_OFFSET(p[lo]) > s)
200 hi = lo;
201
202 if (hi > lo) {
203 /* we found a range that might merge with the start
204 * of our new range
205 */
206 sector_t a = BB_OFFSET(p[lo]);
207 sector_t e = a + BB_LEN(p[lo]);
208 int ack = BB_ACK(p[lo]);
209
210 if (e >= s) {
211 /* Yes, we can merge with a previous range */
212 if (s == a && s + sectors >= e)
213 /* new range covers old */
214 ack = acknowledged;
215 else
216 ack = ack && acknowledged;
217
218 if (e < s + sectors)
219 e = s + sectors;
220 if (e - a <= BB_MAX_LEN) {
221 p[lo] = BB_MAKE(a, e-a, ack);
222 s = e;
223 } else {
224 /* does not all fit in one range,
225 * make p[lo] maximal
226 */
227 if (BB_LEN(p[lo]) != BB_MAX_LEN)
228 p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
229 s = a + BB_MAX_LEN;
230 }
231 sectors = e - s;
232 }
233 }
234 if (sectors && hi < bb->count) {
235 /* 'hi' points to the first range that starts after 's'.
236 * Maybe we can merge with the start of that range
237 */
238 sector_t a = BB_OFFSET(p[hi]);
239 sector_t e = a + BB_LEN(p[hi]);
240 int ack = BB_ACK(p[hi]);
241
242 if (a <= s + sectors) {
243 /* merging is possible */
244 if (e <= s + sectors) {
245 /* full overlap */
246 e = s + sectors;
247 ack = acknowledged;
248 } else
249 ack = ack && acknowledged;
250
251 a = s;
252 if (e - a <= BB_MAX_LEN) {
253 p[hi] = BB_MAKE(a, e-a, ack);
254 s = e;
255 } else {
256 p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
257 s = a + BB_MAX_LEN;
258 }
259 sectors = e - s;
260 lo = hi;
261 hi++;
262 }
263 }
264 if (sectors == 0 && hi < bb->count) {
265 /* we might be able to combine lo and hi */
266 /* Note: 's' is at the end of 'lo' */
267 sector_t a = BB_OFFSET(p[hi]);
268 int lolen = BB_LEN(p[lo]);
269 int hilen = BB_LEN(p[hi]);
270 int newlen = lolen + hilen - (s - a);
271
272 if (s >= a && newlen < BB_MAX_LEN) {
273 /* yes, we can combine them */
274 int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
275
276 p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
277 memmove(p + hi, p + hi + 1,
278 (bb->count - hi - 1) * 8);
279 bb->count--;
280 }
281 }
282 while (sectors) {
283 /* didn't merge (it all).
284 * Need to add a range just before 'hi'
285 */
286 if (bb->count >= MAX_BADBLOCKS) {
287 /* No room for more */
288 rv = 1;
289 break;
290 } else {
291 int this_sectors = sectors;
292
293 memmove(p + hi + 1, p + hi,
294 (bb->count - hi) * 8);
295 bb->count++;
296
297 if (this_sectors > BB_MAX_LEN)
298 this_sectors = BB_MAX_LEN;
299 p[hi] = BB_MAKE(s, this_sectors, acknowledged);
300 sectors -= this_sectors;
301 s += this_sectors;
302 }
303 }
304
305 bb->changed = 1;
306 if (!acknowledged)
307 bb->unacked_exist = 1;
308 else
309 badblocks_update_acked(bb);
310 write_sequnlock_irqrestore(&bb->lock, flags);
311
312 return rv;
313}
314EXPORT_SYMBOL_GPL(badblocks_set);
315
316/**
317 * badblocks_clear() - Remove a range of bad blocks to the table.
318 * @bb: the badblocks structure that holds all badblock information
319 * @s: first sector to mark as bad
320 * @sectors: number of sectors to mark as bad
321 *
322 * This may involve extending the table if we spilt a region,
323 * but it must not fail. So if the table becomes full, we just
324 * drop the remove request.
325 *
326 * Return:
327 * 0: success
328 * 1: failed to clear badblocks
329 */
330int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
331{
332 u64 *p;
333 int lo, hi;
334 sector_t target = s + sectors;
335 int rv = 0;
336
337 if (bb->shift > 0) {
338 /* When clearing we round the start up and the end down.
339 * This should not matter as the shift should align with
340 * the block size and no rounding should ever be needed.
341 * However it is better the think a block is bad when it
342 * isn't than to think a block is not bad when it is.
343 */
344 s += (1<<bb->shift) - 1;
345 s >>= bb->shift;
346 target >>= bb->shift;
347 }
348
349 write_seqlock_irq(&bb->lock);
350
351 p = bb->page;
352 lo = 0;
353 hi = bb->count;
354 /* Find the last range that starts before 'target' */
355 while (hi - lo > 1) {
356 int mid = (lo + hi) / 2;
357 sector_t a = BB_OFFSET(p[mid]);
358
359 if (a < target)
360 lo = mid;
361 else
362 hi = mid;
363 }
364 if (hi > lo) {
365 /* p[lo] is the last range that could overlap the
366 * current range. Earlier ranges could also overlap,
367 * but only this one can overlap the end of the range.
368 */
369 if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
370 (BB_OFFSET(p[lo]) < target)) {
371 /* Partial overlap, leave the tail of this range */
372 int ack = BB_ACK(p[lo]);
373 sector_t a = BB_OFFSET(p[lo]);
374 sector_t end = a + BB_LEN(p[lo]);
375
376 if (a < s) {
377 /* we need to split this range */
378 if (bb->count >= MAX_BADBLOCKS) {
379 rv = -ENOSPC;
380 goto out;
381 }
382 memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
383 bb->count++;
384 p[lo] = BB_MAKE(a, s-a, ack);
385 lo++;
386 }
387 p[lo] = BB_MAKE(target, end - target, ack);
388 /* there is no longer an overlap */
389 hi = lo;
390 lo--;
391 }
392 while (lo >= 0 &&
393 (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
394 (BB_OFFSET(p[lo]) < target)) {
395 /* This range does overlap */
396 if (BB_OFFSET(p[lo]) < s) {
397 /* Keep the early parts of this range. */
398 int ack = BB_ACK(p[lo]);
399 sector_t start = BB_OFFSET(p[lo]);
400
401 p[lo] = BB_MAKE(start, s - start, ack);
402 /* now low doesn't overlap, so.. */
403 break;
404 }
405 lo--;
406 }
407 /* 'lo' is strictly before, 'hi' is strictly after,
408 * anything between needs to be discarded
409 */
410 if (hi - lo > 1) {
411 memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
412 bb->count -= (hi - lo - 1);
413 }
414 }
415
416 badblocks_update_acked(bb);
417 bb->changed = 1;
418out:
419 write_sequnlock_irq(&bb->lock);
420 return rv;
421}
422EXPORT_SYMBOL_GPL(badblocks_clear);
423
424/**
425 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
426 * @bb: the badblocks structure that holds all badblock information
427 *
428 * This only succeeds if ->changed is clear. It is used by
429 * in-kernel metadata updates
430 */
431void ack_all_badblocks(struct badblocks *bb)
432{
433 if (bb->page == NULL || bb->changed)
434 /* no point even trying */
435 return;
436 write_seqlock_irq(&bb->lock);
437
438 if (bb->changed == 0 && bb->unacked_exist) {
439 u64 *p = bb->page;
440 int i;
441
442 for (i = 0; i < bb->count ; i++) {
443 if (!BB_ACK(p[i])) {
444 sector_t start = BB_OFFSET(p[i]);
445 int len = BB_LEN(p[i]);
446
447 p[i] = BB_MAKE(start, len, 1);
448 }
449 }
450 bb->unacked_exist = 0;
451 }
452 write_sequnlock_irq(&bb->lock);
453}
454EXPORT_SYMBOL_GPL(ack_all_badblocks);
455
456/**
457 * badblocks_show() - sysfs access to bad-blocks list
458 * @bb: the badblocks structure that holds all badblock information
459 * @page: buffer received from sysfs
460 * @unack: weather to show unacknowledged badblocks
461 *
462 * Return:
463 * Length of returned data
464 */
465ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
466{
467 size_t len;
468 int i;
469 u64 *p = bb->page;
470 unsigned seq;
471
472 if (bb->shift < 0)
473 return 0;
474
475retry:
476 seq = read_seqbegin(&bb->lock);
477
478 len = 0;
479 i = 0;
480
481 while (len < PAGE_SIZE && i < bb->count) {
482 sector_t s = BB_OFFSET(p[i]);
483 unsigned int length = BB_LEN(p[i]);
484 int ack = BB_ACK(p[i]);
485
486 i++;
487
488 if (unack && ack)
489 continue;
490
491 len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
492 (unsigned long long)s << bb->shift,
493 length << bb->shift);
494 }
495 if (unack && len == 0)
496 bb->unacked_exist = 0;
497
498 if (read_seqretry(&bb->lock, seq))
499 goto retry;
500
501 return len;
502}
503EXPORT_SYMBOL_GPL(badblocks_show);
504
505/**
506 * badblocks_store() - sysfs access to bad-blocks list
507 * @bb: the badblocks structure that holds all badblock information
508 * @page: buffer received from sysfs
509 * @len: length of data received from sysfs
510 * @unack: weather to show unacknowledged badblocks
511 *
512 * Return:
513 * Length of the buffer processed or -ve error.
514 */
515ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
516 int unack)
517{
518 unsigned long long sector;
519 int length;
520 char newline;
521
522 switch (sscanf(page, "%llu %d%c", §or, &length, &newline)) {
523 case 3:
524 if (newline != '\n')
525 return -EINVAL;
526 fallthrough;
527 case 2:
528 if (length <= 0)
529 return -EINVAL;
530 break;
531 default:
532 return -EINVAL;
533 }
534
535 if (badblocks_set(bb, sector, length, !unack))
536 return -ENOSPC;
537 else
538 return len;
539}
540EXPORT_SYMBOL_GPL(badblocks_store);
541
542static int __badblocks_init(struct device *dev, struct badblocks *bb,
543 int enable)
544{
545 bb->dev = dev;
546 bb->count = 0;
547 if (enable)
548 bb->shift = 0;
549 else
550 bb->shift = -1;
551 if (dev)
552 bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
553 else
554 bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
555 if (!bb->page) {
556 bb->shift = -1;
557 return -ENOMEM;
558 }
559 seqlock_init(&bb->lock);
560
561 return 0;
562}
563
564/**
565 * badblocks_init() - initialize the badblocks structure
566 * @bb: the badblocks structure that holds all badblock information
567 * @enable: weather to enable badblocks accounting
568 *
569 * Return:
570 * 0: success
571 * -ve errno: on error
572 */
573int badblocks_init(struct badblocks *bb, int enable)
574{
575 return __badblocks_init(NULL, bb, enable);
576}
577EXPORT_SYMBOL_GPL(badblocks_init);
578
579int devm_init_badblocks(struct device *dev, struct badblocks *bb)
580{
581 if (!bb)
582 return -EINVAL;
583 return __badblocks_init(dev, bb, 1);
584}
585EXPORT_SYMBOL_GPL(devm_init_badblocks);
586
587/**
588 * badblocks_exit() - free the badblocks structure
589 * @bb: the badblocks structure that holds all badblock information
590 */
591void badblocks_exit(struct badblocks *bb)
592{
593 if (!bb)
594 return;
595 if (bb->dev)
596 devm_kfree(bb->dev, bb->page);
597 else
598 kfree(bb->page);
599 bb->page = NULL;
600}
601EXPORT_SYMBOL_GPL(badblocks_exit);