Linux Audio

Check our new training course

Linux kernel drivers training

Mar 31-Apr 9, 2025, special US time zones
Register
Loading...
v4.6
 
  1/*
  2 * Bad block management
  3 *
  4 * - Heavily based on MD badblocks code from Neil Brown
  5 *
  6 * Copyright (c) 2015, Intel Corporation.
  7 *
  8 * This program is free software; you can redistribute it and/or modify it
  9 * under the terms and conditions of the GNU General Public License,
 10 * version 2, as published by the Free Software Foundation.
 11 *
 12 * This program is distributed in the hope it will be useful, but WITHOUT
 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 14 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 15 * more details.
 16 */
 17
 18#include <linux/badblocks.h>
 19#include <linux/seqlock.h>
 20#include <linux/device.h>
 21#include <linux/kernel.h>
 22#include <linux/module.h>
 23#include <linux/stddef.h>
 24#include <linux/types.h>
 25#include <linux/slab.h>
 26
 27/**
 28 * badblocks_check() - check a given range for bad sectors
 29 * @bb:		the badblocks structure that holds all badblock information
 30 * @s:		sector (start) at which to check for badblocks
 31 * @sectors:	number of sectors to check for badblocks
 32 * @first_bad:	pointer to store location of the first badblock
 33 * @bad_sectors: pointer to store number of badblocks after @first_bad
 34 *
 35 * We can record which blocks on each device are 'bad' and so just
 36 * fail those blocks, or that stripe, rather than the whole device.
 37 * Entries in the bad-block table are 64bits wide.  This comprises:
 38 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 39 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 40 *  A 'shift' can be set so that larger blocks are tracked and
 41 *  consequently larger devices can be covered.
 42 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 43 *
 44 * Locking of the bad-block table uses a seqlock so badblocks_check
 45 * might need to retry if it is very unlucky.
 46 * We will sometimes want to check for bad blocks in a bi_end_io function,
 47 * so we use the write_seqlock_irq variant.
 48 *
 49 * When looking for a bad block we specify a range and want to
 50 * know if any block in the range is bad.  So we binary-search
 51 * to the last range that starts at-or-before the given endpoint,
 52 * (or "before the sector after the target range")
 53 * then see if it ends after the given start.
 54 *
 55 * Return:
 56 *  0: there are no known bad blocks in the range
 57 *  1: there are known bad block which are all acknowledged
 58 * -1: there are bad blocks which have not yet been acknowledged in metadata.
 59 * plus the start/length of the first bad section we overlap.
 60 */
 61int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
 62			sector_t *first_bad, int *bad_sectors)
 63{
 64	int hi;
 65	int lo;
 66	u64 *p = bb->page;
 67	int rv;
 68	sector_t target = s + sectors;
 69	unsigned seq;
 70
 71	if (bb->shift > 0) {
 72		/* round the start down, and the end up */
 73		s >>= bb->shift;
 74		target += (1<<bb->shift) - 1;
 75		target >>= bb->shift;
 76		sectors = target - s;
 77	}
 78	/* 'target' is now the first block after the bad range */
 79
 80retry:
 81	seq = read_seqbegin(&bb->lock);
 82	lo = 0;
 83	rv = 0;
 84	hi = bb->count;
 85
 86	/* Binary search between lo and hi for 'target'
 87	 * i.e. for the last range that starts before 'target'
 88	 */
 89	/* INVARIANT: ranges before 'lo' and at-or-after 'hi'
 90	 * are known not to be the last range before target.
 91	 * VARIANT: hi-lo is the number of possible
 92	 * ranges, and decreases until it reaches 1
 93	 */
 94	while (hi - lo > 1) {
 95		int mid = (lo + hi) / 2;
 96		sector_t a = BB_OFFSET(p[mid]);
 97
 98		if (a < target)
 99			/* This could still be the one, earlier ranges
100			 * could not.
101			 */
102			lo = mid;
103		else
104			/* This and later ranges are definitely out. */
105			hi = mid;
106	}
107	/* 'lo' might be the last that started before target, but 'hi' isn't */
108	if (hi > lo) {
109		/* need to check all range that end after 's' to see if
110		 * any are unacknowledged.
111		 */
112		while (lo >= 0 &&
113		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
114			if (BB_OFFSET(p[lo]) < target) {
115				/* starts before the end, and finishes after
116				 * the start, so they must overlap
117				 */
118				if (rv != -1 && BB_ACK(p[lo]))
119					rv = 1;
120				else
121					rv = -1;
122				*first_bad = BB_OFFSET(p[lo]);
123				*bad_sectors = BB_LEN(p[lo]);
124			}
125			lo--;
126		}
127	}
128
129	if (read_seqretry(&bb->lock, seq))
130		goto retry;
131
132	return rv;
133}
134EXPORT_SYMBOL_GPL(badblocks_check);
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136/**
137 * badblocks_set() - Add a range of bad blocks to the table.
138 * @bb:		the badblocks structure that holds all badblock information
139 * @s:		first sector to mark as bad
140 * @sectors:	number of sectors to mark as bad
141 * @acknowledged: weather to mark the bad sectors as acknowledged
142 *
143 * This might extend the table, or might contract it if two adjacent ranges
144 * can be merged. We binary-search to find the 'insertion' point, then
145 * decide how best to handle it.
146 *
147 * Return:
148 *  0: success
149 *  1: failed to set badblocks (out of space)
150 */
151int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
152			int acknowledged)
153{
154	u64 *p;
155	int lo, hi;
156	int rv = 0;
157	unsigned long flags;
158
159	if (bb->shift < 0)
160		/* badblocks are disabled */
161		return 0;
162
163	if (bb->shift) {
164		/* round the start down, and the end up */
165		sector_t next = s + sectors;
166
167		s >>= bb->shift;
168		next += (1<<bb->shift) - 1;
169		next >>= bb->shift;
170		sectors = next - s;
171	}
172
173	write_seqlock_irqsave(&bb->lock, flags);
174
175	p = bb->page;
176	lo = 0;
177	hi = bb->count;
178	/* Find the last range that starts at-or-before 's' */
179	while (hi - lo > 1) {
180		int mid = (lo + hi) / 2;
181		sector_t a = BB_OFFSET(p[mid]);
182
183		if (a <= s)
184			lo = mid;
185		else
186			hi = mid;
187	}
188	if (hi > lo && BB_OFFSET(p[lo]) > s)
189		hi = lo;
190
191	if (hi > lo) {
192		/* we found a range that might merge with the start
193		 * of our new range
194		 */
195		sector_t a = BB_OFFSET(p[lo]);
196		sector_t e = a + BB_LEN(p[lo]);
197		int ack = BB_ACK(p[lo]);
198
199		if (e >= s) {
200			/* Yes, we can merge with a previous range */
201			if (s == a && s + sectors >= e)
202				/* new range covers old */
203				ack = acknowledged;
204			else
205				ack = ack && acknowledged;
206
207			if (e < s + sectors)
208				e = s + sectors;
209			if (e - a <= BB_MAX_LEN) {
210				p[lo] = BB_MAKE(a, e-a, ack);
211				s = e;
212			} else {
213				/* does not all fit in one range,
214				 * make p[lo] maximal
215				 */
216				if (BB_LEN(p[lo]) != BB_MAX_LEN)
217					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
218				s = a + BB_MAX_LEN;
219			}
220			sectors = e - s;
221		}
222	}
223	if (sectors && hi < bb->count) {
224		/* 'hi' points to the first range that starts after 's'.
225		 * Maybe we can merge with the start of that range
226		 */
227		sector_t a = BB_OFFSET(p[hi]);
228		sector_t e = a + BB_LEN(p[hi]);
229		int ack = BB_ACK(p[hi]);
230
231		if (a <= s + sectors) {
232			/* merging is possible */
233			if (e <= s + sectors) {
234				/* full overlap */
235				e = s + sectors;
236				ack = acknowledged;
237			} else
238				ack = ack && acknowledged;
239
240			a = s;
241			if (e - a <= BB_MAX_LEN) {
242				p[hi] = BB_MAKE(a, e-a, ack);
243				s = e;
244			} else {
245				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
246				s = a + BB_MAX_LEN;
247			}
248			sectors = e - s;
249			lo = hi;
250			hi++;
251		}
252	}
253	if (sectors == 0 && hi < bb->count) {
254		/* we might be able to combine lo and hi */
255		/* Note: 's' is at the end of 'lo' */
256		sector_t a = BB_OFFSET(p[hi]);
257		int lolen = BB_LEN(p[lo]);
258		int hilen = BB_LEN(p[hi]);
259		int newlen = lolen + hilen - (s - a);
260
261		if (s >= a && newlen < BB_MAX_LEN) {
262			/* yes, we can combine them */
263			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
264
265			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
266			memmove(p + hi, p + hi + 1,
267				(bb->count - hi - 1) * 8);
268			bb->count--;
269		}
270	}
271	while (sectors) {
272		/* didn't merge (it all).
273		 * Need to add a range just before 'hi'
274		 */
275		if (bb->count >= MAX_BADBLOCKS) {
276			/* No room for more */
277			rv = 1;
278			break;
279		} else {
280			int this_sectors = sectors;
281
282			memmove(p + hi + 1, p + hi,
283				(bb->count - hi) * 8);
284			bb->count++;
285
286			if (this_sectors > BB_MAX_LEN)
287				this_sectors = BB_MAX_LEN;
288			p[hi] = BB_MAKE(s, this_sectors, acknowledged);
289			sectors -= this_sectors;
290			s += this_sectors;
291		}
292	}
293
294	bb->changed = 1;
295	if (!acknowledged)
296		bb->unacked_exist = 1;
 
 
297	write_sequnlock_irqrestore(&bb->lock, flags);
298
299	return rv;
300}
301EXPORT_SYMBOL_GPL(badblocks_set);
302
303/**
304 * badblocks_clear() - Remove a range of bad blocks to the table.
305 * @bb:		the badblocks structure that holds all badblock information
306 * @s:		first sector to mark as bad
307 * @sectors:	number of sectors to mark as bad
308 *
309 * This may involve extending the table if we spilt a region,
310 * but it must not fail.  So if the table becomes full, we just
311 * drop the remove request.
312 *
313 * Return:
314 *  0: success
315 *  1: failed to clear badblocks
316 */
317int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
318{
319	u64 *p;
320	int lo, hi;
321	sector_t target = s + sectors;
322	int rv = 0;
323
324	if (bb->shift > 0) {
325		/* When clearing we round the start up and the end down.
326		 * This should not matter as the shift should align with
327		 * the block size and no rounding should ever be needed.
328		 * However it is better the think a block is bad when it
329		 * isn't than to think a block is not bad when it is.
330		 */
331		s += (1<<bb->shift) - 1;
332		s >>= bb->shift;
333		target >>= bb->shift;
334		sectors = target - s;
335	}
336
337	write_seqlock_irq(&bb->lock);
338
339	p = bb->page;
340	lo = 0;
341	hi = bb->count;
342	/* Find the last range that starts before 'target' */
343	while (hi - lo > 1) {
344		int mid = (lo + hi) / 2;
345		sector_t a = BB_OFFSET(p[mid]);
346
347		if (a < target)
348			lo = mid;
349		else
350			hi = mid;
351	}
352	if (hi > lo) {
353		/* p[lo] is the last range that could overlap the
354		 * current range.  Earlier ranges could also overlap,
355		 * but only this one can overlap the end of the range.
356		 */
357		if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
 
358			/* Partial overlap, leave the tail of this range */
359			int ack = BB_ACK(p[lo]);
360			sector_t a = BB_OFFSET(p[lo]);
361			sector_t end = a + BB_LEN(p[lo]);
362
363			if (a < s) {
364				/* we need to split this range */
365				if (bb->count >= MAX_BADBLOCKS) {
366					rv = -ENOSPC;
367					goto out;
368				}
369				memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
370				bb->count++;
371				p[lo] = BB_MAKE(a, s-a, ack);
372				lo++;
373			}
374			p[lo] = BB_MAKE(target, end - target, ack);
375			/* there is no longer an overlap */
376			hi = lo;
377			lo--;
378		}
379		while (lo >= 0 &&
380		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
 
381			/* This range does overlap */
382			if (BB_OFFSET(p[lo]) < s) {
383				/* Keep the early parts of this range. */
384				int ack = BB_ACK(p[lo]);
385				sector_t start = BB_OFFSET(p[lo]);
386
387				p[lo] = BB_MAKE(start, s - start, ack);
388				/* now low doesn't overlap, so.. */
389				break;
390			}
391			lo--;
392		}
393		/* 'lo' is strictly before, 'hi' is strictly after,
394		 * anything between needs to be discarded
395		 */
396		if (hi - lo > 1) {
397			memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
398			bb->count -= (hi - lo - 1);
399		}
400	}
401
 
402	bb->changed = 1;
403out:
404	write_sequnlock_irq(&bb->lock);
405	return rv;
406}
407EXPORT_SYMBOL_GPL(badblocks_clear);
408
409/**
410 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
411 * @bb:		the badblocks structure that holds all badblock information
412 *
413 * This only succeeds if ->changed is clear.  It is used by
414 * in-kernel metadata updates
415 */
416void ack_all_badblocks(struct badblocks *bb)
417{
418	if (bb->page == NULL || bb->changed)
419		/* no point even trying */
420		return;
421	write_seqlock_irq(&bb->lock);
422
423	if (bb->changed == 0 && bb->unacked_exist) {
424		u64 *p = bb->page;
425		int i;
426
427		for (i = 0; i < bb->count ; i++) {
428			if (!BB_ACK(p[i])) {
429				sector_t start = BB_OFFSET(p[i]);
430				int len = BB_LEN(p[i]);
431
432				p[i] = BB_MAKE(start, len, 1);
433			}
434		}
435		bb->unacked_exist = 0;
436	}
437	write_sequnlock_irq(&bb->lock);
438}
439EXPORT_SYMBOL_GPL(ack_all_badblocks);
440
441/**
442 * badblocks_show() - sysfs access to bad-blocks list
443 * @bb:		the badblocks structure that holds all badblock information
444 * @page:	buffer received from sysfs
445 * @unack:	weather to show unacknowledged badblocks
446 *
447 * Return:
448 *  Length of returned data
449 */
450ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
451{
452	size_t len;
453	int i;
454	u64 *p = bb->page;
455	unsigned seq;
456
457	if (bb->shift < 0)
458		return 0;
459
460retry:
461	seq = read_seqbegin(&bb->lock);
462
463	len = 0;
464	i = 0;
465
466	while (len < PAGE_SIZE && i < bb->count) {
467		sector_t s = BB_OFFSET(p[i]);
468		unsigned int length = BB_LEN(p[i]);
469		int ack = BB_ACK(p[i]);
470
471		i++;
472
473		if (unack && ack)
474			continue;
475
476		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
477				(unsigned long long)s << bb->shift,
478				length << bb->shift);
479	}
480	if (unack && len == 0)
481		bb->unacked_exist = 0;
482
483	if (read_seqretry(&bb->lock, seq))
484		goto retry;
485
486	return len;
487}
488EXPORT_SYMBOL_GPL(badblocks_show);
489
490/**
491 * badblocks_store() - sysfs access to bad-blocks list
492 * @bb:		the badblocks structure that holds all badblock information
493 * @page:	buffer received from sysfs
494 * @len:	length of data received from sysfs
495 * @unack:	weather to show unacknowledged badblocks
496 *
497 * Return:
498 *  Length of the buffer processed or -ve error.
499 */
500ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
501			int unack)
502{
503	unsigned long long sector;
504	int length;
505	char newline;
506
507	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
508	case 3:
509		if (newline != '\n')
510			return -EINVAL;
 
511	case 2:
512		if (length <= 0)
513			return -EINVAL;
514		break;
515	default:
516		return -EINVAL;
517	}
518
519	if (badblocks_set(bb, sector, length, !unack))
520		return -ENOSPC;
521	else
522		return len;
523}
524EXPORT_SYMBOL_GPL(badblocks_store);
525
526static int __badblocks_init(struct device *dev, struct badblocks *bb,
527		int enable)
528{
529	bb->dev = dev;
530	bb->count = 0;
531	if (enable)
532		bb->shift = 0;
533	else
534		bb->shift = -1;
535	if (dev)
536		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
537	else
538		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
539	if (!bb->page) {
540		bb->shift = -1;
541		return -ENOMEM;
542	}
543	seqlock_init(&bb->lock);
544
545	return 0;
546}
547
548/**
549 * badblocks_init() - initialize the badblocks structure
550 * @bb:		the badblocks structure that holds all badblock information
551 * @enable:	weather to enable badblocks accounting
552 *
553 * Return:
554 *  0: success
555 *  -ve errno: on error
556 */
557int badblocks_init(struct badblocks *bb, int enable)
558{
559	return __badblocks_init(NULL, bb, enable);
560}
561EXPORT_SYMBOL_GPL(badblocks_init);
562
563int devm_init_badblocks(struct device *dev, struct badblocks *bb)
564{
565	if (!bb)
566		return -EINVAL;
567	return __badblocks_init(dev, bb, 1);
568}
569EXPORT_SYMBOL_GPL(devm_init_badblocks);
570
571/**
572 * badblocks_exit() - free the badblocks structure
573 * @bb:		the badblocks structure that holds all badblock information
574 */
575void badblocks_exit(struct badblocks *bb)
576{
577	if (!bb)
578		return;
579	if (bb->dev)
580		devm_kfree(bb->dev, bb->page);
581	else
582		kfree(bb->page);
583	bb->page = NULL;
584}
585EXPORT_SYMBOL_GPL(badblocks_exit);
v5.4
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Bad block management
  4 *
  5 * - Heavily based on MD badblocks code from Neil Brown
  6 *
  7 * Copyright (c) 2015, Intel Corporation.
 
 
 
 
 
 
 
 
 
  8 */
  9
 10#include <linux/badblocks.h>
 11#include <linux/seqlock.h>
 12#include <linux/device.h>
 13#include <linux/kernel.h>
 14#include <linux/module.h>
 15#include <linux/stddef.h>
 16#include <linux/types.h>
 17#include <linux/slab.h>
 18
 19/**
 20 * badblocks_check() - check a given range for bad sectors
 21 * @bb:		the badblocks structure that holds all badblock information
 22 * @s:		sector (start) at which to check for badblocks
 23 * @sectors:	number of sectors to check for badblocks
 24 * @first_bad:	pointer to store location of the first badblock
 25 * @bad_sectors: pointer to store number of badblocks after @first_bad
 26 *
 27 * We can record which blocks on each device are 'bad' and so just
 28 * fail those blocks, or that stripe, rather than the whole device.
 29 * Entries in the bad-block table are 64bits wide.  This comprises:
 30 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 31 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 32 *  A 'shift' can be set so that larger blocks are tracked and
 33 *  consequently larger devices can be covered.
 34 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 35 *
 36 * Locking of the bad-block table uses a seqlock so badblocks_check
 37 * might need to retry if it is very unlucky.
 38 * We will sometimes want to check for bad blocks in a bi_end_io function,
 39 * so we use the write_seqlock_irq variant.
 40 *
 41 * When looking for a bad block we specify a range and want to
 42 * know if any block in the range is bad.  So we binary-search
 43 * to the last range that starts at-or-before the given endpoint,
 44 * (or "before the sector after the target range")
 45 * then see if it ends after the given start.
 46 *
 47 * Return:
 48 *  0: there are no known bad blocks in the range
 49 *  1: there are known bad block which are all acknowledged
 50 * -1: there are bad blocks which have not yet been acknowledged in metadata.
 51 * plus the start/length of the first bad section we overlap.
 52 */
 53int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
 54			sector_t *first_bad, int *bad_sectors)
 55{
 56	int hi;
 57	int lo;
 58	u64 *p = bb->page;
 59	int rv;
 60	sector_t target = s + sectors;
 61	unsigned seq;
 62
 63	if (bb->shift > 0) {
 64		/* round the start down, and the end up */
 65		s >>= bb->shift;
 66		target += (1<<bb->shift) - 1;
 67		target >>= bb->shift;
 68		sectors = target - s;
 69	}
 70	/* 'target' is now the first block after the bad range */
 71
 72retry:
 73	seq = read_seqbegin(&bb->lock);
 74	lo = 0;
 75	rv = 0;
 76	hi = bb->count;
 77
 78	/* Binary search between lo and hi for 'target'
 79	 * i.e. for the last range that starts before 'target'
 80	 */
 81	/* INVARIANT: ranges before 'lo' and at-or-after 'hi'
 82	 * are known not to be the last range before target.
 83	 * VARIANT: hi-lo is the number of possible
 84	 * ranges, and decreases until it reaches 1
 85	 */
 86	while (hi - lo > 1) {
 87		int mid = (lo + hi) / 2;
 88		sector_t a = BB_OFFSET(p[mid]);
 89
 90		if (a < target)
 91			/* This could still be the one, earlier ranges
 92			 * could not.
 93			 */
 94			lo = mid;
 95		else
 96			/* This and later ranges are definitely out. */
 97			hi = mid;
 98	}
 99	/* 'lo' might be the last that started before target, but 'hi' isn't */
100	if (hi > lo) {
101		/* need to check all range that end after 's' to see if
102		 * any are unacknowledged.
103		 */
104		while (lo >= 0 &&
105		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
106			if (BB_OFFSET(p[lo]) < target) {
107				/* starts before the end, and finishes after
108				 * the start, so they must overlap
109				 */
110				if (rv != -1 && BB_ACK(p[lo]))
111					rv = 1;
112				else
113					rv = -1;
114				*first_bad = BB_OFFSET(p[lo]);
115				*bad_sectors = BB_LEN(p[lo]);
116			}
117			lo--;
118		}
119	}
120
121	if (read_seqretry(&bb->lock, seq))
122		goto retry;
123
124	return rv;
125}
126EXPORT_SYMBOL_GPL(badblocks_check);
127
128static void badblocks_update_acked(struct badblocks *bb)
129{
130	u64 *p = bb->page;
131	int i;
132	bool unacked = false;
133
134	if (!bb->unacked_exist)
135		return;
136
137	for (i = 0; i < bb->count ; i++) {
138		if (!BB_ACK(p[i])) {
139			unacked = true;
140			break;
141		}
142	}
143
144	if (!unacked)
145		bb->unacked_exist = 0;
146}
147
148/**
149 * badblocks_set() - Add a range of bad blocks to the table.
150 * @bb:		the badblocks structure that holds all badblock information
151 * @s:		first sector to mark as bad
152 * @sectors:	number of sectors to mark as bad
153 * @acknowledged: weather to mark the bad sectors as acknowledged
154 *
155 * This might extend the table, or might contract it if two adjacent ranges
156 * can be merged. We binary-search to find the 'insertion' point, then
157 * decide how best to handle it.
158 *
159 * Return:
160 *  0: success
161 *  1: failed to set badblocks (out of space)
162 */
163int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
164			int acknowledged)
165{
166	u64 *p;
167	int lo, hi;
168	int rv = 0;
169	unsigned long flags;
170
171	if (bb->shift < 0)
172		/* badblocks are disabled */
173		return 1;
174
175	if (bb->shift) {
176		/* round the start down, and the end up */
177		sector_t next = s + sectors;
178
179		s >>= bb->shift;
180		next += (1<<bb->shift) - 1;
181		next >>= bb->shift;
182		sectors = next - s;
183	}
184
185	write_seqlock_irqsave(&bb->lock, flags);
186
187	p = bb->page;
188	lo = 0;
189	hi = bb->count;
190	/* Find the last range that starts at-or-before 's' */
191	while (hi - lo > 1) {
192		int mid = (lo + hi) / 2;
193		sector_t a = BB_OFFSET(p[mid]);
194
195		if (a <= s)
196			lo = mid;
197		else
198			hi = mid;
199	}
200	if (hi > lo && BB_OFFSET(p[lo]) > s)
201		hi = lo;
202
203	if (hi > lo) {
204		/* we found a range that might merge with the start
205		 * of our new range
206		 */
207		sector_t a = BB_OFFSET(p[lo]);
208		sector_t e = a + BB_LEN(p[lo]);
209		int ack = BB_ACK(p[lo]);
210
211		if (e >= s) {
212			/* Yes, we can merge with a previous range */
213			if (s == a && s + sectors >= e)
214				/* new range covers old */
215				ack = acknowledged;
216			else
217				ack = ack && acknowledged;
218
219			if (e < s + sectors)
220				e = s + sectors;
221			if (e - a <= BB_MAX_LEN) {
222				p[lo] = BB_MAKE(a, e-a, ack);
223				s = e;
224			} else {
225				/* does not all fit in one range,
226				 * make p[lo] maximal
227				 */
228				if (BB_LEN(p[lo]) != BB_MAX_LEN)
229					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
230				s = a + BB_MAX_LEN;
231			}
232			sectors = e - s;
233		}
234	}
235	if (sectors && hi < bb->count) {
236		/* 'hi' points to the first range that starts after 's'.
237		 * Maybe we can merge with the start of that range
238		 */
239		sector_t a = BB_OFFSET(p[hi]);
240		sector_t e = a + BB_LEN(p[hi]);
241		int ack = BB_ACK(p[hi]);
242
243		if (a <= s + sectors) {
244			/* merging is possible */
245			if (e <= s + sectors) {
246				/* full overlap */
247				e = s + sectors;
248				ack = acknowledged;
249			} else
250				ack = ack && acknowledged;
251
252			a = s;
253			if (e - a <= BB_MAX_LEN) {
254				p[hi] = BB_MAKE(a, e-a, ack);
255				s = e;
256			} else {
257				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
258				s = a + BB_MAX_LEN;
259			}
260			sectors = e - s;
261			lo = hi;
262			hi++;
263		}
264	}
265	if (sectors == 0 && hi < bb->count) {
266		/* we might be able to combine lo and hi */
267		/* Note: 's' is at the end of 'lo' */
268		sector_t a = BB_OFFSET(p[hi]);
269		int lolen = BB_LEN(p[lo]);
270		int hilen = BB_LEN(p[hi]);
271		int newlen = lolen + hilen - (s - a);
272
273		if (s >= a && newlen < BB_MAX_LEN) {
274			/* yes, we can combine them */
275			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
276
277			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
278			memmove(p + hi, p + hi + 1,
279				(bb->count - hi - 1) * 8);
280			bb->count--;
281		}
282	}
283	while (sectors) {
284		/* didn't merge (it all).
285		 * Need to add a range just before 'hi'
286		 */
287		if (bb->count >= MAX_BADBLOCKS) {
288			/* No room for more */
289			rv = 1;
290			break;
291		} else {
292			int this_sectors = sectors;
293
294			memmove(p + hi + 1, p + hi,
295				(bb->count - hi) * 8);
296			bb->count++;
297
298			if (this_sectors > BB_MAX_LEN)
299				this_sectors = BB_MAX_LEN;
300			p[hi] = BB_MAKE(s, this_sectors, acknowledged);
301			sectors -= this_sectors;
302			s += this_sectors;
303		}
304	}
305
306	bb->changed = 1;
307	if (!acknowledged)
308		bb->unacked_exist = 1;
309	else
310		badblocks_update_acked(bb);
311	write_sequnlock_irqrestore(&bb->lock, flags);
312
313	return rv;
314}
315EXPORT_SYMBOL_GPL(badblocks_set);
316
317/**
318 * badblocks_clear() - Remove a range of bad blocks to the table.
319 * @bb:		the badblocks structure that holds all badblock information
320 * @s:		first sector to mark as bad
321 * @sectors:	number of sectors to mark as bad
322 *
323 * This may involve extending the table if we spilt a region,
324 * but it must not fail.  So if the table becomes full, we just
325 * drop the remove request.
326 *
327 * Return:
328 *  0: success
329 *  1: failed to clear badblocks
330 */
331int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
332{
333	u64 *p;
334	int lo, hi;
335	sector_t target = s + sectors;
336	int rv = 0;
337
338	if (bb->shift > 0) {
339		/* When clearing we round the start up and the end down.
340		 * This should not matter as the shift should align with
341		 * the block size and no rounding should ever be needed.
342		 * However it is better the think a block is bad when it
343		 * isn't than to think a block is not bad when it is.
344		 */
345		s += (1<<bb->shift) - 1;
346		s >>= bb->shift;
347		target >>= bb->shift;
348		sectors = target - s;
349	}
350
351	write_seqlock_irq(&bb->lock);
352
353	p = bb->page;
354	lo = 0;
355	hi = bb->count;
356	/* Find the last range that starts before 'target' */
357	while (hi - lo > 1) {
358		int mid = (lo + hi) / 2;
359		sector_t a = BB_OFFSET(p[mid]);
360
361		if (a < target)
362			lo = mid;
363		else
364			hi = mid;
365	}
366	if (hi > lo) {
367		/* p[lo] is the last range that could overlap the
368		 * current range.  Earlier ranges could also overlap,
369		 * but only this one can overlap the end of the range.
370		 */
371		if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
372		    (BB_OFFSET(p[lo]) < target)) {
373			/* Partial overlap, leave the tail of this range */
374			int ack = BB_ACK(p[lo]);
375			sector_t a = BB_OFFSET(p[lo]);
376			sector_t end = a + BB_LEN(p[lo]);
377
378			if (a < s) {
379				/* we need to split this range */
380				if (bb->count >= MAX_BADBLOCKS) {
381					rv = -ENOSPC;
382					goto out;
383				}
384				memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
385				bb->count++;
386				p[lo] = BB_MAKE(a, s-a, ack);
387				lo++;
388			}
389			p[lo] = BB_MAKE(target, end - target, ack);
390			/* there is no longer an overlap */
391			hi = lo;
392			lo--;
393		}
394		while (lo >= 0 &&
395		       (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
396		       (BB_OFFSET(p[lo]) < target)) {
397			/* This range does overlap */
398			if (BB_OFFSET(p[lo]) < s) {
399				/* Keep the early parts of this range. */
400				int ack = BB_ACK(p[lo]);
401				sector_t start = BB_OFFSET(p[lo]);
402
403				p[lo] = BB_MAKE(start, s - start, ack);
404				/* now low doesn't overlap, so.. */
405				break;
406			}
407			lo--;
408		}
409		/* 'lo' is strictly before, 'hi' is strictly after,
410		 * anything between needs to be discarded
411		 */
412		if (hi - lo > 1) {
413			memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
414			bb->count -= (hi - lo - 1);
415		}
416	}
417
418	badblocks_update_acked(bb);
419	bb->changed = 1;
420out:
421	write_sequnlock_irq(&bb->lock);
422	return rv;
423}
424EXPORT_SYMBOL_GPL(badblocks_clear);
425
426/**
427 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
428 * @bb:		the badblocks structure that holds all badblock information
429 *
430 * This only succeeds if ->changed is clear.  It is used by
431 * in-kernel metadata updates
432 */
433void ack_all_badblocks(struct badblocks *bb)
434{
435	if (bb->page == NULL || bb->changed)
436		/* no point even trying */
437		return;
438	write_seqlock_irq(&bb->lock);
439
440	if (bb->changed == 0 && bb->unacked_exist) {
441		u64 *p = bb->page;
442		int i;
443
444		for (i = 0; i < bb->count ; i++) {
445			if (!BB_ACK(p[i])) {
446				sector_t start = BB_OFFSET(p[i]);
447				int len = BB_LEN(p[i]);
448
449				p[i] = BB_MAKE(start, len, 1);
450			}
451		}
452		bb->unacked_exist = 0;
453	}
454	write_sequnlock_irq(&bb->lock);
455}
456EXPORT_SYMBOL_GPL(ack_all_badblocks);
457
458/**
459 * badblocks_show() - sysfs access to bad-blocks list
460 * @bb:		the badblocks structure that holds all badblock information
461 * @page:	buffer received from sysfs
462 * @unack:	weather to show unacknowledged badblocks
463 *
464 * Return:
465 *  Length of returned data
466 */
467ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
468{
469	size_t len;
470	int i;
471	u64 *p = bb->page;
472	unsigned seq;
473
474	if (bb->shift < 0)
475		return 0;
476
477retry:
478	seq = read_seqbegin(&bb->lock);
479
480	len = 0;
481	i = 0;
482
483	while (len < PAGE_SIZE && i < bb->count) {
484		sector_t s = BB_OFFSET(p[i]);
485		unsigned int length = BB_LEN(p[i]);
486		int ack = BB_ACK(p[i]);
487
488		i++;
489
490		if (unack && ack)
491			continue;
492
493		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
494				(unsigned long long)s << bb->shift,
495				length << bb->shift);
496	}
497	if (unack && len == 0)
498		bb->unacked_exist = 0;
499
500	if (read_seqretry(&bb->lock, seq))
501		goto retry;
502
503	return len;
504}
505EXPORT_SYMBOL_GPL(badblocks_show);
506
507/**
508 * badblocks_store() - sysfs access to bad-blocks list
509 * @bb:		the badblocks structure that holds all badblock information
510 * @page:	buffer received from sysfs
511 * @len:	length of data received from sysfs
512 * @unack:	weather to show unacknowledged badblocks
513 *
514 * Return:
515 *  Length of the buffer processed or -ve error.
516 */
517ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
518			int unack)
519{
520	unsigned long long sector;
521	int length;
522	char newline;
523
524	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
525	case 3:
526		if (newline != '\n')
527			return -EINVAL;
528		/* fall through */
529	case 2:
530		if (length <= 0)
531			return -EINVAL;
532		break;
533	default:
534		return -EINVAL;
535	}
536
537	if (badblocks_set(bb, sector, length, !unack))
538		return -ENOSPC;
539	else
540		return len;
541}
542EXPORT_SYMBOL_GPL(badblocks_store);
543
544static int __badblocks_init(struct device *dev, struct badblocks *bb,
545		int enable)
546{
547	bb->dev = dev;
548	bb->count = 0;
549	if (enable)
550		bb->shift = 0;
551	else
552		bb->shift = -1;
553	if (dev)
554		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
555	else
556		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
557	if (!bb->page) {
558		bb->shift = -1;
559		return -ENOMEM;
560	}
561	seqlock_init(&bb->lock);
562
563	return 0;
564}
565
566/**
567 * badblocks_init() - initialize the badblocks structure
568 * @bb:		the badblocks structure that holds all badblock information
569 * @enable:	weather to enable badblocks accounting
570 *
571 * Return:
572 *  0: success
573 *  -ve errno: on error
574 */
575int badblocks_init(struct badblocks *bb, int enable)
576{
577	return __badblocks_init(NULL, bb, enable);
578}
579EXPORT_SYMBOL_GPL(badblocks_init);
580
581int devm_init_badblocks(struct device *dev, struct badblocks *bb)
582{
583	if (!bb)
584		return -EINVAL;
585	return __badblocks_init(dev, bb, 1);
586}
587EXPORT_SYMBOL_GPL(devm_init_badblocks);
588
589/**
590 * badblocks_exit() - free the badblocks structure
591 * @bb:		the badblocks structure that holds all badblock information
592 */
593void badblocks_exit(struct badblocks *bb)
594{
595	if (!bb)
596		return;
597	if (bb->dev)
598		devm_kfree(bb->dev, bb->page);
599	else
600		kfree(bb->page);
601	bb->page = NULL;
602}
603EXPORT_SYMBOL_GPL(badblocks_exit);