Linux Audio

Check our new training course

Loading...
v6.2
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Bad block management
  4 *
  5 * - Heavily based on MD badblocks code from Neil Brown
  6 *
  7 * Copyright (c) 2015, Intel Corporation.
 
 
 
 
 
 
 
 
 
  8 */
  9
 10#include <linux/badblocks.h>
 11#include <linux/seqlock.h>
 12#include <linux/device.h>
 13#include <linux/kernel.h>
 14#include <linux/module.h>
 15#include <linux/stddef.h>
 16#include <linux/types.h>
 17#include <linux/slab.h>
 18
 19/**
 20 * badblocks_check() - check a given range for bad sectors
 21 * @bb:		the badblocks structure that holds all badblock information
 22 * @s:		sector (start) at which to check for badblocks
 23 * @sectors:	number of sectors to check for badblocks
 24 * @first_bad:	pointer to store location of the first badblock
 25 * @bad_sectors: pointer to store number of badblocks after @first_bad
 26 *
 27 * We can record which blocks on each device are 'bad' and so just
 28 * fail those blocks, or that stripe, rather than the whole device.
 29 * Entries in the bad-block table are 64bits wide.  This comprises:
 30 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 31 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 32 *  A 'shift' can be set so that larger blocks are tracked and
 33 *  consequently larger devices can be covered.
 34 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 35 *
 36 * Locking of the bad-block table uses a seqlock so badblocks_check
 37 * might need to retry if it is very unlucky.
 38 * We will sometimes want to check for bad blocks in a bi_end_io function,
 39 * so we use the write_seqlock_irq variant.
 40 *
 41 * When looking for a bad block we specify a range and want to
 42 * know if any block in the range is bad.  So we binary-search
 43 * to the last range that starts at-or-before the given endpoint,
 44 * (or "before the sector after the target range")
 45 * then see if it ends after the given start.
 46 *
 47 * Return:
 48 *  0: there are no known bad blocks in the range
 49 *  1: there are known bad block which are all acknowledged
 50 * -1: there are bad blocks which have not yet been acknowledged in metadata.
 51 * plus the start/length of the first bad section we overlap.
 52 */
 53int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
 54			sector_t *first_bad, int *bad_sectors)
 55{
 56	int hi;
 57	int lo;
 58	u64 *p = bb->page;
 59	int rv;
 60	sector_t target = s + sectors;
 61	unsigned seq;
 62
 63	if (bb->shift > 0) {
 64		/* round the start down, and the end up */
 65		s >>= bb->shift;
 66		target += (1<<bb->shift) - 1;
 67		target >>= bb->shift;
 
 68	}
 69	/* 'target' is now the first block after the bad range */
 70
 71retry:
 72	seq = read_seqbegin(&bb->lock);
 73	lo = 0;
 74	rv = 0;
 75	hi = bb->count;
 76
 77	/* Binary search between lo and hi for 'target'
 78	 * i.e. for the last range that starts before 'target'
 79	 */
 80	/* INVARIANT: ranges before 'lo' and at-or-after 'hi'
 81	 * are known not to be the last range before target.
 82	 * VARIANT: hi-lo is the number of possible
 83	 * ranges, and decreases until it reaches 1
 84	 */
 85	while (hi - lo > 1) {
 86		int mid = (lo + hi) / 2;
 87		sector_t a = BB_OFFSET(p[mid]);
 88
 89		if (a < target)
 90			/* This could still be the one, earlier ranges
 91			 * could not.
 92			 */
 93			lo = mid;
 94		else
 95			/* This and later ranges are definitely out. */
 96			hi = mid;
 97	}
 98	/* 'lo' might be the last that started before target, but 'hi' isn't */
 99	if (hi > lo) {
100		/* need to check all range that end after 's' to see if
101		 * any are unacknowledged.
102		 */
103		while (lo >= 0 &&
104		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
105			if (BB_OFFSET(p[lo]) < target) {
106				/* starts before the end, and finishes after
107				 * the start, so they must overlap
108				 */
109				if (rv != -1 && BB_ACK(p[lo]))
110					rv = 1;
111				else
112					rv = -1;
113				*first_bad = BB_OFFSET(p[lo]);
114				*bad_sectors = BB_LEN(p[lo]);
115			}
116			lo--;
117		}
118	}
119
120	if (read_seqretry(&bb->lock, seq))
121		goto retry;
122
123	return rv;
124}
125EXPORT_SYMBOL_GPL(badblocks_check);
126
127static void badblocks_update_acked(struct badblocks *bb)
128{
129	u64 *p = bb->page;
130	int i;
131	bool unacked = false;
132
133	if (!bb->unacked_exist)
134		return;
135
136	for (i = 0; i < bb->count ; i++) {
137		if (!BB_ACK(p[i])) {
138			unacked = true;
139			break;
140		}
141	}
142
143	if (!unacked)
144		bb->unacked_exist = 0;
145}
146
147/**
148 * badblocks_set() - Add a range of bad blocks to the table.
149 * @bb:		the badblocks structure that holds all badblock information
150 * @s:		first sector to mark as bad
151 * @sectors:	number of sectors to mark as bad
152 * @acknowledged: weather to mark the bad sectors as acknowledged
153 *
154 * This might extend the table, or might contract it if two adjacent ranges
155 * can be merged. We binary-search to find the 'insertion' point, then
156 * decide how best to handle it.
157 *
158 * Return:
159 *  0: success
160 *  1: failed to set badblocks (out of space)
161 */
162int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
163			int acknowledged)
164{
165	u64 *p;
166	int lo, hi;
167	int rv = 0;
168	unsigned long flags;
169
170	if (bb->shift < 0)
171		/* badblocks are disabled */
172		return 1;
173
174	if (bb->shift) {
175		/* round the start down, and the end up */
176		sector_t next = s + sectors;
177
178		s >>= bb->shift;
179		next += (1<<bb->shift) - 1;
180		next >>= bb->shift;
181		sectors = next - s;
182	}
183
184	write_seqlock_irqsave(&bb->lock, flags);
185
186	p = bb->page;
187	lo = 0;
188	hi = bb->count;
189	/* Find the last range that starts at-or-before 's' */
190	while (hi - lo > 1) {
191		int mid = (lo + hi) / 2;
192		sector_t a = BB_OFFSET(p[mid]);
193
194		if (a <= s)
195			lo = mid;
196		else
197			hi = mid;
198	}
199	if (hi > lo && BB_OFFSET(p[lo]) > s)
200		hi = lo;
201
202	if (hi > lo) {
203		/* we found a range that might merge with the start
204		 * of our new range
205		 */
206		sector_t a = BB_OFFSET(p[lo]);
207		sector_t e = a + BB_LEN(p[lo]);
208		int ack = BB_ACK(p[lo]);
209
210		if (e >= s) {
211			/* Yes, we can merge with a previous range */
212			if (s == a && s + sectors >= e)
213				/* new range covers old */
214				ack = acknowledged;
215			else
216				ack = ack && acknowledged;
217
218			if (e < s + sectors)
219				e = s + sectors;
220			if (e - a <= BB_MAX_LEN) {
221				p[lo] = BB_MAKE(a, e-a, ack);
222				s = e;
223			} else {
224				/* does not all fit in one range,
225				 * make p[lo] maximal
226				 */
227				if (BB_LEN(p[lo]) != BB_MAX_LEN)
228					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
229				s = a + BB_MAX_LEN;
230			}
231			sectors = e - s;
232		}
233	}
234	if (sectors && hi < bb->count) {
235		/* 'hi' points to the first range that starts after 's'.
236		 * Maybe we can merge with the start of that range
237		 */
238		sector_t a = BB_OFFSET(p[hi]);
239		sector_t e = a + BB_LEN(p[hi]);
240		int ack = BB_ACK(p[hi]);
241
242		if (a <= s + sectors) {
243			/* merging is possible */
244			if (e <= s + sectors) {
245				/* full overlap */
246				e = s + sectors;
247				ack = acknowledged;
248			} else
249				ack = ack && acknowledged;
250
251			a = s;
252			if (e - a <= BB_MAX_LEN) {
253				p[hi] = BB_MAKE(a, e-a, ack);
254				s = e;
255			} else {
256				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
257				s = a + BB_MAX_LEN;
258			}
259			sectors = e - s;
260			lo = hi;
261			hi++;
262		}
263	}
264	if (sectors == 0 && hi < bb->count) {
265		/* we might be able to combine lo and hi */
266		/* Note: 's' is at the end of 'lo' */
267		sector_t a = BB_OFFSET(p[hi]);
268		int lolen = BB_LEN(p[lo]);
269		int hilen = BB_LEN(p[hi]);
270		int newlen = lolen + hilen - (s - a);
271
272		if (s >= a && newlen < BB_MAX_LEN) {
273			/* yes, we can combine them */
274			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
275
276			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
277			memmove(p + hi, p + hi + 1,
278				(bb->count - hi - 1) * 8);
279			bb->count--;
280		}
281	}
282	while (sectors) {
283		/* didn't merge (it all).
284		 * Need to add a range just before 'hi'
285		 */
286		if (bb->count >= MAX_BADBLOCKS) {
287			/* No room for more */
288			rv = 1;
289			break;
290		} else {
291			int this_sectors = sectors;
292
293			memmove(p + hi + 1, p + hi,
294				(bb->count - hi) * 8);
295			bb->count++;
296
297			if (this_sectors > BB_MAX_LEN)
298				this_sectors = BB_MAX_LEN;
299			p[hi] = BB_MAKE(s, this_sectors, acknowledged);
300			sectors -= this_sectors;
301			s += this_sectors;
302		}
303	}
304
305	bb->changed = 1;
306	if (!acknowledged)
307		bb->unacked_exist = 1;
308	else
309		badblocks_update_acked(bb);
310	write_sequnlock_irqrestore(&bb->lock, flags);
311
312	return rv;
313}
314EXPORT_SYMBOL_GPL(badblocks_set);
315
316/**
317 * badblocks_clear() - Remove a range of bad blocks to the table.
318 * @bb:		the badblocks structure that holds all badblock information
319 * @s:		first sector to mark as bad
320 * @sectors:	number of sectors to mark as bad
321 *
322 * This may involve extending the table if we spilt a region,
323 * but it must not fail.  So if the table becomes full, we just
324 * drop the remove request.
325 *
326 * Return:
327 *  0: success
328 *  1: failed to clear badblocks
329 */
330int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
331{
332	u64 *p;
333	int lo, hi;
334	sector_t target = s + sectors;
335	int rv = 0;
336
337	if (bb->shift > 0) {
338		/* When clearing we round the start up and the end down.
339		 * This should not matter as the shift should align with
340		 * the block size and no rounding should ever be needed.
341		 * However it is better the think a block is bad when it
342		 * isn't than to think a block is not bad when it is.
343		 */
344		s += (1<<bb->shift) - 1;
345		s >>= bb->shift;
346		target >>= bb->shift;
 
347	}
348
349	write_seqlock_irq(&bb->lock);
350
351	p = bb->page;
352	lo = 0;
353	hi = bb->count;
354	/* Find the last range that starts before 'target' */
355	while (hi - lo > 1) {
356		int mid = (lo + hi) / 2;
357		sector_t a = BB_OFFSET(p[mid]);
358
359		if (a < target)
360			lo = mid;
361		else
362			hi = mid;
363	}
364	if (hi > lo) {
365		/* p[lo] is the last range that could overlap the
366		 * current range.  Earlier ranges could also overlap,
367		 * but only this one can overlap the end of the range.
368		 */
369		if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
370		    (BB_OFFSET(p[lo]) < target)) {
371			/* Partial overlap, leave the tail of this range */
372			int ack = BB_ACK(p[lo]);
373			sector_t a = BB_OFFSET(p[lo]);
374			sector_t end = a + BB_LEN(p[lo]);
375
376			if (a < s) {
377				/* we need to split this range */
378				if (bb->count >= MAX_BADBLOCKS) {
379					rv = -ENOSPC;
380					goto out;
381				}
382				memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
383				bb->count++;
384				p[lo] = BB_MAKE(a, s-a, ack);
385				lo++;
386			}
387			p[lo] = BB_MAKE(target, end - target, ack);
388			/* there is no longer an overlap */
389			hi = lo;
390			lo--;
391		}
392		while (lo >= 0 &&
393		       (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
394		       (BB_OFFSET(p[lo]) < target)) {
395			/* This range does overlap */
396			if (BB_OFFSET(p[lo]) < s) {
397				/* Keep the early parts of this range. */
398				int ack = BB_ACK(p[lo]);
399				sector_t start = BB_OFFSET(p[lo]);
400
401				p[lo] = BB_MAKE(start, s - start, ack);
402				/* now low doesn't overlap, so.. */
403				break;
404			}
405			lo--;
406		}
407		/* 'lo' is strictly before, 'hi' is strictly after,
408		 * anything between needs to be discarded
409		 */
410		if (hi - lo > 1) {
411			memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
412			bb->count -= (hi - lo - 1);
413		}
414	}
415
416	badblocks_update_acked(bb);
417	bb->changed = 1;
418out:
419	write_sequnlock_irq(&bb->lock);
420	return rv;
421}
422EXPORT_SYMBOL_GPL(badblocks_clear);
423
424/**
425 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
426 * @bb:		the badblocks structure that holds all badblock information
427 *
428 * This only succeeds if ->changed is clear.  It is used by
429 * in-kernel metadata updates
430 */
431void ack_all_badblocks(struct badblocks *bb)
432{
433	if (bb->page == NULL || bb->changed)
434		/* no point even trying */
435		return;
436	write_seqlock_irq(&bb->lock);
437
438	if (bb->changed == 0 && bb->unacked_exist) {
439		u64 *p = bb->page;
440		int i;
441
442		for (i = 0; i < bb->count ; i++) {
443			if (!BB_ACK(p[i])) {
444				sector_t start = BB_OFFSET(p[i]);
445				int len = BB_LEN(p[i]);
446
447				p[i] = BB_MAKE(start, len, 1);
448			}
449		}
450		bb->unacked_exist = 0;
451	}
452	write_sequnlock_irq(&bb->lock);
453}
454EXPORT_SYMBOL_GPL(ack_all_badblocks);
455
456/**
457 * badblocks_show() - sysfs access to bad-blocks list
458 * @bb:		the badblocks structure that holds all badblock information
459 * @page:	buffer received from sysfs
460 * @unack:	weather to show unacknowledged badblocks
461 *
462 * Return:
463 *  Length of returned data
464 */
465ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
466{
467	size_t len;
468	int i;
469	u64 *p = bb->page;
470	unsigned seq;
471
472	if (bb->shift < 0)
473		return 0;
474
475retry:
476	seq = read_seqbegin(&bb->lock);
477
478	len = 0;
479	i = 0;
480
481	while (len < PAGE_SIZE && i < bb->count) {
482		sector_t s = BB_OFFSET(p[i]);
483		unsigned int length = BB_LEN(p[i]);
484		int ack = BB_ACK(p[i]);
485
486		i++;
487
488		if (unack && ack)
489			continue;
490
491		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
492				(unsigned long long)s << bb->shift,
493				length << bb->shift);
494	}
495	if (unack && len == 0)
496		bb->unacked_exist = 0;
497
498	if (read_seqretry(&bb->lock, seq))
499		goto retry;
500
501	return len;
502}
503EXPORT_SYMBOL_GPL(badblocks_show);
504
505/**
506 * badblocks_store() - sysfs access to bad-blocks list
507 * @bb:		the badblocks structure that holds all badblock information
508 * @page:	buffer received from sysfs
509 * @len:	length of data received from sysfs
510 * @unack:	weather to show unacknowledged badblocks
511 *
512 * Return:
513 *  Length of the buffer processed or -ve error.
514 */
515ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
516			int unack)
517{
518	unsigned long long sector;
519	int length;
520	char newline;
521
522	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
523	case 3:
524		if (newline != '\n')
525			return -EINVAL;
526		fallthrough;
527	case 2:
528		if (length <= 0)
529			return -EINVAL;
530		break;
531	default:
532		return -EINVAL;
533	}
534
535	if (badblocks_set(bb, sector, length, !unack))
536		return -ENOSPC;
537	else
538		return len;
539}
540EXPORT_SYMBOL_GPL(badblocks_store);
541
542static int __badblocks_init(struct device *dev, struct badblocks *bb,
543		int enable)
544{
545	bb->dev = dev;
546	bb->count = 0;
547	if (enable)
548		bb->shift = 0;
549	else
550		bb->shift = -1;
551	if (dev)
552		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
553	else
554		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
555	if (!bb->page) {
556		bb->shift = -1;
557		return -ENOMEM;
558	}
559	seqlock_init(&bb->lock);
560
561	return 0;
562}
563
564/**
565 * badblocks_init() - initialize the badblocks structure
566 * @bb:		the badblocks structure that holds all badblock information
567 * @enable:	weather to enable badblocks accounting
568 *
569 * Return:
570 *  0: success
571 *  -ve errno: on error
572 */
573int badblocks_init(struct badblocks *bb, int enable)
574{
575	return __badblocks_init(NULL, bb, enable);
576}
577EXPORT_SYMBOL_GPL(badblocks_init);
578
579int devm_init_badblocks(struct device *dev, struct badblocks *bb)
580{
581	if (!bb)
582		return -EINVAL;
583	return __badblocks_init(dev, bb, 1);
584}
585EXPORT_SYMBOL_GPL(devm_init_badblocks);
586
587/**
588 * badblocks_exit() - free the badblocks structure
589 * @bb:		the badblocks structure that holds all badblock information
590 */
591void badblocks_exit(struct badblocks *bb)
592{
593	if (!bb)
594		return;
595	if (bb->dev)
596		devm_kfree(bb->dev, bb->page);
597	else
598		kfree(bb->page);
599	bb->page = NULL;
600}
601EXPORT_SYMBOL_GPL(badblocks_exit);
v4.10.11
 
  1/*
  2 * Bad block management
  3 *
  4 * - Heavily based on MD badblocks code from Neil Brown
  5 *
  6 * Copyright (c) 2015, Intel Corporation.
  7 *
  8 * This program is free software; you can redistribute it and/or modify it
  9 * under the terms and conditions of the GNU General Public License,
 10 * version 2, as published by the Free Software Foundation.
 11 *
 12 * This program is distributed in the hope it will be useful, but WITHOUT
 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 14 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 15 * more details.
 16 */
 17
 18#include <linux/badblocks.h>
 19#include <linux/seqlock.h>
 20#include <linux/device.h>
 21#include <linux/kernel.h>
 22#include <linux/module.h>
 23#include <linux/stddef.h>
 24#include <linux/types.h>
 25#include <linux/slab.h>
 26
 27/**
 28 * badblocks_check() - check a given range for bad sectors
 29 * @bb:		the badblocks structure that holds all badblock information
 30 * @s:		sector (start) at which to check for badblocks
 31 * @sectors:	number of sectors to check for badblocks
 32 * @first_bad:	pointer to store location of the first badblock
 33 * @bad_sectors: pointer to store number of badblocks after @first_bad
 34 *
 35 * We can record which blocks on each device are 'bad' and so just
 36 * fail those blocks, or that stripe, rather than the whole device.
 37 * Entries in the bad-block table are 64bits wide.  This comprises:
 38 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 39 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 40 *  A 'shift' can be set so that larger blocks are tracked and
 41 *  consequently larger devices can be covered.
 42 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 43 *
 44 * Locking of the bad-block table uses a seqlock so badblocks_check
 45 * might need to retry if it is very unlucky.
 46 * We will sometimes want to check for bad blocks in a bi_end_io function,
 47 * so we use the write_seqlock_irq variant.
 48 *
 49 * When looking for a bad block we specify a range and want to
 50 * know if any block in the range is bad.  So we binary-search
 51 * to the last range that starts at-or-before the given endpoint,
 52 * (or "before the sector after the target range")
 53 * then see if it ends after the given start.
 54 *
 55 * Return:
 56 *  0: there are no known bad blocks in the range
 57 *  1: there are known bad block which are all acknowledged
 58 * -1: there are bad blocks which have not yet been acknowledged in metadata.
 59 * plus the start/length of the first bad section we overlap.
 60 */
 61int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
 62			sector_t *first_bad, int *bad_sectors)
 63{
 64	int hi;
 65	int lo;
 66	u64 *p = bb->page;
 67	int rv;
 68	sector_t target = s + sectors;
 69	unsigned seq;
 70
 71	if (bb->shift > 0) {
 72		/* round the start down, and the end up */
 73		s >>= bb->shift;
 74		target += (1<<bb->shift) - 1;
 75		target >>= bb->shift;
 76		sectors = target - s;
 77	}
 78	/* 'target' is now the first block after the bad range */
 79
 80retry:
 81	seq = read_seqbegin(&bb->lock);
 82	lo = 0;
 83	rv = 0;
 84	hi = bb->count;
 85
 86	/* Binary search between lo and hi for 'target'
 87	 * i.e. for the last range that starts before 'target'
 88	 */
 89	/* INVARIANT: ranges before 'lo' and at-or-after 'hi'
 90	 * are known not to be the last range before target.
 91	 * VARIANT: hi-lo is the number of possible
 92	 * ranges, and decreases until it reaches 1
 93	 */
 94	while (hi - lo > 1) {
 95		int mid = (lo + hi) / 2;
 96		sector_t a = BB_OFFSET(p[mid]);
 97
 98		if (a < target)
 99			/* This could still be the one, earlier ranges
100			 * could not.
101			 */
102			lo = mid;
103		else
104			/* This and later ranges are definitely out. */
105			hi = mid;
106	}
107	/* 'lo' might be the last that started before target, but 'hi' isn't */
108	if (hi > lo) {
109		/* need to check all range that end after 's' to see if
110		 * any are unacknowledged.
111		 */
112		while (lo >= 0 &&
113		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
114			if (BB_OFFSET(p[lo]) < target) {
115				/* starts before the end, and finishes after
116				 * the start, so they must overlap
117				 */
118				if (rv != -1 && BB_ACK(p[lo]))
119					rv = 1;
120				else
121					rv = -1;
122				*first_bad = BB_OFFSET(p[lo]);
123				*bad_sectors = BB_LEN(p[lo]);
124			}
125			lo--;
126		}
127	}
128
129	if (read_seqretry(&bb->lock, seq))
130		goto retry;
131
132	return rv;
133}
134EXPORT_SYMBOL_GPL(badblocks_check);
135
136static void badblocks_update_acked(struct badblocks *bb)
137{
138	u64 *p = bb->page;
139	int i;
140	bool unacked = false;
141
142	if (!bb->unacked_exist)
143		return;
144
145	for (i = 0; i < bb->count ; i++) {
146		if (!BB_ACK(p[i])) {
147			unacked = true;
148			break;
149		}
150	}
151
152	if (!unacked)
153		bb->unacked_exist = 0;
154}
155
156/**
157 * badblocks_set() - Add a range of bad blocks to the table.
158 * @bb:		the badblocks structure that holds all badblock information
159 * @s:		first sector to mark as bad
160 * @sectors:	number of sectors to mark as bad
161 * @acknowledged: weather to mark the bad sectors as acknowledged
162 *
163 * This might extend the table, or might contract it if two adjacent ranges
164 * can be merged. We binary-search to find the 'insertion' point, then
165 * decide how best to handle it.
166 *
167 * Return:
168 *  0: success
169 *  1: failed to set badblocks (out of space)
170 */
171int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
172			int acknowledged)
173{
174	u64 *p;
175	int lo, hi;
176	int rv = 0;
177	unsigned long flags;
178
179	if (bb->shift < 0)
180		/* badblocks are disabled */
181		return 0;
182
183	if (bb->shift) {
184		/* round the start down, and the end up */
185		sector_t next = s + sectors;
186
187		s >>= bb->shift;
188		next += (1<<bb->shift) - 1;
189		next >>= bb->shift;
190		sectors = next - s;
191	}
192
193	write_seqlock_irqsave(&bb->lock, flags);
194
195	p = bb->page;
196	lo = 0;
197	hi = bb->count;
198	/* Find the last range that starts at-or-before 's' */
199	while (hi - lo > 1) {
200		int mid = (lo + hi) / 2;
201		sector_t a = BB_OFFSET(p[mid]);
202
203		if (a <= s)
204			lo = mid;
205		else
206			hi = mid;
207	}
208	if (hi > lo && BB_OFFSET(p[lo]) > s)
209		hi = lo;
210
211	if (hi > lo) {
212		/* we found a range that might merge with the start
213		 * of our new range
214		 */
215		sector_t a = BB_OFFSET(p[lo]);
216		sector_t e = a + BB_LEN(p[lo]);
217		int ack = BB_ACK(p[lo]);
218
219		if (e >= s) {
220			/* Yes, we can merge with a previous range */
221			if (s == a && s + sectors >= e)
222				/* new range covers old */
223				ack = acknowledged;
224			else
225				ack = ack && acknowledged;
226
227			if (e < s + sectors)
228				e = s + sectors;
229			if (e - a <= BB_MAX_LEN) {
230				p[lo] = BB_MAKE(a, e-a, ack);
231				s = e;
232			} else {
233				/* does not all fit in one range,
234				 * make p[lo] maximal
235				 */
236				if (BB_LEN(p[lo]) != BB_MAX_LEN)
237					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
238				s = a + BB_MAX_LEN;
239			}
240			sectors = e - s;
241		}
242	}
243	if (sectors && hi < bb->count) {
244		/* 'hi' points to the first range that starts after 's'.
245		 * Maybe we can merge with the start of that range
246		 */
247		sector_t a = BB_OFFSET(p[hi]);
248		sector_t e = a + BB_LEN(p[hi]);
249		int ack = BB_ACK(p[hi]);
250
251		if (a <= s + sectors) {
252			/* merging is possible */
253			if (e <= s + sectors) {
254				/* full overlap */
255				e = s + sectors;
256				ack = acknowledged;
257			} else
258				ack = ack && acknowledged;
259
260			a = s;
261			if (e - a <= BB_MAX_LEN) {
262				p[hi] = BB_MAKE(a, e-a, ack);
263				s = e;
264			} else {
265				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
266				s = a + BB_MAX_LEN;
267			}
268			sectors = e - s;
269			lo = hi;
270			hi++;
271		}
272	}
273	if (sectors == 0 && hi < bb->count) {
274		/* we might be able to combine lo and hi */
275		/* Note: 's' is at the end of 'lo' */
276		sector_t a = BB_OFFSET(p[hi]);
277		int lolen = BB_LEN(p[lo]);
278		int hilen = BB_LEN(p[hi]);
279		int newlen = lolen + hilen - (s - a);
280
281		if (s >= a && newlen < BB_MAX_LEN) {
282			/* yes, we can combine them */
283			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
284
285			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
286			memmove(p + hi, p + hi + 1,
287				(bb->count - hi - 1) * 8);
288			bb->count--;
289		}
290	}
291	while (sectors) {
292		/* didn't merge (it all).
293		 * Need to add a range just before 'hi'
294		 */
295		if (bb->count >= MAX_BADBLOCKS) {
296			/* No room for more */
297			rv = 1;
298			break;
299		} else {
300			int this_sectors = sectors;
301
302			memmove(p + hi + 1, p + hi,
303				(bb->count - hi) * 8);
304			bb->count++;
305
306			if (this_sectors > BB_MAX_LEN)
307				this_sectors = BB_MAX_LEN;
308			p[hi] = BB_MAKE(s, this_sectors, acknowledged);
309			sectors -= this_sectors;
310			s += this_sectors;
311		}
312	}
313
314	bb->changed = 1;
315	if (!acknowledged)
316		bb->unacked_exist = 1;
317	else
318		badblocks_update_acked(bb);
319	write_sequnlock_irqrestore(&bb->lock, flags);
320
321	return rv;
322}
323EXPORT_SYMBOL_GPL(badblocks_set);
324
325/**
326 * badblocks_clear() - Remove a range of bad blocks to the table.
327 * @bb:		the badblocks structure that holds all badblock information
328 * @s:		first sector to mark as bad
329 * @sectors:	number of sectors to mark as bad
330 *
331 * This may involve extending the table if we spilt a region,
332 * but it must not fail.  So if the table becomes full, we just
333 * drop the remove request.
334 *
335 * Return:
336 *  0: success
337 *  1: failed to clear badblocks
338 */
339int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
340{
341	u64 *p;
342	int lo, hi;
343	sector_t target = s + sectors;
344	int rv = 0;
345
346	if (bb->shift > 0) {
347		/* When clearing we round the start up and the end down.
348		 * This should not matter as the shift should align with
349		 * the block size and no rounding should ever be needed.
350		 * However it is better the think a block is bad when it
351		 * isn't than to think a block is not bad when it is.
352		 */
353		s += (1<<bb->shift) - 1;
354		s >>= bb->shift;
355		target >>= bb->shift;
356		sectors = target - s;
357	}
358
359	write_seqlock_irq(&bb->lock);
360
361	p = bb->page;
362	lo = 0;
363	hi = bb->count;
364	/* Find the last range that starts before 'target' */
365	while (hi - lo > 1) {
366		int mid = (lo + hi) / 2;
367		sector_t a = BB_OFFSET(p[mid]);
368
369		if (a < target)
370			lo = mid;
371		else
372			hi = mid;
373	}
374	if (hi > lo) {
375		/* p[lo] is the last range that could overlap the
376		 * current range.  Earlier ranges could also overlap,
377		 * but only this one can overlap the end of the range.
378		 */
379		if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
380		    (BB_OFFSET(p[lo]) < target)) {
381			/* Partial overlap, leave the tail of this range */
382			int ack = BB_ACK(p[lo]);
383			sector_t a = BB_OFFSET(p[lo]);
384			sector_t end = a + BB_LEN(p[lo]);
385
386			if (a < s) {
387				/* we need to split this range */
388				if (bb->count >= MAX_BADBLOCKS) {
389					rv = -ENOSPC;
390					goto out;
391				}
392				memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
393				bb->count++;
394				p[lo] = BB_MAKE(a, s-a, ack);
395				lo++;
396			}
397			p[lo] = BB_MAKE(target, end - target, ack);
398			/* there is no longer an overlap */
399			hi = lo;
400			lo--;
401		}
402		while (lo >= 0 &&
403		       (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
404		       (BB_OFFSET(p[lo]) < target)) {
405			/* This range does overlap */
406			if (BB_OFFSET(p[lo]) < s) {
407				/* Keep the early parts of this range. */
408				int ack = BB_ACK(p[lo]);
409				sector_t start = BB_OFFSET(p[lo]);
410
411				p[lo] = BB_MAKE(start, s - start, ack);
412				/* now low doesn't overlap, so.. */
413				break;
414			}
415			lo--;
416		}
417		/* 'lo' is strictly before, 'hi' is strictly after,
418		 * anything between needs to be discarded
419		 */
420		if (hi - lo > 1) {
421			memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
422			bb->count -= (hi - lo - 1);
423		}
424	}
425
426	badblocks_update_acked(bb);
427	bb->changed = 1;
428out:
429	write_sequnlock_irq(&bb->lock);
430	return rv;
431}
432EXPORT_SYMBOL_GPL(badblocks_clear);
433
434/**
435 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
436 * @bb:		the badblocks structure that holds all badblock information
437 *
438 * This only succeeds if ->changed is clear.  It is used by
439 * in-kernel metadata updates
440 */
441void ack_all_badblocks(struct badblocks *bb)
442{
443	if (bb->page == NULL || bb->changed)
444		/* no point even trying */
445		return;
446	write_seqlock_irq(&bb->lock);
447
448	if (bb->changed == 0 && bb->unacked_exist) {
449		u64 *p = bb->page;
450		int i;
451
452		for (i = 0; i < bb->count ; i++) {
453			if (!BB_ACK(p[i])) {
454				sector_t start = BB_OFFSET(p[i]);
455				int len = BB_LEN(p[i]);
456
457				p[i] = BB_MAKE(start, len, 1);
458			}
459		}
460		bb->unacked_exist = 0;
461	}
462	write_sequnlock_irq(&bb->lock);
463}
464EXPORT_SYMBOL_GPL(ack_all_badblocks);
465
466/**
467 * badblocks_show() - sysfs access to bad-blocks list
468 * @bb:		the badblocks structure that holds all badblock information
469 * @page:	buffer received from sysfs
470 * @unack:	weather to show unacknowledged badblocks
471 *
472 * Return:
473 *  Length of returned data
474 */
475ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
476{
477	size_t len;
478	int i;
479	u64 *p = bb->page;
480	unsigned seq;
481
482	if (bb->shift < 0)
483		return 0;
484
485retry:
486	seq = read_seqbegin(&bb->lock);
487
488	len = 0;
489	i = 0;
490
491	while (len < PAGE_SIZE && i < bb->count) {
492		sector_t s = BB_OFFSET(p[i]);
493		unsigned int length = BB_LEN(p[i]);
494		int ack = BB_ACK(p[i]);
495
496		i++;
497
498		if (unack && ack)
499			continue;
500
501		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
502				(unsigned long long)s << bb->shift,
503				length << bb->shift);
504	}
505	if (unack && len == 0)
506		bb->unacked_exist = 0;
507
508	if (read_seqretry(&bb->lock, seq))
509		goto retry;
510
511	return len;
512}
513EXPORT_SYMBOL_GPL(badblocks_show);
514
515/**
516 * badblocks_store() - sysfs access to bad-blocks list
517 * @bb:		the badblocks structure that holds all badblock information
518 * @page:	buffer received from sysfs
519 * @len:	length of data received from sysfs
520 * @unack:	weather to show unacknowledged badblocks
521 *
522 * Return:
523 *  Length of the buffer processed or -ve error.
524 */
525ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
526			int unack)
527{
528	unsigned long long sector;
529	int length;
530	char newline;
531
532	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
533	case 3:
534		if (newline != '\n')
535			return -EINVAL;
 
536	case 2:
537		if (length <= 0)
538			return -EINVAL;
539		break;
540	default:
541		return -EINVAL;
542	}
543
544	if (badblocks_set(bb, sector, length, !unack))
545		return -ENOSPC;
546	else
547		return len;
548}
549EXPORT_SYMBOL_GPL(badblocks_store);
550
551static int __badblocks_init(struct device *dev, struct badblocks *bb,
552		int enable)
553{
554	bb->dev = dev;
555	bb->count = 0;
556	if (enable)
557		bb->shift = 0;
558	else
559		bb->shift = -1;
560	if (dev)
561		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
562	else
563		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
564	if (!bb->page) {
565		bb->shift = -1;
566		return -ENOMEM;
567	}
568	seqlock_init(&bb->lock);
569
570	return 0;
571}
572
573/**
574 * badblocks_init() - initialize the badblocks structure
575 * @bb:		the badblocks structure that holds all badblock information
576 * @enable:	weather to enable badblocks accounting
577 *
578 * Return:
579 *  0: success
580 *  -ve errno: on error
581 */
582int badblocks_init(struct badblocks *bb, int enable)
583{
584	return __badblocks_init(NULL, bb, enable);
585}
586EXPORT_SYMBOL_GPL(badblocks_init);
587
588int devm_init_badblocks(struct device *dev, struct badblocks *bb)
589{
590	if (!bb)
591		return -EINVAL;
592	return __badblocks_init(dev, bb, 1);
593}
594EXPORT_SYMBOL_GPL(devm_init_badblocks);
595
596/**
597 * badblocks_exit() - free the badblocks structure
598 * @bb:		the badblocks structure that holds all badblock information
599 */
600void badblocks_exit(struct badblocks *bb)
601{
602	if (!bb)
603		return;
604	if (bb->dev)
605		devm_kfree(bb->dev, bb->page);
606	else
607		kfree(bb->page);
608	bb->page = NULL;
609}
610EXPORT_SYMBOL_GPL(badblocks_exit);