Linux Audio

Check our new training course

Loading...
v5.4
  1// SPDX-License-Identifier: GPL-2.0+
  2/*
  3 * Copyright (C) 2017 Oracle.  All Rights Reserved.
  4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
  5 */
  6#include "xfs.h"
  7#include "xfs_fs.h"
  8#include "xfs_shared.h"
  9#include "xfs_format.h"
 10#include "xfs_trans_resv.h"
 11#include "xfs_mount.h"
 
 12#include "xfs_btree.h"
 13#include "scrub/scrub.h"
 14#include "scrub/common.h"
 15#include "scrub/btree.h"
 16#include "scrub/trace.h"
 17
 18/* btree scrubbing */
 19
/*
 * Check for btree operation errors.  See the section about handling
 * operational errors in common.c.
 *
 * Returns true if *error was zero (nothing to handle).  Otherwise,
 * records the disposition of the error and returns false so the caller
 * can bail out.
 *
 * @sc:      scrub context; sm_flags is updated for corruption errors
 * @cur:     btree cursor, used only to pick the right tracepoint
 * @level:   btree level at which the error occurred
 * @error:   in/out error code; zeroed for CRC/corruption errors so that
 *           scrub can continue after flagging the problem
 * @errflag: XFS_SCRUB_OFLAG_* bit to set for corruption-class errors
 * @ret_ip:  caller's return address, recorded by the tracepoints
 */
static bool
__xchk_btree_process_error(
	struct xfs_scrub	*sc,
	struct xfs_btree_cur	*cur,
	int			level,
	int			*error,
	__u32			errflag,
	void			*ret_ip)
{
	if (*error == 0)
		return true;

	switch (*error) {
	case -EDEADLOCK:
		/* Used to restart an op with deadlock avoidance. */
		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
		sc->sm->sm_flags |= errflag;
		*error = 0;
		/*
		 * Falls into the default case, so these tracepoints log
		 * the (now zeroed) error code.
		 */
		/* fall through */
	default:
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			trace_xchk_ifork_btree_op_error(sc, cur, level,
					*error, ret_ip);
		else
			trace_xchk_btree_op_error(sc, cur, level,
					*error, ret_ip);
		break;
	}
	return false;
}
 58
 59bool
 60xchk_btree_process_error(
 61	struct xfs_scrub	*sc,
 62	struct xfs_btree_cur	*cur,
 63	int			level,
 64	int			*error)
 65{
 66	return __xchk_btree_process_error(sc, cur, level, error,
 67			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
 68}
 69
 70bool
 71xchk_btree_xref_process_error(
 72	struct xfs_scrub	*sc,
 73	struct xfs_btree_cur	*cur,
 74	int			level,
 75	int			*error)
 76{
 77	return __xchk_btree_process_error(sc, cur, level, error,
 78			XFS_SCRUB_OFLAG_XFAIL, __return_address);
 79}
 80
 81/* Record btree block corruption. */
 82static void
 83__xchk_btree_set_corrupt(
 84	struct xfs_scrub	*sc,
 85	struct xfs_btree_cur	*cur,
 86	int			level,
 87	__u32			errflag,
 88	void			*ret_ip)
 89{
 90	sc->sm->sm_flags |= errflag;
 91
 92	if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
 93		trace_xchk_ifork_btree_error(sc, cur, level,
 94				ret_ip);
 95	else
 96		trace_xchk_btree_error(sc, cur, level,
 97				ret_ip);
 98}
 99
100void
101xchk_btree_set_corrupt(
102	struct xfs_scrub	*sc,
103	struct xfs_btree_cur	*cur,
104	int			level)
105{
106	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
107			__return_address);
108}
109
110void
111xchk_btree_xref_set_corrupt(
112	struct xfs_scrub	*sc,
113	struct xfs_btree_cur	*cur,
114	int			level)
115{
116	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
117			__return_address);
118}
119
/*
 * Make sure this record is in order and doesn't stray outside of the parent
 * keys.
 *
 * Compares the leaf record at cur->bc_ptrs[0] against the previously seen
 * record (bs->firstrec/bs->lastrec), then against the parent's low key at
 * cur->bc_ptrs[1] and, for overlapping btrees, the parent's high key.
 * Corruption is flagged via xchk_btree_set_corrupt(); nothing is returned.
 */
STATIC void
xchk_btree_rec(
	struct xchk_btree	*bs)
{
	struct xfs_btree_cur	*cur = bs->cur;
	union xfs_btree_rec	*rec;
	union xfs_btree_key	key;
	union xfs_btree_key	hkey;
	union xfs_btree_key	*keyp;
	struct xfs_btree_block	*block;
	struct xfs_btree_block	*keyblock;
	struct xfs_buf		*bp;

	/* Current record at level 0 (the leaf level). */
	block = xfs_btree_get_block(cur, 0, &bp);
	rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);

	trace_xchk_btree_rec(bs->sc, cur, 0);

	/* If this isn't the first record, are they in order? */
	if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
		xchk_btree_set_corrupt(bs->sc, cur, 0);
	bs->firstrec = false;
	/* Remember this record for the next ordering comparison. */
	memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);

	/* A single-level btree has no parent keys to check against. */
	if (cur->bc_nlevels == 1)
		return;

	/* Is this at least as large as the parent low key? */
	cur->bc_ops->init_key_from_rec(&key, rec);
	keyblock = xfs_btree_get_block(cur, 1, &bp);
	keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock);
	if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0)
		xchk_btree_set_corrupt(bs->sc, cur, 1);

	/* High keys exist only in overlapping btrees. */
	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
		return;

	/* Is this no larger than the parent high key? */
	cur->bc_ops->init_high_key_from_rec(&hkey, rec);
	keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock);
	if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0)
		xchk_btree_set_corrupt(bs->sc, cur, 1);
}
167
/*
 * Make sure this key is in order and doesn't stray outside of the parent
 * keys.
 *
 * Compares the key at cur->bc_ptrs[level] against the previously seen key
 * at this level (bs->firstkey/bs->lastkey), then against the parent's low
 * key and, for overlapping btrees, the parent's high key.
 */
STATIC void
xchk_btree_key(
	struct xchk_btree	*bs,
	int			level)
{
	struct xfs_btree_cur	*cur = bs->cur;
	union xfs_btree_key	*key;
	union xfs_btree_key	*keyp;
	struct xfs_btree_block	*block;
	struct xfs_btree_block	*keyblock;
	struct xfs_buf		*bp;

	block = xfs_btree_get_block(cur, level, &bp);
	key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);

	trace_xchk_btree_key(bs->sc, cur, level);

	/* If this isn't the first key, are they in order? */
	if (!bs->firstkey[level] &&
	    !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key))
		xchk_btree_set_corrupt(bs->sc, cur, level);
	bs->firstkey[level] = false;
	/* Remember this key for the next ordering comparison. */
	memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len);

	/* The root level has no parent keys to check against. */
	if (level + 1 >= cur->bc_nlevels)
		return;

	/* Is this at least as large as the parent low key? */
	keyblock = xfs_btree_get_block(cur, level + 1, &bp);
	keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
	if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0)
		xchk_btree_set_corrupt(bs->sc, cur, level);

	/* High keys exist only in overlapping btrees. */
	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
		return;

	/* Is this no larger than the parent high key? */
	key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
	keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
	if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0)
		xchk_btree_set_corrupt(bs->sc, cur, level);
}
214
215/*
216 * Check a btree pointer.  Returns true if it's ok to use this pointer.
217 * Callers do not need to set the corrupt flag.
218 */
219static bool
220xchk_btree_ptr_ok(
221	struct xchk_btree	*bs,
222	int			level,
223	union xfs_btree_ptr	*ptr)
224{
225	bool			res;
226
227	/* A btree rooted in an inode has no block pointer to the root. */
228	if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
229	    level == bs->cur->bc_nlevels)
230		return true;
231
232	/* Otherwise, check the pointers. */
233	if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
234		res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
235	else
236		res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
237	if (!res)
238		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
239
240	return res;
241}
242
/*
 * Check that a btree block's sibling pointer matches what we expect of it.
 *
 * Duplicates the cursor, moves the copy one entry at the parent level in
 * @direction (+1 = right sibling, -1 = left sibling), and verifies that
 * the parent's pointer there matches @sibling.  A null sibling pointer
 * must correspond to the parent cursor refusing to move.
 */
STATIC int
xchk_btree_block_check_sibling(
	struct xchk_btree	*bs,
	int			level,
	int			direction,
	union xfs_btree_ptr	*sibling)
{
	struct xfs_btree_cur	*cur = bs->cur;
	struct xfs_btree_block	*pblock;
	struct xfs_buf		*pbp;
	struct xfs_btree_cur	*ncur = NULL;
	union xfs_btree_ptr	*pp;
	int			success;
	int			error;

	/* Duplicate the cursor so we can move it without losing our place. */
	error = xfs_btree_dup_cursor(cur, &ncur);
	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
	    !ncur)
		return error;

	/*
	 * If the pointer is null, we shouldn't be able to move the upper
	 * level pointer anywhere.
	 */
	if (xfs_btree_ptr_is_null(cur, sibling)) {
		if (direction > 0)
			error = xfs_btree_increment(ncur, level + 1, &success);
		else
			error = xfs_btree_decrement(ncur, level + 1, &success);
		if (error == 0 && success)
			xchk_btree_set_corrupt(bs->sc, cur, level);
		/* Failure to move is the expected outcome here. */
		error = 0;
		goto out;
	}

	/* Increment upper level pointer. */
	if (direction > 0)
		error = xfs_btree_increment(ncur, level + 1, &success);
	else
		error = xfs_btree_decrement(ncur, level + 1, &success);
	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
		goto out;
	if (!success) {
		/* Sibling pointer is set but the parent has no next entry. */
		xchk_btree_set_corrupt(bs->sc, cur, level + 1);
		goto out;
	}

	/* Compare upper level pointer to sibling pointer. */
	pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
	pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
	if (!xchk_btree_ptr_ok(bs, level + 1, pp))
		goto out;
	if (pbp)
		xchk_buffer_recheck(bs->sc, pbp);

	if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
		xchk_btree_set_corrupt(bs->sc, cur, level);
out:
	xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
	return error;
}
305
306/* Check the siblings of a btree block. */
307STATIC int
308xchk_btree_block_check_siblings(
309	struct xchk_btree	*bs,
310	struct xfs_btree_block	*block)
311{
312	struct xfs_btree_cur	*cur = bs->cur;
313	union xfs_btree_ptr	leftsib;
314	union xfs_btree_ptr	rightsib;
315	int			level;
316	int			error = 0;
317
318	xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
319	xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
320	level = xfs_btree_get_level(block);
321
322	/* Root block should never have siblings. */
323	if (level == cur->bc_nlevels - 1) {
324		if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
325		    !xfs_btree_ptr_is_null(cur, &rightsib))
326			xchk_btree_set_corrupt(bs->sc, cur, level);
327		goto out;
328	}
329
330	/*
331	 * Does the left & right sibling pointers match the adjacent
332	 * parent level pointers?
333	 * (These function absorbs error codes for us.)
334	 */
335	error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
336	if (error)
337		return error;
338	error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
339	if (error)
340		return error;
341out:
342	return error;
343}
344
/* A deferred btree block owner check, queued on xchk_btree.to_check. */
struct check_owner {
	struct list_head	list;	/* link in xchk_btree.to_check */
	xfs_daddr_t		daddr;	/* disk address of the btree block */
	int			level;	/* btree level of the block */
};
350
/*
 * Make sure this btree block isn't in the free list and that there's
 * an rmap record for it.
 *
 * Cross-references the block at @daddr against the free space and rmap
 * data.  For long-pointer btrees the per-AG scrub structures for this
 * block's AG are set up here and torn down before returning.
 */
STATIC int
xchk_btree_check_block_owner(
	struct xchk_btree	*bs,
	int			level,
	xfs_daddr_t		daddr)
{
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	xfs_btnum_t		btnum;
	bool			init_sa;
	int			error = 0;

	/* An earlier xref failure may have shut down owner checking. */
	if (!bs->cur)
		return 0;

	btnum = bs->cur->bc_btnum;
	agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
	agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);

	/* Long-pointer btrees need the AG structures set up on demand. */
	init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
	if (init_sa) {
		error = xchk_ag_init(bs->sc, agno, &bs->sc->sa);
		if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
				level, &error))
			return error;
	}

	xchk_xref_is_used_space(bs->sc, agbno, 1);
	/*
	 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
	 * have to nullify it (to shut down further block owner checks) if
	 * self-xref encounters problems.
	 */
	if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
		bs->cur = NULL;

	xchk_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo);
	/* Same aliasing hazard applies to the rmapbt scrubber. */
	if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
		bs->cur = NULL;

	if (init_sa)
		xchk_ag_free(bs->sc, &bs->sc->sa);

	return error;
}
400
/*
 * Check the owner of a btree block.  For bnobt/rmapbt scrubs the check is
 * deferred (queued on bs->to_check and consumed at the end of xchk_btree);
 * otherwise the block owner is checked immediately.
 */
STATIC int
xchk_btree_check_owner(
	struct xchk_btree	*bs,
	int			level,
	struct xfs_buf		*bp)
{
	struct xfs_btree_cur	*cur = bs->cur;
	struct check_owner	*co;

	/*
	 * In theory, xfs_btree_get_block should only give us a null buffer
	 * pointer for the root of a root-in-inode btree type, but we need
	 * to check defensively here in case the cursor state is also screwed
	 * up.
	 */
	if (bp == NULL) {
		if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE))
			xchk_btree_set_corrupt(bs->sc, bs->cur, level);
		return 0;
	}

	/*
	 * We want to cross-reference each btree block with the bnobt
	 * and the rmapbt.  We cannot cross-reference the bnobt or
	 * rmapbt while scanning the bnobt or rmapbt, respectively,
	 * because we cannot alter the cursor and we'd prefer not to
	 * duplicate cursors.  Therefore, save the buffer daddr for
	 * later scanning.
	 */
	if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
		co = kmem_alloc(sizeof(struct check_owner),
				KM_MAYFAIL);
		if (!co)
			return -ENOMEM;
		co->level = level;
		co->daddr = XFS_BUF_ADDR(bp);
		list_add_tail(&co->list, &bs->to_check);
		return 0;
	}

	return xchk_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp));
}
444
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445/*
446 * Check that this btree block has at least minrecs records or is one of the
447 * special blocks that don't require that.
448 */
449STATIC void
450xchk_btree_check_minrecs(
451	struct xchk_btree	*bs,
452	int			level,
453	struct xfs_btree_block	*block)
454{
455	unsigned int		numrecs;
456	int			ok_level;
457
458	numrecs = be16_to_cpu(block->bb_numrecs);
459
460	/* More records than minrecs means the block is ok. */
461	if (numrecs >= bs->cur->bc_ops->get_minrecs(bs->cur, level))
462		return;
463
464	/*
465	 * Certain btree blocks /can/ have fewer than minrecs records.  Any
466	 * level greater than or equal to the level of the highest dedicated
467	 * btree block are allowed to violate this constraint.
468	 *
469	 * For a btree rooted in a block, the btree root can have fewer than
470	 * minrecs records.  If the btree is rooted in an inode and does not
471	 * store records in the root, the direct children of the root and the
472	 * root itself can have fewer than minrecs records.
473	 */
474	ok_level = bs->cur->bc_nlevels - 1;
475	if (bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
476		ok_level--;
477	if (level >= ok_level)
 
 
 
 
 
 
 
 
478		return;
 
479
480	xchk_btree_set_corrupt(bs->sc, bs->cur, level);
 
 
 
 
 
481}
482
/*
 * Grab and scrub a btree block given a btree pointer.  Returns block
 * and buffer pointers (if applicable) if they're ok to use.
 *
 * Header verification failures are flagged as corruption and return 0;
 * only operational errors propagate as a nonzero return.
 */
STATIC int
xchk_btree_get_block(
	struct xchk_btree	*bs,
	int			level,
	union xfs_btree_ptr	*pp,
	struct xfs_btree_block	**pblock,
	struct xfs_buf		**pbp)
{
	xfs_failaddr_t		failed_at;
	int			error;

	*pblock = NULL;
	*pbp = NULL;

	/* Read the block into the cursor. */
	error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
	if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
	    !*pblock)
		return error;

	/* Run the generic block verifier for the pointer width in use. */
	xfs_btree_get_block(bs->cur, level, pbp);
	if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
		failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
				level, *pbp);
	else
		failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
				 level, *pbp);
	if (failed_at) {
		/* Flag the bad header but report success to the caller. */
		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
		return 0;
	}
	if (*pbp)
		xchk_buffer_recheck(bs->sc, *pbp);

	xchk_btree_check_minrecs(bs, level, *pblock);

	/*
	 * Check the block's owner; this function absorbs error codes
	 * for us.
	 */
	error = xchk_btree_check_owner(bs, level, *pbp);
	if (error)
		return error;

	/*
	 * Check the block's siblings; this function absorbs error codes
	 * for us.
	 */
	return xchk_btree_block_check_siblings(bs, *pblock);
}
536
/*
 * Check that the low and high keys of this block match the keys stored
 * in the parent block.
 */
STATIC void
xchk_btree_block_keys(
	struct xchk_btree	*bs,
	int			level,
	struct xfs_btree_block	*block)
{
	union xfs_btree_key	block_keys;
	struct xfs_btree_cur	*cur = bs->cur;
	union xfs_btree_key	*high_bk;
	union xfs_btree_key	*parent_keys;
	union xfs_btree_key	*high_pk;
	struct xfs_btree_block	*parent_block;
	struct xfs_buf		*bp;

	/* The root block has no parent to compare against. */
	if (level >= cur->bc_nlevels - 1)
		return;

	/* Calculate the keys for this block. */
	xfs_btree_get_keys(cur, block, &block_keys);

	/* Obtain the parent's copy of the keys for this block. */
	parent_block = xfs_btree_get_block(cur, level + 1, &bp);
	parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1],
			parent_block);

	if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0)
		xchk_btree_set_corrupt(bs->sc, cur, 1);

	/* High keys exist only in overlapping btrees. */
	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
		return;

	/* Get high keys */
	high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
	high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1],
			parent_block);

	if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0)
		xchk_btree_set_corrupt(bs->sc, cur, 1);
}
580
/*
 * Visit all nodes and leaves of a btree.  Check that all pointers and
 * records are in order, that the keys reflect the records, and use a callback
 * so that the caller can verify individual records.
 *
 * @sc:       scrub context
 * @cur:      cursor for the btree to walk
 * @scrub_fn: per-record callback invoked at every leaf record
 * @oinfo:    expected block owner, used for rmap cross-referencing
 * @private:  opaque data passed through to @scrub_fn via bs.private
 *
 * Returns 0 or a negative errno; corruption findings are reported through
 * sc->sm->sm_flags rather than the return code.
 */
int
xchk_btree(
	struct xfs_scrub		*sc,
	struct xfs_btree_cur		*cur,
	xchk_btree_rec_fn		scrub_fn,
	const struct xfs_owner_info	*oinfo,
	void				*private)
{
	struct xchk_btree		bs = {
		.cur			= cur,
		.scrub_rec		= scrub_fn,
		.oinfo			= oinfo,
		.firstrec		= true,
		.private		= private,
		.sc			= sc,
	};
	union xfs_btree_ptr		ptr;
	union xfs_btree_ptr		*pp;
	union xfs_btree_rec		*recp;
	struct xfs_btree_block		*block;
	int				level;
	struct xfs_buf			*bp;
	struct check_owner		*co;
	struct check_owner		*n;
	int				i;
	int				error = 0;

	/* Initialize scrub state */
	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++)
		bs.firstkey[i] = true;
	INIT_LIST_HEAD(&bs.to_check);

	/* Don't try to check a tree with a height we can't handle. */
	if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) {
		xchk_btree_set_corrupt(sc, cur, 0);
		goto out;
	}

	/*
	 * Load the root of the btree.  The helper function absorbs
	 * error codes for us.
	 */
	level = cur->bc_nlevels - 1;
	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
	if (!xchk_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr))
		goto out;
	error = xchk_btree_get_block(&bs, level, &ptr, &block, &bp);
	if (error || !block)
		goto out;

	cur->bc_ptrs[level] = 1;

	/*
	 * Iterative depth-first walk: cur->bc_ptrs[level] is the 1-based
	 * position within the block at each level; running past the last
	 * entry pops back towards the root.
	 */
	while (level < cur->bc_nlevels) {
		block = xfs_btree_get_block(cur, level, &bp);

		if (level == 0) {
			/* End of leaf, pop back towards the root. */
			if (cur->bc_ptrs[level] >
			    be16_to_cpu(block->bb_numrecs)) {
				xchk_btree_block_keys(&bs, level, block);
				if (level < cur->bc_nlevels - 1)
					cur->bc_ptrs[level + 1]++;
				level++;
				continue;
			}

			/* Records in order for scrub? */
			xchk_btree_rec(&bs);

			/* Call out to the record checker. */
			recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
			error = bs.scrub_rec(&bs, recp);
			if (error)
				break;
			/* Stop early on a fatal signal or known corruption. */
			if (xchk_should_terminate(sc, &error) ||
			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
				break;

			cur->bc_ptrs[level]++;
			continue;
		}

		/* End of node, pop back towards the root. */
		if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
			xchk_btree_block_keys(&bs, level, block);
			if (level < cur->bc_nlevels - 1)
				cur->bc_ptrs[level + 1]++;
			level++;
			continue;
		}

		/* Keys in order for scrub? */
		xchk_btree_key(&bs, level);

		/* Drill another level deeper. */
		pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
		if (!xchk_btree_ptr_ok(&bs, level, pp)) {
			cur->bc_ptrs[level]++;
			continue;
		}
		level--;
		error = xchk_btree_get_block(&bs, level, pp, &block, &bp);
		if (error || !block)
			goto out;

		cur->bc_ptrs[level] = 1;
	}

out:
	/* Process deferred owner checks on btree blocks. */
	list_for_each_entry_safe(co, n, &bs.to_check, list) {
		if (!error && bs.cur)
			error = xchk_btree_check_block_owner(&bs,
					co->level, co->daddr);
		list_del(&co->list);
		kmem_free(co);
	}

	return error;
}
v5.14.15
  1// SPDX-License-Identifier: GPL-2.0+
  2/*
  3 * Copyright (C) 2017 Oracle.  All Rights Reserved.
  4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
  5 */
  6#include "xfs.h"
  7#include "xfs_fs.h"
  8#include "xfs_shared.h"
  9#include "xfs_format.h"
 10#include "xfs_trans_resv.h"
 11#include "xfs_mount.h"
 12#include "xfs_inode.h"
 13#include "xfs_btree.h"
 14#include "scrub/scrub.h"
 15#include "scrub/common.h"
 16#include "scrub/btree.h"
 17#include "scrub/trace.h"
 18
 19/* btree scrubbing */
 20
/*
 * Check for btree operation errors.  See the section about handling
 * operational errors in common.c.
 *
 * Returns true if *error was zero (nothing to handle).  Otherwise,
 * records the disposition of the error and returns false so the caller
 * can bail out.
 *
 * @sc:      scrub context; sm_flags is updated for corruption errors
 * @cur:     btree cursor, used only to pick the right tracepoint
 * @level:   btree level at which the error occurred
 * @error:   in/out error code; zeroed for CRC/corruption errors so that
 *           scrub can continue after flagging the problem
 * @errflag: XFS_SCRUB_OFLAG_* bit to set for corruption-class errors
 * @ret_ip:  caller's return address, recorded by the tracepoints
 */
static bool
__xchk_btree_process_error(
	struct xfs_scrub	*sc,
	struct xfs_btree_cur	*cur,
	int			level,
	int			*error,
	__u32			errflag,
	void			*ret_ip)
{
	if (*error == 0)
		return true;

	switch (*error) {
	case -EDEADLOCK:
		/* Used to restart an op with deadlock avoidance. */
		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
		sc->sm->sm_flags |= errflag;
		*error = 0;
		/*
		 * Falls into the default case, so these tracepoints log
		 * the (now zeroed) error code.
		 */
		fallthrough;
	default:
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			trace_xchk_ifork_btree_op_error(sc, cur, level,
					*error, ret_ip);
		else
			trace_xchk_btree_op_error(sc, cur, level,
					*error, ret_ip);
		break;
	}
	return false;
}
 59
 60bool
 61xchk_btree_process_error(
 62	struct xfs_scrub	*sc,
 63	struct xfs_btree_cur	*cur,
 64	int			level,
 65	int			*error)
 66{
 67	return __xchk_btree_process_error(sc, cur, level, error,
 68			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
 69}
 70
 71bool
 72xchk_btree_xref_process_error(
 73	struct xfs_scrub	*sc,
 74	struct xfs_btree_cur	*cur,
 75	int			level,
 76	int			*error)
 77{
 78	return __xchk_btree_process_error(sc, cur, level, error,
 79			XFS_SCRUB_OFLAG_XFAIL, __return_address);
 80}
 81
 82/* Record btree block corruption. */
 83static void
 84__xchk_btree_set_corrupt(
 85	struct xfs_scrub	*sc,
 86	struct xfs_btree_cur	*cur,
 87	int			level,
 88	__u32			errflag,
 89	void			*ret_ip)
 90{
 91	sc->sm->sm_flags |= errflag;
 92
 93	if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
 94		trace_xchk_ifork_btree_error(sc, cur, level,
 95				ret_ip);
 96	else
 97		trace_xchk_btree_error(sc, cur, level,
 98				ret_ip);
 99}
100
101void
102xchk_btree_set_corrupt(
103	struct xfs_scrub	*sc,
104	struct xfs_btree_cur	*cur,
105	int			level)
106{
107	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
108			__return_address);
109}
110
111void
112xchk_btree_xref_set_corrupt(
113	struct xfs_scrub	*sc,
114	struct xfs_btree_cur	*cur,
115	int			level)
116{
117	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
118			__return_address);
119}
120
/*
 * Make sure this record is in order and doesn't stray outside of the parent
 * keys.
 *
 * Compares the leaf record at cur->bc_ptrs[0] against the previously seen
 * record (bs->firstrec/bs->lastrec), then against the parent's low key at
 * cur->bc_ptrs[1] and, for overlapping btrees, the parent's high key.
 * Corruption is flagged via xchk_btree_set_corrupt(); nothing is returned.
 */
STATIC void
xchk_btree_rec(
	struct xchk_btree	*bs)
{
	struct xfs_btree_cur	*cur = bs->cur;
	union xfs_btree_rec	*rec;
	union xfs_btree_key	key;
	union xfs_btree_key	hkey;
	union xfs_btree_key	*keyp;
	struct xfs_btree_block	*block;
	struct xfs_btree_block	*keyblock;
	struct xfs_buf		*bp;

	/* Current record at level 0 (the leaf level). */
	block = xfs_btree_get_block(cur, 0, &bp);
	rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);

	trace_xchk_btree_rec(bs->sc, cur, 0);

	/* If this isn't the first record, are they in order? */
	if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
		xchk_btree_set_corrupt(bs->sc, cur, 0);
	bs->firstrec = false;
	/* Remember this record for the next ordering comparison. */
	memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);

	/* A single-level btree has no parent keys to check against. */
	if (cur->bc_nlevels == 1)
		return;

	/* Is this at least as large as the parent low key? */
	cur->bc_ops->init_key_from_rec(&key, rec);
	keyblock = xfs_btree_get_block(cur, 1, &bp);
	keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock);
	if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0)
		xchk_btree_set_corrupt(bs->sc, cur, 1);

	/* High keys exist only in overlapping btrees. */
	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
		return;

	/* Is this no larger than the parent high key? */
	cur->bc_ops->init_high_key_from_rec(&hkey, rec);
	keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock);
	if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0)
		xchk_btree_set_corrupt(bs->sc, cur, 1);
}
168
/*
 * Make sure this key is in order and doesn't stray outside of the parent
 * keys.
 *
 * Compares the key at cur->bc_ptrs[level] against the previously seen key
 * at this level (bs->firstkey/bs->lastkey), then against the parent's low
 * key and, for overlapping btrees, the parent's high key.
 */
STATIC void
xchk_btree_key(
	struct xchk_btree	*bs,
	int			level)
{
	struct xfs_btree_cur	*cur = bs->cur;
	union xfs_btree_key	*key;
	union xfs_btree_key	*keyp;
	struct xfs_btree_block	*block;
	struct xfs_btree_block	*keyblock;
	struct xfs_buf		*bp;

	block = xfs_btree_get_block(cur, level, &bp);
	key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);

	trace_xchk_btree_key(bs->sc, cur, level);

	/* If this isn't the first key, are they in order? */
	if (!bs->firstkey[level] &&
	    !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key))
		xchk_btree_set_corrupt(bs->sc, cur, level);
	bs->firstkey[level] = false;
	/* Remember this key for the next ordering comparison. */
	memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len);

	/* The root level has no parent keys to check against. */
	if (level + 1 >= cur->bc_nlevels)
		return;

	/* Is this at least as large as the parent low key? */
	keyblock = xfs_btree_get_block(cur, level + 1, &bp);
	keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
	if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0)
		xchk_btree_set_corrupt(bs->sc, cur, level);

	/* High keys exist only in overlapping btrees. */
	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
		return;

	/* Is this no larger than the parent high key? */
	key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
	keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
	if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0)
		xchk_btree_set_corrupt(bs->sc, cur, level);
}
215
216/*
217 * Check a btree pointer.  Returns true if it's ok to use this pointer.
218 * Callers do not need to set the corrupt flag.
219 */
220static bool
221xchk_btree_ptr_ok(
222	struct xchk_btree	*bs,
223	int			level,
224	union xfs_btree_ptr	*ptr)
225{
226	bool			res;
227
228	/* A btree rooted in an inode has no block pointer to the root. */
229	if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
230	    level == bs->cur->bc_nlevels)
231		return true;
232
233	/* Otherwise, check the pointers. */
234	if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
235		res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
236	else
237		res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
238	if (!res)
239		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
240
241	return res;
242}
243
/*
 * Check that a btree block's sibling pointer matches what we expect of it.
 *
 * Duplicates the cursor, moves the copy one entry at the parent level in
 * @direction (+1 = right sibling, -1 = left sibling), and verifies that
 * the parent's pointer there matches @sibling.  A null sibling pointer
 * must correspond to the parent cursor refusing to move.
 */
STATIC int
xchk_btree_block_check_sibling(
	struct xchk_btree	*bs,
	int			level,
	int			direction,
	union xfs_btree_ptr	*sibling)
{
	struct xfs_btree_cur	*cur = bs->cur;
	struct xfs_btree_block	*pblock;
	struct xfs_buf		*pbp;
	struct xfs_btree_cur	*ncur = NULL;
	union xfs_btree_ptr	*pp;
	int			success;
	int			error;

	/* Duplicate the cursor so we can move it without losing our place. */
	error = xfs_btree_dup_cursor(cur, &ncur);
	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
	    !ncur)
		return error;

	/*
	 * If the pointer is null, we shouldn't be able to move the upper
	 * level pointer anywhere.
	 */
	if (xfs_btree_ptr_is_null(cur, sibling)) {
		if (direction > 0)
			error = xfs_btree_increment(ncur, level + 1, &success);
		else
			error = xfs_btree_decrement(ncur, level + 1, &success);
		if (error == 0 && success)
			xchk_btree_set_corrupt(bs->sc, cur, level);
		/* Failure to move is the expected outcome here. */
		error = 0;
		goto out;
	}

	/* Increment upper level pointer. */
	if (direction > 0)
		error = xfs_btree_increment(ncur, level + 1, &success);
	else
		error = xfs_btree_decrement(ncur, level + 1, &success);
	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
		goto out;
	if (!success) {
		/* Sibling pointer is set but the parent has no next entry. */
		xchk_btree_set_corrupt(bs->sc, cur, level + 1);
		goto out;
	}

	/* Compare upper level pointer to sibling pointer. */
	pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
	pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
	if (!xchk_btree_ptr_ok(bs, level + 1, pp))
		goto out;
	if (pbp)
		xchk_buffer_recheck(bs->sc, pbp);

	if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
		xchk_btree_set_corrupt(bs->sc, cur, level);
out:
	xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
	return error;
}
306
307/* Check the siblings of a btree block. */
308STATIC int
309xchk_btree_block_check_siblings(
310	struct xchk_btree	*bs,
311	struct xfs_btree_block	*block)
312{
313	struct xfs_btree_cur	*cur = bs->cur;
314	union xfs_btree_ptr	leftsib;
315	union xfs_btree_ptr	rightsib;
316	int			level;
317	int			error = 0;
318
319	xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
320	xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
321	level = xfs_btree_get_level(block);
322
323	/* Root block should never have siblings. */
324	if (level == cur->bc_nlevels - 1) {
325		if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
326		    !xfs_btree_ptr_is_null(cur, &rightsib))
327			xchk_btree_set_corrupt(bs->sc, cur, level);
328		goto out;
329	}
330
331	/*
332	 * Does the left & right sibling pointers match the adjacent
333	 * parent level pointers?
334	 * (These function absorbs error codes for us.)
335	 */
336	error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
337	if (error)
338		return error;
339	error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
340	if (error)
341		return error;
342out:
343	return error;
344}
345
/* A deferred btree block owner check, queued on xchk_btree.to_check. */
struct check_owner {
	struct list_head	list;	/* link in xchk_btree.to_check */
	xfs_daddr_t		daddr;	/* disk address of the btree block */
	int			level;	/* btree level of the block */
};
351
/*
 * Make sure this btree block isn't in the free list and that there's
 * an rmap record for it.
 *
 * Cross-references the block at @daddr against the free space and rmap
 * data.  For long-pointer btrees the per-AG scrub structures for this
 * block's AG are set up here and torn down before returning.
 */
STATIC int
xchk_btree_check_block_owner(
	struct xchk_btree	*bs,
	int			level,
	xfs_daddr_t		daddr)
{
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	xfs_btnum_t		btnum;
	bool			init_sa;
	int			error = 0;

	/* An earlier xref failure may have shut down owner checking. */
	if (!bs->cur)
		return 0;

	btnum = bs->cur->bc_btnum;
	agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
	agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);

	/* Long-pointer btrees need the AG structures set up on demand. */
	init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
	if (init_sa) {
		error = xchk_ag_init(bs->sc, agno, &bs->sc->sa);
		if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
				level, &error))
			return error;
	}

	xchk_xref_is_used_space(bs->sc, agbno, 1);
	/*
	 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
	 * have to nullify it (to shut down further block owner checks) if
	 * self-xref encounters problems.
	 */
	if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
		bs->cur = NULL;

	xchk_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo);
	/* Same aliasing hazard applies to the rmapbt scrubber. */
	if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
		bs->cur = NULL;

	if (init_sa)
		xchk_ag_free(bs->sc, &bs->sc->sa);

	return error;
}
401
/*
 * Check the owner of a btree block.  For bnobt/rmapbt scrubs the check is
 * deferred (queued on bs->to_check and consumed at the end of xchk_btree);
 * otherwise the block owner is checked immediately.
 */
STATIC int
xchk_btree_check_owner(
	struct xchk_btree	*bs,
	int			level,
	struct xfs_buf		*bp)
{
	struct xfs_btree_cur	*cur = bs->cur;
	struct check_owner	*co;

	/*
	 * In theory, xfs_btree_get_block should only give us a null buffer
	 * pointer for the root of a root-in-inode btree type, but we need
	 * to check defensively here in case the cursor state is also screwed
	 * up.
	 */
	if (bp == NULL) {
		if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE))
			xchk_btree_set_corrupt(bs->sc, bs->cur, level);
		return 0;
	}

	/*
	 * We want to cross-reference each btree block with the bnobt
	 * and the rmapbt.  We cannot cross-reference the bnobt or
	 * rmapbt while scanning the bnobt or rmapbt, respectively,
	 * because we cannot alter the cursor and we'd prefer not to
	 * duplicate cursors.  Therefore, save the buffer daddr for
	 * later scanning.
	 */
	if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
		co = kmem_alloc(sizeof(struct check_owner),
				KM_MAYFAIL);
		if (!co)
			return -ENOMEM;
		co->level = level;
		co->daddr = XFS_BUF_ADDR(bp);
		list_add_tail(&co->list, &bs->to_check);
		return 0;
	}

	return xchk_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp));
}
445
446/* Decide if we want to check minrecs of a btree block in the inode root. */
447static inline bool
448xchk_btree_check_iroot_minrecs(
449	struct xchk_btree	*bs)
450{
451	/*
452	 * xfs_bmap_add_attrfork_btree had an implementation bug wherein it
453	 * would miscalculate the space required for the data fork bmbt root
454	 * when adding an attr fork, and promote the iroot contents to an
455	 * external block unnecessarily.  This went unnoticed for many years
456	 * until scrub found filesystems in this state.  Inode rooted btrees are
457	 * not supposed to have immediate child blocks that are small enough
458	 * that the contents could fit in the inode root, but we can't fail
459	 * existing filesystems, so instead we disable the check for data fork
460	 * bmap btrees when there's an attr fork.
461	 */
462	if (bs->cur->bc_btnum == XFS_BTNUM_BMAP &&
463	    bs->cur->bc_ino.whichfork == XFS_DATA_FORK &&
464	    XFS_IFORK_Q(bs->sc->ip))
465		return false;
466
467	return true;
468}
469
470/*
471 * Check that this btree block has at least minrecs records or is one of the
472 * special blocks that don't require that.
473 */
474STATIC void
475xchk_btree_check_minrecs(
476	struct xchk_btree	*bs,
477	int			level,
478	struct xfs_btree_block	*block)
479{
480	struct xfs_btree_cur	*cur = bs->cur;
481	unsigned int		root_level = cur->bc_nlevels - 1;
482	unsigned int		numrecs = be16_to_cpu(block->bb_numrecs);
 
483
484	/* More records than minrecs means the block is ok. */
485	if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
486		return;
487
488	/*
489	 * For btrees rooted in the inode, it's possible that the root block
490	 * contents spilled into a regular ondisk block because there wasn't
491	 * enough space in the inode root.  The number of records in that
492	 * child block might be less than the standard minrecs, but that's ok
493	 * provided that there's only one direct child of the root.
 
 
 
494	 */
495	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
496	    level == cur->bc_nlevels - 2) {
497		struct xfs_btree_block	*root_block;
498		struct xfs_buf		*root_bp;
499		int			root_maxrecs;
500
501		root_block = xfs_btree_get_block(cur, root_level, &root_bp);
502		root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
503		if (xchk_btree_check_iroot_minrecs(bs) &&
504		    (be16_to_cpu(root_block->bb_numrecs) != 1 ||
505		     numrecs <= root_maxrecs))
506			xchk_btree_set_corrupt(bs->sc, cur, level);
507		return;
508	}
509
510	/*
511	 * Otherwise, only the root level is allowed to have fewer than minrecs
512	 * records or keyptrs.
513	 */
514	if (level < root_level)
515		xchk_btree_set_corrupt(bs->sc, cur, level);
516}
517
518/*
519 * Grab and scrub a btree block given a btree pointer.  Returns block
520 * and buffer pointers (if applicable) if they're ok to use.
521 */
522STATIC int
523xchk_btree_get_block(
524	struct xchk_btree	*bs,
525	int			level,
526	union xfs_btree_ptr	*pp,
527	struct xfs_btree_block	**pblock,
528	struct xfs_buf		**pbp)
529{
530	xfs_failaddr_t		failed_at;
531	int			error;
532
533	*pblock = NULL;
534	*pbp = NULL;
535
536	error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
537	if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
538	    !*pblock)
539		return error;
540
541	xfs_btree_get_block(bs->cur, level, pbp);
542	if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
543		failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
544				level, *pbp);
545	else
546		failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
547				 level, *pbp);
548	if (failed_at) {
549		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
550		return 0;
551	}
552	if (*pbp)
553		xchk_buffer_recheck(bs->sc, *pbp);
554
555	xchk_btree_check_minrecs(bs, level, *pblock);
556
557	/*
558	 * Check the block's owner; this function absorbs error codes
559	 * for us.
560	 */
561	error = xchk_btree_check_owner(bs, level, *pbp);
562	if (error)
563		return error;
564
565	/*
566	 * Check the block's siblings; this function absorbs error codes
567	 * for us.
568	 */
569	return xchk_btree_block_check_siblings(bs, *pblock);
570}
571
572/*
573 * Check that the low and high keys of this block match the keys stored
574 * in the parent block.
575 */
576STATIC void
577xchk_btree_block_keys(
578	struct xchk_btree	*bs,
579	int			level,
580	struct xfs_btree_block	*block)
581{
582	union xfs_btree_key	block_keys;
583	struct xfs_btree_cur	*cur = bs->cur;
584	union xfs_btree_key	*high_bk;
585	union xfs_btree_key	*parent_keys;
586	union xfs_btree_key	*high_pk;
587	struct xfs_btree_block	*parent_block;
588	struct xfs_buf		*bp;
589
590	if (level >= cur->bc_nlevels - 1)
591		return;
592
593	/* Calculate the keys for this block. */
594	xfs_btree_get_keys(cur, block, &block_keys);
595
596	/* Obtain the parent's copy of the keys for this block. */
597	parent_block = xfs_btree_get_block(cur, level + 1, &bp);
598	parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1],
599			parent_block);
600
601	if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0)
602		xchk_btree_set_corrupt(bs->sc, cur, 1);
603
604	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
605		return;
606
607	/* Get high keys */
608	high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
609	high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1],
610			parent_block);
611
612	if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0)
613		xchk_btree_set_corrupt(bs->sc, cur, 1);
614}
615
616/*
617 * Visit all nodes and leaves of a btree.  Check that all pointers and
618 * records are in order, that the keys reflect the records, and use a callback
619 * so that the caller can verify individual records.
620 */
621int
622xchk_btree(
623	struct xfs_scrub		*sc,
624	struct xfs_btree_cur		*cur,
625	xchk_btree_rec_fn		scrub_fn,
626	const struct xfs_owner_info	*oinfo,
627	void				*private)
628{
629	struct xchk_btree		bs = {
630		.cur			= cur,
631		.scrub_rec		= scrub_fn,
632		.oinfo			= oinfo,
633		.firstrec		= true,
634		.private		= private,
635		.sc			= sc,
636	};
637	union xfs_btree_ptr		ptr;
638	union xfs_btree_ptr		*pp;
639	union xfs_btree_rec		*recp;
640	struct xfs_btree_block		*block;
641	int				level;
642	struct xfs_buf			*bp;
643	struct check_owner		*co;
644	struct check_owner		*n;
645	int				i;
646	int				error = 0;
647
648	/* Initialize scrub state */
649	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++)
650		bs.firstkey[i] = true;
651	INIT_LIST_HEAD(&bs.to_check);
652
653	/* Don't try to check a tree with a height we can't handle. */
654	if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) {
655		xchk_btree_set_corrupt(sc, cur, 0);
656		goto out;
657	}
658
659	/*
660	 * Load the root of the btree.  The helper function absorbs
661	 * error codes for us.
662	 */
663	level = cur->bc_nlevels - 1;
664	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
665	if (!xchk_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr))
666		goto out;
667	error = xchk_btree_get_block(&bs, level, &ptr, &block, &bp);
668	if (error || !block)
669		goto out;
670
671	cur->bc_ptrs[level] = 1;
672
673	while (level < cur->bc_nlevels) {
674		block = xfs_btree_get_block(cur, level, &bp);
675
676		if (level == 0) {
677			/* End of leaf, pop back towards the root. */
678			if (cur->bc_ptrs[level] >
679			    be16_to_cpu(block->bb_numrecs)) {
680				xchk_btree_block_keys(&bs, level, block);
681				if (level < cur->bc_nlevels - 1)
682					cur->bc_ptrs[level + 1]++;
683				level++;
684				continue;
685			}
686
687			/* Records in order for scrub? */
688			xchk_btree_rec(&bs);
689
690			/* Call out to the record checker. */
691			recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
692			error = bs.scrub_rec(&bs, recp);
693			if (error)
694				break;
695			if (xchk_should_terminate(sc, &error) ||
696			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
697				break;
698
699			cur->bc_ptrs[level]++;
700			continue;
701		}
702
703		/* End of node, pop back towards the root. */
704		if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
705			xchk_btree_block_keys(&bs, level, block);
706			if (level < cur->bc_nlevels - 1)
707				cur->bc_ptrs[level + 1]++;
708			level++;
709			continue;
710		}
711
712		/* Keys in order for scrub? */
713		xchk_btree_key(&bs, level);
714
715		/* Drill another level deeper. */
716		pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
717		if (!xchk_btree_ptr_ok(&bs, level, pp)) {
718			cur->bc_ptrs[level]++;
719			continue;
720		}
721		level--;
722		error = xchk_btree_get_block(&bs, level, pp, &block, &bp);
723		if (error || !block)
724			goto out;
725
726		cur->bc_ptrs[level] = 1;
727	}
728
729out:
730	/* Process deferred owner checks on btree blocks. */
731	list_for_each_entry_safe(co, n, &bs.to_check, list) {
732		if (!error && bs.cur)
733			error = xchk_btree_check_block_owner(&bs,
734					co->level, co->daddr);
735		list_del(&co->list);
736		kmem_free(co);
737	}
738
739	return error;
740}