Linux Audio

Check our new training course

Linux debugging, profiling, tracing and performance analysis training

Mar 24-27, 2025, special US time zones
Register
Loading...
v6.9.4
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
  4 * Author: Darrick J. Wong <djwong@kernel.org>
  5 */
  6#include "xfs.h"
  7#include "xfs_fs.h"
  8#include "xfs_shared.h"
  9#include "xfs_format.h"
 10#include "xfs_trans_resv.h"
 11#include "xfs_mount.h"
 12#include "xfs_inode.h"
 13#include "xfs_btree.h"
 14#include "scrub/scrub.h"
 15#include "scrub/common.h"
 16#include "scrub/btree.h"
 17#include "scrub/trace.h"
 18
 19/* btree scrubbing */
 20
 21/*
 22 * Check for btree operation errors.  See the section about handling
 23 * operational errors in common.c.
 24 */
 25static bool
 26__xchk_btree_process_error(
 27	struct xfs_scrub	*sc,
 28	struct xfs_btree_cur	*cur,
 29	int			level,
 30	int			*error,
 31	__u32			errflag,
 32	void			*ret_ip)
 33{
 34	if (*error == 0)
 35		return true;
 36
 37	switch (*error) {
 38	case -EDEADLOCK:
 39	case -ECHRNG:
 40		/* Used to restart an op with deadlock avoidance. */
 41		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
 42		break;
 43	case -EFSBADCRC:
 44	case -EFSCORRUPTED:
 45		/* Note the badness but don't abort. */
 46		sc->sm->sm_flags |= errflag;
 47		*error = 0;
 48		fallthrough;
 49	default:
 50		if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
 51			trace_xchk_ifork_btree_op_error(sc, cur, level,
 52					*error, ret_ip);
 53		else
 54			trace_xchk_btree_op_error(sc, cur, level,
 55					*error, ret_ip);
 56		break;
 57	}
 58	return false;
 59}
 60
 61bool
 62xchk_btree_process_error(
 63	struct xfs_scrub	*sc,
 64	struct xfs_btree_cur	*cur,
 65	int			level,
 66	int			*error)
 67{
 68	return __xchk_btree_process_error(sc, cur, level, error,
 69			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
 70}
 71
 72bool
 73xchk_btree_xref_process_error(
 74	struct xfs_scrub	*sc,
 75	struct xfs_btree_cur	*cur,
 76	int			level,
 77	int			*error)
 78{
 79	return __xchk_btree_process_error(sc, cur, level, error,
 80			XFS_SCRUB_OFLAG_XFAIL, __return_address);
 81}
 82
 83/* Record btree block corruption. */
 84static void
 85__xchk_btree_set_corrupt(
 86	struct xfs_scrub	*sc,
 87	struct xfs_btree_cur	*cur,
 88	int			level,
 89	__u32			errflag,
 90	void			*ret_ip)
 91{
 92	sc->sm->sm_flags |= errflag;
 93
 94	if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
 95		trace_xchk_ifork_btree_error(sc, cur, level,
 96				ret_ip);
 97	else
 98		trace_xchk_btree_error(sc, cur, level,
 99				ret_ip);
100}
101
102void
103xchk_btree_set_corrupt(
104	struct xfs_scrub	*sc,
105	struct xfs_btree_cur	*cur,
106	int			level)
107{
108	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
109			__return_address);
110}
111
112void
113xchk_btree_xref_set_corrupt(
114	struct xfs_scrub	*sc,
115	struct xfs_btree_cur	*cur,
116	int			level)
117{
118	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
119			__return_address);
120}
121
122void
123xchk_btree_set_preen(
124	struct xfs_scrub	*sc,
125	struct xfs_btree_cur	*cur,
126	int			level)
127{
128	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_PREEN,
129			__return_address);
130}
131
132/*
133 * Make sure this record is in order and doesn't stray outside of the parent
134 * keys.
135 */
136STATIC void
137xchk_btree_rec(
138	struct xchk_btree	*bs)
139{
140	struct xfs_btree_cur	*cur = bs->cur;
141	union xfs_btree_rec	*rec;
142	union xfs_btree_key	key;
143	union xfs_btree_key	hkey;
144	union xfs_btree_key	*keyp;
145	struct xfs_btree_block	*block;
146	struct xfs_btree_block	*keyblock;
147	struct xfs_buf		*bp;
148
149	block = xfs_btree_get_block(cur, 0, &bp);
150	rec = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, block);
151
152	trace_xchk_btree_rec(bs->sc, cur, 0);
153
154	/* Are all records across all record blocks in order? */
155	if (bs->lastrec_valid &&
156	    !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
157		xchk_btree_set_corrupt(bs->sc, cur, 0);
158	memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
159	bs->lastrec_valid = true;
160
161	if (cur->bc_nlevels == 1)
162		return;
163
164	/* Is low_key(rec) at least as large as the parent low key? */
165	cur->bc_ops->init_key_from_rec(&key, rec);
166	keyblock = xfs_btree_get_block(cur, 1, &bp);
167	keyp = xfs_btree_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
168	if (xfs_btree_keycmp_lt(cur, &key, keyp))
169		xchk_btree_set_corrupt(bs->sc, cur, 1);
170
171	if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
172		return;
173
174	/* Is high_key(rec) no larger than the parent high key? */
175	cur->bc_ops->init_high_key_from_rec(&hkey, rec);
176	keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
177	if (xfs_btree_keycmp_lt(cur, keyp, &hkey))
178		xchk_btree_set_corrupt(bs->sc, cur, 1);
179}
180
181/*
182 * Make sure this key is in order and doesn't stray outside of the parent
183 * keys.
184 */
185STATIC void
186xchk_btree_key(
187	struct xchk_btree	*bs,
188	int			level)
189{
190	struct xfs_btree_cur	*cur = bs->cur;
191	union xfs_btree_key	*key;
192	union xfs_btree_key	*keyp;
193	struct xfs_btree_block	*block;
194	struct xfs_btree_block	*keyblock;
195	struct xfs_buf		*bp;
196
197	block = xfs_btree_get_block(cur, level, &bp);
198	key = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
199
200	trace_xchk_btree_key(bs->sc, cur, level);
201
202	/* Are all low keys across all node blocks in order? */
203	if (bs->lastkey[level - 1].valid &&
204	    !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level - 1].key, key))
205		xchk_btree_set_corrupt(bs->sc, cur, level);
206	memcpy(&bs->lastkey[level - 1].key, key, cur->bc_ops->key_len);
207	bs->lastkey[level - 1].valid = true;
208
209	if (level + 1 >= cur->bc_nlevels)
210		return;
211
212	/* Is this block's low key at least as large as the parent low key? */
213	keyblock = xfs_btree_get_block(cur, level + 1, &bp);
214	keyp = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, keyblock);
215	if (xfs_btree_keycmp_lt(cur, key, keyp))
216		xchk_btree_set_corrupt(bs->sc, cur, level);
217
218	if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
219		return;
220
221	/* Is this block's high key no larger than the parent high key? */
222	key = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, block);
223	keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
224			keyblock);
225	if (xfs_btree_keycmp_lt(cur, keyp, key))
226		xchk_btree_set_corrupt(bs->sc, cur, level);
227}
228
229/*
230 * Check a btree pointer.  Returns true if it's ok to use this pointer.
231 * Callers do not need to set the corrupt flag.
232 */
233static bool
234xchk_btree_ptr_ok(
235	struct xchk_btree	*bs,
236	int			level,
237	union xfs_btree_ptr	*ptr)
238{
 
 
239	/* A btree rooted in an inode has no block pointer to the root. */
240	if (bs->cur->bc_ops->type == XFS_BTREE_TYPE_INODE &&
241	    level == bs->cur->bc_nlevels)
242		return true;
243
244	/* Otherwise, check the pointers. */
245	if (__xfs_btree_check_ptr(bs->cur, ptr, 0, level)) {
 
 
 
 
246		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
247		return false;
248	}
249
250	return true;
251}
252
253/* Check that a btree block's sibling matches what we expect it. */
254STATIC int
255xchk_btree_block_check_sibling(
256	struct xchk_btree	*bs,
257	int			level,
258	int			direction,
259	union xfs_btree_ptr	*sibling)
260{
261	struct xfs_btree_cur	*cur = bs->cur;
262	struct xfs_btree_block	*pblock;
263	struct xfs_buf		*pbp;
264	struct xfs_btree_cur	*ncur = NULL;
265	union xfs_btree_ptr	*pp;
266	int			success;
267	int			error;
268
269	error = xfs_btree_dup_cursor(cur, &ncur);
270	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
271	    !ncur)
272		return error;
273
274	/*
275	 * If the pointer is null, we shouldn't be able to move the upper
276	 * level pointer anywhere.
277	 */
278	if (xfs_btree_ptr_is_null(cur, sibling)) {
279		if (direction > 0)
280			error = xfs_btree_increment(ncur, level + 1, &success);
281		else
282			error = xfs_btree_decrement(ncur, level + 1, &success);
283		if (error == 0 && success)
284			xchk_btree_set_corrupt(bs->sc, cur, level);
285		error = 0;
286		goto out;
287	}
288
289	/* Increment upper level pointer. */
290	if (direction > 0)
291		error = xfs_btree_increment(ncur, level + 1, &success);
292	else
293		error = xfs_btree_decrement(ncur, level + 1, &success);
294	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
295		goto out;
296	if (!success) {
297		xchk_btree_set_corrupt(bs->sc, cur, level + 1);
298		goto out;
299	}
300
301	/* Compare upper level pointer to sibling pointer. */
302	pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
303	pp = xfs_btree_ptr_addr(ncur, ncur->bc_levels[level + 1].ptr, pblock);
304	if (!xchk_btree_ptr_ok(bs, level + 1, pp))
305		goto out;
306	if (pbp)
307		xchk_buffer_recheck(bs->sc, pbp);
308
309	if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
310		xchk_btree_set_corrupt(bs->sc, cur, level);
311out:
312	xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
313	return error;
314}
315
316/* Check the siblings of a btree block. */
317STATIC int
318xchk_btree_block_check_siblings(
319	struct xchk_btree	*bs,
320	struct xfs_btree_block	*block)
321{
322	struct xfs_btree_cur	*cur = bs->cur;
323	union xfs_btree_ptr	leftsib;
324	union xfs_btree_ptr	rightsib;
325	int			level;
326	int			error = 0;
327
328	xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
329	xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
330	level = xfs_btree_get_level(block);
331
332	/* Root block should never have siblings. */
333	if (level == cur->bc_nlevels - 1) {
334		if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
335		    !xfs_btree_ptr_is_null(cur, &rightsib))
336			xchk_btree_set_corrupt(bs->sc, cur, level);
337		goto out;
338	}
339
340	/*
341	 * Does the left & right sibling pointers match the adjacent
342	 * parent level pointers?
343	 * (These function absorbs error codes for us.)
344	 */
345	error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
346	if (error)
347		return error;
348	error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
349	if (error)
350		return error;
351out:
352	return error;
353}
354
355struct check_owner {
356	struct list_head	list;
357	xfs_daddr_t		daddr;
358	int			level;
359};
360
361/*
362 * Make sure this btree block isn't in the free list and that there's
363 * an rmap record for it.
364 */
365STATIC int
366xchk_btree_check_block_owner(
367	struct xchk_btree	*bs,
368	int			level,
369	xfs_daddr_t		daddr)
370{
371	xfs_agnumber_t		agno;
372	xfs_agblock_t		agbno;
 
373	bool			init_sa;
374	int			error = 0;
375
376	if (!bs->cur)
377		return 0;
378
 
379	agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
380	agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
381
382	/*
383	 * If the btree being examined is not itself a per-AG btree, initialize
384	 * sc->sa so that we can check for the presence of an ownership record
385	 * in the rmap btree for the AG containing the block.
386	 */
387	init_sa = bs->cur->bc_ops->type != XFS_BTREE_TYPE_AG;
388	if (init_sa) {
389		error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa);
390		if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
391				level, &error))
392			goto out_free;
393	}
394
395	xchk_xref_is_used_space(bs->sc, agbno, 1);
396	/*
397	 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
398	 * have to nullify it (to shut down further block owner checks) if
399	 * self-xref encounters problems.
400	 */
401	if (!bs->sc->sa.bno_cur && xfs_btree_is_bno(bs->cur->bc_ops))
402		bs->cur = NULL;
403
404	xchk_xref_is_only_owned_by(bs->sc, agbno, 1, bs->oinfo);
405	if (!bs->sc->sa.rmap_cur && xfs_btree_is_rmap(bs->cur->bc_ops))
406		bs->cur = NULL;
407
408out_free:
409	if (init_sa)
410		xchk_ag_free(bs->sc, &bs->sc->sa);
411
412	return error;
413}
414
415/* Check the owner of a btree block. */
416STATIC int
417xchk_btree_check_owner(
418	struct xchk_btree	*bs,
419	int			level,
420	struct xfs_buf		*bp)
421{
422	struct xfs_btree_cur	*cur = bs->cur;
423
424	/*
425	 * In theory, xfs_btree_get_block should only give us a null buffer
426	 * pointer for the root of a root-in-inode btree type, but we need
427	 * to check defensively here in case the cursor state is also screwed
428	 * up.
429	 */
430	if (bp == NULL) {
431		if (cur->bc_ops->type != XFS_BTREE_TYPE_INODE)
432			xchk_btree_set_corrupt(bs->sc, bs->cur, level);
433		return 0;
434	}
435
436	/*
437	 * We want to cross-reference each btree block with the bnobt
438	 * and the rmapbt.  We cannot cross-reference the bnobt or
439	 * rmapbt while scanning the bnobt or rmapbt, respectively,
440	 * because we cannot alter the cursor and we'd prefer not to
441	 * duplicate cursors.  Therefore, save the buffer daddr for
442	 * later scanning.
443	 */
444	if (xfs_btree_is_bno(cur->bc_ops) || xfs_btree_is_rmap(cur->bc_ops)) {
445		struct check_owner	*co;
446
447		co = kmalloc(sizeof(struct check_owner), XCHK_GFP_FLAGS);
448		if (!co)
449			return -ENOMEM;
450
451		INIT_LIST_HEAD(&co->list);
452		co->level = level;
453		co->daddr = xfs_buf_daddr(bp);
454		list_add_tail(&co->list, &bs->to_check);
455		return 0;
456	}
457
458	return xchk_btree_check_block_owner(bs, level, xfs_buf_daddr(bp));
459}
460
461/* Decide if we want to check minrecs of a btree block in the inode root. */
462static inline bool
463xchk_btree_check_iroot_minrecs(
464	struct xchk_btree	*bs)
465{
466	/*
467	 * xfs_bmap_add_attrfork_btree had an implementation bug wherein it
468	 * would miscalculate the space required for the data fork bmbt root
469	 * when adding an attr fork, and promote the iroot contents to an
470	 * external block unnecessarily.  This went unnoticed for many years
471	 * until scrub found filesystems in this state.  Inode rooted btrees are
472	 * not supposed to have immediate child blocks that are small enough
473	 * that the contents could fit in the inode root, but we can't fail
474	 * existing filesystems, so instead we disable the check for data fork
475	 * bmap btrees when there's an attr fork.
476	 */
477	if (xfs_btree_is_bmap(bs->cur->bc_ops) &&
478	    bs->cur->bc_ino.whichfork == XFS_DATA_FORK &&
479	    xfs_inode_has_attr_fork(bs->sc->ip))
480		return false;
481
482	return true;
483}
484
485/*
486 * Check that this btree block has at least minrecs records or is one of the
487 * special blocks that don't require that.
488 */
489STATIC void
490xchk_btree_check_minrecs(
491	struct xchk_btree	*bs,
492	int			level,
493	struct xfs_btree_block	*block)
494{
495	struct xfs_btree_cur	*cur = bs->cur;
496	unsigned int		root_level = cur->bc_nlevels - 1;
497	unsigned int		numrecs = be16_to_cpu(block->bb_numrecs);
498
499	/* More records than minrecs means the block is ok. */
500	if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
501		return;
502
503	/*
504	 * For btrees rooted in the inode, it's possible that the root block
505	 * contents spilled into a regular ondisk block because there wasn't
506	 * enough space in the inode root.  The number of records in that
507	 * child block might be less than the standard minrecs, but that's ok
508	 * provided that there's only one direct child of the root.
509	 */
510	if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE &&
511	    level == cur->bc_nlevels - 2) {
512		struct xfs_btree_block	*root_block;
513		struct xfs_buf		*root_bp;
514		int			root_maxrecs;
515
516		root_block = xfs_btree_get_block(cur, root_level, &root_bp);
517		root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
518		if (xchk_btree_check_iroot_minrecs(bs) &&
519		    (be16_to_cpu(root_block->bb_numrecs) != 1 ||
520		     numrecs <= root_maxrecs))
521			xchk_btree_set_corrupt(bs->sc, cur, level);
522		return;
523	}
524
525	/*
526	 * Otherwise, only the root level is allowed to have fewer than minrecs
527	 * records or keyptrs.
528	 */
529	if (level < root_level)
530		xchk_btree_set_corrupt(bs->sc, cur, level);
531}
532
533/*
534 * If this btree block has a parent, make sure that the parent's keys capture
535 * the keyspace contained in this block.
536 */
537STATIC void
538xchk_btree_block_check_keys(
539	struct xchk_btree	*bs,
540	int			level,
541	struct xfs_btree_block	*block)
542{
543	union xfs_btree_key	block_key;
544	union xfs_btree_key	*block_high_key;
545	union xfs_btree_key	*parent_low_key, *parent_high_key;
546	struct xfs_btree_cur	*cur = bs->cur;
547	struct xfs_btree_block	*parent_block;
548	struct xfs_buf		*bp;
549
550	if (level == cur->bc_nlevels - 1)
551		return;
552
553	xfs_btree_get_keys(cur, block, &block_key);
554
555	/* Make sure the low key of this block matches the parent. */
556	parent_block = xfs_btree_get_block(cur, level + 1, &bp);
557	parent_low_key = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
558			parent_block);
559	if (xfs_btree_keycmp_ne(cur, &block_key, parent_low_key)) {
560		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
561		return;
562	}
563
564	if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
565		return;
566
567	/* Make sure the high key of this block matches the parent. */
568	parent_high_key = xfs_btree_high_key_addr(cur,
569			cur->bc_levels[level + 1].ptr, parent_block);
570	block_high_key = xfs_btree_high_key_from_key(cur, &block_key);
571	if (xfs_btree_keycmp_ne(cur, block_high_key, parent_high_key))
572		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
573}
574
575/*
576 * Grab and scrub a btree block given a btree pointer.  Returns block
577 * and buffer pointers (if applicable) if they're ok to use.
578 */
579STATIC int
580xchk_btree_get_block(
581	struct xchk_btree	*bs,
582	int			level,
583	union xfs_btree_ptr	*pp,
584	struct xfs_btree_block	**pblock,
585	struct xfs_buf		**pbp)
586{
 
587	int			error;
588
589	*pblock = NULL;
590	*pbp = NULL;
591
592	error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
593	if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
594	    !*pblock)
595		return error;
596
597	xfs_btree_get_block(bs->cur, level, pbp);
598	if (__xfs_btree_check_block(bs->cur, *pblock, level, *pbp)) {
 
 
 
 
 
 
599		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
600		return 0;
601	}
602	if (*pbp)
603		xchk_buffer_recheck(bs->sc, *pbp);
604
605	xchk_btree_check_minrecs(bs, level, *pblock);
606
607	/*
608	 * Check the block's owner; this function absorbs error codes
609	 * for us.
610	 */
611	error = xchk_btree_check_owner(bs, level, *pbp);
612	if (error)
613		return error;
614
615	/*
616	 * Check the block's siblings; this function absorbs error codes
617	 * for us.
618	 */
619	error = xchk_btree_block_check_siblings(bs, *pblock);
620	if (error)
621		return error;
622
623	xchk_btree_block_check_keys(bs, level, *pblock);
624	return 0;
625}
626
627/*
628 * Check that the low and high keys of this block match the keys stored
629 * in the parent block.
630 */
631STATIC void
632xchk_btree_block_keys(
633	struct xchk_btree	*bs,
634	int			level,
635	struct xfs_btree_block	*block)
636{
637	union xfs_btree_key	block_keys;
638	struct xfs_btree_cur	*cur = bs->cur;
639	union xfs_btree_key	*high_bk;
640	union xfs_btree_key	*parent_keys;
641	union xfs_btree_key	*high_pk;
642	struct xfs_btree_block	*parent_block;
643	struct xfs_buf		*bp;
644
645	if (level >= cur->bc_nlevels - 1)
646		return;
647
648	/* Calculate the keys for this block. */
649	xfs_btree_get_keys(cur, block, &block_keys);
650
651	/* Obtain the parent's copy of the keys for this block. */
652	parent_block = xfs_btree_get_block(cur, level + 1, &bp);
653	parent_keys = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
654			parent_block);
655
656	if (xfs_btree_keycmp_ne(cur, &block_keys, parent_keys))
657		xchk_btree_set_corrupt(bs->sc, cur, 1);
658
659	if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
660		return;
661
662	/* Get high keys */
663	high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
664	high_pk = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
665			parent_block);
666
667	if (xfs_btree_keycmp_ne(cur, high_bk, high_pk))
668		xchk_btree_set_corrupt(bs->sc, cur, 1);
669}
670
671/*
672 * Visit all nodes and leaves of a btree.  Check that all pointers and
673 * records are in order, that the keys reflect the records, and use a callback
674 * so that the caller can verify individual records.
675 */
676int
677xchk_btree(
678	struct xfs_scrub		*sc,
679	struct xfs_btree_cur		*cur,
680	xchk_btree_rec_fn		scrub_fn,
681	const struct xfs_owner_info	*oinfo,
682	void				*private)
683{
684	union xfs_btree_ptr		ptr;
685	struct xchk_btree		*bs;
686	union xfs_btree_ptr		*pp;
687	union xfs_btree_rec		*recp;
688	struct xfs_btree_block		*block;
689	struct xfs_buf			*bp;
690	struct check_owner		*co;
691	struct check_owner		*n;
692	size_t				cur_sz;
693	int				level;
694	int				error = 0;
695
696	/*
697	 * Allocate the btree scrub context from the heap, because this
698	 * structure can get rather large.  Don't let a caller feed us a
699	 * totally absurd size.
700	 */
701	cur_sz = xchk_btree_sizeof(cur->bc_nlevels);
702	if (cur_sz > PAGE_SIZE) {
703		xchk_btree_set_corrupt(sc, cur, 0);
704		return 0;
705	}
706	bs = kzalloc(cur_sz, XCHK_GFP_FLAGS);
707	if (!bs)
708		return -ENOMEM;
709	bs->cur = cur;
710	bs->scrub_rec = scrub_fn;
711	bs->oinfo = oinfo;
712	bs->private = private;
713	bs->sc = sc;
714
715	/* Initialize scrub state */
716	INIT_LIST_HEAD(&bs->to_check);
717
718	/*
719	 * Load the root of the btree.  The helper function absorbs
720	 * error codes for us.
721	 */
722	level = cur->bc_nlevels - 1;
723	xfs_btree_init_ptr_from_cur(cur, &ptr);
724	if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr))
725		goto out;
726	error = xchk_btree_get_block(bs, level, &ptr, &block, &bp);
727	if (error || !block)
728		goto out;
729
730	cur->bc_levels[level].ptr = 1;
731
732	while (level < cur->bc_nlevels) {
733		block = xfs_btree_get_block(cur, level, &bp);
734
735		if (level == 0) {
736			/* End of leaf, pop back towards the root. */
737			if (cur->bc_levels[level].ptr >
738			    be16_to_cpu(block->bb_numrecs)) {
739				xchk_btree_block_keys(bs, level, block);
740				if (level < cur->bc_nlevels - 1)
741					cur->bc_levels[level + 1].ptr++;
742				level++;
743				continue;
744			}
745
746			/* Records in order for scrub? */
747			xchk_btree_rec(bs);
748
749			/* Call out to the record checker. */
750			recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
751					block);
752			error = bs->scrub_rec(bs, recp);
753			if (error)
754				break;
755			if (xchk_should_terminate(sc, &error) ||
756			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
757				break;
758
759			cur->bc_levels[level].ptr++;
760			continue;
761		}
762
763		/* End of node, pop back towards the root. */
764		if (cur->bc_levels[level].ptr >
765					be16_to_cpu(block->bb_numrecs)) {
766			xchk_btree_block_keys(bs, level, block);
767			if (level < cur->bc_nlevels - 1)
768				cur->bc_levels[level + 1].ptr++;
769			level++;
770			continue;
771		}
772
773		/* Keys in order for scrub? */
774		xchk_btree_key(bs, level);
775
776		/* Drill another level deeper. */
777		pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
778		if (!xchk_btree_ptr_ok(bs, level, pp)) {
779			cur->bc_levels[level].ptr++;
780			continue;
781		}
782		level--;
783		error = xchk_btree_get_block(bs, level, pp, &block, &bp);
784		if (error || !block)
785			goto out;
786
787		cur->bc_levels[level].ptr = 1;
788	}
789
790out:
791	/* Process deferred owner checks on btree blocks. */
792	list_for_each_entry_safe(co, n, &bs->to_check, list) {
793		if (!error && bs->cur)
794			error = xchk_btree_check_block_owner(bs, co->level,
795					co->daddr);
796		list_del(&co->list);
797		kfree(co);
798	}
799	kfree(bs);
800
801	return error;
802}
v6.8
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
  4 * Author: Darrick J. Wong <djwong@kernel.org>
  5 */
  6#include "xfs.h"
  7#include "xfs_fs.h"
  8#include "xfs_shared.h"
  9#include "xfs_format.h"
 10#include "xfs_trans_resv.h"
 11#include "xfs_mount.h"
 12#include "xfs_inode.h"
 13#include "xfs_btree.h"
 14#include "scrub/scrub.h"
 15#include "scrub/common.h"
 16#include "scrub/btree.h"
 17#include "scrub/trace.h"
 18
 19/* btree scrubbing */
 20
 21/*
 22 * Check for btree operation errors.  See the section about handling
 23 * operational errors in common.c.
 24 */
 25static bool
 26__xchk_btree_process_error(
 27	struct xfs_scrub	*sc,
 28	struct xfs_btree_cur	*cur,
 29	int			level,
 30	int			*error,
 31	__u32			errflag,
 32	void			*ret_ip)
 33{
 34	if (*error == 0)
 35		return true;
 36
 37	switch (*error) {
 38	case -EDEADLOCK:
 39	case -ECHRNG:
 40		/* Used to restart an op with deadlock avoidance. */
 41		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
 42		break;
 43	case -EFSBADCRC:
 44	case -EFSCORRUPTED:
 45		/* Note the badness but don't abort. */
 46		sc->sm->sm_flags |= errflag;
 47		*error = 0;
 48		fallthrough;
 49	default:
 50		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
 51			trace_xchk_ifork_btree_op_error(sc, cur, level,
 52					*error, ret_ip);
 53		else
 54			trace_xchk_btree_op_error(sc, cur, level,
 55					*error, ret_ip);
 56		break;
 57	}
 58	return false;
 59}
 60
 61bool
 62xchk_btree_process_error(
 63	struct xfs_scrub	*sc,
 64	struct xfs_btree_cur	*cur,
 65	int			level,
 66	int			*error)
 67{
 68	return __xchk_btree_process_error(sc, cur, level, error,
 69			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
 70}
 71
 72bool
 73xchk_btree_xref_process_error(
 74	struct xfs_scrub	*sc,
 75	struct xfs_btree_cur	*cur,
 76	int			level,
 77	int			*error)
 78{
 79	return __xchk_btree_process_error(sc, cur, level, error,
 80			XFS_SCRUB_OFLAG_XFAIL, __return_address);
 81}
 82
 83/* Record btree block corruption. */
 84static void
 85__xchk_btree_set_corrupt(
 86	struct xfs_scrub	*sc,
 87	struct xfs_btree_cur	*cur,
 88	int			level,
 89	__u32			errflag,
 90	void			*ret_ip)
 91{
 92	sc->sm->sm_flags |= errflag;
 93
 94	if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
 95		trace_xchk_ifork_btree_error(sc, cur, level,
 96				ret_ip);
 97	else
 98		trace_xchk_btree_error(sc, cur, level,
 99				ret_ip);
100}
101
102void
103xchk_btree_set_corrupt(
104	struct xfs_scrub	*sc,
105	struct xfs_btree_cur	*cur,
106	int			level)
107{
108	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
109			__return_address);
110}
111
112void
113xchk_btree_xref_set_corrupt(
114	struct xfs_scrub	*sc,
115	struct xfs_btree_cur	*cur,
116	int			level)
117{
118	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
119			__return_address);
120}
121
122void
123xchk_btree_set_preen(
124	struct xfs_scrub	*sc,
125	struct xfs_btree_cur	*cur,
126	int			level)
127{
128	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_PREEN,
129			__return_address);
130}
131
132/*
133 * Make sure this record is in order and doesn't stray outside of the parent
134 * keys.
135 */
136STATIC void
137xchk_btree_rec(
138	struct xchk_btree	*bs)
139{
140	struct xfs_btree_cur	*cur = bs->cur;
141	union xfs_btree_rec	*rec;
142	union xfs_btree_key	key;
143	union xfs_btree_key	hkey;
144	union xfs_btree_key	*keyp;
145	struct xfs_btree_block	*block;
146	struct xfs_btree_block	*keyblock;
147	struct xfs_buf		*bp;
148
149	block = xfs_btree_get_block(cur, 0, &bp);
150	rec = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, block);
151
152	trace_xchk_btree_rec(bs->sc, cur, 0);
153
154	/* Are all records across all record blocks in order? */
155	if (bs->lastrec_valid &&
156	    !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
157		xchk_btree_set_corrupt(bs->sc, cur, 0);
158	memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
159	bs->lastrec_valid = true;
160
161	if (cur->bc_nlevels == 1)
162		return;
163
164	/* Is low_key(rec) at least as large as the parent low key? */
165	cur->bc_ops->init_key_from_rec(&key, rec);
166	keyblock = xfs_btree_get_block(cur, 1, &bp);
167	keyp = xfs_btree_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
168	if (xfs_btree_keycmp_lt(cur, &key, keyp))
169		xchk_btree_set_corrupt(bs->sc, cur, 1);
170
171	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
172		return;
173
174	/* Is high_key(rec) no larger than the parent high key? */
175	cur->bc_ops->init_high_key_from_rec(&hkey, rec);
176	keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
177	if (xfs_btree_keycmp_lt(cur, keyp, &hkey))
178		xchk_btree_set_corrupt(bs->sc, cur, 1);
179}
180
181/*
182 * Make sure this key is in order and doesn't stray outside of the parent
183 * keys.
184 */
185STATIC void
186xchk_btree_key(
187	struct xchk_btree	*bs,
188	int			level)
189{
190	struct xfs_btree_cur	*cur = bs->cur;
191	union xfs_btree_key	*key;
192	union xfs_btree_key	*keyp;
193	struct xfs_btree_block	*block;
194	struct xfs_btree_block	*keyblock;
195	struct xfs_buf		*bp;
196
197	block = xfs_btree_get_block(cur, level, &bp);
198	key = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
199
200	trace_xchk_btree_key(bs->sc, cur, level);
201
202	/* Are all low keys across all node blocks in order? */
203	if (bs->lastkey[level - 1].valid &&
204	    !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level - 1].key, key))
205		xchk_btree_set_corrupt(bs->sc, cur, level);
206	memcpy(&bs->lastkey[level - 1].key, key, cur->bc_ops->key_len);
207	bs->lastkey[level - 1].valid = true;
208
209	if (level + 1 >= cur->bc_nlevels)
210		return;
211
212	/* Is this block's low key at least as large as the parent low key? */
213	keyblock = xfs_btree_get_block(cur, level + 1, &bp);
214	keyp = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, keyblock);
215	if (xfs_btree_keycmp_lt(cur, key, keyp))
216		xchk_btree_set_corrupt(bs->sc, cur, level);
217
218	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
219		return;
220
221	/* Is this block's high key no larger than the parent high key? */
222	key = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, block);
223	keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
224			keyblock);
225	if (xfs_btree_keycmp_lt(cur, keyp, key))
226		xchk_btree_set_corrupt(bs->sc, cur, level);
227}
228
229/*
230 * Check a btree pointer.  Returns true if it's ok to use this pointer.
231 * Callers do not need to set the corrupt flag.
232 */
233static bool
234xchk_btree_ptr_ok(
235	struct xchk_btree	*bs,
236	int			level,
237	union xfs_btree_ptr	*ptr)
238{
239	bool			res;
240
241	/* A btree rooted in an inode has no block pointer to the root. */
242	if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
243	    level == bs->cur->bc_nlevels)
244		return true;
245
246	/* Otherwise, check the pointers. */
247	if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
248		res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
249	else
250		res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
251	if (!res)
252		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
 
 
253
254	return res;
255}
256
257/* Check that a btree block's sibling matches what we expect it. */
258STATIC int
259xchk_btree_block_check_sibling(
260	struct xchk_btree	*bs,
261	int			level,
262	int			direction,
263	union xfs_btree_ptr	*sibling)
264{
265	struct xfs_btree_cur	*cur = bs->cur;
266	struct xfs_btree_block	*pblock;
267	struct xfs_buf		*pbp;
268	struct xfs_btree_cur	*ncur = NULL;
269	union xfs_btree_ptr	*pp;
270	int			success;
271	int			error;
272
273	error = xfs_btree_dup_cursor(cur, &ncur);
274	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
275	    !ncur)
276		return error;
277
278	/*
279	 * If the pointer is null, we shouldn't be able to move the upper
280	 * level pointer anywhere.
281	 */
282	if (xfs_btree_ptr_is_null(cur, sibling)) {
283		if (direction > 0)
284			error = xfs_btree_increment(ncur, level + 1, &success);
285		else
286			error = xfs_btree_decrement(ncur, level + 1, &success);
287		if (error == 0 && success)
288			xchk_btree_set_corrupt(bs->sc, cur, level);
289		error = 0;
290		goto out;
291	}
292
293	/* Increment upper level pointer. */
294	if (direction > 0)
295		error = xfs_btree_increment(ncur, level + 1, &success);
296	else
297		error = xfs_btree_decrement(ncur, level + 1, &success);
298	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
299		goto out;
300	if (!success) {
301		xchk_btree_set_corrupt(bs->sc, cur, level + 1);
302		goto out;
303	}
304
305	/* Compare upper level pointer to sibling pointer. */
306	pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
307	pp = xfs_btree_ptr_addr(ncur, ncur->bc_levels[level + 1].ptr, pblock);
308	if (!xchk_btree_ptr_ok(bs, level + 1, pp))
309		goto out;
310	if (pbp)
311		xchk_buffer_recheck(bs->sc, pbp);
312
313	if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
314		xchk_btree_set_corrupt(bs->sc, cur, level);
315out:
316	xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
317	return error;
318}
319
320/* Check the siblings of a btree block. */
321STATIC int
322xchk_btree_block_check_siblings(
323	struct xchk_btree	*bs,
324	struct xfs_btree_block	*block)
325{
326	struct xfs_btree_cur	*cur = bs->cur;
327	union xfs_btree_ptr	leftsib;
328	union xfs_btree_ptr	rightsib;
329	int			level;
330	int			error = 0;
331
332	xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
333	xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
334	level = xfs_btree_get_level(block);
335
336	/* Root block should never have siblings. */
337	if (level == cur->bc_nlevels - 1) {
338		if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
339		    !xfs_btree_ptr_is_null(cur, &rightsib))
340			xchk_btree_set_corrupt(bs->sc, cur, level);
341		goto out;
342	}
343
344	/*
345	 * Does the left & right sibling pointers match the adjacent
346	 * parent level pointers?
347	 * (These function absorbs error codes for us.)
348	 */
349	error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
350	if (error)
351		return error;
352	error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
353	if (error)
354		return error;
355out:
356	return error;
357}
358
359struct check_owner {
360	struct list_head	list;
361	xfs_daddr_t		daddr;
362	int			level;
363};
364
365/*
366 * Make sure this btree block isn't in the free list and that there's
367 * an rmap record for it.
368 */
369STATIC int
370xchk_btree_check_block_owner(
371	struct xchk_btree	*bs,
372	int			level,
373	xfs_daddr_t		daddr)
374{
375	xfs_agnumber_t		agno;
376	xfs_agblock_t		agbno;
377	xfs_btnum_t		btnum;
378	bool			init_sa;
379	int			error = 0;
380
381	if (!bs->cur)
382		return 0;
383
384	btnum = bs->cur->bc_btnum;
385	agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
386	agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
387
388	init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
 
 
 
 
 
389	if (init_sa) {
390		error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa);
391		if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
392				level, &error))
393			goto out_free;
394	}
395
396	xchk_xref_is_used_space(bs->sc, agbno, 1);
397	/*
398	 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
399	 * have to nullify it (to shut down further block owner checks) if
400	 * self-xref encounters problems.
401	 */
402	if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
403		bs->cur = NULL;
404
405	xchk_xref_is_only_owned_by(bs->sc, agbno, 1, bs->oinfo);
406	if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
407		bs->cur = NULL;
408
409out_free:
410	if (init_sa)
411		xchk_ag_free(bs->sc, &bs->sc->sa);
412
413	return error;
414}
415
416/* Check the owner of a btree block. */
417STATIC int
418xchk_btree_check_owner(
419	struct xchk_btree	*bs,
420	int			level,
421	struct xfs_buf		*bp)
422{
423	struct xfs_btree_cur	*cur = bs->cur;
424
425	/*
426	 * In theory, xfs_btree_get_block should only give us a null buffer
427	 * pointer for the root of a root-in-inode btree type, but we need
428	 * to check defensively here in case the cursor state is also screwed
429	 * up.
430	 */
431	if (bp == NULL) {
432		if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE))
433			xchk_btree_set_corrupt(bs->sc, bs->cur, level);
434		return 0;
435	}
436
437	/*
438	 * We want to cross-reference each btree block with the bnobt
439	 * and the rmapbt.  We cannot cross-reference the bnobt or
440	 * rmapbt while scanning the bnobt or rmapbt, respectively,
441	 * because we cannot alter the cursor and we'd prefer not to
442	 * duplicate cursors.  Therefore, save the buffer daddr for
443	 * later scanning.
444	 */
445	if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
446		struct check_owner	*co;
447
448		co = kmalloc(sizeof(struct check_owner), XCHK_GFP_FLAGS);
449		if (!co)
450			return -ENOMEM;
451
452		INIT_LIST_HEAD(&co->list);
453		co->level = level;
454		co->daddr = xfs_buf_daddr(bp);
455		list_add_tail(&co->list, &bs->to_check);
456		return 0;
457	}
458
459	return xchk_btree_check_block_owner(bs, level, xfs_buf_daddr(bp));
460}
461
462/* Decide if we want to check minrecs of a btree block in the inode root. */
463static inline bool
464xchk_btree_check_iroot_minrecs(
465	struct xchk_btree	*bs)
466{
467	/*
468	 * xfs_bmap_add_attrfork_btree had an implementation bug wherein it
469	 * would miscalculate the space required for the data fork bmbt root
470	 * when adding an attr fork, and promote the iroot contents to an
471	 * external block unnecessarily.  This went unnoticed for many years
472	 * until scrub found filesystems in this state.  Inode rooted btrees are
473	 * not supposed to have immediate child blocks that are small enough
474	 * that the contents could fit in the inode root, but we can't fail
475	 * existing filesystems, so instead we disable the check for data fork
476	 * bmap btrees when there's an attr fork.
477	 */
478	if (bs->cur->bc_btnum == XFS_BTNUM_BMAP &&
479	    bs->cur->bc_ino.whichfork == XFS_DATA_FORK &&
480	    xfs_inode_has_attr_fork(bs->sc->ip))
481		return false;
482
483	return true;
484}
485
486/*
487 * Check that this btree block has at least minrecs records or is one of the
488 * special blocks that don't require that.
489 */
490STATIC void
491xchk_btree_check_minrecs(
492	struct xchk_btree	*bs,
493	int			level,
494	struct xfs_btree_block	*block)
495{
496	struct xfs_btree_cur	*cur = bs->cur;
497	unsigned int		root_level = cur->bc_nlevels - 1;
498	unsigned int		numrecs = be16_to_cpu(block->bb_numrecs);
499
500	/* More records than minrecs means the block is ok. */
501	if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
502		return;
503
504	/*
505	 * For btrees rooted in the inode, it's possible that the root block
506	 * contents spilled into a regular ondisk block because there wasn't
507	 * enough space in the inode root.  The number of records in that
508	 * child block might be less than the standard minrecs, but that's ok
509	 * provided that there's only one direct child of the root.
510	 */
511	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
512	    level == cur->bc_nlevels - 2) {
513		struct xfs_btree_block	*root_block;
514		struct xfs_buf		*root_bp;
515		int			root_maxrecs;
516
517		root_block = xfs_btree_get_block(cur, root_level, &root_bp);
518		root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
519		if (xchk_btree_check_iroot_minrecs(bs) &&
520		    (be16_to_cpu(root_block->bb_numrecs) != 1 ||
521		     numrecs <= root_maxrecs))
522			xchk_btree_set_corrupt(bs->sc, cur, level);
523		return;
524	}
525
526	/*
527	 * Otherwise, only the root level is allowed to have fewer than minrecs
528	 * records or keyptrs.
529	 */
530	if (level < root_level)
531		xchk_btree_set_corrupt(bs->sc, cur, level);
532}
533
534/*
535 * If this btree block has a parent, make sure that the parent's keys capture
536 * the keyspace contained in this block.
537 */
538STATIC void
539xchk_btree_block_check_keys(
540	struct xchk_btree	*bs,
541	int			level,
542	struct xfs_btree_block	*block)
543{
544	union xfs_btree_key	block_key;
545	union xfs_btree_key	*block_high_key;
546	union xfs_btree_key	*parent_low_key, *parent_high_key;
547	struct xfs_btree_cur	*cur = bs->cur;
548	struct xfs_btree_block	*parent_block;
549	struct xfs_buf		*bp;
550
551	if (level == cur->bc_nlevels - 1)
552		return;
553
554	xfs_btree_get_keys(cur, block, &block_key);
555
556	/* Make sure the low key of this block matches the parent. */
557	parent_block = xfs_btree_get_block(cur, level + 1, &bp);
558	parent_low_key = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
559			parent_block);
560	if (xfs_btree_keycmp_ne(cur, &block_key, parent_low_key)) {
561		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
562		return;
563	}
564
565	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
566		return;
567
568	/* Make sure the high key of this block matches the parent. */
569	parent_high_key = xfs_btree_high_key_addr(cur,
570			cur->bc_levels[level + 1].ptr, parent_block);
571	block_high_key = xfs_btree_high_key_from_key(cur, &block_key);
572	if (xfs_btree_keycmp_ne(cur, block_high_key, parent_high_key))
573		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
574}
575
576/*
577 * Grab and scrub a btree block given a btree pointer.  Returns block
578 * and buffer pointers (if applicable) if they're ok to use.
579 */
580STATIC int
581xchk_btree_get_block(
582	struct xchk_btree	*bs,
583	int			level,
584	union xfs_btree_ptr	*pp,
585	struct xfs_btree_block	**pblock,
586	struct xfs_buf		**pbp)
587{
588	xfs_failaddr_t		failed_at;
589	int			error;
590
591	*pblock = NULL;
592	*pbp = NULL;
593
594	error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
595	if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
596	    !*pblock)
597		return error;
598
599	xfs_btree_get_block(bs->cur, level, pbp);
600	if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
601		failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
602				level, *pbp);
603	else
604		failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
605				 level, *pbp);
606	if (failed_at) {
607		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
608		return 0;
609	}
610	if (*pbp)
611		xchk_buffer_recheck(bs->sc, *pbp);
612
613	xchk_btree_check_minrecs(bs, level, *pblock);
614
615	/*
616	 * Check the block's owner; this function absorbs error codes
617	 * for us.
618	 */
619	error = xchk_btree_check_owner(bs, level, *pbp);
620	if (error)
621		return error;
622
623	/*
624	 * Check the block's siblings; this function absorbs error codes
625	 * for us.
626	 */
627	error = xchk_btree_block_check_siblings(bs, *pblock);
628	if (error)
629		return error;
630
631	xchk_btree_block_check_keys(bs, level, *pblock);
632	return 0;
633}
634
635/*
636 * Check that the low and high keys of this block match the keys stored
637 * in the parent block.
638 */
639STATIC void
640xchk_btree_block_keys(
641	struct xchk_btree	*bs,
642	int			level,
643	struct xfs_btree_block	*block)
644{
645	union xfs_btree_key	block_keys;
646	struct xfs_btree_cur	*cur = bs->cur;
647	union xfs_btree_key	*high_bk;
648	union xfs_btree_key	*parent_keys;
649	union xfs_btree_key	*high_pk;
650	struct xfs_btree_block	*parent_block;
651	struct xfs_buf		*bp;
652
653	if (level >= cur->bc_nlevels - 1)
654		return;
655
656	/* Calculate the keys for this block. */
657	xfs_btree_get_keys(cur, block, &block_keys);
658
659	/* Obtain the parent's copy of the keys for this block. */
660	parent_block = xfs_btree_get_block(cur, level + 1, &bp);
661	parent_keys = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
662			parent_block);
663
664	if (xfs_btree_keycmp_ne(cur, &block_keys, parent_keys))
665		xchk_btree_set_corrupt(bs->sc, cur, 1);
666
667	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
668		return;
669
670	/* Get high keys */
671	high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
672	high_pk = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
673			parent_block);
674
675	if (xfs_btree_keycmp_ne(cur, high_bk, high_pk))
676		xchk_btree_set_corrupt(bs->sc, cur, 1);
677}
678
679/*
680 * Visit all nodes and leaves of a btree.  Check that all pointers and
681 * records are in order, that the keys reflect the records, and use a callback
682 * so that the caller can verify individual records.
683 */
684int
685xchk_btree(
686	struct xfs_scrub		*sc,
687	struct xfs_btree_cur		*cur,
688	xchk_btree_rec_fn		scrub_fn,
689	const struct xfs_owner_info	*oinfo,
690	void				*private)
691{
692	union xfs_btree_ptr		ptr;
693	struct xchk_btree		*bs;
694	union xfs_btree_ptr		*pp;
695	union xfs_btree_rec		*recp;
696	struct xfs_btree_block		*block;
697	struct xfs_buf			*bp;
698	struct check_owner		*co;
699	struct check_owner		*n;
700	size_t				cur_sz;
701	int				level;
702	int				error = 0;
703
704	/*
705	 * Allocate the btree scrub context from the heap, because this
706	 * structure can get rather large.  Don't let a caller feed us a
707	 * totally absurd size.
708	 */
709	cur_sz = xchk_btree_sizeof(cur->bc_nlevels);
710	if (cur_sz > PAGE_SIZE) {
711		xchk_btree_set_corrupt(sc, cur, 0);
712		return 0;
713	}
714	bs = kzalloc(cur_sz, XCHK_GFP_FLAGS);
715	if (!bs)
716		return -ENOMEM;
717	bs->cur = cur;
718	bs->scrub_rec = scrub_fn;
719	bs->oinfo = oinfo;
720	bs->private = private;
721	bs->sc = sc;
722
723	/* Initialize scrub state */
724	INIT_LIST_HEAD(&bs->to_check);
725
726	/*
727	 * Load the root of the btree.  The helper function absorbs
728	 * error codes for us.
729	 */
730	level = cur->bc_nlevels - 1;
731	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
732	if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr))
733		goto out;
734	error = xchk_btree_get_block(bs, level, &ptr, &block, &bp);
735	if (error || !block)
736		goto out;
737
738	cur->bc_levels[level].ptr = 1;
739
740	while (level < cur->bc_nlevels) {
741		block = xfs_btree_get_block(cur, level, &bp);
742
743		if (level == 0) {
744			/* End of leaf, pop back towards the root. */
745			if (cur->bc_levels[level].ptr >
746			    be16_to_cpu(block->bb_numrecs)) {
747				xchk_btree_block_keys(bs, level, block);
748				if (level < cur->bc_nlevels - 1)
749					cur->bc_levels[level + 1].ptr++;
750				level++;
751				continue;
752			}
753
754			/* Records in order for scrub? */
755			xchk_btree_rec(bs);
756
757			/* Call out to the record checker. */
758			recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
759					block);
760			error = bs->scrub_rec(bs, recp);
761			if (error)
762				break;
763			if (xchk_should_terminate(sc, &error) ||
764			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
765				break;
766
767			cur->bc_levels[level].ptr++;
768			continue;
769		}
770
771		/* End of node, pop back towards the root. */
772		if (cur->bc_levels[level].ptr >
773					be16_to_cpu(block->bb_numrecs)) {
774			xchk_btree_block_keys(bs, level, block);
775			if (level < cur->bc_nlevels - 1)
776				cur->bc_levels[level + 1].ptr++;
777			level++;
778			continue;
779		}
780
781		/* Keys in order for scrub? */
782		xchk_btree_key(bs, level);
783
784		/* Drill another level deeper. */
785		pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
786		if (!xchk_btree_ptr_ok(bs, level, pp)) {
787			cur->bc_levels[level].ptr++;
788			continue;
789		}
790		level--;
791		error = xchk_btree_get_block(bs, level, pp, &block, &bp);
792		if (error || !block)
793			goto out;
794
795		cur->bc_levels[level].ptr = 1;
796	}
797
798out:
799	/* Process deferred owner checks on btree blocks. */
800	list_for_each_entry_safe(co, n, &bs->to_check, list) {
801		if (!error && bs->cur)
802			error = xchk_btree_check_block_owner(bs, co->level,
803					co->daddr);
804		list_del(&co->list);
805		kfree(co);
806	}
807	kfree(bs);
808
809	return error;
810}