/* Web-extraction banner removed; source: fs/xfs/scrub/bmap.c, Linux v6.9.4. */
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
   4 * Author: Darrick J. Wong <djwong@kernel.org>
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_trans_resv.h"
  11#include "xfs_mount.h"
  12#include "xfs_btree.h"
  13#include "xfs_bit.h"
  14#include "xfs_log_format.h"
  15#include "xfs_trans.h"
  16#include "xfs_inode.h"
  17#include "xfs_alloc.h"
  18#include "xfs_bmap.h"
  19#include "xfs_bmap_btree.h"
  20#include "xfs_rmap.h"
  21#include "xfs_rmap_btree.h"
  22#include "xfs_health.h"
  23#include "scrub/scrub.h"
  24#include "scrub/common.h"
  25#include "scrub/btree.h"
  26#include "scrub/health.h"
  27#include "xfs_ag.h"
  28
/*
 * Set us up with an inode's bmap.
 *
 * Grabs the inode under scrub and takes the IO lock.  For regular-file data
 * scrubs (anything but bmbt-on-attr), also takes the MMAP lock, optionally
 * breaks leases for repair, waits for directio, and flushes dirty pagecache
 * so the fork mappings are stable.  Finishes by allocating an empty
 * transaction, attaching dquots, and taking the ILOCK.  On any error, scrub
 * teardown unlocks and releases the inode.
 */
int
xchk_setup_inode_bmap(
	struct xfs_scrub	*sc)
{
	int			error;

	/* Enable the intent drain fsgate before we touch any inodes. */
	if (xchk_need_intent_drain(sc))
		xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);

	error = xchk_iget_for_scrubbing(sc);
	if (error)
		goto out;

	xchk_ilock(sc, XFS_IOLOCK_EXCL);

	/*
	 * We don't want any ephemeral data/cow fork updates sitting around
	 * while we inspect block mappings, so wait for directio to finish
	 * and flush dirty data if we have delalloc reservations.
	 */
	if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
	    sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
		struct address_space	*mapping = VFS_I(sc->ip)->i_mapping;
		bool			is_repair = xchk_could_repair(sc);

		xchk_ilock(sc, XFS_MMAPLOCK_EXCL);

		/* Break all our leases, we're going to mess with things. */
		if (is_repair) {
			error = xfs_break_layouts(VFS_I(sc->ip),
					&sc->ilock_flags, BREAK_WRITE);
			if (error)
				goto out;
		}

		inode_dio_wait(VFS_I(sc->ip));

		/*
		 * Try to flush all incore state to disk before we examine the
		 * space mappings for the data fork.  Leave accumulated errors
		 * in the mapping for the writer threads to consume.
		 *
		 * On ENOSPC or EIO writeback errors, we continue into the
		 * extent mapping checks because write failures do not
		 * necessarily imply anything about the correctness of the file
		 * metadata.  The metadata and the file data could be on
		 * completely separate devices; a media failure might only
		 * affect a subset of the disk, etc.  We can handle delalloc
		 * extents in the scrubber, so leaving them in memory is fine.
		 */
		error = filemap_fdatawrite(mapping);
		if (!error)
			error = filemap_fdatawait_keep_errors(mapping);
		if (error && (error != -ENOSPC && error != -EIO))
			goto out;

		/* Drop the page cache if we're repairing block mappings. */
		if (is_repair) {
			error = invalidate_inode_pages2(
					VFS_I(sc->ip)->i_mapping);
			if (error)
				goto out;
		}

	}

	/* Got the inode, lock it and we're ready to go. */
	error = xchk_trans_alloc(sc, 0);
	if (error)
		goto out;

	error = xchk_ino_dqattach(sc);
	if (error)
		goto out;

	xchk_ilock(sc, XFS_ILOCK_EXCL);
out:
	/* scrub teardown will unlock and release the inode */
	return error;
}
 110
 111/*
 112 * Inode fork block mapping (BMBT) scrubber.
 113 * More complex than the others because we have to scrub
 114 * all the extents regardless of whether or not the fork
 115 * is in btree format.
 116 */
 117
/* State tracked while scrubbing one inode fork's block mappings. */
struct xchk_bmap_info {
	struct xfs_scrub	*sc;

	/* Incore extent tree cursor */
	struct xfs_iext_cursor	icur;

	/* Previous fork mapping that we examined */
	struct xfs_bmbt_irec	prev_rec;

	/* Is this a realtime fork? */
	bool			is_rt;

	/* May mappings point to shared space? */
	bool			is_shared;

	/* Was the incore extent tree loaded? */
	bool			was_loaded;

	/* Which inode fork are we checking? */
	int			whichfork;
};
 139
 140/* Look for a corresponding rmap for this irec. */
 141static inline bool
 142xchk_bmap_get_rmap(
 143	struct xchk_bmap_info	*info,
 144	struct xfs_bmbt_irec	*irec,
 145	xfs_agblock_t		agbno,
 146	uint64_t		owner,
 147	struct xfs_rmap_irec	*rmap)
 148{
 149	xfs_fileoff_t		offset;
 150	unsigned int		rflags = 0;
 151	int			has_rmap;
 152	int			error;
 153
 154	if (info->whichfork == XFS_ATTR_FORK)
 155		rflags |= XFS_RMAP_ATTR_FORK;
 156	if (irec->br_state == XFS_EXT_UNWRITTEN)
 157		rflags |= XFS_RMAP_UNWRITTEN;
 158
 159	/*
 160	 * CoW staging extents are owned (on disk) by the refcountbt, so
 161	 * their rmaps do not have offsets.
 162	 */
 163	if (info->whichfork == XFS_COW_FORK)
 164		offset = 0;
 165	else
 166		offset = irec->br_startoff;
 167
 168	/*
 169	 * If the caller thinks this could be a shared bmbt extent (IOWs,
 170	 * any data fork extent of a reflink inode) then we have to use the
 171	 * range rmap lookup to make sure we get the correct owner/offset.
 172	 */
 173	if (info->is_shared) {
 174		error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
 175				owner, offset, rflags, rmap, &has_rmap);
 176	} else {
 177		error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno,
 178				owner, offset, rflags, rmap, &has_rmap);
 179	}
 180	if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur))
 181		return false;
 182
 183	if (!has_rmap)
 184		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
 185			irec->br_startoff);
 186	return has_rmap;
 187}
 188
/*
 * Make sure that we have rmapbt records for this data/attr fork extent.
 * @agbno is the AG block corresponding to irec->br_startblock; the rmap
 * must match the (possibly merged) incore mapping exactly.
 */
STATIC void
xchk_bmap_xref_rmap(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec,
	xfs_agblock_t		agbno)
{
	struct xfs_rmap_irec	rmap;
	unsigned long long	rmap_end;
	uint64_t		owner = info->sc->ip->i_ino;

	/* Skip if there's no rmap cursor or xrefs are disabled. */
	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
		return;

	/* Find the rmap record for this irec. */
	if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
		return;

	/*
	 * The rmap must be an exact match for this incore file mapping record,
	 * which may have arisen from multiple ondisk records.
	 */
	if (rmap.rm_startblock != agbno)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
	if (rmap_end != agbno + irec->br_blockcount)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Check the logical offsets. */
	if (rmap.rm_offset != irec->br_startoff)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	rmap_end = (unsigned long long)rmap.rm_offset + rmap.rm_blockcount;
	if (rmap_end != irec->br_startoff + irec->br_blockcount)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Check the owner */
	if (rmap.rm_owner != owner)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/*
	 * Check for discrepancies between the unwritten flag in the irec and
	 * the rmap.  Note that the (in-memory) CoW fork distinguishes between
	 * unwritten and written extents, but we don't track that in the rmap
	 * records because the blocks are owned (on-disk) by the refcountbt,
	 * which doesn't track unwritten state.
	 */
	if (!!(irec->br_state == XFS_EXT_UNWRITTEN) !=
	    !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* The attr-fork flag must agree with the fork being checked. */
	if (!!(info->whichfork == XFS_ATTR_FORK) !=
	    !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	/* File data mappings must not be flagged as bmbt blocks. */
	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
}
 255
/*
 * Make sure that we have rmapbt records for this COW fork extent.  Unlike
 * data/attr fork xrefs, the rmap need only cover the mapping, not match it
 * exactly, because CoW staging space is tracked per refcountbt owner.
 */
STATIC void
xchk_bmap_xref_rmap_cow(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec,
	xfs_agblock_t		agbno)
{
	struct xfs_rmap_irec	rmap;
	unsigned long long	rmap_end;
	uint64_t		owner = XFS_RMAP_OWN_COW;

	/* Skip if there's no rmap cursor or xrefs are disabled. */
	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
		return;

	/* Find the rmap record for this irec. */
	if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
		return;

	/*
	 * CoW staging extents are owned by the refcount btree, so the rmap
	 * can start before and end after the physical space allocated to this
	 * mapping.  There are no offsets to check.
	 */
	if (rmap.rm_startblock > agbno)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
	if (rmap_end < agbno + irec->br_blockcount)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Check the owner */
	if (rmap.rm_owner != owner)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/*
	 * No flags allowed.  Note that the (in-memory) CoW fork distinguishes
	 * between unwritten and written extents, but we don't track that in
	 * the rmap records because the blocks are owned (on-disk) by the
	 * refcountbt, which doesn't track unwritten state.
	 */
	if (rmap.rm_flags & XFS_RMAP_ATTR_FORK)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	if (rmap.rm_flags & XFS_RMAP_UNWRITTEN)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
}
 309
/* Cross-reference a single rtdev extent record. */
STATIC void
xchk_bmap_rt_iextent_xref(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	/* The realtime allocator must consider this space in use. */
	xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
			irec->br_blockcount);
}
 320
/*
 * Cross-reference a single datadev extent record against the AG's space
 * metadata (allocation, inode chunk, rmap, refcount btrees).
 */
STATIC void
xchk_bmap_iextent_xref(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_owner_info	oinfo;
	struct xfs_mount	*mp = info->sc->mp;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	xfs_extlen_t		len;
	int			error;

	/* Convert the fsblock address into AG number/block coordinates. */
	agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
	len = irec->br_blockcount;

	/* Set up AG headers and btree cursors for the cross-references. */
	error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa);
	if (!xchk_fblock_process_error(info->sc, info->whichfork,
			irec->br_startoff, &error))
		goto out_free;

	/* Mapped space must be allocated and must not be an inode chunk. */
	xchk_xref_is_used_space(info->sc, agbno, len);
	xchk_xref_is_not_inode_chunk(info->sc, agbno, len);
	switch (info->whichfork) {
	case XFS_DATA_FORK:
		xchk_bmap_xref_rmap(info, irec, agbno);
		/* Only reflink inodes may share data fork blocks. */
		if (!xfs_is_reflink_inode(info->sc->ip)) {
			xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
					info->whichfork, irec->br_startoff);
			xchk_xref_is_only_owned_by(info->sc, agbno,
					irec->br_blockcount, &oinfo);
			xchk_xref_is_not_shared(info->sc, agbno,
					irec->br_blockcount);
		}
		xchk_xref_is_not_cow_staging(info->sc, agbno,
				irec->br_blockcount);
		break;
	case XFS_ATTR_FORK:
		/* Attr fork blocks are never shared or CoW-staged. */
		xchk_bmap_xref_rmap(info, irec, agbno);
		xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
				info->whichfork, irec->br_startoff);
		xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
				&oinfo);
		xchk_xref_is_not_shared(info->sc, agbno,
				irec->br_blockcount);
		xchk_xref_is_not_cow_staging(info->sc, agbno,
				irec->br_blockcount);
		break;
	case XFS_COW_FORK:
		/* CoW fork blocks must be staged under the CoW rmap owner. */
		xchk_bmap_xref_rmap_cow(info, irec, agbno);
		xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
				&XFS_RMAP_OINFO_COW);
		xchk_xref_is_cow_staging(info->sc, agbno,
				irec->br_blockcount);
		xchk_xref_is_not_shared(info->sc, agbno,
				irec->br_blockcount);
		break;
	}

out_free:
	xchk_ag_free(info->sc, &info->sc->sa);
}
 385
 386/*
 387 * Directories and attr forks should never have blocks that can't be addressed
 388 * by a xfs_dablk_t.
 389 */
 390STATIC void
 391xchk_bmap_dirattr_extent(
 392	struct xfs_inode	*ip,
 393	struct xchk_bmap_info	*info,
 394	struct xfs_bmbt_irec	*irec)
 395{
 396	struct xfs_mount	*mp = ip->i_mount;
 397	xfs_fileoff_t		off;
 398
 399	if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK)
 400		return;
 401
 402	if (!xfs_verify_dablk(mp, irec->br_startoff))
 403		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 404				irec->br_startoff);
 405
 406	off = irec->br_startoff + irec->br_blockcount - 1;
 407	if (!xfs_verify_dablk(mp, off))
 408		xchk_fblock_set_corrupt(info->sc, info->whichfork, off);
 409}
 410
/* Scrub a single extent record. */
STATIC void
xchk_bmap_iextent(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_mount	*mp = info->sc->mp;

	/*
	 * Check for out-of-order extents.  This record could have come
	 * from the incore list, for which there is no ordering check.
	 */
	if (irec->br_startoff < info->prev_rec.br_startoff +
				info->prev_rec.br_blockcount)
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* The logical file range must be addressable. */
	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	xchk_bmap_dirattr_extent(ip, info, irec);

	/* Make sure the extent points to a valid place. */
	if (info->is_rt &&
	    !xfs_verify_rtbext(mp, irec->br_startblock, irec->br_blockcount))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	if (!info->is_rt &&
	    !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* We don't allow unwritten extents on attr forks. */
	if (irec->br_state == XFS_EXT_UNWRITTEN &&
	    info->whichfork == XFS_ATTR_FORK)
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Skip the cross-referencing if we already found corruption. */
	if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return;

	if (info->is_rt)
		xchk_bmap_rt_iextent_xref(ip, info, irec);
	else
		xchk_bmap_iextent_xref(ip, info, irec);
}
 459
/*
 * Scrub a bmbt record.  Called once for every record visited by the generic
 * btree walker; checks block ownership and that the on-disk record matches
 * the incore extent cache.
 */
STATIC int
xchk_bmapbt_rec(
	struct xchk_btree	*bs,
	const union xfs_btree_rec *rec)
{
	struct xfs_bmbt_irec	irec;
	struct xfs_bmbt_irec	iext_irec;
	struct xfs_iext_cursor	icur;
	struct xchk_bmap_info	*info = bs->private;
	struct xfs_inode	*ip = bs->cur->bc_ino.ip;
	struct xfs_buf		*bp = NULL;
	struct xfs_btree_block	*block;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, info->whichfork);
	uint64_t		owner;
	int			i;

	/*
	 * Check the owners of the btree blocks up to the level below
	 * the root since the verifiers don't do that.  Only done once
	 * per btree block, i.e. when the cursor points at record 1.
	 */
	if (xfs_has_crc(bs->cur->bc_mp) &&
	    bs->cur->bc_levels[0].ptr == 1) {
		for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
			block = xfs_btree_get_block(bs->cur, i, &bp);
			owner = be64_to_cpu(block->bb_u.l.bb_owner);
			if (owner != ip->i_ino)
				xchk_fblock_set_corrupt(bs->sc,
						info->whichfork, 0);
		}
	}

	/*
	 * Check that the incore extent tree contains an extent that matches
	 * this one exactly.  We validate those cached bmaps later, so we don't
	 * need to check them here.  If the incore extent tree was just loaded
	 * from disk by the scrubber, we assume that its contents match what's
	 * on disk (we still hold the ILOCK) and skip the equivalence check.
	 */
	if (!info->was_loaded)
		return 0;

	/* Decode the on-disk record and sanity-check it. */
	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
	if (xfs_bmap_validate_extent(ip, info->whichfork, &irec) != NULL) {
		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
				irec.br_startoff);
		return 0;
	}

	/* The incore cache must contain this exact mapping. */
	if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
				&iext_irec) ||
	    irec.br_startoff != iext_irec.br_startoff ||
	    irec.br_startblock != iext_irec.br_startblock ||
	    irec.br_blockcount != iext_irec.br_blockcount ||
	    irec.br_state != iext_irec.br_state)
		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
				irec.br_startoff);
	return 0;
}
 519
 520/* Scan the btree records. */
 521STATIC int
 522xchk_bmap_btree(
 523	struct xfs_scrub	*sc,
 524	int			whichfork,
 525	struct xchk_bmap_info	*info)
 526{
 527	struct xfs_owner_info	oinfo;
 528	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, whichfork);
 529	struct xfs_mount	*mp = sc->mp;
 530	struct xfs_inode	*ip = sc->ip;
 531	struct xfs_btree_cur	*cur;
 532	int			error;
 533
 534	/* Load the incore bmap cache if it's not loaded. */
 535	info->was_loaded = !xfs_need_iread_extents(ifp);
 536
 537	error = xfs_iread_extents(sc->tp, ip, whichfork);
 538	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
 539		goto out;
 540
 541	/* Check the btree structure. */
 542	cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
 543	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
 544	error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
 545	xfs_btree_del_cursor(cur, error);
 546out:
 547	return error;
 548}
 549
/* State for checking each rmap record against the fork's incore mappings. */
struct xchk_bmap_check_rmap_info {
	/* Scrub context. */
	struct xfs_scrub	*sc;

	/* Which inode fork are we checking? */
	int			whichfork;

	/* Incore extent tree cursor, advanced as rmaps are matched. */
	struct xfs_iext_cursor	icur;
};
 555
/*
 * Can we find bmaps that fit this rmap?  Callback for xfs_rmap_query_all;
 * returns -ECANCELED to abort the walk once corruption has been flagged.
 */
STATIC int
xchk_bmap_check_rmap(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xfs_bmbt_irec		irec;
	struct xfs_rmap_irec		check_rec;
	struct xchk_bmap_check_rmap_info	*sbcri = priv;
	struct xfs_ifork		*ifp;
	struct xfs_scrub		*sc = sbcri->sc;
	bool				have_map;

	/* Is this even the right fork? */
	if (rec->rm_owner != sc->ip->i_ino)
		return 0;
	if ((sbcri->whichfork == XFS_ATTR_FORK) ^
	    !!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;
	/* bmbt blocks themselves are not fork mappings; skip them. */
	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
		return 0;

	/* Now look up the bmbt record. */
	ifp = xfs_ifork_ptr(sc->ip, sbcri->whichfork);
	if (!ifp) {
		/* An rmap for a nonexistent fork is corruption. */
		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
				rec->rm_offset);
		goto out;
	}
	have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
			&sbcri->icur, &irec);
	if (!have_map)
		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
				rec->rm_offset);
	/*
	 * bmap extent record lengths are constrained to 2^21 blocks in length
	 * because of space constraints in the on-disk metadata structure.
	 * However, rmap extent record lengths are constrained only by AG
	 * length, so we have to loop through the bmbt to make sure that the
	 * entire rmap is covered by bmbt records.
	 */
	check_rec = *rec;
	while (have_map) {
		/* Each bmbt record must line up with the remaining rmap. */
		if (irec.br_startoff != check_rec.rm_offset)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
				cur->bc_ag.pag->pag_agno,
				check_rec.rm_startblock))
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (irec.br_blockcount > check_rec.rm_blockcount)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
			break;
		/* Advance the remaining rmap range past this bmbt record. */
		check_rec.rm_startblock += irec.br_blockcount;
		check_rec.rm_offset += irec.br_blockcount;
		check_rec.rm_blockcount -= irec.br_blockcount;
		if (check_rec.rm_blockcount == 0)
			break;
		have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
		if (!have_map)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
	}

out:
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return -ECANCELED;
	return 0;
}
 629
 630/* Make sure each rmap has a corresponding bmbt entry. */
 631STATIC int
 632xchk_bmap_check_ag_rmaps(
 633	struct xfs_scrub		*sc,
 634	int				whichfork,
 635	struct xfs_perag		*pag)
 636{
 637	struct xchk_bmap_check_rmap_info	sbcri;
 638	struct xfs_btree_cur		*cur;
 639	struct xfs_buf			*agf;
 640	int				error;
 641
 642	error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf);
 643	if (error)
 644		return error;
 645
 646	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, pag);
 647
 648	sbcri.sc = sc;
 649	sbcri.whichfork = whichfork;
 650	error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
 651	if (error == -ECANCELED)
 652		error = 0;
 653
 654	xfs_btree_del_cursor(cur, error);
 655	xfs_trans_brelse(sc->tp, agf);
 656	return error;
 657}
 658
 659/*
 660 * Decide if we want to scan the reverse mappings to determine if the attr
 661 * fork /really/ has zero space mappings.
 662 */
 663STATIC bool
 664xchk_bmap_check_empty_attrfork(
 665	struct xfs_inode	*ip)
 666{
 667	struct xfs_ifork	*ifp = &ip->i_af;
 668
 669	/*
 670	 * If the dinode repair found a bad attr fork, it will reset the fork
 671	 * to extents format with zero records and wait for the this scrubber
 672	 * to reconstruct the block mappings.  If the fork is not in this
 673	 * state, then the fork cannot have been zapped.
 674	 */
 675	if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0)
 676		return false;
 677
 678	/*
 679	 * Files can have an attr fork in EXTENTS format with zero records for
 680	 * several reasons:
 681	 *
 682	 * a) an attr set created a fork but ran out of space
 683	 * b) attr replace deleted an old attr but failed during the set step
 684	 * c) the data fork was in btree format when all attrs were deleted, so
 685	 *    the fork was left in place
 686	 * d) the inode repair code zapped the fork
 687	 *
 688	 * Only in case (d) do we want to scan the rmapbt to see if we need to
 689	 * rebuild the attr fork.  The fork zap code clears all DAC permission
 690	 * bits and zeroes the uid and gid, so avoid the scan if any of those
 691	 * three conditions are not met.
 692	 */
 693	if ((VFS_I(ip)->i_mode & 0777) != 0)
 694		return false;
 695	if (!uid_eq(VFS_I(ip)->i_uid, GLOBAL_ROOT_UID))
 696		return false;
 697	if (!gid_eq(VFS_I(ip)->i_gid, GLOBAL_ROOT_GID))
 698		return false;
 699
 700	return true;
 701}
 702
 703/*
 704 * Decide if we want to scan the reverse mappings to determine if the data
 705 * fork /really/ has zero space mappings.
 706 */
 707STATIC bool
 708xchk_bmap_check_empty_datafork(
 709	struct xfs_inode	*ip)
 710{
 711	struct xfs_ifork	*ifp = &ip->i_df;
 712
 713	/* Don't support realtime rmap checks yet. */
 714	if (XFS_IS_REALTIME_INODE(ip))
 715		return false;
 716
 717	/*
 718	 * If the dinode repair found a bad data fork, it will reset the fork
 719	 * to extents format with zero records and wait for the this scrubber
 720	 * to reconstruct the block mappings.  If the fork is not in this
 721	 * state, then the fork cannot have been zapped.
 722	 */
 723	if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0)
 724		return false;
 725
 726	/*
 727	 * If we encounter an empty data fork along with evidence that the fork
 728	 * might not really be empty, we need to scan the reverse mappings to
 729	 * decide if we're going to rebuild the fork.  Data forks with nonzero
 730	 * file size are scanned.
 731	 */
 732	return i_size_read(VFS_I(ip)) != 0;
 733}
 734
 735/*
 736 * Decide if we want to walk every rmap btree in the fs to make sure that each
 737 * rmap for this file fork has corresponding bmbt entries.
 738 */
 739static bool
 740xchk_bmap_want_check_rmaps(
 741	struct xchk_bmap_info	*info)
 742{
 743	struct xfs_scrub	*sc = info->sc;
 744
 745	if (!xfs_has_rmapbt(sc->mp))
 746		return false;
 747	if (info->whichfork == XFS_COW_FORK)
 748		return false;
 749	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 750		return false;
 751
 752	if (info->whichfork == XFS_ATTR_FORK)
 753		return xchk_bmap_check_empty_attrfork(sc->ip);
 754
 755	return xchk_bmap_check_empty_datafork(sc->ip);
 756}
 757
/* Make sure each rmap has a corresponding bmbt entry. */
STATIC int
xchk_bmap_check_rmaps(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;
	int			error;

	/* Walk every AG's rmap btree looking for records owned by this fork. */
	for_each_perag(sc->mp, agno, pag) {
		error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
		if (error ||
		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
			/* Drop the perag reference before bailing out early. */
			xfs_perag_rele(pag);
			return error;
		}
	}

	return 0;
}
 779
 780/* Scrub a delalloc reservation from the incore extent map tree. */
 781STATIC void
 782xchk_bmap_iextent_delalloc(
 783	struct xfs_inode	*ip,
 784	struct xchk_bmap_info	*info,
 785	struct xfs_bmbt_irec	*irec)
 786{
 787	struct xfs_mount	*mp = info->sc->mp;
 788
 789	/*
 790	 * Check for out-of-order extents.  This record could have come
 791	 * from the incore list, for which there is no ordering check.
 792	 */
 793	if (irec->br_startoff < info->prev_rec.br_startoff +
 794				info->prev_rec.br_blockcount)
 795		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 796				irec->br_startoff);
 797
 798	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
 799		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 800				irec->br_startoff);
 801
 802	/* Make sure the extent points to a valid place. */
 803	if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
 804		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 805				irec->br_startoff);
 806}
 807
 808/* Decide if this individual fork mapping is ok. */
 809static bool
 810xchk_bmap_iext_mapping(
 811	struct xchk_bmap_info		*info,
 812	const struct xfs_bmbt_irec	*irec)
 813{
 814	/* There should never be a "hole" extent in either extent list. */
 815	if (irec->br_startblock == HOLESTARTBLOCK)
 816		return false;
 817	if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
 818		return false;
 819	return true;
 820}
 821
 822/* Are these two mappings contiguous with each other? */
 823static inline bool
 824xchk_are_bmaps_contiguous(
 825	const struct xfs_bmbt_irec	*b1,
 826	const struct xfs_bmbt_irec	*b2)
 827{
 828	/* Don't try to combine unallocated mappings. */
 829	if (!xfs_bmap_is_real_extent(b1))
 830		return false;
 831	if (!xfs_bmap_is_real_extent(b2))
 832		return false;
 833
 834	/* Does b2 come right after b1 in the logical and physical range? */
 835	if (b1->br_startoff + b1->br_blockcount != b2->br_startoff)
 836		return false;
 837	if (b1->br_startblock + b1->br_blockcount != b2->br_startblock)
 838		return false;
 839	if (b1->br_state != b2->br_state)
 840		return false;
 841	return true;
 842}
 843
/*
 * Walk the incore extent records, accumulating consecutive contiguous records
 * into a single incore mapping.  Returns true if @irec has been set to a
 * mapping or false if there are no more mappings.  Caller must ensure that
 * @info.icur is zeroed before the first call.
 */
static bool
xchk_bmap_iext_iter(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_bmbt_irec	got;
	struct xfs_ifork	*ifp;
	unsigned int		nr = 0;	/* incore records merged into @irec */

	ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork);

	/* Advance to the next iextent record and check the mapping. */
	xfs_iext_next(ifp, &info->icur);
	if (!xfs_iext_get_extent(ifp, &info->icur, irec))
		return false;

	/* A bad mapping ends the walk immediately. */
	if (!xchk_bmap_iext_mapping(info, irec)) {
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
		return false;
	}
	nr++;

	/*
	 * Iterate subsequent iextent records and merge them with the one
	 * that we just read, if possible.
	 */
	while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) {
		if (!xchk_are_bmaps_contiguous(irec, &got))
			break;

		if (!xchk_bmap_iext_mapping(info, &got)) {
			xchk_fblock_set_corrupt(info->sc, info->whichfork,
					got.br_startoff);
			return false;
		}
		nr++;

		irec->br_blockcount += got.br_blockcount;
		xfs_iext_next(ifp, &info->icur);
	}

	/*
	 * If the merged mapping could be expressed with fewer bmbt records
	 * than we actually found, notify the user that this fork could be
	 * optimized.  CoW forks only exist in memory so we ignore them.
	 */
	if (nr > 1 && info->whichfork != XFS_COW_FORK &&
	    howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr)
		xchk_ino_set_preen(info->sc, info->sc->ip->i_ino);

	return true;
}
 903
/*
 * Scrub an inode fork's block mappings.
 *
 * First we scan every record in every btree block, if applicable.
 * Then we unconditionally scan the incore extent cache.
 */
STATIC int
xchk_bmap(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_bmbt_irec	irec;
	struct xchk_bmap_info	info = { NULL };
	struct xfs_mount	*mp = sc->mp;
	struct xfs_inode	*ip = sc->ip;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	xfs_fileoff_t		endoff;
	int			error = 0;

	/* Non-existent forks can be ignored. */
	if (!ifp)
		return -ENOENT;

	info.is_rt = xfs_ifork_is_realtime(ip, whichfork);
	info.whichfork = whichfork;
	info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
	info.sc = sc;

	/* Existence of the fork must be consistent with fs features. */
	switch (whichfork) {
	case XFS_COW_FORK:
		/* No CoW forks on non-reflink filesystems. */
		if (!xfs_has_reflink(mp)) {
			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
			return 0;
		}
		break;
	case XFS_ATTR_FORK:
		/* Attr forks require one of the attr feature bits. */
		if (!xfs_has_attr(mp) && !xfs_has_attr2(mp))
			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		break;
	default:
		ASSERT(whichfork == XFS_DATA_FORK);
		break;
	}

	/* Check the fork values */
	switch (ifp->if_format) {
	case XFS_DINODE_FMT_UUID:
	case XFS_DINODE_FMT_DEV:
	case XFS_DINODE_FMT_LOCAL:
		/* No mappings to check. */
		if (whichfork == XFS_COW_FORK)
			xchk_fblock_set_corrupt(sc, whichfork, 0);
		return 0;
	case XFS_DINODE_FMT_EXTENTS:
		break;
	case XFS_DINODE_FMT_BTREE:
		/* CoW forks are never in btree format. */
		if (whichfork == XFS_COW_FORK) {
			xchk_fblock_set_corrupt(sc, whichfork, 0);
			return 0;
		}

		error = xchk_bmap_btree(sc, whichfork, &info);
		if (error)
			return error;
		break;
	default:
		xchk_fblock_set_corrupt(sc, whichfork, 0);
		return 0;
	}

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return 0;

	/* Find the offset of the last extent in the mapping. */
	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
		return error;

	/*
	 * Scrub extent records.  We use a special iterator function here that
	 * combines adjacent mappings if they are logically and physically
	 * contiguous.   For large allocations that require multiple bmbt
	 * records, this reduces the number of cross-referencing calls, which
	 * reduces runtime.  Cross referencing with the rmap is simpler because
	 * the rmap must match the combined mapping exactly.
	 */
	while (xchk_bmap_iext_iter(&info, &irec)) {
		if (xchk_should_terminate(sc, &error) ||
		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
			return 0;

		/* No mapping may start at or beyond the reported end. */
		if (irec.br_startoff >= endoff) {
			xchk_fblock_set_corrupt(sc, whichfork,
					irec.br_startoff);
			return 0;
		}

		/* Delalloc reservations get a reduced set of checks. */
		if (isnullstartblock(irec.br_startblock))
			xchk_bmap_iextent_delalloc(ip, &info, &irec);
		else
			xchk_bmap_iextent(ip, &info, &irec);
		memcpy(&info.prev_rec, &irec, sizeof(struct xfs_bmbt_irec));
	}

	/* Optionally verify every rmap for this fork has a bmbt mapping. */
	if (xchk_bmap_want_check_rmaps(&info)) {
		error = xchk_bmap_check_rmaps(sc, whichfork);
		if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
			return error;
	}

	return 0;
}
1017
1018/* Scrub an inode's data fork. */
1019int
1020xchk_bmap_data(
1021	struct xfs_scrub	*sc)
1022{
1023	int			error;
1024
1025	if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTD_ZAPPED)) {
1026		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
1027		return 0;
1028	}
1029
1030	error = xchk_bmap(sc, XFS_DATA_FORK);
1031	if (error)
1032		return error;
1033
1034	/* If the data fork is clean, it is clearly not zapped. */
1035	xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTD_ZAPPED);
1036	return 0;
1037}
1038
1039/* Scrub an inode's attr fork. */
1040int
1041xchk_bmap_attr(
1042	struct xfs_scrub	*sc)
1043{
1044	int			error;
1045
1046	/*
1047	 * If the attr fork has been zapped, it's possible that forkoff was
1048	 * reset to zero and hence sc->ip->i_afp is NULL.  We don't want the
1049	 * NULL ifp check in xchk_bmap to conclude that the attr fork is ok,
1050	 * so short circuit that logic by setting the corruption flag and
1051	 * returning immediately.
1052	 */
1053	if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTA_ZAPPED)) {
1054		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
1055		return 0;
1056	}
1057
1058	error = xchk_bmap(sc, XFS_ATTR_FORK);
1059	if (error)
1060		return error;
1061
1062	/* If the attr fork is clean, it is clearly not zapped. */
1063	xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTA_ZAPPED);
1064	return 0;
1065}
1066
/*
 * Scrub an inode's CoW fork.  CoW forks only exist in memory, so there is
 * no zapped-fork state to consider; delegate straight to the common fork
 * scrubber.
 */
int
xchk_bmap_cow(
	struct xfs_scrub	*sc)
{
	return xchk_bmap(sc, XFS_COW_FORK);
}
v6.8
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
   4 * Author: Darrick J. Wong <djwong@kernel.org>
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_trans_resv.h"
  11#include "xfs_mount.h"
  12#include "xfs_btree.h"
  13#include "xfs_bit.h"
  14#include "xfs_log_format.h"
  15#include "xfs_trans.h"
  16#include "xfs_inode.h"
  17#include "xfs_alloc.h"
  18#include "xfs_bmap.h"
  19#include "xfs_bmap_btree.h"
  20#include "xfs_rmap.h"
  21#include "xfs_rmap_btree.h"
  22#include "xfs_health.h"
  23#include "scrub/scrub.h"
  24#include "scrub/common.h"
  25#include "scrub/btree.h"
  26#include "scrub/health.h"
  27#include "xfs_ag.h"
  28
/*
 * Set us up with an inode's bmap.  Grabs the inode, takes IOLOCK (and
 * MMAPLOCK for regular-file data/cow scans), quiesces in-flight I/O, and
 * finishes with the ILOCK held for the scrub proper.
 */
int
xchk_setup_inode_bmap(
	struct xfs_scrub	*sc)
{
	int			error;

	/* Drain deferred-op intents so mappings can't shift underneath us. */
	if (xchk_need_intent_drain(sc))
		xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);

	error = xchk_iget_for_scrubbing(sc);
	if (error)
		goto out;

	xchk_ilock(sc, XFS_IOLOCK_EXCL);

	/*
	 * We don't want any ephemeral data/cow fork updates sitting around
	 * while we inspect block mappings, so wait for directio to finish
	 * and flush dirty data if we have delalloc reservations.
	 */
	if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
	    sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
		struct address_space	*mapping = VFS_I(sc->ip)->i_mapping;
		bool			is_repair = xchk_could_repair(sc);

		xchk_ilock(sc, XFS_MMAPLOCK_EXCL);

		/* Break all our leases, we're going to mess with things. */
		if (is_repair) {
			error = xfs_break_layouts(VFS_I(sc->ip),
					&sc->ilock_flags, BREAK_WRITE);
			if (error)
				goto out;
		}

		inode_dio_wait(VFS_I(sc->ip));

		/*
		 * Try to flush all incore state to disk before we examine the
		 * space mappings for the data fork.  Leave accumulated errors
		 * in the mapping for the writer threads to consume.
		 *
		 * On ENOSPC or EIO writeback errors, we continue into the
		 * extent mapping checks because write failures do not
		 * necessarily imply anything about the correctness of the file
		 * metadata.  The metadata and the file data could be on
		 * completely separate devices; a media failure might only
		 * affect a subset of the disk, etc.  We can handle delalloc
		 * extents in the scrubber, so leaving them in memory is fine.
		 */
		error = filemap_fdatawrite(mapping);
		if (!error)
			error = filemap_fdatawait_keep_errors(mapping);
		if (error && (error != -ENOSPC && error != -EIO))
			goto out;

		/* Drop the page cache if we're repairing block mappings. */
		if (is_repair) {
			error = invalidate_inode_pages2(
					VFS_I(sc->ip)->i_mapping);
			if (error)
				goto out;
		}

	}

	/* Got the inode, lock it and we're ready to go. */
	error = xchk_trans_alloc(sc, 0);
	if (error)
		goto out;

	error = xchk_ino_dqattach(sc);
	if (error)
		goto out;

	xchk_ilock(sc, XFS_ILOCK_EXCL);
out:
	/* scrub teardown will unlock and release the inode */
	return error;
}
 110
 111/*
 112 * Inode fork block mapping (BMBT) scrubber.
 113 * More complex than the others because we have to scrub
 114 * all the extents regardless of whether or not the fork
 115 * is in btree format.
 116 */
 117
/*
 * State for one fork's bmap scrub, set up per xchk_bmap() call and threaded
 * through the extent iteration and cross-referencing helpers.
 */
struct xchk_bmap_info {
	struct xfs_scrub	*sc;

	/* Incore extent tree cursor; caller zeroes this before iterating */
	struct xfs_iext_cursor	icur;

	/* Previous fork mapping that we examined, for ordering checks */
	struct xfs_bmbt_irec	prev_rec;

	/* Is this a realtime fork? */
	bool			is_rt;

	/* May mappings point to shared space? */
	bool			is_shared;

	/* Was the incore extent tree loaded (before we read it ourselves)? */
	bool			was_loaded;

	/* Which inode fork are we checking? */
	int			whichfork;
};
 139
/*
 * Look for a corresponding rmap for this irec.  Returns true and fills out
 * @rmap if a record was found.  Returns false if the lookup errored out
 * (xchk_should_check_xref records that) or if no rmap exists, in which
 * case the xref corruption flag is set for this file offset.
 */
static inline bool
xchk_bmap_get_rmap(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec,
	xfs_agblock_t		agbno,
	uint64_t		owner,
	struct xfs_rmap_irec	*rmap)
{
	xfs_fileoff_t		offset;
	unsigned int		rflags = 0;
	int			has_rmap;
	int			error;

	/* Mirror the fork and written state of the mapping in the rmap key. */
	if (info->whichfork == XFS_ATTR_FORK)
		rflags |= XFS_RMAP_ATTR_FORK;
	if (irec->br_state == XFS_EXT_UNWRITTEN)
		rflags |= XFS_RMAP_UNWRITTEN;

	/*
	 * CoW staging extents are owned (on disk) by the refcountbt, so
	 * their rmaps do not have offsets.
	 */
	if (info->whichfork == XFS_COW_FORK)
		offset = 0;
	else
		offset = irec->br_startoff;

	/*
	 * If the caller thinks this could be a shared bmbt extent (IOWs,
	 * any data fork extent of a reflink inode) then we have to use the
	 * range rmap lookup to make sure we get the correct owner/offset.
	 */
	if (info->is_shared) {
		error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
				owner, offset, rflags, rmap, &has_rmap);
	} else {
		error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno,
				owner, offset, rflags, rmap, &has_rmap);
	}
	if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur))
		return false;

	if (!has_rmap)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
			irec->br_startoff);
	return has_rmap;
}
 188
/*
 * Make sure that we have rmapbt records for this data/attr fork extent.
 * @agbno is the AG block corresponding to irec->br_startblock.  The rmap
 * must match the (possibly merged) file mapping exactly in extent range,
 * logical offset, owner, and flags.
 */
STATIC void
xchk_bmap_xref_rmap(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec,
	xfs_agblock_t		agbno)
{
	struct xfs_rmap_irec	rmap;
	unsigned long long	rmap_end;
	uint64_t		owner = info->sc->ip->i_ino;

	/* Skip if there's no rmap cursor or xrefs are being skipped. */
	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
		return;

	/* Find the rmap record for this irec. */
	if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
		return;

	/*
	 * The rmap must be an exact match for this incore file mapping record,
	 * which may have arisen from multiple ondisk records.
	 */
	if (rmap.rm_startblock != agbno)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
	if (rmap_end != agbno + irec->br_blockcount)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Check the logical offsets. */
	if (rmap.rm_offset != irec->br_startoff)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	rmap_end = (unsigned long long)rmap.rm_offset + rmap.rm_blockcount;
	if (rmap_end != irec->br_startoff + irec->br_blockcount)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Check the owner */
	if (rmap.rm_owner != owner)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/*
	 * Check for discrepancies between the unwritten flag in the irec and
	 * the rmap.  Note that the (in-memory) CoW fork distinguishes between
	 * unwritten and written extents, but we don't track that in the rmap
	 * records because the blocks are owned (on-disk) by the refcountbt,
	 * which doesn't track unwritten state.
	 */
	if (!!(irec->br_state == XFS_EXT_UNWRITTEN) !=
	    !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* The attr-fork flag must agree, and bmbt-block rmaps never match. */
	if (!!(info->whichfork == XFS_ATTR_FORK) !=
	    !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
}
 255
/*
 * Make sure that we have rmapbt records for this COW fork extent.  Unlike
 * the data/attr case, the covering rmap is owned by XFS_RMAP_OWN_COW and
 * only needs to contain (not exactly match) the mapping's physical range.
 */
STATIC void
xchk_bmap_xref_rmap_cow(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec,
	xfs_agblock_t		agbno)
{
	struct xfs_rmap_irec	rmap;
	unsigned long long	rmap_end;
	uint64_t		owner = XFS_RMAP_OWN_COW;

	/* Skip if there's no rmap cursor or xrefs are being skipped. */
	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
		return;

	/* Find the rmap record for this irec. */
	if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
		return;

	/*
	 * CoW staging extents are owned by the refcount btree, so the rmap
	 * can start before and end after the physical space allocated to this
	 * mapping.  There are no offsets to check.
	 */
	if (rmap.rm_startblock > agbno)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
	if (rmap_end < agbno + irec->br_blockcount)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Check the owner */
	if (rmap.rm_owner != owner)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/*
	 * No flags allowed.  Note that the (in-memory) CoW fork distinguishes
	 * between unwritten and written extents, but we don't track that in
	 * the rmap records because the blocks are owned (on-disk) by the
	 * refcountbt, which doesn't track unwritten state.
	 */
	if (rmap.rm_flags & XFS_RMAP_ATTR_FORK)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	if (rmap.rm_flags & XFS_RMAP_UNWRITTEN)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
}
 309
/*
 * Cross-reference a single rtdev extent record.  Realtime mappings only
 * get the used-space check here; @ip is unused but keeps the signature
 * parallel to the datadev variant.
 */
STATIC void
xchk_bmap_rt_iextent_xref(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
			irec->br_blockcount);
}
 320
/*
 * Cross-reference a single datadev extent record against the AG btrees:
 * free space, inode chunks, rmap, refcount.  Sets up and tears down the
 * per-AG scrub context around the checks.
 */
STATIC void
xchk_bmap_iextent_xref(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_owner_info	oinfo;
	struct xfs_mount	*mp = info->sc->mp;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	xfs_extlen_t		len;
	int			error;

	/* Convert the fsblock mapping into AG coordinates. */
	agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
	len = irec->br_blockcount;

	error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa);
	if (!xchk_fblock_process_error(info->sc, info->whichfork,
			irec->br_startoff, &error))
		goto out_free;

	/* Checks common to all forks. */
	xchk_xref_is_used_space(info->sc, agbno, len);
	xchk_xref_is_not_inode_chunk(info->sc, agbno, len);
	switch (info->whichfork) {
	case XFS_DATA_FORK:
		xchk_bmap_xref_rmap(info, irec, agbno);
		/* Reflink files may legitimately share blocks. */
		if (!xfs_is_reflink_inode(info->sc->ip)) {
			xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
					info->whichfork, irec->br_startoff);
			xchk_xref_is_only_owned_by(info->sc, agbno,
					irec->br_blockcount, &oinfo);
			xchk_xref_is_not_shared(info->sc, agbno,
					irec->br_blockcount);
		}
		xchk_xref_is_not_cow_staging(info->sc, agbno,
				irec->br_blockcount);
		break;
	case XFS_ATTR_FORK:
		/* Attr fork blocks are never shared or CoW staged. */
		xchk_bmap_xref_rmap(info, irec, agbno);
		xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
				info->whichfork, irec->br_startoff);
		xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
				&oinfo);
		xchk_xref_is_not_shared(info->sc, agbno,
				irec->br_blockcount);
		xchk_xref_is_not_cow_staging(info->sc, agbno,
				irec->br_blockcount);
		break;
	case XFS_COW_FORK:
		/* CoW blocks must be staging extents owned by OWN_COW. */
		xchk_bmap_xref_rmap_cow(info, irec, agbno);
		xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
				&XFS_RMAP_OINFO_COW);
		xchk_xref_is_cow_staging(info->sc, agbno,
				irec->br_blockcount);
		xchk_xref_is_not_shared(info->sc, agbno,
				irec->br_blockcount);
		break;
	}

out_free:
	xchk_ag_free(info->sc, &info->sc->sa);
}
 385
 386/*
 387 * Directories and attr forks should never have blocks that can't be addressed
 388 * by a xfs_dablk_t.
 389 */
 390STATIC void
 391xchk_bmap_dirattr_extent(
 392	struct xfs_inode	*ip,
 393	struct xchk_bmap_info	*info,
 394	struct xfs_bmbt_irec	*irec)
 395{
 396	struct xfs_mount	*mp = ip->i_mount;
 397	xfs_fileoff_t		off;
 398
 399	if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK)
 400		return;
 401
 402	if (!xfs_verify_dablk(mp, irec->br_startoff))
 403		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 404				irec->br_startoff);
 405
 406	off = irec->br_startoff + irec->br_blockcount - 1;
 407	if (!xfs_verify_dablk(mp, off))
 408		xchk_fblock_set_corrupt(info->sc, info->whichfork, off);
 409}
 410
/*
 * Scrub a single extent record.  Runs the cheap standalone checks first,
 * then cross-references against the AG/rt metadata only if no corruption
 * has been found yet.
 */
STATIC void
xchk_bmap_iextent(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_mount	*mp = info->sc->mp;

	/*
	 * Check for out-of-order extents.  This record could have come
	 * from the incore list, for which there is no ordering check.
	 */
	if (irec->br_startoff < info->prev_rec.br_startoff +
				info->prev_rec.br_blockcount)
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* The logical range must lie within the file's addressable space. */
	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	xchk_bmap_dirattr_extent(ip, info, irec);

	/* Make sure the extent points to a valid place. */
	if (info->is_rt &&
	    !xfs_verify_rtbext(mp, irec->br_startblock, irec->br_blockcount))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	if (!info->is_rt &&
	    !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* We don't allow unwritten extents on attr forks. */
	if (irec->br_state == XFS_EXT_UNWRITTEN &&
	    info->whichfork == XFS_ATTR_FORK)
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Don't bother cross-referencing a mapping already known corrupt. */
	if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return;

	if (info->is_rt)
		xchk_bmap_rt_iextent_xref(ip, info, irec);
	else
		xchk_bmap_iextent_xref(ip, info, irec);
}
 459
/*
 * Scrub a bmbt record.  Called once per ondisk btree record; verifies
 * block ownership along the cursor path and checks that the record is
 * mirrored exactly in the incore extent tree.
 */
STATIC int
xchk_bmapbt_rec(
	struct xchk_btree	*bs,
	const union xfs_btree_rec *rec)
{
	struct xfs_bmbt_irec	irec;
	struct xfs_bmbt_irec	iext_irec;
	struct xfs_iext_cursor	icur;
	struct xchk_bmap_info	*info = bs->private;
	struct xfs_inode	*ip = bs->cur->bc_ino.ip;
	struct xfs_buf		*bp = NULL;
	struct xfs_btree_block	*block;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, info->whichfork);
	uint64_t		owner;
	int			i;

	/*
	 * Check the owners of the btree blocks up to the level below
	 * the root since the verifiers don't do that.  Run this whenever
	 * the cursor sits on the first record of a leaf so every block
	 * along the current cursor path gets its owner checked.
	 */
	if (xfs_has_crc(bs->cur->bc_mp) &&
	    bs->cur->bc_levels[0].ptr == 1) {
		for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
			block = xfs_btree_get_block(bs->cur, i, &bp);
			owner = be64_to_cpu(block->bb_u.l.bb_owner);
			if (owner != ip->i_ino)
				xchk_fblock_set_corrupt(bs->sc,
						info->whichfork, 0);
		}
	}

	/*
	 * Check that the incore extent tree contains an extent that matches
	 * this one exactly.  We validate those cached bmaps later, so we don't
	 * need to check them here.  If the incore extent tree was just loaded
	 * from disk by the scrubber, we assume that its contents match what's
	 * on disk (we still hold the ILOCK) and skip the equivalence check.
	 */
	if (!info->was_loaded)
		return 0;

	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
	if (xfs_bmap_validate_extent(ip, info->whichfork, &irec) != NULL) {
		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
				irec.br_startoff);
		return 0;
	}

	/* Every field of the incore mapping must match the ondisk record. */
	if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
				&iext_irec) ||
	    irec.br_startoff != iext_irec.br_startoff ||
	    irec.br_startblock != iext_irec.br_startblock ||
	    irec.br_blockcount != iext_irec.br_blockcount ||
	    irec.br_state != iext_irec.br_state)
		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
				irec.br_startoff);
	return 0;
}
 519
 520/* Scan the btree records. */
 521STATIC int
 522xchk_bmap_btree(
 523	struct xfs_scrub	*sc,
 524	int			whichfork,
 525	struct xchk_bmap_info	*info)
 526{
 527	struct xfs_owner_info	oinfo;
 528	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, whichfork);
 529	struct xfs_mount	*mp = sc->mp;
 530	struct xfs_inode	*ip = sc->ip;
 531	struct xfs_btree_cur	*cur;
 532	int			error;
 533
 534	/* Load the incore bmap cache if it's not loaded. */
 535	info->was_loaded = !xfs_need_iread_extents(ifp);
 536
 537	error = xfs_iread_extents(sc->tp, ip, whichfork);
 538	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
 539		goto out;
 540
 541	/* Check the btree structure. */
 542	cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
 543	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
 544	error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
 545	xfs_btree_del_cursor(cur, error);
 546out:
 547	return error;
 548}
 549
/* Context passed to xchk_bmap_check_rmap() by the rmapbt query. */
struct xchk_bmap_check_rmap_info {
	/* Scrub context */
	struct xfs_scrub	*sc;
	/* Which inode fork are we checking? */
	int			whichfork;
	/* Incore extent cursor, reused across rmap records */
	struct xfs_iext_cursor	icur;
};
 555
/*
 * Can we find bmaps that fit this rmap?  rmapbt query callback; returns
 * -ECANCELED to stop the walk once corruption has been recorded.
 */
STATIC int
xchk_bmap_check_rmap(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xfs_bmbt_irec		irec;
	struct xfs_rmap_irec		check_rec;
	struct xchk_bmap_check_rmap_info	*sbcri = priv;
	struct xfs_ifork		*ifp;
	struct xfs_scrub		*sc = sbcri->sc;
	bool				have_map;

	/* Is this even the right fork? */
	if (rec->rm_owner != sc->ip->i_ino)
		return 0;
	if ((sbcri->whichfork == XFS_ATTR_FORK) ^
	    !!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;
	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
		return 0;

	/* Now look up the bmbt record. */
	ifp = xfs_ifork_ptr(sc->ip, sbcri->whichfork);
	if (!ifp) {
		/* An rmap for our inode but no fork at all: corrupt. */
		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
				rec->rm_offset);
		goto out;
	}
	have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
			&sbcri->icur, &irec);
	if (!have_map)
		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
				rec->rm_offset);
	/*
	 * bmap extent record lengths are constrained to 2^21 blocks in length
	 * because of space constraints in the on-disk metadata structure.
	 * However, rmap extent record lengths are constrained only by AG
	 * length, so we have to loop through the bmbt to make sure that the
	 * entire rmap is covered by bmbt records.
	 */
	check_rec = *rec;
	while (have_map) {
		/* Each bmbt mapping must align with where the rmap stands. */
		if (irec.br_startoff != check_rec.rm_offset)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
				cur->bc_ag.pag->pag_agno,
				check_rec.rm_startblock))
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (irec.br_blockcount > check_rec.rm_blockcount)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
			break;
		/* Consume this mapping's worth of the rmap and continue. */
		check_rec.rm_startblock += irec.br_blockcount;
		check_rec.rm_offset += irec.br_blockcount;
		check_rec.rm_blockcount -= irec.br_blockcount;
		if (check_rec.rm_blockcount == 0)
			break;
		have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
		if (!have_map)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
	}

out:
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return -ECANCELED;
	return 0;
}
 629
 630/* Make sure each rmap has a corresponding bmbt entry. */
 631STATIC int
 632xchk_bmap_check_ag_rmaps(
 633	struct xfs_scrub		*sc,
 634	int				whichfork,
 635	struct xfs_perag		*pag)
 636{
 637	struct xchk_bmap_check_rmap_info	sbcri;
 638	struct xfs_btree_cur		*cur;
 639	struct xfs_buf			*agf;
 640	int				error;
 641
 642	error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf);
 643	if (error)
 644		return error;
 645
 646	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, pag);
 647
 648	sbcri.sc = sc;
 649	sbcri.whichfork = whichfork;
 650	error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
 651	if (error == -ECANCELED)
 652		error = 0;
 653
 654	xfs_btree_del_cursor(cur, error);
 655	xfs_trans_brelse(sc->tp, agf);
 656	return error;
 657}
 658
 659/*
 660 * Decide if we want to scan the reverse mappings to determine if the attr
 661 * fork /really/ has zero space mappings.
 662 */
 663STATIC bool
 664xchk_bmap_check_empty_attrfork(
 665	struct xfs_inode	*ip)
 666{
 667	struct xfs_ifork	*ifp = &ip->i_af;
 668
 669	/*
 670	 * If the dinode repair found a bad attr fork, it will reset the fork
 671	 * to extents format with zero records and wait for the this scrubber
 672	 * to reconstruct the block mappings.  If the fork is not in this
 673	 * state, then the fork cannot have been zapped.
 674	 */
 675	if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0)
 676		return false;
 677
 678	/*
 679	 * Files can have an attr fork in EXTENTS format with zero records for
 680	 * several reasons:
 681	 *
 682	 * a) an attr set created a fork but ran out of space
 683	 * b) attr replace deleted an old attr but failed during the set step
 684	 * c) the data fork was in btree format when all attrs were deleted, so
 685	 *    the fork was left in place
 686	 * d) the inode repair code zapped the fork
 687	 *
 688	 * Only in case (d) do we want to scan the rmapbt to see if we need to
 689	 * rebuild the attr fork.  The fork zap code clears all DAC permission
 690	 * bits and zeroes the uid and gid, so avoid the scan if any of those
 691	 * three conditions are not met.
 692	 */
 693	if ((VFS_I(ip)->i_mode & 0777) != 0)
 694		return false;
 695	if (!uid_eq(VFS_I(ip)->i_uid, GLOBAL_ROOT_UID))
 696		return false;
 697	if (!gid_eq(VFS_I(ip)->i_gid, GLOBAL_ROOT_GID))
 698		return false;
 699
 700	return true;
 701}
 702
 703/*
 704 * Decide if we want to scan the reverse mappings to determine if the data
 705 * fork /really/ has zero space mappings.
 706 */
 707STATIC bool
 708xchk_bmap_check_empty_datafork(
 709	struct xfs_inode	*ip)
 710{
 711	struct xfs_ifork	*ifp = &ip->i_df;
 712
 713	/* Don't support realtime rmap checks yet. */
 714	if (XFS_IS_REALTIME_INODE(ip))
 715		return false;
 716
 717	/*
 718	 * If the dinode repair found a bad data fork, it will reset the fork
 719	 * to extents format with zero records and wait for the this scrubber
 720	 * to reconstruct the block mappings.  If the fork is not in this
 721	 * state, then the fork cannot have been zapped.
 722	 */
 723	if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0)
 724		return false;
 725
 726	/*
 727	 * If we encounter an empty data fork along with evidence that the fork
 728	 * might not really be empty, we need to scan the reverse mappings to
 729	 * decide if we're going to rebuild the fork.  Data forks with nonzero
 730	 * file size are scanned.
 731	 */
 732	return i_size_read(VFS_I(ip)) != 0;
 733}
 734
 735/*
 736 * Decide if we want to walk every rmap btree in the fs to make sure that each
 737 * rmap for this file fork has corresponding bmbt entries.
 738 */
 739static bool
 740xchk_bmap_want_check_rmaps(
 741	struct xchk_bmap_info	*info)
 742{
 743	struct xfs_scrub	*sc = info->sc;
 744
 745	if (!xfs_has_rmapbt(sc->mp))
 746		return false;
 747	if (info->whichfork == XFS_COW_FORK)
 748		return false;
 749	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 750		return false;
 751
 752	if (info->whichfork == XFS_ATTR_FORK)
 753		return xchk_bmap_check_empty_attrfork(sc->ip);
 754
 755	return xchk_bmap_check_empty_datafork(sc->ip);
 756}
 757
/*
 * Make sure each rmap has a corresponding bmbt entry.  Walks every AG's
 * rmap btree; stops at the first error or recorded corruption.
 */
STATIC int
xchk_bmap_check_rmaps(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;
	int			error;

	for_each_perag(sc->mp, agno, pag) {
		error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
		if (error ||
		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
			/* Breaking out early: drop our pag reference here. */
			xfs_perag_rele(pag);
			return error;
		}
	}

	return 0;
}
 779
 780/* Scrub a delalloc reservation from the incore extent map tree. */
 781STATIC void
 782xchk_bmap_iextent_delalloc(
 783	struct xfs_inode	*ip,
 784	struct xchk_bmap_info	*info,
 785	struct xfs_bmbt_irec	*irec)
 786{
 787	struct xfs_mount	*mp = info->sc->mp;
 788
 789	/*
 790	 * Check for out-of-order extents.  This record could have come
 791	 * from the incore list, for which there is no ordering check.
 792	 */
 793	if (irec->br_startoff < info->prev_rec.br_startoff +
 794				info->prev_rec.br_blockcount)
 795		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 796				irec->br_startoff);
 797
 798	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
 799		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 800				irec->br_startoff);
 801
 802	/* Make sure the extent points to a valid place. */
 803	if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
 804		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 805				irec->br_startoff);
 806}
 807
 808/* Decide if this individual fork mapping is ok. */
 809static bool
 810xchk_bmap_iext_mapping(
 811	struct xchk_bmap_info		*info,
 812	const struct xfs_bmbt_irec	*irec)
 813{
 814	/* There should never be a "hole" extent in either extent list. */
 815	if (irec->br_startblock == HOLESTARTBLOCK)
 816		return false;
 817	if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
 818		return false;
 819	return true;
 820}
 821
 822/* Are these two mappings contiguous with each other? */
 823static inline bool
 824xchk_are_bmaps_contiguous(
 825	const struct xfs_bmbt_irec	*b1,
 826	const struct xfs_bmbt_irec	*b2)
 827{
 828	/* Don't try to combine unallocated mappings. */
 829	if (!xfs_bmap_is_real_extent(b1))
 830		return false;
 831	if (!xfs_bmap_is_real_extent(b2))
 832		return false;
 833
 834	/* Does b2 come right after b1 in the logical and physical range? */
 835	if (b1->br_startoff + b1->br_blockcount != b2->br_startoff)
 836		return false;
 837	if (b1->br_startblock + b1->br_blockcount != b2->br_startblock)
 838		return false;
 839	if (b1->br_state != b2->br_state)
 840		return false;
 841	return true;
 842}
 843
 844/*
 845 * Walk the incore extent records, accumulating consecutive contiguous records
 846 * into a single incore mapping.  Returns true if @irec has been set to a
 847 * mapping or false if there are no more mappings.  Caller must ensure that
 848 * @info.icur is zeroed before the first call.
 849 */
 850static bool
 851xchk_bmap_iext_iter(
 852	struct xchk_bmap_info	*info,
 853	struct xfs_bmbt_irec	*irec)
 854{
 855	struct xfs_bmbt_irec	got;
 856	struct xfs_ifork	*ifp;
 857	unsigned int		nr = 0;
 858
 859	ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork);
 860
 861	/* Advance to the next iextent record and check the mapping. */
 862	xfs_iext_next(ifp, &info->icur);
 863	if (!xfs_iext_get_extent(ifp, &info->icur, irec))
 864		return false;
 865
 866	if (!xchk_bmap_iext_mapping(info, irec)) {
 867		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 868				irec->br_startoff);
 869		return false;
 870	}
 871	nr++;
 872
 873	/*
 874	 * Iterate subsequent iextent records and merge them with the one
 875	 * that we just read, if possible.
 876	 */
 877	while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) {
 878		if (!xchk_are_bmaps_contiguous(irec, &got))
 879			break;
 880
 881		if (!xchk_bmap_iext_mapping(info, &got)) {
 882			xchk_fblock_set_corrupt(info->sc, info->whichfork,
 883					got.br_startoff);
 884			return false;
 885		}
 886		nr++;
 887
 888		irec->br_blockcount += got.br_blockcount;
 889		xfs_iext_next(ifp, &info->icur);
 890	}
 891
 892	/*
 893	 * If the merged mapping could be expressed with fewer bmbt records
 894	 * than we actually found, notify the user that this fork could be
 895	 * optimized.  CoW forks only exist in memory so we ignore them.
 896	 */
 897	if (nr > 1 && info->whichfork != XFS_COW_FORK &&
 898	    howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr)
 899		xchk_ino_set_preen(info->sc, info->sc->ip->i_ino);
 900
 901	return true;
 902}
 903
 904/*
 905 * Scrub an inode fork's block mappings.
 906 *
 907 * First we scan every record in every btree block, if applicable.
 908 * Then we unconditionally scan the incore extent cache.
 909 */
/*
 * Scrub an inode fork's block mappings.
 *
 * First we scan every record in every btree block, if applicable.
 * Then we unconditionally scan the incore extent cache.
 */
STATIC int
xchk_bmap(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_bmbt_irec	irec;
	struct xchk_bmap_info	info = { NULL };
	struct xfs_mount	*mp = sc->mp;
	struct xfs_inode	*ip = sc->ip;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	xfs_fileoff_t		endoff;
	int			error = 0;

	/* Non-existent forks can be ignored. */
	if (!ifp)
		return -ENOENT;

	/* Realtime and reflink attributes only apply to the data fork. */
	info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
	info.whichfork = whichfork;
	info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
	info.sc = sc;

	/* The fork's existence must agree with the filesystem features. */
	switch (whichfork) {
	case XFS_COW_FORK:
		/* No CoW forks on non-reflink filesystems. */
		if (!xfs_has_reflink(mp)) {
			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
			return 0;
		}
		break;
	case XFS_ATTR_FORK:
		/* An attr fork requires one of the attr feature bits. */
		if (!xfs_has_attr(mp) && !xfs_has_attr2(mp))
			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		break;
	default:
		ASSERT(whichfork == XFS_DATA_FORK);
		break;
	}

	/* Check the fork values */
	switch (ifp->if_format) {
	case XFS_DINODE_FMT_UUID:
	case XFS_DINODE_FMT_DEV:
	case XFS_DINODE_FMT_LOCAL:
		/* No mappings to check. */
		if (whichfork == XFS_COW_FORK)
			xchk_fblock_set_corrupt(sc, whichfork, 0);
		return 0;
	case XFS_DINODE_FMT_EXTENTS:
		break;
	case XFS_DINODE_FMT_BTREE:
		/* Btree format is invalid for the incore-only CoW fork. */
		if (whichfork == XFS_COW_FORK) {
			xchk_fblock_set_corrupt(sc, whichfork, 0);
			return 0;
		}

		error = xchk_bmap_btree(sc, whichfork, &info);
		if (error)
			return error;
		break;
	default:
		xchk_fblock_set_corrupt(sc, whichfork, 0);
		return 0;
	}

	/* Don't bother scanning mappings of a fork already known bad. */
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return 0;

	/* Find the offset of the last extent in the mapping. */
	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
		return error;

	/*
	 * Scrub extent records.  We use a special iterator function here that
	 * combines adjacent mappings if they are logically and physically
	 * contiguous.   For large allocations that require multiple bmbt
	 * records, this reduces the number of cross-referencing calls, which
	 * reduces runtime.  Cross referencing with the rmap is simpler because
	 * the rmap must match the combined mapping exactly.
	 */
	while (xchk_bmap_iext_iter(&info, &irec)) {
		if (xchk_should_terminate(sc, &error) ||
		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
			return 0;

		/* No mapping may start at or beyond the last offset. */
		if (irec.br_startoff >= endoff) {
			xchk_fblock_set_corrupt(sc, whichfork,
					irec.br_startoff);
			return 0;
		}

		if (isnullstartblock(irec.br_startblock))
			xchk_bmap_iextent_delalloc(ip, &info, &irec);
		else
			xchk_bmap_iextent(ip, &info, &irec);
		/*
		 * Remember this mapping; presumably the per-mapping checks
		 * compare against it to detect misordered records -- confirm
		 * against the xchk_bmap_iextent* helpers.
		 */
		memcpy(&info.prev_rec, &irec, sizeof(struct xfs_bmbt_irec));
	}

	/* Optionally cross-reference the whole fork against the rmapbt. */
	if (xchk_bmap_want_check_rmaps(&info)) {
		error = xchk_bmap_check_rmaps(sc, whichfork);
		if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
			return error;
	}

	return 0;
}
1017
1018/* Scrub an inode's data fork. */
1019int
1020xchk_bmap_data(
1021	struct xfs_scrub	*sc)
1022{
1023	int			error;
1024
1025	if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTD_ZAPPED)) {
1026		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
1027		return 0;
1028	}
1029
1030	error = xchk_bmap(sc, XFS_DATA_FORK);
1031	if (error)
1032		return error;
1033
1034	/* If the data fork is clean, it is clearly not zapped. */
1035	xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTD_ZAPPED);
1036	return 0;
1037}
1038
1039/* Scrub an inode's attr fork. */
1040int
1041xchk_bmap_attr(
1042	struct xfs_scrub	*sc)
1043{
1044	int			error;
1045
1046	/*
1047	 * If the attr fork has been zapped, it's possible that forkoff was
1048	 * reset to zero and hence sc->ip->i_afp is NULL.  We don't want the
1049	 * NULL ifp check in xchk_bmap to conclude that the attr fork is ok,
1050	 * so short circuit that logic by setting the corruption flag and
1051	 * returning immediately.
1052	 */
1053	if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTA_ZAPPED)) {
1054		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
1055		return 0;
1056	}
1057
1058	error = xchk_bmap(sc, XFS_ATTR_FORK);
1059	if (error)
1060		return error;
1061
1062	/* If the attr fork is clean, it is clearly not zapped. */
1063	xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTA_ZAPPED);
1064	return 0;
1065}
1066
1067/* Scrub an inode's CoW fork. */
int
xchk_bmap_cow(
	struct xfs_scrub	*sc)
{
	/*
	 * CoW fork mappings exist only in memory, so there is no zapped-fork
	 * health state to pre-check; go straight to the common scrubber.
	 */
	return xchk_bmap(sc, XFS_COW_FORK);
}