inode_repair.c - fs/xfs/scrub/inode_repair.c - Linux diff v6.9.4

   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
   4 * Author: Darrick J. Wong <djwong@kernel.org>
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_trans_resv.h"
  11#include "xfs_mount.h"
  12#include "xfs_defer.h"
  13#include "xfs_btree.h"
  14#include "xfs_bit.h"
  15#include "xfs_log_format.h"
  16#include "xfs_trans.h"
  17#include "xfs_sb.h"
  18#include "xfs_inode.h"
  19#include "xfs_icache.h"
  20#include "xfs_inode_buf.h"
  21#include "xfs_inode_fork.h"
  22#include "xfs_ialloc.h"
  23#include "xfs_da_format.h"
  24#include "xfs_reflink.h"
  25#include "xfs_alloc.h"
  26#include "xfs_rmap.h"
  27#include "xfs_rmap_btree.h"
  28#include "xfs_bmap.h"
  29#include "xfs_bmap_btree.h"
  30#include "xfs_bmap_util.h"
  31#include "xfs_dir2.h"
  32#include "xfs_dir2_priv.h"
  33#include "xfs_quota_defs.h"
  34#include "xfs_quota.h"
  35#include "xfs_ag.h"
  36#include "xfs_rtbitmap.h"
  37#include "xfs_attr_leaf.h"
  38#include "xfs_log_priv.h"
  39#include "xfs_health.h"
  40#include "xfs_symlink_remote.h"
  41#include "scrub/xfs_scrub.h"
  42#include "scrub/scrub.h"
  43#include "scrub/common.h"
  44#include "scrub/btree.h"
  45#include "scrub/trace.h"
  46#include "scrub/repair.h"
  47#include "scrub/iscan.h"
  48#include "scrub/readdir.h"
  49
  50/*
  51 * Inode Record Repair
  52 * ===================
  53 *
  54 * Roughly speaking, inode problems can be classified based on whether or not
  55 * they trip the dinode verifiers.  If those trip, then we won't be able to
  56 * xfs_iget ourselves the inode.
  57 *
  58 * Therefore, the xrep_dinode_* functions fix anything that will trip the
  59 * inode buffer verifier or the dinode verifier.  The xrep_inode_* functions
  60 * fix things on live incore inodes.  The inode repair functions make decisions
  61 * with security and usability implications when reviving a file:
  62 *
  63 * - Files with zero di_mode or a garbage di_mode are converted to regular file
  64 *   that only root can read.  This file may not actually contain user data,
  65 *   if the file was not previously a regular file.  Setuid and setgid bits
  66 *   are cleared.
  67 *
  68 * - Zero-size directories can be truncated to look empty.  It is necessary to
  69 *   run the bmapbtd and directory repair functions to fully rebuild the
  70 *   directory.
  71 *
  72 * - Zero-size symbolic link targets can be truncated to '?'.  It is necessary
  73 *   to run the bmapbtd and symlink repair functions to salvage the symlink.
  74 *
  75 * - Invalid extent size hints will be removed.
  76 *
  77 * - Quotacheck will be scheduled if we repaired an inode that was so badly
  78 *   damaged that the ondisk inode had to be rebuilt.
  79 *
  80 * - Invalid user, group, or project IDs (aka -1U) will be reset to zero.
  81 *   Setuid and setgid bits are cleared.
  82 *
  83 * - Data and attr forks are reset to extents format with zero extents if the
  84 *   fork data is inconsistent.  It is necessary to run the bmapbtd or bmapbta
  85 *   repair functions to recover the space mapping.
  86 *
  87 * - ACLs will not be recovered if the attr fork is zapped or the extended
  88 *   attribute structure itself requires salvaging.
  89 *
  90 * - If the attr fork is zapped, the user and group ids are reset to root and
  91 *   the setuid and setgid bits are removed.
  92 */
  93
  94/*
  95 * All the information we need to repair the ondisk inode if we can't iget the
  96 * incore inode.  We don't allocate this buffer unless we're going to perform
  97 * a repair to the ondisk inode cluster buffer.
  98 */
  99struct xrep_inode {
 100	/* Inode mapping that we saved from the initial lookup attempt. */
 101	struct xfs_imap		imap;
 102
 103	struct xfs_scrub	*sc;
 104
 105	/* Blocks in use on the data device by data extents or bmbt blocks. */
 106	xfs_rfsblock_t		data_blocks;
 107
 108	/* Blocks in use on the rt device. */
 109	xfs_rfsblock_t		rt_blocks;
 110
 111	/* Blocks in use by the attr fork. */
 112	xfs_rfsblock_t		attr_blocks;
 113
 114	/* Number of data device extents for the data fork. */
 115	xfs_extnum_t		data_extents;
 116
 117	/*
 118	 * Number of realtime device extents for the data fork.  If
 119	 * data_extents and rt_extents indicate that the data fork has extents
 120	 * on both devices, we'll just back away slowly.
 121	 */
 122	xfs_extnum_t		rt_extents;
 123
 124	/* Number of (data device) extents for the attr fork. */
 125	xfs_aextnum_t		attr_extents;
 126
 127	/* Sick state to set after zapping parts of the inode. */
 128	unsigned int		ino_sick_mask;
 129
 130	/* Must we remove all access from this file? */
 131	bool			zap_acls;
 132
 133	/* Inode scanner to see if we can find the ftype from dirents */
 134	struct xchk_iscan	ftype_iscan;
 135	uint8_t			alleged_ftype;
 136};
 137
 138/*
 139 * Setup function for inode repair.  @imap contains the ondisk inode mapping
 140 * information so that we can correct the ondisk inode cluster buffer if
 141 * necessary to make iget work.
 142 */
 143int
 144xrep_setup_inode(
 145	struct xfs_scrub	*sc,
 146	const struct xfs_imap	*imap)
 147{
 148	struct xrep_inode	*ri;
 149
 150	sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS);
 151	if (!sc->buf)
 152		return -ENOMEM;
 153
 154	ri = sc->buf;
 155	memcpy(&ri->imap, imap, sizeof(struct xfs_imap));
 156	ri->sc = sc;
 157	return 0;
 158}
 159
 160/*
 161 * Make sure this ondisk inode can pass the inode buffer verifier.  This is
 162 * not the same as the dinode verifier.
 163 */
 164STATIC void
 165xrep_dinode_buf_core(
 166	struct xfs_scrub	*sc,
 167	struct xfs_buf		*bp,
 168	unsigned int		ioffset)
 169{
 170	struct xfs_dinode	*dip = xfs_buf_offset(bp, ioffset);
 171	struct xfs_trans	*tp = sc->tp;
 172	struct xfs_mount	*mp = sc->mp;
 173	xfs_agino_t		agino;
 174	bool			crc_ok = false;
 175	bool			magic_ok = false;
 176	bool			unlinked_ok = false;
 177
 178	agino = be32_to_cpu(dip->di_next_unlinked);
 179
 180	if (xfs_verify_agino_or_null(bp->b_pag, agino))
 181		unlinked_ok = true;
 182
 183	if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
 184	    xfs_dinode_good_version(mp, dip->di_version))
 185		magic_ok = true;
 186
 187	if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
 188			XFS_DINODE_CRC_OFF))
 189		crc_ok = true;
 190
 191	if (magic_ok && unlinked_ok && crc_ok)
 192		return;
 193
 194	if (!magic_ok) {
 195		dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
 196		dip->di_version = 3;
 197	}
 198	if (!unlinked_ok)
 199		dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
 200	xfs_dinode_calc_crc(mp, dip);
 201	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
 202	xfs_trans_log_buf(tp, bp, ioffset,
 203				  ioffset + sizeof(struct xfs_dinode) - 1);
 204}
 205
 206/* Make sure this inode cluster buffer can pass the inode buffer verifier. */
 207STATIC void
 208xrep_dinode_buf(
 209	struct xfs_scrub	*sc,
 210	struct xfs_buf		*bp)
 211{
 212	struct xfs_mount	*mp = sc->mp;
 213	int			i;
 214	int			ni;
 215
 216	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
 217	for (i = 0; i < ni; i++)
 218		xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog);
 219}
 220
 221/* Reinitialize things that never change in an inode. */
 222STATIC void
 223xrep_dinode_header(
 224	struct xfs_scrub	*sc,
 225	struct xfs_dinode	*dip)
 226{
 227	trace_xrep_dinode_header(sc, dip);
 228
 229	dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
 230	if (!xfs_dinode_good_version(sc->mp, dip->di_version))
 231		dip->di_version = 3;
 232	dip->di_ino = cpu_to_be64(sc->sm->sm_ino);
 233	uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid);
 234	dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
 235}
 236
 237/*
 238 * If this directory entry points to the scrub target inode, then the directory
 239 * we're scanning is the parent of the scrub target inode.
 240 */
 241STATIC int
 242xrep_dinode_findmode_dirent(
 243	struct xfs_scrub		*sc,
 244	struct xfs_inode		*dp,
 245	xfs_dir2_dataptr_t		dapos,
 246	const struct xfs_name		*name,
 247	xfs_ino_t			ino,
 248	void				*priv)
 249{
 250	struct xrep_inode		*ri = priv;
 251	int				error = 0;
 252
 253	if (xchk_should_terminate(ri->sc, &error))
 254		return error;
 255
 256	if (ino != sc->sm->sm_ino)
 257		return 0;
 258
 259	/* Ignore garbage directory entry names. */
 260	if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len))
 261		return -EFSCORRUPTED;
 262
 263	/* Don't pick up dot or dotdot entries; we only want child dirents. */
 264	if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
 265	    xfs_dir2_samename(name, &xfs_name_dot))
 266		return 0;
 267
 268	/*
 269	 * Uhoh, more than one parent for this inode and they don't agree on
 270	 * the file type?
 271	 */
 272	if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN &&
 273	    ri->alleged_ftype != name->type) {
 274		trace_xrep_dinode_findmode_dirent_inval(ri->sc, dp, name->type,
 275				ri->alleged_ftype);
 276		return -EFSCORRUPTED;
 277	}
 278
 279	/* We found a potential parent; remember the ftype. */
 280	trace_xrep_dinode_findmode_dirent(ri->sc, dp, name->type);
 281	ri->alleged_ftype = name->type;
 282	return 0;
 283}
 284
 285/*
 286 * If this is a directory, walk the dirents looking for any that point to the
 287 * scrub target inode.
 288 */
 289STATIC int
 290xrep_dinode_findmode_walk_directory(
 291	struct xrep_inode	*ri,
 292	struct xfs_inode	*dp)
 293{
 294	struct xfs_scrub	*sc = ri->sc;
 295	unsigned int		lock_mode;
 296	int			error = 0;
 297
 298	/*
 299	 * Scan the directory to see if there it contains an entry pointing to
 300	 * the directory that we are repairing.
 301	 */
 302	lock_mode = xfs_ilock_data_map_shared(dp);
 303
 304	/*
 305	 * If this directory is known to be sick, we cannot scan it reliably
 306	 * and must abort.
 307	 */
 308	if (xfs_inode_has_sickness(dp, XFS_SICK_INO_CORE |
 309				       XFS_SICK_INO_BMBTD |
 310				       XFS_SICK_INO_DIR)) {
 311		error = -EFSCORRUPTED;
 312		goto out_unlock;
 313	}
 314
 315	/*
 316	 * We cannot complete our parent pointer scan if a directory looks as
 317	 * though it has been zapped by the inode record repair code.
 318	 */
 319	if (xchk_dir_looks_zapped(dp)) {
 320		error = -EBUSY;
 321		goto out_unlock;
 322	}
 323
 324	error = xchk_dir_walk(sc, dp, xrep_dinode_findmode_dirent, ri);
 325	if (error)
 326		goto out_unlock;
 327
 328out_unlock:
 329	xfs_iunlock(dp, lock_mode);
 330	return error;
 331}
 332
 333/*
 334 * Try to find the mode of the inode being repaired by looking for directories
 335 * that point down to this file.
 336 */
 337STATIC int
 338xrep_dinode_find_mode(
 339	struct xrep_inode	*ri,
 340	uint16_t		*mode)
 341{
 342	struct xfs_scrub	*sc = ri->sc;
 343	struct xfs_inode	*dp;
 344	int			error;
 345
 346	/* No ftype means we have no other metadata to consult. */
 347	if (!xfs_has_ftype(sc->mp)) {
 348		*mode = S_IFREG;
 349		return 0;
 350	}
 351
 352	/*
 353	 * Scan all directories for parents that might point down to this
 354	 * inode.  Skip the inode being repaired during the scan since it
 355	 * cannot be its own parent.  Note that we still hold the AGI locked
 356	 * so there's a real possibility that _iscan_iter can return EBUSY.
 357	 */
 358	xchk_iscan_start(sc, 5000, 100, &ri->ftype_iscan);
 359	ri->ftype_iscan.skip_ino = sc->sm->sm_ino;
 360	ri->alleged_ftype = XFS_DIR3_FT_UNKNOWN;
 361	while ((error = xchk_iscan_iter(&ri->ftype_iscan, &dp)) == 1) {
 362		if (S_ISDIR(VFS_I(dp)->i_mode))
 363			error = xrep_dinode_findmode_walk_directory(ri, dp);
 364		xchk_iscan_mark_visited(&ri->ftype_iscan, dp);
 365		xchk_irele(sc, dp);
 366		if (error < 0)
 367			break;
 368		if (xchk_should_terminate(sc, &error))
 369			break;
 370	}
 371	xchk_iscan_iter_finish(&ri->ftype_iscan);
 372	xchk_iscan_teardown(&ri->ftype_iscan);
 373
 374	if (error == -EBUSY) {
 375		if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN) {
 376			/*
 377			 * If we got an EBUSY after finding at least one
 378			 * dirent, that means the scan found an inode on the
 379			 * inactivation list and could not open it.  Accept the
 380			 * alleged ftype and install a new mode below.
 381			 */
 382			error = 0;
 383		} else if (!(sc->flags & XCHK_TRY_HARDER)) {
 384			/*
 385			 * Otherwise, retry the operation one time to see if
 386			 * the reason for the delay is an inode from the same
 387			 * cluster buffer waiting on the inactivation list.
 388			 */
 389			error = -EDEADLOCK;
 390		}
 391	}
 392	if (error)
 393		return error;
 394
 395	/*
 396	 * Convert the discovered ftype into the file mode.  If all else fails,
 397	 * return S_IFREG.
 398	 */
 399	switch (ri->alleged_ftype) {
 400	case XFS_DIR3_FT_DIR:
 401		*mode = S_IFDIR;
 402		break;
 403	case XFS_DIR3_FT_WHT:
 404	case XFS_DIR3_FT_CHRDEV:
 405		*mode = S_IFCHR;
 406		break;
 407	case XFS_DIR3_FT_BLKDEV:
 408		*mode = S_IFBLK;
 409		break;
 410	case XFS_DIR3_FT_FIFO:
 411		*mode = S_IFIFO;
 412		break;
 413	case XFS_DIR3_FT_SOCK:
 414		*mode = S_IFSOCK;
 415		break;
 416	case XFS_DIR3_FT_SYMLINK:
 417		*mode = S_IFLNK;
 418		break;
 419	default:
 420		*mode = S_IFREG;
 421		break;
 422	}
 423	return 0;
 424}
 425
 426/* Turn di_mode into /something/ recognizable.  Returns true if we succeed. */
 427STATIC int
 428xrep_dinode_mode(
 429	struct xrep_inode	*ri,
 430	struct xfs_dinode	*dip)
 431{
 432	struct xfs_scrub	*sc = ri->sc;
 433	uint16_t		mode = be16_to_cpu(dip->di_mode);
 434	int			error;
 435
 436	trace_xrep_dinode_mode(sc, dip);
 437
 438	if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN)
 439		return 0;
 440
 441	/* Try to fix the mode.  If we cannot, then leave everything alone. */
 442	error = xrep_dinode_find_mode(ri, &mode);
 443	switch (error) {
 444	case -EINTR:
 445	case -EBUSY:
 446	case -EDEADLOCK:
 447		/* temporary failure or fatal signal */
 448		return error;
 449	case 0:
 450		/* found mode */
 451		break;
 452	default:
 453		/* some other error, assume S_IFREG */
 454		mode = S_IFREG;
 455		break;
 456	}
 457
 458	/* bad mode, so we set it to a file that only root can read */
 
 459	dip->di_mode = cpu_to_be16(mode);
 460	dip->di_uid = 0;
 461	dip->di_gid = 0;
 462	ri->zap_acls = true;
 463	return 0;
 464}
 465
 466/* Fix any conflicting flags that the verifiers complain about. */
 467STATIC void
 468xrep_dinode_flags(
 469	struct xfs_scrub	*sc,
 470	struct xfs_dinode	*dip,
 471	bool			isrt)
 472{
 473	struct xfs_mount	*mp = sc->mp;
 474	uint64_t		flags2 = be64_to_cpu(dip->di_flags2);
 475	uint16_t		flags = be16_to_cpu(dip->di_flags);
 476	uint16_t		mode = be16_to_cpu(dip->di_mode);
 477
 478	trace_xrep_dinode_flags(sc, dip);
 479
 480	if (isrt)
 481		flags |= XFS_DIFLAG_REALTIME;
 482	else
 483		flags &= ~XFS_DIFLAG_REALTIME;
 484
 485	/*
 486	 * For regular files on a reflink filesystem, set the REFLINK flag to
 487	 * protect shared extents.  A later stage will actually check those
 488	 * extents and clear the flag if possible.
 489	 */
 490	if (xfs_has_reflink(mp) && S_ISREG(mode))
 491		flags2 |= XFS_DIFLAG2_REFLINK;
 492	else
 493		flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE);
 494	if (flags & XFS_DIFLAG_REALTIME)
 495		flags2 &= ~XFS_DIFLAG2_REFLINK;
 496	if (!xfs_has_bigtime(mp))
 497		flags2 &= ~XFS_DIFLAG2_BIGTIME;
 498	if (!xfs_has_large_extent_counts(mp))
 499		flags2 &= ~XFS_DIFLAG2_NREXT64;
 500	if (flags2 & XFS_DIFLAG2_NREXT64)
 501		dip->di_nrext64_pad = 0;
 502	else if (dip->di_version >= 3)
 503		dip->di_v3_pad = 0;
 504	dip->di_flags = cpu_to_be16(flags);
 505	dip->di_flags2 = cpu_to_be64(flags2);
 506}
 507
 508/*
 509 * Blow out symlink; now it points nowhere.  We don't have to worry about
 510 * incore state because this inode is failing the verifiers.
 511 */
 512STATIC void
 513xrep_dinode_zap_symlink(
 514	struct xrep_inode	*ri,
 515	struct xfs_dinode	*dip)
 516{
 517	struct xfs_scrub	*sc = ri->sc;
 518	char			*p;
 519
 520	trace_xrep_dinode_zap_symlink(sc, dip);
 521
 522	dip->di_format = XFS_DINODE_FMT_LOCAL;
 523	dip->di_size = cpu_to_be64(1);
 524	p = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
 525	*p = '?';
 526	ri->ino_sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED;
 527}
 528
 529/*
 530 * Blow out dir, make the parent point to the root.  In the future repair will
 531 * reconstruct this directory for us.  Note that there's no in-core directory
 532 * inode because the sf verifier tripped, so we don't have to worry about the
 533 * dentry cache.
 534 */
 535STATIC void
 536xrep_dinode_zap_dir(
 537	struct xrep_inode	*ri,
 538	struct xfs_dinode	*dip)
 539{
 540	struct xfs_scrub	*sc = ri->sc;
 541	struct xfs_mount	*mp = sc->mp;
 542	struct xfs_dir2_sf_hdr	*sfp;
 543	int			i8count;
 544
 545	trace_xrep_dinode_zap_dir(sc, dip);
 546
 547	dip->di_format = XFS_DINODE_FMT_LOCAL;
 548	i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM;
 549	sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
 550	sfp->count = 0;
 551	sfp->i8count = i8count;
 552	xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
 553	dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count));
 554	ri->ino_sick_mask |= XFS_SICK_INO_DIR_ZAPPED;
 555}
 556
 557/* Make sure we don't have a garbage file size. */
 558STATIC void
 559xrep_dinode_size(
 560	struct xrep_inode	*ri,
 561	struct xfs_dinode	*dip)
 562{
 563	struct xfs_scrub	*sc = ri->sc;
 564	uint64_t		size = be64_to_cpu(dip->di_size);
 565	uint16_t		mode = be16_to_cpu(dip->di_mode);
 566
 567	trace_xrep_dinode_size(sc, dip);
 568
 569	switch (mode & S_IFMT) {
 570	case S_IFIFO:
 571	case S_IFCHR:
 572	case S_IFBLK:
 573	case S_IFSOCK:
 574		/* di_size can't be nonzero for special files */
 575		dip->di_size = 0;
 576		break;
 577	case S_IFREG:
 578		/* Regular files can't be larger than 2^63-1 bytes. */
 579		dip->di_size = cpu_to_be64(size & ~(1ULL << 63));
 580		break;
 581	case S_IFLNK:
 582		/*
 583		 * Truncate ridiculously oversized symlinks.  If the size is
 584		 * zero, reset it to point to the current directory.  Both of
 585		 * these conditions trigger dinode verifier errors, so there
 586		 * is no in-core state to reset.
 587		 */
 588		if (size > XFS_SYMLINK_MAXLEN)
 589			dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN);
 590		else if (size == 0)
 591			xrep_dinode_zap_symlink(ri, dip);
 592		break;
 593	case S_IFDIR:
 594		/*
 595		 * Directories can't have a size larger than 32G.  If the size
 596		 * is zero, reset it to an empty directory.  Both of these
 597		 * conditions trigger dinode verifier errors, so there is no
 598		 * in-core state to reset.
 599		 */
 600		if (size > XFS_DIR2_SPACE_SIZE)
 601			dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE);
 602		else if (size == 0)
 603			xrep_dinode_zap_dir(ri, dip);
 604		break;
 605	}
 606}
 607
 608/* Fix extent size hints. */
 609STATIC void
 610xrep_dinode_extsize_hints(
 611	struct xfs_scrub	*sc,
 612	struct xfs_dinode	*dip)
 613{
 614	struct xfs_mount	*mp = sc->mp;
 615	uint64_t		flags2 = be64_to_cpu(dip->di_flags2);
 616	uint16_t		flags = be16_to_cpu(dip->di_flags);
 617	uint16_t		mode = be16_to_cpu(dip->di_mode);
 618
 619	xfs_failaddr_t		fa;
 620
 621	trace_xrep_dinode_extsize_hints(sc, dip);
 622
 623	fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
 624			mode, flags);
 625	if (fa) {
 626		dip->di_extsize = 0;
 627		dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE |
 628					      XFS_DIFLAG_EXTSZINHERIT);
 629	}
 630
 631	if (dip->di_version < 3)
 632		return;
 633
 634	fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
 635			mode, flags, flags2);
 636	if (fa) {
 637		dip->di_cowextsize = 0;
 638		dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE);
 639	}
 640}
 641
 642/* Count extents and blocks for an inode given an rmap. */
 643STATIC int
 644xrep_dinode_walk_rmap(
 645	struct xfs_btree_cur		*cur,
 646	const struct xfs_rmap_irec	*rec,
 647	void				*priv)
 648{
 649	struct xrep_inode		*ri = priv;
 650	int				error = 0;
 651
 652	if (xchk_should_terminate(ri->sc, &error))
 653		return error;
 654
 655	/* We only care about this inode. */
 656	if (rec->rm_owner != ri->sc->sm->sm_ino)
 657		return 0;
 658
 659	if (rec->rm_flags & XFS_RMAP_ATTR_FORK) {
 660		ri->attr_blocks += rec->rm_blockcount;
 661		if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
 662			ri->attr_extents++;
 663
 664		return 0;
 665	}
 666
 667	ri->data_blocks += rec->rm_blockcount;
 668	if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
 669		ri->data_extents++;
 670
 671	return 0;
 672}
 673
 674/* Count extents and blocks for an inode from all AG rmap data. */
 675STATIC int
 676xrep_dinode_count_ag_rmaps(
 677	struct xrep_inode	*ri,
 678	struct xfs_perag	*pag)
 679{
 680	struct xfs_btree_cur	*cur;
 681	struct xfs_buf		*agf;
 682	int			error;
 683
 684	error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf);
 685	if (error)
 686		return error;
 687
 688	cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag);
 689	error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri);
 690	xfs_btree_del_cursor(cur, error);
 691	xfs_trans_brelse(ri->sc->tp, agf);
 692	return error;
 693}
 694
 695/* Count extents and blocks for a given inode from all rmap data. */
 696STATIC int
 697xrep_dinode_count_rmaps(
 698	struct xrep_inode	*ri)
 699{
 700	struct xfs_perag	*pag;
 701	xfs_agnumber_t		agno;
 702	int			error;
 703
 704	if (!xfs_has_rmapbt(ri->sc->mp) || xfs_has_realtime(ri->sc->mp))
 705		return -EOPNOTSUPP;
 706
 707	for_each_perag(ri->sc->mp, agno, pag) {
 708		error = xrep_dinode_count_ag_rmaps(ri, pag);
 709		if (error) {
 710			xfs_perag_rele(pag);
 711			return error;
 712		}
 713	}
 714
 715	/* Can't have extents on both the rt and the data device. */
 716	if (ri->data_extents && ri->rt_extents)
 717		return -EFSCORRUPTED;
 718
 719	trace_xrep_dinode_count_rmaps(ri->sc,
 720			ri->data_blocks, ri->rt_blocks, ri->attr_blocks,
 721			ri->data_extents, ri->rt_extents, ri->attr_extents);
 722	return 0;
 723}
 724
 725/* Return true if this extents-format ifork looks like garbage. */
 726STATIC bool
 727xrep_dinode_bad_extents_fork(
 728	struct xfs_scrub	*sc,
 729	struct xfs_dinode	*dip,
 730	unsigned int		dfork_size,
 731	int			whichfork)
 732{
 733	struct xfs_bmbt_irec	new;
 734	struct xfs_bmbt_rec	*dp;
 735	xfs_extnum_t		nex;
 736	bool			isrt;
 737	unsigned int		i;
 738
 739	nex = xfs_dfork_nextents(dip, whichfork);
 740	if (nex > dfork_size / sizeof(struct xfs_bmbt_rec))
 741		return true;
 742
 743	dp = XFS_DFORK_PTR(dip, whichfork);
 744
 745	isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME);
 746	for (i = 0; i < nex; i++, dp++) {
 747		xfs_failaddr_t	fa;
 748
 749		xfs_bmbt_disk_get_all(dp, &new);
 750		fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork,
 751				&new);
 752		if (fa)
 753			return true;
 754	}
 755
 756	return false;
 757}
 758
 759/* Return true if this btree-format ifork looks like garbage. */
 760STATIC bool
 761xrep_dinode_bad_bmbt_fork(
 762	struct xfs_scrub	*sc,
 763	struct xfs_dinode	*dip,
 764	unsigned int		dfork_size,
 765	int			whichfork)
 766{
 767	struct xfs_bmdr_block	*dfp;
 768	xfs_extnum_t		nex;
 769	unsigned int		i;
 770	unsigned int		dmxr;
 771	unsigned int		nrecs;
 772	unsigned int		level;
 773
 774	nex = xfs_dfork_nextents(dip, whichfork);
 775	if (nex <= dfork_size / sizeof(struct xfs_bmbt_rec))
 776		return true;
 777
 778	if (dfork_size < sizeof(struct xfs_bmdr_block))
 779		return true;
 780
 781	dfp = XFS_DFORK_PTR(dip, whichfork);
 782	nrecs = be16_to_cpu(dfp->bb_numrecs);
 783	level = be16_to_cpu(dfp->bb_level);
 784
 785	if (nrecs == 0 || XFS_BMDR_SPACE_CALC(nrecs) > dfork_size)
 786		return true;
 787	if (level == 0 || level >= XFS_BM_MAXLEVELS(sc->mp, whichfork))
 788		return true;
 789
 790	dmxr = xfs_bmdr_maxrecs(dfork_size, 0);
 791	for (i = 1; i <= nrecs; i++) {
 792		struct xfs_bmbt_key	*fkp;
 793		xfs_bmbt_ptr_t		*fpp;
 794		xfs_fileoff_t		fileoff;
 795		xfs_fsblock_t		fsbno;
 796
 797		fkp = XFS_BMDR_KEY_ADDR(dfp, i);
 798		fileoff = be64_to_cpu(fkp->br_startoff);
 799		if (!xfs_verify_fileoff(sc->mp, fileoff))
 800			return true;
 801
 802		fpp = XFS_BMDR_PTR_ADDR(dfp, i, dmxr);
 803		fsbno = be64_to_cpu(*fpp);
 804		if (!xfs_verify_fsbno(sc->mp, fsbno))
 805			return true;
 806	}
 807
 808	return false;
 809}
 810
 811/*
 812 * Check the data fork for things that will fail the ifork verifiers or the
 813 * ifork formatters.
 814 */
 815STATIC bool
 816xrep_dinode_check_dfork(
 817	struct xfs_scrub	*sc,
 818	struct xfs_dinode	*dip,
 819	uint16_t		mode)
 820{
 821	void			*dfork_ptr;
 822	int64_t			data_size;
 823	unsigned int		fmt;
 824	unsigned int		dfork_size;
 825
 826	/*
 827	 * Verifier functions take signed int64_t, so check for bogus negative
 828	 * values first.
 829	 */
 830	data_size = be64_to_cpu(dip->di_size);
 831	if (data_size < 0)
 832		return true;
 833
 834	fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK);
 835	switch (mode & S_IFMT) {
 836	case S_IFIFO:
 837	case S_IFCHR:
 838	case S_IFBLK:
 839	case S_IFSOCK:
 840		if (fmt != XFS_DINODE_FMT_DEV)
 841			return true;
 842		break;
 843	case S_IFREG:
 844		if (fmt == XFS_DINODE_FMT_LOCAL)
 845			return true;
 846		fallthrough;
 847	case S_IFLNK:
 848	case S_IFDIR:
 849		switch (fmt) {
 850		case XFS_DINODE_FMT_LOCAL:
 851		case XFS_DINODE_FMT_EXTENTS:
 852		case XFS_DINODE_FMT_BTREE:
 853			break;
 854		default:
 855			return true;
 856		}
 857		break;
 858	default:
 859		return true;
 860	}
 861
 862	dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK);
 863	dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
 864
 865	switch (fmt) {
 866	case XFS_DINODE_FMT_DEV:
 867		break;
 868	case XFS_DINODE_FMT_LOCAL:
 869		/* dir/symlink structure cannot be larger than the fork */
 870		if (data_size > dfork_size)
 871			return true;
 872		/* directory structure must pass verification. */
 873		if (S_ISDIR(mode) &&
 874		    xfs_dir2_sf_verify(sc->mp, dfork_ptr, data_size) != NULL)
 875			return true;
 876		/* symlink structure must pass verification. */
 877		if (S_ISLNK(mode) &&
 878		    xfs_symlink_shortform_verify(dfork_ptr, data_size) != NULL)
 879			return true;
 880		break;
 881	case XFS_DINODE_FMT_EXTENTS:
 882		if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size,
 883				XFS_DATA_FORK))
 884			return true;
 885		break;
 886	case XFS_DINODE_FMT_BTREE:
 887		if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size,
 888				XFS_DATA_FORK))
 889			return true;
 890		break;
 891	default:
 892		return true;
 893	}
 894
 895	return false;
 896}
 897
 898static void
 899xrep_dinode_set_data_nextents(
 900	struct xfs_dinode	*dip,
 901	xfs_extnum_t		nextents)
 902{
 903	if (xfs_dinode_has_large_extent_counts(dip))
 904		dip->di_big_nextents = cpu_to_be64(nextents);
 905	else
 906		dip->di_nextents = cpu_to_be32(nextents);
 907}
 908
 909static void
 910xrep_dinode_set_attr_nextents(
 911	struct xfs_dinode	*dip,
 912	xfs_extnum_t		nextents)
 913{
 914	if (xfs_dinode_has_large_extent_counts(dip))
 915		dip->di_big_anextents = cpu_to_be32(nextents);
 916	else
 917		dip->di_anextents = cpu_to_be16(nextents);
 918}
 919
 920/* Reset the data fork to something sane. */
 921STATIC void
 922xrep_dinode_zap_dfork(
 923	struct xrep_inode	*ri,
 924	struct xfs_dinode	*dip,
 925	uint16_t		mode)
 926{
 927	struct xfs_scrub	*sc = ri->sc;
 928
 929	trace_xrep_dinode_zap_dfork(sc, dip);
 930
 931	ri->ino_sick_mask |= XFS_SICK_INO_BMBTD_ZAPPED;
 932
 933	xrep_dinode_set_data_nextents(dip, 0);
 934	ri->data_blocks = 0;
 935	ri->rt_blocks = 0;
 936
 937	/* Special files always get reset to DEV */
 938	switch (mode & S_IFMT) {
 939	case S_IFIFO:
 940	case S_IFCHR:
 941	case S_IFBLK:
 942	case S_IFSOCK:
 943		dip->di_format = XFS_DINODE_FMT_DEV;
 944		dip->di_size = 0;
 945		return;
 946	}
 947
 948	/*
 949	 * If we have data extents, reset to an empty map and hope the user
 950	 * will run the bmapbtd checker next.
 951	 */
 952	if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) {
 953		dip->di_format = XFS_DINODE_FMT_EXTENTS;
 954		return;
 955	}
 956
 957	/* Otherwise, reset the local format to the minimum. */
 958	switch (mode & S_IFMT) {
 959	case S_IFLNK:
 960		xrep_dinode_zap_symlink(ri, dip);
 961		break;
 962	case S_IFDIR:
 963		xrep_dinode_zap_dir(ri, dip);
 964		break;
 965	}
 966}
 967
 968/*
 969 * Check the attr fork for things that will fail the ifork verifiers or the
 970 * ifork formatters.
 971 */
 972STATIC bool
 973xrep_dinode_check_afork(
 974	struct xfs_scrub		*sc,
 975	struct xfs_dinode		*dip)
 976{
 977	struct xfs_attr_sf_hdr		*afork_ptr;
 978	size_t				attr_size;
 979	unsigned int			afork_size;
 980
 981	if (XFS_DFORK_BOFF(dip) == 0)
 982		return dip->di_aformat != XFS_DINODE_FMT_EXTENTS ||
 983		       xfs_dfork_attr_extents(dip) != 0;
 984
 985	afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
 986	afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
 987
 988	switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) {
 989	case XFS_DINODE_FMT_LOCAL:
 990		/* Fork has to be large enough to extract the xattr size. */
 991		if (afork_size < sizeof(struct xfs_attr_sf_hdr))
 992			return true;
 993
 994		/* xattr structure cannot be larger than the fork */
 995		attr_size = be16_to_cpu(afork_ptr->totsize);
 996		if (attr_size > afork_size)
 997			return true;
 998
 999		/* xattr structure must pass verification. */
1000		return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL;
1001	case XFS_DINODE_FMT_EXTENTS:
1002		if (xrep_dinode_bad_extents_fork(sc, dip, afork_size,
1003					XFS_ATTR_FORK))
1004			return true;
1005		break;
1006	case XFS_DINODE_FMT_BTREE:
1007		if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size,
1008					XFS_ATTR_FORK))
1009			return true;
1010		break;
1011	default:
1012		return true;
1013	}
1014
1015	return false;
1016}
1017
1018/*
1019 * Reset the attr fork to empty.  Since the attr fork could have contained
1020 * ACLs, make the file readable only by root.
1021 */
1022STATIC void
1023xrep_dinode_zap_afork(
1024	struct xrep_inode	*ri,
1025	struct xfs_dinode	*dip,
1026	uint16_t		mode)
1027{
1028	struct xfs_scrub	*sc = ri->sc;
1029
1030	trace_xrep_dinode_zap_afork(sc, dip);
1031
1032	ri->ino_sick_mask |= XFS_SICK_INO_BMBTA_ZAPPED;
1033
1034	dip->di_aformat = XFS_DINODE_FMT_EXTENTS;
1035	xrep_dinode_set_attr_nextents(dip, 0);
1036	ri->attr_blocks = 0;
1037
1038	/*
1039	 * If the data fork is in btree format, removing the attr fork entirely
1040	 * might cause verifier failures if the next level down in the bmbt
1041	 * could now fit in the data fork area.
1042	 */
1043	if (dip->di_format != XFS_DINODE_FMT_BTREE)
1044		dip->di_forkoff = 0;
1045	dip->di_mode = cpu_to_be16(mode & ~0777);
1046	dip->di_uid = 0;
1047	dip->di_gid = 0;
1048}
1049
1050/* Make sure the fork offset is a sensible value. */
1051STATIC void
1052xrep_dinode_ensure_forkoff(
1053	struct xrep_inode	*ri,
1054	struct xfs_dinode	*dip,
1055	uint16_t		mode)
1056{
1057	struct xfs_bmdr_block	*bmdr;
1058	struct xfs_scrub	*sc = ri->sc;
1059	xfs_extnum_t		attr_extents, data_extents;
1060	size_t			bmdr_minsz = XFS_BMDR_SPACE_CALC(1);
1061	unsigned int		lit_sz = XFS_LITINO(sc->mp);
1062	unsigned int		afork_min, dfork_min;
1063
1064	trace_xrep_dinode_ensure_forkoff(sc, dip);
1065
1066	/*
1067	 * Before calling this function, xrep_dinode_core ensured that both
1068	 * forks actually fit inside their respective literal areas.  If this
1069	 * was not the case, the fork was reset to FMT_EXTENTS with zero
1070	 * records.  If the rmapbt scan found attr or data fork blocks, this
1071	 * will be noted in the dinode_stats, and we must leave enough room
1072	 * for the bmap repair code to reconstruct the mapping structure.
1073	 *
1074	 * First, compute the minimum space required for the attr fork.
1075	 */
1076	switch (dip->di_aformat) {
1077	case XFS_DINODE_FMT_LOCAL:
1078		/*
1079		 * If we still have a shortform xattr structure at all, that
1080		 * means the attr fork area was exactly large enough to fit
1081		 * the sf structure.
1082		 */
1083		afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
1084		break;
1085	case XFS_DINODE_FMT_EXTENTS:
1086		attr_extents = xfs_dfork_attr_extents(dip);
1087		if (attr_extents) {
1088			/*
1089			 * We must maintain sufficient space to hold the entire
1090			 * extent map array in the data fork.  Note that we
1091			 * previously zapped the fork if it had no chance of
1092			 * fitting in the inode.
1093			 */
1094			afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents;
1095		} else if (ri->attr_extents > 0) {
1096			/*
1097			 * The attr fork thinks it has zero extents, but we
1098			 * found some xattr extents.  We need to leave enough
1099			 * empty space here so that the incore attr fork will
1100			 * get created (and hence trigger the attr fork bmap
1101			 * repairer).
1102			 */
1103			afork_min = bmdr_minsz;
1104		} else {
1105			/* No extents on disk or found in rmapbt. */
1106			afork_min = 0;
1107		}
1108		break;
1109	case XFS_DINODE_FMT_BTREE:
1110		/* Must have space for btree header and key/pointers. */
1111		bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
1112		afork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr);
1113		break;
1114	default:
1115		/* We should never see any other formats. */
1116		afork_min = 0;
1117		break;
1118	}
1119
1120	/* Compute the minimum space required for the data fork. */
1121	switch (dip->di_format) {
1122	case XFS_DINODE_FMT_DEV:
1123		dfork_min = sizeof(__be32);
1124		break;
1125	case XFS_DINODE_FMT_UUID:
1126		dfork_min = sizeof(uuid_t);
1127		break;
1128	case XFS_DINODE_FMT_LOCAL:
1129		/*
1130		 * If we still have a shortform data fork at all, that means
1131		 * the data fork area was large enough to fit whatever was in
1132		 * there.
1133		 */
1134		dfork_min = be64_to_cpu(dip->di_size);
1135		break;
1136	case XFS_DINODE_FMT_EXTENTS:
1137		data_extents = xfs_dfork_data_extents(dip);
1138		if (data_extents) {
1139			/*
1140			 * We must maintain sufficient space to hold the entire
1141			 * extent map array in the data fork.  Note that we
1142			 * previously zapped the fork if it had no chance of
1143			 * fitting in the inode.
1144			 */
1145			dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents;
1146		} else if (ri->data_extents > 0 || ri->rt_extents > 0) {
1147			/*
1148			 * The data fork thinks it has zero extents, but we
1149			 * found some data extents.  We need to leave enough
1150			 * empty space here so that the data fork bmap repair
1151			 * will recover the mappings.
1152			 */
1153			dfork_min = bmdr_minsz;
1154		} else {
1155			/* No extents on disk or found in rmapbt. */
1156			dfork_min = 0;
1157		}
1158		break;
1159	case XFS_DINODE_FMT_BTREE:
1160		/* Must have space for btree header and key/pointers. */
1161		bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
1162		dfork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr);
1163		break;
1164	default:
1165		dfork_min = 0;
1166		break;
1167	}
1168
1169	/*
1170	 * Round all values up to the nearest 8 bytes, because that is the
1171	 * precision of di_forkoff.
1172	 */
1173	afork_min = roundup(afork_min, 8);
1174	dfork_min = roundup(dfork_min, 8);
1175	bmdr_minsz = roundup(bmdr_minsz, 8);
1176
1177	ASSERT(dfork_min <= lit_sz);
1178	ASSERT(afork_min <= lit_sz);
1179
1180	/*
1181	 * If the data fork was zapped and we don't have enough space for the
1182	 * recovery fork, move the attr fork up.
1183	 */
1184	if (dip->di_format == XFS_DINODE_FMT_EXTENTS &&
1185	    xfs_dfork_data_extents(dip) == 0 &&
1186	    (ri->data_extents > 0 || ri->rt_extents > 0) &&
1187	    bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) {
1188		if (bmdr_minsz + afork_min > lit_sz) {
1189			/*
1190			 * The attr for and the stub fork we need to recover
1191			 * the data fork won't both fit.  Zap the attr fork.
1192			 */
1193			xrep_dinode_zap_afork(ri, dip, mode);
1194			afork_min = bmdr_minsz;
1195		} else {
1196			void	*before, *after;
1197
1198			/* Otherwise, just slide the attr fork up. */
1199			before = XFS_DFORK_APTR(dip);
1200			dip->di_forkoff = bmdr_minsz >> 3;
1201			after = XFS_DFORK_APTR(dip);
1202			memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp));
1203		}
1204	}
1205
1206	/*
1207	 * If the attr fork was zapped and we don't have enough space for the
1208	 * recovery fork, move the attr fork down.
1209	 */
1210	if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS &&
1211	    xfs_dfork_attr_extents(dip) == 0 &&
1212	    ri->attr_extents > 0 &&
1213	    bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) {
1214		if (dip->di_format == XFS_DINODE_FMT_BTREE) {
1215			/*
1216			 * If the data fork is in btree format then we can't
1217			 * adjust forkoff because that runs the risk of
1218			 * violating the extents/btree format transition rules.
1219			 */
1220		} else if (bmdr_minsz + dfork_min > lit_sz) {
1221			/*
1222			 * If we can't move the attr fork, too bad, we lose the
1223			 * attr fork and leak its blocks.
1224			 */
1225			xrep_dinode_zap_afork(ri, dip, mode);
1226		} else {
1227			/*
1228			 * Otherwise, just slide the attr fork down.  The attr
1229			 * fork is empty, so we don't have any old contents to
1230			 * move here.
1231			 */
1232			dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3;
1233		}
1234	}
1235}
1236
1237/*
1238 * Zap the data/attr forks if we spot anything that isn't going to pass the
1239 * ifork verifiers or the ifork formatters, because we need to get the inode
1240 * into good enough shape that the higher level repair functions can run.
1241 */
1242STATIC void
1243xrep_dinode_zap_forks(
1244	struct xrep_inode	*ri,
1245	struct xfs_dinode	*dip)
1246{
1247	struct xfs_scrub	*sc = ri->sc;
1248	xfs_extnum_t		data_extents;
1249	xfs_extnum_t		attr_extents;
1250	xfs_filblks_t		nblocks;
1251	uint16_t		mode;
1252	bool			zap_datafork = false;
1253	bool			zap_attrfork = ri->zap_acls;
1254
1255	trace_xrep_dinode_zap_forks(sc, dip);
1256
1257	mode = be16_to_cpu(dip->di_mode);
1258
1259	data_extents = xfs_dfork_data_extents(dip);
1260	attr_extents = xfs_dfork_attr_extents(dip);
1261	nblocks = be64_to_cpu(dip->di_nblocks);
1262
1263	/* Inode counters don't make sense? */
1264	if (data_extents > nblocks)
1265		zap_datafork = true;
1266	if (attr_extents > nblocks)
1267		zap_attrfork = true;
1268	if (data_extents + attr_extents > nblocks)
1269		zap_datafork = zap_attrfork = true;
1270
1271	if (!zap_datafork)
1272		zap_datafork = xrep_dinode_check_dfork(sc, dip, mode);
1273	if (!zap_attrfork)
1274		zap_attrfork = xrep_dinode_check_afork(sc, dip);
1275
1276	/* Zap whatever's bad. */
1277	if (zap_attrfork)
1278		xrep_dinode_zap_afork(ri, dip, mode);
1279	if (zap_datafork)
1280		xrep_dinode_zap_dfork(ri, dip, mode);
1281	xrep_dinode_ensure_forkoff(ri, dip, mode);
1282
1283	/*
1284	 * Zero di_nblocks if we don't have any extents at all to satisfy the
1285	 * buffer verifier.
1286	 */
1287	data_extents = xfs_dfork_data_extents(dip);
1288	attr_extents = xfs_dfork_attr_extents(dip);
1289	if (data_extents + attr_extents == 0)
1290		dip->di_nblocks = 0;
1291}
1292
1293/* Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget. */
1294STATIC int
1295xrep_dinode_core(
1296	struct xrep_inode	*ri)
1297{
1298	struct xfs_scrub	*sc = ri->sc;
1299	struct xfs_buf		*bp;
1300	struct xfs_dinode	*dip;
1301	xfs_ino_t		ino = sc->sm->sm_ino;
1302	int			error;
1303	int			iget_error;
1304
1305	/* Figure out what this inode had mapped in both forks. */
1306	error = xrep_dinode_count_rmaps(ri);
1307	if (error)
1308		return error;
1309
1310	/* Read the inode cluster buffer. */
1311	error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp,
1312			ri->imap.im_blkno, ri->imap.im_len, XBF_UNMAPPED, &bp,
1313			NULL);
1314	if (error)
1315		return error;
1316
1317	/* Make sure we can pass the inode buffer verifier. */
1318	xrep_dinode_buf(sc, bp);
1319	bp->b_ops = &xfs_inode_buf_ops;
1320
1321	/* Fix everything the verifier will complain about. */
1322	dip = xfs_buf_offset(bp, ri->imap.im_boffset);
1323	xrep_dinode_header(sc, dip);
1324	iget_error = xrep_dinode_mode(ri, dip);
1325	if (iget_error)
1326		goto write;
1327	xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
1328	xrep_dinode_size(ri, dip);
1329	xrep_dinode_extsize_hints(sc, dip);
1330	xrep_dinode_zap_forks(ri, dip);
1331
1332write:
1333	/* Write out the inode. */
1334	trace_xrep_dinode_fixed(sc, dip);
1335	xfs_dinode_calc_crc(sc->mp, dip);
1336	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF);
1337	xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset,
1338			ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1);
1339
1340	/*
1341	 * In theory, we've fixed the ondisk inode record enough that we should
1342	 * be able to load the inode into the cache.  Try to iget that inode
1343	 * now while we hold the AGI and the inode cluster buffer and take the
1344	 * IOLOCK so that we can continue with repairs without anyone else
1345	 * accessing the inode.  If iget fails, we still need to commit the
1346	 * changes.
1347	 */
1348	if (!iget_error)
1349		iget_error = xchk_iget(sc, ino, &sc->ip);
1350	if (!iget_error)
1351		xchk_ilock(sc, XFS_IOLOCK_EXCL);
1352
1353	/*
1354	 * Commit the inode cluster buffer updates and drop the AGI buffer that
1355	 * we've been holding since scrub setup.  From here on out, repairs
1356	 * deal only with the cached inode.
1357	 */
1358	error = xrep_trans_commit(sc);
1359	if (error)
1360		return error;
1361
1362	if (iget_error)
1363		return iget_error;
1364
1365	error = xchk_trans_alloc(sc, 0);
1366	if (error)
1367		return error;
1368
1369	error = xrep_ino_dqattach(sc);
1370	if (error)
1371		return error;
1372
1373	xchk_ilock(sc, XFS_ILOCK_EXCL);
1374	if (ri->ino_sick_mask)
1375		xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask);
1376	return 0;
1377}
1378
1379/* Fix everything xfs_dinode_verify cares about. */
1380STATIC int
1381xrep_dinode_problems(
1382	struct xrep_inode	*ri)
1383{
1384	struct xfs_scrub	*sc = ri->sc;
1385	int			error;
1386
1387	error = xrep_dinode_core(ri);
1388	if (error)
1389		return error;
1390
1391	/* We had to fix a totally busted inode, schedule quotacheck. */
1392	if (XFS_IS_UQUOTA_ON(sc->mp))
1393		xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1394	if (XFS_IS_GQUOTA_ON(sc->mp))
1395		xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1396	if (XFS_IS_PQUOTA_ON(sc->mp))
1397		xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1398
1399	return 0;
1400}
1401
1402/*
1403 * Fix problems that the verifiers don't care about.  In general these are
1404 * errors that don't cause problems elsewhere in the kernel that we can easily
1405 * detect, so we don't check them all that rigorously.
1406 */
1407
1408/* Make sure block and extent counts are ok. */
1409STATIC int
1410xrep_inode_blockcounts(
1411	struct xfs_scrub	*sc)
1412{
1413	struct xfs_ifork	*ifp;
1414	xfs_filblks_t		count;
1415	xfs_filblks_t		acount;
1416	xfs_extnum_t		nextents;
1417	int			error;
1418
1419	trace_xrep_inode_blockcounts(sc);
1420
1421	/* Set data fork counters from the data fork mappings. */
1422	error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
1423			&nextents, &count);
1424	if (error)
1425		return error;
1426	if (xfs_is_reflink_inode(sc->ip)) {
1427		/*
1428		 * data fork blockcount can exceed physical storage if a user
1429		 * reflinks the same block over and over again.
1430		 */
1431		;
1432	} else if (XFS_IS_REALTIME_INODE(sc->ip)) {
1433		if (count >= sc->mp->m_sb.sb_rblocks)
1434			return -EFSCORRUPTED;
1435	} else {
1436		if (count >= sc->mp->m_sb.sb_dblocks)
1437			return -EFSCORRUPTED;
1438	}
1439	error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents);
1440	if (error)
1441		return error;
1442	sc->ip->i_df.if_nextents = nextents;
1443
1444	/* Set attr fork counters from the attr fork mappings. */
1445	ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
1446	if (ifp) {
1447		error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
1448				&nextents, &acount);
1449		if (error)
1450			return error;
1451		if (count >= sc->mp->m_sb.sb_dblocks)
1452			return -EFSCORRUPTED;
1453		error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK,
1454				nextents);
1455		if (error)
1456			return error;
1457		ifp->if_nextents = nextents;
1458	} else {
1459		acount = 0;
1460	}
1461
1462	sc->ip->i_nblocks = count + acount;
1463	return 0;
1464}
1465
1466/* Check for invalid uid/gid/prid. */
1467STATIC void
1468xrep_inode_ids(
1469	struct xfs_scrub	*sc)
1470{
1471	bool			dirty = false;
1472
1473	trace_xrep_inode_ids(sc);
1474
1475	if (!uid_valid(VFS_I(sc->ip)->i_uid)) {
1476		i_uid_write(VFS_I(sc->ip), 0);
1477		dirty = true;
1478		if (XFS_IS_UQUOTA_ON(sc->mp))
1479			xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1480	}
1481
1482	if (!gid_valid(VFS_I(sc->ip)->i_gid)) {
1483		i_gid_write(VFS_I(sc->ip), 0);
1484		dirty = true;
1485		if (XFS_IS_GQUOTA_ON(sc->mp))
1486			xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1487	}
1488
1489	if (sc->ip->i_projid == -1U) {
1490		sc->ip->i_projid = 0;
1491		dirty = true;
1492		if (XFS_IS_PQUOTA_ON(sc->mp))
1493			xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1494	}
1495
1496	/* strip setuid/setgid if we touched any of the ids */
1497	if (dirty)
1498		VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID);
1499}
1500
1501static inline void
1502xrep_clamp_timestamp(
1503	struct xfs_inode	*ip,
1504	struct timespec64	*ts)
1505{
1506	ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC);
1507	*ts = timestamp_truncate(*ts, VFS_I(ip));
1508}
1509
1510/* Nanosecond counters can't have more than 1 billion. */
1511STATIC void
1512xrep_inode_timestamps(
1513	struct xfs_inode	*ip)
1514{
1515	struct timespec64	tstamp;
1516	struct inode		*inode = VFS_I(ip);
1517
1518	tstamp = inode_get_atime(inode);
1519	xrep_clamp_timestamp(ip, &tstamp);
1520	inode_set_atime_to_ts(inode, tstamp);
1521
1522	tstamp = inode_get_mtime(inode);
1523	xrep_clamp_timestamp(ip, &tstamp);
1524	inode_set_mtime_to_ts(inode, tstamp);
1525
1526	tstamp = inode_get_ctime(inode);
1527	xrep_clamp_timestamp(ip, &tstamp);
1528	inode_set_ctime_to_ts(inode, tstamp);
1529
1530	xrep_clamp_timestamp(ip, &ip->i_crtime);
1531}
1532
1533/* Fix inode flags that don't make sense together. */
1534STATIC void
1535xrep_inode_flags(
1536	struct xfs_scrub	*sc)
1537{
1538	uint16_t		mode;
1539
1540	trace_xrep_inode_flags(sc);
1541
1542	mode = VFS_I(sc->ip)->i_mode;
1543
1544	/* Clear junk flags */
1545	if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY)
1546		sc->ip->i_diflags &= ~XFS_DIFLAG_ANY;
1547
1548	/* NEWRTBM only applies to realtime bitmaps */
1549	if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino)
1550		sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM;
1551	else
1552		sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM;
1553
1554	/* These only make sense for directories. */
1555	if (!S_ISDIR(mode))
1556		sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT |
1557					  XFS_DIFLAG_EXTSZINHERIT |
1558					  XFS_DIFLAG_PROJINHERIT |
1559					  XFS_DIFLAG_NOSYMLINKS);
1560
1561	/* These only make sense for files. */
1562	if (!S_ISREG(mode))
1563		sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME |
1564					  XFS_DIFLAG_EXTSIZE);
1565
1566	/* These only make sense for non-rt files. */
1567	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
1568		sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM;
1569
1570	/* Immutable and append only?  Drop the append. */
1571	if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) &&
1572	    (sc->ip->i_diflags & XFS_DIFLAG_APPEND))
1573		sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND;
1574
1575	/* Clear junk flags. */
1576	if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY)
1577		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_ANY;
1578
1579	/* No reflink flag unless we support it and it's a file. */
1580	if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode))
1581		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1582
1583	/* DAX only applies to files and dirs. */
1584	if (!(S_ISREG(mode) || S_ISDIR(mode)))
1585		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
1586
1587	/* No reflink files on the realtime device. */
1588	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
1589		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1590}
1591
1592/*
1593 * Fix size problems with block/node format directories.  If we fail to find
1594 * the extent list, just bail out and let the bmapbtd repair functions clean
1595 * up that mess.
1596 */
1597STATIC void
1598xrep_inode_blockdir_size(
1599	struct xfs_scrub	*sc)
1600{
1601	struct xfs_iext_cursor	icur;
1602	struct xfs_bmbt_irec	got;
1603	struct xfs_ifork	*ifp;
1604	xfs_fileoff_t		off;
1605	int			error;
1606
1607	trace_xrep_inode_blockdir_size(sc);
1608
1609	error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK);
1610	if (error)
1611		return;
1612
1613	/* Find the last block before 32G; this is the dir size. */
1614	ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1615	off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE);
1616	if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) {
1617		/* zero-extents directory? */
1618		return;
1619	}
1620
1621	off = got.br_startoff + got.br_blockcount;
1622	sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE,
1623			XFS_FSB_TO_B(sc->mp, off));
1624}
1625
1626/* Fix size problems with short format directories. */
1627STATIC void
1628xrep_inode_sfdir_size(
1629	struct xfs_scrub	*sc)
1630{
1631	struct xfs_ifork	*ifp;
1632
1633	trace_xrep_inode_sfdir_size(sc);
1634
1635	ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1636	sc->ip->i_disk_size = ifp->if_bytes;
1637}
1638
1639/*
1640 * Fix any irregularities in a directory inode's size now that we can iterate
1641 * extent maps and access other regular inode data.
1642 */
1643STATIC void
1644xrep_inode_dir_size(
1645	struct xfs_scrub	*sc)
1646{
1647	trace_xrep_inode_dir_size(sc);
1648
1649	switch (sc->ip->i_df.if_format) {
1650	case XFS_DINODE_FMT_EXTENTS:
1651	case XFS_DINODE_FMT_BTREE:
1652		xrep_inode_blockdir_size(sc);
1653		break;
1654	case XFS_DINODE_FMT_LOCAL:
1655		xrep_inode_sfdir_size(sc);
1656		break;
1657	}
1658}
1659
1660/* Fix extent size hint problems. */
1661STATIC void
1662xrep_inode_extsize(
1663	struct xfs_scrub	*sc)
1664{
1665	/* Fix misaligned extent size hints on a directory. */
1666	if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
1667	    (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
1668	    xfs_extlen_to_rtxmod(sc->mp, sc->ip->i_extsize) > 0) {
1669		sc->ip->i_extsize = 0;
1670		sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT;
1671	}
1672}
1673
1674/* Fix any irregularities in an inode that the verifiers don't catch. */
1675STATIC int
1676xrep_inode_problems(
1677	struct xfs_scrub	*sc)
1678{
1679	int			error;
1680
1681	error = xrep_inode_blockcounts(sc);
1682	if (error)
1683		return error;
1684	xrep_inode_timestamps(sc->ip);
1685	xrep_inode_flags(sc);
1686	xrep_inode_ids(sc);
1687	/*
1688	 * We can now do a better job fixing the size of a directory now that
1689	 * we can scan the data fork extents than we could in xrep_dinode_size.
1690	 */
1691	if (S_ISDIR(VFS_I(sc->ip)->i_mode))
1692		xrep_inode_dir_size(sc);
1693	xrep_inode_extsize(sc);
1694
1695	trace_xrep_inode_fixed(sc);
1696	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
1697	return xrep_roll_trans(sc);
1698}
1699
1700/* Repair an inode's fields. */
1701int
1702xrep_inode(
1703	struct xfs_scrub	*sc)
1704{
1705	int			error = 0;
1706
1707	/*
1708	 * No inode?  That means we failed the _iget verifiers.  Repair all
1709	 * the things that the inode verifiers care about, then retry _iget.
1710	 */
1711	if (!sc->ip) {
1712		struct xrep_inode	*ri = sc->buf;
1713
1714		ASSERT(ri != NULL);
1715
1716		error = xrep_dinode_problems(ri);
1717		if (error == -EBUSY) {
1718			/*
1719			 * Directory scan to recover inode mode encountered a
1720			 * busy inode, so we did not continue repairing things.
1721			 */
1722			return 0;
1723		}
1724		if (error)
1725			return error;
1726
1727		/* By this point we had better have a working incore inode. */
1728		if (!sc->ip)
1729			return -EFSCORRUPTED;
1730	}
1731
1732	xfs_trans_ijoin(sc->tp, sc->ip, 0);
1733
1734	/* If we found corruption of any kind, try to fix it. */
1735	if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) ||
1736	    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) {
1737		error = xrep_inode_problems(sc);
1738		if (error)
1739			return error;
1740	}
1741
1742	/* See if we can clear the reflink flag. */
1743	if (xfs_is_reflink_inode(sc->ip)) {
1744		error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
1745		if (error)
1746			return error;
1747	}
1748
1749	return xrep_defer_finish(sc);
1750}

   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
   4 * Author: Darrick J. Wong <djwong@kernel.org>
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_trans_resv.h"
  11#include "xfs_mount.h"
  12#include "xfs_defer.h"
  13#include "xfs_btree.h"
  14#include "xfs_bit.h"
  15#include "xfs_log_format.h"
  16#include "xfs_trans.h"
  17#include "xfs_sb.h"
  18#include "xfs_inode.h"
  19#include "xfs_icache.h"
  20#include "xfs_inode_buf.h"
  21#include "xfs_inode_fork.h"
  22#include "xfs_ialloc.h"
  23#include "xfs_da_format.h"
  24#include "xfs_reflink.h"
  25#include "xfs_alloc.h"
  26#include "xfs_rmap.h"
  27#include "xfs_rmap_btree.h"
  28#include "xfs_bmap.h"
  29#include "xfs_bmap_btree.h"
  30#include "xfs_bmap_util.h"
  31#include "xfs_dir2.h"
  32#include "xfs_dir2_priv.h"
  33#include "xfs_quota_defs.h"
  34#include "xfs_quota.h"
  35#include "xfs_ag.h"
  36#include "xfs_rtbitmap.h"
  37#include "xfs_attr_leaf.h"
  38#include "xfs_log_priv.h"
  39#include "xfs_health.h"
 
  40#include "scrub/xfs_scrub.h"
  41#include "scrub/scrub.h"
  42#include "scrub/common.h"
  43#include "scrub/btree.h"
  44#include "scrub/trace.h"
  45#include "scrub/repair.h"
 
 
  46
  47/*
  48 * Inode Record Repair
  49 * ===================
  50 *
  51 * Roughly speaking, inode problems can be classified based on whether or not
  52 * they trip the dinode verifiers.  If those trip, then we won't be able to
  53 * xfs_iget ourselves the inode.
  54 *
  55 * Therefore, the xrep_dinode_* functions fix anything that will cause the
  56 * inode buffer verifier or the dinode verifier.  The xrep_inode_* functions
  57 * fix things on live incore inodes.  The inode repair functions make decisions
  58 * with security and usability implications when reviving a file:
  59 *
  60 * - Files with zero di_mode or a garbage di_mode are converted to regular file
  61 *   that only root can read.  This file may not actually contain user data,
  62 *   if the file was not previously a regular file.  Setuid and setgid bits
  63 *   are cleared.
  64 *
  65 * - Zero-size directories can be truncated to look empty.  It is necessary to
  66 *   run the bmapbtd and directory repair functions to fully rebuild the
  67 *   directory.
  68 *
  69 * - Zero-size symbolic link targets can be truncated to '?'.  It is necessary
  70 *   to run the bmapbtd and symlink repair functions to salvage the symlink.
  71 *
  72 * - Invalid extent size hints will be removed.
  73 *
  74 * - Quotacheck will be scheduled if we repaired an inode that was so badly
  75 *   damaged that the ondisk inode had to be rebuilt.
  76 *
  77 * - Invalid user, group, or project IDs (aka -1U) will be reset to zero.
  78 *   Setuid and setgid bits are cleared.
  79 *
  80 * - Data and attr forks are reset to extents format with zero extents if the
  81 *   fork data is inconsistent.  It is necessary to run the bmapbtd or bmapbta
  82 *   repair functions to recover the space mapping.
  83 *
  84 * - ACLs will not be recovered if the attr fork is zapped or the extended
  85 *   attribute structure itself requires salvaging.
  86 *
  87 * - If the attr fork is zapped, the user and group ids are reset to root and
  88 *   the setuid and setgid bits are removed.
  89 */
  90
  91/*
  92 * All the information we need to repair the ondisk inode if we can't iget the
  93 * incore inode.  We don't allocate this buffer unless we're going to perform
  94 * a repair to the ondisk inode cluster buffer.
  95 */
  96struct xrep_inode {
  97	/* Inode mapping that we saved from the initial lookup attempt. */
  98	struct xfs_imap		imap;
  99
 100	struct xfs_scrub	*sc;
 101
 102	/* Blocks in use on the data device by data extents or bmbt blocks. */
 103	xfs_rfsblock_t		data_blocks;
 104
 105	/* Blocks in use on the rt device. */
 106	xfs_rfsblock_t		rt_blocks;
 107
 108	/* Blocks in use by the attr fork. */
 109	xfs_rfsblock_t		attr_blocks;
 110
 111	/* Number of data device extents for the data fork. */
 112	xfs_extnum_t		data_extents;
 113
 114	/*
 115	 * Number of realtime device extents for the data fork.  If
 116	 * data_extents and rt_extents indicate that the data fork has extents
 117	 * on both devices, we'll just back away slowly.
 118	 */
 119	xfs_extnum_t		rt_extents;
 120
 121	/* Number of (data device) extents for the attr fork. */
 122	xfs_aextnum_t		attr_extents;
 123
 124	/* Sick state to set after zapping parts of the inode. */
 125	unsigned int		ino_sick_mask;
 126
 127	/* Must we remove all access from this file? */
 128	bool			zap_acls;
 
 
 
 
 129};
 130
 131/*
 132 * Setup function for inode repair.  @imap contains the ondisk inode mapping
 133 * information so that we can correct the ondisk inode cluster buffer if
 134 * necessary to make iget work.
 135 */
 136int
 137xrep_setup_inode(
 138	struct xfs_scrub	*sc,
 139	const struct xfs_imap	*imap)
 140{
 141	struct xrep_inode	*ri;
 142
 143	sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS);
 144	if (!sc->buf)
 145		return -ENOMEM;
 146
 147	ri = sc->buf;
 148	memcpy(&ri->imap, imap, sizeof(struct xfs_imap));
 149	ri->sc = sc;
 150	return 0;
 151}
 152
 153/*
 154 * Make sure this ondisk inode can pass the inode buffer verifier.  This is
 155 * not the same as the dinode verifier.
 156 */
 157STATIC void
 158xrep_dinode_buf_core(
 159	struct xfs_scrub	*sc,
 160	struct xfs_buf		*bp,
 161	unsigned int		ioffset)
 162{
 163	struct xfs_dinode	*dip = xfs_buf_offset(bp, ioffset);
 164	struct xfs_trans	*tp = sc->tp;
 165	struct xfs_mount	*mp = sc->mp;
 166	xfs_agino_t		agino;
 167	bool			crc_ok = false;
 168	bool			magic_ok = false;
 169	bool			unlinked_ok = false;
 170
 171	agino = be32_to_cpu(dip->di_next_unlinked);
 172
 173	if (xfs_verify_agino_or_null(bp->b_pag, agino))
 174		unlinked_ok = true;
 175
 176	if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
 177	    xfs_dinode_good_version(mp, dip->di_version))
 178		magic_ok = true;
 179
 180	if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
 181			XFS_DINODE_CRC_OFF))
 182		crc_ok = true;
 183
 184	if (magic_ok && unlinked_ok && crc_ok)
 185		return;
 186
 187	if (!magic_ok) {
 188		dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
 189		dip->di_version = 3;
 190	}
 191	if (!unlinked_ok)
 192		dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
 193	xfs_dinode_calc_crc(mp, dip);
 194	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
 195	xfs_trans_log_buf(tp, bp, ioffset,
 196				  ioffset + sizeof(struct xfs_dinode) - 1);
 197}
 198
 199/* Make sure this inode cluster buffer can pass the inode buffer verifier. */
 200STATIC void
 201xrep_dinode_buf(
 202	struct xfs_scrub	*sc,
 203	struct xfs_buf		*bp)
 204{
 205	struct xfs_mount	*mp = sc->mp;
 206	int			i;
 207	int			ni;
 208
 209	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
 210	for (i = 0; i < ni; i++)
 211		xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog);
 212}
 213
 214/* Reinitialize things that never change in an inode. */
 215STATIC void
 216xrep_dinode_header(
 217	struct xfs_scrub	*sc,
 218	struct xfs_dinode	*dip)
 219{
 220	trace_xrep_dinode_header(sc, dip);
 221
 222	dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
 223	if (!xfs_dinode_good_version(sc->mp, dip->di_version))
 224		dip->di_version = 3;
 225	dip->di_ino = cpu_to_be64(sc->sm->sm_ino);
 226	uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid);
 227	dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
 228}
 229
 230/* Turn di_mode into /something/ recognizable. */
 231STATIC void
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 232xrep_dinode_mode(
 233	struct xrep_inode	*ri,
 234	struct xfs_dinode	*dip)
 235{
 236	struct xfs_scrub	*sc = ri->sc;
 237	uint16_t		mode = be16_to_cpu(dip->di_mode);
 
 238
 239	trace_xrep_dinode_mode(sc, dip);
 240
 241	if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN)
 242		return;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 243
 244	/* bad mode, so we set it to a file that only root can read */
 245	mode = S_IFREG;
 246	dip->di_mode = cpu_to_be16(mode);
 247	dip->di_uid = 0;
 248	dip->di_gid = 0;
 249	ri->zap_acls = true;
 
 250}
 251
 252/* Fix any conflicting flags that the verifiers complain about. */
 253STATIC void
 254xrep_dinode_flags(
 255	struct xfs_scrub	*sc,
 256	struct xfs_dinode	*dip,
 257	bool			isrt)
 258{
 259	struct xfs_mount	*mp = sc->mp;
 260	uint64_t		flags2 = be64_to_cpu(dip->di_flags2);
 261	uint16_t		flags = be16_to_cpu(dip->di_flags);
 262	uint16_t		mode = be16_to_cpu(dip->di_mode);
 263
 264	trace_xrep_dinode_flags(sc, dip);
 265
 266	if (isrt)
 267		flags |= XFS_DIFLAG_REALTIME;
 268	else
 269		flags &= ~XFS_DIFLAG_REALTIME;
 270
 271	/*
 272	 * For regular files on a reflink filesystem, set the REFLINK flag to
 273	 * protect shared extents.  A later stage will actually check those
 274	 * extents and clear the flag if possible.
 275	 */
 276	if (xfs_has_reflink(mp) && S_ISREG(mode))
 277		flags2 |= XFS_DIFLAG2_REFLINK;
 278	else
 279		flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE);
 280	if (flags & XFS_DIFLAG_REALTIME)
 281		flags2 &= ~XFS_DIFLAG2_REFLINK;
 282	if (!xfs_has_bigtime(mp))
 283		flags2 &= ~XFS_DIFLAG2_BIGTIME;
 284	if (!xfs_has_large_extent_counts(mp))
 285		flags2 &= ~XFS_DIFLAG2_NREXT64;
 286	if (flags2 & XFS_DIFLAG2_NREXT64)
 287		dip->di_nrext64_pad = 0;
 288	else if (dip->di_version >= 3)
 289		dip->di_v3_pad = 0;
 290	dip->di_flags = cpu_to_be16(flags);
 291	dip->di_flags2 = cpu_to_be64(flags2);
 292}
 293
 294/*
 295 * Blow out symlink; now it points nowhere.  We don't have to worry about
 296 * incore state because this inode is failing the verifiers.
 297 */
 298STATIC void
 299xrep_dinode_zap_symlink(
 300	struct xrep_inode	*ri,
 301	struct xfs_dinode	*dip)
 302{
 303	struct xfs_scrub	*sc = ri->sc;
 304	char			*p;
 305
 306	trace_xrep_dinode_zap_symlink(sc, dip);
 307
 308	dip->di_format = XFS_DINODE_FMT_LOCAL;
 309	dip->di_size = cpu_to_be64(1);
 310	p = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
 311	*p = '?';
 312	ri->ino_sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED;
 313}
 314
 315/*
 316 * Blow out dir, make the parent point to the root.  In the future repair will
 317 * reconstruct this directory for us.  Note that there's no in-core directory
 318 * inode because the sf verifier tripped, so we don't have to worry about the
 319 * dentry cache.
 320 */
 321STATIC void
 322xrep_dinode_zap_dir(
 323	struct xrep_inode	*ri,
 324	struct xfs_dinode	*dip)
 325{
 326	struct xfs_scrub	*sc = ri->sc;
 327	struct xfs_mount	*mp = sc->mp;
 328	struct xfs_dir2_sf_hdr	*sfp;
 329	int			i8count;
 330
 331	trace_xrep_dinode_zap_dir(sc, dip);
 332
 333	dip->di_format = XFS_DINODE_FMT_LOCAL;
 334	i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM;
 335	sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
 336	sfp->count = 0;
 337	sfp->i8count = i8count;
 338	xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
 339	dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count));
 340	ri->ino_sick_mask |= XFS_SICK_INO_DIR_ZAPPED;
 341}
 342
 343/* Make sure we don't have a garbage file size. */
 344STATIC void
 345xrep_dinode_size(
 346	struct xrep_inode	*ri,
 347	struct xfs_dinode	*dip)
 348{
 349	struct xfs_scrub	*sc = ri->sc;
 350	uint64_t		size = be64_to_cpu(dip->di_size);
 351	uint16_t		mode = be16_to_cpu(dip->di_mode);
 352
 353	trace_xrep_dinode_size(sc, dip);
 354
 355	switch (mode & S_IFMT) {
 356	case S_IFIFO:
 357	case S_IFCHR:
 358	case S_IFBLK:
 359	case S_IFSOCK:
 360		/* di_size can't be nonzero for special files */
 361		dip->di_size = 0;
 362		break;
 363	case S_IFREG:
 364		/* Regular files can't be larger than 2^63-1 bytes. */
 365		dip->di_size = cpu_to_be64(size & ~(1ULL << 63));
 366		break;
 367	case S_IFLNK:
 368		/*
 369		 * Truncate ridiculously oversized symlinks.  If the size is
 370		 * zero, reset it to point to the current directory.  Both of
 371		 * these conditions trigger dinode verifier errors, so there
 372		 * is no in-core state to reset.
 373		 */
 374		if (size > XFS_SYMLINK_MAXLEN)
 375			dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN);
 376		else if (size == 0)
 377			xrep_dinode_zap_symlink(ri, dip);
 378		break;
 379	case S_IFDIR:
 380		/*
 381		 * Directories can't have a size larger than 32G.  If the size
 382		 * is zero, reset it to an empty directory.  Both of these
 383		 * conditions trigger dinode verifier errors, so there is no
 384		 * in-core state to reset.
 385		 */
 386		if (size > XFS_DIR2_SPACE_SIZE)
 387			dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE);
 388		else if (size == 0)
 389			xrep_dinode_zap_dir(ri, dip);
 390		break;
 391	}
 392}
 393
 394/* Fix extent size hints. */
 395STATIC void
 396xrep_dinode_extsize_hints(
 397	struct xfs_scrub	*sc,
 398	struct xfs_dinode	*dip)
 399{
 400	struct xfs_mount	*mp = sc->mp;
 401	uint64_t		flags2 = be64_to_cpu(dip->di_flags2);
 402	uint16_t		flags = be16_to_cpu(dip->di_flags);
 403	uint16_t		mode = be16_to_cpu(dip->di_mode);
 404
 405	xfs_failaddr_t		fa;
 406
 407	trace_xrep_dinode_extsize_hints(sc, dip);
 408
 409	fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
 410			mode, flags);
 411	if (fa) {
 412		dip->di_extsize = 0;
 413		dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE |
 414					      XFS_DIFLAG_EXTSZINHERIT);
 415	}
 416
 417	if (dip->di_version < 3)
 418		return;
 419
 420	fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
 421			mode, flags, flags2);
 422	if (fa) {
 423		dip->di_cowextsize = 0;
 424		dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE);
 425	}
 426}
 427
 428/* Count extents and blocks for an inode given an rmap. */
 429STATIC int
 430xrep_dinode_walk_rmap(
 431	struct xfs_btree_cur		*cur,
 432	const struct xfs_rmap_irec	*rec,
 433	void				*priv)
 434{
 435	struct xrep_inode		*ri = priv;
 436	int				error = 0;
 437
 438	if (xchk_should_terminate(ri->sc, &error))
 439		return error;
 440
 441	/* We only care about this inode. */
 442	if (rec->rm_owner != ri->sc->sm->sm_ino)
 443		return 0;
 444
 445	if (rec->rm_flags & XFS_RMAP_ATTR_FORK) {
 446		ri->attr_blocks += rec->rm_blockcount;
 447		if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
 448			ri->attr_extents++;
 449
 450		return 0;
 451	}
 452
 453	ri->data_blocks += rec->rm_blockcount;
 454	if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
 455		ri->data_extents++;
 456
 457	return 0;
 458}
 459
 460/* Count extents and blocks for an inode from all AG rmap data. */
 461STATIC int
 462xrep_dinode_count_ag_rmaps(
 463	struct xrep_inode	*ri,
 464	struct xfs_perag	*pag)
 465{
 466	struct xfs_btree_cur	*cur;
 467	struct xfs_buf		*agf;
 468	int			error;
 469
 470	error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf);
 471	if (error)
 472		return error;
 473
 474	cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag);
 475	error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri);
 476	xfs_btree_del_cursor(cur, error);
 477	xfs_trans_brelse(ri->sc->tp, agf);
 478	return error;
 479}
 480
 481/* Count extents and blocks for a given inode from all rmap data. */
 482STATIC int
 483xrep_dinode_count_rmaps(
 484	struct xrep_inode	*ri)
 485{
 486	struct xfs_perag	*pag;
 487	xfs_agnumber_t		agno;
 488	int			error;
 489
 490	if (!xfs_has_rmapbt(ri->sc->mp) || xfs_has_realtime(ri->sc->mp))
 491		return -EOPNOTSUPP;
 492
 493	for_each_perag(ri->sc->mp, agno, pag) {
 494		error = xrep_dinode_count_ag_rmaps(ri, pag);
 495		if (error) {
 496			xfs_perag_rele(pag);
 497			return error;
 498		}
 499	}
 500
 501	/* Can't have extents on both the rt and the data device. */
 502	if (ri->data_extents && ri->rt_extents)
 503		return -EFSCORRUPTED;
 504
 505	trace_xrep_dinode_count_rmaps(ri->sc,
 506			ri->data_blocks, ri->rt_blocks, ri->attr_blocks,
 507			ri->data_extents, ri->rt_extents, ri->attr_extents);
 508	return 0;
 509}
 510
 511/* Return true if this extents-format ifork looks like garbage. */
 512STATIC bool
 513xrep_dinode_bad_extents_fork(
 514	struct xfs_scrub	*sc,
 515	struct xfs_dinode	*dip,
 516	unsigned int		dfork_size,
 517	int			whichfork)
 518{
 519	struct xfs_bmbt_irec	new;
 520	struct xfs_bmbt_rec	*dp;
 521	xfs_extnum_t		nex;
 522	bool			isrt;
 523	unsigned int		i;
 524
 525	nex = xfs_dfork_nextents(dip, whichfork);
 526	if (nex > dfork_size / sizeof(struct xfs_bmbt_rec))
 527		return true;
 528
 529	dp = XFS_DFORK_PTR(dip, whichfork);
 530
 531	isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME);
 532	for (i = 0; i < nex; i++, dp++) {
 533		xfs_failaddr_t	fa;
 534
 535		xfs_bmbt_disk_get_all(dp, &new);
 536		fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork,
 537				&new);
 538		if (fa)
 539			return true;
 540	}
 541
 542	return false;
 543}
 544
 545/* Return true if this btree-format ifork looks like garbage. */
 546STATIC bool
 547xrep_dinode_bad_bmbt_fork(
 548	struct xfs_scrub	*sc,
 549	struct xfs_dinode	*dip,
 550	unsigned int		dfork_size,
 551	int			whichfork)
 552{
 553	struct xfs_bmdr_block	*dfp;
 554	xfs_extnum_t		nex;
 555	unsigned int		i;
 556	unsigned int		dmxr;
 557	unsigned int		nrecs;
 558	unsigned int		level;
 559
 560	nex = xfs_dfork_nextents(dip, whichfork);
 561	if (nex <= dfork_size / sizeof(struct xfs_bmbt_rec))
 562		return true;
 563
 564	if (dfork_size < sizeof(struct xfs_bmdr_block))
 565		return true;
 566
 567	dfp = XFS_DFORK_PTR(dip, whichfork);
 568	nrecs = be16_to_cpu(dfp->bb_numrecs);
 569	level = be16_to_cpu(dfp->bb_level);
 570
 571	if (nrecs == 0 || XFS_BMDR_SPACE_CALC(nrecs) > dfork_size)
 572		return true;
 573	if (level == 0 || level >= XFS_BM_MAXLEVELS(sc->mp, whichfork))
 574		return true;
 575
 576	dmxr = xfs_bmdr_maxrecs(dfork_size, 0);
 577	for (i = 1; i <= nrecs; i++) {
 578		struct xfs_bmbt_key	*fkp;
 579		xfs_bmbt_ptr_t		*fpp;
 580		xfs_fileoff_t		fileoff;
 581		xfs_fsblock_t		fsbno;
 582
 583		fkp = XFS_BMDR_KEY_ADDR(dfp, i);
 584		fileoff = be64_to_cpu(fkp->br_startoff);
 585		if (!xfs_verify_fileoff(sc->mp, fileoff))
 586			return true;
 587
 588		fpp = XFS_BMDR_PTR_ADDR(dfp, i, dmxr);
 589		fsbno = be64_to_cpu(*fpp);
 590		if (!xfs_verify_fsbno(sc->mp, fsbno))
 591			return true;
 592	}
 593
 594	return false;
 595}
 596
 597/*
 598 * Check the data fork for things that will fail the ifork verifiers or the
 599 * ifork formatters.
 600 */
 601STATIC bool
 602xrep_dinode_check_dfork(
 603	struct xfs_scrub	*sc,
 604	struct xfs_dinode	*dip,
 605	uint16_t		mode)
 606{
 607	void			*dfork_ptr;
 608	int64_t			data_size;
 609	unsigned int		fmt;
 610	unsigned int		dfork_size;
 611
 612	/*
 613	 * Verifier functions take signed int64_t, so check for bogus negative
 614	 * values first.
 615	 */
 616	data_size = be64_to_cpu(dip->di_size);
 617	if (data_size < 0)
 618		return true;
 619
 620	fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK);
 621	switch (mode & S_IFMT) {
 622	case S_IFIFO:
 623	case S_IFCHR:
 624	case S_IFBLK:
 625	case S_IFSOCK:
 626		if (fmt != XFS_DINODE_FMT_DEV)
 627			return true;
 628		break;
 629	case S_IFREG:
 630		if (fmt == XFS_DINODE_FMT_LOCAL)
 631			return true;
 632		fallthrough;
 633	case S_IFLNK:
 634	case S_IFDIR:
 635		switch (fmt) {
 636		case XFS_DINODE_FMT_LOCAL:
 637		case XFS_DINODE_FMT_EXTENTS:
 638		case XFS_DINODE_FMT_BTREE:
 639			break;
 640		default:
 641			return true;
 642		}
 643		break;
 644	default:
 645		return true;
 646	}
 647
 648	dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK);
 649	dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
 650
 651	switch (fmt) {
 652	case XFS_DINODE_FMT_DEV:
 653		break;
 654	case XFS_DINODE_FMT_LOCAL:
 655		/* dir/symlink structure cannot be larger than the fork */
 656		if (data_size > dfork_size)
 657			return true;
 658		/* directory structure must pass verification. */
 659		if (S_ISDIR(mode) &&
 660		    xfs_dir2_sf_verify(sc->mp, dfork_ptr, data_size) != NULL)
 661			return true;
 662		/* symlink structure must pass verification. */
 663		if (S_ISLNK(mode) &&
 664		    xfs_symlink_shortform_verify(dfork_ptr, data_size) != NULL)
 665			return true;
 666		break;
 667	case XFS_DINODE_FMT_EXTENTS:
 668		if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size,
 669				XFS_DATA_FORK))
 670			return true;
 671		break;
 672	case XFS_DINODE_FMT_BTREE:
 673		if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size,
 674				XFS_DATA_FORK))
 675			return true;
 676		break;
 677	default:
 678		return true;
 679	}
 680
 681	return false;
 682}
 683
 684static void
 685xrep_dinode_set_data_nextents(
 686	struct xfs_dinode	*dip,
 687	xfs_extnum_t		nextents)
 688{
 689	if (xfs_dinode_has_large_extent_counts(dip))
 690		dip->di_big_nextents = cpu_to_be64(nextents);
 691	else
 692		dip->di_nextents = cpu_to_be32(nextents);
 693}
 694
 695static void
 696xrep_dinode_set_attr_nextents(
 697	struct xfs_dinode	*dip,
 698	xfs_extnum_t		nextents)
 699{
 700	if (xfs_dinode_has_large_extent_counts(dip))
 701		dip->di_big_anextents = cpu_to_be32(nextents);
 702	else
 703		dip->di_anextents = cpu_to_be16(nextents);
 704}
 705
 706/* Reset the data fork to something sane. */
 707STATIC void
 708xrep_dinode_zap_dfork(
 709	struct xrep_inode	*ri,
 710	struct xfs_dinode	*dip,
 711	uint16_t		mode)
 712{
 713	struct xfs_scrub	*sc = ri->sc;
 714
 715	trace_xrep_dinode_zap_dfork(sc, dip);
 716
 717	ri->ino_sick_mask |= XFS_SICK_INO_BMBTD_ZAPPED;
 718
 719	xrep_dinode_set_data_nextents(dip, 0);
 720	ri->data_blocks = 0;
 721	ri->rt_blocks = 0;
 722
 723	/* Special files always get reset to DEV */
 724	switch (mode & S_IFMT) {
 725	case S_IFIFO:
 726	case S_IFCHR:
 727	case S_IFBLK:
 728	case S_IFSOCK:
 729		dip->di_format = XFS_DINODE_FMT_DEV;
 730		dip->di_size = 0;
 731		return;
 732	}
 733
 734	/*
 735	 * If we have data extents, reset to an empty map and hope the user
 736	 * will run the bmapbtd checker next.
 737	 */
 738	if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) {
 739		dip->di_format = XFS_DINODE_FMT_EXTENTS;
 740		return;
 741	}
 742
 743	/* Otherwise, reset the local format to the minimum. */
 744	switch (mode & S_IFMT) {
 745	case S_IFLNK:
 746		xrep_dinode_zap_symlink(ri, dip);
 747		break;
 748	case S_IFDIR:
 749		xrep_dinode_zap_dir(ri, dip);
 750		break;
 751	}
 752}
 753
 754/*
 755 * Check the attr fork for things that will fail the ifork verifiers or the
 756 * ifork formatters.
 757 */
 758STATIC bool
 759xrep_dinode_check_afork(
 760	struct xfs_scrub		*sc,
 761	struct xfs_dinode		*dip)
 762{
 763	struct xfs_attr_sf_hdr		*afork_ptr;
 764	size_t				attr_size;
 765	unsigned int			afork_size;
 766
 767	if (XFS_DFORK_BOFF(dip) == 0)
 768		return dip->di_aformat != XFS_DINODE_FMT_EXTENTS ||
 769		       xfs_dfork_attr_extents(dip) != 0;
 770
 771	afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
 772	afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
 773
 774	switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) {
 775	case XFS_DINODE_FMT_LOCAL:
 776		/* Fork has to be large enough to extract the xattr size. */
 777		if (afork_size < sizeof(struct xfs_attr_sf_hdr))
 778			return true;
 779
 780		/* xattr structure cannot be larger than the fork */
 781		attr_size = be16_to_cpu(afork_ptr->totsize);
 782		if (attr_size > afork_size)
 783			return true;
 784
 785		/* xattr structure must pass verification. */
 786		return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL;
 787	case XFS_DINODE_FMT_EXTENTS:
 788		if (xrep_dinode_bad_extents_fork(sc, dip, afork_size,
 789					XFS_ATTR_FORK))
 790			return true;
 791		break;
 792	case XFS_DINODE_FMT_BTREE:
 793		if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size,
 794					XFS_ATTR_FORK))
 795			return true;
 796		break;
 797	default:
 798		return true;
 799	}
 800
 801	return false;
 802}
 803
 804/*
 805 * Reset the attr fork to empty.  Since the attr fork could have contained
 806 * ACLs, make the file readable only by root.
 807 */
 808STATIC void
 809xrep_dinode_zap_afork(
 810	struct xrep_inode	*ri,
 811	struct xfs_dinode	*dip,
 812	uint16_t		mode)
 813{
 814	struct xfs_scrub	*sc = ri->sc;
 815
 816	trace_xrep_dinode_zap_afork(sc, dip);
 817
 818	ri->ino_sick_mask |= XFS_SICK_INO_BMBTA_ZAPPED;
 819
 820	dip->di_aformat = XFS_DINODE_FMT_EXTENTS;
 821	xrep_dinode_set_attr_nextents(dip, 0);
 822	ri->attr_blocks = 0;
 823
 824	/*
 825	 * If the data fork is in btree format, removing the attr fork entirely
 826	 * might cause verifier failures if the next level down in the bmbt
 827	 * could now fit in the data fork area.
 828	 */
 829	if (dip->di_format != XFS_DINODE_FMT_BTREE)
 830		dip->di_forkoff = 0;
 831	dip->di_mode = cpu_to_be16(mode & ~0777);
 832	dip->di_uid = 0;
 833	dip->di_gid = 0;
 834}
 835
/*
 * Make sure the fork offset is a sensible value.
 *
 * Computes the minimum literal-area space that each fork needs, then adjusts
 * di_forkoff (or zaps the attr fork) in the two cases where a fork has zero
 * ondisk extents but the rmap scan recorded extents for it in @ri and the
 * fork area is smaller than a minimal bmbt root.
 *
 * @ri:   repair context; supplies the extent counts found by the rmap scan
 * @dip:  ondisk inode being repaired; di_forkoff may be rewritten
 * @mode: file mode, passed through to xrep_dinode_zap_afork
 */
STATIC void
xrep_dinode_ensure_forkoff(
	struct xrep_inode	*ri,
	struct xfs_dinode	*dip,
	uint16_t		mode)
{
	struct xfs_bmdr_block	*bmdr;
	struct xfs_scrub	*sc = ri->sc;
	xfs_extnum_t		attr_extents, data_extents;
	size_t			bmdr_minsz = XFS_BMDR_SPACE_CALC(1);
	unsigned int		lit_sz = XFS_LITINO(sc->mp);
	unsigned int		afork_min, dfork_min;

	trace_xrep_dinode_ensure_forkoff(sc, dip);

	/*
	 * Before calling this function, xrep_dinode_core ensured that both
	 * forks actually fit inside their respective literal areas.  If this
	 * was not the case, the fork was reset to FMT_EXTENTS with zero
	 * records.  If the rmapbt scan found attr or data fork blocks, this
	 * will be noted in the dinode_stats, and we must leave enough room
	 * for the bmap repair code to reconstruct the mapping structure.
	 *
	 * First, compute the minimum space required for the attr fork.
	 */
	switch (dip->di_aformat) {
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * If we still have a shortform xattr structure at all, that
		 * means the attr fork area was exactly large enough to fit
		 * the sf structure.
		 */
		afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		attr_extents = xfs_dfork_attr_extents(dip);
		if (attr_extents) {
			/*
			 * We must maintain sufficient space to hold the entire
			 * extent map array in the data fork.  Note that we
			 * previously zapped the fork if it had no chance of
			 * fitting in the inode.
			 */
			afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents;
		} else if (ri->attr_extents > 0) {
			/*
			 * The attr fork thinks it has zero extents, but we
			 * found some xattr extents.  We need to leave enough
			 * empty space here so that the incore attr fork will
			 * get created (and hence trigger the attr fork bmap
			 * repairer).
			 */
			afork_min = bmdr_minsz;
		} else {
			/* No extents on disk or found in rmapbt. */
			afork_min = 0;
		}
		break;
	case XFS_DINODE_FMT_BTREE:
		/* Must have space for btree header and key/pointers. */
		bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
		afork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr);
		break;
	default:
		/* We should never see any other formats. */
		afork_min = 0;
		break;
	}

	/* Compute the minimum space required for the data fork. */
	switch (dip->di_format) {
	case XFS_DINODE_FMT_DEV:
		dfork_min = sizeof(__be32);
		break;
	case XFS_DINODE_FMT_UUID:
		dfork_min = sizeof(uuid_t);
		break;
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * If we still have a shortform data fork at all, that means
		 * the data fork area was large enough to fit whatever was in
		 * there.
		 */
		dfork_min = be64_to_cpu(dip->di_size);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		data_extents = xfs_dfork_data_extents(dip);
		if (data_extents) {
			/*
			 * We must maintain sufficient space to hold the entire
			 * extent map array in the data fork.  Note that we
			 * previously zapped the fork if it had no chance of
			 * fitting in the inode.
			 */
			dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents;
		} else if (ri->data_extents > 0 || ri->rt_extents > 0) {
			/*
			 * The data fork thinks it has zero extents, but we
			 * found some data extents.  We need to leave enough
			 * empty space here so that the data fork bmap repair
			 * will recover the mappings.
			 */
			dfork_min = bmdr_minsz;
		} else {
			/* No extents on disk or found in rmapbt. */
			dfork_min = 0;
		}
		break;
	case XFS_DINODE_FMT_BTREE:
		/* Must have space for btree header and key/pointers. */
		bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
		dfork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr);
		break;
	default:
		/* Unrecognized formats reserve no space. */
		dfork_min = 0;
		break;
	}

	/*
	 * Round all values up to the nearest 8 bytes, because that is the
	 * precision of di_forkoff.
	 */
	afork_min = roundup(afork_min, 8);
	dfork_min = roundup(dfork_min, 8);
	bmdr_minsz = roundup(bmdr_minsz, 8);

	ASSERT(dfork_min <= lit_sz);
	ASSERT(afork_min <= lit_sz);

	/*
	 * If the data fork was zapped and we don't have enough space for the
	 * recovery fork, move the attr fork up.
	 */
	if (dip->di_format == XFS_DINODE_FMT_EXTENTS &&
	    xfs_dfork_data_extents(dip) == 0 &&
	    (ri->data_extents > 0 || ri->rt_extents > 0) &&
	    bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) {
		if (bmdr_minsz + afork_min > lit_sz) {
			/*
			 * The attr fork and the stub fork we need to recover
			 * the data fork won't both fit.  Zap the attr fork.
			 */
			xrep_dinode_zap_afork(ri, dip, mode);
			afork_min = bmdr_minsz;
		} else {
			void	*before, *after;

			/*
			 * Otherwise, just slide the attr fork up.  The attr
			 * fork address and size are both re-read after
			 * di_forkoff changes, so the copy length is the size
			 * of the attr fork area at its new offset.
			 */
			before = XFS_DFORK_APTR(dip);
			dip->di_forkoff = bmdr_minsz >> 3;
			after = XFS_DFORK_APTR(dip);
			memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp));
		}
	}

	/*
	 * If the attr fork was zapped and we don't have enough space for the
	 * recovery fork, move the attr fork down.
	 */
	if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS &&
	    xfs_dfork_attr_extents(dip) == 0 &&
	    ri->attr_extents > 0 &&
	    bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) {
		if (dip->di_format == XFS_DINODE_FMT_BTREE) {
			/*
			 * If the data fork is in btree format then we can't
			 * adjust forkoff because that runs the risk of
			 * violating the extents/btree format transition rules.
			 */
		} else if (bmdr_minsz + dfork_min > lit_sz) {
			/*
			 * If we can't move the attr fork, too bad, we lose the
			 * attr fork and leak its blocks.
			 */
			xrep_dinode_zap_afork(ri, dip, mode);
		} else {
			/*
			 * Otherwise, just slide the attr fork down.  The attr
			 * fork is empty, so we don't have any old contents to
			 * move here.
			 */
			dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3;
		}
	}
}
1022
1023/*
1024 * Zap the data/attr forks if we spot anything that isn't going to pass the
1025 * ifork verifiers or the ifork formatters, because we need to get the inode
1026 * into good enough shape that the higher level repair functions can run.
1027 */
1028STATIC void
1029xrep_dinode_zap_forks(
1030	struct xrep_inode	*ri,
1031	struct xfs_dinode	*dip)
1032{
1033	struct xfs_scrub	*sc = ri->sc;
1034	xfs_extnum_t		data_extents;
1035	xfs_extnum_t		attr_extents;
1036	xfs_filblks_t		nblocks;
1037	uint16_t		mode;
1038	bool			zap_datafork = false;
1039	bool			zap_attrfork = ri->zap_acls;
1040
1041	trace_xrep_dinode_zap_forks(sc, dip);
1042
1043	mode = be16_to_cpu(dip->di_mode);
1044
1045	data_extents = xfs_dfork_data_extents(dip);
1046	attr_extents = xfs_dfork_attr_extents(dip);
1047	nblocks = be64_to_cpu(dip->di_nblocks);
1048
1049	/* Inode counters don't make sense? */
1050	if (data_extents > nblocks)
1051		zap_datafork = true;
1052	if (attr_extents > nblocks)
1053		zap_attrfork = true;
1054	if (data_extents + attr_extents > nblocks)
1055		zap_datafork = zap_attrfork = true;
1056
1057	if (!zap_datafork)
1058		zap_datafork = xrep_dinode_check_dfork(sc, dip, mode);
1059	if (!zap_attrfork)
1060		zap_attrfork = xrep_dinode_check_afork(sc, dip);
1061
1062	/* Zap whatever's bad. */
1063	if (zap_attrfork)
1064		xrep_dinode_zap_afork(ri, dip, mode);
1065	if (zap_datafork)
1066		xrep_dinode_zap_dfork(ri, dip, mode);
1067	xrep_dinode_ensure_forkoff(ri, dip, mode);
1068
1069	/*
1070	 * Zero di_nblocks if we don't have any extents at all to satisfy the
1071	 * buffer verifier.
1072	 */
1073	data_extents = xfs_dfork_data_extents(dip);
1074	attr_extents = xfs_dfork_attr_extents(dip);
1075	if (data_extents + attr_extents == 0)
1076		dip->di_nblocks = 0;
1077}
1078
/*
 * Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget.
 *
 * Counts this inode's rmap records, reads the inode cluster buffer, runs the
 * xrep_dinode_* fixers on the ondisk inode, logs the result, and then tries
 * to load the inode into sc->ip before committing the transaction.
 *
 * Returns zero on success.  On the success path a new scrub transaction has
 * been allocated, dquots have been attached, and the inode has been locked
 * with IOLOCK_EXCL and ILOCK_EXCL via xchk_ilock.  Otherwise returns a
 * negative errno from the rmap scan, the buffer read, the commit, the iget
 * attempt, the transaction allocation, or the dquot attach.
 */
STATIC int
xrep_dinode_core(
	struct xrep_inode	*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_buf		*bp;
	struct xfs_dinode	*dip;
	xfs_ino_t		ino = sc->sm->sm_ino;
	int			error;
	int			iget_error;

	/* Figure out what this inode had mapped in both forks. */
	error = xrep_dinode_count_rmaps(ri);
	if (error)
		return error;

	/* Read the inode cluster buffer. */
	error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp,
			ri->imap.im_blkno, ri->imap.im_len, XBF_UNMAPPED, &bp,
			NULL);
	if (error)
		return error;

	/* Make sure we can pass the inode buffer verifier. */
	xrep_dinode_buf(sc, bp);
	bp->b_ops = &xfs_inode_buf_ops;

	/* Fix everything the verifier will complain about. */
	dip = xfs_buf_offset(bp, ri->imap.im_boffset);
	xrep_dinode_header(sc, dip);
	xrep_dinode_mode(ri, dip);
	/* The last argument tells the flags fixer whether rt extents were found. */
	xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
	xrep_dinode_size(ri, dip);
	xrep_dinode_extsize_hints(sc, dip);
	xrep_dinode_zap_forks(ri, dip);

	/* Write out the inode. */
	trace_xrep_dinode_fixed(sc, dip);
	xfs_dinode_calc_crc(sc->mp, dip);
	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF);
	/* Log only the byte range of this one inode within the cluster buffer. */
	xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset,
			ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1);

	/*
	 * In theory, we've fixed the ondisk inode record enough that we should
	 * be able to load the inode into the cache.  Try to iget that inode
	 * now while we hold the AGI and the inode cluster buffer and take the
	 * IOLOCK so that we can continue with repairs without anyone else
	 * accessing the inode.  If iget fails, we still need to commit the
	 * changes.
	 */
	iget_error = xchk_iget(sc, ino, &sc->ip);
	if (!iget_error)
		xchk_ilock(sc, XFS_IOLOCK_EXCL);

	/*
	 * Commit the inode cluster buffer updates and drop the AGI buffer that
	 * we've been holding since scrub setup.  From here on out, repairs
	 * deal only with the cached inode.
	 */
	error = xrep_trans_commit(sc);
	if (error)
		return error;

	/* The commit went through; now report a failed iget, if any. */
	if (iget_error)
		return iget_error;

	error = xchk_trans_alloc(sc, 0);
	if (error)
		return error;

	error = xrep_ino_dqattach(sc);
	if (error)
		return error;

	xchk_ilock(sc, XFS_ILOCK_EXCL);
	/* Carry the "zapped" health flags over to the incore inode. */
	if (ri->ino_sick_mask)
		xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask);
	return 0;
}
1160
1161/* Fix everything xfs_dinode_verify cares about. */
1162STATIC int
1163xrep_dinode_problems(
1164	struct xrep_inode	*ri)
1165{
1166	struct xfs_scrub	*sc = ri->sc;
1167	int			error;
1168
1169	error = xrep_dinode_core(ri);
1170	if (error)
1171		return error;
1172
1173	/* We had to fix a totally busted inode, schedule quotacheck. */
1174	if (XFS_IS_UQUOTA_ON(sc->mp))
1175		xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1176	if (XFS_IS_GQUOTA_ON(sc->mp))
1177		xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1178	if (XFS_IS_PQUOTA_ON(sc->mp))
1179		xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1180
1181	return 0;
1182}
1183
1184/*
1185 * Fix problems that the verifiers don't care about.  In general these are
1186 * errors that don't cause problems elsewhere in the kernel that we can easily
1187 * detect, so we don't check them all that rigorously.
1188 */
1189
1190/* Make sure block and extent counts are ok. */
1191STATIC int
1192xrep_inode_blockcounts(
1193	struct xfs_scrub	*sc)
1194{
1195	struct xfs_ifork	*ifp;
1196	xfs_filblks_t		count;
1197	xfs_filblks_t		acount;
1198	xfs_extnum_t		nextents;
1199	int			error;
1200
1201	trace_xrep_inode_blockcounts(sc);
1202
1203	/* Set data fork counters from the data fork mappings. */
1204	error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
1205			&nextents, &count);
1206	if (error)
1207		return error;
1208	if (xfs_is_reflink_inode(sc->ip)) {
1209		/*
1210		 * data fork blockcount can exceed physical storage if a user
1211		 * reflinks the same block over and over again.
1212		 */
1213		;
1214	} else if (XFS_IS_REALTIME_INODE(sc->ip)) {
1215		if (count >= sc->mp->m_sb.sb_rblocks)
1216			return -EFSCORRUPTED;
1217	} else {
1218		if (count >= sc->mp->m_sb.sb_dblocks)
1219			return -EFSCORRUPTED;
1220	}
1221	error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents);
1222	if (error)
1223		return error;
1224	sc->ip->i_df.if_nextents = nextents;
1225
1226	/* Set attr fork counters from the attr fork mappings. */
1227	ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
1228	if (ifp) {
1229		error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
1230				&nextents, &acount);
1231		if (error)
1232			return error;
1233		if (count >= sc->mp->m_sb.sb_dblocks)
1234			return -EFSCORRUPTED;
1235		error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK,
1236				nextents);
1237		if (error)
1238			return error;
1239		ifp->if_nextents = nextents;
1240	} else {
1241		acount = 0;
1242	}
1243
1244	sc->ip->i_nblocks = count + acount;
1245	return 0;
1246}
1247
1248/* Check for invalid uid/gid/prid. */
1249STATIC void
1250xrep_inode_ids(
1251	struct xfs_scrub	*sc)
1252{
1253	bool			dirty = false;
1254
1255	trace_xrep_inode_ids(sc);
1256
1257	if (!uid_valid(VFS_I(sc->ip)->i_uid)) {
1258		i_uid_write(VFS_I(sc->ip), 0);
1259		dirty = true;
1260		if (XFS_IS_UQUOTA_ON(sc->mp))
1261			xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1262	}
1263
1264	if (!gid_valid(VFS_I(sc->ip)->i_gid)) {
1265		i_gid_write(VFS_I(sc->ip), 0);
1266		dirty = true;
1267		if (XFS_IS_GQUOTA_ON(sc->mp))
1268			xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1269	}
1270
1271	if (sc->ip->i_projid == -1U) {
1272		sc->ip->i_projid = 0;
1273		dirty = true;
1274		if (XFS_IS_PQUOTA_ON(sc->mp))
1275			xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1276	}
1277
1278	/* strip setuid/setgid if we touched any of the ids */
1279	if (dirty)
1280		VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID);
1281}
1282
1283static inline void
1284xrep_clamp_timestamp(
1285	struct xfs_inode	*ip,
1286	struct timespec64	*ts)
1287{
1288	ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC);
1289	*ts = timestamp_truncate(*ts, VFS_I(ip));
1290}
1291
1292/* Nanosecond counters can't have more than 1 billion. */
1293STATIC void
1294xrep_inode_timestamps(
1295	struct xfs_inode	*ip)
1296{
1297	struct timespec64	tstamp;
1298	struct inode		*inode = VFS_I(ip);
1299
1300	tstamp = inode_get_atime(inode);
1301	xrep_clamp_timestamp(ip, &tstamp);
1302	inode_set_atime_to_ts(inode, tstamp);
1303
1304	tstamp = inode_get_mtime(inode);
1305	xrep_clamp_timestamp(ip, &tstamp);
1306	inode_set_mtime_to_ts(inode, tstamp);
1307
1308	tstamp = inode_get_ctime(inode);
1309	xrep_clamp_timestamp(ip, &tstamp);
1310	inode_set_ctime_to_ts(inode, tstamp);
1311
1312	xrep_clamp_timestamp(ip, &ip->i_crtime);
1313}
1314
1315/* Fix inode flags that don't make sense together. */
1316STATIC void
1317xrep_inode_flags(
1318	struct xfs_scrub	*sc)
1319{
1320	uint16_t		mode;
1321
1322	trace_xrep_inode_flags(sc);
1323
1324	mode = VFS_I(sc->ip)->i_mode;
1325
1326	/* Clear junk flags */
1327	if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY)
1328		sc->ip->i_diflags &= ~XFS_DIFLAG_ANY;
1329
1330	/* NEWRTBM only applies to realtime bitmaps */
1331	if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino)
1332		sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM;
1333	else
1334		sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM;
1335
1336	/* These only make sense for directories. */
1337	if (!S_ISDIR(mode))
1338		sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT |
1339					  XFS_DIFLAG_EXTSZINHERIT |
1340					  XFS_DIFLAG_PROJINHERIT |
1341					  XFS_DIFLAG_NOSYMLINKS);
1342
1343	/* These only make sense for files. */
1344	if (!S_ISREG(mode))
1345		sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME |
1346					  XFS_DIFLAG_EXTSIZE);
1347
1348	/* These only make sense for non-rt files. */
1349	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
1350		sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM;
1351
1352	/* Immutable and append only?  Drop the append. */
1353	if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) &&
1354	    (sc->ip->i_diflags & XFS_DIFLAG_APPEND))
1355		sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND;
1356
1357	/* Clear junk flags. */
1358	if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY)
1359		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_ANY;
1360
1361	/* No reflink flag unless we support it and it's a file. */
1362	if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode))
1363		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1364
1365	/* DAX only applies to files and dirs. */
1366	if (!(S_ISREG(mode) || S_ISDIR(mode)))
1367		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
1368
1369	/* No reflink files on the realtime device. */
1370	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
1371		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1372}
1373
1374/*
1375 * Fix size problems with block/node format directories.  If we fail to find
1376 * the extent list, just bail out and let the bmapbtd repair functions clean
1377 * up that mess.
1378 */
1379STATIC void
1380xrep_inode_blockdir_size(
1381	struct xfs_scrub	*sc)
1382{
1383	struct xfs_iext_cursor	icur;
1384	struct xfs_bmbt_irec	got;
1385	struct xfs_ifork	*ifp;
1386	xfs_fileoff_t		off;
1387	int			error;
1388
1389	trace_xrep_inode_blockdir_size(sc);
1390
1391	error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK);
1392	if (error)
1393		return;
1394
1395	/* Find the last block before 32G; this is the dir size. */
1396	ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1397	off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE);
1398	if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) {
1399		/* zero-extents directory? */
1400		return;
1401	}
1402
1403	off = got.br_startoff + got.br_blockcount;
1404	sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE,
1405			XFS_FSB_TO_B(sc->mp, off));
1406}
1407
1408/* Fix size problems with short format directories. */
1409STATIC void
1410xrep_inode_sfdir_size(
1411	struct xfs_scrub	*sc)
1412{
1413	struct xfs_ifork	*ifp;
1414
1415	trace_xrep_inode_sfdir_size(sc);
1416
1417	ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1418	sc->ip->i_disk_size = ifp->if_bytes;
1419}
1420
1421/*
1422 * Fix any irregularities in a directory inode's size now that we can iterate
1423 * extent maps and access other regular inode data.
1424 */
1425STATIC void
1426xrep_inode_dir_size(
1427	struct xfs_scrub	*sc)
1428{
1429	trace_xrep_inode_dir_size(sc);
1430
1431	switch (sc->ip->i_df.if_format) {
1432	case XFS_DINODE_FMT_EXTENTS:
1433	case XFS_DINODE_FMT_BTREE:
1434		xrep_inode_blockdir_size(sc);
1435		break;
1436	case XFS_DINODE_FMT_LOCAL:
1437		xrep_inode_sfdir_size(sc);
1438		break;
1439	}
1440}
1441
1442/* Fix extent size hint problems. */
1443STATIC void
1444xrep_inode_extsize(
1445	struct xfs_scrub	*sc)
1446{
1447	/* Fix misaligned extent size hints on a directory. */
1448	if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
1449	    (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
1450	    xfs_extlen_to_rtxmod(sc->mp, sc->ip->i_extsize) > 0) {
1451		sc->ip->i_extsize = 0;
1452		sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT;
1453	}
1454}
1455
1456/* Fix any irregularities in an inode that the verifiers don't catch. */
1457STATIC int
1458xrep_inode_problems(
1459	struct xfs_scrub	*sc)
1460{
1461	int			error;
1462
1463	error = xrep_inode_blockcounts(sc);
1464	if (error)
1465		return error;
1466	xrep_inode_timestamps(sc->ip);
1467	xrep_inode_flags(sc);
1468	xrep_inode_ids(sc);
1469	/*
1470	 * We can now do a better job fixing the size of a directory now that
1471	 * we can scan the data fork extents than we could in xrep_dinode_size.
1472	 */
1473	if (S_ISDIR(VFS_I(sc->ip)->i_mode))
1474		xrep_inode_dir_size(sc);
1475	xrep_inode_extsize(sc);
1476
1477	trace_xrep_inode_fixed(sc);
1478	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
1479	return xrep_roll_trans(sc);
1480}
1481
1482/* Repair an inode's fields. */
1483int
1484xrep_inode(
1485	struct xfs_scrub	*sc)
1486{
1487	int			error = 0;
1488
1489	/*
1490	 * No inode?  That means we failed the _iget verifiers.  Repair all
1491	 * the things that the inode verifiers care about, then retry _iget.
1492	 */
1493	if (!sc->ip) {
1494		struct xrep_inode	*ri = sc->buf;
1495
1496		ASSERT(ri != NULL);
1497
1498		error = xrep_dinode_problems(ri);
 
 
 
 
 
 
 
1499		if (error)
1500			return error;
1501
1502		/* By this point we had better have a working incore inode. */
1503		if (!sc->ip)
1504			return -EFSCORRUPTED;
1505	}
1506
1507	xfs_trans_ijoin(sc->tp, sc->ip, 0);
1508
1509	/* If we found corruption of any kind, try to fix it. */
1510	if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) ||
1511	    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) {
1512		error = xrep_inode_problems(sc);
1513		if (error)
1514			return error;
1515	}
1516
1517	/* See if we can clear the reflink flag. */
1518	if (xfs_is_reflink_inode(sc->ip)) {
1519		error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
1520		if (error)
1521			return error;
1522	}
1523
1524	return xrep_defer_finish(sc);
1525}