   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4 * All Rights Reserved.
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_log_format.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_bit.h"
  13#include "xfs_sb.h"
  14#include "xfs_mount.h"
  15#include "xfs_defer.h"
  16#include "xfs_dir2.h"
  17#include "xfs_inode.h"
  18#include "xfs_btree.h"
  19#include "xfs_trans.h"
  20#include "xfs_alloc.h"
  21#include "xfs_bmap.h"
  22#include "xfs_bmap_util.h"
  23#include "xfs_bmap_btree.h"
  24#include "xfs_rtbitmap.h"
  25#include "xfs_errortag.h"
  26#include "xfs_error.h"
  27#include "xfs_quota.h"
  28#include "xfs_trans_space.h"
  29#include "xfs_buf_item.h"
  30#include "xfs_trace.h"
  31#include "xfs_attr_leaf.h"
  32#include "xfs_filestream.h"
  33#include "xfs_rmap.h"
  34#include "xfs_ag.h"
  35#include "xfs_ag_resv.h"
  36#include "xfs_refcount.h"
  37#include "xfs_icache.h"
  38#include "xfs_iomap.h"
  39#include "xfs_health.h"
  40#include "xfs_bmap_item.h"
  41#include "xfs_symlink_remote.h"
  42
  43struct kmem_cache		*xfs_bmap_intent_cache;
  44
  45/*
  46 * Miscellaneous helper functions
  47 */
  48
  49/*
  50 * Compute and fill in the value of the maximum depth of a bmap btree
  51 * in this filesystem.  Done once, during mount.
  52 */
  53void
  54xfs_bmap_compute_maxlevels(
  55	xfs_mount_t	*mp,		/* file system mount structure */
  56	int		whichfork)	/* data or attr fork */
  57{
  58	uint64_t	maxblocks;	/* max blocks at this level */
  59	xfs_extnum_t	maxleafents;	/* max leaf entries possible */
  60	int		level;		/* btree level */
  61	int		maxrootrecs;	/* max records in root block */
  62	int		minleafrecs;	/* min records in leaf block */
  63	int		minnoderecs;	/* min records in node block */
  64	int		sz;		/* root block size */
  65
  66	/*
  67	 * The maximum number of extents in a fork, hence the maximum number of
  68	 * leaf entries, is controlled by the size of the on-disk extent count.
  69	 *
  70	 * Note that we can no longer assume that if we are in ATTR1 that the
  71	 * fork offset of all the inodes will be
  72	 * (xfs_default_attroffset(ip) >> 3) because we could have mounted with
  73	 * ATTR2 and then mounted back with ATTR1, keeping the i_forkoff's fixed
  74	 * but probably at various positions. Therefore, for both ATTR1 and
  75	 * ATTR2 we have to assume the worst case scenario of a minimum size
  76	 * available.
  77	 */
  78	maxleafents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp),
  79				whichfork);
  80	if (whichfork == XFS_DATA_FORK)
  81		sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
  82	else
  83		sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
  84
  85	maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
  86	minleafrecs = mp->m_bmap_dmnr[0];
  87	minnoderecs = mp->m_bmap_dmnr[1];
  88	maxblocks = howmany_64(maxleafents, minleafrecs);
  89	for (level = 1; maxblocks > 1; level++) {
  90		if (maxblocks <= maxrootrecs)
  91			maxblocks = 1;
  92		else
  93			maxblocks = howmany_64(maxblocks, minnoderecs);
  94	}
  95	mp->m_bm_maxlevels[whichfork] = level;
  96	ASSERT(mp->m_bm_maxlevels[whichfork] <= xfs_bmbt_maxlevels_ondisk());
  97}
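/*
 * Worked example (illustrative, with assumed sample geometry): if
 * maxleafents were 2^31 and minleafrecs 62, the leaf level would need
 * howmany_64(2^31, 62), roughly 34.6M, blocks.  Each pass of the loop
 * then divides by minnoderecs (say 124) until the survivors fit in the
 * root block (maxrootrecs), so the loop exits after a few iterations
 * and m_bm_maxlevels for the fork lands around 5.
 */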
  98
  99unsigned int
 100xfs_bmap_compute_attr_offset(
 101	struct xfs_mount	*mp)
 102{
 103	if (mp->m_sb.sb_inodesize == 256)
 104		return XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
 105	return XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
 106}
 107
 108STATIC int				/* error */
 109xfs_bmbt_lookup_eq(
 110	struct xfs_btree_cur	*cur,
 111	struct xfs_bmbt_irec	*irec,
 112	int			*stat)	/* success/failure */
 113{
 114	cur->bc_rec.b = *irec;
 115	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
 116}
 117
 118STATIC int				/* error */
 119xfs_bmbt_lookup_first(
 120	struct xfs_btree_cur	*cur,
 121	int			*stat)	/* success/failure */
 122{
 123	cur->bc_rec.b.br_startoff = 0;
 124	cur->bc_rec.b.br_startblock = 0;
 125	cur->bc_rec.b.br_blockcount = 0;
 126	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 127}
 128
 129/*
 130 * Check if the inode needs to be converted to btree format.
 131 */
 132static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
 133{
 134	struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
 135
 136	return whichfork != XFS_COW_FORK &&
 137		ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
 138		ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork);
 139}
 140
 141/*
 142 * Check if the inode should be converted to extent format.
 143 */
 144static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
 145{
 146	struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
 147
 148	return whichfork != XFS_COW_FORK &&
 149		ifp->if_format == XFS_DINODE_FMT_BTREE &&
 150		ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork);
 151}
 152
 153/*
 154 * Update the record referred to by cur to the value given by irec
 155 * This either works (return 0) or gets an EFSCORRUPTED error.
 156 */
 157STATIC int
 158xfs_bmbt_update(
 159	struct xfs_btree_cur	*cur,
 160	struct xfs_bmbt_irec	*irec)
 161{
 162	union xfs_btree_rec	rec;
 163
 164	xfs_bmbt_disk_set_all(&rec.bmbt, irec);
 165	return xfs_btree_update(cur, &rec);
 166}
 167
 168/*
 169 * Compute the worst-case number of indirect blocks that will be used
 170 * for ip's delayed extent of length "len".
 171 */
 172STATIC xfs_filblks_t
 173xfs_bmap_worst_indlen(
 174	xfs_inode_t	*ip,		/* incore inode pointer */
 175	xfs_filblks_t	len)		/* delayed extent length */
 176{
 177	int		level;		/* btree level number */
 178	int		maxrecs;	/* maximum record count at this level */
 179	xfs_mount_t	*mp;		/* mount structure */
 180	xfs_filblks_t	rval;		/* return value */
 181
 182	mp = ip->i_mount;
 183	maxrecs = mp->m_bmap_dmxr[0];
 184	for (level = 0, rval = 0;
 185	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
 186	     level++) {
 187		len += maxrecs - 1;
 188		do_div(len, maxrecs);
 189		rval += len;
 190		if (len == 1)
 191			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
 192				level - 1;
 193		if (level == 0)
 194			maxrecs = mp->m_bmap_dmxr[1];
 195	}
 196	return rval;
 197}
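/*
 * Worked example (illustrative, assuming m_bmap_dmxr values of 62 at
 * the leaf level and 124 above it): a delayed extent of len = 1000
 * blocks needs howmany(1000, 62) = 17 leaf blocks, then
 * howmany(17, 124) = 1 node block.  Since len has reached 1, each
 * remaining level up to XFS_BM_MAXLEVELS costs one more block, giving a
 * worst-case reservation of 17 + 1 + (maxlevels - 2) blocks.
 */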
 198
 199/*
 200 * Calculate the default attribute fork offset for newly created inodes.
 201 */
 202uint
 203xfs_default_attroffset(
 204	struct xfs_inode	*ip)
 205{
 206	if (ip->i_df.if_format == XFS_DINODE_FMT_DEV)
 207		return roundup(sizeof(xfs_dev_t), 8);
 208	return M_IGEO(ip->i_mount)->attr_fork_offset;
 209}
 210
 211/*
 212 * Helper routine to reset inode i_forkoff field when switching attribute fork
 213 * from local to extent format - we reset it where possible to make space
 214 * available for inline data fork extents.
 215 */
 216STATIC void
 217xfs_bmap_forkoff_reset(
 218	xfs_inode_t	*ip,
 219	int		whichfork)
 220{
 221	if (whichfork == XFS_ATTR_FORK &&
 222	    ip->i_df.if_format != XFS_DINODE_FMT_DEV &&
 223	    ip->i_df.if_format != XFS_DINODE_FMT_BTREE) {
 224		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;
 225
 226		if (dfl_forkoff > ip->i_forkoff)
 227			ip->i_forkoff = dfl_forkoff;
 228	}
 229}
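/*
 * Note on units (illustrative): xfs_default_attroffset() returns a byte
 * offset while i_forkoff is stored in 8-byte units, hence the >> 3
 * above.  A default byte offset of 120, for example, is stored as
 * i_forkoff = 15, meaning the attribute fork begins 120 bytes into the
 * fork literal area.
 */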
 230
 231static int
 232xfs_bmap_read_buf(
 233	struct xfs_mount	*mp,		/* file system mount point */
 234	struct xfs_trans	*tp,		/* transaction pointer */
 235	xfs_fsblock_t		fsbno,		/* file system block number */
 236	struct xfs_buf		**bpp)		/* buffer for fsbno */
 237{
 238	struct xfs_buf		*bp;		/* return value */
 239	int			error;
 240
 241	if (!xfs_verify_fsbno(mp, fsbno))
 242		return -EFSCORRUPTED;
 243	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
 244			XFS_FSB_TO_DADDR(mp, fsbno), mp->m_bsize, 0, &bp,
 245			&xfs_bmbt_buf_ops);
 246	if (!error) {
 247		xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
 248		*bpp = bp;
 249	}
 250	return error;
 251}
 252
 253#ifdef DEBUG
 254STATIC struct xfs_buf *
 255xfs_bmap_get_bp(
 256	struct xfs_btree_cur	*cur,
 257	xfs_fsblock_t		bno)
 258{
 259	struct xfs_log_item	*lip;
 260	int			i;
 261
 262	if (!cur)
 263		return NULL;
 264
 265	for (i = 0; i < cur->bc_maxlevels; i++) {
 266		if (!cur->bc_levels[i].bp)
 267			break;
 268		if (xfs_buf_daddr(cur->bc_levels[i].bp) == bno)
 269			return cur->bc_levels[i].bp;
 270	}
 271
 272	/* Chase down all the log items to see if the bp is there */
 273	list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
 274		struct xfs_buf_log_item	*bip = (struct xfs_buf_log_item *)lip;
 275
 276		if (bip->bli_item.li_type == XFS_LI_BUF &&
 277		    xfs_buf_daddr(bip->bli_buf) == bno)
 278			return bip->bli_buf;
 279	}
 280
 281	return NULL;
 282}
 283
 284STATIC void
 285xfs_check_block(
 286	struct xfs_btree_block	*block,
 287	xfs_mount_t		*mp,
 288	int			root,
 289	short			sz)
 290{
 291	int			i, j, dmxr;
 292	__be64			*pp, *thispa;	/* pointer to block address */
 293	xfs_bmbt_key_t		*prevp, *keyp;
 294
 295	ASSERT(be16_to_cpu(block->bb_level) > 0);
 296
 297	prevp = NULL;
  298	for (i = 1; i <= xfs_btree_get_numrecs(block); i++) {
 299		dmxr = mp->m_bmap_dmxr[0];
 300		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
 301
 302		if (prevp) {
 303			ASSERT(be64_to_cpu(prevp->br_startoff) <
 304			       be64_to_cpu(keyp->br_startoff));
 305		}
 306		prevp = keyp;
 307
 308		/*
 309		 * Compare the block numbers to see if there are dups.
 310		 */
 311		if (root)
 312			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
 313		else
 314			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
 315
 316		for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
 317			if (root)
 318				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
 319			else
 320				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
 321			if (*thispa == *pp) {
 322				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %lld",
 323					__func__, j, i,
 324					(unsigned long long)be64_to_cpu(*thispa));
 325				xfs_err(mp, "%s: ptrs are equal in node\n",
 326					__func__);
 327				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 328			}
 329		}
 330	}
 331}
 332
 333/*
 334 * Check that the extents for the inode ip are in the right order in all
  335 * btree leaves. This becomes prohibitively expensive for large extent count
 336 * files, so don't bother with inodes that have more than 10,000 extents in
 337 * them. The btree record ordering checks will still be done, so for such large
 338 * bmapbt constructs that is going to catch most corruptions.
 339 */
 340STATIC void
 341xfs_bmap_check_leaf_extents(
 342	struct xfs_btree_cur	*cur,	/* btree cursor or null */
 343	xfs_inode_t		*ip,		/* incore inode pointer */
 344	int			whichfork)	/* data or attr fork */
 345{
 346	struct xfs_mount	*mp = ip->i_mount;
 347	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
 348	struct xfs_btree_block	*block;	/* current btree block */
 349	xfs_fsblock_t		bno;	/* block # of "block" */
 350	struct xfs_buf		*bp;	/* buffer for "block" */
 351	int			error;	/* error return value */
  352	xfs_extnum_t		i = 0, j; /* index into the extents list */
 353	int			level;	/* btree level, for checking */
 354	__be64			*pp;	/* pointer to block address */
 355	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
 356	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
 357	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
 358	int			bp_release = 0;
 359
 360	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
 361		return;
 362
 363	/* skip large extent count inodes */
 364	if (ip->i_df.if_nextents > 10000)
 365		return;
 366
 367	bno = NULLFSBLOCK;
 368	block = ifp->if_broot;
 369	/*
 370	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
 371	 */
 372	level = be16_to_cpu(block->bb_level);
 373	ASSERT(level > 0);
 374	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
 375	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
 376	bno = be64_to_cpu(*pp);
 377
 378	ASSERT(bno != NULLFSBLOCK);
 379	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
 380	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
 381
 382	/*
 383	 * Go down the tree until leaf level is reached, following the first
 384	 * pointer (leftmost) at each level.
 385	 */
 386	while (level-- > 0) {
 387		/* See if buf is in cur first */
 388		bp_release = 0;
 389		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
 390		if (!bp) {
 391			bp_release = 1;
 392			error = xfs_bmap_read_buf(mp, NULL, bno, &bp);
 393			if (xfs_metadata_is_sick(error))
 394				xfs_btree_mark_sick(cur);
 395			if (error)
 396				goto error_norelse;
 397		}
 398		block = XFS_BUF_TO_BLOCK(bp);
 399		if (level == 0)
 400			break;
 401
 402		/*
 403		 * Check this block for basic sanity (increasing keys and
 404		 * no duplicate blocks).
 405		 */
 406
 407		xfs_check_block(block, mp, 0, 0);
 408		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 409		bno = be64_to_cpu(*pp);
 410		if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
 411			xfs_btree_mark_sick(cur);
 412			error = -EFSCORRUPTED;
 413			goto error0;
 414		}
 415		if (bp_release) {
 416			bp_release = 0;
 417			xfs_trans_brelse(NULL, bp);
 418		}
 419	}
 420
 421	/*
 422	 * Here with bp and block set to the leftmost leaf node in the tree.
 423	 */
 424	i = 0;
 425
 426	/*
 427	 * Loop over all leaf nodes checking that all extents are in the right order.
 428	 */
 429	for (;;) {
 430		xfs_fsblock_t	nextbno;
 431		xfs_extnum_t	num_recs;
 432
 433
 434		num_recs = xfs_btree_get_numrecs(block);
 435
 436		/*
 437		 * Read-ahead the next leaf block, if any.
 438		 */
 439
 440		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
 441
 442		/*
 443		 * Check all the extents to make sure they are OK.
 444		 * If we had a previous block, the last entry should
 445		 * conform with the first entry in this one.
 446		 */
 447
 448		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
 449		if (i) {
 450			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
 451			       xfs_bmbt_disk_get_blockcount(&last) <=
 452			       xfs_bmbt_disk_get_startoff(ep));
 453		}
 454		for (j = 1; j < num_recs; j++) {
 455			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
 456			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
 457			       xfs_bmbt_disk_get_blockcount(ep) <=
 458			       xfs_bmbt_disk_get_startoff(nextp));
 459			ep = nextp;
 460		}
 461
 462		last = *ep;
 463		i += num_recs;
 464		if (bp_release) {
 465			bp_release = 0;
 466			xfs_trans_brelse(NULL, bp);
 467		}
 468		bno = nextbno;
 469		/*
 470		 * If we've reached the end, stop.
 471		 */
 472		if (bno == NULLFSBLOCK)
 473			break;
 474
 475		bp_release = 0;
 476		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
 477		if (!bp) {
 478			bp_release = 1;
 479			error = xfs_bmap_read_buf(mp, NULL, bno, &bp);
 480			if (xfs_metadata_is_sick(error))
 481				xfs_btree_mark_sick(cur);
 482			if (error)
 483				goto error_norelse;
 484		}
 485		block = XFS_BUF_TO_BLOCK(bp);
 486	}
 487
 488	return;
 489
 490error0:
 491	xfs_warn(mp, "%s: at error0", __func__);
 492	if (bp_release)
 493		xfs_trans_brelse(NULL, bp);
 494error_norelse:
 495	xfs_warn(mp, "%s: BAD after btree leaves for %llu extents",
 496		__func__, i);
 497	xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
 498	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 499	return;
 500}
 501
 502/*
 503 * Validate that the bmbt_irecs being returned from bmapi are valid
 504 * given the caller's original parameters.  Specifically check the
 505 * ranges of the returned irecs to ensure that they only extend beyond
 506 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 507 */
 508STATIC void
 509xfs_bmap_validate_ret(
 510	xfs_fileoff_t		bno,
 511	xfs_filblks_t		len,
 512	uint32_t		flags,
 513	xfs_bmbt_irec_t		*mval,
 514	int			nmap,
 515	int			ret_nmap)
 516{
 517	int			i;		/* index to map values */
 518
 519	ASSERT(ret_nmap <= nmap);
 520
 521	for (i = 0; i < ret_nmap; i++) {
 522		ASSERT(mval[i].br_blockcount > 0);
 523		if (!(flags & XFS_BMAPI_ENTIRE)) {
 524			ASSERT(mval[i].br_startoff >= bno);
 525			ASSERT(mval[i].br_blockcount <= len);
 526			ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
 527			       bno + len);
 528		} else {
 529			ASSERT(mval[i].br_startoff < bno + len);
 530			ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
 531			       bno);
 532		}
 533		ASSERT(i == 0 ||
 534		       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
 535		       mval[i].br_startoff);
 536		ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
 537		       mval[i].br_startblock != HOLESTARTBLOCK);
 538		ASSERT(mval[i].br_state == XFS_EXT_NORM ||
 539		       mval[i].br_state == XFS_EXT_UNWRITTEN);
 540	}
 541}
 542
 543#else
 544#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)		do { } while (0)
 545#define	xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)	do { } while (0)
 546#endif /* DEBUG */
 547
 548/*
 549 * Inode fork format manipulation functions
 550 */
 551
 552/*
 553 * Convert the inode format to extent format if it currently is in btree format,
 554 * but the extent list is small enough that it fits into the extent format.
 555 *
 556 * Since the extents are already in-core, all we have to do is give up the space
 557 * for the btree root and pitch the leaf block.
 558 */
 559STATIC int				/* error */
 560xfs_bmap_btree_to_extents(
 561	struct xfs_trans	*tp,	/* transaction pointer */
 562	struct xfs_inode	*ip,	/* incore inode pointer */
 563	struct xfs_btree_cur	*cur,	/* btree cursor */
 564	int			*logflagsp, /* inode logging flags */
 565	int			whichfork)  /* data or attr fork */
 566{
 567	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
 568	struct xfs_mount	*mp = ip->i_mount;
 569	struct xfs_btree_block	*rblock = ifp->if_broot;
 570	struct xfs_btree_block	*cblock;/* child btree block */
 571	xfs_fsblock_t		cbno;	/* child block number */
 572	struct xfs_buf		*cbp;	/* child block's buffer */
 573	int			error;	/* error return value */
 574	__be64			*pp;	/* ptr to block address */
 575	struct xfs_owner_info	oinfo;
 576
 577	/* check if we actually need the extent format first: */
 578	if (!xfs_bmap_wants_extents(ip, whichfork))
 579		return 0;
 580
 581	ASSERT(cur);
 582	ASSERT(whichfork != XFS_COW_FORK);
 583	ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
 584	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
 585	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
 586	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
 587
 588	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
 589	cbno = be64_to_cpu(*pp);
 590#ifdef DEBUG
 591	if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_verify_fsbno(mp, cbno))) {
 592		xfs_btree_mark_sick(cur);
 593		return -EFSCORRUPTED;
 594	}
 595#endif
 596	error = xfs_bmap_read_buf(mp, tp, cbno, &cbp);
 597	if (xfs_metadata_is_sick(error))
 598		xfs_btree_mark_sick(cur);
 599	if (error)
 600		return error;
 601	cblock = XFS_BUF_TO_BLOCK(cbp);
 602	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
 603		return error;
 604
 605	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
 606	error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
 607			XFS_AG_RESV_NONE, false);
 608	if (error)
 609		return error;
 610
 611	ip->i_nblocks--;
 612	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 613	xfs_trans_binval(tp, cbp);
 614	if (cur->bc_levels[0].bp == cbp)
 615		cur->bc_levels[0].bp = NULL;
 616	xfs_iroot_realloc(ip, -1, whichfork);
 617	ASSERT(ifp->if_broot == NULL);
 618	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 619	*logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
 620	return 0;
 621}
 622
 623/*
 624 * Convert an extents-format file into a btree-format file.
 625 * The new file will have a root block (in the inode) and a single child block.
 626 */
 627STATIC int					/* error */
 628xfs_bmap_extents_to_btree(
 629	struct xfs_trans	*tp,		/* transaction pointer */
 630	struct xfs_inode	*ip,		/* incore inode pointer */
 631	struct xfs_btree_cur	**curp,		/* cursor returned to caller */
 632	int			wasdel,		/* converting a delayed alloc */
 633	int			*logflagsp,	/* inode logging flags */
 634	int			whichfork)	/* data or attr fork */
 635{
 636	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
 637	struct xfs_buf		*abp;		/* buffer for ablock */
 638	struct xfs_alloc_arg	args;		/* allocation arguments */
 639	struct xfs_bmbt_rec	*arp;		/* child record pointer */
 640	struct xfs_btree_block	*block;		/* btree root block */
 641	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
 642	int			error;		/* error return value */
 643	struct xfs_ifork	*ifp;		/* inode fork pointer */
 644	struct xfs_bmbt_key	*kp;		/* root block key pointer */
 645	struct xfs_mount	*mp;		/* mount structure */
 646	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
 647	struct xfs_iext_cursor	icur;
 648	struct xfs_bmbt_irec	rec;
 649	xfs_extnum_t		cnt = 0;
 650
 651	mp = ip->i_mount;
 652	ASSERT(whichfork != XFS_COW_FORK);
 653	ifp = xfs_ifork_ptr(ip, whichfork);
 654	ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS);
 655
 656	/*
 657	 * Make space in the inode incore. This needs to be undone if we fail
 658	 * to expand the root.
 659	 */
 660	xfs_iroot_realloc(ip, 1, whichfork);
 661
 662	/*
 663	 * Fill in the root.
 664	 */
 665	block = ifp->if_broot;
 666	xfs_bmbt_init_block(ip, block, NULL, 1, 1);
 667	/*
 668	 * Need a cursor.  Can't allocate until bb_level is filled in.
 669	 */
 670	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 671	if (wasdel)
 672		cur->bc_flags |= XFS_BTREE_BMBT_WASDEL;
 673	/*
 674	 * Convert to a btree with two levels, one record in root.
 675	 */
 676	ifp->if_format = XFS_DINODE_FMT_BTREE;
 677	memset(&args, 0, sizeof(args));
 678	args.tp = tp;
 679	args.mp = mp;
 680	xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
 681
 682	args.minlen = args.maxlen = args.prod = 1;
 683	args.wasdel = wasdel;
 684	*logflagsp = 0;
 685	error = xfs_alloc_vextent_start_ag(&args,
 686				XFS_INO_TO_FSB(mp, ip->i_ino));
 687	if (error)
 688		goto out_root_realloc;
 689
 690	/*
 691	 * Allocation can't fail, the space was reserved.
 692	 */
 693	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
 694		error = -ENOSPC;
 695		goto out_root_realloc;
 696	}
 697
 698	cur->bc_bmap.allocated++;
 699	ip->i_nblocks++;
 700	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
 701	error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
 702			XFS_FSB_TO_DADDR(mp, args.fsbno),
 703			mp->m_bsize, 0, &abp);
 704	if (error)
 705		goto out_unreserve_dquot;
 706
 707	/*
 708	 * Fill in the child block.
 709	 */
 710	ablock = XFS_BUF_TO_BLOCK(abp);
 711	xfs_bmbt_init_block(ip, ablock, abp, 0, 0);
 712
 713	for_each_xfs_iext(ifp, &icur, &rec) {
 714		if (isnullstartblock(rec.br_startblock))
 715			continue;
 716		arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
 717		xfs_bmbt_disk_set_all(arp, &rec);
 718		cnt++;
 719	}
 720	ASSERT(cnt == ifp->if_nextents);
 721	xfs_btree_set_numrecs(ablock, cnt);
 722
 723	/*
 724	 * Fill in the root key and pointer.
 725	 */
 726	kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
 727	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
 728	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
 729	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
 730						be16_to_cpu(block->bb_level)));
 731	*pp = cpu_to_be64(args.fsbno);
 732
 733	/*
 734	 * Do all this logging at the end so that
 735	 * the root is at the right level.
 736	 */
 737	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
 738	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
 739	ASSERT(*curp == NULL);
 740	*curp = cur;
 741	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
 742	return 0;
 743
 744out_unreserve_dquot:
 745	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 746out_root_realloc:
 747	xfs_iroot_realloc(ip, -1, whichfork);
 748	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 749	ASSERT(ifp->if_broot == NULL);
 750	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 751
 752	return error;
 753}
 754
 755/*
 756 * Convert a local file to an extents file.
 757 * This code is out of bounds for data forks of regular files,
 758 * since the file data needs to get logged so things will stay consistent.
 759 * (The bmap-level manipulations are ok, though).
 760 */
 761void
 762xfs_bmap_local_to_extents_empty(
 763	struct xfs_trans	*tp,
 764	struct xfs_inode	*ip,
 765	int			whichfork)
 766{
 767	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
 768
 769	ASSERT(whichfork != XFS_COW_FORK);
 770	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
 771	ASSERT(ifp->if_bytes == 0);
 772	ASSERT(ifp->if_nextents == 0);
 773
 774	xfs_bmap_forkoff_reset(ip, whichfork);
 775	ifp->if_data = NULL;
 776	ifp->if_height = 0;
 777	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 778	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 779}
 780
 781
 782STATIC int				/* error */
 783xfs_bmap_local_to_extents(
 784	xfs_trans_t	*tp,		/* transaction pointer */
 785	xfs_inode_t	*ip,		/* incore inode pointer */
 786	xfs_extlen_t	total,		/* total blocks needed by transaction */
 787	int		*logflagsp,	/* inode logging flags */
 788	int		whichfork,
 789	void		(*init_fn)(struct xfs_trans *tp,
 790				   struct xfs_buf *bp,
 791				   struct xfs_inode *ip,
 792				   struct xfs_ifork *ifp))
 793{
 794	int		error = 0;
 795	int		flags;		/* logging flags returned */
 796	struct xfs_ifork *ifp;		/* inode fork pointer */
 797	xfs_alloc_arg_t	args;		/* allocation arguments */
 798	struct xfs_buf	*bp;		/* buffer for extent block */
 799	struct xfs_bmbt_irec rec;
 800	struct xfs_iext_cursor icur;
 801
 802	/*
 803	 * We don't want to deal with the case of keeping inode data inline yet.
 804	 * So sending the data fork of a regular inode is invalid.
 805	 */
 806	ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
 807	ifp = xfs_ifork_ptr(ip, whichfork);
 808	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
 809
 810	if (!ifp->if_bytes) {
 811		xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
 812		flags = XFS_ILOG_CORE;
 813		goto done;
 814	}
 815
 816	flags = 0;
 817	error = 0;
 818	memset(&args, 0, sizeof(args));
 819	args.tp = tp;
 820	args.mp = ip->i_mount;
 821	args.total = total;
 822	args.minlen = args.maxlen = args.prod = 1;
 823	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
 824
 825	/*
 826	 * Allocate a block.  We know we need only one, since the
 827	 * file currently fits in an inode.
 828	 */
 829	args.total = total;
 830	args.minlen = args.maxlen = args.prod = 1;
 831	error = xfs_alloc_vextent_start_ag(&args,
 832			XFS_INO_TO_FSB(args.mp, ip->i_ino));
 833	if (error)
 834		goto done;
 835
 836	/* Can't fail, the space was reserved. */
 837	ASSERT(args.fsbno != NULLFSBLOCK);
 838	ASSERT(args.len == 1);
 839	error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
 840			XFS_FSB_TO_DADDR(args.mp, args.fsbno),
 841			args.mp->m_bsize, 0, &bp);
 842	if (error)
 843		goto done;
 844
 845	/*
 846	 * Initialize the block, copy the data and log the remote buffer.
 847	 *
 848	 * The callout is responsible for logging because the remote format
 849	 * might differ from the local format and thus we don't know how much to
 850	 * log here. Note that init_fn must also set the buffer log item type
 851	 * correctly.
 852	 */
 853	init_fn(tp, bp, ip, ifp);
 854
 855	/* account for the change in fork size */
 856	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
 857	xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
 858	flags |= XFS_ILOG_CORE;
 859
 860	ifp->if_data = NULL;
 861	ifp->if_height = 0;
 862
 863	rec.br_startoff = 0;
 864	rec.br_startblock = args.fsbno;
 865	rec.br_blockcount = 1;
 866	rec.br_state = XFS_EXT_NORM;
 867	xfs_iext_first(ifp, &icur);
 868	xfs_iext_insert(ip, &icur, &rec, 0);
 869
 870	ifp->if_nextents = 1;
 871	ip->i_nblocks = 1;
 872	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
 873	flags |= xfs_ilog_fext(whichfork);
 874
 875done:
 876	*logflagsp = flags;
 877	return error;
 878}
 879
 880/*
 881 * Called from xfs_bmap_add_attrfork to handle btree format files.
 882 */
 883STATIC int					/* error */
 884xfs_bmap_add_attrfork_btree(
 885	xfs_trans_t		*tp,		/* transaction pointer */
 886	xfs_inode_t		*ip,		/* incore inode pointer */
 887	int			*flags)		/* inode logging flags */
 888{
 889	struct xfs_btree_block	*block = ip->i_df.if_broot;
 890	struct xfs_btree_cur	*cur;		/* btree cursor */
 891	int			error;		/* error return value */
 892	xfs_mount_t		*mp;		/* file system mount struct */
 893	int			stat;		/* newroot status */
 894
 895	mp = ip->i_mount;
 896
 897	if (XFS_BMAP_BMDR_SPACE(block) <= xfs_inode_data_fork_size(ip))
 898		*flags |= XFS_ILOG_DBROOT;
 899	else {
 900		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
 901		error = xfs_bmbt_lookup_first(cur, &stat);
 902		if (error)
 903			goto error0;
 904		/* must be at least one entry */
 905		if (XFS_IS_CORRUPT(mp, stat != 1)) {
 906			xfs_btree_mark_sick(cur);
 907			error = -EFSCORRUPTED;
 908			goto error0;
 909		}
 910		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
 911			goto error0;
 912		if (stat == 0) {
 913			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 914			return -ENOSPC;
 915		}
 916		cur->bc_bmap.allocated = 0;
 917		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 918	}
 919	return 0;
 920error0:
 921	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 922	return error;
 923}
 924
 925/*
 926 * Called from xfs_bmap_add_attrfork to handle extents format files.
 927 */
 928STATIC int					/* error */
 929xfs_bmap_add_attrfork_extents(
 930	struct xfs_trans	*tp,		/* transaction pointer */
 931	struct xfs_inode	*ip,		/* incore inode pointer */
 932	int			*flags)		/* inode logging flags */
 933{
 934	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
 935	int			error;		/* error return value */
 936
 937	if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <=
 938	    xfs_inode_data_fork_size(ip))
 939		return 0;
 940	cur = NULL;
 941	error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
 942					  XFS_DATA_FORK);
 943	if (cur) {
 944		cur->bc_bmap.allocated = 0;
 945		xfs_btree_del_cursor(cur, error);
 946	}
 947	return error;
 948}
 949
 950/*
 951 * Called from xfs_bmap_add_attrfork to handle local format files. Each
 952 * different data fork content type needs a different callout to do the
 953 * conversion. Some are basic and only require special block initialisation
  954 * callouts for the data formatting, others (directories) are so specialised they
 955 * handle everything themselves.
 956 *
 957 * XXX (dgc): investigate whether directory conversion can use the generic
 958 * formatting callout. It should be possible - it's just a very complex
 959 * formatter.
 960 */
 961STATIC int					/* error */
 962xfs_bmap_add_attrfork_local(
 963	struct xfs_trans	*tp,		/* transaction pointer */
 964	struct xfs_inode	*ip,		/* incore inode pointer */
 965	int			*flags)		/* inode logging flags */
 966{
 967	struct xfs_da_args	dargs;		/* args for dir/attr code */
 968
 969	if (ip->i_df.if_bytes <= xfs_inode_data_fork_size(ip))
 970		return 0;
 971
 972	if (S_ISDIR(VFS_I(ip)->i_mode)) {
 973		memset(&dargs, 0, sizeof(dargs));
 974		dargs.geo = ip->i_mount->m_dir_geo;
 975		dargs.dp = ip;
 976		dargs.total = dargs.geo->fsbcount;
 977		dargs.whichfork = XFS_DATA_FORK;
 978		dargs.trans = tp;
 979		return xfs_dir2_sf_to_block(&dargs);
 980	}
 981
 982	if (S_ISLNK(VFS_I(ip)->i_mode))
 983		return xfs_bmap_local_to_extents(tp, ip, 1, flags,
 984						 XFS_DATA_FORK,
 985						 xfs_symlink_local_to_remote);
 986
 987	/* should only be called for types that support local format data */
 988	ASSERT(0);
 989	xfs_bmap_mark_sick(ip, XFS_ATTR_FORK);
 990	return -EFSCORRUPTED;
 991}
 992
 993/*
 994 * Set an inode attr fork offset based on the format of the data fork.
 995 */
 996static int
 997xfs_bmap_set_attrforkoff(
 998	struct xfs_inode	*ip,
 999	int			size,
1000	int			*version)
1001{
1002	int			default_size = xfs_default_attroffset(ip) >> 3;
1003
1004	switch (ip->i_df.if_format) {
1005	case XFS_DINODE_FMT_DEV:
1006		ip->i_forkoff = default_size;
1007		break;
1008	case XFS_DINODE_FMT_LOCAL:
1009	case XFS_DINODE_FMT_EXTENTS:
1010	case XFS_DINODE_FMT_BTREE:
1011		ip->i_forkoff = xfs_attr_shortform_bytesfit(ip, size);
1012		if (!ip->i_forkoff)
1013			ip->i_forkoff = default_size;
1014		else if (xfs_has_attr2(ip->i_mount) && version)
1015			*version = 2;
1016		break;
1017	default:
1018		ASSERT(0);
1019		return -EINVAL;
1020	}
1021
1022	return 0;
1023}
1024
1025/*
1026 * Convert inode from non-attributed to attributed.
1027 * Must not be in a transaction, ip must not be locked.
1028 */
1029int						/* error code */
1030xfs_bmap_add_attrfork(
1031	xfs_inode_t		*ip,		/* incore inode pointer */
1032	int			size,		/* space new attribute needs */
1033	int			rsvd)		/* xact may use reserved blks */
1034{
1035	xfs_mount_t		*mp;		/* mount structure */
1036	xfs_trans_t		*tp;		/* transaction pointer */
1037	int			blks;		/* space reservation */
1038	int			version = 1;	/* superblock attr version */
1039	int			logflags;	/* logging flags */
1040	int			error;		/* error return value */
1041
1042	ASSERT(xfs_inode_has_attr_fork(ip) == 0);
1043
1044	mp = ip->i_mount;
1045	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1046
1047	blks = XFS_ADDAFORK_SPACE_RES(mp);
1048
1049	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_addafork, blks, 0,
1050			rsvd, &tp);
1051	if (error)
1052		return error;
1053	if (xfs_inode_has_attr_fork(ip))
1054		goto trans_cancel;
1055
1056	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1057	error = xfs_bmap_set_attrforkoff(ip, size, &version);
1058	if (error)
1059		goto trans_cancel;
1060
1061	xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
1062	logflags = 0;
1063	switch (ip->i_df.if_format) {
1064	case XFS_DINODE_FMT_LOCAL:
1065		error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
1066		break;
1067	case XFS_DINODE_FMT_EXTENTS:
1068		error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
1069		break;
1070	case XFS_DINODE_FMT_BTREE:
1071		error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
1072		break;
1073	default:
1074		error = 0;
1075		break;
1076	}
1077	if (logflags)
1078		xfs_trans_log_inode(tp, ip, logflags);
1079	if (error)
1080		goto trans_cancel;
1081	if (!xfs_has_attr(mp) ||
1082	   (!xfs_has_attr2(mp) && version == 2)) {
1083		bool log_sb = false;
1084
1085		spin_lock(&mp->m_sb_lock);
1086		if (!xfs_has_attr(mp)) {
1087			xfs_add_attr(mp);
1088			log_sb = true;
1089		}
1090		if (!xfs_has_attr2(mp) && version == 2) {
1091			xfs_add_attr2(mp);
1092			log_sb = true;
1093		}
1094		spin_unlock(&mp->m_sb_lock);
1095		if (log_sb)
1096			xfs_log_sb(tp);
1097	}
1098
1099	error = xfs_trans_commit(tp);
1100	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1101	return error;
1102
1103trans_cancel:
1104	xfs_trans_cancel(tp);
1105	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1106	return error;
1107}
1108
1109/*
1110 * Internal and external extent tree search functions.
1111 */
1112
1113struct xfs_iread_state {
1114	struct xfs_iext_cursor	icur;
1115	xfs_extnum_t		loaded;
1116};
1117
1118int
1119xfs_bmap_complain_bad_rec(
1120	struct xfs_inode		*ip,
1121	int				whichfork,
1122	xfs_failaddr_t			fa,
1123	const struct xfs_bmbt_irec	*irec)
1124{
1125	struct xfs_mount		*mp = ip->i_mount;
1126	const char			*forkname;
1127
1128	switch (whichfork) {
1129	case XFS_DATA_FORK:	forkname = "data"; break;
1130	case XFS_ATTR_FORK:	forkname = "attr"; break;
1131	case XFS_COW_FORK:	forkname = "CoW"; break;
1132	default:		forkname = "???"; break;
1133	}
1134
1135	xfs_warn(mp,
1136 "Bmap BTree record corruption in inode 0x%llx %s fork detected at %pS!",
1137				ip->i_ino, forkname, fa);
1138	xfs_warn(mp,
1139		"Offset 0x%llx, start block 0x%llx, block count 0x%llx state 0x%x",
1140		irec->br_startoff, irec->br_startblock, irec->br_blockcount,
1141		irec->br_state);
1142
1143	return -EFSCORRUPTED;
1144}
1145
1146/* Stuff every bmbt record from this block into the incore extent map. */
1147static int
1148xfs_iread_bmbt_block(
1149	struct xfs_btree_cur	*cur,
1150	int			level,
1151	void			*priv)
1152{
1153	struct xfs_iread_state	*ir = priv;
1154	struct xfs_mount	*mp = cur->bc_mp;
1155	struct xfs_inode	*ip = cur->bc_ino.ip;
1156	struct xfs_btree_block	*block;
1157	struct xfs_buf		*bp;
1158	struct xfs_bmbt_rec	*frp;
1159	xfs_extnum_t		num_recs;
1160	xfs_extnum_t		j;
1161	int			whichfork = cur->bc_ino.whichfork;
1162	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1163
1164	block = xfs_btree_get_block(cur, level, &bp);
1165
1166	/* Abort if we find more records than nextents. */
1167	num_recs = xfs_btree_get_numrecs(block);
1168	if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) {
1169		xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
1170				(unsigned long long)ip->i_ino);
1171		xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
1172				sizeof(*block), __this_address);
1173		xfs_bmap_mark_sick(ip, whichfork);
1174		return -EFSCORRUPTED;
1175	}
1176
1177	/* Copy records into the incore cache. */
1178	frp = XFS_BMBT_REC_ADDR(mp, block, 1);
1179	for (j = 0; j < num_recs; j++, frp++, ir->loaded++) {
1180		struct xfs_bmbt_irec	new;
1181		xfs_failaddr_t		fa;
1182
1183		xfs_bmbt_disk_get_all(frp, &new);
1184		fa = xfs_bmap_validate_extent(ip, whichfork, &new);
1185		if (fa) {
1186			xfs_inode_verifier_error(ip, -EFSCORRUPTED,
1187					"xfs_iread_extents(2)", frp,
1188					sizeof(*frp), fa);
1189			xfs_bmap_mark_sick(ip, whichfork);
1190			return xfs_bmap_complain_bad_rec(ip, whichfork, fa,
1191					&new);
1192		}
1193		xfs_iext_insert(ip, &ir->icur, &new,
1194				xfs_bmap_fork_to_state(whichfork));
1195		trace_xfs_read_extent(ip, &ir->icur,
1196				xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
1197		xfs_iext_next(ifp, &ir->icur);
1198	}
1199
1200	return 0;
1201}
1202
1203/*
1204 * Read in extents from a btree-format inode.
1205 */
1206int
1207xfs_iread_extents(
1208	struct xfs_trans	*tp,
1209	struct xfs_inode	*ip,
1210	int			whichfork)
1211{
1212	struct xfs_iread_state	ir;
1213	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1214	struct xfs_mount	*mp = ip->i_mount;
1215	struct xfs_btree_cur	*cur;
1216	int			error;
1217
1218	if (!xfs_need_iread_extents(ifp))
1219		return 0;
1220
1221	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
1222
1223	ir.loaded = 0;
1224	xfs_iext_first(ifp, &ir.icur);
1225	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
1226	error = xfs_btree_visit_blocks(cur, xfs_iread_bmbt_block,
1227			XFS_BTREE_VISIT_RECORDS, &ir);
1228	xfs_btree_del_cursor(cur, error);
1229	if (error)
1230		goto out;
1231
1232	if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
1233		xfs_bmap_mark_sick(ip, whichfork);
1234		error = -EFSCORRUPTED;
1235		goto out;
1236	}
1237	ASSERT(ir.loaded == xfs_iext_count(ifp));
1238	/*
1239	 * Use release semantics so that we can use acquire semantics in
1240	 * xfs_need_iread_extents and be guaranteed to see a valid mapping tree
1241	 * after that load.
1242	 */
1243	smp_store_release(&ifp->if_needextents, 0);
1244	return 0;
1245out:
1246	if (xfs_metadata_is_sick(error))
1247		xfs_bmap_mark_sick(ip, whichfork);
1248	xfs_iext_destroy(ifp);
1249	return error;
1250}
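/*
 * The paired acquire lives in xfs_need_iread_extents(); a sketch of the
 * reader side, assuming the xfs_inode_fork.h definition:
 *
 *	return smp_load_acquire(&ifp->if_needextents) != 0;
 *
 * The acquire load pairs with the smp_store_release() above, so a
 * reader that observes if_needextents == 0 is guaranteed to also see
 * the fully built incore extent tree.
 */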
1251
1252/*
1253 * Returns the relative block number of the first unused block(s) in the given
1254 * fork with at least "len" logically contiguous blocks free.  This is the
1255 * lowest-address hole if the fork has holes, else the first block past the end
1256 * of fork.  Return 0 if the fork is currently local (in-inode).
1257 */
1258int						/* error */
1259xfs_bmap_first_unused(
1260	struct xfs_trans	*tp,		/* transaction pointer */
1261	struct xfs_inode	*ip,		/* incore inode */
1262	xfs_extlen_t		len,		/* size of hole to find */
1263	xfs_fileoff_t		*first_unused,	/* unused block */
1264	int			whichfork)	/* data or attr fork */
1265{
1266	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1267	struct xfs_bmbt_irec	got;
1268	struct xfs_iext_cursor	icur;
1269	xfs_fileoff_t		lastaddr = 0;
1270	xfs_fileoff_t		lowest, max;
1271	int			error;
1272
1273	if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
1274		*first_unused = 0;
1275		return 0;
1276	}
1277
1278	ASSERT(xfs_ifork_has_extents(ifp));
1279
1280	error = xfs_iread_extents(tp, ip, whichfork);
1281	if (error)
1282		return error;
1283
1284	lowest = max = *first_unused;
1285	for_each_xfs_iext(ifp, &icur, &got) {
1286		/*
1287		 * See if the hole before this extent will work.
1288		 */
1289		if (got.br_startoff >= lowest + len &&
1290		    got.br_startoff - max >= len)
1291			break;
1292		lastaddr = got.br_startoff + got.br_blockcount;
1293		max = XFS_FILEOFF_MAX(lastaddr, lowest);
1294	}
1295
1296	*first_unused = max;
1297	return 0;
1298}
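/*
 * Worked example (illustrative): with *first_unused = 0, len = 4, and
 * extents mapped at offsets [0, 10) and [14, 20), the loop finds no
 * hole before the first extent and records max = 10; the second extent
 * starts at 14, and 14 - 10 >= len, so the loop breaks and the function
 * returns *first_unused = 10.
 */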
1299
1300/*
1301 * Returns the file-relative block number of the last block - 1 before
1302 * last_block (input value) in the file.
1303 * This is not based on i_size, it is based on the extent records.
1304 * Returns 0 for local files, as they do not have extent records.
1305 */
1306int						/* error */
1307xfs_bmap_last_before(
1308	struct xfs_trans	*tp,		/* transaction pointer */
1309	struct xfs_inode	*ip,		/* incore inode */
1310	xfs_fileoff_t		*last_block,	/* last block */
1311	int			whichfork)	/* data or attr fork */
1312{
1313	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1314	struct xfs_bmbt_irec	got;
1315	struct xfs_iext_cursor	icur;
1316	int			error;
1317
1318	switch (ifp->if_format) {
1319	case XFS_DINODE_FMT_LOCAL:
1320		*last_block = 0;
1321		return 0;
1322	case XFS_DINODE_FMT_BTREE:
1323	case XFS_DINODE_FMT_EXTENTS:
1324		break;
1325	default:
1326		ASSERT(0);
1327		xfs_bmap_mark_sick(ip, whichfork);
1328		return -EFSCORRUPTED;
1329	}
1330
1331	error = xfs_iread_extents(tp, ip, whichfork);
1332	if (error)
1333		return error;
1334
1335	if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
1336		*last_block = 0;
1337	return 0;
1338}
1339
1340int
1341xfs_bmap_last_extent(
1342	struct xfs_trans	*tp,
1343	struct xfs_inode	*ip,
1344	int			whichfork,
1345	struct xfs_bmbt_irec	*rec,
1346	int			*is_empty)
1347{
1348	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1349	struct xfs_iext_cursor	icur;
1350	int			error;
1351
1352	error = xfs_iread_extents(tp, ip, whichfork);
1353	if (error)
1354		return error;
1355
1356	xfs_iext_last(ifp, &icur);
1357	if (!xfs_iext_get_extent(ifp, &icur, rec))
1358		*is_empty = 1;
1359	else
1360		*is_empty = 0;
1361	return 0;
1362}
1363
1364/*
1365 * Check the last inode extent to determine whether this allocation will result
1366 * in blocks being allocated at the end of the file. When we allocate new data
1367 * blocks at the end of the file which do not start at the previous data block,
1368 * we will try to align the new blocks at stripe unit boundaries.
1369 *
1370 * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1371 * at, or past the EOF.
1372 */
1373STATIC int
1374xfs_bmap_isaeof(
1375	struct xfs_bmalloca	*bma,
1376	int			whichfork)
1377{
1378	struct xfs_bmbt_irec	rec;
1379	int			is_empty;
1380	int			error;
1381
1382	bma->aeof = false;
1383	error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1384				     &is_empty);
1385	if (error)
1386		return error;
1387
1388	if (is_empty) {
1389		bma->aeof = true;
1390		return 0;
1391	}
1392
1393	/*
 1394	 * Check if we are allocating at or past the last extent, or at least into
1395	 * the last delayed allocated extent.
1396	 */
1397	bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1398		(bma->offset >= rec.br_startoff &&
1399		 isnullstartblock(rec.br_startblock));
1400	return 0;
1401}
1402
1403/*
1404 * Returns the file-relative block number of the first block past eof in
1405 * the file.  This is not based on i_size, it is based on the extent records.
1406 * Returns 0 for local files, as they do not have extent records.
1407 */
1408int
1409xfs_bmap_last_offset(
1410	struct xfs_inode	*ip,
1411	xfs_fileoff_t		*last_block,
1412	int			whichfork)
1413{
1414	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1415	struct xfs_bmbt_irec	rec;
1416	int			is_empty;
1417	int			error;
1418
1419	*last_block = 0;
1420
1421	if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
1422		return 0;
1423
1424	if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp))) {
1425		xfs_bmap_mark_sick(ip, whichfork);
1426		return -EFSCORRUPTED;
1427	}
1428
1429	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1430	if (error || is_empty)
1431		return error;
1432
1433	*last_block = rec.br_startoff + rec.br_blockcount;
1434	return 0;
1435}
1436
1437/*
1438 * Extent tree manipulation functions used during allocation.
1439 */
1440
1441/*
1442 * Convert a delayed allocation to a real allocation.
1443 */
1444STATIC int				/* error */
1445xfs_bmap_add_extent_delay_real(
1446	struct xfs_bmalloca	*bma,
1447	int			whichfork)
1448{
1449	struct xfs_mount	*mp = bma->ip->i_mount;
1450	struct xfs_ifork	*ifp = xfs_ifork_ptr(bma->ip, whichfork);
1451	struct xfs_bmbt_irec	*new = &bma->got;
1452	int			error;	/* error return value */
1453	int			i;	/* temp state */
1454	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
1455	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
1456					/* left is 0, right is 1, prev is 2 */
1457	int			rval=0;	/* return value (logging flags) */
1458	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
1459	xfs_filblks_t		da_new; /* new count del alloc blocks used */
1460	xfs_filblks_t		da_old; /* old count del alloc blocks used */
1461	xfs_filblks_t		temp=0;	/* value for da_new calculations */
1462	int			tmp_rval;	/* partial logging flags */
1463	struct xfs_bmbt_irec	old;
1464
1465	ASSERT(whichfork != XFS_ATTR_FORK);
1466	ASSERT(!isnullstartblock(new->br_startblock));
1467	ASSERT(!bma->cur || (bma->cur->bc_flags & XFS_BTREE_BMBT_WASDEL));
1468
1469	XFS_STATS_INC(mp, xs_add_exlist);
1470
1471#define	LEFT		r[0]
1472#define	RIGHT		r[1]
1473#define	PREV		r[2]
1474
1475	/*
1476	 * Set up a bunch of variables to make the tests simpler.
1477	 */
1478	xfs_iext_get_extent(ifp, &bma->icur, &PREV);
1479	new_endoff = new->br_startoff + new->br_blockcount;
1480	ASSERT(isnullstartblock(PREV.br_startblock));
1481	ASSERT(PREV.br_startoff <= new->br_startoff);
1482	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1483
1484	da_old = startblockval(PREV.br_startblock);
1485	da_new = 0;
1486
1487	/*
1488	 * Set flags determining what part of the previous delayed allocation
1489	 * extent is being replaced by a real allocation.
1490	 */
1491	if (PREV.br_startoff == new->br_startoff)
1492		state |= BMAP_LEFT_FILLING;
1493	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1494		state |= BMAP_RIGHT_FILLING;
1495
1496	/*
1497	 * Check and set flags if this segment has a left neighbor.
1498	 * Don't set contiguous if the combined extent would be too large.
1499	 */
1500	if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
1501		state |= BMAP_LEFT_VALID;
1502		if (isnullstartblock(LEFT.br_startblock))
1503			state |= BMAP_LEFT_DELAY;
1504	}
1505
1506	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1507	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1508	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1509	    LEFT.br_state == new->br_state &&
1510	    LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
1511		state |= BMAP_LEFT_CONTIG;
1512
1513	/*
1514	 * Check and set flags if this segment has a right neighbor.
1515	 * Don't set contiguous if the combined extent would be too large.
1516	 * Also check for all-three-contiguous being too large.
1517	 */
1518	if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
1519		state |= BMAP_RIGHT_VALID;
1520		if (isnullstartblock(RIGHT.br_startblock))
1521			state |= BMAP_RIGHT_DELAY;
1522	}
1523
1524	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1525	    new_endoff == RIGHT.br_startoff &&
1526	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1527	    new->br_state == RIGHT.br_state &&
1528	    new->br_blockcount + RIGHT.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
1529	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1530		       BMAP_RIGHT_FILLING)) !=
1531		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1532		       BMAP_RIGHT_FILLING) ||
1533	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1534			<= XFS_MAX_BMBT_EXTLEN))
1535		state |= BMAP_RIGHT_CONTIG;
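	/*
	 * Illustrative reading of the guard above: RIGHT only counts as
	 * contiguous if either this is not a three-way merge of LEFT,
	 * PREV and RIGHT, or, when it is, the combined length still fits
	 * in XFS_MAX_BMBT_EXTLEN (2^21 - 1, i.e. 2,097,151 blocks).
	 * Three extents of 600,000 blocks each would merge fine; three
	 * of 800,000 each would exceed the limit and RIGHT_CONTIG stays
	 * clear.
	 */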
1536
1537	error = 0;
1538	/*
1539	 * Switch out based on the FILLING and CONTIG state bits.
1540	 */
1541	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1542			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1543	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1544	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1545		/*
1546		 * Filling in all of a previously delayed allocation extent.
1547		 * The left and right neighbors are both contiguous with new.
1548		 */
1549		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
1550
1551		xfs_iext_remove(bma->ip, &bma->icur, state);
1552		xfs_iext_remove(bma->ip, &bma->icur, state);
1553		xfs_iext_prev(ifp, &bma->icur);
1554		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1555		ifp->if_nextents--;
1556
1557		if (bma->cur == NULL)
1558			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1559		else {
1560			rval = XFS_ILOG_CORE;
1561			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1562			if (error)
1563				goto done;
1564			if (XFS_IS_CORRUPT(mp, i != 1)) {
1565				xfs_btree_mark_sick(bma->cur);
1566				error = -EFSCORRUPTED;
1567				goto done;
1568			}
1569			error = xfs_btree_delete(bma->cur, &i);
1570			if (error)
1571				goto done;
1572			if (XFS_IS_CORRUPT(mp, i != 1)) {
1573				xfs_btree_mark_sick(bma->cur);
1574				error = -EFSCORRUPTED;
1575				goto done;
1576			}
1577			error = xfs_btree_decrement(bma->cur, 0, &i);
1578			if (error)
1579				goto done;
1580			if (XFS_IS_CORRUPT(mp, i != 1)) {
1581				xfs_btree_mark_sick(bma->cur);
1582				error = -EFSCORRUPTED;
1583				goto done;
1584			}
1585			error = xfs_bmbt_update(bma->cur, &LEFT);
1586			if (error)
1587				goto done;
1588		}
1589		break;
1590
1591	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1592		/*
1593		 * Filling in all of a previously delayed allocation extent.
1594		 * The left neighbor is contiguous, the right is not.
1595		 */
1596		old = LEFT;
1597		LEFT.br_blockcount += PREV.br_blockcount;
1598
1599		xfs_iext_remove(bma->ip, &bma->icur, state);
1600		xfs_iext_prev(ifp, &bma->icur);
1601		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1602
1603		if (bma->cur == NULL)
1604			rval = XFS_ILOG_DEXT;
1605		else {
1606			rval = 0;
1607			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1608			if (error)
1609				goto done;
1610			if (XFS_IS_CORRUPT(mp, i != 1)) {
1611				xfs_btree_mark_sick(bma->cur);
1612				error = -EFSCORRUPTED;
1613				goto done;
1614			}
1615			error = xfs_bmbt_update(bma->cur, &LEFT);
1616			if (error)
1617				goto done;
1618		}
1619		break;
1620
1621	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1622		/*
1623		 * Filling in all of a previously delayed allocation extent.
1624		 * The right neighbor is contiguous, the left is not. Take care
1625		 * with delay -> unwritten extent allocation here because the
1626		 * delalloc record we are overwriting is always written.
1627		 */
1628		PREV.br_startblock = new->br_startblock;
1629		PREV.br_blockcount += RIGHT.br_blockcount;
1630		PREV.br_state = new->br_state;
1631
1632		xfs_iext_next(ifp, &bma->icur);
1633		xfs_iext_remove(bma->ip, &bma->icur, state);
1634		xfs_iext_prev(ifp, &bma->icur);
1635		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1636
1637		if (bma->cur == NULL)
1638			rval = XFS_ILOG_DEXT;
1639		else {
1640			rval = 0;
1641			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1642			if (error)
1643				goto done;
1644			if (XFS_IS_CORRUPT(mp, i != 1)) {
1645				xfs_btree_mark_sick(bma->cur);
1646				error = -EFSCORRUPTED;
1647				goto done;
1648			}
1649			error = xfs_bmbt_update(bma->cur, &PREV);
1650			if (error)
1651				goto done;
1652		}
1653		break;
1654
1655	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1656		/*
1657		 * Filling in all of a previously delayed allocation extent.
1658		 * Neither the left nor right neighbors are contiguous with
1659		 * the new one.
1660		 */
1661		PREV.br_startblock = new->br_startblock;
1662		PREV.br_state = new->br_state;
1663		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1664		ifp->if_nextents++;
1665
1666		if (bma->cur == NULL)
1667			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1668		else {
1669			rval = XFS_ILOG_CORE;
1670			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1671			if (error)
1672				goto done;
1673			if (XFS_IS_CORRUPT(mp, i != 0)) {
1674				xfs_btree_mark_sick(bma->cur);
1675				error = -EFSCORRUPTED;
1676				goto done;
1677			}
1678			error = xfs_btree_insert(bma->cur, &i);
1679			if (error)
1680				goto done;
1681			if (XFS_IS_CORRUPT(mp, i != 1)) {
1682				xfs_btree_mark_sick(bma->cur);
1683				error = -EFSCORRUPTED;
1684				goto done;
1685			}
1686		}
1687		break;
1688
1689	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1690		/*
1691		 * Filling in the first part of a previous delayed allocation.
1692		 * The left neighbor is contiguous.
1693		 */
1694		old = LEFT;
1695		temp = PREV.br_blockcount - new->br_blockcount;
1696		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1697				startblockval(PREV.br_startblock));
1698
1699		LEFT.br_blockcount += new->br_blockcount;
1700
1701		PREV.br_blockcount = temp;
1702		PREV.br_startoff += new->br_blockcount;
1703		PREV.br_startblock = nullstartblock(da_new);
1704
1705		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1706		xfs_iext_prev(ifp, &bma->icur);
1707		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1708
1709		if (bma->cur == NULL)
1710			rval = XFS_ILOG_DEXT;
1711		else {
1712			rval = 0;
1713			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1714			if (error)
1715				goto done;
1716			if (XFS_IS_CORRUPT(mp, i != 1)) {
1717				xfs_btree_mark_sick(bma->cur);
1718				error = -EFSCORRUPTED;
1719				goto done;
1720			}
1721			error = xfs_bmbt_update(bma->cur, &LEFT);
1722			if (error)
1723				goto done;
1724		}
1725		break;
1726
1727	case BMAP_LEFT_FILLING:
1728		/*
1729		 * Filling in the first part of a previous delayed allocation.
1730		 * The left neighbor is not contiguous.
1731		 */
1732		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1733		ifp->if_nextents++;
1734
1735		if (bma->cur == NULL)
1736			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1737		else {
1738			rval = XFS_ILOG_CORE;
1739			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1740			if (error)
1741				goto done;
1742			if (XFS_IS_CORRUPT(mp, i != 0)) {
1743				xfs_btree_mark_sick(bma->cur);
1744				error = -EFSCORRUPTED;
1745				goto done;
1746			}
1747			error = xfs_btree_insert(bma->cur, &i);
1748			if (error)
1749				goto done;
1750			if (XFS_IS_CORRUPT(mp, i != 1)) {
1751				xfs_btree_mark_sick(bma->cur);
1752				error = -EFSCORRUPTED;
1753				goto done;
1754			}
1755		}
1756
1757		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1758			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1759					&bma->cur, 1, &tmp_rval, whichfork);
1760			rval |= tmp_rval;
1761			if (error)
1762				goto done;
1763		}
1764
1765		temp = PREV.br_blockcount - new->br_blockcount;
1766		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1767			startblockval(PREV.br_startblock) -
1768			(bma->cur ? bma->cur->bc_bmap.allocated : 0));
1769
1770		PREV.br_startoff = new_endoff;
1771		PREV.br_blockcount = temp;
1772		PREV.br_startblock = nullstartblock(da_new);
1773		xfs_iext_next(ifp, &bma->icur);
1774		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1775		xfs_iext_prev(ifp, &bma->icur);
1776		break;
1777
1778	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1779		/*
1780		 * Filling in the last part of a previous delayed allocation.
1781		 * The right neighbor is contiguous with the new allocation.
1782		 */
1783		old = RIGHT;
1784		RIGHT.br_startoff = new->br_startoff;
1785		RIGHT.br_startblock = new->br_startblock;
1786		RIGHT.br_blockcount += new->br_blockcount;
1787
1788		if (bma->cur == NULL)
1789			rval = XFS_ILOG_DEXT;
1790		else {
1791			rval = 0;
1792			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1793			if (error)
1794				goto done;
1795			if (XFS_IS_CORRUPT(mp, i != 1)) {
1796				xfs_btree_mark_sick(bma->cur);
1797				error = -EFSCORRUPTED;
1798				goto done;
1799			}
1800			error = xfs_bmbt_update(bma->cur, &RIGHT);
1801			if (error)
1802				goto done;
1803		}
1804
1805		temp = PREV.br_blockcount - new->br_blockcount;
1806		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1807			startblockval(PREV.br_startblock));
1808
1809		PREV.br_blockcount = temp;
1810		PREV.br_startblock = nullstartblock(da_new);
1811
1812		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1813		xfs_iext_next(ifp, &bma->icur);
1814		xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
1815		break;
1816
1817	case BMAP_RIGHT_FILLING:
1818		/*
1819		 * Filling in the last part of a previous delayed allocation.
1820		 * The right neighbor is not contiguous.
1821		 */
1822		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1823		ifp->if_nextents++;
1824
1825		if (bma->cur == NULL)
1826			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1827		else {
1828			rval = XFS_ILOG_CORE;
1829			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1830			if (error)
1831				goto done;
1832			if (XFS_IS_CORRUPT(mp, i != 0)) {
1833				xfs_btree_mark_sick(bma->cur);
1834				error = -EFSCORRUPTED;
1835				goto done;
1836			}
1837			error = xfs_btree_insert(bma->cur, &i);
1838			if (error)
1839				goto done;
1840			if (XFS_IS_CORRUPT(mp, i != 1)) {
1841				xfs_btree_mark_sick(bma->cur);
1842				error = -EFSCORRUPTED;
1843				goto done;
1844			}
1845		}
1846
1847		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1848			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1849				&bma->cur, 1, &tmp_rval, whichfork);
1850			rval |= tmp_rval;
1851			if (error)
1852				goto done;
1853		}
1854
1855		temp = PREV.br_blockcount - new->br_blockcount;
1856		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1857			startblockval(PREV.br_startblock) -
1858			(bma->cur ? bma->cur->bc_bmap.allocated : 0));
1859
1860		PREV.br_startblock = nullstartblock(da_new);
1861		PREV.br_blockcount = temp;
1862		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1863		xfs_iext_next(ifp, &bma->icur);
1864		break;
1865
1866	case 0:
1867		/*
1868		 * Filling in the middle part of a previous delayed allocation.
1869		 * Contiguity is impossible here.
1870		 * This case is avoided almost all the time.
1871		 *
1872		 * We start with a delayed allocation:
1873		 *
1874		 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
1875		 *  PREV @ idx
1876		 *
1877		 * and we are allocating:
1878		 *                     +rrrrrrrrrrrrrrrrr+
1879		 *			      new
1880		 *
1881		 * and we set it up for insertion as:
1882		 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
1883		 *                            new
1884		 *  PREV @ idx          LEFT              RIGHT
1885		 *                      inserted at idx + 1
1886		 */
1887		old = PREV;
1888
1889		/* LEFT is the new middle */
1890		LEFT = *new;
1891
1892		/* RIGHT is the new right */
1893		RIGHT.br_state = PREV.br_state;
1894		RIGHT.br_startoff = new_endoff;
1895		RIGHT.br_blockcount =
1896			PREV.br_startoff + PREV.br_blockcount - new_endoff;
1897		RIGHT.br_startblock =
1898			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1899					RIGHT.br_blockcount));
1900
1901		/* truncate PREV */
1902		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
1903		PREV.br_startblock =
1904			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1905					PREV.br_blockcount));
1906		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1907
1908		xfs_iext_next(ifp, &bma->icur);
1909		xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
1910		xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
1911		ifp->if_nextents++;
1912
1913		if (bma->cur == NULL)
1914			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1915		else {
1916			rval = XFS_ILOG_CORE;
1917			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1918			if (error)
1919				goto done;
1920			if (XFS_IS_CORRUPT(mp, i != 0)) {
1921				xfs_btree_mark_sick(bma->cur);
1922				error = -EFSCORRUPTED;
1923				goto done;
1924			}
1925			error = xfs_btree_insert(bma->cur, &i);
1926			if (error)
1927				goto done;
1928			if (XFS_IS_CORRUPT(mp, i != 1)) {
1929				xfs_btree_mark_sick(bma->cur);
1930				error = -EFSCORRUPTED;
1931				goto done;
1932			}
1933		}
1934
1935		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1936			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1937					&bma->cur, 1, &tmp_rval, whichfork);
1938			rval |= tmp_rval;
1939			if (error)
1940				goto done;
1941		}
1942
1943		da_new = startblockval(PREV.br_startblock) +
1944			 startblockval(RIGHT.br_startblock);
1945		break;
1946
1947	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1948	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1949	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1950	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1951	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1952	case BMAP_LEFT_CONTIG:
1953	case BMAP_RIGHT_CONTIG:
1954		/*
1955		 * These cases are all impossible.
1956		 */
1957		ASSERT(0);
1958	}
1959
1960	/* add reverse mapping unless caller opted out */
1961	if (!(bma->flags & XFS_BMAPI_NORMAP))
1962		xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new);
1963
1964	/* convert to a btree if necessary */
1965	if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1966		int	tmp_logflags;	/* partial log flag return val */
1967
1968		ASSERT(bma->cur == NULL);
1969		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1970				&bma->cur, da_old > 0, &tmp_logflags,
1971				whichfork);
1972		bma->logflags |= tmp_logflags;
1973		if (error)
1974			goto done;
1975	}
1976
1977	if (da_new != da_old)
1978		xfs_mod_delalloc(mp, (int64_t)da_new - da_old);
1979
1980	if (bma->cur) {
1981		da_new += bma->cur->bc_bmap.allocated;
1982		bma->cur->bc_bmap.allocated = 0;
1983	}
1984
1985	/* adjust for changes in reserved delayed indirect blocks */
1986	if (da_new != da_old) {
1987		ASSERT(state == 0 || da_new < da_old);
1988		error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
1989				false);
1990	}
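	/*
	 * Worked example (hypothetical numbers): if the delalloc extent had
	 * reserved da_old = 8 blocks for worst-case indirect btree blocks
	 * and the remaining delalloc piece only needs da_new = 3, the 5
	 * surplus blocks were just returned to fdblocks.  Only the middle
	 * split case (state == 0) may legitimately need a larger
	 * reservation, as it leaves two delalloc extents behind, each
	 * carrying its own worst-case indlen.
	 */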
1991
1992	xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
1993done:
1994	if (whichfork != XFS_COW_FORK)
1995		bma->logflags |= rval;
1996	return error;
1997#undef	LEFT
1998#undef	RIGHT
1999#undef	PREV
2000}
2001
2002/*
2003 * Convert an unwritten allocation to a real allocation or vice versa.
2004 */
2005int					/* error */
2006xfs_bmap_add_extent_unwritten_real(
2007	struct xfs_trans	*tp,
2008	xfs_inode_t		*ip,	/* incore inode pointer */
2009	int			whichfork,
2010	struct xfs_iext_cursor	*icur,
2011	struct xfs_btree_cur	**curp,	/* if *curp is null, not a btree */
2012	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
2013	int			*logflagsp) /* inode logging flags */
2014{
2015	struct xfs_btree_cur	*cur;	/* btree cursor */
2016	int			error;	/* error return value */
2017	int			i;	/* temp state */
2018	struct xfs_ifork	*ifp;	/* inode fork pointer */
2019	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
2020	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
2021					/* left is 0, right is 1, prev is 2 */
2022	int			rval = 0;	/* return value (logging flags) */
2023	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
2024	struct xfs_mount	*mp = ip->i_mount;
2025	struct xfs_bmbt_irec	old;
2026
2027	*logflagsp = 0;
2028
2029	cur = *curp;
2030	ifp = xfs_ifork_ptr(ip, whichfork);
2031
2032	ASSERT(!isnullstartblock(new->br_startblock));
2033
2034	XFS_STATS_INC(mp, xs_add_exlist);
2035
2036#define	LEFT		r[0]
2037#define	RIGHT		r[1]
2038#define	PREV		r[2]
2039
2040	/*
2041	 * Set up a bunch of variables to make the tests simpler.
2042	 */
2043	error = 0;
2044	xfs_iext_get_extent(ifp, icur, &PREV);
2045	ASSERT(new->br_state != PREV.br_state);
2046	new_endoff = new->br_startoff + new->br_blockcount;
2047	ASSERT(PREV.br_startoff <= new->br_startoff);
2048	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2049
2050	/*
2051	 * Set flags determining what part of the previous oldext allocation
2052	 * extent is being replaced by a newext allocation.
2053	 */
2054	if (PREV.br_startoff == new->br_startoff)
2055		state |= BMAP_LEFT_FILLING;
2056	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2057		state |= BMAP_RIGHT_FILLING;
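	/*
	 * Worked example (hypothetical offsets): with PREV covering file
	 * offsets [100, 116) and new covering [100, 108), the start offsets
	 * match so BMAP_LEFT_FILLING is set, but 116 != 108 leaves
	 * BMAP_RIGHT_FILLING clear -- only the first part of PREV changes
	 * state.  Were new to cover all of [100, 116), both bits would be
	 * set and the whole extent would flip in place.
	 */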
2058
2059	/*
2060	 * Check and set flags if this segment has a left neighbor.
2061	 * Don't set contiguous if the combined extent would be too large.
2062	 */
2063	if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) {
2064		state |= BMAP_LEFT_VALID;
2065		if (isnullstartblock(LEFT.br_startblock))
2066			state |= BMAP_LEFT_DELAY;
2067	}
2068
2069	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2070	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2071	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2072	    LEFT.br_state == new->br_state &&
2073	    LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
2074		state |= BMAP_LEFT_CONTIG;
2075
2076	/*
2077	 * Check and set flags if this segment has a right neighbor.
2078	 * Don't set contiguous if the combined extent would be too large.
2079	 * Also check for all-three-contiguous being too large.
2080	 */
2081	if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) {
2082		state |= BMAP_RIGHT_VALID;
2083		if (isnullstartblock(RIGHT.br_startblock))
2084			state |= BMAP_RIGHT_DELAY;
2085	}
2086
2087	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2088	    new_endoff == RIGHT.br_startoff &&
2089	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2090	    new->br_state == RIGHT.br_state &&
2091	    new->br_blockcount + RIGHT.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
2092	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2093		       BMAP_RIGHT_FILLING)) !=
2094		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2095		       BMAP_RIGHT_FILLING) ||
2096	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2097			<= XFS_MAX_BMBT_EXTLEN))
2098		state |= BMAP_RIGHT_CONTIG;
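	/*
	 * The final clause above guards the one case that merges all three
	 * extents: when new fills PREV completely and both neighbors are
	 * contiguous, LEFT, new and RIGHT collapse into a single record, so
	 * their combined length must also fit in XFS_MAX_BMBT_EXTLEN.  In
	 * every other case at most two extents are joined and the pairwise
	 * length checks already made are sufficient.
	 */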
2099
2100	/*
2101	 * Switch out based on the FILLING and CONTIG state bits.
2102	 */
2103	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2104			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2105	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2106	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2107		/*
2108		 * Setting all of a previous oldext extent to newext.
2109		 * The left and right neighbors are both contiguous with new.
2110		 */
2111		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
2112
2113		xfs_iext_remove(ip, icur, state);
2114		xfs_iext_remove(ip, icur, state);
2115		xfs_iext_prev(ifp, icur);
2116		xfs_iext_update_extent(ip, state, icur, &LEFT);
2117		ifp->if_nextents -= 2;
2118		if (cur == NULL)
2119			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2120		else {
2121			rval = XFS_ILOG_CORE;
2122			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2123			if (error)
2124				goto done;
2125			if (XFS_IS_CORRUPT(mp, i != 1)) {
2126				xfs_btree_mark_sick(cur);
2127				error = -EFSCORRUPTED;
2128				goto done;
2129			}
2130			if ((error = xfs_btree_delete(cur, &i)))
2131				goto done;
2132			if (XFS_IS_CORRUPT(mp, i != 1)) {
2133				xfs_btree_mark_sick(cur);
2134				error = -EFSCORRUPTED;
2135				goto done;
2136			}
2137			if ((error = xfs_btree_decrement(cur, 0, &i)))
2138				goto done;
2139			if (XFS_IS_CORRUPT(mp, i != 1)) {
2140				xfs_btree_mark_sick(cur);
2141				error = -EFSCORRUPTED;
2142				goto done;
2143			}
2144			if ((error = xfs_btree_delete(cur, &i)))
2145				goto done;
2146			if (XFS_IS_CORRUPT(mp, i != 1)) {
2147				xfs_btree_mark_sick(cur);
2148				error = -EFSCORRUPTED;
2149				goto done;
2150			}
2151			if ((error = xfs_btree_decrement(cur, 0, &i)))
2152				goto done;
2153			if (XFS_IS_CORRUPT(mp, i != 1)) {
2154				xfs_btree_mark_sick(cur);
2155				error = -EFSCORRUPTED;
2156				goto done;
2157			}
2158			error = xfs_bmbt_update(cur, &LEFT);
2159			if (error)
2160				goto done;
2161		}
2162		break;
2163
2164	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2165		/*
2166		 * Setting all of a previous oldext extent to newext.
2167		 * The left neighbor is contiguous, the right is not.
2168		 */
2169		LEFT.br_blockcount += PREV.br_blockcount;
2170
2171		xfs_iext_remove(ip, icur, state);
2172		xfs_iext_prev(ifp, icur);
2173		xfs_iext_update_extent(ip, state, icur, &LEFT);
2174		ifp->if_nextents--;
2175		if (cur == NULL)
2176			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2177		else {
2178			rval = XFS_ILOG_CORE;
2179			error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
2180			if (error)
2181				goto done;
2182			if (XFS_IS_CORRUPT(mp, i != 1)) {
2183				xfs_btree_mark_sick(cur);
2184				error = -EFSCORRUPTED;
2185				goto done;
2186			}
2187			if ((error = xfs_btree_delete(cur, &i)))
2188				goto done;
2189			if (XFS_IS_CORRUPT(mp, i != 1)) {
2190				xfs_btree_mark_sick(cur);
2191				error = -EFSCORRUPTED;
2192				goto done;
2193			}
2194			if ((error = xfs_btree_decrement(cur, 0, &i)))
2195				goto done;
2196			if (XFS_IS_CORRUPT(mp, i != 1)) {
2197				xfs_btree_mark_sick(cur);
2198				error = -EFSCORRUPTED;
2199				goto done;
2200			}
2201			error = xfs_bmbt_update(cur, &LEFT);
2202			if (error)
2203				goto done;
2204		}
2205		break;
2206
2207	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2208		/*
2209		 * Setting all of a previous oldext extent to newext.
2210		 * The right neighbor is contiguous, the left is not.
2211		 */
2212		PREV.br_blockcount += RIGHT.br_blockcount;
2213		PREV.br_state = new->br_state;
2214
2215		xfs_iext_next(ifp, icur);
2216		xfs_iext_remove(ip, icur, state);
2217		xfs_iext_prev(ifp, icur);
2218		xfs_iext_update_extent(ip, state, icur, &PREV);
2219		ifp->if_nextents--;
2220
2221		if (cur == NULL)
2222			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2223		else {
2224			rval = XFS_ILOG_CORE;
2225			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2226			if (error)
2227				goto done;
2228			if (XFS_IS_CORRUPT(mp, i != 1)) {
2229				xfs_btree_mark_sick(cur);
2230				error = -EFSCORRUPTED;
2231				goto done;
2232			}
2233			if ((error = xfs_btree_delete(cur, &i)))
2234				goto done;
2235			if (XFS_IS_CORRUPT(mp, i != 1)) {
2236				xfs_btree_mark_sick(cur);
2237				error = -EFSCORRUPTED;
2238				goto done;
2239			}
2240			if ((error = xfs_btree_decrement(cur, 0, &i)))
2241				goto done;
2242			if (XFS_IS_CORRUPT(mp, i != 1)) {
2243				xfs_btree_mark_sick(cur);
2244				error = -EFSCORRUPTED;
2245				goto done;
2246			}
2247			error = xfs_bmbt_update(cur, &PREV);
2248			if (error)
2249				goto done;
2250		}
2251		break;
2252
2253	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2254		/*
2255		 * Setting all of a previous oldext extent to newext.
2256		 * Neither the left nor right neighbors are contiguous with
2257		 * the new one.
2258		 */
2259		PREV.br_state = new->br_state;
2260		xfs_iext_update_extent(ip, state, icur, &PREV);
2261
2262		if (cur == NULL)
2263			rval = XFS_ILOG_DEXT;
2264		else {
2265			rval = 0;
2266			error = xfs_bmbt_lookup_eq(cur, new, &i);
2267			if (error)
2268				goto done;
2269			if (XFS_IS_CORRUPT(mp, i != 1)) {
2270				xfs_btree_mark_sick(cur);
2271				error = -EFSCORRUPTED;
2272				goto done;
2273			}
2274			error = xfs_bmbt_update(cur, &PREV);
2275			if (error)
2276				goto done;
2277		}
2278		break;
2279
2280	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2281		/*
2282		 * Setting the first part of a previous oldext extent to newext.
2283		 * The left neighbor is contiguous.
2284		 */
2285		LEFT.br_blockcount += new->br_blockcount;
2286
2287		old = PREV;
2288		PREV.br_startoff += new->br_blockcount;
2289		PREV.br_startblock += new->br_blockcount;
2290		PREV.br_blockcount -= new->br_blockcount;
2291
2292		xfs_iext_update_extent(ip, state, icur, &PREV);
2293		xfs_iext_prev(ifp, icur);
2294		xfs_iext_update_extent(ip, state, icur, &LEFT);
2295
2296		if (cur == NULL)
2297			rval = XFS_ILOG_DEXT;
2298		else {
2299			rval = 0;
2300			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2301			if (error)
2302				goto done;
2303			if (XFS_IS_CORRUPT(mp, i != 1)) {
2304				xfs_btree_mark_sick(cur);
2305				error = -EFSCORRUPTED;
2306				goto done;
2307			}
2308			error = xfs_bmbt_update(cur, &PREV);
2309			if (error)
2310				goto done;
2311			error = xfs_btree_decrement(cur, 0, &i);
2312			if (error)
2313				goto done;
2314			error = xfs_bmbt_update(cur, &LEFT);
2315			if (error)
2316				goto done;
2317		}
2318		break;
2319
2320	case BMAP_LEFT_FILLING:
2321		/*
2322		 * Setting the first part of a previous oldext extent to newext.
2323		 * The left neighbor is not contiguous.
2324		 */
2325		old = PREV;
2326		PREV.br_startoff += new->br_blockcount;
2327		PREV.br_startblock += new->br_blockcount;
2328		PREV.br_blockcount -= new->br_blockcount;
2329
2330		xfs_iext_update_extent(ip, state, icur, &PREV);
2331		xfs_iext_insert(ip, icur, new, state);
2332		ifp->if_nextents++;
2333
2334		if (cur == NULL)
2335			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2336		else {
2337			rval = XFS_ILOG_CORE;
2338			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2339			if (error)
2340				goto done;
2341			if (XFS_IS_CORRUPT(mp, i != 1)) {
2342				xfs_btree_mark_sick(cur);
2343				error = -EFSCORRUPTED;
2344				goto done;
2345			}
2346			error = xfs_bmbt_update(cur, &PREV);
2347			if (error)
2348				goto done;
2349			cur->bc_rec.b = *new;
2350			if ((error = xfs_btree_insert(cur, &i)))
2351				goto done;
2352			if (XFS_IS_CORRUPT(mp, i != 1)) {
2353				xfs_btree_mark_sick(cur);
2354				error = -EFSCORRUPTED;
2355				goto done;
2356			}
2357		}
2358		break;
2359
2360	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2361		/*
2362		 * Setting the last part of a previous oldext extent to newext.
2363		 * The right neighbor is contiguous with the new allocation.
2364		 */
2365		old = PREV;
2366		PREV.br_blockcount -= new->br_blockcount;
2367
2368		RIGHT.br_startoff = new->br_startoff;
2369		RIGHT.br_startblock = new->br_startblock;
2370		RIGHT.br_blockcount += new->br_blockcount;
2371
2372		xfs_iext_update_extent(ip, state, icur, &PREV);
2373		xfs_iext_next(ifp, icur);
2374		xfs_iext_update_extent(ip, state, icur, &RIGHT);
2375
2376		if (cur == NULL)
2377			rval = XFS_ILOG_DEXT;
2378		else {
2379			rval = 0;
2380			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2381			if (error)
2382				goto done;
2383			if (XFS_IS_CORRUPT(mp, i != 1)) {
2384				xfs_btree_mark_sick(cur);
2385				error = -EFSCORRUPTED;
2386				goto done;
2387			}
2388			error = xfs_bmbt_update(cur, &PREV);
2389			if (error)
2390				goto done;
2391			error = xfs_btree_increment(cur, 0, &i);
2392			if (error)
2393				goto done;
2394			error = xfs_bmbt_update(cur, &RIGHT);
2395			if (error)
2396				goto done;
2397		}
2398		break;
2399
2400	case BMAP_RIGHT_FILLING:
2401		/*
2402		 * Setting the last part of a previous oldext extent to newext.
2403		 * The right neighbor is not contiguous.
2404		 */
2405		old = PREV;
2406		PREV.br_blockcount -= new->br_blockcount;
2407
2408		xfs_iext_update_extent(ip, state, icur, &PREV);
2409		xfs_iext_next(ifp, icur);
2410		xfs_iext_insert(ip, icur, new, state);
2411		ifp->if_nextents++;
2412
2413		if (cur == NULL)
2414			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2415		else {
2416			rval = XFS_ILOG_CORE;
2417			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2418			if (error)
2419				goto done;
2420			if (XFS_IS_CORRUPT(mp, i != 1)) {
2421				xfs_btree_mark_sick(cur);
2422				error = -EFSCORRUPTED;
2423				goto done;
2424			}
2425			error = xfs_bmbt_update(cur, &PREV);
2426			if (error)
2427				goto done;
2428			error = xfs_bmbt_lookup_eq(cur, new, &i);
2429			if (error)
2430				goto done;
2431			if (XFS_IS_CORRUPT(mp, i != 0)) {
2432				xfs_btree_mark_sick(cur);
2433				error = -EFSCORRUPTED;
2434				goto done;
2435			}
2436			if ((error = xfs_btree_insert(cur, &i)))
2437				goto done;
2438			if (XFS_IS_CORRUPT(mp, i != 1)) {
2439				xfs_btree_mark_sick(cur);
2440				error = -EFSCORRUPTED;
2441				goto done;
2442			}
2443		}
2444		break;
2445
2446	case 0:
2447		/*
2448		 * Setting the middle part of a previous oldext extent to
2449		 * newext.  Contiguity is impossible here.
2450		 * One extent becomes three extents.
2451		 */
2452		old = PREV;
2453		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
2454
2455		r[0] = *new;
2456		r[1].br_startoff = new_endoff;
2457		r[1].br_blockcount =
2458			old.br_startoff + old.br_blockcount - new_endoff;
2459		r[1].br_startblock = new->br_startblock + new->br_blockcount;
2460		r[1].br_state = PREV.br_state;
2461
2462		xfs_iext_update_extent(ip, state, icur, &PREV);
2463		xfs_iext_next(ifp, icur);
2464		xfs_iext_insert(ip, icur, &r[1], state);
2465		xfs_iext_insert(ip, icur, &r[0], state);
2466		ifp->if_nextents += 2;
2467
2468		if (cur == NULL)
2469			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2470		else {
2471			rval = XFS_ILOG_CORE;
2472			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2473			if (error)
2474				goto done;
2475			if (XFS_IS_CORRUPT(mp, i != 1)) {
2476				xfs_btree_mark_sick(cur);
2477				error = -EFSCORRUPTED;
2478				goto done;
2479			}
2480			/* new right extent - oldext */
2481			error = xfs_bmbt_update(cur, &r[1]);
2482			if (error)
2483				goto done;
2484			/* new left extent - oldext */
2485			cur->bc_rec.b = PREV;
2486			if ((error = xfs_btree_insert(cur, &i)))
2487				goto done;
2488			if (XFS_IS_CORRUPT(mp, i != 1)) {
2489				xfs_btree_mark_sick(cur);
2490				error = -EFSCORRUPTED;
2491				goto done;
2492			}
2493			/*
2494			 * Reset the cursor to the position of the new extent
2495			 * we are about to insert as we can't trust it after
2496			 * the previous insert.
2497			 */
2498			error = xfs_bmbt_lookup_eq(cur, new, &i);
2499			if (error)
2500				goto done;
2501			if (XFS_IS_CORRUPT(mp, i != 0)) {
2502				xfs_btree_mark_sick(cur);
2503				error = -EFSCORRUPTED;
2504				goto done;
2505			}
2506			/* new middle extent - newext */
2507			if ((error = xfs_btree_insert(cur, &i)))
2508				goto done;
2509			if (XFS_IS_CORRUPT(mp, i != 1)) {
2510				xfs_btree_mark_sick(cur);
2511				error = -EFSCORRUPTED;
2512				goto done;
2513			}
2514		}
2515		break;
2516
2517	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2518	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2519	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2520	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2521	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2522	case BMAP_LEFT_CONTIG:
2523	case BMAP_RIGHT_CONTIG:
2524		/*
2525		 * These cases are all impossible.
2526		 */
2527		ASSERT(0);
2528	}
2529
2530	/* update reverse mappings */
2531	xfs_rmap_convert_extent(mp, tp, ip, whichfork, new);
2532
2533	/* convert to a btree if necessary */
2534	if (xfs_bmap_needs_btree(ip, whichfork)) {
2535		int	tmp_logflags;	/* partial log flag return val */
2536
2537		ASSERT(cur == NULL);
2538		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
2539				&tmp_logflags, whichfork);
2540		*logflagsp |= tmp_logflags;
2541		if (error)
2542			goto done;
2543	}
2544
2545	/* clear out the allocated field, done with it now in any case. */
2546	if (cur) {
2547		cur->bc_bmap.allocated = 0;
2548		*curp = cur;
2549	}
2550
2551	xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
2552done:
2553	*logflagsp |= rval;
2554	return error;
2555#undef	LEFT
2556#undef	RIGHT
2557#undef	PREV
2558}
2559
2560/*
2561 * Convert a hole to a delayed allocation.
2562 */
2563STATIC void
2564xfs_bmap_add_extent_hole_delay(
2565	xfs_inode_t		*ip,	/* incore inode pointer */
2566	int			whichfork,
2567	struct xfs_iext_cursor	*icur,
2568	xfs_bmbt_irec_t		*new)	/* new data to add to file extents */
2569{
2570	struct xfs_ifork	*ifp;	/* inode fork pointer */
2571	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
2572	xfs_filblks_t		newlen = 0;	/* new indirect size */
2573	xfs_filblks_t		oldlen = 0;	/* old indirect size */
2574	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
2575	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
2576	xfs_filblks_t		temp;	 /* temp for indirect calculations */
2577
2578	ifp = xfs_ifork_ptr(ip, whichfork);
2579	ASSERT(isnullstartblock(new->br_startblock));
2580
2581	/*
2582	 * Check and set flags if this segment has a left neighbor
2583	 */
2584	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2585		state |= BMAP_LEFT_VALID;
2586		if (isnullstartblock(left.br_startblock))
2587			state |= BMAP_LEFT_DELAY;
2588	}
2589
2590	/*
2591	 * Check and set flags if the current (right) segment exists.
2592	 * If it doesn't exist, we're converting the hole at end-of-file.
2593	 */
2594	if (xfs_iext_get_extent(ifp, icur, &right)) {
2595		state |= BMAP_RIGHT_VALID;
2596		if (isnullstartblock(right.br_startblock))
2597			state |= BMAP_RIGHT_DELAY;
2598	}
2599
2600	/*
2601	 * Set contiguity flags on the left and right neighbors.
2602	 * Don't let extents get too large, even if the pieces are contiguous.
2603	 */
2604	if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
2605	    left.br_startoff + left.br_blockcount == new->br_startoff &&
2606	    left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
2607		state |= BMAP_LEFT_CONTIG;
2608
2609	if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
2610	    new->br_startoff + new->br_blockcount == right.br_startoff &&
2611	    new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
2612	    (!(state & BMAP_LEFT_CONTIG) ||
2613	     (left.br_blockcount + new->br_blockcount +
2614	      right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)))
2615		state |= BMAP_RIGHT_CONTIG;
2616
2617	/*
2618	 * Switch out based on the contiguity flags.
2619	 */
2620	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2621	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2622		/*
2623		 * New allocation is contiguous with delayed allocations
2624		 * on the left and on the right.
2625		 * Merge all three into a single extent record.
2626		 */
2627		temp = left.br_blockcount + new->br_blockcount +
2628			right.br_blockcount;
2629
2630		oldlen = startblockval(left.br_startblock) +
2631			startblockval(new->br_startblock) +
2632			startblockval(right.br_startblock);
2633		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2634					 oldlen);
2635		left.br_startblock = nullstartblock(newlen);
2636		left.br_blockcount = temp;
2637
2638		xfs_iext_remove(ip, icur, state);
2639		xfs_iext_prev(ifp, icur);
2640		xfs_iext_update_extent(ip, state, icur, &left);
2641		break;
2642
2643	case BMAP_LEFT_CONTIG:
2644		/*
2645		 * New allocation is contiguous with a delayed allocation
2646		 * on the left.
2647		 * Merge the new allocation with the left neighbor.
2648		 */
2649		temp = left.br_blockcount + new->br_blockcount;
2650
2651		oldlen = startblockval(left.br_startblock) +
2652			startblockval(new->br_startblock);
2653		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2654					 oldlen);
2655		left.br_blockcount = temp;
2656		left.br_startblock = nullstartblock(newlen);
2657
2658		xfs_iext_prev(ifp, icur);
2659		xfs_iext_update_extent(ip, state, icur, &left);
2660		break;
2661
2662	case BMAP_RIGHT_CONTIG:
2663		/*
2664		 * New allocation is contiguous with a delayed allocation
2665		 * on the right.
2666		 * Merge the new allocation with the right neighbor.
2667		 */
2668		temp = new->br_blockcount + right.br_blockcount;
2669		oldlen = startblockval(new->br_startblock) +
2670			startblockval(right.br_startblock);
2671		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2672					 oldlen);
2673		right.br_startoff = new->br_startoff;
2674		right.br_startblock = nullstartblock(newlen);
2675		right.br_blockcount = temp;
2676		xfs_iext_update_extent(ip, state, icur, &right);
2677		break;
2678
2679	case 0:
2680		/*
2681		 * New allocation is not contiguous with another
2682		 * delayed allocation.
2683		 * Insert a new entry.
2684		 */
2685		oldlen = newlen = 0;
2686		xfs_iext_insert(ip, icur, new, state);
2687		break;
2688	}
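	/*
	 * Worked example (hypothetical sizes): suppose a merge joined two
	 * delalloc extents that had each reserved 4 indirect blocks, so
	 * oldlen = 8, while the worst-case indirect length for the combined
	 * extent is only 5.  Then newlen = min(5, 8) = 5 and the 3 surplus
	 * blocks are handed back to fdblocks below.
	 */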
2689	if (oldlen != newlen) {
2690		ASSERT(oldlen > newlen);
2691		xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2692				 false);
2693		/*
2694		 * Nothing to do for disk quota accounting here.
2695		 */
2696		xfs_mod_delalloc(ip->i_mount, (int64_t)newlen - oldlen);
2697	}
2698}
2699
2700/*
2701 * Convert a hole to a real allocation.
2702 */
2703STATIC int				/* error */
2704xfs_bmap_add_extent_hole_real(
2705	struct xfs_trans	*tp,
2706	struct xfs_inode	*ip,
2707	int			whichfork,
2708	struct xfs_iext_cursor	*icur,
2709	struct xfs_btree_cur	**curp,
2710	struct xfs_bmbt_irec	*new,
2711	int			*logflagsp,
2712	uint32_t		flags)
2713{
2714	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
2715	struct xfs_mount	*mp = ip->i_mount;
2716	struct xfs_btree_cur	*cur = *curp;
2717	int			error;	/* error return value */
2718	int			i;	/* temp state */
2719	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
2720	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
2721	int			rval = 0;	/* return value (logging flags) */
2722	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
2723	struct xfs_bmbt_irec	old;
2724
2725	ASSERT(!isnullstartblock(new->br_startblock));
2726	ASSERT(!cur || !(cur->bc_flags & XFS_BTREE_BMBT_WASDEL));
2727
2728	XFS_STATS_INC(mp, xs_add_exlist);
2729
2730	/*
2731	 * Check and set flags if this segment has a left neighbor.
2732	 */
2733	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2734		state |= BMAP_LEFT_VALID;
2735		if (isnullstartblock(left.br_startblock))
2736			state |= BMAP_LEFT_DELAY;
2737	}
2738
2739	/*
2740	 * Check and set flags if this segment has a current value.
2741	 * Not true if we're inserting into the "hole" at eof.
2742	 */
2743	if (xfs_iext_get_extent(ifp, icur, &right)) {
2744		state |= BMAP_RIGHT_VALID;
2745		if (isnullstartblock(right.br_startblock))
2746			state |= BMAP_RIGHT_DELAY;
2747	}
2748
2749	/*
2750	 * We're inserting a real allocation between "left" and "right".
2751	 * Set the contiguity flags.  Don't let extents get too large.
2752	 */
2753	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2754	    left.br_startoff + left.br_blockcount == new->br_startoff &&
2755	    left.br_startblock + left.br_blockcount == new->br_startblock &&
2756	    left.br_state == new->br_state &&
2757	    left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
2758		state |= BMAP_LEFT_CONTIG;
2759
2760	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2761	    new->br_startoff + new->br_blockcount == right.br_startoff &&
2762	    new->br_startblock + new->br_blockcount == right.br_startblock &&
2763	    new->br_state == right.br_state &&
2764	    new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
2765	    (!(state & BMAP_LEFT_CONTIG) ||
2766	     left.br_blockcount + new->br_blockcount +
2767	     right.br_blockcount <= XFS_MAX_BMBT_EXTLEN))
2768		state |= BMAP_RIGHT_CONTIG;
2769
2770	error = 0;
2771	/*
2772	 * Select which case we're in here, and implement it.
2773	 */
2774	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2775	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2776		/*
2777		 * New allocation is contiguous with real allocations on the
2778		 * left and on the right.
2779		 * Merge all three into a single extent record.
2780		 */
2781		left.br_blockcount += new->br_blockcount + right.br_blockcount;
2782
2783		xfs_iext_remove(ip, icur, state);
2784		xfs_iext_prev(ifp, icur);
2785		xfs_iext_update_extent(ip, state, icur, &left);
2786		ifp->if_nextents--;
2787
2788		if (cur == NULL) {
2789			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2790		} else {
2791			rval = XFS_ILOG_CORE;
2792			error = xfs_bmbt_lookup_eq(cur, &right, &i);
2793			if (error)
2794				goto done;
2795			if (XFS_IS_CORRUPT(mp, i != 1)) {
2796				xfs_btree_mark_sick(cur);
2797				error = -EFSCORRUPTED;
2798				goto done;
2799			}
2800			error = xfs_btree_delete(cur, &i);
2801			if (error)
2802				goto done;
2803			if (XFS_IS_CORRUPT(mp, i != 1)) {
2804				xfs_btree_mark_sick(cur);
2805				error = -EFSCORRUPTED;
2806				goto done;
2807			}
2808			error = xfs_btree_decrement(cur, 0, &i);
2809			if (error)
2810				goto done;
2811			if (XFS_IS_CORRUPT(mp, i != 1)) {
2812				xfs_btree_mark_sick(cur);
2813				error = -EFSCORRUPTED;
2814				goto done;
2815			}
2816			error = xfs_bmbt_update(cur, &left);
2817			if (error)
2818				goto done;
2819		}
2820		break;
2821
2822	case BMAP_LEFT_CONTIG:
2823		/*
2824		 * New allocation is contiguous with a real allocation
2825		 * on the left.
2826		 * Merge the new allocation with the left neighbor.
2827		 */
2828		old = left;
2829		left.br_blockcount += new->br_blockcount;
2830
2831		xfs_iext_prev(ifp, icur);
2832		xfs_iext_update_extent(ip, state, icur, &left);
2833
2834		if (cur == NULL) {
2835			rval = xfs_ilog_fext(whichfork);
2836		} else {
2837			rval = 0;
2838			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2839			if (error)
2840				goto done;
2841			if (XFS_IS_CORRUPT(mp, i != 1)) {
2842				xfs_btree_mark_sick(cur);
2843				error = -EFSCORRUPTED;
2844				goto done;
2845			}
2846			error = xfs_bmbt_update(cur, &left);
2847			if (error)
2848				goto done;
2849		}
2850		break;
2851
2852	case BMAP_RIGHT_CONTIG:
2853		/*
2854		 * New allocation is contiguous with a real allocation
2855		 * on the right.
2856		 * Merge the new allocation with the right neighbor.
2857		 */
2858		old = right;
2859
2860		right.br_startoff = new->br_startoff;
2861		right.br_startblock = new->br_startblock;
2862		right.br_blockcount += new->br_blockcount;
2863		xfs_iext_update_extent(ip, state, icur, &right);
2864
2865		if (cur == NULL) {
2866			rval = xfs_ilog_fext(whichfork);
2867		} else {
2868			rval = 0;
2869			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2870			if (error)
2871				goto done;
2872			if (XFS_IS_CORRUPT(mp, i != 1)) {
2873				xfs_btree_mark_sick(cur);
2874				error = -EFSCORRUPTED;
2875				goto done;
2876			}
2877			error = xfs_bmbt_update(cur, &right);
2878			if (error)
2879				goto done;
2880		}
2881		break;
2882
2883	case 0:
2884		/*
2885		 * New allocation is not contiguous with another
2886		 * real allocation.
2887		 * Insert a new entry.
2888		 */
2889		xfs_iext_insert(ip, icur, new, state);
2890		ifp->if_nextents++;
2891
2892		if (cur == NULL) {
2893			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2894		} else {
2895			rval = XFS_ILOG_CORE;
2896			error = xfs_bmbt_lookup_eq(cur, new, &i);
2897			if (error)
2898				goto done;
2899			if (XFS_IS_CORRUPT(mp, i != 0)) {
2900				xfs_btree_mark_sick(cur);
2901				error = -EFSCORRUPTED;
2902				goto done;
2903			}
2904			error = xfs_btree_insert(cur, &i);
2905			if (error)
2906				goto done;
2907			if (XFS_IS_CORRUPT(mp, i != 1)) {
2908				xfs_btree_mark_sick(cur);
2909				error = -EFSCORRUPTED;
2910				goto done;
2911			}
2912		}
2913		break;
2914	}
2915
2916	/* add reverse mapping unless caller opted out */
2917	if (!(flags & XFS_BMAPI_NORMAP))
2918		xfs_rmap_map_extent(tp, ip, whichfork, new);
2919
2920	/* convert to a btree if necessary */
2921	if (xfs_bmap_needs_btree(ip, whichfork)) {
2922		int	tmp_logflags;	/* partial log flag return val */
2923
2924		ASSERT(cur == NULL);
2925		error = xfs_bmap_extents_to_btree(tp, ip, curp, 0,
2926				&tmp_logflags, whichfork);
2927		*logflagsp |= tmp_logflags;
2928		cur = *curp;
2929		if (error)
2930			goto done;
2931	}
2932
2933	/* clear out the allocated field, done with it now in any case. */
2934	if (cur)
2935		cur->bc_bmap.allocated = 0;
2936
2937	xfs_bmap_check_leaf_extents(cur, ip, whichfork);
2938done:
2939	*logflagsp |= rval;
2940	return error;
2941}
2942
2943/*
2944 * Functions used in the extent read, allocate and remove paths
2945 */
2946
2947/*
2948 * Adjust the size of the new extent based on i_extsize and rt extsize.
2949 */
2950int
2951xfs_bmap_extsize_align(
2952	xfs_mount_t	*mp,
2953	xfs_bmbt_irec_t	*gotp,		/* next extent pointer */
2954	xfs_bmbt_irec_t	*prevp,		/* previous extent pointer */
2955	xfs_extlen_t	extsz,		/* align to this extent size */
2956	int		rt,		/* is this a realtime inode? */
2957	int		eof,		/* is extent at end-of-file? */
2958	int		delay,		/* creating delalloc extent? */
2959	int		convert,	/* overwriting unwritten extent? */
2960	xfs_fileoff_t	*offp,		/* in/out: aligned offset */
2961	xfs_extlen_t	*lenp)		/* in/out: aligned length */
2962{
2963	xfs_fileoff_t	orig_off;	/* original offset */
2964	xfs_extlen_t	orig_alen;	/* original length */
2965	xfs_fileoff_t	orig_end;	/* original off+len */
2966	xfs_fileoff_t	nexto;		/* next file offset */
2967	xfs_fileoff_t	prevo;		/* previous file offset */
2968	xfs_fileoff_t	align_off;	/* temp for offset */
2969	xfs_extlen_t	align_alen;	/* temp for length */
2970	xfs_extlen_t	temp;		/* temp for calculations */
2971
2972	if (convert)
2973		return 0;
2974
2975	orig_off = align_off = *offp;
2976	orig_alen = align_alen = *lenp;
2977	orig_end = orig_off + orig_alen;
2978
2979	/*
2980	 * If this request overlaps an existing extent, then don't
2981	 * attempt to perform any additional alignment.
2982	 */
2983	if (!delay && !eof &&
2984	    (orig_off >= gotp->br_startoff) &&
2985	    (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
2986		return 0;
2987	}
2988
2989	/*
2990	 * If the file offset is unaligned vs. the extent size
2991	 * we need to align it.  This will be possible unless
2992	 * the file was previously written with a kernel that didn't
2993	 * perform this alignment, or if a truncate shot us in the
2994	 * foot.
2995	 */
2996	div_u64_rem(orig_off, extsz, &temp);
2997	if (temp) {
2998		align_alen += temp;
2999		align_off -= temp;
3000	}
3001
3002	/* Same adjustment for the end of the requested area. */
3003	temp = (align_alen % extsz);
3004	if (temp)
3005		align_alen += extsz - temp;
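	/*
	 * Worked example (hypothetical values): with extsz = 16, a request
	 * for offset 10, length 20 first becomes offset 0, length 30 (start
	 * rounded down), then length 32 (end rounded up), so the aligned
	 * range [0, 32) fully covers the requested [10, 30).
	 */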
3006
3007	/*
3008	 * For large extent hint sizes, the aligned extent might be larger than
3009	 * XFS_MAX_BMBT_EXTLEN. In that case, reduce the size by an extsz so
3010	 * that it pulls the length back under XFS_MAX_BMBT_EXTLEN. The outer
3011	 * allocation loops handle short allocation just fine, so it is safe to
3012	 * do this. We only want to do it when we are forced to, though, because
3013	 * it means more allocation operations are required.
3014	 */
3015	while (align_alen > XFS_MAX_BMBT_EXTLEN)
3016		align_alen -= extsz;
3017	ASSERT(align_alen <= XFS_MAX_BMBT_EXTLEN);
3018
3019	/*
3020	 * If the previous block overlaps with this proposed allocation
3021	 * then move the start forward without adjusting the length.
3022	 */
3023	if (prevp->br_startoff != NULLFILEOFF) {
3024		if (prevp->br_startblock == HOLESTARTBLOCK)
3025			prevo = prevp->br_startoff;
3026		else
3027			prevo = prevp->br_startoff + prevp->br_blockcount;
3028	} else
3029		prevo = 0;
3030	if (align_off != orig_off && align_off < prevo)
3031		align_off = prevo;
3032	/*
3033	 * If the next block overlaps with this proposed allocation
3034	 * then move the start back without adjusting the length,
3035	 * but not before offset 0.
3036	 * This may of course make the start overlap previous block,
3037	 * and if we hit the offset 0 limit then the next block
3038	 * can still overlap too.
3039	 */
3040	if (!eof && gotp->br_startoff != NULLFILEOFF) {
3041		if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
3042		    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
3043			nexto = gotp->br_startoff + gotp->br_blockcount;
3044		else
3045			nexto = gotp->br_startoff;
3046	} else
3047		nexto = NULLFILEOFF;
3048	if (!eof &&
3049	    align_off + align_alen != orig_end &&
3050	    align_off + align_alen > nexto)
3051		align_off = nexto > align_alen ? nexto - align_alen : 0;
3052	/*
3053	 * If we're now overlapping the next or previous extent that
3054	 * means we can't fit an extsz piece in this hole.  Just move
3055	 * the start forward to the first valid spot and set
3056	 * the length so we hit the end.
3057	 */
3058	if (align_off != orig_off && align_off < prevo)
3059		align_off = prevo;
3060	if (align_off + align_alen != orig_end &&
3061	    align_off + align_alen > nexto &&
3062	    nexto != NULLFILEOFF) {
3063		ASSERT(nexto > prevo);
3064		align_alen = nexto - align_off;
3065	}
3066
3067	/*
3068	 * If realtime, and the result isn't a multiple of the realtime
3069	 * extent size we need to remove blocks until it is.
3070	 */
3071	if (rt && (temp = xfs_extlen_to_rtxmod(mp, align_alen))) {
3072		/*
3073		 * We're not covering the original request, or
3074		 * we won't be able to once we fix the length.
3075		 */
3076		if (orig_off < align_off ||
3077		    orig_end > align_off + align_alen ||
3078		    align_alen - temp < orig_alen)
3079			return -EINVAL;
3080		/*
3081		 * Try to fix it by moving the start up.
3082		 */
3083		if (align_off + temp <= orig_off) {
3084			align_alen -= temp;
3085			align_off += temp;
3086		}
3087		/*
3088		 * Try to fix it by moving the end in.
3089		 */
3090		else if (align_off + align_alen - temp >= orig_end)
3091			align_alen -= temp;
3092		/*
3093		 * Set the start to the minimum then trim the length.
3094		 */
3095		else {
3096			align_alen -= orig_off - align_off;
3097			align_off = orig_off;
3098			align_alen -= xfs_extlen_to_rtxmod(mp, align_alen);
3099		}
3100		/*
3101		 * Result doesn't cover the request, fail it.
3102		 */
3103		if (orig_off < align_off || orig_end > align_off + align_alen)
3104			return -EINVAL;
3105	} else {
3106		ASSERT(orig_off >= align_off);
3107		/* see XFS_MAX_BMBT_EXTLEN handling above */
3108		ASSERT(orig_end <= align_off + align_alen ||
3109		       align_alen + extsz > XFS_MAX_BMBT_EXTLEN);
3110	}
3111
3112#ifdef DEBUG
3113	if (!eof && gotp->br_startoff != NULLFILEOFF)
3114		ASSERT(align_off + align_alen <= gotp->br_startoff);
3115	if (prevp->br_startoff != NULLFILEOFF)
3116		ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3117#endif
3118
3119	*lenp = align_alen;
3120	*offp = align_off;
3121	return 0;
3122}
3123
3124#define XFS_ALLOC_GAP_UNITS	4
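/*
 * XFS_ALLOC_GAP_UNITS bounds how large a gap xfs_bmap_adjacent() below is
 * willing to skip over when picking an allocation target next to an
 * existing extent.  Worked example (hypothetical numbers): when allocating
 * 8 blocks with a 24 block gap back to the previous extent, 24 <= 4 * 8,
 * so the target is placed past the gap to keep file offset deltas and
 * disk block deltas in step; with a 40 block gap the test fails and the
 * target falls back to the block right after the previous extent.
 */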
3125
3126/* returns true if ap->blkno was modified */
3127bool
3128xfs_bmap_adjacent(
3129	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
3130{
3131	xfs_fsblock_t	adjust;		/* adjustment to block numbers */
3132	xfs_mount_t	*mp;		/* mount point structure */
3133	int		rt;		/* true if inode is realtime */
3134
3135#define	ISVALID(x,y)	\
3136	(rt ? \
3137		(x) < mp->m_sb.sb_rblocks : \
3138		XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
3139		XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
3140		XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
3141
3142	mp = ap->ip->i_mount;
3143	rt = XFS_IS_REALTIME_INODE(ap->ip) &&
3144		(ap->datatype & XFS_ALLOC_USERDATA);
3145	/*
3146	 * If allocating at eof, and there's a previous real block,
3147	 * try to use its last block as our starting point.
3148	 */
3149	if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3150	    !isnullstartblock(ap->prev.br_startblock) &&
3151	    ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
3152		    ap->prev.br_startblock)) {
3153		ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3154		/*
3155		 * Adjust for the gap between prevp and us.
3156		 */
3157		adjust = ap->offset -
3158			(ap->prev.br_startoff + ap->prev.br_blockcount);
3159		if (adjust &&
3160		    ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
3161			ap->blkno += adjust;
3162		return true;
3163	}
3164	/*
3165	 * If not at eof, then compare the two neighbor blocks.
3166	 * Figure out whether either one gives us a good starting point,
3167	 * and pick the better one.
3168	 */
3169	if (!ap->eof) {
3170		xfs_fsblock_t	gotbno;		/* right side block number */
3171		xfs_fsblock_t	gotdiff = 0;	/* right side difference */
3172		xfs_fsblock_t	prevbno;	/* left side block number */
3173		xfs_fsblock_t	prevdiff = 0;	/* left side difference */
3174
3175		/*
3176		 * If there's a previous (left) block, select a requested
3177		 * start block based on it.
3178		 */
3179		if (ap->prev.br_startoff != NULLFILEOFF &&
3180		    !isnullstartblock(ap->prev.br_startblock) &&
3181		    (prevbno = ap->prev.br_startblock +
3182			       ap->prev.br_blockcount) &&
3183		    ISVALID(prevbno, ap->prev.br_startblock)) {
3184			/*
3185			 * Calculate gap to end of previous block.
3186			 */
3187			adjust = prevdiff = ap->offset -
3188				(ap->prev.br_startoff +
3189				 ap->prev.br_blockcount);
3190			/*
3191			 * Figure the startblock based on the previous block's
3192			 * end and the gap size.
3193			 * Heuristic!
3194			 * If the gap is large relative to the piece we're
3195			 * allocating, or using it gives us an invalid block
3196			 * number, then just use the end of the previous block.
3197			 */
3198			if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3199			    ISVALID(prevbno + prevdiff,
3200				    ap->prev.br_startblock))
3201				prevbno += adjust;
3202			else
3203				prevdiff += adjust;
3204		}
3205		/*
3206		 * No previous block or can't follow it, just default.
3207		 */
3208		else
3209			prevbno = NULLFSBLOCK;
3210		/*
3211		 * If there's a following (right) block, select a requested
3212		 * start block based on it.
3213		 */
3214		if (!isnullstartblock(ap->got.br_startblock)) {
3215			/*
3216			 * Calculate gap to start of next block.
3217			 */
3218			adjust = gotdiff = ap->got.br_startoff - ap->offset;
3219			/*
3220			 * Figure the startblock based on the next block's
3221			 * start and the gap size.
3222			 */
3223			gotbno = ap->got.br_startblock;
3224			/*
3225			 * Heuristic!
3226			 * If the gap is large relative to the piece we're
3227			 * allocating, or using it gives us an invalid block
3228			 * number, then just use the start of the next block
3229			 * offset by our length.
3230			 */
3231			if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3232			    ISVALID(gotbno - gotdiff, gotbno))
3233				gotbno -= adjust;
3234			else if (ISVALID(gotbno - ap->length, gotbno)) {
3235				gotbno -= ap->length;
3236				gotdiff += adjust - ap->length;
3237			} else
3238				gotdiff += adjust;
3239		}
3240		/*
3241		 * No next block, just default.
3242		 */
3243		else
3244			gotbno = NULLFSBLOCK;
3245		/*
3246		 * If both valid, pick the better one, else the only good
3247		 * one, else ap->blkno is already set (to 0 or the inode block).
3248		 */
3249		if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) {
3250			ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
3251			return true;
3252		}
3253		if (prevbno != NULLFSBLOCK) {
3254			ap->blkno = prevbno;
3255			return true;
3256		}
3257		if (gotbno != NULLFSBLOCK) {
3258			ap->blkno = gotbno;
3259			return true;
3260		}
3261	}
3262#undef ISVALID
3263	return false;
3264}
3265
3266int
3267xfs_bmap_longest_free_extent(
3268	struct xfs_perag	*pag,
3269	struct xfs_trans	*tp,
3270	xfs_extlen_t		*blen)
3271{
3272	xfs_extlen_t		longest;
3273	int			error = 0;
3274
3275	if (!xfs_perag_initialised_agf(pag)) {
3276		error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_TRYLOCK,
3277				NULL);
3278		if (error)
3279			return error;
3280	}
3281
3282	longest = xfs_alloc_longest_free_extent(pag,
3283				xfs_alloc_min_freelist(pag->pag_mount, pag),
3284				xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3285	if (*blen < longest)
3286		*blen = longest;
3287
3288	return 0;
3289}
3290
3291static xfs_extlen_t
3292xfs_bmap_select_minlen(
3293	struct xfs_bmalloca	*ap,
3294	struct xfs_alloc_arg	*args,
3295	xfs_extlen_t		blen)
3296{
3297
3298	/*
3299	 * Since we used XFS_ALLOC_FLAG_TRYLOCK in _longest_free_extent(), blen
3300	 * may undercount free space; the request may still be satisfiable.
3301	 */
3302	if (blen < ap->minlen)
3303		return ap->minlen;
3304
3305	/*
3306	 * If the best seen length is less than the request length,
3307	 * use the best as the minimum, otherwise we've got the maxlen we
3308	 * were asked for.
3309	 */
3310	if (blen < args->maxlen)
3311		return blen;
3312	return args->maxlen;
3313}
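/*
 * The selection above amounts to a clamp; a rough equivalent (a sketch
 * only, assuming ap->minlen <= args->maxlen as callers arrange) would be:
 *
 *	args->minlen = clamp_t(xfs_extlen_t, blen, ap->minlen, args->maxlen);
 *
 * i.e. ask for the longest free extent seen, bounded below by the
 * caller's minimum and above by the caller's maximum allocation length.
 */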
3314
3315static int
3316xfs_bmap_btalloc_select_lengths(
3317	struct xfs_bmalloca	*ap,
3318	struct xfs_alloc_arg	*args,
3319	xfs_extlen_t		*blen)
3320{
3321	struct xfs_mount	*mp = args->mp;
3322	struct xfs_perag	*pag;
3323	xfs_agnumber_t		agno, startag;
3324	int			error = 0;
3325
3326	if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
3327		args->total = ap->minlen;
3328		args->minlen = ap->minlen;
3329		return 0;
3330	}
3331
3332	args->total = ap->total;
3333	startag = XFS_FSB_TO_AGNO(mp, ap->blkno);
3334	if (startag == NULLAGNUMBER)
3335		startag = 0;
3336
3337	*blen = 0;
3338	for_each_perag_wrap(mp, startag, agno, pag) {
3339		error = xfs_bmap_longest_free_extent(pag, args->tp, blen);
3340		if (error && error != -EAGAIN)
3341			break;
3342		error = 0;
3343		if (*blen >= args->maxlen)
3344			break;
3345	}
3346	if (pag)
3347		xfs_perag_rele(pag);
3348
3349	args->minlen = xfs_bmap_select_minlen(ap, args, *blen);
3350	return error;
3351}
3352
3353/* Update all inode and quota accounting for the allocation we just did. */
3354void
3355xfs_bmap_alloc_account(
3356	struct xfs_bmalloca	*ap)
3357{
3358	bool			isrt = XFS_IS_REALTIME_INODE(ap->ip) &&
3359					!(ap->flags & XFS_BMAPI_ATTRFORK);
3360	uint			fld;
3361
3362	if (ap->flags & XFS_BMAPI_COWFORK) {
3363		/*
3364		 * COW fork blocks are in-core only and thus are treated as
3365		 * in-core quota reservation (like delalloc blocks) even when
3366		 * converted to real blocks. The quota reservation is not
3367		 * accounted to disk until blocks are remapped to the data
3368		 * fork. So if these blocks were previously delalloc, we
3369		 * already have quota reservation and there's nothing to do
3370		 * yet.
3371		 */
3372		if (ap->wasdel) {
3373			xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)ap->length);
3374			return;
3375		}
3376
3377		/*
3378		 * Otherwise, we've allocated blocks in a hole. The transaction
3379		 * has acquired in-core quota reservation for this extent.
3380		 * Rather than account these as real blocks, however, we reduce
3381		 * the transaction quota reservation based on the allocation.
3382		 * This essentially transfers the transaction quota reservation
3383		 * to that of a delalloc extent.
3384		 */
3385		ap->ip->i_delayed_blks += ap->length;
3386		xfs_trans_mod_dquot_byino(ap->tp, ap->ip, isrt ?
3387				XFS_TRANS_DQ_RES_RTBLKS : XFS_TRANS_DQ_RES_BLKS,
3388				-(long)ap->length);
3389		return;
3390	}
3391
3392	/* data/attr fork only */
3393	ap->ip->i_nblocks += ap->length;
3394	xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3395	if (ap->wasdel) {
3396		ap->ip->i_delayed_blks -= ap->length;
3397		xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)ap->length);
3398		fld = isrt ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_DELBCOUNT;
3399	} else {
3400		fld = isrt ? XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
3401	}
3402
3403	xfs_trans_mod_dquot_byino(ap->tp, ap->ip, fld, ap->length);
3404}
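/*
 * Summary of the accounting cases above (a sketch of the branches as
 * written, not an independent specification):
 *
 *   COW fork,  was delalloc:  xfs_mod_delalloc(-length) only
 *   COW fork,  fresh hole:    i_delayed_blks += length,
 *                             transaction dquot reservation -= length
 *   data/attr, was delalloc:  i_nblocks += length, i_delayed_blks -= length,
 *                             dquot DELBCOUNT/DELRTBCOUNT += length
 *   data/attr, fresh extent:  i_nblocks += length,
 *                             dquot BCOUNT/RTBCOUNT += length
 */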
3405
3406static int
3407xfs_bmap_compute_alignments(
3408	struct xfs_bmalloca	*ap,
3409	struct xfs_alloc_arg	*args)
3410{
3411	struct xfs_mount	*mp = args->mp;
3412	xfs_extlen_t		align = 0; /* minimum allocation alignment */
3413	int			stripe_align = 0;
3414
3415	/* stripe alignment for allocation is determined by mount parameters */
3416	if (mp->m_swidth && xfs_has_swalloc(mp))
3417		stripe_align = mp->m_swidth;
3418	else if (mp->m_dalign)
3419		stripe_align = mp->m_dalign;
3420
3421	if (ap->flags & XFS_BMAPI_COWFORK)
3422		align = xfs_get_cowextsz_hint(ap->ip);
3423	else if (ap->datatype & XFS_ALLOC_USERDATA)
3424		align = xfs_get_extsz_hint(ap->ip);
3425	if (align) {
3426		if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
3427					ap->eof, 0, ap->conv, &ap->offset,
3428					&ap->length))
3429			ASSERT(0);
3430		ASSERT(ap->length);
3431	}
3432
3433	/* apply extent size hints if obtained earlier */
3434	if (align) {
3435		args->prod = align;
3436		div_u64_rem(ap->offset, args->prod, &args->mod);
3437		if (args->mod)
3438			args->mod = args->prod - args->mod;
3439	} else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
3440		args->prod = 1;
3441		args->mod = 0;
3442	} else {
3443		args->prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
3444		div_u64_rem(ap->offset, args->prod, &args->mod);
3445		if (args->mod)
3446			args->mod = args->prod - args->mod;
3447	}
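	/*
	 * Worked example (hypothetical values) for the extent size hint
	 * branch above: with align = 4 and ap->offset = 10, div_u64_rem()
	 * leaves mod = 2 and args->mod becomes 4 - 2 = 2, telling the
	 * allocator the request starts two blocks short of the next prod
	 * boundary (offset 12).
	 */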
3448
3449	return stripe_align;
3450}
3451
3452static void
3453xfs_bmap_process_allocated_extent(
3454	struct xfs_bmalloca	*ap,
3455	struct xfs_alloc_arg	*args,
3456	xfs_fileoff_t		orig_offset,
3457	xfs_extlen_t		orig_length)
3458{
3459	ap->blkno = args->fsbno;
3460	ap->length = args->len;
3461	/*
3462	 * If the extent size hint is active, we tried to round the
3463	 * caller's allocation request offset down to extsz and the
3464	 * length up to another extsz boundary.  If we found a free
3465	 * extent we mapped it in starting at this new offset.  If the
3466	 * newly mapped space isn't long enough to cover any of the
3467	 * range of offsets that was originally requested, move the
3468	 * mapping up so that we can fill as much of the caller's
3469	 * original request as possible.  Free space is apparently
3470	 * very fragmented so we're unlikely to be able to satisfy the
3471	 * hints anyway.
3472	 */
3473	if (ap->length <= orig_length)
3474		ap->offset = orig_offset;
3475	else if (ap->offset + ap->length < orig_offset + orig_length)
3476		ap->offset = orig_offset + orig_length - ap->length;
3477	xfs_bmap_alloc_account(ap);
3478}
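/*
 * Worked example for the remapping above (hypothetical values): the caller
 * asked for [100, 108) and an extent size hint widened the request to
 * [96, 112).  If the allocator returns 10 blocks at the aligned offset 96,
 * then 10 > 8 and 96 + 10 < 108, so ap->offset moves up to 108 - 10 = 98
 * and the mapping [98, 108) still covers the whole original request.  Had
 * only 6 blocks come back, 6 <= 8 would pin ap->offset back to 100.
 */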
3479
3480#ifdef DEBUG
3481static int
3482xfs_bmap_exact_minlen_extent_alloc(
3483	struct xfs_bmalloca	*ap)
3484{
3485	struct xfs_mount	*mp = ap->ip->i_mount;
3486	struct xfs_alloc_arg	args = { .tp = ap->tp, .mp = mp };
3487	xfs_fileoff_t		orig_offset;
3488	xfs_extlen_t		orig_length;
3489	int			error;
3490
3491	ASSERT(ap->length);
3492
3493	if (ap->minlen != 1) {
3494		ap->blkno = NULLFSBLOCK;
3495		ap->length = 0;
3496		return 0;
3497	}
3498
3499	orig_offset = ap->offset;
3500	orig_length = ap->length;
3501
3502	args.alloc_minlen_only = 1;
3503
3504	xfs_bmap_compute_alignments(ap, &args);
3505
3506	/*
3507	 * Unlike the longest extent available in an AG, we don't track
3508	 * the length of an AG's shortest extent.
3509	 * XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT is a debug only knob and
3510	 * hence we can afford to start traversing from the 0th AG since
3511	 * we need not be concerned about a drop in performance in
3512	 * "debug only" code paths.
3513	 */
3514	ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0);
3515
3516	args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
3517	args.minlen = args.maxlen = ap->minlen;
3518	args.total = ap->total;
3519
3520	args.alignment = 1;
3521	args.minalignslop = 0;
3522
3523	args.minleft = ap->minleft;
3524	args.wasdel = ap->wasdel;
3525	args.resv = XFS_AG_RESV_NONE;
3526	args.datatype = ap->datatype;
3527
3528	error = xfs_alloc_vextent_first_ag(&args, ap->blkno);
3529	if (error)
3530		return error;
3531
3532	if (args.fsbno != NULLFSBLOCK) {
3533		xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
3534			orig_length);
3535	} else {
3536		ap->blkno = NULLFSBLOCK;
3537		ap->length = 0;
3538	}
3539
3540	return 0;
3541}
3542#else
3543
3544#define xfs_bmap_exact_minlen_extent_alloc(bma) (-EFSCORRUPTED)
3545
3546#endif
3547
3548/*
3549 * If we are not low on available data blocks and we are allocating at
3550 * EOF, optimise allocation for contiguous file extension and/or stripe
3551 * alignment of the new extent.
3552 *
3553 * NOTE: ap->aeof is only set if the allocation length is >= the
3554 * stripe unit and the allocation offset is at the end of file.
3555 */
3556static int
3557xfs_bmap_btalloc_at_eof(
3558	struct xfs_bmalloca	*ap,
3559	struct xfs_alloc_arg	*args,
3560	xfs_extlen_t		blen,
3561	int			stripe_align,
3562	bool			ag_only)
3563{
3564	struct xfs_mount	*mp = args->mp;
3565	struct xfs_perag	*caller_pag = args->pag;
3566	int			error;
3567
3568	/*
3569	 * If there are already extents in the file, try an exact EOF block
3570	 * allocation to extend the file as a contiguous extent. If that fails,
3571	 * or it's the first allocation in a file, just try for a stripe aligned
3572	 * allocation.
3573	 */
3574	if (ap->offset) {
3575		xfs_extlen_t	nextminlen = 0;
3576
3577		/*
3578		 * Compute the minlen+alignment for the next case.  Set slop so
3579		 * that the value of minlen+alignment+slop doesn't go up between
3580		 * the calls.
3581		 */
3582		args->alignment = 1;
3583		if (blen > stripe_align && blen <= args->maxlen)
3584			nextminlen = blen - stripe_align;
3585		else
3586			nextminlen = args->minlen;
3587		if (nextminlen + stripe_align > args->minlen + 1)
3588			args->minalignslop = nextminlen + stripe_align -
3589					args->minlen - 1;
3590		else
3591			args->minalignslop = 0;
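		/*
		 * Worked example (hypothetical values): with stripe_align =
		 * 16, args->minlen = 1, args->maxlen = 64 and blen = 32,
		 * nextminlen = 32 - 16 = 16 and minalignslop becomes
		 * 16 + 16 - 1 - 1 = 30, so this exact-bno attempt reserves
		 * enough slop that minlen + alignment + slop cannot grow
		 * when we fall back to the aligned retry below.
		 */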
3592
3593		if (!caller_pag)
3594			args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno));
3595		error = xfs_alloc_vextent_exact_bno(args, ap->blkno);
3596		if (!caller_pag) {
3597			xfs_perag_put(args->pag);
3598			args->pag = NULL;
3599		}
3600		if (error)
3601			return error;
3602
3603		if (args->fsbno != NULLFSBLOCK)
3604			return 0;
3605		/*
3606		 * Exact allocation failed. Reset to try an aligned allocation
3607		 * according to the original allocation specification.
3608		 */
3609		args->alignment = stripe_align;
3610		args->minlen = nextminlen;
3611		args->minalignslop = 0;
3612	} else {
3613		/*
3614		 * Adjust minlen to try and preserve alignment if we
3615		 * can't guarantee an aligned maxlen extent.
3616		 */
3617		args->alignment = stripe_align;
3618		if (blen > args->alignment &&
3619		    blen <= args->maxlen + args->alignment)
3620			args->minlen = blen - args->alignment;
3621		args->minalignslop = 0;
3622	}
3623
3624	if (ag_only) {
3625		error = xfs_alloc_vextent_near_bno(args, ap->blkno);
3626	} else {
3627		args->pag = NULL;
3628		error = xfs_alloc_vextent_start_ag(args, ap->blkno);
3629		ASSERT(args->pag == NULL);
3630		args->pag = caller_pag;
3631	}
3632	if (error)
3633		return error;
3634
3635	if (args->fsbno != NULLFSBLOCK)
3636		return 0;
3637
3638	/*
3639	 * Allocation failed, so return the allocation args to their
3640	 * original non-aligned state so the caller can proceed on allocation
3641	 * failure as if this function was never called.
3642	 */
3643	args->alignment = 1;
3644	return 0;
3645}
3646
3647/*
3648 * We have failed multiple allocation attempts so now are in a low space
3649 * allocation situation. Try a locality-first, full-filesystem, minimum-length
3650 * allocation whilst still maintaining necessary total block reservation
3651 * requirements.
3652 *
3653 * If that fails, we are now critically low on space, so perform a last resort
3654 * allocation attempt: no reserve, no locality, blocking, minimum length, full
3655 * filesystem free space scan. We also indicate to future allocations in this
3656 * transaction that we are critically low on space so they don't waste time on
3657 * allocation modes that are unlikely to succeed.
3658 */
3659int
3660xfs_bmap_btalloc_low_space(
3661	struct xfs_bmalloca	*ap,
3662	struct xfs_alloc_arg	*args)
3663{
3664	int			error;
3665
3666	if (args->minlen > ap->minlen) {
3667		args->minlen = ap->minlen;
3668		error = xfs_alloc_vextent_start_ag(args, ap->blkno);
3669		if (error || args->fsbno != NULLFSBLOCK)
3670			return error;
3671	}
3672
3673	/* Last ditch attempt before failure is declared. */
3674	args->total = ap->minlen;
3675	error = xfs_alloc_vextent_first_ag(args, 0);
3676	if (error)
3677		return error;
3678	ap->tp->t_flags |= XFS_TRANS_LOWMODE;
3679	return 0;
3680}
3681
3682static int
3683xfs_bmap_btalloc_filestreams(
3684	struct xfs_bmalloca	*ap,
3685	struct xfs_alloc_arg	*args,
3686	int			stripe_align)
3687{
3688	xfs_extlen_t		blen = 0;
3689	int			error = 0;
3690
3691
3692	error = xfs_filestream_select_ag(ap, args, &blen);
3693	if (error)
3694		return error;
3695	ASSERT(args->pag);
3696
3697	/*
3698	 * If we are in low space mode, then optimal allocation will fail so
3699	 * prepare for minimal allocation and jump to the low space algorithm
3700	 * immediately.
3701	 */
3702	if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
3703		args->minlen = ap->minlen;
3704		ASSERT(args->fsbno == NULLFSBLOCK);
3705		goto out_low_space;
3706	}
3707
3708	args->minlen = xfs_bmap_select_minlen(ap, args, blen);
3709	if (ap->aeof)
3710		error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
3711				true);
3712
3713	if (!error && args->fsbno == NULLFSBLOCK)
3714		error = xfs_alloc_vextent_near_bno(args, ap->blkno);
3715
3716out_low_space:
3717	/*
3718	 * We are now done with the perag reference for the filestreams
3719	 * association provided by xfs_filestream_select_ag(). Release it now as
3720	 * we've either succeeded, had a fatal error or we are out of space and
3721	 * need to do a full filesystem scan for free space which will take it's
3722	 * own references.
3723	 */
3724	xfs_perag_rele(args->pag);
3725	args->pag = NULL;
3726	if (error || args->fsbno != NULLFSBLOCK)
3727		return error;
3728
3729	return xfs_bmap_btalloc_low_space(ap, args);
3730}
3731
3732static int
3733xfs_bmap_btalloc_best_length(
3734	struct xfs_bmalloca	*ap,
3735	struct xfs_alloc_arg	*args,
3736	int			stripe_align)
3737{
3738	xfs_extlen_t		blen = 0;
3739	int			error;
3740
3741	ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino);
3742	xfs_bmap_adjacent(ap);
3743
3744	/*
3745	 * Search for an allocation group with a single extent large enough for
3746	 * the request.  If one isn't found, then adjust the minimum allocation
3747	 * size to the largest space found.
3748	 */
3749	error = xfs_bmap_btalloc_select_lengths(ap, args, &blen);
3750	if (error)
3751		return error;
3752
3753	/*
3754	 * Don't attempt optimal EOF allocation if previous allocations barely
3755	 * succeeded due to being near ENOSPC. It is highly unlikely we'll get
3756	 * optimal or even aligned allocations in this case, so don't waste time
3757	 * trying.
3758	 */
3759	if (ap->aeof && !(ap->tp->t_flags & XFS_TRANS_LOWMODE)) {
3760		error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
3761				false);
3762		if (error || args->fsbno != NULLFSBLOCK)
3763			return error;
3764	}
3765
3766	error = xfs_alloc_vextent_start_ag(args, ap->blkno);
3767	if (error || args->fsbno != NULLFSBLOCK)
3768		return error;
3769
3770	return xfs_bmap_btalloc_low_space(ap, args);
3771}
3772
3773static int
3774xfs_bmap_btalloc(
3775	struct xfs_bmalloca	*ap)
3776{
3777	struct xfs_mount	*mp = ap->ip->i_mount;
3778	struct xfs_alloc_arg	args = {
3779		.tp		= ap->tp,
3780		.mp		= mp,
3781		.fsbno		= NULLFSBLOCK,
3782		.oinfo		= XFS_RMAP_OINFO_SKIP_UPDATE,
3783		.minleft	= ap->minleft,
3784		.wasdel		= ap->wasdel,
3785		.resv		= XFS_AG_RESV_NONE,
3786		.datatype	= ap->datatype,
3787		.alignment	= 1,
3788		.minalignslop	= 0,
3789	};
3790	xfs_fileoff_t		orig_offset;
3791	xfs_extlen_t		orig_length;
3792	int			error;
3793	int			stripe_align;
3794
3795	ASSERT(ap->length);
3796	orig_offset = ap->offset;
3797	orig_length = ap->length;
3798
3799	stripe_align = xfs_bmap_compute_alignments(ap, &args);
3800
3801	/* Trim the allocation back to the maximum an AG can fit. */
3802	args.maxlen = min(ap->length, mp->m_ag_max_usable);
3803
3804	if ((ap->datatype & XFS_ALLOC_USERDATA) &&
3805	    xfs_inode_is_filestream(ap->ip))
3806		error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align);
3807	else
3808		error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align);
3809	if (error)
3810		return error;
3811
3812	if (args.fsbno != NULLFSBLOCK) {
3813		xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
3814			orig_length);
3815	} else {
3816		ap->blkno = NULLFSBLOCK;
3817		ap->length = 0;
3818	}
3819	return 0;
3820}
3821
3822/* Trim extent to fit a logical block range. */
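/*
 * For example (illustrative values): trimming a mapping of
 * [startoff 10, startblock 100, blockcount 20] to bno = 15, len = 10
 * yields [startoff 15, startblock 105, blockcount 10].  A mapping that
 * ends at or before bno, or starts at or after bno + len, has its
 * blockcount set to zero.
 */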
3823void
3824xfs_trim_extent(
3825	struct xfs_bmbt_irec	*irec,
3826	xfs_fileoff_t		bno,
3827	xfs_filblks_t		len)
3828{
3829	xfs_fileoff_t		distance;
3830	xfs_fileoff_t		end = bno + len;
3831
3832	if (irec->br_startoff + irec->br_blockcount <= bno ||
3833	    irec->br_startoff >= end) {
3834		irec->br_blockcount = 0;
3835		return;
3836	}
3837
3838	if (irec->br_startoff < bno) {
3839		distance = bno - irec->br_startoff;
3840		if (isnullstartblock(irec->br_startblock))
3841			irec->br_startblock = DELAYSTARTBLOCK;
3842		if (irec->br_startblock != DELAYSTARTBLOCK &&
3843		    irec->br_startblock != HOLESTARTBLOCK)
3844			irec->br_startblock += distance;
3845		irec->br_startoff += distance;
3846		irec->br_blockcount -= distance;
3847	}
3848
3849	if (end < irec->br_startoff + irec->br_blockcount) {
3850		distance = irec->br_startoff + irec->br_blockcount - end;
3851		irec->br_blockcount -= distance;
3852	}
3853}
3854
3855/*
3856 * Trim the returned map to the required bounds
3857 */
3858STATIC void
3859xfs_bmapi_trim_map(
3860	struct xfs_bmbt_irec	*mval,
3861	struct xfs_bmbt_irec	*got,
3862	xfs_fileoff_t		*bno,
3863	xfs_filblks_t		len,
3864	xfs_fileoff_t		obno,
3865	xfs_fileoff_t		end,
3866	int			n,
3867	uint32_t		flags)
3868{
3869	if ((flags & XFS_BMAPI_ENTIRE) ||
3870	    got->br_startoff + got->br_blockcount <= obno) {
3871		*mval = *got;
3872		if (isnullstartblock(got->br_startblock))
3873			mval->br_startblock = DELAYSTARTBLOCK;
3874		return;
3875	}
3876
3877	if (obno > *bno)
3878		*bno = obno;
3879	ASSERT((*bno >= obno) || (n == 0));
3880	ASSERT(*bno < end);
3881	mval->br_startoff = *bno;
3882	if (isnullstartblock(got->br_startblock))
3883		mval->br_startblock = DELAYSTARTBLOCK;
3884	else
3885		mval->br_startblock = got->br_startblock +
3886					(*bno - got->br_startoff);
3887	/*
3888	 * Return the minimum of what we got and what we asked for, for
3889	 * the length.  We can use the len variable here because it is
3890	 * modified below and we could have been here before if the
3891	 * first part of the allocation didn't overlap what was asked
3892	 * for.
3893	 */
3894	mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3895			got->br_blockcount - (*bno - got->br_startoff));
3896	mval->br_state = got->br_state;
3897	ASSERT(mval->br_blockcount <= len);
3898	return;
3899}
3900
3901/*
3902 * Update and validate the extent map to return
3903 */
3904STATIC void
3905xfs_bmapi_update_map(
3906	struct xfs_bmbt_irec	**map,
3907	xfs_fileoff_t		*bno,
3908	xfs_filblks_t		*len,
3909	xfs_fileoff_t		obno,
3910	xfs_fileoff_t		end,
3911	int			*n,
3912	uint32_t		flags)
3913{
3914	xfs_bmbt_irec_t	*mval = *map;
3915
3916	ASSERT((flags & XFS_BMAPI_ENTIRE) ||
3917	       ((mval->br_startoff + mval->br_blockcount) <= end));
3918	ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
3919	       (mval->br_startoff < obno));
3920
3921	*bno = mval->br_startoff + mval->br_blockcount;
3922	*len = end - *bno;
3923	if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
3924		/* update previous map with new information */
3925		ASSERT(mval->br_startblock == mval[-1].br_startblock);
3926		ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
3927		ASSERT(mval->br_state == mval[-1].br_state);
3928		mval[-1].br_blockcount = mval->br_blockcount;
3929		mval[-1].br_state = mval->br_state;
3930	} else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
3931		   mval[-1].br_startblock != DELAYSTARTBLOCK &&
3932		   mval[-1].br_startblock != HOLESTARTBLOCK &&
3933		   mval->br_startblock == mval[-1].br_startblock +
3934					  mval[-1].br_blockcount &&
3935		   mval[-1].br_state == mval->br_state) {
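		/* Extend the previous real mapping with this contiguous one. */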
3936		ASSERT(mval->br_startoff ==
3937		       mval[-1].br_startoff + mval[-1].br_blockcount);
3938		mval[-1].br_blockcount += mval->br_blockcount;
3939	} else if (*n > 0 &&
3940		   mval->br_startblock == DELAYSTARTBLOCK &&
3941		   mval[-1].br_startblock == DELAYSTARTBLOCK &&
3942		   mval->br_startoff ==
3943		   mval[-1].br_startoff + mval[-1].br_blockcount) {
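		/* Merge two logically adjacent delalloc mappings. */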
3944		mval[-1].br_blockcount += mval->br_blockcount;
3945		mval[-1].br_state = mval->br_state;
3946	} else if (!((*n == 0) &&
3947		     ((mval->br_startoff + mval->br_blockcount) <=
3948		      obno))) {
3949		mval++;
3950		(*n)++;
3951	}
3952	*map = mval;
3953}
3954
3955/*
3956 * Map file blocks to filesystem blocks without allocation.
3957 */
3958int
3959xfs_bmapi_read(
3960	struct xfs_inode	*ip,
3961	xfs_fileoff_t		bno,
3962	xfs_filblks_t		len,
3963	struct xfs_bmbt_irec	*mval,
3964	int			*nmap,
3965	uint32_t		flags)
3966{
3967	struct xfs_mount	*mp = ip->i_mount;
3968	int			whichfork = xfs_bmapi_whichfork(flags);
3969	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
3970	struct xfs_bmbt_irec	got;
3971	xfs_fileoff_t		obno;
3972	xfs_fileoff_t		end;
3973	struct xfs_iext_cursor	icur;
3974	int			error;
3975	bool			eof = false;
3976	int			n = 0;
3977
3978	ASSERT(*nmap >= 1);
3979	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE)));
3980	xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
3981
3982	if (WARN_ON_ONCE(!ifp)) {
3983		xfs_bmap_mark_sick(ip, whichfork);
3984		return -EFSCORRUPTED;
3985	}
3986
3987	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
3988	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
3989		xfs_bmap_mark_sick(ip, whichfork);
3990		return -EFSCORRUPTED;
3991	}
3992
3993	if (xfs_is_shutdown(mp))
3994		return -EIO;
3995
3996	XFS_STATS_INC(mp, xs_blk_mapr);
3997
3998	error = xfs_iread_extents(NULL, ip, whichfork);
3999	if (error)
4000		return error;
4001
4002	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
4003		eof = true;
4004	end = bno + len;
4005	obno = bno;
4006
4007	while (bno < end && n < *nmap) {
4008		/* Reading past eof, act as though there's a hole up to end. */
4009		if (eof)
4010			got.br_startoff = end;
4011		if (got.br_startoff > bno) {
4012			/* Reading in a hole.  */
4013			mval->br_startoff = bno;
4014			mval->br_startblock = HOLESTARTBLOCK;
4015			mval->br_blockcount =
4016				XFS_FILBLKS_MIN(len, got.br_startoff - bno);
4017			mval->br_state = XFS_EXT_NORM;
4018			bno += mval->br_blockcount;
4019			len -= mval->br_blockcount;
4020			mval++;
4021			n++;
4022			continue;
4023		}
4024
4025		/* set up the extent map to return. */
4026		xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
4027		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4028
4029		/* If we're done, stop now. */
4030		if (bno >= end || n >= *nmap)
4031			break;
4032
4033		/* Else go on to the next record. */
4034		if (!xfs_iext_next_extent(ifp, &icur, &got))
4035			eof = true;
4036	}
4037	*nmap = n;
4038	return 0;
4039}
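
/*
 * A minimal usage sketch (illustrative, not from this file): callers
 * size mval and nmap for the range they want mapped, e.g.
 *
 *	struct xfs_bmbt_irec	map;
 *	int			nmap = 1;
 *	int			error;
 *
 *	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
 *			&map, &nmap, 0);
 *
 * with the inode's ILOCK held.  On return nmap holds the number of
 * mappings filled in; holes are reported with
 * br_startblock == HOLESTARTBLOCK.
 */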
4040
4041/*
4042 * Add a delayed allocation extent to an inode. Blocks are reserved from the
4043 * global pool and the extent inserted into the inode in-core extent tree.
4044 *
4045 * On entry, got refers to the first extent beyond the offset of the extent to
4046 * allocate or eof is specified if no such extent exists. On return, got refers
4047 * to the extent record that was inserted to the inode fork.
4048 *
4049 * Note that the allocated extent may have been merged with contiguous extents
4050 * during insertion into the inode fork. Thus, got does not reflect the current
4051 * state of the inode fork on return. If necessary, the caller can use lastx to
4052 * look up the updated record in the inode fork.
4053 */
4054int
4055xfs_bmapi_reserve_delalloc(
4056	struct xfs_inode	*ip,
4057	int			whichfork,
4058	xfs_fileoff_t		off,
4059	xfs_filblks_t		len,
4060	xfs_filblks_t		prealloc,
4061	struct xfs_bmbt_irec	*got,
4062	struct xfs_iext_cursor	*icur,
4063	int			eof)
4064{
4065	struct xfs_mount	*mp = ip->i_mount;
4066	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
4067	xfs_extlen_t		alen;
4068	xfs_extlen_t		indlen;
4069	int			error;
4070	xfs_fileoff_t		aoff = off;
4071
4072	/*
4073	 * Cap the alloc length. Keep track of prealloc so we know whether to
4074	 * tag the inode before we return.
4075	 */
4076	alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN);
4077	if (!eof)
4078		alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
4079	if (prealloc && alen >= len)
4080		prealloc = alen - len;
4081
4082	/* Figure out the extent size, adjust alen */
4083	if (whichfork == XFS_COW_FORK) {
4084		struct xfs_bmbt_irec	prev;
4085		xfs_extlen_t		extsz = xfs_get_cowextsz_hint(ip);
4086
4087		if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
4088			prev.br_startoff = NULLFILEOFF;
4089
4090		error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
4091					       1, 0, &aoff, &alen);
4092		ASSERT(!error);
4093	}
4094
4095	/*
4096	 * Make a transaction-less quota reservation for delayed allocation
4097	 * blocks.  This number gets adjusted later.  If the reservation
4098	 * fails we return before any blocks have been allocated.
4099	 */
4100	error = xfs_quota_reserve_blkres(ip, alen);
4101	if (error)
4102		return error;
4103
4104	/*
4105	 * Split the superblock counter updates for alen and indlen since
4106	 * they could be coming from different places.
4107	 */
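	/* Worst-case count of indirect (bmbt) blocks needed for alen blocks. */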
4108	indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
4109	ASSERT(indlen > 0);
4110
4111	error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
4112	if (error)
4113		goto out_unreserve_quota;
4114
4115	error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
4116	if (error)
4117		goto out_unreserve_blocks;
4118
4119
4120	ip->i_delayed_blks += alen;
4121	xfs_mod_delalloc(ip->i_mount, alen + indlen);
4122
4123	got->br_startoff = aoff;
4124	got->br_startblock = nullstartblock(indlen);
4125	got->br_blockcount = alen;
4126	got->br_state = XFS_EXT_NORM;
4127
4128	xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
4129
4130	/*
4131	 * Tag the inode if blocks were preallocated. Note that COW fork
4132	 * preallocation can occur at the start or end of the extent, even when
4133	 * prealloc == 0, so we must also check the aligned offset and length.
4134	 */
4135	if (whichfork == XFS_DATA_FORK && prealloc)
4136		xfs_inode_set_eofblocks_tag(ip);
4137	if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
4138		xfs_inode_set_cowblocks_tag(ip);
4139
4140	return 0;
4141
4142out_unreserve_blocks:
4143	xfs_mod_fdblocks(mp, alen, false);
4144out_unreserve_quota:
4145	if (XFS_IS_QUOTA_ON(mp))
4146		xfs_quota_unreserve_blkres(ip, alen);
4147	return error;
4148}
4149
4150static int
4151xfs_bmap_alloc_userdata(
4152	struct xfs_bmalloca	*bma)
4153{
4154	struct xfs_mount	*mp = bma->ip->i_mount;
4155	int			whichfork = xfs_bmapi_whichfork(bma->flags);
4156	int			error;
4157
4158	/*
4159	 * Set the data type being allocated. For the data fork, the first data
4160	 * in the file is treated differently to all other allocations. For the
4161	 * attribute fork, we only need to ensure the allocated range is not on
4162	 * the busy list.
4163	 */
4164	bma->datatype = XFS_ALLOC_NOBUSY;
4165	if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
4166		bma->datatype |= XFS_ALLOC_USERDATA;
4167		if (bma->offset == 0)
4168			bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
4169
4170		if (mp->m_dalign && bma->length >= mp->m_dalign) {
4171			error = xfs_bmap_isaeof(bma, whichfork);
4172			if (error)
4173				return error;
4174		}
4175
4176		if (XFS_IS_REALTIME_INODE(bma->ip))
4177			return xfs_bmap_rtalloc(bma);
4178	}
4179
4180	if (unlikely(XFS_TEST_ERROR(false, mp,
4181			XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
4182		return xfs_bmap_exact_minlen_extent_alloc(bma);
4183
4184	return xfs_bmap_btalloc(bma);
4185}
4186
4187static int
4188xfs_bmapi_allocate(
4189	struct xfs_bmalloca	*bma)
4190{
4191	struct xfs_mount	*mp = bma->ip->i_mount;
4192	int			whichfork = xfs_bmapi_whichfork(bma->flags);
4193	struct xfs_ifork	*ifp = xfs_ifork_ptr(bma->ip, whichfork);
4194	int			tmp_logflags = 0;
4195	int			error;
4196
4197	ASSERT(bma->length > 0);
4198
4199	/*
4200	 * For the wasdelay case, we could just allocate the blocks asked for
4201	 * in this bmap call, but converting the whole delalloc extent is better.
4202	 */
4203	if (bma->wasdel) {
4204		bma->length = (xfs_extlen_t)bma->got.br_blockcount;
4205		bma->offset = bma->got.br_startoff;
4206		if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev))
4207			bma->prev.br_startoff = NULLFILEOFF;
4208	} else {
4209		bma->length = XFS_FILBLKS_MIN(bma->length, XFS_MAX_BMBT_EXTLEN);
4210		if (!bma->eof)
4211			bma->length = XFS_FILBLKS_MIN(bma->length,
4212					bma->got.br_startoff - bma->offset);
4213	}
4214
4215	if (bma->flags & XFS_BMAPI_CONTIG)
4216		bma->minlen = bma->length;
4217	else
4218		bma->minlen = 1;
4219
4220	if (bma->flags & XFS_BMAPI_METADATA) {
4221		if (unlikely(XFS_TEST_ERROR(false, mp,
4222				XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
4223			error = xfs_bmap_exact_minlen_extent_alloc(bma);
4224		else
4225			error = xfs_bmap_btalloc(bma);
4226	} else {
4227		error = xfs_bmap_alloc_userdata(bma);
4228	}
4229	if (error || bma->blkno == NULLFSBLOCK)
4230		return error;
4231
4232	if (bma->flags & XFS_BMAPI_ZERO) {
4233		error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
4234		if (error)
4235			return error;
4236	}
4237
4238	if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur)
4239		bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4240	/* Bump the number of extents we've allocated in this call. */
4244	bma->nallocs++;
4245
4246	if (bma->cur && bma->wasdel)
4247		bma->cur->bc_flags |= XFS_BTREE_BMBT_WASDEL;
4248
4249	bma->got.br_startoff = bma->offset;
4250	bma->got.br_startblock = bma->blkno;
4251	bma->got.br_blockcount = bma->length;
4252	bma->got.br_state = XFS_EXT_NORM;
4253
4254	if (bma->flags & XFS_BMAPI_PREALLOC)
4255		bma->got.br_state = XFS_EXT_UNWRITTEN;
4256
4257	if (bma->wasdel)
4258		error = xfs_bmap_add_extent_delay_real(bma, whichfork);
4259	else
4260		error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
4261				whichfork, &bma->icur, &bma->cur, &bma->got,
4262				&bma->logflags, bma->flags);
4263
4264	bma->logflags |= tmp_logflags;
4265	if (error)
4266		return error;
4267
4268	/*
4269	 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
4270	 * or xfs_bmap_add_extent_hole_real might have merged it into one of
4271	 * the neighbouring ones.
4272	 */
4273	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4274
4275	ASSERT(bma->got.br_startoff <= bma->offset);
4276	ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
4277	       bma->offset + bma->length);
4278	ASSERT(bma->got.br_state == XFS_EXT_NORM ||
4279	       bma->got.br_state == XFS_EXT_UNWRITTEN);
4280	return 0;
4281}
4282
4283STATIC int
4284xfs_bmapi_convert_unwritten(
4285	struct xfs_bmalloca	*bma,
4286	struct xfs_bmbt_irec	*mval,
4287	xfs_filblks_t		len,
4288	uint32_t		flags)
4289{
4290	int			whichfork = xfs_bmapi_whichfork(flags);
4291	struct xfs_ifork	*ifp = xfs_ifork_ptr(bma->ip, whichfork);
4292	int			tmp_logflags = 0;
4293	int			error;
4294
4295	/* check if we need to do unwritten->real conversion */
4296	if (mval->br_state == XFS_EXT_UNWRITTEN &&
4297	    (flags & XFS_BMAPI_PREALLOC))
4298		return 0;
4299
4300	/* check if we need to do real->unwritten conversion */
4301	if (mval->br_state == XFS_EXT_NORM &&
4302	    (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4303			(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4304		return 0;
4305
4306	/*
4307	 * Modify (by adding) the state flag, if writing.
4308	 */
4309	ASSERT(mval->br_blockcount <= len);
4310	if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur) {
4311		bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4312					bma->ip, whichfork);
4313	}
4314	mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4315				? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4316
4317	/*
4318	 * Before insertion into the bmbt, zero the range being converted
4319	 * if required.
4320	 */
4321	if (flags & XFS_BMAPI_ZERO) {
4322		error = xfs_zero_extent(bma->ip, mval->br_startblock,
4323					mval->br_blockcount);
4324		if (error)
4325			return error;
4326	}
4327
4328	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
4329			&bma->icur, &bma->cur, mval, &tmp_logflags);
4330	/*
4331	 * Log the inode core unconditionally in the unwritten extent conversion
4332	 * path because the conversion might not have done so (e.g., if the
4333	 * extent count hasn't changed). We need to make sure the inode is dirty
4334	 * in the transaction for the sake of fsync(), even if nothing has
4335	 * changed, because fsync() will not force the log for this transaction
4336	 * unless it sees the inode pinned.
4337	 *
4338	 * Note: If we're only converting cow fork extents, there aren't
4339	 * any on-disk updates to make, so we don't need to log anything.
4340	 */
4341	if (whichfork != XFS_COW_FORK)
4342		bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4343	if (error)
4344		return error;
4345
4346	/*
4347	 * Update our extent pointer, given that
4348	 * xfs_bmap_add_extent_unwritten_real might have merged it into one
4349	 * of the neighbouring ones.
4350	 */
4351	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4352
4353	/*
4354	 * We may have combined previously unwritten space with written space,
4355	 * so generate another request.
4356	 */
4357	if (mval->br_blockcount < len)
4358		return -EAGAIN;
4359	return 0;
4360}
4361
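/*
 * Work out the minimum number of free blocks an allocation must leave
 * available so that a subsequent bmbt insert cannot fail: none once the
 * transaction is already tied to an AG, one block for an extent-format
 * fork, or btree height + 1 for a btree-format fork.
 */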
4362xfs_extlen_t
4363xfs_bmapi_minleft(
4364	struct xfs_trans	*tp,
4365	struct xfs_inode	*ip,
4366	int			fork)
4367{
4368	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, fork);
4369
4370	if (tp && tp->t_highest_agno != NULLAGNUMBER)
4371		return 0;
4372	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
4373		return 1;
4374	return be16_to_cpu(ifp->if_broot->bb_level) + 1;
4375}
4376
4377/*
4378 * Log whatever the flags say, even if error.  Otherwise we might miss detecting
4379 * a case where the data is changed, there's an error, and it's not logged so we
4380 * don't shut down when we should.  Don't bother logging extents/btree changes if
4381 * we converted to the other format.
4382 */
4383static void
4384xfs_bmapi_finish(
4385	struct xfs_bmalloca	*bma,
4386	int			whichfork,
4387	int			error)
4388{
4389	struct xfs_ifork	*ifp = xfs_ifork_ptr(bma->ip, whichfork);
4390
4391	if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
4392	    ifp->if_format != XFS_DINODE_FMT_EXTENTS)
4393		bma->logflags &= ~xfs_ilog_fext(whichfork);
4394	else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
4395		 ifp->if_format != XFS_DINODE_FMT_BTREE)
4396		bma->logflags &= ~xfs_ilog_fbroot(whichfork);
4397
4398	if (bma->logflags)
4399		xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
4400	if (bma->cur)
4401		xfs_btree_del_cursor(bma->cur, error);
4402}
4403
4404/*
4405 * Map file blocks to filesystem blocks, and allocate blocks or convert the
4406 * extent state if necessary.  Detailed behaviour is controlled by the flags
4407 * parameter.  Only allocates blocks from a single allocation group, to avoid
4408 * locking problems.
4409 */
4410int
4411xfs_bmapi_write(
4412	struct xfs_trans	*tp,		/* transaction pointer */
4413	struct xfs_inode	*ip,		/* incore inode */
4414	xfs_fileoff_t		bno,		/* starting file offs. mapped */
4415	xfs_filblks_t		len,		/* length to map in file */
4416	uint32_t		flags,		/* XFS_BMAPI_... */
4417	xfs_extlen_t		total,		/* total blocks needed */
4418	struct xfs_bmbt_irec	*mval,		/* output: map values */
4419	int			*nmap)		/* i/o: mval size/count */
4420{
4421	struct xfs_bmalloca	bma = {
4422		.tp		= tp,
4423		.ip		= ip,
4424		.total		= total,
4425	};
4426	struct xfs_mount	*mp = ip->i_mount;
4427	int			whichfork = xfs_bmapi_whichfork(flags);
4428	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
4429	xfs_fileoff_t		end;		/* end of mapped file region */
4430	bool			eof = false;	/* after the end of extents */
4431	int			error;		/* error return */
4432	int			n;		/* current extent index */
4433	xfs_fileoff_t		obno;		/* old block number (offset) */
4434
4435#ifdef DEBUG
4436	xfs_fileoff_t		orig_bno;	/* original block number value */
4437	int			orig_flags;	/* original flags arg value */
4438	xfs_filblks_t		orig_len;	/* original value of len arg */
4439	struct xfs_bmbt_irec	*orig_mval;	/* original value of mval */
4440	int			orig_nmap;	/* original value of *nmap */
4441
4442	orig_bno = bno;
4443	orig_len = len;
4444	orig_flags = flags;
4445	orig_mval = mval;
4446	orig_nmap = *nmap;
4447#endif
4448
4449	ASSERT(*nmap >= 1);
4450	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4451	ASSERT(tp != NULL);
4452	ASSERT(len > 0);
4453	ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL);
4454	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
4455	ASSERT(!(flags & XFS_BMAPI_REMAP));
4456
4457	/* zeroing is currently only for data extents, not metadata */
4458	ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4459			(XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4460	/*
4461	 * we can allocate unwritten extents or pre-zero allocated blocks,
4462	 * but it makes no sense to do both at once. This would result in
4463	 * zeroing the unwritten extent twice while it would still remain
4464	 * an unwritten extent.
4465	 */
4466	ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4467			(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4468
4469	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4470	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4471		xfs_bmap_mark_sick(ip, whichfork);
4472		return -EFSCORRUPTED;
4473	}
4474
4475	if (xfs_is_shutdown(mp))
4476		return -EIO;
4477
4478	XFS_STATS_INC(mp, xs_blk_mapw);
4479
4480	error = xfs_iread_extents(tp, ip, whichfork);
4481	if (error)
4482		goto error0;
4483
4484	if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
4485		eof = true;
4486	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4487		bma.prev.br_startoff = NULLFILEOFF;
4488	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4489
4490	n = 0;
4491	end = bno + len;
4492	obno = bno;
4493	while (bno < end && n < *nmap) {
4494		bool			need_alloc = false, wasdelay = false;
4495
4496		/* in hole or beyond EOF? */
4497		if (eof || bma.got.br_startoff > bno) {
4498			/*
4499			 * CoW fork conversions should /never/ hit EOF or
4500			 * holes.  There should always be something for us
4501			 * to work on.
4502			 */
4503			ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
4504			         (flags & XFS_BMAPI_COWFORK)));
4505
4506			need_alloc = true;
4507		} else if (isnullstartblock(bma.got.br_startblock)) {
4508			wasdelay = true;
4509		}
4510
4511		/*
4512		 * First, deal with the hole before the allocated space
4513		 * that we found, if any.
4514		 */
4515		if (need_alloc || wasdelay) {
4516			bma.eof = eof;
4517			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4518			bma.wasdel = wasdelay;
4519			bma.offset = bno;
4520			bma.flags = flags;
4521
4522			/*
4523			 * There's a 32/64 bit type mismatch between the
4524			 * allocation length request (which can be 64 bits in
4525			 * length) and the bma length request, which is
4526			 * xfs_extlen_t and therefore 32 bits. Hence we have to
4527			 * check for 32-bit overflows and handle them here.
4528			 */
4529			if (len > (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN)
4530				bma.length = XFS_MAX_BMBT_EXTLEN;
4531			else
4532				bma.length = len;
4533
4534			ASSERT(len > 0);
4535			ASSERT(bma.length > 0);
4536			error = xfs_bmapi_allocate(&bma);
4537			if (error)
4538				goto error0;
4539			if (bma.blkno == NULLFSBLOCK)
4540				break;
4541
4542			/*
4543			 * If this is a CoW allocation, record the data in
4544			 * the refcount btree for orphan recovery.
4545			 */
4546			if (whichfork == XFS_COW_FORK)
4547				xfs_refcount_alloc_cow_extent(tp, bma.blkno,
4548						bma.length);
4549		}
4550
4551		/* Deal with the allocated space we found.  */
4552		xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
4553							end, n, flags);
4554
4555		/* Execute unwritten extent conversion if necessary */
4556		error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
4557		if (error == -EAGAIN)
4558			continue;
4559		if (error)
4560			goto error0;
4561
4562		/* update the extent map to return */
4563		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4564
4565		/*
4566		 * If we're done, stop now.  Stop when we've allocated
4567		 * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
4568		 * the transaction may get too big.
4569		 */
4570		if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
4571			break;
4572
4573		/* Else go on to the next record. */
4574		bma.prev = bma.got;
4575		if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
4576			eof = true;
4577	}
4578	*nmap = n;
4579
4580	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4581			whichfork);
4582	if (error)
4583		goto error0;
4584
4585	ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE ||
4586	       ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
4587	xfs_bmapi_finish(&bma, whichfork, 0);
4588	xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
4589		orig_nmap, *nmap);
4590	return 0;
4591error0:
4592	xfs_bmapi_finish(&bma, whichfork, error);
4593	return error;
4594}
4595
4596/*
4597 * Convert an existing delalloc extent to real blocks based on file offset. This
4598 * attempts to allocate the entire delalloc extent and may require multiple
4599 * invocations to allocate the target offset if a large enough physical extent
4600 * is not available.
4601 */
4602int
4603xfs_bmapi_convert_delalloc(
4604	struct xfs_inode	*ip,
4605	int			whichfork,
4606	xfs_off_t		offset,
4607	struct iomap		*iomap,
4608	unsigned int		*seq)
4609{
4610	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
4611	struct xfs_mount	*mp = ip->i_mount;
4612	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
4613	struct xfs_bmalloca	bma = { NULL };
4614	uint16_t		flags = 0;
4615	struct xfs_trans	*tp;
4616	int			error;
4617
4618	if (whichfork == XFS_COW_FORK)
4619		flags |= IOMAP_F_SHARED;
4620
4621	/*
4622	 * Space for the extent and indirect blocks was reserved when the
4623	 * delalloc extent was created so there's no need to do so here.
4624	 */
4625	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
4626				XFS_TRANS_RESERVE, &tp);
4627	if (error)
4628		return error;
4629
4630	xfs_ilock(ip, XFS_ILOCK_EXCL);
4631	xfs_trans_ijoin(tp, ip, 0);
4632
4633	error = xfs_iext_count_may_overflow(ip, whichfork,
4634			XFS_IEXT_ADD_NOSPLIT_CNT);
4635	if (error == -EFBIG)
4636		error = xfs_iext_count_upgrade(tp, ip,
4637				XFS_IEXT_ADD_NOSPLIT_CNT);
4638	if (error)
4639		goto out_trans_cancel;
4640
4641	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
4642	    bma.got.br_startoff > offset_fsb) {
4643		/*
4644		 * No extent found in the range we are trying to convert.  This
4645		 * should only happen for the COW fork, where another thread
4646		 * might have moved the extent to the data fork in the meantime.
4647		 */
4648		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
4649		error = -EAGAIN;
4650		goto out_trans_cancel;
4651	}
4652
4653	/*
4654	 * If we find a real extent here we raced with another thread converting
4655	 * the extent.  Just return the real extent at this offset.
4656	 */
4657	if (!isnullstartblock(bma.got.br_startblock)) {
4658		xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
4659				xfs_iomap_inode_sequence(ip, flags));
4660		*seq = READ_ONCE(ifp->if_seq);
4661		goto out_trans_cancel;
4662	}
4663
4664	bma.tp = tp;
4665	bma.ip = ip;
4666	bma.wasdel = true;
4667	bma.offset = bma.got.br_startoff;
4668	bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount,
4669			XFS_MAX_BMBT_EXTLEN);
4670	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4671
4672	/*
4673	 * When we're converting the delalloc reservations backing dirty pages
4674	 * in the page cache, we must be careful about how we create the new
4675	 * extents:
4676	 *
4677	 * New CoW fork extents are created unwritten, turned into real extents
4678	 * when we're about to write the data to disk, and mapped into the data
4679	 * fork after the write finishes.  End of story.
4680	 *
4681	 * New data fork extents must be mapped in as unwritten and converted
4682	 * to real extents after the write succeeds to avoid exposing stale
4683	 * disk contents if we crash.
4684	 */
4685	bma.flags = XFS_BMAPI_PREALLOC;
4686	if (whichfork == XFS_COW_FORK)
4687		bma.flags |= XFS_BMAPI_COWFORK;
4688
4689	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4690		bma.prev.br_startoff = NULLFILEOFF;
4691
4692	error = xfs_bmapi_allocate(&bma);
4693	if (error)
4694		goto out_finish;
4695
4696	error = -ENOSPC;
4697	if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
4698		goto out_finish;
4699	if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock))) {
4700		xfs_bmap_mark_sick(ip, whichfork);
4701		error = -EFSCORRUPTED;
4702		goto out_finish;
4703	}
4704
4705	XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
4706	XFS_STATS_INC(mp, xs_xstrat_quick);
4707
4708	ASSERT(!isnullstartblock(bma.got.br_startblock));
4709	xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
4710				xfs_iomap_inode_sequence(ip, flags));
4711	*seq = READ_ONCE(ifp->if_seq);
4712
4713	if (whichfork == XFS_COW_FORK)
4714		xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
4715
4716	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4717			whichfork);
4718	if (error)
4719		goto out_finish;
4720
4721	xfs_bmapi_finish(&bma, whichfork, 0);
4722	error = xfs_trans_commit(tp);
4723	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4724	return error;
4725
4726out_finish:
4727	xfs_bmapi_finish(&bma, whichfork, error);
4728out_trans_cancel:
4729	xfs_trans_cancel(tp);
4730	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4731	return error;
4732}
4733
4734int
4735xfs_bmapi_remap(
4736	struct xfs_trans	*tp,
4737	struct xfs_inode	*ip,
4738	xfs_fileoff_t		bno,
4739	xfs_filblks_t		len,
4740	xfs_fsblock_t		startblock,
4741	uint32_t		flags)
4742{
4743	struct xfs_mount	*mp = ip->i_mount;
4744	struct xfs_ifork	*ifp;
4745	struct xfs_btree_cur	*cur = NULL;
4746	struct xfs_bmbt_irec	got;
4747	struct xfs_iext_cursor	icur;
4748	int			whichfork = xfs_bmapi_whichfork(flags);
4749	int			logflags = 0, error;
4750
4751	ifp = xfs_ifork_ptr(ip, whichfork);
4752	ASSERT(len > 0);
4753	ASSERT(len <= (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN);
4754	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
4755	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
4756			   XFS_BMAPI_NORMAP)));
4757	ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
4758			(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
4759
4760	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4761	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4762		xfs_bmap_mark_sick(ip, whichfork);
4763		return -EFSCORRUPTED;
4764	}
4765
4766	if (xfs_is_shutdown(mp))
4767		return -EIO;
4768
4769	error = xfs_iread_extents(tp, ip, whichfork);
4770	if (error)
4771		return error;
4772
4773	if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
4774		/* make sure we only reflink into a hole. */
4775		ASSERT(got.br_startoff > bno);
4776		ASSERT(got.br_startoff - bno >= len);
4777	}
4778
4779	ip->i_nblocks += len;
4780	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4781
4782	if (ifp->if_format == XFS_DINODE_FMT_BTREE)
4783		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
4784
4785	got.br_startoff = bno;
4786	got.br_startblock = startblock;
4787	got.br_blockcount = len;
4788	if (flags & XFS_BMAPI_PREALLOC)
4789		got.br_state = XFS_EXT_UNWRITTEN;
4790	else
4791		got.br_state = XFS_EXT_NORM;
4792
4793	error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
4794			&cur, &got, &logflags, flags);
4795	if (error)
4796		goto error0;
4797
4798	error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
4799
4800error0:
4801	if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS)
4802		logflags &= ~XFS_ILOG_DEXT;
4803	else if (ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
4804		logflags &= ~XFS_ILOG_DBROOT;
4805
4806	if (logflags)
4807		xfs_trans_log_inode(tp, ip, logflags);
4808	if (cur)
4809		xfs_btree_del_cursor(cur, error);
4810	return error;
4811}
4812
4813/*
4814 * When a delalloc extent is split (e.g., due to a hole punch), the original
4815 * indlen reservation must be shared across the two new extents that are left
4816 * behind.
4817 *
4818 * Given the original reservation and the worst case indlen for the two new
4819 * extents (as calculated by xfs_bmap_worst_indlen()), split the original
4820 * reservation fairly across the two new extents. If necessary, steal available
4821 * blocks from a deleted extent to make up a reservation deficiency (e.g., if
4822 * ores == 1). The number of stolen blocks is returned. The availability and
4823 * subsequent accounting of stolen blocks is the responsibility of the caller.
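 *
 * Illustrative example: with ores = 10, *indlen1 = 8, *indlen2 = 6 and
 * avail = 2 we steal min(14 - 10, 2) = 2 blocks, making ores 12, then
 * scale by resfactor = 1200 / 14 = 85 to len1 = 6 and len2 = 5, and
 * hand the one leftover block to len1.  The result is *indlen1 = 7,
 * *indlen2 = 5 and a return value of 2.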
4824 */
4825static xfs_filblks_t
4826xfs_bmap_split_indlen(
4827	xfs_filblks_t			ores,		/* original res. */
4828	xfs_filblks_t			*indlen1,	/* ext1 worst indlen */
4829	xfs_filblks_t			*indlen2,	/* ext2 worst indlen */
4830	xfs_filblks_t			avail)		/* stealable blocks */
4831{
4832	xfs_filblks_t			len1 = *indlen1;
4833	xfs_filblks_t			len2 = *indlen2;
4834	xfs_filblks_t			nres = len1 + len2; /* new total res. */
4835	xfs_filblks_t			stolen = 0;
4836	xfs_filblks_t			resfactor;
4837
4838	/*
4839	 * Steal as many blocks as we can to try and satisfy the worst case
4840	 * indlen for both new extents.
4841	 */
4842	if (ores < nres && avail)
4843		stolen = XFS_FILBLKS_MIN(nres - ores, avail);
4844	ores += stolen;
4845
4846	 /* nothing else to do if we've satisfied the new reservation */
4847	if (ores >= nres)
4848		return stolen;
4849
4850	/*
4851	 * We can't meet the total required reservation for the two extents.
4852	 * Calculate the percent of the overall shortage between both extents
4853	 * and apply this percentage to each of the requested indlen values.
4854	 * This distributes the shortage fairly and reduces the chances that one
4855	 * of the two extents is left with nothing when extents are repeatedly
4856	 * split.
4857	 */
4858	resfactor = (ores * 100);
4859	do_div(resfactor, nres);
4860	len1 *= resfactor;
4861	do_div(len1, 100);
4862	len2 *= resfactor;
4863	do_div(len2, 100);
4864	ASSERT(len1 + len2 <= ores);
4865	ASSERT(len1 < *indlen1 && len2 < *indlen2);
4866
4867	/*
4868	 * Hand out the remainder to each extent. If one of the two reservations
4869	 * is zero, we want to make sure that one gets a block first. The loop
4870	 * below starts with len1, so hand len2 a block right off the bat if it
4871	 * is zero.
4872	 */
4873	ores -= (len1 + len2);
4874	ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
4875	if (ores && !len2 && *indlen2) {
4876		len2++;
4877		ores--;
4878	}
4879	while (ores) {
4880		if (len1 < *indlen1) {
4881			len1++;
4882			ores--;
4883		}
4884		if (!ores)
4885			break;
4886		if (len2 < *indlen2) {
4887			len2++;
4888			ores--;
4889		}
4890	}
4891
4892	*indlen1 = len1;
4893	*indlen2 = len2;
4894
4895	return stolen;
4896}
4897
4898int
4899xfs_bmap_del_extent_delay(
4900	struct xfs_inode	*ip,
4901	int			whichfork,
4902	struct xfs_iext_cursor	*icur,
4903	struct xfs_bmbt_irec	*got,
4904	struct xfs_bmbt_irec	*del)
4905{
4906	struct xfs_mount	*mp = ip->i_mount;
4907	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
4908	struct xfs_bmbt_irec	new;
4909	int64_t			da_old, da_new, da_diff = 0;
4910	xfs_fileoff_t		del_endoff, got_endoff;
4911	xfs_filblks_t		got_indlen, new_indlen, stolen;
4912	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
4913	int			error = 0;
4914	bool			isrt;
4915
4916	XFS_STATS_INC(mp, xs_del_exlist);
4917
4918	isrt = xfs_ifork_is_realtime(ip, whichfork);
4919	del_endoff = del->br_startoff + del->br_blockcount;
4920	got_endoff = got->br_startoff + got->br_blockcount;
4921	da_old = startblockval(got->br_startblock);
4922	da_new = 0;
4923
4924	ASSERT(del->br_blockcount > 0);
4925	ASSERT(got->br_startoff <= del->br_startoff);
4926	ASSERT(got_endoff >= del_endoff);
4927
4928	if (isrt)
4929		xfs_mod_frextents(mp, xfs_rtb_to_rtx(mp, del->br_blockcount));
4930
4931	/*
4932	 * Update the inode delalloc counter now and wait to update the
4933	 * sb counters as we might have to borrow some blocks for the
4934	 * indirect block accounting.
4935	 */
4936	ASSERT(!isrt);
4937	error = xfs_quota_unreserve_blkres(ip, del->br_blockcount);
4938	if (error)
4939		return error;
4940	ip->i_delayed_blks -= del->br_blockcount;
4941
4942	if (got->br_startoff == del->br_startoff)
4943		state |= BMAP_LEFT_FILLING;
4944	if (got_endoff == del_endoff)
4945		state |= BMAP_RIGHT_FILLING;
4946
4947	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4948	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4949		/*
4950		 * Matches the whole extent.  Delete the entry.
4951		 */
4952		xfs_iext_remove(ip, icur, state);
4953		xfs_iext_prev(ifp, icur);
4954		break;
4955	case BMAP_LEFT_FILLING:
4956		/*
4957		 * Deleting the first part of the extent.
4958		 */
4959		got->br_startoff = del_endoff;
4960		got->br_blockcount -= del->br_blockcount;
4961		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4962				got->br_blockcount), da_old);
4963		got->br_startblock = nullstartblock((int)da_new);
4964		xfs_iext_update_extent(ip, state, icur, got);
4965		break;
4966	case BMAP_RIGHT_FILLING:
4967		/*
4968		 * Deleting the last part of the extent.
4969		 */
4970		got->br_blockcount = got->br_blockcount - del->br_blockcount;
4971		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4972				got->br_blockcount), da_old);
4973		got->br_startblock = nullstartblock((int)da_new);
4974		xfs_iext_update_extent(ip, state, icur, got);
4975		break;
4976	case 0:
4977		/*
4978		 * Deleting the middle of the extent.
4979		 *
4980		 * Distribute the original indlen reservation across the two new
4981		 * extents.  Steal blocks from the deleted extent if necessary.
4982		 * Stealing blocks simply fudges the fdblocks accounting below.
4983		 * Warn if either of the new indlen reservations is zero as this
4984		 * can lead to delalloc problems.
4985		 */
4986		got->br_blockcount = del->br_startoff - got->br_startoff;
4987		got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
4988
4989		new.br_blockcount = got_endoff - del_endoff;
4990		new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
4991
4992		WARN_ON_ONCE(!got_indlen || !new_indlen);
4993		stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
4994						       del->br_blockcount);
4995
4996		got->br_startblock = nullstartblock((int)got_indlen);
4997
4998		new.br_startoff = del_endoff;
4999		new.br_state = got->br_state;
5000		new.br_startblock = nullstartblock((int)new_indlen);
5001
5002		xfs_iext_update_extent(ip, state, icur, got);
5003		xfs_iext_next(ifp, icur);
5004		xfs_iext_insert(ip, icur, &new, state);
5005
5006		da_new = got_indlen + new_indlen - stolen;
5007		del->br_blockcount -= stolen;
5008		break;
5009	}
5010
5011	ASSERT(da_old >= da_new);
5012	da_diff = da_old - da_new;
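	/*
	 * Freed data device blocks go back to fdblocks here; rt blocks
	 * were already returned to frextents above.
	 */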
5013	if (!isrt)
5014		da_diff += del->br_blockcount;
5015	if (da_diff) {
5016		xfs_mod_fdblocks(mp, da_diff, false);
5017		xfs_mod_delalloc(mp, -da_diff);
5018	}
5019	return error;
5020}
5021
5022void
5023xfs_bmap_del_extent_cow(
5024	struct xfs_inode	*ip,
5025	struct xfs_iext_cursor	*icur,
5026	struct xfs_bmbt_irec	*got,
5027	struct xfs_bmbt_irec	*del)
5028{
5029	struct xfs_mount	*mp = ip->i_mount;
5030	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
5031	struct xfs_bmbt_irec	new;
5032	xfs_fileoff_t		del_endoff, got_endoff;
5033	uint32_t		state = BMAP_COWFORK;
5034
5035	XFS_STATS_INC(mp, xs_del_exlist);
5036
5037	del_endoff = del->br_startoff + del->br_blockcount;
5038	got_endoff = got->br_startoff + got->br_blockcount;
5039
5040	ASSERT(del->br_blockcount > 0);
5041	ASSERT(got->br_startoff <= del->br_startoff);
5042	ASSERT(got_endoff >= del_endoff);
5043	ASSERT(!isnullstartblock(got->br_startblock));
5044
5045	if (got->br_startoff == del->br_startoff)
5046		state |= BMAP_LEFT_FILLING;
5047	if (got_endoff == del_endoff)
5048		state |= BMAP_RIGHT_FILLING;
5049
5050	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5051	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5052		/*
5053		 * Matches the whole extent.  Delete the entry.
5054		 */
5055		xfs_iext_remove(ip, icur, state);
5056		xfs_iext_prev(ifp, icur);
5057		break;
5058	case BMAP_LEFT_FILLING:
5059		/*
5060		 * Deleting the first part of the extent.
5061		 */
5062		got->br_startoff = del_endoff;
5063		got->br_blockcount -= del->br_blockcount;
5064		got->br_startblock = del->br_startblock + del->br_blockcount;
5065		xfs_iext_update_extent(ip, state, icur, got);
5066		break;
5067	case BMAP_RIGHT_FILLING:
5068		/*
5069		 * Deleting the last part of the extent.
5070		 */
5071		got->br_blockcount -= del->br_blockcount;
5072		xfs_iext_update_extent(ip, state, icur, got);
5073		break;
5074	case 0:
5075		/*
5076		 * Deleting the middle of the extent.
5077		 */
5078		got->br_blockcount = del->br_startoff - got->br_startoff;
5079
5080		new.br_startoff = del_endoff;
5081		new.br_blockcount = got_endoff - del_endoff;
5082		new.br_state = got->br_state;
5083		new.br_startblock = del->br_startblock + del->br_blockcount;
5084
5085		xfs_iext_update_extent(ip, state, icur, got);
5086		xfs_iext_next(ifp, icur);
5087		xfs_iext_insert(ip, icur, &new, state);
5088		break;
5089	}
5090	ip->i_delayed_blks -= del->br_blockcount;
5091}
5092
5093/*
5094 * Called by xfs_bmapi to update file extent records and the btree
5095 * after removing space.
5096 */
5097STATIC int				/* error */
5098xfs_bmap_del_extent_real(
5099	xfs_inode_t		*ip,	/* incore inode pointer */
5100	xfs_trans_t		*tp,	/* current transaction pointer */
5101	struct xfs_iext_cursor	*icur,
5102	struct xfs_btree_cur	*cur,	/* if null, not a btree */
5103	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
5104	int			*logflagsp, /* inode logging flags */
5105	int			whichfork, /* data or attr fork */
5106	uint32_t		bflags)	/* bmapi flags */
5107{
5108	xfs_fsblock_t		del_endblock = 0;	/* first block past del */
5109	xfs_fileoff_t		del_endoff;	/* first offset past del */
5110	int			do_fx;	/* free extent at end of routine */
5111	int			error;	/* error return value */
5112	struct xfs_bmbt_irec	got;	/* current extent entry */
5113	xfs_fileoff_t		got_endoff;	/* first offset past got */
5114	int			i;	/* temp state */
5115	struct xfs_ifork	*ifp;	/* inode fork pointer */
5116	xfs_mount_t		*mp;	/* mount structure */
5117	xfs_filblks_t		nblks;	/* quota/sb block count */
5118	xfs_bmbt_irec_t		new;	/* new record to be inserted */
5119	/* REFERENCED */
5120	uint			qfield;	/* quota field to update */
5121	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
5122	struct xfs_bmbt_irec	old;
5123
5124	*logflagsp = 0;
5125
5126	mp = ip->i_mount;
5127	XFS_STATS_INC(mp, xs_del_exlist);
5128
5129	ifp = xfs_ifork_ptr(ip, whichfork);
5130	ASSERT(del->br_blockcount > 0);
5131	xfs_iext_get_extent(ifp, icur, &got);
5132	ASSERT(got.br_startoff <= del->br_startoff);
5133	del_endoff = del->br_startoff + del->br_blockcount;
5134	got_endoff = got.br_startoff + got.br_blockcount;
5135	ASSERT(got_endoff >= del_endoff);
5136	ASSERT(!isnullstartblock(got.br_startblock));
5137	qfield = 0;
5138
5139	/*
5140	 * If the directory code is running with no block reservation, the
5141	 * deleted block is in the middle of its extent,
5142	 * and the resulting insert of an extent would cause transformation to
5143	 * btree format, then reject it.  The calling code will then swap blocks
5144	 * around instead.  We have to do this now, rather than waiting for the
5145	 * conversion to btree format, since the transaction will be dirty then.
5146	 */
5147	if (tp->t_blk_res == 0 &&
5148	    ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
5149	    ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
5150	    del->br_startoff > got.br_startoff && del_endoff < got_endoff)
5151		return -ENOSPC;
5152
5153	*logflagsp = XFS_ILOG_CORE;
5154	if (xfs_ifork_is_realtime(ip, whichfork)) {
5155		if (!(bflags & XFS_BMAPI_REMAP)) {
5156			error = xfs_rtfree_blocks(tp, del->br_startblock,
5157					del->br_blockcount);
5158			if (error)
5159				return error;
5160		}
5161
5162		do_fx = 0;
5163		qfield = XFS_TRANS_DQ_RTBCOUNT;
5164	} else {
5165		do_fx = 1;
5166		qfield = XFS_TRANS_DQ_BCOUNT;
5167	}
5168	nblks = del->br_blockcount;
5169
5170	del_endblock = del->br_startblock + del->br_blockcount;
5171	if (cur) {
5172		error = xfs_bmbt_lookup_eq(cur, &got, &i);
5173		if (error)
5174			return error;
5175		if (XFS_IS_CORRUPT(mp, i != 1)) {
5176			xfs_btree_mark_sick(cur);
5177			return -EFSCORRUPTED;
5178		}
5179	}
5180
5181	if (got.br_startoff == del->br_startoff)
5182		state |= BMAP_LEFT_FILLING;
5183	if (got_endoff == del_endoff)
5184		state |= BMAP_RIGHT_FILLING;
5185
5186	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5187	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5188		/*
5189		 * Matches the whole extent.  Delete the entry.
5190		 */
5191		xfs_iext_remove(ip, icur, state);
5192		xfs_iext_prev(ifp, icur);
5193		ifp->if_nextents--;
5194
5195		*logflagsp |= XFS_ILOG_CORE;
5196		if (!cur) {
5197			*logflagsp |= xfs_ilog_fext(whichfork);
5198			break;
5199		}
5200		if ((error = xfs_btree_delete(cur, &i)))
5201			return error;
5202		if (XFS_IS_CORRUPT(mp, i != 1)) {
5203			xfs_btree_mark_sick(cur);
5204			return -EFSCORRUPTED;
5205		}
5206		break;
5207	case BMAP_LEFT_FILLING:
5208		/*
5209		 * Deleting the first part of the extent.
5210		 */
5211		got.br_startoff = del_endoff;
5212		got.br_startblock = del_endblock;
5213		got.br_blockcount -= del->br_blockcount;
5214		xfs_iext_update_extent(ip, state, icur, &got);
5215		if (!cur) {
5216			*logflagsp |= xfs_ilog_fext(whichfork);
5217			break;
5218		}
5219		error = xfs_bmbt_update(cur, &got);
5220		if (error)
5221			return error;
5222		break;
5223	case BMAP_RIGHT_FILLING:
5224		/*
5225		 * Deleting the last part of the extent.
5226		 */
5227		got.br_blockcount -= del->br_blockcount;
5228		xfs_iext_update_extent(ip, state, icur, &got);
5229		if (!cur) {
5230			*logflagsp |= xfs_ilog_fext(whichfork);
5231			break;
5232		}
5233		error = xfs_bmbt_update(cur, &got);
5234		if (error)
5235			return error;
5236		break;
5237	case 0:
5238		/*
5239		 * Deleting the middle of the extent.
5240		 */
5241
5242		old = got;
5243
5244		got.br_blockcount = del->br_startoff - got.br_startoff;
5245		xfs_iext_update_extent(ip, state, icur, &got);
5246
5247		new.br_startoff = del_endoff;
5248		new.br_blockcount = got_endoff - del_endoff;
5249		new.br_state = got.br_state;
5250		new.br_startblock = del_endblock;
5251
5252		*logflagsp |= XFS_ILOG_CORE;
5253		if (cur) {
5254			error = xfs_bmbt_update(cur, &got);
5255			if (error)
5256				return error;
5257			error = xfs_btree_increment(cur, 0, &i);
5258			if (error)
5259				return error;
5260			cur->bc_rec.b = new;
5261			error = xfs_btree_insert(cur, &i);
5262			if (error && error != -ENOSPC)
5263				return error;
5264			/*
5265			 * If we get no-space back from the btree insert, it tried a
5266			 * split, and we have a zero block reservation.  Fix up
5267			 * our state and return the error.
5268			 */
5269			if (error == -ENOSPC) {
5270				/*
5271				 * Reset the cursor, don't trust it after any
5272				 * insert operation.
5273				 */
5274				error = xfs_bmbt_lookup_eq(cur, &got, &i);
5275				if (error)
5276					return error;
5277				if (XFS_IS_CORRUPT(mp, i != 1)) {
5278					xfs_btree_mark_sick(cur);
5279					return -EFSCORRUPTED;
5280				}
5281				/*
5282				 * Update the btree record back
5283				 * to the original value.
5284				 */
5285				error = xfs_bmbt_update(cur, &old);
5286				if (error)
5287					return error;
5288				/*
5289				 * Reset the extent record back
5290				 * to the original value.
5291				 */
5292				xfs_iext_update_extent(ip, state, icur, &old);
5293				*logflagsp = 0;
5294				return -ENOSPC;
5295			}
5296			if (XFS_IS_CORRUPT(mp, i != 1)) {
5297				xfs_btree_mark_sick(cur);
5298				return -EFSCORRUPTED;
5299			}
5300		} else
5301			*logflagsp |= xfs_ilog_fext(whichfork);
5302
5303		ifp->if_nextents++;
5304		xfs_iext_next(ifp, icur);
5305		xfs_iext_insert(ip, icur, &new, state);
5306		break;
5307	}
5308
5309	/* remove reverse mapping */
5310	xfs_rmap_unmap_extent(tp, ip, whichfork, del);
5311
5312	/*
5313	 * If we need to, add to list of extents to delete.
5314	 */
5315	if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
5316		if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
5317			xfs_refcount_decrease_extent(tp, del);
5318		} else {
5319			error = xfs_free_extent_later(tp, del->br_startblock,
5320					del->br_blockcount, NULL,
5321					XFS_AG_RESV_NONE,
5322					((bflags & XFS_BMAPI_NODISCARD) ||
5323					del->br_state == XFS_EXT_UNWRITTEN));
5324			if (error)
5325				return error;
5326		}
5327	}
5328
5329	/*
5330	 * Adjust inode # blocks in the file.
5331	 */
5332	if (nblks)
5333		ip->i_nblocks -= nblks;
5334	/*
5335	 * Adjust quota data.
5336	 */
5337	if (qfield && !(bflags & XFS_BMAPI_REMAP))
5338		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5339
5340	return 0;
5341}
5342
5343/*
5344 * Unmap (remove) blocks from a file.
5345 * If nexts is nonzero then the number of extents to remove is limited to
5346 * that value.  If not all extents in the block range can be removed then
5347 * *done is set.
5348 */
5349static int
5350__xfs_bunmapi(
5351	struct xfs_trans	*tp,		/* transaction pointer */
5352	struct xfs_inode	*ip,		/* incore inode */
5353	xfs_fileoff_t		start,		/* first file offset deleted */
5354	xfs_filblks_t		*rlen,		/* i/o: amount remaining */
5355	uint32_t		flags,		/* misc flags */
5356	xfs_extnum_t		nexts)		/* number of extents max */
5357{
5358	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
5359	struct xfs_bmbt_irec	del;		/* extent being deleted */
5360	int			error;		/* error return value */
5361	xfs_extnum_t		extno;		/* extent number in list */
5362	struct xfs_bmbt_irec	got;		/* current extent record */
5363	struct xfs_ifork	*ifp;		/* inode fork pointer */
5364	int			isrt;		/* freeing in rt area */
5365	int			logflags;	/* transaction logging flags */
5366	xfs_extlen_t		mod;		/* rt extent offset */
5367	struct xfs_mount	*mp = ip->i_mount;
5368	int			tmp_logflags;	/* partial logging flags */
5369	int			wasdel;		/* was a delayed alloc extent */
5370	int			whichfork;	/* data or attribute fork */
5371	xfs_filblks_t		len = *rlen;	/* length to unmap in file */
5372	xfs_fileoff_t		end;
5373	struct xfs_iext_cursor	icur;
5374	bool			done = false;
5375
5376	trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);
5377
5378	whichfork = xfs_bmapi_whichfork(flags);
5379	ASSERT(whichfork != XFS_COW_FORK);
5380	ifp = xfs_ifork_ptr(ip, whichfork);
5381	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp))) {
5382		xfs_bmap_mark_sick(ip, whichfork);
5383		return -EFSCORRUPTED;
5384	}
5385	if (xfs_is_shutdown(mp))
5386		return -EIO;
5387
5388	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
5389	ASSERT(len > 0);
5390	ASSERT(nexts >= 0);
5391
5392	error = xfs_iread_extents(tp, ip, whichfork);
5393	if (error)
5394		return error;
5395
5396	if (xfs_iext_count(ifp) == 0) {
5397		*rlen = 0;
5398		return 0;
5399	}
5400	XFS_STATS_INC(mp, xs_blk_unmap);
5401	isrt = xfs_ifork_is_realtime(ip, whichfork);
5402	end = start + len;
5403
5404	if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
5405		*rlen = 0;
5406		return 0;
5407	}
5408	end--;
5409
5410	logflags = 0;
5411	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
5412		ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
5413		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5414	} else
5415		cur = NULL;
5416
5417	if (isrt) {
5418		/*
5419		 * Synchronize by locking the realtime bitmap and summary inodes.
5420		 */
5421		xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
5422		xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5423		xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
5424		xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
5425	}
5426
5427	extno = 0;
5428	while (end != (xfs_fileoff_t)-1 && end >= start &&
5429	       (nexts == 0 || extno < nexts)) {
5430		/*
5431		 * Is the found extent after a hole in which end lives?
5432		 * Just back up to the previous extent, if so.
5433		 */
5434		if (got.br_startoff > end &&
5435		    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5436			done = true;
5437			break;
5438		}
5439		/*
5440		 * Is the last block of this extent before the range
5441		 * we're supposed to delete?  If so, we're done.
5442		 */
5443		end = XFS_FILEOFF_MIN(end,
5444			got.br_startoff + got.br_blockcount - 1);
5445		if (end < start)
5446			break;
5447		/*
5448		 * Then deal with the (possibly delayed) allocated space
5449		 * we found.
5450		 */
5451		del = got;
5452		wasdel = isnullstartblock(del.br_startblock);
5453
5454		if (got.br_startoff < start) {
5455			del.br_startoff = start;
5456			del.br_blockcount -= start - got.br_startoff;
5457			if (!wasdel)
5458				del.br_startblock += start - got.br_startoff;
5459		}
5460		if (del.br_startoff + del.br_blockcount > end + 1)
5461			del.br_blockcount = end + 1 - del.br_startoff;
5462
5463		if (!isrt || (flags & XFS_BMAPI_REMAP))
5464			goto delete;
5465
5466		mod = xfs_rtb_to_rtxoff(mp,
5467				del.br_startblock + del.br_blockcount);
5468		if (mod) {
5469			/*
5470			 * Realtime extent not lined up at the end.
5471			 * The extent could have been split into written
5472			 * and unwritten pieces, or we could just be
5473			 * unmapping part of it.  But we can't really
5474			 * get rid of part of a realtime extent.
5475			 */
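			/*
			 * Illustrative example, not in the original source:
			 * with sb_rextsize == 4, a del that ends at rtblock
			 * 10 yields mod == 2, i.e. the unmap stops two
			 * blocks into a realtime extent, so the tail is
			 * skipped below if unwritten, or converted to
			 * unwritten otherwise.
			 */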
5476			if (del.br_state == XFS_EXT_UNWRITTEN) {
5477				/*
5478				 * This piece is unwritten, or we're not
5479				 * using unwritten extents.  Skip over it.
5480				 */
5481				ASSERT((flags & XFS_BMAPI_REMAP) || end >= mod);
5482				end -= mod > del.br_blockcount ?
5483					del.br_blockcount : mod;
5484				if (end < got.br_startoff &&
5485				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5486					done = true;
5487					break;
5488				}
5489				continue;
5490			}
5491			/*
5492			 * It's written, turn it unwritten.
5493			 * This is better than zeroing it.
5494			 */
5495			ASSERT(del.br_state == XFS_EXT_NORM);
5496			ASSERT(tp->t_blk_res > 0);
5497			/*
5498			 * If this spans a realtime extent boundary,
5499			 * chop it back to the start of the one we end at.
5500			 */
5501			if (del.br_blockcount > mod) {
5502				del.br_startoff += del.br_blockcount - mod;
5503				del.br_startblock += del.br_blockcount - mod;
5504				del.br_blockcount = mod;
5505			}
5506			del.br_state = XFS_EXT_UNWRITTEN;
5507			error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5508					whichfork, &icur, &cur, &del,
5509					&logflags);
5510			if (error)
5511				goto error0;
5512			goto nodelete;
5513		}
5514
5515		mod = xfs_rtb_to_rtxoff(mp, del.br_startblock);
5516		if (mod) {
5517			xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;
5518
5519			/*
5520			 * Realtime extent is lined up at the end but not
5521			 * at the front.  We'll get rid of full extents if
5522			 * we can.
5523			 */
5524			if (del.br_blockcount > off) {
5525				del.br_blockcount -= off;
5526				del.br_startoff += off;
5527				del.br_startblock += off;
5528			} else if (del.br_startoff == start &&
5529				   (del.br_state == XFS_EXT_UNWRITTEN ||
5530				    tp->t_blk_res == 0)) {
5531				/*
5532				 * Can't make it unwritten.  There isn't
5533				 * a full extent here so just skip it.
5534				 */
5535				ASSERT(end >= del.br_blockcount);
5536				end -= del.br_blockcount;
5537				if (got.br_startoff > end &&
5538				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5539					done = true;
5540					break;
5541				}
5542				continue;
5543			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
5544				struct xfs_bmbt_irec	prev;
5545				xfs_fileoff_t		unwrite_start;
5546
5547				/*
5548				 * This one is already unwritten.
5549				 * It must have a written left neighbor.
5550				 * Unwrite the killed part of that one and
5551				 * try again.
5552				 */
5553				if (!xfs_iext_prev_extent(ifp, &icur, &prev))
5554					ASSERT(0);
5555				ASSERT(prev.br_state == XFS_EXT_NORM);
5556				ASSERT(!isnullstartblock(prev.br_startblock));
5557				ASSERT(del.br_startblock ==
5558				       prev.br_startblock + prev.br_blockcount);
5559				unwrite_start = max3(start,
5560						     del.br_startoff - mod,
5561						     prev.br_startoff);
5562				mod = unwrite_start - prev.br_startoff;
5563				prev.br_startoff = unwrite_start;
5564				prev.br_startblock += mod;
5565				prev.br_blockcount -= mod;
5566				prev.br_state = XFS_EXT_UNWRITTEN;
5567				error = xfs_bmap_add_extent_unwritten_real(tp,
5568						ip, whichfork, &icur, &cur,
5569						&prev, &logflags);
5570				if (error)
5571					goto error0;
5572				goto nodelete;
5573			} else {
5574				ASSERT(del.br_state == XFS_EXT_NORM);
5575				del.br_state = XFS_EXT_UNWRITTEN;
5576				error = xfs_bmap_add_extent_unwritten_real(tp,
5577						ip, whichfork, &icur, &cur,
5578						&del, &logflags);
5579				if (error)
5580					goto error0;
5581				goto nodelete;
5582			}
5583		}
5584
5585delete:
5586		if (wasdel) {
5587			error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
5588					&got, &del);
5589		} else {
5590			error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
5591					&del, &tmp_logflags, whichfork,
5592					flags);
5593			logflags |= tmp_logflags;
5594		}
5595
5596		if (error)
5597			goto error0;
5598
5599		end = del.br_startoff - 1;
5600nodelete:
5601		/*
5602		 * If not done, go on to the next (previous) record.
5603		 */
5604		if (end != (xfs_fileoff_t)-1 && end >= start) {
5605			if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5606			    (got.br_startoff > end &&
5607			     !xfs_iext_prev_extent(ifp, &icur, &got))) {
5608				done = true;
5609				break;
5610			}
5611			extno++;
5612		}
5613	}
5614	if (done || end == (xfs_fileoff_t)-1 || end < start)
5615		*rlen = 0;
5616	else
5617		*rlen = end - start + 1;
5618
5619	/*
5620	 * Convert to a btree if necessary.
5621	 */
5622	if (xfs_bmap_needs_btree(ip, whichfork)) {
5623		ASSERT(cur == NULL);
5624		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
5625				&tmp_logflags, whichfork);
5626		logflags |= tmp_logflags;
5627	} else {
5628		error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags,
5629			whichfork);
5630	}
5631
5632error0:
5633	/*
5634	 * Log everything.  Do this after conversion; there's no point in
5635	 * logging the extent records if we've converted to btree format.
5636	 */
5637	if ((logflags & xfs_ilog_fext(whichfork)) &&
5638	    ifp->if_format != XFS_DINODE_FMT_EXTENTS)
5639		logflags &= ~xfs_ilog_fext(whichfork);
5640	else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5641		 ifp->if_format != XFS_DINODE_FMT_BTREE)
5642		logflags &= ~xfs_ilog_fbroot(whichfork);
5643	/*
5644	 * Log the inode even in the error case; if the transaction
5645	 * is dirty we'll need to shut down the filesystem.
5646	 */
5647	if (logflags)
5648		xfs_trans_log_inode(tp, ip, logflags);
5649	if (cur) {
5650		if (!error)
5651			cur->bc_bmap.allocated = 0;
5652		xfs_btree_del_cursor(cur, error);
5653	}
5654	return error;
5655}
5656
5657/* Unmap a range of a file. */
5658int
5659xfs_bunmapi(
5660	xfs_trans_t		*tp,
5661	struct xfs_inode	*ip,
5662	xfs_fileoff_t		bno,
5663	xfs_filblks_t		len,
5664	uint32_t		flags,
5665	xfs_extnum_t		nexts,
5666	int			*done)
5667{
5668	int			error;
5669
5670	error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts);
5671	*done = (len == 0);
5672	return error;
5673}
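/*
 * Illustrative sketch, not part of the original file: a minimal caller of
 * xfs_bunmapi() that removes at most one extent per call and uses *done to
 * learn whether the whole range is gone.  The _example name is hypothetical;
 * transaction setup, locking and rolling are assumed to happen elsewhere.
 */
static int
xfs_bunmapi_example(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		bno,
	xfs_filblks_t		len)
{
	int			done;
	int			error;

	/* Limit this call to a single extent (nexts == 1). */
	error = xfs_bunmapi(tp, ip, bno, len, 0, 1, &done);
	if (error)
		return error;
	/* Not done: the caller should roll the transaction and retry. */
	return done ? 0 : -EAGAIN;
}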
5674
5675/*
5676 * Determine whether an extent shift can be accomplished by a merge with the
5677 * extent that precedes the target hole of the shift.
5678 */
5679STATIC bool
5680xfs_bmse_can_merge(
5681	struct xfs_bmbt_irec	*left,	/* preceding extent */
5682	struct xfs_bmbt_irec	*got,	/* current extent to shift */
5683	xfs_fileoff_t		shift)	/* shift fsb */
5684{
5685	xfs_fileoff_t		startoff;
5686
5687	startoff = got->br_startoff - shift;
5688
5689	/*
5690	 * The extent, once shifted, must be adjacent in-file and on-disk with
5691	 * the preceding extent.
5692	 */
5693	if ((left->br_startoff + left->br_blockcount != startoff) ||
5694	    (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5695	    (left->br_state != got->br_state) ||
5696	    (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN))
5697		return false;
5698
5699	return true;
5700}
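/*
 * Illustrative sketch, not part of the original file: concrete numbers for
 * the merge test above.  With a preceding extent mapping file blocks
 * [0, 10) to disk blocks [100, 110) and a target extent mapping [15, 20)
 * to [110, 115), a shift of 5 lands the target at file block 10, where it
 * is already disk-contiguous, so the merge is allowed; a shift of 4 would
 * leave a one-block hole and be rejected.
 */
static bool
xfs_bmse_can_merge_example(void)
{
	struct xfs_bmbt_irec	left = {
		.br_startoff	= 0,
		.br_startblock	= 100,
		.br_blockcount	= 10,
		.br_state	= XFS_EXT_NORM,
	};
	struct xfs_bmbt_irec	got = {
		.br_startoff	= 15,
		.br_startblock	= 110,
		.br_blockcount	= 5,
		.br_state	= XFS_EXT_NORM,
	};

	return xfs_bmse_can_merge(&left, &got, 5);	/* true */
}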
5701
5702/*
5703 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5704 * hole in the file. If an extent shift would result in the extent being fully
5705 * adjacent to the extent that currently precedes the hole, we can merge with
5706 * the preceding extent rather than do the shift.
5707 *
5708 * This function assumes the caller has verified a shift-by-merge is possible
5709 * with the provided extents via xfs_bmse_can_merge().
5710 */
5711STATIC int
5712xfs_bmse_merge(
5713	struct xfs_trans		*tp,
5714	struct xfs_inode		*ip,
5715	int				whichfork,
5716	xfs_fileoff_t			shift,		/* shift fsb */
5717	struct xfs_iext_cursor		*icur,
5718	struct xfs_bmbt_irec		*got,		/* extent to shift */
5719	struct xfs_bmbt_irec		*left,		/* preceding extent */
5720	struct xfs_btree_cur		*cur,
5721	int				*logflags)	/* output */
5722{
5723	struct xfs_ifork		*ifp = xfs_ifork_ptr(ip, whichfork);
5724	struct xfs_bmbt_irec		new;
5725	xfs_filblks_t			blockcount;
5726	int				error, i;
5727	struct xfs_mount		*mp = ip->i_mount;
5728
5729	blockcount = left->br_blockcount + got->br_blockcount;
5730
5731	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
5732	ASSERT(xfs_bmse_can_merge(left, got, shift));
5733
5734	new = *left;
5735	new.br_blockcount = blockcount;
5736
5737	/*
5738	 * Update the on-disk extent count, the btree if necessary and log the
5739	 * inode.
5740	 */
5741	ifp->if_nextents--;
5742	*logflags |= XFS_ILOG_CORE;
5743	if (!cur) {
5744		*logflags |= XFS_ILOG_DEXT;
5745		goto done;
5746	}
5747
5748	/* lookup and remove the extent to merge */
5749	error = xfs_bmbt_lookup_eq(cur, got, &i);
5750	if (error)
5751		return error;
5752	if (XFS_IS_CORRUPT(mp, i != 1)) {
5753		xfs_btree_mark_sick(cur);
5754		return -EFSCORRUPTED;
5755	}
5756
5757	error = xfs_btree_delete(cur, &i);
5758	if (error)
5759		return error;
5760	if (XFS_IS_CORRUPT(mp, i != 1)) {
5761		xfs_btree_mark_sick(cur);
5762		return -EFSCORRUPTED;
5763	}
5764
5765	/* lookup and update size of the previous extent */
5766	error = xfs_bmbt_lookup_eq(cur, left, &i);
5767	if (error)
5768		return error;
5769	if (XFS_IS_CORRUPT(mp, i != 1)) {
5770		xfs_btree_mark_sick(cur);
5771		return -EFSCORRUPTED;
5772	}
5773
5774	error = xfs_bmbt_update(cur, &new);
5775	if (error)
5776		return error;
5777
5778	/* change to extent format if required after extent removal */
5779	error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
5780	if (error)
5781		return error;
5782
5783done:
5784	xfs_iext_remove(ip, icur, 0);
5785	xfs_iext_prev(ifp, icur);
5786	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5787			&new);
5788
5789	/* update reverse mapping. rmap functions merge the rmaps for us */
5790	xfs_rmap_unmap_extent(tp, ip, whichfork, got);
5791	memcpy(&new, got, sizeof(new));
5792	new.br_startoff = left->br_startoff + left->br_blockcount;
5793	xfs_rmap_map_extent(tp, ip, whichfork, &new);
5794	return 0;
5795}
5796
5797static int
5798xfs_bmap_shift_update_extent(
5799	struct xfs_trans	*tp,
5800	struct xfs_inode	*ip,
5801	int			whichfork,
5802	struct xfs_iext_cursor	*icur,
5803	struct xfs_bmbt_irec	*got,
5804	struct xfs_btree_cur	*cur,
5805	int			*logflags,
5806	xfs_fileoff_t		startoff)
5807{
5808	struct xfs_mount	*mp = ip->i_mount;
5809	struct xfs_bmbt_irec	prev = *got;
5810	int			error, i;
5811
5812	*logflags |= XFS_ILOG_CORE;
5813
5814	got->br_startoff = startoff;
5815
5816	if (cur) {
5817		error = xfs_bmbt_lookup_eq(cur, &prev, &i);
5818		if (error)
5819			return error;
5820		if (XFS_IS_CORRUPT(mp, i != 1)) {
5821			xfs_btree_mark_sick(cur);
5822			return -EFSCORRUPTED;
5823		}
5824
5825		error = xfs_bmbt_update(cur, got);
5826		if (error)
5827			return error;
5828	} else {
5829		*logflags |= XFS_ILOG_DEXT;
5830	}
5831
5832	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5833			got);
5834
5835	/* update reverse mapping */
5836	xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
5837	xfs_rmap_map_extent(tp, ip, whichfork, got);
5838	return 0;
5839}
5840
5841int
5842xfs_bmap_collapse_extents(
5843	struct xfs_trans	*tp,
5844	struct xfs_inode	*ip,
5845	xfs_fileoff_t		*next_fsb,
5846	xfs_fileoff_t		offset_shift_fsb,
5847	bool			*done)
5848{
5849	int			whichfork = XFS_DATA_FORK;
5850	struct xfs_mount	*mp = ip->i_mount;
5851	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
5852	struct xfs_btree_cur	*cur = NULL;
5853	struct xfs_bmbt_irec	got, prev;
5854	struct xfs_iext_cursor	icur;
5855	xfs_fileoff_t		new_startoff;
5856	int			error = 0;
5857	int			logflags = 0;
5858
5859	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5860	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5861		xfs_bmap_mark_sick(ip, whichfork);
5862		return -EFSCORRUPTED;
5863	}
5864
5865	if (xfs_is_shutdown(mp))
5866		return -EIO;
5867
5868	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
5869
5870	error = xfs_iread_extents(tp, ip, whichfork);
5871	if (error)
5872		return error;
5873
5874	if (ifp->if_format == XFS_DINODE_FMT_BTREE)
5875		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5876
5877	if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5878		*done = true;
5879		goto del_cursor;
5880	}
5881	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5882		xfs_bmap_mark_sick(ip, whichfork);
5883		error = -EFSCORRUPTED;
5884		goto del_cursor;
5885	}
5886
5887	new_startoff = got.br_startoff - offset_shift_fsb;
5888	if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
5889		if (new_startoff < prev.br_startoff + prev.br_blockcount) {
5890			error = -EINVAL;
5891			goto del_cursor;
5892		}
5893
5894		if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
5895			error = xfs_bmse_merge(tp, ip, whichfork,
5896					offset_shift_fsb, &icur, &got, &prev,
5897					cur, &logflags);
5898			if (error)
5899				goto del_cursor;
5900			goto done;
5901		}
5902	} else {
5903		if (got.br_startoff < offset_shift_fsb) {
5904			error = -EINVAL;
5905			goto del_cursor;
5906		}
5907	}
5908
5909	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5910			cur, &logflags, new_startoff);
5911	if (error)
5912		goto del_cursor;
5913
5914done:
5915	if (!xfs_iext_next_extent(ifp, &icur, &got)) {
5916		*done = true;
5917		goto del_cursor;
5918	}
5919
5920	*next_fsb = got.br_startoff;
5921del_cursor:
5922	if (cur)
5923		xfs_btree_del_cursor(cur, error);
5924	if (logflags)
5925		xfs_trans_log_inode(tp, ip, logflags);
5926	return error;
5927}
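/*
 * Illustrative sketch, not part of the original file: the shape of a
 * collapse-range driver (the real one lives in xfs_bmap_util.c).  It shifts
 * extents left by shift_fsb, one extent or merge per call, finishing the
 * deferred rmap work after each step.  Transaction allocation, rolling and
 * locking are assumed to be handled by the caller.
 */
static int
xfs_collapse_example(
	struct xfs_trans	**tpp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		shift_fsb)
{
	xfs_fileoff_t		next_fsb = offset_fsb + shift_fsb;
	bool			done = false;
	int			error;

	while (!done) {
		error = xfs_bmap_collapse_extents(*tpp, ip, &next_fsb,
				shift_fsb, &done);
		if (error)
			return error;
		error = xfs_defer_finish(tpp);
		if (error)
			return error;
	}
	return 0;
}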
5928
5929/* Make sure we won't be right-shifting an extent past the maximum bound. */
5930int
5931xfs_bmap_can_insert_extents(
5932	struct xfs_inode	*ip,
5933	xfs_fileoff_t		off,
5934	xfs_fileoff_t		shift)
5935{
5936	struct xfs_bmbt_irec	got;
5937	int			is_empty;
5938	int			error = 0;
5939
5940	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);
5941
5942	if (xfs_is_shutdown(ip->i_mount))
5943		return -EIO;
5944
5945	xfs_ilock(ip, XFS_ILOCK_EXCL);
5946	error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
5947	if (!error && !is_empty && got.br_startoff >= off &&
5948	    ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
5949		error = -EINVAL;
5950	xfs_iunlock(ip, XFS_ILOCK_EXCL);
5951
5952	return error;
5953}
5954
5955int
5956xfs_bmap_insert_extents(
5957	struct xfs_trans	*tp,
5958	struct xfs_inode	*ip,
5959	xfs_fileoff_t		*next_fsb,
5960	xfs_fileoff_t		offset_shift_fsb,
5961	bool			*done,
5962	xfs_fileoff_t		stop_fsb)
5963{
5964	int			whichfork = XFS_DATA_FORK;
5965	struct xfs_mount	*mp = ip->i_mount;
5966	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
5967	struct xfs_btree_cur	*cur = NULL;
5968	struct xfs_bmbt_irec	got, next;
5969	struct xfs_iext_cursor	icur;
5970	xfs_fileoff_t		new_startoff;
5971	int			error = 0;
5972	int			logflags = 0;
5973
5974	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5975	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5976		xfs_bmap_mark_sick(ip, whichfork);
5977		return -EFSCORRUPTED;
5978	}
5979
5980	if (xfs_is_shutdown(mp))
5981		return -EIO;
5982
5983	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
5984
5985	error = xfs_iread_extents(tp, ip, whichfork);
5986	if (error)
5987		return error;
5988
5989	if (ifp->if_format == XFS_DINODE_FMT_BTREE)
5990		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5991
5992	if (*next_fsb == NULLFSBLOCK) {
5993		xfs_iext_last(ifp, &icur);
5994		if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5995		    stop_fsb > got.br_startoff) {
5996			*done = true;
5997			goto del_cursor;
5998		}
5999	} else {
6000		if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
6001			*done = true;
6002			goto del_cursor;
6003		}
6004	}
6005	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
6006		xfs_bmap_mark_sick(ip, whichfork);
6007		error = -EFSCORRUPTED;
6008		goto del_cursor;
6009	}
6010
6011	if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
6012		xfs_bmap_mark_sick(ip, whichfork);
6013		error = -EFSCORRUPTED;
6014		goto del_cursor;
6015	}
6016
6017	new_startoff = got.br_startoff + offset_shift_fsb;
6018	if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
6019		if (new_startoff + got.br_blockcount > next.br_startoff) {
6020			error = -EINVAL;
6021			goto del_cursor;
6022		}
6023
6024		/*
6025		 * Unlike a left shift (which involves a hole punch), a right
6026		 * shift does not modify extent neighbors in any way.  We should
6027		 * never find mergeable extents in this scenario.  Check anyway
6028		 * and warn if we encounter two extents that could be one.
6029		 */
6030		if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
6031			WARN_ON_ONCE(1);
6032	}
6033
6034	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
6035			cur, &logflags, new_startoff);
6036	if (error)
6037		goto del_cursor;
6038
6039	if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
6040	    stop_fsb >= got.br_startoff + got.br_blockcount) {
6041		*done = true;
6042		goto del_cursor;
6043	}
6044
6045	*next_fsb = got.br_startoff;
6046del_cursor:
6047	if (cur)
6048		xfs_btree_del_cursor(cur, error);
6049	if (logflags)
6050		xfs_trans_log_inode(tp, ip, logflags);
6051	return error;
6052}
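/*
 * Illustrative sketch, not part of the original file: the matching
 * insert-range (right shift) driver.  next_fsb starts at NULLFSBLOCK so the
 * walk begins at the last extent and moves down toward stop_fsb;
 * xfs_bmap_can_insert_extents() is assumed to have been called by the
 * caller before the ILOCK was taken.
 */
static int
xfs_insert_range_example(
	struct xfs_trans	**tpp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		stop_fsb,
	xfs_fileoff_t		shift_fsb)
{
	xfs_fileoff_t		next_fsb = NULLFSBLOCK;
	bool			done = false;
	int			error;

	while (!done) {
		error = xfs_bmap_insert_extents(*tpp, ip, &next_fsb,
				shift_fsb, &done, stop_fsb);
		if (error)
			return error;
		error = xfs_defer_finish(tpp);
		if (error)
			return error;
	}
	return 0;
}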
6053
6054/*
6055 * Split an extent at the split_fsb block so that split_fsb becomes the
6056 * first block of a new extent.  @split_fsb is the file block at which
6057 * the extent is split.  If split_fsb lies in a hole or at the first
6058 * block of an extent, just return 0.
6059 */
6060int
6061xfs_bmap_split_extent(
6062	struct xfs_trans	*tp,
6063	struct xfs_inode	*ip,
6064	xfs_fileoff_t		split_fsb)
6065{
6066	int				whichfork = XFS_DATA_FORK;
6067	struct xfs_ifork		*ifp = xfs_ifork_ptr(ip, whichfork);
6068	struct xfs_btree_cur		*cur = NULL;
6069	struct xfs_bmbt_irec		got;
6070	struct xfs_bmbt_irec		new; /* split extent */
6071	struct xfs_mount		*mp = ip->i_mount;
6072	xfs_fsblock_t			gotblkcnt; /* new block count for got */
6073	struct xfs_iext_cursor		icur;
6074	int				error = 0;
6075	int				logflags = 0;
6076	int				i = 0;
6077
6078	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
6079	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
6080		xfs_bmap_mark_sick(ip, whichfork);
6081		return -EFSCORRUPTED;
6082	}
6083
6084	if (xfs_is_shutdown(mp))
6085		return -EIO;
6086
6087	/* Read in all the extents */
6088	error = xfs_iread_extents(tp, ip, whichfork);
6089	if (error)
6090		return error;
6091
6092	/*
6093	 * If there are no extents, or split_fsb lies in a hole, we are done.
6094	 */
6095	if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
6096	    got.br_startoff >= split_fsb)
6097		return 0;
6098
6099	gotblkcnt = split_fsb - got.br_startoff;
6100	new.br_startoff = split_fsb;
6101	new.br_startblock = got.br_startblock + gotblkcnt;
6102	new.br_blockcount = got.br_blockcount - gotblkcnt;
6103	new.br_state = got.br_state;
6104
6105	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
6106		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
6107		error = xfs_bmbt_lookup_eq(cur, &got, &i);
6108		if (error)
6109			goto del_cursor;
6110		if (XFS_IS_CORRUPT(mp, i != 1)) {
6111			xfs_btree_mark_sick(cur);
6112			error = -EFSCORRUPTED;
6113			goto del_cursor;
6114		}
6115	}
6116
6117	got.br_blockcount = gotblkcnt;
6118	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
6119			&got);
6120
6121	logflags = XFS_ILOG_CORE;
6122	if (cur) {
6123		error = xfs_bmbt_update(cur, &got);
6124		if (error)
6125			goto del_cursor;
6126	} else
6127		logflags |= XFS_ILOG_DEXT;
6128
6129	/* Add new extent */
6130	xfs_iext_next(ifp, &icur);
6131	xfs_iext_insert(ip, &icur, &new, 0);
6132	ifp->if_nextents++;
6133
6134	if (cur) {
6135		error = xfs_bmbt_lookup_eq(cur, &new, &i);
6136		if (error)
6137			goto del_cursor;
6138		if (XFS_IS_CORRUPT(mp, i != 0)) {
6139			xfs_btree_mark_sick(cur);
6140			error = -EFSCORRUPTED;
6141			goto del_cursor;
6142		}
6143		error = xfs_btree_insert(cur, &i);
6144		if (error)
6145			goto del_cursor;
6146		if (XFS_IS_CORRUPT(mp, i != 1)) {
6147			xfs_btree_mark_sick(cur);
6148			error = -EFSCORRUPTED;
6149			goto del_cursor;
6150		}
6151	}
6152
6153	/*
6154	 * Convert to a btree if necessary.
6155	 */
6156	if (xfs_bmap_needs_btree(ip, whichfork)) {
6157		int tmp_logflags; /* partial log flag return val */
6158
6159		ASSERT(cur == NULL);
6160		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
6161				&tmp_logflags, whichfork);
6162		logflags |= tmp_logflags;
6163	}
6164
6165del_cursor:
6166	if (cur) {
6167		cur->bc_bmap.allocated = 0;
6168		xfs_btree_del_cursor(cur, error);
6169	}
6170
6171	if (logflags)
6172		xfs_trans_log_inode(tp, ip, logflags);
6173	return error;
6174}
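/*
 * Illustrative sketch, not part of the original file: worked numbers for
 * the split above.  If the data fork maps file blocks [10, 20) to disk
 * blocks [100, 110), splitting at file block 14 leaves two records,
 * [10, 14) -> [100, 104) and [14, 20) -> [104, 110).
 */
static int
xfs_split_example(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	return xfs_bmap_split_extent(tp, ip, 14);
}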
6175
6176/* Record a bmap intent. */
6177static inline void
6178__xfs_bmap_add(
6179	struct xfs_trans		*tp,
6180	enum xfs_bmap_intent_type	type,
6181	struct xfs_inode		*ip,
6182	int				whichfork,
6183	struct xfs_bmbt_irec		*bmap)
6184{
6185	struct xfs_bmap_intent		*bi;
6186
6187	if ((whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK) ||
6188	    bmap->br_startblock == HOLESTARTBLOCK ||
6189	    bmap->br_startblock == DELAYSTARTBLOCK)
6190		return;
6191
6192	bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_KERNEL | __GFP_NOFAIL);
6193	INIT_LIST_HEAD(&bi->bi_list);
6194	bi->bi_type = type;
6195	bi->bi_owner = ip;
6196	bi->bi_whichfork = whichfork;
6197	bi->bi_bmap = *bmap;
6198
6199	xfs_bmap_defer_add(tp, bi);
6200}
6201
6202/* Map an extent into a file. */
6203void
6204xfs_bmap_map_extent(
6205	struct xfs_trans	*tp,
6206	struct xfs_inode	*ip,
6207	int			whichfork,
6208	struct xfs_bmbt_irec	*PREV)
6209{
6210	__xfs_bmap_add(tp, XFS_BMAP_MAP, ip, whichfork, PREV);
6211}
6212
6213/* Unmap an extent out of a file. */
6214void
6215xfs_bmap_unmap_extent(
6216	struct xfs_trans	*tp,
6217	struct xfs_inode	*ip,
6218	int			whichfork,
6219	struct xfs_bmbt_irec	*PREV)
6220{
6221	__xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, whichfork, PREV);
6222}
6223
6224/*
6225 * Process one of the deferred bmap operations: remap the extent into
6226 * or out of the file as described by the intent.
6227 */
6228int
6229xfs_bmap_finish_one(
6230	struct xfs_trans		*tp,
6231	struct xfs_bmap_intent		*bi)
6232{
6233	struct xfs_bmbt_irec		*bmap = &bi->bi_bmap;
6234	int				error = 0;
6235	int				flags = 0;
6236
6237	if (bi->bi_whichfork == XFS_ATTR_FORK)
6238		flags |= XFS_BMAPI_ATTRFORK;
6239
6240	ASSERT(tp->t_highest_agno == NULLAGNUMBER);
6241
6242	trace_xfs_bmap_deferred(bi);
6243
6244	if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE))
6245		return -EIO;
6246
6247	switch (bi->bi_type) {
6248	case XFS_BMAP_MAP:
6249		if (bi->bi_bmap.br_state == XFS_EXT_UNWRITTEN)
6250			flags |= XFS_BMAPI_PREALLOC;
6251		error = xfs_bmapi_remap(tp, bi->bi_owner, bmap->br_startoff,
6252				bmap->br_blockcount, bmap->br_startblock,
6253				flags);
6254		bmap->br_blockcount = 0;
6255		break;
6256	case XFS_BMAP_UNMAP:
6257		error = __xfs_bunmapi(tp, bi->bi_owner, bmap->br_startoff,
6258				&bmap->br_blockcount, flags | XFS_BMAPI_REMAP,
6259				1);
6260		break;
6261	default:
6262		ASSERT(0);
6263		xfs_bmap_mark_sick(bi->bi_owner, bi->bi_whichfork);
6264		error = -EFSCORRUPTED;
6265	}
6266
6267	return error;
6268}
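/*
 * Illustrative sketch, not part of the original file: the intent lifecycle
 * from the caller's side.  Queueing a map with xfs_bmap_map_extent() logs a
 * bmap intent; xfs_defer_finish() then rolls the transaction and drives
 * xfs_bmap_finish_one() for each queued item.
 */
static int
xfs_defer_map_example(
	struct xfs_trans	**tpp,
	struct xfs_inode	*ip,
	struct xfs_bmbt_irec	*irec)
{
	xfs_bmap_map_extent(*tpp, ip, XFS_DATA_FORK, irec);
	return xfs_defer_finish(tpp);
}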
6269
6270/* Check that an extent does not have invalid flags or bad ranges. */
6271xfs_failaddr_t
6272xfs_bmap_validate_extent_raw(
6273	struct xfs_mount	*mp,
6274	bool			rtfile,
6275	int			whichfork,
6276	struct xfs_bmbt_irec	*irec)
6277{
6278	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
6279		return __this_address;
6280
6281	if (rtfile && whichfork == XFS_DATA_FORK) {
6282		if (!xfs_verify_rtbext(mp, irec->br_startblock,
6283					   irec->br_blockcount))
6284			return __this_address;
6285	} else {
6286		if (!xfs_verify_fsbext(mp, irec->br_startblock,
6287					   irec->br_blockcount))
6288			return __this_address;
6289	}
6290	if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
6291		return __this_address;
6292	return NULL;
6293}
6294
6295int __init
6296xfs_bmap_intent_init_cache(void)
6297{
6298	xfs_bmap_intent_cache = kmem_cache_create("xfs_bmap_intent",
6299			sizeof(struct xfs_bmap_intent),
6300			0, 0, NULL);
6301
6302	return xfs_bmap_intent_cache != NULL ? 0 : -ENOMEM;
6303}
6304
6305void
6306xfs_bmap_intent_destroy_cache(void)
6307{
6308	kmem_cache_destroy(xfs_bmap_intent_cache);
6309	xfs_bmap_intent_cache = NULL;
6310}
6311
6312/* Check that an inode's extent does not have invalid flags or bad ranges. */
6313xfs_failaddr_t
6314xfs_bmap_validate_extent(
6315	struct xfs_inode	*ip,
6316	int			whichfork,
6317	struct xfs_bmbt_irec	*irec)
6318{
6319	return xfs_bmap_validate_extent_raw(ip->i_mount,
6320			XFS_IS_REALTIME_INODE(ip), whichfork, irec);
6321}
6322
6323/*
6324 * Used in xfs_itruncate_extents().  This is the maximum number of extents
6325 * freed from a file in a single transaction.
6326 */
6327#define	XFS_ITRUNC_MAX_EXTENTS	2
6328
6329/*
6330 * Unmap every extent in part of an inode's fork.  We don't do any higher level
6331 * invalidation work at all.
6332 */
6333int
6334xfs_bunmapi_range(
6335	struct xfs_trans	**tpp,
6336	struct xfs_inode	*ip,
6337	uint32_t		flags,
6338	xfs_fileoff_t		startoff,
6339	xfs_fileoff_t		endoff)
6340{
6341	xfs_filblks_t		unmap_len = endoff - startoff + 1;
6342	int			error = 0;
6343
6344	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
6345
6346	while (unmap_len > 0) {
6347		ASSERT((*tpp)->t_highest_agno == NULLAGNUMBER);
6348		error = __xfs_bunmapi(*tpp, ip, startoff, &unmap_len, flags,
6349				XFS_ITRUNC_MAX_EXTENTS);
6350		if (error)
6351			goto out;
6352
6353		/* free the just unmapped extents */
6354		error = xfs_defer_finish(tpp);
6355		if (error)
6356			goto out;
6357	}
6358out:
6359	return error;
6360}
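/*
 * Illustrative sketch, not part of the original file: a truncate-style
 * caller of xfs_bunmapi_range(), unmapping the data fork from
 * first_unmap_block through last_block and freeing the blocks as it goes.
 * The parameter names here are hypothetical values supplied by the caller.
 */
static int
xfs_truncate_example(
	struct xfs_trans	**tpp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		first_unmap_block,
	xfs_fileoff_t		last_block)
{
	return xfs_bunmapi_range(tpp, ip, 0, first_unmap_block, last_block);
}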
6361
6362struct xfs_bmap_query_range {
6363	xfs_bmap_query_range_fn	fn;
6364	void			*priv;
6365};
6366
6367/* Format btree record and pass to our callback. */
6368STATIC int
6369xfs_bmap_query_range_helper(
6370	struct xfs_btree_cur		*cur,
6371	const union xfs_btree_rec	*rec,
6372	void				*priv)
6373{
6374	struct xfs_bmap_query_range	*query = priv;
6375	struct xfs_bmbt_irec		irec;
6376	xfs_failaddr_t			fa;
6377
6378	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
6379	fa = xfs_bmap_validate_extent(cur->bc_ino.ip, cur->bc_ino.whichfork,
6380			&irec);
6381	if (fa) {
6382		xfs_btree_mark_sick(cur);
6383		return xfs_bmap_complain_bad_rec(cur->bc_ino.ip,
6384				cur->bc_ino.whichfork, fa, &irec);
6385	}
6386
6387	return query->fn(cur, &irec, query->priv);
6388}
6389
6390/* Find all bmaps. */
6391int
6392xfs_bmap_query_all(
6393	struct xfs_btree_cur		*cur,
6394	xfs_bmap_query_range_fn		fn,
6395	void				*priv)
6396{
6397	struct xfs_bmap_query_range	query = {
6398		.priv			= priv,
6399		.fn			= fn,
6400	};
6401
6402	return xfs_btree_query_all(cur, xfs_bmap_query_range_helper, &query);
6403}
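/*
 * Illustrative sketch, not part of the original file: counting bmbt records
 * with xfs_bmap_query_all().  The helper matches the callback signature
 * used by xfs_bmap_query_range_helper() above; the _example names are
 * hypothetical.
 */
static int
xfs_bmap_count_helper_example(
	struct xfs_btree_cur	*cur,
	struct xfs_bmbt_irec	*irec,
	void			*priv)
{
	unsigned long long	*nr = priv;

	(*nr)++;
	return 0;
}

static int
xfs_bmap_count_example(
	struct xfs_btree_cur	*cur,
	unsigned long long	*nr)
{
	*nr = 0;
	return xfs_bmap_query_all(cur, xfs_bmap_count_helper_example, nr);
}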
v5.14.15
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4 * All Rights Reserved.
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_log_format.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_bit.h"
  13#include "xfs_sb.h"
  14#include "xfs_mount.h"
  15#include "xfs_defer.h"
  16#include "xfs_dir2.h"
  17#include "xfs_inode.h"
  18#include "xfs_btree.h"
  19#include "xfs_trans.h"
  20#include "xfs_alloc.h"
  21#include "xfs_bmap.h"
  22#include "xfs_bmap_util.h"
  23#include "xfs_bmap_btree.h"
  24#include "xfs_rtalloc.h"
  25#include "xfs_errortag.h"
  26#include "xfs_error.h"
  27#include "xfs_quota.h"
  28#include "xfs_trans_space.h"
  29#include "xfs_buf_item.h"
  30#include "xfs_trace.h"
  31#include "xfs_attr_leaf.h"
  32#include "xfs_filestream.h"
  33#include "xfs_rmap.h"
  34#include "xfs_ag.h"
  35#include "xfs_ag_resv.h"
  36#include "xfs_refcount.h"
  37#include "xfs_icache.h"
  38#include "xfs_iomap.h"
 
 
 
  39
  40
  41kmem_zone_t		*xfs_bmap_free_item_zone;
  42
  43/*
  44 * Miscellaneous helper functions
  45 */
  46
  47/*
  48 * Compute and fill in the value of the maximum depth of a bmap btree
  49 * in this filesystem.  Done once, during mount.
  50 */
  51void
  52xfs_bmap_compute_maxlevels(
  53	xfs_mount_t	*mp,		/* file system mount structure */
  54	int		whichfork)	/* data or attr fork */
  55{
 
 
  56	int		level;		/* btree level */
  57	uint		maxblocks;	/* max blocks at this level */
  58	uint		maxleafents;	/* max leaf entries possible */
  59	int		maxrootrecs;	/* max records in root block */
  60	int		minleafrecs;	/* min records in leaf block */
  61	int		minnoderecs;	/* min records in node block */
  62	int		sz;		/* root block size */
  63
  64	/*
  65	 * The maximum number of extents in a file, hence the maximum number of
  66	 * leaf entries, is controlled by the size of the on-disk extent count,
  67	 * either a signed 32-bit number for the data fork, or a signed 16-bit
  68	 * number for the attr fork.
  69	 *
  70	 * Note that we can no longer assume that if we are in ATTR1 that the
  71	 * fork offset of all the inodes will be
  72	 * (xfs_default_attroffset(ip) >> 3) because we could have mounted with
  73	 * ATTR2 and then mounted back with ATTR1, keeping the i_forkoff's fixed
  74	 * but probably at various positions. Therefore, for both ATTR1 and
  75	 * ATTR2 we have to assume the worst case scenario of a minimum size
  76	 * available.
  77	 */
  78	if (whichfork == XFS_DATA_FORK) {
  79		maxleafents = MAXEXTNUM;
 
  80		sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
  81	} else {
  82		maxleafents = MAXAEXTNUM;
  83		sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
  84	}
  85	maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
  86	minleafrecs = mp->m_bmap_dmnr[0];
  87	minnoderecs = mp->m_bmap_dmnr[1];
  88	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
  89	for (level = 1; maxblocks > 1; level++) {
  90		if (maxblocks <= maxrootrecs)
  91			maxblocks = 1;
  92		else
  93			maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
  94	}
  95	mp->m_bm_maxlevels[whichfork] = level;
 
  96}
  97
  98unsigned int
  99xfs_bmap_compute_attr_offset(
 100	struct xfs_mount	*mp)
 101{
 102	if (mp->m_sb.sb_inodesize == 256)
 103		return XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
 104	return XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
 105}
 106
 107STATIC int				/* error */
 108xfs_bmbt_lookup_eq(
 109	struct xfs_btree_cur	*cur,
 110	struct xfs_bmbt_irec	*irec,
 111	int			*stat)	/* success/failure */
 112{
 113	cur->bc_rec.b = *irec;
 114	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
 115}
 116
 117STATIC int				/* error */
 118xfs_bmbt_lookup_first(
 119	struct xfs_btree_cur	*cur,
 120	int			*stat)	/* success/failure */
 121{
 122	cur->bc_rec.b.br_startoff = 0;
 123	cur->bc_rec.b.br_startblock = 0;
 124	cur->bc_rec.b.br_blockcount = 0;
 125	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 126}
 127
 128/*
 129 * Check if the inode needs to be converted to btree format.
 130 */
 131static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
 132{
 133	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
 134
 135	return whichfork != XFS_COW_FORK &&
 136		ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
 137		ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork);
 138}
 139
 140/*
 141 * Check if the inode should be converted to extent format.
 142 */
 143static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
 144{
 145	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
 146
 147	return whichfork != XFS_COW_FORK &&
 148		ifp->if_format == XFS_DINODE_FMT_BTREE &&
 149		ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork);
 150}
 151
 152/*
 153 * Update the record referred to by cur to the value given by irec
 154 * This either works (return 0) or gets an EFSCORRUPTED error.
 155 */
 156STATIC int
 157xfs_bmbt_update(
 158	struct xfs_btree_cur	*cur,
 159	struct xfs_bmbt_irec	*irec)
 160{
 161	union xfs_btree_rec	rec;
 162
 163	xfs_bmbt_disk_set_all(&rec.bmbt, irec);
 164	return xfs_btree_update(cur, &rec);
 165}
 166
 167/*
 168 * Compute the worst-case number of indirect blocks that will be used
 169 * for ip's delayed extent of length "len".
 170 */
 171STATIC xfs_filblks_t
 172xfs_bmap_worst_indlen(
 173	xfs_inode_t	*ip,		/* incore inode pointer */
 174	xfs_filblks_t	len)		/* delayed extent length */
 175{
 176	int		level;		/* btree level number */
 177	int		maxrecs;	/* maximum record count at this level */
 178	xfs_mount_t	*mp;		/* mount structure */
 179	xfs_filblks_t	rval;		/* return value */
 180
 181	mp = ip->i_mount;
 182	maxrecs = mp->m_bmap_dmxr[0];
 183	for (level = 0, rval = 0;
 184	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
 185	     level++) {
 186		len += maxrecs - 1;
 187		do_div(len, maxrecs);
 188		rval += len;
 189		if (len == 1)
 190			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
 191				level - 1;
 192		if (level == 0)
 193			maxrecs = mp->m_bmap_dmxr[1];
 194	}
 195	return rval;
 196}
 197
 198/*
 199 * Calculate the default attribute fork offset for newly created inodes.
 200 */
 201uint
 202xfs_default_attroffset(
 203	struct xfs_inode	*ip)
 204{
 205	if (ip->i_df.if_format == XFS_DINODE_FMT_DEV)
 206		return roundup(sizeof(xfs_dev_t), 8);
 207	return M_IGEO(ip->i_mount)->attr_fork_offset;
 208}
 209
 210/*
 211 * Helper routine to reset inode i_forkoff field when switching attribute fork
 212 * from local to extent format - we reset it where possible to make space
 213 * available for inline data fork extents.
 214 */
 215STATIC void
 216xfs_bmap_forkoff_reset(
 217	xfs_inode_t	*ip,
 218	int		whichfork)
 219{
 220	if (whichfork == XFS_ATTR_FORK &&
 221	    ip->i_df.if_format != XFS_DINODE_FMT_DEV &&
 222	    ip->i_df.if_format != XFS_DINODE_FMT_BTREE) {
 223		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;
 224
 225		if (dfl_forkoff > ip->i_forkoff)
 226			ip->i_forkoff = dfl_forkoff;
 227	}
 228}
 229
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 230#ifdef DEBUG
 231STATIC struct xfs_buf *
 232xfs_bmap_get_bp(
 233	struct xfs_btree_cur	*cur,
 234	xfs_fsblock_t		bno)
 235{
 236	struct xfs_log_item	*lip;
 237	int			i;
 238
 239	if (!cur)
 240		return NULL;
 241
 242	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
 243		if (!cur->bc_bufs[i])
 244			break;
 245		if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
 246			return cur->bc_bufs[i];
 247	}
 248
 249	/* Chase down all the log items to see if the bp is there */
 250	list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
 251		struct xfs_buf_log_item	*bip = (struct xfs_buf_log_item *)lip;
 252
 253		if (bip->bli_item.li_type == XFS_LI_BUF &&
 254		    XFS_BUF_ADDR(bip->bli_buf) == bno)
 255			return bip->bli_buf;
 256	}
 257
 258	return NULL;
 259}
 260
 261STATIC void
 262xfs_check_block(
 263	struct xfs_btree_block	*block,
 264	xfs_mount_t		*mp,
 265	int			root,
 266	short			sz)
 267{
 268	int			i, j, dmxr;
 269	__be64			*pp, *thispa;	/* pointer to block address */
 270	xfs_bmbt_key_t		*prevp, *keyp;
 271
 272	ASSERT(be16_to_cpu(block->bb_level) > 0);
 273
 274	prevp = NULL;
 275	for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
 276		dmxr = mp->m_bmap_dmxr[0];
 277		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
 278
 279		if (prevp) {
 280			ASSERT(be64_to_cpu(prevp->br_startoff) <
 281			       be64_to_cpu(keyp->br_startoff));
 282		}
 283		prevp = keyp;
 284
 285		/*
 286		 * Compare the block numbers to see if there are dups.
 287		 */
 288		if (root)
 289			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
 290		else
 291			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
 292
 293		for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
 294			if (root)
 295				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
 296			else
 297				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
 298			if (*thispa == *pp) {
 299				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
 300					__func__, j, i,
 301					(unsigned long long)be64_to_cpu(*thispa));
 302				xfs_err(mp, "%s: ptrs are equal in node\n",
 303					__func__);
 304				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 305			}
 306		}
 307	}
 308}
 309
 310/*
 311 * Check that the extents for the inode ip are in the right order in all
 312 * btree leaves. THis becomes prohibitively expensive for large extent count
 313 * files, so don't bother with inodes that have more than 10,000 extents in
 314 * them. The btree record ordering checks will still be done, so for such large
 315 * bmapbt constructs that is going to catch most corruptions.
 316 */
 317STATIC void
 318xfs_bmap_check_leaf_extents(
 319	xfs_btree_cur_t		*cur,	/* btree cursor or null */
 320	xfs_inode_t		*ip,		/* incore inode pointer */
 321	int			whichfork)	/* data or attr fork */
 322{
 323	struct xfs_mount	*mp = ip->i_mount;
 324	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 325	struct xfs_btree_block	*block;	/* current btree block */
 326	xfs_fsblock_t		bno;	/* block # of "block" */
 327	struct xfs_buf		*bp;	/* buffer for "block" */
 328	int			error;	/* error return value */
 329	xfs_extnum_t		i=0, j;	/* index into the extents list */
 330	int			level;	/* btree level, for checking */
 331	__be64			*pp;	/* pointer to block address */
 332	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
 333	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
 334	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
 335	int			bp_release = 0;
 336
 337	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
 338		return;
 339
 340	/* skip large extent count inodes */
 341	if (ip->i_df.if_nextents > 10000)
 342		return;
 343
 344	bno = NULLFSBLOCK;
 345	block = ifp->if_broot;
 346	/*
 347	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
 348	 */
 349	level = be16_to_cpu(block->bb_level);
 350	ASSERT(level > 0);
 351	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
 352	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
 353	bno = be64_to_cpu(*pp);
 354
 355	ASSERT(bno != NULLFSBLOCK);
 356	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
 357	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
 358
 359	/*
 360	 * Go down the tree until leaf level is reached, following the first
 361	 * pointer (leftmost) at each level.
 362	 */
 363	while (level-- > 0) {
 364		/* See if buf is in cur first */
 365		bp_release = 0;
 366		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
 367		if (!bp) {
 368			bp_release = 1;
 369			error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
 370						XFS_BMAP_BTREE_REF,
 371						&xfs_bmbt_buf_ops);
 372			if (error)
 373				goto error_norelse;
 374		}
 375		block = XFS_BUF_TO_BLOCK(bp);
 376		if (level == 0)
 377			break;
 378
 379		/*
 380		 * Check this block for basic sanity (increasing keys and
 381		 * no duplicate blocks).
 382		 */
 383
 384		xfs_check_block(block, mp, 0, 0);
 385		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 386		bno = be64_to_cpu(*pp);
 387		if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
 
 388			error = -EFSCORRUPTED;
 389			goto error0;
 390		}
 391		if (bp_release) {
 392			bp_release = 0;
 393			xfs_trans_brelse(NULL, bp);
 394		}
 395	}
 396
 397	/*
 398	 * Here with bp and block set to the leftmost leaf node in the tree.
 399	 */
 400	i = 0;
 401
 402	/*
 403	 * Loop over all leaf nodes checking that all extents are in the right order.
 404	 */
 405	for (;;) {
 406		xfs_fsblock_t	nextbno;
 407		xfs_extnum_t	num_recs;
 408
 409
 410		num_recs = xfs_btree_get_numrecs(block);
 411
 412		/*
 413		 * Read-ahead the next leaf block, if any.
 414		 */
 415
 416		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
 417
 418		/*
 419		 * Check all the extents to make sure they are OK.
 420		 * If we had a previous block, the last entry should
 421		 * conform with the first entry in this one.
 422		 */
 423
 424		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
 425		if (i) {
 426			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
 427			       xfs_bmbt_disk_get_blockcount(&last) <=
 428			       xfs_bmbt_disk_get_startoff(ep));
 429		}
 430		for (j = 1; j < num_recs; j++) {
 431			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
 432			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
 433			       xfs_bmbt_disk_get_blockcount(ep) <=
 434			       xfs_bmbt_disk_get_startoff(nextp));
 435			ep = nextp;
 436		}
 437
 438		last = *ep;
 439		i += num_recs;
 440		if (bp_release) {
 441			bp_release = 0;
 442			xfs_trans_brelse(NULL, bp);
 443		}
 444		bno = nextbno;
 445		/*
 446		 * If we've reached the end, stop.
 447		 */
 448		if (bno == NULLFSBLOCK)
 449			break;
 450
 451		bp_release = 0;
 452		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
 453		if (!bp) {
 454			bp_release = 1;
 455			error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
 456						XFS_BMAP_BTREE_REF,
 457						&xfs_bmbt_buf_ops);
 458			if (error)
 459				goto error_norelse;
 460		}
 461		block = XFS_BUF_TO_BLOCK(bp);
 462	}
 463
 464	return;
 465
 466error0:
 467	xfs_warn(mp, "%s: at error0", __func__);
 468	if (bp_release)
 469		xfs_trans_brelse(NULL, bp);
 470error_norelse:
 471	xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
 472		__func__, i);
 473	xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
 474	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 475	return;
 476}
 477
 478/*
 479 * Validate that the bmbt_irecs being returned from bmapi are valid
 480 * given the caller's original parameters.  Specifically check the
 481 * ranges of the returned irecs to ensure that they only extend beyond
 482 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 483 */
 484STATIC void
 485xfs_bmap_validate_ret(
 486	xfs_fileoff_t		bno,
 487	xfs_filblks_t		len,
 488	int			flags,
 489	xfs_bmbt_irec_t		*mval,
 490	int			nmap,
 491	int			ret_nmap)
 492{
 493	int			i;		/* index to map values */
 494
 495	ASSERT(ret_nmap <= nmap);
 496
 497	for (i = 0; i < ret_nmap; i++) {
 498		ASSERT(mval[i].br_blockcount > 0);
 499		if (!(flags & XFS_BMAPI_ENTIRE)) {
 500			ASSERT(mval[i].br_startoff >= bno);
 501			ASSERT(mval[i].br_blockcount <= len);
 502			ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
 503			       bno + len);
 504		} else {
 505			ASSERT(mval[i].br_startoff < bno + len);
 506			ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
 507			       bno);
 508		}
 509		ASSERT(i == 0 ||
 510		       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
 511		       mval[i].br_startoff);
 512		ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
 513		       mval[i].br_startblock != HOLESTARTBLOCK);
 514		ASSERT(mval[i].br_state == XFS_EXT_NORM ||
 515		       mval[i].br_state == XFS_EXT_UNWRITTEN);
 516	}
 517}
 518
 519#else
 520#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)		do { } while (0)
 521#define	xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)	do { } while (0)
 522#endif /* DEBUG */
 523
 524/*
 525 * bmap free list manipulation functions
 526 */
 527
 528/*
 529 * Add the extent to the list of extents to be free at transaction end.
 530 * The list is maintained sorted (by block number).
 531 */
 532void
 533__xfs_bmap_add_free(
 534	struct xfs_trans		*tp,
 535	xfs_fsblock_t			bno,
 536	xfs_filblks_t			len,
 537	const struct xfs_owner_info	*oinfo,
 538	bool				skip_discard)
 539{
 540	struct xfs_extent_free_item	*new;		/* new element */
 541#ifdef DEBUG
 542	struct xfs_mount		*mp = tp->t_mountp;
 543	xfs_agnumber_t			agno;
 544	xfs_agblock_t			agbno;
 545
 546	ASSERT(bno != NULLFSBLOCK);
 547	ASSERT(len > 0);
 548	ASSERT(len <= MAXEXTLEN);
 549	ASSERT(!isnullstartblock(bno));
 550	agno = XFS_FSB_TO_AGNO(mp, bno);
 551	agbno = XFS_FSB_TO_AGBNO(mp, bno);
 552	ASSERT(agno < mp->m_sb.sb_agcount);
 553	ASSERT(agbno < mp->m_sb.sb_agblocks);
 554	ASSERT(len < mp->m_sb.sb_agblocks);
 555	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
 556#endif
 557	ASSERT(xfs_bmap_free_item_zone != NULL);
 558
 559	new = kmem_cache_alloc(xfs_bmap_free_item_zone,
 560			       GFP_KERNEL | __GFP_NOFAIL);
 561	new->xefi_startblock = bno;
 562	new->xefi_blockcount = (xfs_extlen_t)len;
 563	if (oinfo)
 564		new->xefi_oinfo = *oinfo;
 565	else
 566		new->xefi_oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
 567	new->xefi_skip_discard = skip_discard;
 568	trace_xfs_bmap_free_defer(tp->t_mountp,
 569			XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
 570			XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
 571	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
 572}
 573
 574/*
 575 * Inode fork format manipulation functions
 576 */
 577
 578/*
 579 * Convert the inode format to extent format if it currently is in btree format,
 580 * but the extent list is small enough that it fits into the extent format.
 581 *
 582 * Since the extents are already in-core, all we have to do is give up the space
 583 * for the btree root and pitch the leaf block.
 584 */
 585STATIC int				/* error */
 586xfs_bmap_btree_to_extents(
 587	struct xfs_trans	*tp,	/* transaction pointer */
 588	struct xfs_inode	*ip,	/* incore inode pointer */
 589	struct xfs_btree_cur	*cur,	/* btree cursor */
 590	int			*logflagsp, /* inode logging flags */
 591	int			whichfork)  /* data or attr fork */
 592{
 593	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 594	struct xfs_mount	*mp = ip->i_mount;
 595	struct xfs_btree_block	*rblock = ifp->if_broot;
 596	struct xfs_btree_block	*cblock;/* child btree block */
 597	xfs_fsblock_t		cbno;	/* child block number */
 598	struct xfs_buf		*cbp;	/* child block's buffer */
 599	int			error;	/* error return value */
 600	__be64			*pp;	/* ptr to block address */
 601	struct xfs_owner_info	oinfo;
 602
 603	/* check if we actually need the extent format first: */
 604	if (!xfs_bmap_wants_extents(ip, whichfork))
 605		return 0;
 606
 607	ASSERT(cur);
 608	ASSERT(whichfork != XFS_COW_FORK);
 609	ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
 610	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
 611	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
 612	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
 613
 614	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
 615	cbno = be64_to_cpu(*pp);
 616#ifdef DEBUG
 617	if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_btree_check_lptr(cur, cbno, 1)))
 
 618		return -EFSCORRUPTED;
 
 619#endif
 620	error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
 621				&xfs_bmbt_buf_ops);
 
 622	if (error)
 623		return error;
 624	cblock = XFS_BUF_TO_BLOCK(cbp);
 625	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
 626		return error;
 
 627	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
 628	xfs_bmap_add_free(cur->bc_tp, cbno, 1, &oinfo);
 
 
 
 
 629	ip->i_nblocks--;
 630	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 631	xfs_trans_binval(tp, cbp);
 632	if (cur->bc_bufs[0] == cbp)
 633		cur->bc_bufs[0] = NULL;
 634	xfs_iroot_realloc(ip, -1, whichfork);
 635	ASSERT(ifp->if_broot == NULL);
 636	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 637	*logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
 638	return 0;
 639}
 640
 641/*
 642 * Convert an extents-format file into a btree-format file.
 643 * The new file will have a root block (in the inode) and a single child block.
 644 */
 645STATIC int					/* error */
 646xfs_bmap_extents_to_btree(
 647	struct xfs_trans	*tp,		/* transaction pointer */
 648	struct xfs_inode	*ip,		/* incore inode pointer */
 649	struct xfs_btree_cur	**curp,		/* cursor returned to caller */
 650	int			wasdel,		/* converting a delayed alloc */
 651	int			*logflagsp,	/* inode logging flags */
 652	int			whichfork)	/* data or attr fork */
 653{
 654	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
 655	struct xfs_buf		*abp;		/* buffer for ablock */
 656	struct xfs_alloc_arg	args;		/* allocation arguments */
 657	struct xfs_bmbt_rec	*arp;		/* child record pointer */
 658	struct xfs_btree_block	*block;		/* btree root block */
 659	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
 660	int			error;		/* error return value */
 661	struct xfs_ifork	*ifp;		/* inode fork pointer */
 662	struct xfs_bmbt_key	*kp;		/* root block key pointer */
 663	struct xfs_mount	*mp;		/* mount structure */
 664	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
 665	struct xfs_iext_cursor	icur;
 666	struct xfs_bmbt_irec	rec;
 667	xfs_extnum_t		cnt = 0;
 668
 669	mp = ip->i_mount;
 670	ASSERT(whichfork != XFS_COW_FORK);
 671	ifp = XFS_IFORK_PTR(ip, whichfork);
 672	ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS);
 673
 674	/*
 675	 * Make space in the inode incore. This needs to be undone if we fail
 676	 * to expand the root.
 677	 */
 678	xfs_iroot_realloc(ip, 1, whichfork);
 679
 680	/*
 681	 * Fill in the root.
 682	 */
 683	block = ifp->if_broot;
 684	xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
 685				 XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
 686				 XFS_BTREE_LONG_PTRS);
 687	/*
 688	 * Need a cursor.  Can't allocate until bb_level is filled in.
 689	 */
 690	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 691	cur->bc_ino.flags = wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
 
 692	/*
 693	 * Convert to a btree with two levels, one record in root.
 694	 */
 695	ifp->if_format = XFS_DINODE_FMT_BTREE;
 696	memset(&args, 0, sizeof(args));
 697	args.tp = tp;
 698	args.mp = mp;
 699	xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
 700	if (tp->t_firstblock == NULLFSBLOCK) {
 701		args.type = XFS_ALLOCTYPE_START_BNO;
 702		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
 703	} else if (tp->t_flags & XFS_TRANS_LOWMODE) {
 704		args.type = XFS_ALLOCTYPE_START_BNO;
 705		args.fsbno = tp->t_firstblock;
 706	} else {
 707		args.type = XFS_ALLOCTYPE_NEAR_BNO;
 708		args.fsbno = tp->t_firstblock;
 709	}
 710	args.minlen = args.maxlen = args.prod = 1;
 711	args.wasdel = wasdel;
 712	*logflagsp = 0;
 713	error = xfs_alloc_vextent(&args);
 
 714	if (error)
 715		goto out_root_realloc;
 716
 
 
 
 717	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
 718		error = -ENOSPC;
 719		goto out_root_realloc;
 720	}
 721
 722	/*
 723	 * Allocation can't fail, the space was reserved.
 724	 */
 725	ASSERT(tp->t_firstblock == NULLFSBLOCK ||
 726	       args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock));
 727	tp->t_firstblock = args.fsbno;
 728	cur->bc_ino.allocated++;
 729	ip->i_nblocks++;
 730	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
 731	error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
 732			XFS_FSB_TO_DADDR(mp, args.fsbno),
 733			mp->m_bsize, 0, &abp);
 734	if (error)
 735		goto out_unreserve_dquot;
 736
 737	/*
 738	 * Fill in the child block.
 739	 */
 740	abp->b_ops = &xfs_bmbt_buf_ops;
 741	ablock = XFS_BUF_TO_BLOCK(abp);
 742	xfs_btree_init_block_int(mp, ablock, abp->b_bn,
 743				XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
 744				XFS_BTREE_LONG_PTRS);
 745
 746	for_each_xfs_iext(ifp, &icur, &rec) {
 747		if (isnullstartblock(rec.br_startblock))
 748			continue;
 749		arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
 750		xfs_bmbt_disk_set_all(arp, &rec);
 751		cnt++;
 752	}
 753	ASSERT(cnt == ifp->if_nextents);
 754	xfs_btree_set_numrecs(ablock, cnt);
 755
 756	/*
 757	 * Fill in the root key and pointer.
 758	 */
 759	kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
 760	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
 761	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
 762	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
 763						be16_to_cpu(block->bb_level)));
 764	*pp = cpu_to_be64(args.fsbno);
 765
 766	/*
 767	 * Do all this logging at the end so that
 768	 * the root is at the right level.
 769	 */
 770	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
 771	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
 772	ASSERT(*curp == NULL);
 773	*curp = cur;
 774	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
 775	return 0;
 776
 777out_unreserve_dquot:
 778	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 779out_root_realloc:
 780	xfs_iroot_realloc(ip, -1, whichfork);
 781	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 782	ASSERT(ifp->if_broot == NULL);
 783	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 784
 785	return error;
 786}
 787
 788/*
 789 * Convert a local file to an extents file.
 790 * This code is out of bounds for data forks of regular files,
 791 * since the file data needs to get logged so things will stay consistent.
 792 * (The bmap-level manipulations are ok, though).
 793 */
 794void
 795xfs_bmap_local_to_extents_empty(
 796	struct xfs_trans	*tp,
 797	struct xfs_inode	*ip,
 798	int			whichfork)
 799{
 800	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 801
 802	ASSERT(whichfork != XFS_COW_FORK);
 803	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
 804	ASSERT(ifp->if_bytes == 0);
 805	ASSERT(ifp->if_nextents == 0);
 806
 807	xfs_bmap_forkoff_reset(ip, whichfork);
 808	ifp->if_u1.if_root = NULL;
 809	ifp->if_height = 0;
 810	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 811	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 812}
 813
 814
 815STATIC int				/* error */
 816xfs_bmap_local_to_extents(
 817	xfs_trans_t	*tp,		/* transaction pointer */
 818	xfs_inode_t	*ip,		/* incore inode pointer */
 819	xfs_extlen_t	total,		/* total blocks needed by transaction */
 820	int		*logflagsp,	/* inode logging flags */
 821	int		whichfork,
 822	void		(*init_fn)(struct xfs_trans *tp,
 823				   struct xfs_buf *bp,
 824				   struct xfs_inode *ip,
 825				   struct xfs_ifork *ifp))
 826{
 827	int		error = 0;
 828	int		flags;		/* logging flags returned */
 829	struct xfs_ifork *ifp;		/* inode fork pointer */
 830	xfs_alloc_arg_t	args;		/* allocation arguments */
 831	struct xfs_buf	*bp;		/* buffer for extent block */
 832	struct xfs_bmbt_irec rec;
 833	struct xfs_iext_cursor icur;
 834
 835	/*
 836	 * We don't want to deal with the case of keeping inode data inline yet.
 837	 * So sending the data fork of a regular inode is invalid.
 838	 */
 839	ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
 840	ifp = XFS_IFORK_PTR(ip, whichfork);
 841	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
 842
 843	if (!ifp->if_bytes) {
 844		xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
 845		flags = XFS_ILOG_CORE;
 846		goto done;
 847	}
 848
 849	flags = 0;
 850	error = 0;
 851	memset(&args, 0, sizeof(args));
 852	args.tp = tp;
 853	args.mp = ip->i_mount;
 854	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
 855	/*
 856	 * Allocate a block.  We know we need only one, since the
 857	 * file currently fits in an inode.
 858	 */
 859	if (tp->t_firstblock == NULLFSBLOCK) {
 860		args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
 861		args.type = XFS_ALLOCTYPE_START_BNO;
 862	} else {
 863		args.fsbno = tp->t_firstblock;
 864		args.type = XFS_ALLOCTYPE_NEAR_BNO;
 865	}
 866	args.total = total;
 867	args.minlen = args.maxlen = args.prod = 1;
 868	error = xfs_alloc_vextent(&args);
 869	if (error)
 870		goto done;
 871
 872	/* Can't fail, the space was reserved. */
 873	ASSERT(args.fsbno != NULLFSBLOCK);
 874	ASSERT(args.len == 1);
 875	tp->t_firstblock = args.fsbno;
 876	error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
 877			XFS_FSB_TO_DADDR(args.mp, args.fsbno),
 878			args.mp->m_bsize, 0, &bp);
 879	if (error)
 880		goto done;
 881
 882	/*
 883	 * Initialize the block, copy the data and log the remote buffer.
 884	 *
 885	 * The callout is responsible for logging because the remote format
 886	 * might differ from the local format and thus we don't know how much to
 887	 * log here. Note that init_fn must also set the buffer log item type
 888	 * correctly.
 889	 */
 890	init_fn(tp, bp, ip, ifp);
 891
 892	/* account for the change in fork size */
 893	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
 894	xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
 895	flags |= XFS_ILOG_CORE;
 896
 897	ifp->if_u1.if_root = NULL;
 898	ifp->if_height = 0;
 899
 900	rec.br_startoff = 0;
 901	rec.br_startblock = args.fsbno;
 902	rec.br_blockcount = 1;
 903	rec.br_state = XFS_EXT_NORM;
 904	xfs_iext_first(ifp, &icur);
 905	xfs_iext_insert(ip, &icur, &rec, 0);
 906
 907	ifp->if_nextents = 1;
 908	ip->i_nblocks = 1;
 909	xfs_trans_mod_dquot_byino(tp, ip,
 910		XFS_TRANS_DQ_BCOUNT, 1L);
 911	flags |= xfs_ilog_fext(whichfork);
 912
 913done:
 914	*logflagsp = flags;
 915	return error;
 916}
 917
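/*
 * Illustrative sketch, not part of the original file: a minimal init_fn
 * callout for xfs_bmap_local_to_extents(), under the assumption that the
 * on-disk remote format is a verbatim copy of the inline fork data.  A
 * real callout (e.g. xfs_symlink_local_to_remote()) must also set the
 * buffer log item type before logging.
 */
static void
example_local_to_remote(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp,
	struct xfs_inode	*ip,
	struct xfs_ifork	*ifp)
{
	/* copy the inline fork contents into the freshly allocated block */
	memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);

	/*
	 * The callout owns the logging: log exactly the bytes initialised,
	 * since the caller cannot know the remote format's size.
	 */
	xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
}
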
 918/*
 919 * Called from xfs_bmap_add_attrfork to handle btree format files.
 920 */
 921STATIC int					/* error */
 922xfs_bmap_add_attrfork_btree(
 923	xfs_trans_t		*tp,		/* transaction pointer */
 924	xfs_inode_t		*ip,		/* incore inode pointer */
 925	int			*flags)		/* inode logging flags */
 926{
 927	struct xfs_btree_block	*block = ip->i_df.if_broot;
 928	xfs_btree_cur_t		*cur;		/* btree cursor */
 929	int			error;		/* error return value */
 930	xfs_mount_t		*mp;		/* file system mount struct */
 931	int			stat;		/* newroot status */
 932
 933	mp = ip->i_mount;
 934
 935	if (XFS_BMAP_BMDR_SPACE(block) <= XFS_IFORK_DSIZE(ip))
 936		*flags |= XFS_ILOG_DBROOT;
 937	else {
 938		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
 939		error = xfs_bmbt_lookup_first(cur, &stat);
 940		if (error)
 941			goto error0;
 942		/* must be at least one entry */
 943		if (XFS_IS_CORRUPT(mp, stat != 1)) {
 944			error = -EFSCORRUPTED;
 945			goto error0;
 946		}
 947		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
 948			goto error0;
 949		if (stat == 0) {
 950			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 951			return -ENOSPC;
 952		}
 953		cur->bc_ino.allocated = 0;
 954		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 955	}
 956	return 0;
 957error0:
 958	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 959	return error;
 960}
 961
 962/*
 963 * Called from xfs_bmap_add_attrfork to handle extents format files.
 964 */
 965STATIC int					/* error */
 966xfs_bmap_add_attrfork_extents(
 967	struct xfs_trans	*tp,		/* transaction pointer */
 968	struct xfs_inode	*ip,		/* incore inode pointer */
 969	int			*flags)		/* inode logging flags */
 970{
 971	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
 972	int			error;		/* error return value */
 973
 974	if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <=
 975	    XFS_IFORK_DSIZE(ip))
 976		return 0;
 977	cur = NULL;
 978	error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
 979					  XFS_DATA_FORK);
 980	if (cur) {
 981		cur->bc_ino.allocated = 0;
 982		xfs_btree_del_cursor(cur, error);
 983	}
 984	return error;
 985}
 986
 987/*
 988 * Called from xfs_bmap_add_attrfork to handle local format files. Each
 989 * different data fork content type needs a different callout to do the
 990 * conversion. Some are basic and only require special block initialisation
 991 * callouts for the data formatting, others (directories) are so specialised they
 992 * handle everything themselves.
 993 *
 994 * XXX (dgc): investigate whether directory conversion can use the generic
 995 * formatting callout. It should be possible - it's just a very complex
 996 * formatter.
 997 */
 998STATIC int					/* error */
 999xfs_bmap_add_attrfork_local(
1000	struct xfs_trans	*tp,		/* transaction pointer */
1001	struct xfs_inode	*ip,		/* incore inode pointer */
1002	int			*flags)		/* inode logging flags */
1003{
1004	struct xfs_da_args	dargs;		/* args for dir/attr code */
1005
1006	if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
1007		return 0;
1008
1009	if (S_ISDIR(VFS_I(ip)->i_mode)) {
1010		memset(&dargs, 0, sizeof(dargs));
1011		dargs.geo = ip->i_mount->m_dir_geo;
1012		dargs.dp = ip;
1013		dargs.total = dargs.geo->fsbcount;
1014		dargs.whichfork = XFS_DATA_FORK;
1015		dargs.trans = tp;
1016		return xfs_dir2_sf_to_block(&dargs);
1017	}
1018
1019	if (S_ISLNK(VFS_I(ip)->i_mode))
1020		return xfs_bmap_local_to_extents(tp, ip, 1, flags,
1021						 XFS_DATA_FORK,
1022						 xfs_symlink_local_to_remote);
1023
1024	/* should only be called for types that support local format data */
1025	ASSERT(0);
1026	return -EFSCORRUPTED;
1027}
1028
1029/*
1030 * Set an inode attr fork offset based on the format of the data fork.
1031 */
1032static int
1033xfs_bmap_set_attrforkoff(
1034	struct xfs_inode	*ip,
1035	int			size,
1036	int			*version)
1037{
1038	int			default_size = xfs_default_attroffset(ip) >> 3;
1039
1040	switch (ip->i_df.if_format) {
1041	case XFS_DINODE_FMT_DEV:
1042		ip->i_forkoff = default_size;
1043		break;
1044	case XFS_DINODE_FMT_LOCAL:
1045	case XFS_DINODE_FMT_EXTENTS:
1046	case XFS_DINODE_FMT_BTREE:
1047		ip->i_forkoff = xfs_attr_shortform_bytesfit(ip, size);
1048		if (!ip->i_forkoff)
1049			ip->i_forkoff = default_size;
1050		else if ((ip->i_mount->m_flags & XFS_MOUNT_ATTR2) && version)
1051			*version = 2;
1052		break;
1053	default:
1054		ASSERT(0);
1055		return -EINVAL;
1056	}
1057
1058	return 0;
1059}
1060
1061/*
1062 * Convert inode from non-attributed to attributed.
1063 * Must not be in a transaction, ip must not be locked.
1064 */
1065int						/* error code */
1066xfs_bmap_add_attrfork(
1067	xfs_inode_t		*ip,		/* incore inode pointer */
1068	int			size,		/* space new attribute needs */
1069	int			rsvd)		/* xact may use reserved blks */
1070{
1071	xfs_mount_t		*mp;		/* mount structure */
1072	xfs_trans_t		*tp;		/* transaction pointer */
1073	int			blks;		/* space reservation */
1074	int			version = 1;	/* superblock attr version */
1075	int			logflags;	/* logging flags */
1076	int			error;		/* error return value */
1077
1078	ASSERT(XFS_IFORK_Q(ip) == 0);
1079
1080	mp = ip->i_mount;
1081	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1082
1083	blks = XFS_ADDAFORK_SPACE_RES(mp);
1084
1085	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_addafork, blks, 0,
1086			rsvd, &tp);
1087	if (error)
1088		return error;
1089	if (XFS_IFORK_Q(ip))
1090		goto trans_cancel;
1091
1092	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1093	error = xfs_bmap_set_attrforkoff(ip, size, &version);
1094	if (error)
1095		goto trans_cancel;
1096	ASSERT(ip->i_afp == NULL);
1097
1098	ip->i_afp = xfs_ifork_alloc(XFS_DINODE_FMT_EXTENTS, 0);
1099	logflags = 0;
1100	switch (ip->i_df.if_format) {
1101	case XFS_DINODE_FMT_LOCAL:
1102		error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
1103		break;
1104	case XFS_DINODE_FMT_EXTENTS:
1105		error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
1106		break;
1107	case XFS_DINODE_FMT_BTREE:
1108		error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
1109		break;
1110	default:
1111		error = 0;
1112		break;
1113	}
1114	if (logflags)
1115		xfs_trans_log_inode(tp, ip, logflags);
1116	if (error)
1117		goto trans_cancel;
1118	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
1119	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
1120		bool log_sb = false;
1121
1122		spin_lock(&mp->m_sb_lock);
1123		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
1124			xfs_sb_version_addattr(&mp->m_sb);
1125			log_sb = true;
1126		}
1127		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
1128			xfs_sb_version_addattr2(&mp->m_sb);
1129			log_sb = true;
1130		}
1131		spin_unlock(&mp->m_sb_lock);
1132		if (log_sb)
1133			xfs_log_sb(tp);
1134	}
1135
1136	error = xfs_trans_commit(tp);
1137	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1138	return error;
1139
1140trans_cancel:
1141	xfs_trans_cancel(tp);
1142	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1143	return error;
1144}
1145
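/*
 * Illustrative caller sketch (an assumption for exposition, not code from
 * this file): attr fork creation is typically driven from the attr set
 * path, with the inode unlocked and no transaction held, as the comment
 * above requires.
 */
static int
example_ensure_attr_fork(
	struct xfs_inode	*ip,
	int			size,
	int			rsvd)
{
	if (XFS_IFORK_Q(ip))
		return 0;	/* attr fork already exists, nothing to do */
	return xfs_bmap_add_attrfork(ip, size, rsvd);
}
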
1146/*
1147 * Internal and external extent tree search functions.
1148 */
1149
1150struct xfs_iread_state {
1151	struct xfs_iext_cursor	icur;
1152	xfs_extnum_t		loaded;
1153};
1154
1155/* Stuff every bmbt record from this block into the incore extent map. */
1156static int
1157xfs_iread_bmbt_block(
1158	struct xfs_btree_cur	*cur,
1159	int			level,
1160	void			*priv)
1161{
1162	struct xfs_iread_state	*ir = priv;
1163	struct xfs_mount	*mp = cur->bc_mp;
1164	struct xfs_inode	*ip = cur->bc_ino.ip;
1165	struct xfs_btree_block	*block;
1166	struct xfs_buf		*bp;
1167	struct xfs_bmbt_rec	*frp;
1168	xfs_extnum_t		num_recs;
1169	xfs_extnum_t		j;
1170	int			whichfork = cur->bc_ino.whichfork;
1171	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1172
1173	block = xfs_btree_get_block(cur, level, &bp);
1174
1175	/* Abort if we find more records than nextents. */
1176	num_recs = xfs_btree_get_numrecs(block);
1177	if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) {
1178		xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
1179				(unsigned long long)ip->i_ino);
1180		xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
1181				sizeof(*block), __this_address);
1182		return -EFSCORRUPTED;
1183	}
1184
1185	/* Copy records into the incore cache. */
1186	frp = XFS_BMBT_REC_ADDR(mp, block, 1);
1187	for (j = 0; j < num_recs; j++, frp++, ir->loaded++) {
1188		struct xfs_bmbt_irec	new;
1189		xfs_failaddr_t		fa;
1190
1191		xfs_bmbt_disk_get_all(frp, &new);
1192		fa = xfs_bmap_validate_extent(ip, whichfork, &new);
1193		if (fa) {
1194			xfs_inode_verifier_error(ip, -EFSCORRUPTED,
1195					"xfs_iread_extents(2)", frp,
1196					sizeof(*frp), fa);
1197			return -EFSCORRUPTED;
1198		}
1199		xfs_iext_insert(ip, &ir->icur, &new,
1200				xfs_bmap_fork_to_state(whichfork));
1201		trace_xfs_read_extent(ip, &ir->icur,
1202				xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
1203		xfs_iext_next(ifp, &ir->icur);
1204	}
1205
1206	return 0;
1207}
1208
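/*
 * Worked example (illustrative): the ir->loaded + num_recs check above is
 * cumulative across leaf blocks.  If the inode core claims if_nextents ==
 * 5 but the first leaf holds 4 records and the second holds 3, the walk
 * fails on the second leaf (4 + 3 > 5) before any of its records are
 * copied into the incore tree.
 */
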
1209/*
1210 * Read in extents from a btree-format inode.
1211 */
1212int
1213xfs_iread_extents(
1214	struct xfs_trans	*tp,
1215	struct xfs_inode	*ip,
1216	int			whichfork)
1217{
1218	struct xfs_iread_state	ir;
1219	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1220	struct xfs_mount	*mp = ip->i_mount;
1221	struct xfs_btree_cur	*cur;
1222	int			error;
1223
1224	if (!xfs_need_iread_extents(ifp))
1225		return 0;
1226
1227	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1228
1229	ir.loaded = 0;
1230	xfs_iext_first(ifp, &ir.icur);
1231	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
1232	error = xfs_btree_visit_blocks(cur, xfs_iread_bmbt_block,
1233			XFS_BTREE_VISIT_RECORDS, &ir);
1234	xfs_btree_del_cursor(cur, error);
1235	if (error)
1236		goto out;
1237
1238	if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
1239		error = -EFSCORRUPTED;
1240		goto out;
1241	}
1242	ASSERT(ir.loaded == xfs_iext_count(ifp));
1243	return 0;
1244out:
1245	xfs_iext_destroy(ifp);
1246	return error;
1247}
1248
1249/*
1250 * Returns the relative block number of the first unused block(s) in the given
1251 * fork with at least "len" logically contiguous blocks free.  This is the
1252 * lowest-address hole if the fork has holes, else the first block past the end
 1253 * of the fork.  Returns 0 if the fork is currently local (in-inode).
1254 */
1255int						/* error */
1256xfs_bmap_first_unused(
1257	struct xfs_trans	*tp,		/* transaction pointer */
1258	struct xfs_inode	*ip,		/* incore inode */
1259	xfs_extlen_t		len,		/* size of hole to find */
1260	xfs_fileoff_t		*first_unused,	/* unused block */
1261	int			whichfork)	/* data or attr fork */
1262{
1263	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1264	struct xfs_bmbt_irec	got;
1265	struct xfs_iext_cursor	icur;
1266	xfs_fileoff_t		lastaddr = 0;
1267	xfs_fileoff_t		lowest, max;
1268	int			error;
1269
1270	if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
1271		*first_unused = 0;
1272		return 0;
1273	}
1274
1275	ASSERT(xfs_ifork_has_extents(ifp));
1276
1277	error = xfs_iread_extents(tp, ip, whichfork);
1278	if (error)
1279		return error;
1280
1281	lowest = max = *first_unused;
1282	for_each_xfs_iext(ifp, &icur, &got) {
1283		/*
1284		 * See if the hole before this extent will work.
1285		 */
1286		if (got.br_startoff >= lowest + len &&
1287		    got.br_startoff - max >= len)
1288			break;
1289		lastaddr = got.br_startoff + got.br_blockcount;
1290		max = XFS_FILEOFF_MAX(lastaddr, lowest);
1291	}
1292
1293	*first_unused = max;
1294	return 0;
1295}
1296
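/*
 * Worked example (illustrative), assuming the caller passed in
 * *first_unused == 0: with len == 2 and extents covering file offsets
 * [0,5) and [6,10), the loop above rejects the one-block hole at offset 5
 * (got.br_startoff - max == 1 < len) and returns 10, the first offset
 * past EOF with two logically contiguous free blocks.
 */
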
1297/*
 1298 * Returns the file-relative block number of the last allocated block before
 1299 * last_block (the input value) in the file.
1300 * This is not based on i_size, it is based on the extent records.
1301 * Returns 0 for local files, as they do not have extent records.
1302 */
1303int						/* error */
1304xfs_bmap_last_before(
1305	struct xfs_trans	*tp,		/* transaction pointer */
1306	struct xfs_inode	*ip,		/* incore inode */
1307	xfs_fileoff_t		*last_block,	/* last block */
1308	int			whichfork)	/* data or attr fork */
1309{
1310	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1311	struct xfs_bmbt_irec	got;
1312	struct xfs_iext_cursor	icur;
1313	int			error;
1314
1315	switch (ifp->if_format) {
1316	case XFS_DINODE_FMT_LOCAL:
1317		*last_block = 0;
1318		return 0;
1319	case XFS_DINODE_FMT_BTREE:
1320	case XFS_DINODE_FMT_EXTENTS:
1321		break;
1322	default:
1323		ASSERT(0);
1324		return -EFSCORRUPTED;
1325	}
1326
1327	error = xfs_iread_extents(tp, ip, whichfork);
1328	if (error)
1329		return error;
1330
1331	if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
1332		*last_block = 0;
1333	return 0;
1334}
1335
1336int
1337xfs_bmap_last_extent(
1338	struct xfs_trans	*tp,
1339	struct xfs_inode	*ip,
1340	int			whichfork,
1341	struct xfs_bmbt_irec	*rec,
1342	int			*is_empty)
1343{
1344	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1345	struct xfs_iext_cursor	icur;
1346	int			error;
1347
1348	error = xfs_iread_extents(tp, ip, whichfork);
1349	if (error)
1350		return error;
1351
1352	xfs_iext_last(ifp, &icur);
1353	if (!xfs_iext_get_extent(ifp, &icur, rec))
1354		*is_empty = 1;
1355	else
1356		*is_empty = 0;
1357	return 0;
1358}
1359
1360/*
1361 * Check the last inode extent to determine whether this allocation will result
1362 * in blocks being allocated at the end of the file. When we allocate new data
1363 * blocks at the end of the file which do not start at the previous data block,
1364 * we will try to align the new blocks at stripe unit boundaries.
1365 *
 1366 * Returns 1 in bma->aeof if the file (fork) is empty, as any new write will
 1367 * be at or past EOF.
1368 */
1369STATIC int
1370xfs_bmap_isaeof(
1371	struct xfs_bmalloca	*bma,
1372	int			whichfork)
1373{
1374	struct xfs_bmbt_irec	rec;
1375	int			is_empty;
1376	int			error;
1377
1378	bma->aeof = false;
1379	error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1380				     &is_empty);
1381	if (error)
1382		return error;
1383
1384	if (is_empty) {
1385		bma->aeof = true;
1386		return 0;
1387	}
1388
1389	/*
 1390	 * Check if we are allocating at or past the last extent, or at least into
1391	 * the last delayed allocated extent.
1392	 */
1393	bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1394		(bma->offset >= rec.br_startoff &&
1395		 isnullstartblock(rec.br_startblock));
1396	return 0;
1397}
1398
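/*
 * Worked example (illustrative): if the last extent covers file offsets
 * [100,108) and the allocation request starts at offset 108, bma->aeof is
 * set because the request begins at or past the end of the last extent,
 * so the allocator may apply stripe alignment to the new blocks.
 */
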
1399/*
1400 * Returns the file-relative block number of the first block past eof in
1401 * the file.  This is not based on i_size, it is based on the extent records.
1402 * Returns 0 for local files, as they do not have extent records.
1403 */
1404int
1405xfs_bmap_last_offset(
1406	struct xfs_inode	*ip,
1407	xfs_fileoff_t		*last_block,
1408	int			whichfork)
1409{
1410	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1411	struct xfs_bmbt_irec	rec;
1412	int			is_empty;
1413	int			error;
1414
1415	*last_block = 0;
1416
1417	if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
1418		return 0;
1419
1420	if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp)))
1421		return -EFSCORRUPTED;
1422
1423	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1424	if (error || is_empty)
1425		return error;
1426
1427	*last_block = rec.br_startoff + rec.br_blockcount;
1428	return 0;
1429}
1430
1431/*
1432 * Extent tree manipulation functions used during allocation.
1433 */
1434
1435/*
1436 * Convert a delayed allocation to a real allocation.
1437 */
1438STATIC int				/* error */
1439xfs_bmap_add_extent_delay_real(
1440	struct xfs_bmalloca	*bma,
1441	int			whichfork)
1442{
1443	struct xfs_mount	*mp = bma->ip->i_mount;
1444	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
1445	struct xfs_bmbt_irec	*new = &bma->got;
1446	int			error;	/* error return value */
1447	int			i;	/* temp state */
1448	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
1449	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
1450					/* left is 0, right is 1, prev is 2 */
1451	int			rval=0;	/* return value (logging flags) */
1452	int			state = xfs_bmap_fork_to_state(whichfork);
1453	xfs_filblks_t		da_new; /* new count del alloc blocks used */
1454	xfs_filblks_t		da_old; /* old count del alloc blocks used */
1455	xfs_filblks_t		temp=0;	/* value for da_new calculations */
1456	int			tmp_rval;	/* partial logging flags */
1457	struct xfs_bmbt_irec	old;
1458
1459	ASSERT(whichfork != XFS_ATTR_FORK);
1460	ASSERT(!isnullstartblock(new->br_startblock));
1461	ASSERT(!bma->cur ||
1462	       (bma->cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
1463
1464	XFS_STATS_INC(mp, xs_add_exlist);
1465
1466#define	LEFT		r[0]
1467#define	RIGHT		r[1]
1468#define	PREV		r[2]
1469
1470	/*
1471	 * Set up a bunch of variables to make the tests simpler.
1472	 */
1473	xfs_iext_get_extent(ifp, &bma->icur, &PREV);
1474	new_endoff = new->br_startoff + new->br_blockcount;
1475	ASSERT(isnullstartblock(PREV.br_startblock));
1476	ASSERT(PREV.br_startoff <= new->br_startoff);
1477	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1478
1479	da_old = startblockval(PREV.br_startblock);
1480	da_new = 0;
1481
1482	/*
1483	 * Set flags determining what part of the previous delayed allocation
1484	 * extent is being replaced by a real allocation.
1485	 */
1486	if (PREV.br_startoff == new->br_startoff)
1487		state |= BMAP_LEFT_FILLING;
1488	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1489		state |= BMAP_RIGHT_FILLING;
1490
1491	/*
1492	 * Check and set flags if this segment has a left neighbor.
1493	 * Don't set contiguous if the combined extent would be too large.
1494	 */
1495	if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
1496		state |= BMAP_LEFT_VALID;
1497		if (isnullstartblock(LEFT.br_startblock))
1498			state |= BMAP_LEFT_DELAY;
1499	}
1500
1501	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1502	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1503	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1504	    LEFT.br_state == new->br_state &&
1505	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1506		state |= BMAP_LEFT_CONTIG;
1507
1508	/*
1509	 * Check and set flags if this segment has a right neighbor.
1510	 * Don't set contiguous if the combined extent would be too large.
1511	 * Also check for all-three-contiguous being too large.
1512	 */
1513	if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
1514		state |= BMAP_RIGHT_VALID;
1515		if (isnullstartblock(RIGHT.br_startblock))
1516			state |= BMAP_RIGHT_DELAY;
1517	}
1518
1519	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1520	    new_endoff == RIGHT.br_startoff &&
1521	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1522	    new->br_state == RIGHT.br_state &&
1523	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1524	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1525		       BMAP_RIGHT_FILLING)) !=
1526		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1527		       BMAP_RIGHT_FILLING) ||
1528	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1529			<= MAXEXTLEN))
1530		state |= BMAP_RIGHT_CONTIG;
1531
1532	error = 0;
1533	/*
1534	 * Switch out based on the FILLING and CONTIG state bits.
1535	 */
1536	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1537			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1538	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1539	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1540		/*
1541		 * Filling in all of a previously delayed allocation extent.
1542		 * The left and right neighbors are both contiguous with new.
1543		 */
1544		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
1545
1546		xfs_iext_remove(bma->ip, &bma->icur, state);
1547		xfs_iext_remove(bma->ip, &bma->icur, state);
1548		xfs_iext_prev(ifp, &bma->icur);
1549		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1550		ifp->if_nextents--;
1551
1552		if (bma->cur == NULL)
1553			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1554		else {
1555			rval = XFS_ILOG_CORE;
1556			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1557			if (error)
1558				goto done;
1559			if (XFS_IS_CORRUPT(mp, i != 1)) {
1560				error = -EFSCORRUPTED;
1561				goto done;
1562			}
1563			error = xfs_btree_delete(bma->cur, &i);
1564			if (error)
1565				goto done;
1566			if (XFS_IS_CORRUPT(mp, i != 1)) {
1567				error = -EFSCORRUPTED;
1568				goto done;
1569			}
1570			error = xfs_btree_decrement(bma->cur, 0, &i);
1571			if (error)
1572				goto done;
1573			if (XFS_IS_CORRUPT(mp, i != 1)) {
1574				error = -EFSCORRUPTED;
1575				goto done;
1576			}
1577			error = xfs_bmbt_update(bma->cur, &LEFT);
1578			if (error)
1579				goto done;
1580		}
1581		break;
1582
1583	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1584		/*
1585		 * Filling in all of a previously delayed allocation extent.
1586		 * The left neighbor is contiguous, the right is not.
1587		 */
1588		old = LEFT;
1589		LEFT.br_blockcount += PREV.br_blockcount;
1590
1591		xfs_iext_remove(bma->ip, &bma->icur, state);
1592		xfs_iext_prev(ifp, &bma->icur);
1593		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1594
1595		if (bma->cur == NULL)
1596			rval = XFS_ILOG_DEXT;
1597		else {
1598			rval = 0;
1599			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1600			if (error)
1601				goto done;
1602			if (XFS_IS_CORRUPT(mp, i != 1)) {
1603				error = -EFSCORRUPTED;
1604				goto done;
1605			}
1606			error = xfs_bmbt_update(bma->cur, &LEFT);
1607			if (error)
1608				goto done;
1609		}
1610		break;
1611
1612	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1613		/*
1614		 * Filling in all of a previously delayed allocation extent.
1615		 * The right neighbor is contiguous, the left is not. Take care
1616		 * with delay -> unwritten extent allocation here because the
1617		 * delalloc record we are overwriting is always written.
1618		 */
1619		PREV.br_startblock = new->br_startblock;
1620		PREV.br_blockcount += RIGHT.br_blockcount;
1621		PREV.br_state = new->br_state;
1622
1623		xfs_iext_next(ifp, &bma->icur);
1624		xfs_iext_remove(bma->ip, &bma->icur, state);
1625		xfs_iext_prev(ifp, &bma->icur);
1626		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1627
1628		if (bma->cur == NULL)
1629			rval = XFS_ILOG_DEXT;
1630		else {
1631			rval = 0;
1632			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1633			if (error)
1634				goto done;
1635			if (XFS_IS_CORRUPT(mp, i != 1)) {
1636				error = -EFSCORRUPTED;
1637				goto done;
1638			}
1639			error = xfs_bmbt_update(bma->cur, &PREV);
1640			if (error)
1641				goto done;
1642		}
1643		break;
1644
1645	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1646		/*
1647		 * Filling in all of a previously delayed allocation extent.
1648		 * Neither the left nor right neighbors are contiguous with
1649		 * the new one.
1650		 */
1651		PREV.br_startblock = new->br_startblock;
1652		PREV.br_state = new->br_state;
1653		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1654		ifp->if_nextents++;
1655
1656		if (bma->cur == NULL)
1657			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1658		else {
1659			rval = XFS_ILOG_CORE;
1660			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1661			if (error)
1662				goto done;
1663			if (XFS_IS_CORRUPT(mp, i != 0)) {
1664				error = -EFSCORRUPTED;
1665				goto done;
1666			}
1667			error = xfs_btree_insert(bma->cur, &i);
1668			if (error)
1669				goto done;
1670			if (XFS_IS_CORRUPT(mp, i != 1)) {
1671				error = -EFSCORRUPTED;
1672				goto done;
1673			}
1674		}
1675		break;
1676
1677	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1678		/*
1679		 * Filling in the first part of a previous delayed allocation.
1680		 * The left neighbor is contiguous.
1681		 */
1682		old = LEFT;
1683		temp = PREV.br_blockcount - new->br_blockcount;
1684		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1685				startblockval(PREV.br_startblock));
1686
1687		LEFT.br_blockcount += new->br_blockcount;
1688
1689		PREV.br_blockcount = temp;
1690		PREV.br_startoff += new->br_blockcount;
1691		PREV.br_startblock = nullstartblock(da_new);
1692
1693		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1694		xfs_iext_prev(ifp, &bma->icur);
1695		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1696
1697		if (bma->cur == NULL)
1698			rval = XFS_ILOG_DEXT;
1699		else {
1700			rval = 0;
1701			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1702			if (error)
1703				goto done;
1704			if (XFS_IS_CORRUPT(mp, i != 1)) {
1705				error = -EFSCORRUPTED;
1706				goto done;
1707			}
1708			error = xfs_bmbt_update(bma->cur, &LEFT);
1709			if (error)
1710				goto done;
1711		}
1712		break;
1713
1714	case BMAP_LEFT_FILLING:
1715		/*
1716		 * Filling in the first part of a previous delayed allocation.
1717		 * The left neighbor is not contiguous.
1718		 */
1719		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1720		ifp->if_nextents++;
1721
1722		if (bma->cur == NULL)
1723			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1724		else {
1725			rval = XFS_ILOG_CORE;
1726			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1727			if (error)
1728				goto done;
1729			if (XFS_IS_CORRUPT(mp, i != 0)) {
1730				error = -EFSCORRUPTED;
1731				goto done;
1732			}
1733			error = xfs_btree_insert(bma->cur, &i);
1734			if (error)
1735				goto done;
1736			if (XFS_IS_CORRUPT(mp, i != 1)) {
1737				error = -EFSCORRUPTED;
1738				goto done;
1739			}
1740		}
1741
1742		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1743			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1744					&bma->cur, 1, &tmp_rval, whichfork);
1745			rval |= tmp_rval;
1746			if (error)
1747				goto done;
1748		}
1749
1750		temp = PREV.br_blockcount - new->br_blockcount;
1751		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1752			startblockval(PREV.br_startblock) -
1753			(bma->cur ? bma->cur->bc_ino.allocated : 0));
1754
1755		PREV.br_startoff = new_endoff;
1756		PREV.br_blockcount = temp;
1757		PREV.br_startblock = nullstartblock(da_new);
1758		xfs_iext_next(ifp, &bma->icur);
1759		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1760		xfs_iext_prev(ifp, &bma->icur);
1761		break;
1762
1763	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1764		/*
1765		 * Filling in the last part of a previous delayed allocation.
1766		 * The right neighbor is contiguous with the new allocation.
1767		 */
1768		old = RIGHT;
1769		RIGHT.br_startoff = new->br_startoff;
1770		RIGHT.br_startblock = new->br_startblock;
1771		RIGHT.br_blockcount += new->br_blockcount;
1772
1773		if (bma->cur == NULL)
1774			rval = XFS_ILOG_DEXT;
1775		else {
1776			rval = 0;
1777			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1778			if (error)
1779				goto done;
1780			if (XFS_IS_CORRUPT(mp, i != 1)) {
1781				error = -EFSCORRUPTED;
1782				goto done;
1783			}
1784			error = xfs_bmbt_update(bma->cur, &RIGHT);
1785			if (error)
1786				goto done;
1787		}
1788
1789		temp = PREV.br_blockcount - new->br_blockcount;
1790		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1791			startblockval(PREV.br_startblock));
1792
1793		PREV.br_blockcount = temp;
1794		PREV.br_startblock = nullstartblock(da_new);
1795
1796		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1797		xfs_iext_next(ifp, &bma->icur);
1798		xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
1799		break;
1800
1801	case BMAP_RIGHT_FILLING:
1802		/*
1803		 * Filling in the last part of a previous delayed allocation.
1804		 * The right neighbor is not contiguous.
1805		 */
1806		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1807		ifp->if_nextents++;
1808
1809		if (bma->cur == NULL)
1810			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1811		else {
1812			rval = XFS_ILOG_CORE;
1813			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1814			if (error)
1815				goto done;
1816			if (XFS_IS_CORRUPT(mp, i != 0)) {
1817				error = -EFSCORRUPTED;
1818				goto done;
1819			}
1820			error = xfs_btree_insert(bma->cur, &i);
1821			if (error)
1822				goto done;
1823			if (XFS_IS_CORRUPT(mp, i != 1)) {
1824				error = -EFSCORRUPTED;
1825				goto done;
1826			}
1827		}
1828
1829		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1830			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1831				&bma->cur, 1, &tmp_rval, whichfork);
1832			rval |= tmp_rval;
1833			if (error)
1834				goto done;
1835		}
1836
1837		temp = PREV.br_blockcount - new->br_blockcount;
1838		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1839			startblockval(PREV.br_startblock) -
1840			(bma->cur ? bma->cur->bc_ino.allocated : 0));
1841
1842		PREV.br_startblock = nullstartblock(da_new);
1843		PREV.br_blockcount = temp;
1844		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1845		xfs_iext_next(ifp, &bma->icur);
1846		break;
1847
1848	case 0:
1849		/*
1850		 * Filling in the middle part of a previous delayed allocation.
1851		 * Contiguity is impossible here.
1852		 * This case is avoided almost all the time.
1853		 *
1854		 * We start with a delayed allocation:
1855		 *
1856		 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
1857		 *  PREV @ idx
1858		 *
 1859		 * and we are allocating:
1860		 *                     +rrrrrrrrrrrrrrrrr+
1861		 *			      new
1862		 *
1863		 * and we set it up for insertion as:
1864		 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
1865		 *                            new
1866		 *  PREV @ idx          LEFT              RIGHT
1867		 *                      inserted at idx + 1
1868		 */
1869		old = PREV;
1870
1871		/* LEFT is the new middle */
1872		LEFT = *new;
1873
1874		/* RIGHT is the new right */
1875		RIGHT.br_state = PREV.br_state;
1876		RIGHT.br_startoff = new_endoff;
1877		RIGHT.br_blockcount =
1878			PREV.br_startoff + PREV.br_blockcount - new_endoff;
1879		RIGHT.br_startblock =
1880			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1881					RIGHT.br_blockcount));
1882
1883		/* truncate PREV */
1884		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
1885		PREV.br_startblock =
1886			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1887					PREV.br_blockcount));
1888		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1889
1890		xfs_iext_next(ifp, &bma->icur);
1891		xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
1892		xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
1893		ifp->if_nextents++;
1894
1895		if (bma->cur == NULL)
1896			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1897		else {
1898			rval = XFS_ILOG_CORE;
1899			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1900			if (error)
1901				goto done;
1902			if (XFS_IS_CORRUPT(mp, i != 0)) {
1903				error = -EFSCORRUPTED;
1904				goto done;
1905			}
1906			error = xfs_btree_insert(bma->cur, &i);
1907			if (error)
1908				goto done;
1909			if (XFS_IS_CORRUPT(mp, i != 1)) {
1910				error = -EFSCORRUPTED;
1911				goto done;
1912			}
1913		}
1914
1915		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1916			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1917					&bma->cur, 1, &tmp_rval, whichfork);
1918			rval |= tmp_rval;
1919			if (error)
1920				goto done;
1921		}
1922
1923		da_new = startblockval(PREV.br_startblock) +
1924			 startblockval(RIGHT.br_startblock);
1925		break;
1926
1927	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1928	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1929	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1930	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1931	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1932	case BMAP_LEFT_CONTIG:
1933	case BMAP_RIGHT_CONTIG:
1934		/*
1935		 * These cases are all impossible.
1936		 */
1937		ASSERT(0);
1938	}
1939
1940	/* add reverse mapping unless caller opted out */
1941	if (!(bma->flags & XFS_BMAPI_NORMAP))
1942		xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new);
1943
1944	/* convert to a btree if necessary */
1945	if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1946		int	tmp_logflags;	/* partial log flag return val */
1947
1948		ASSERT(bma->cur == NULL);
1949		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1950				&bma->cur, da_old > 0, &tmp_logflags,
1951				whichfork);
1952		bma->logflags |= tmp_logflags;
1953		if (error)
1954			goto done;
1955	}
1956
1957	if (da_new != da_old)
1958		xfs_mod_delalloc(mp, (int64_t)da_new - da_old);
1959
1960	if (bma->cur) {
1961		da_new += bma->cur->bc_ino.allocated;
1962		bma->cur->bc_ino.allocated = 0;
1963	}
1964
1965	/* adjust for changes in reserved delayed indirect blocks */
1966	if (da_new != da_old) {
1967		ASSERT(state == 0 || da_new < da_old);
1968		error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
1969				false);
1970	}
1971
1972	xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
1973done:
1974	if (whichfork != XFS_COW_FORK)
1975		bma->logflags |= rval;
1976	return error;
1977#undef	LEFT
1978#undef	RIGHT
1979#undef	PREV
1980}
1981
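/*
 * Worked example (illustrative): PREV is a delalloc extent covering file
 * offsets [10,20) and writeback allocates real blocks for [10,14) only.
 * PREV.br_startoff == new->br_startoff sets BMAP_LEFT_FILLING, the right
 * edge is not reached, and (absent a contiguous left neighbour) the
 * BMAP_LEFT_FILLING case records the real extent at [10,14) and shrinks
 * PREV to a delalloc extent at [14,20) with a freshly computed worst-case
 * indirect-block reservation.
 */
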
1982/*
1983 * Convert an unwritten allocation to a real allocation or vice versa.
1984 */
1985int					/* error */
1986xfs_bmap_add_extent_unwritten_real(
1987	struct xfs_trans	*tp,
1988	xfs_inode_t		*ip,	/* incore inode pointer */
1989	int			whichfork,
1990	struct xfs_iext_cursor	*icur,
1991	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
1992	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
1993	int			*logflagsp) /* inode logging flags */
1994{
1995	xfs_btree_cur_t		*cur;	/* btree cursor */
1996	int			error;	/* error return value */
1997	int			i;	/* temp state */
1998	struct xfs_ifork	*ifp;	/* inode fork pointer */
1999	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
2000	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
2001					/* left is 0, right is 1, prev is 2 */
2002	int			rval=0;	/* return value (logging flags) */
2003	int			state = xfs_bmap_fork_to_state(whichfork);
2004	struct xfs_mount	*mp = ip->i_mount;
2005	struct xfs_bmbt_irec	old;
2006
2007	*logflagsp = 0;
2008
2009	cur = *curp;
2010	ifp = XFS_IFORK_PTR(ip, whichfork);
2011
2012	ASSERT(!isnullstartblock(new->br_startblock));
2013
2014	XFS_STATS_INC(mp, xs_add_exlist);
2015
2016#define	LEFT		r[0]
2017#define	RIGHT		r[1]
2018#define	PREV		r[2]
2019
2020	/*
2021	 * Set up a bunch of variables to make the tests simpler.
2022	 */
2023	error = 0;
2024	xfs_iext_get_extent(ifp, icur, &PREV);
2025	ASSERT(new->br_state != PREV.br_state);
2026	new_endoff = new->br_startoff + new->br_blockcount;
2027	ASSERT(PREV.br_startoff <= new->br_startoff);
2028	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2029
2030	/*
2031	 * Set flags determining what part of the previous oldext allocation
2032	 * extent is being replaced by a newext allocation.
2033	 */
2034	if (PREV.br_startoff == new->br_startoff)
2035		state |= BMAP_LEFT_FILLING;
2036	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2037		state |= BMAP_RIGHT_FILLING;
2038
2039	/*
2040	 * Check and set flags if this segment has a left neighbor.
2041	 * Don't set contiguous if the combined extent would be too large.
2042	 */
2043	if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) {
2044		state |= BMAP_LEFT_VALID;
2045		if (isnullstartblock(LEFT.br_startblock))
2046			state |= BMAP_LEFT_DELAY;
2047	}
2048
2049	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2050	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2051	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2052	    LEFT.br_state == new->br_state &&
2053	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2054		state |= BMAP_LEFT_CONTIG;
2055
2056	/*
2057	 * Check and set flags if this segment has a right neighbor.
2058	 * Don't set contiguous if the combined extent would be too large.
2059	 * Also check for all-three-contiguous being too large.
2060	 */
2061	if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) {
2062		state |= BMAP_RIGHT_VALID;
2063		if (isnullstartblock(RIGHT.br_startblock))
2064			state |= BMAP_RIGHT_DELAY;
2065	}
2066
2067	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2068	    new_endoff == RIGHT.br_startoff &&
2069	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2070	    new->br_state == RIGHT.br_state &&
2071	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
2072	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2073		       BMAP_RIGHT_FILLING)) !=
2074		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2075		       BMAP_RIGHT_FILLING) ||
2076	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2077			<= MAXEXTLEN))
2078		state |= BMAP_RIGHT_CONTIG;
2079
2080	/*
2081	 * Switch out based on the FILLING and CONTIG state bits.
2082	 */
2083	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2084			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2085	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2086	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2087		/*
2088		 * Setting all of a previous oldext extent to newext.
2089		 * The left and right neighbors are both contiguous with new.
2090		 */
2091		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
2092
2093		xfs_iext_remove(ip, icur, state);
2094		xfs_iext_remove(ip, icur, state);
2095		xfs_iext_prev(ifp, icur);
2096		xfs_iext_update_extent(ip, state, icur, &LEFT);
2097		ifp->if_nextents -= 2;
2098		if (cur == NULL)
2099			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2100		else {
2101			rval = XFS_ILOG_CORE;
2102			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2103			if (error)
2104				goto done;
2105			if (XFS_IS_CORRUPT(mp, i != 1)) {
2106				error = -EFSCORRUPTED;
2107				goto done;
2108			}
2109			if ((error = xfs_btree_delete(cur, &i)))
2110				goto done;
2111			if (XFS_IS_CORRUPT(mp, i != 1)) {
2112				error = -EFSCORRUPTED;
2113				goto done;
2114			}
2115			if ((error = xfs_btree_decrement(cur, 0, &i)))
2116				goto done;
2117			if (XFS_IS_CORRUPT(mp, i != 1)) {
2118				error = -EFSCORRUPTED;
2119				goto done;
2120			}
2121			if ((error = xfs_btree_delete(cur, &i)))
2122				goto done;
2123			if (XFS_IS_CORRUPT(mp, i != 1)) {
2124				error = -EFSCORRUPTED;
2125				goto done;
2126			}
2127			if ((error = xfs_btree_decrement(cur, 0, &i)))
2128				goto done;
2129			if (XFS_IS_CORRUPT(mp, i != 1)) {
2130				error = -EFSCORRUPTED;
2131				goto done;
2132			}
2133			error = xfs_bmbt_update(cur, &LEFT);
2134			if (error)
2135				goto done;
2136		}
2137		break;
2138
2139	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2140		/*
2141		 * Setting all of a previous oldext extent to newext.
2142		 * The left neighbor is contiguous, the right is not.
2143		 */
2144		LEFT.br_blockcount += PREV.br_blockcount;
2145
2146		xfs_iext_remove(ip, icur, state);
2147		xfs_iext_prev(ifp, icur);
2148		xfs_iext_update_extent(ip, state, icur, &LEFT);
2149		ifp->if_nextents--;
2150		if (cur == NULL)
2151			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2152		else {
2153			rval = XFS_ILOG_CORE;
2154			error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
2155			if (error)
2156				goto done;
2157			if (XFS_IS_CORRUPT(mp, i != 1)) {
2158				error = -EFSCORRUPTED;
2159				goto done;
2160			}
2161			if ((error = xfs_btree_delete(cur, &i)))
2162				goto done;
2163			if (XFS_IS_CORRUPT(mp, i != 1)) {
2164				error = -EFSCORRUPTED;
2165				goto done;
2166			}
2167			if ((error = xfs_btree_decrement(cur, 0, &i)))
2168				goto done;
2169			if (XFS_IS_CORRUPT(mp, i != 1)) {
2170				error = -EFSCORRUPTED;
2171				goto done;
2172			}
2173			error = xfs_bmbt_update(cur, &LEFT);
2174			if (error)
2175				goto done;
2176		}
2177		break;
2178
2179	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2180		/*
2181		 * Setting all of a previous oldext extent to newext.
2182		 * The right neighbor is contiguous, the left is not.
2183		 */
2184		PREV.br_blockcount += RIGHT.br_blockcount;
2185		PREV.br_state = new->br_state;
2186
2187		xfs_iext_next(ifp, icur);
2188		xfs_iext_remove(ip, icur, state);
2189		xfs_iext_prev(ifp, icur);
2190		xfs_iext_update_extent(ip, state, icur, &PREV);
2191		ifp->if_nextents--;
2192
2193		if (cur == NULL)
2194			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2195		else {
2196			rval = XFS_ILOG_CORE;
2197			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2198			if (error)
2199				goto done;
2200			if (XFS_IS_CORRUPT(mp, i != 1)) {
2201				error = -EFSCORRUPTED;
2202				goto done;
2203			}
2204			if ((error = xfs_btree_delete(cur, &i)))
2205				goto done;
2206			if (XFS_IS_CORRUPT(mp, i != 1)) {
2207				error = -EFSCORRUPTED;
2208				goto done;
2209			}
2210			if ((error = xfs_btree_decrement(cur, 0, &i)))
2211				goto done;
2212			if (XFS_IS_CORRUPT(mp, i != 1)) {
2213				error = -EFSCORRUPTED;
2214				goto done;
2215			}
2216			error = xfs_bmbt_update(cur, &PREV);
2217			if (error)
2218				goto done;
2219		}
2220		break;
2221
2222	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2223		/*
2224		 * Setting all of a previous oldext extent to newext.
2225		 * Neither the left nor right neighbors are contiguous with
2226		 * the new one.
2227		 */
2228		PREV.br_state = new->br_state;
2229		xfs_iext_update_extent(ip, state, icur, &PREV);
2230
2231		if (cur == NULL)
2232			rval = XFS_ILOG_DEXT;
2233		else {
2234			rval = 0;
2235			error = xfs_bmbt_lookup_eq(cur, new, &i);
2236			if (error)
2237				goto done;
2238			if (XFS_IS_CORRUPT(mp, i != 1)) {
2239				error = -EFSCORRUPTED;
2240				goto done;
2241			}
2242			error = xfs_bmbt_update(cur, &PREV);
2243			if (error)
2244				goto done;
2245		}
2246		break;
2247
2248	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2249		/*
2250		 * Setting the first part of a previous oldext extent to newext.
2251		 * The left neighbor is contiguous.
2252		 */
2253		LEFT.br_blockcount += new->br_blockcount;
2254
2255		old = PREV;
2256		PREV.br_startoff += new->br_blockcount;
2257		PREV.br_startblock += new->br_blockcount;
2258		PREV.br_blockcount -= new->br_blockcount;
2259
2260		xfs_iext_update_extent(ip, state, icur, &PREV);
2261		xfs_iext_prev(ifp, icur);
2262		xfs_iext_update_extent(ip, state, icur, &LEFT);
2263
2264		if (cur == NULL)
2265			rval = XFS_ILOG_DEXT;
2266		else {
2267			rval = 0;
2268			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2269			if (error)
2270				goto done;
2271			if (XFS_IS_CORRUPT(mp, i != 1)) {
2272				error = -EFSCORRUPTED;
2273				goto done;
2274			}
2275			error = xfs_bmbt_update(cur, &PREV);
2276			if (error)
2277				goto done;
2278			error = xfs_btree_decrement(cur, 0, &i);
2279			if (error)
2280				goto done;
2281			error = xfs_bmbt_update(cur, &LEFT);
2282			if (error)
2283				goto done;
2284		}
2285		break;
2286
2287	case BMAP_LEFT_FILLING:
2288		/*
2289		 * Setting the first part of a previous oldext extent to newext.
2290		 * The left neighbor is not contiguous.
2291		 */
2292		old = PREV;
2293		PREV.br_startoff += new->br_blockcount;
2294		PREV.br_startblock += new->br_blockcount;
2295		PREV.br_blockcount -= new->br_blockcount;
2296
2297		xfs_iext_update_extent(ip, state, icur, &PREV);
2298		xfs_iext_insert(ip, icur, new, state);
2299		ifp->if_nextents++;
2300
2301		if (cur == NULL)
2302			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2303		else {
2304			rval = XFS_ILOG_CORE;
2305			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2306			if (error)
2307				goto done;
2308			if (XFS_IS_CORRUPT(mp, i != 1)) {
2309				error = -EFSCORRUPTED;
2310				goto done;
2311			}
2312			error = xfs_bmbt_update(cur, &PREV);
2313			if (error)
2314				goto done;
2315			cur->bc_rec.b = *new;
2316			if ((error = xfs_btree_insert(cur, &i)))
2317				goto done;
2318			if (XFS_IS_CORRUPT(mp, i != 1)) {
2319				error = -EFSCORRUPTED;
2320				goto done;
2321			}
2322		}
2323		break;
2324
2325	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2326		/*
2327		 * Setting the last part of a previous oldext extent to newext.
2328		 * The right neighbor is contiguous with the new allocation.
2329		 */
2330		old = PREV;
2331		PREV.br_blockcount -= new->br_blockcount;
2332
2333		RIGHT.br_startoff = new->br_startoff;
2334		RIGHT.br_startblock = new->br_startblock;
2335		RIGHT.br_blockcount += new->br_blockcount;
2336
2337		xfs_iext_update_extent(ip, state, icur, &PREV);
2338		xfs_iext_next(ifp, icur);
2339		xfs_iext_update_extent(ip, state, icur, &RIGHT);
2340
2341		if (cur == NULL)
2342			rval = XFS_ILOG_DEXT;
2343		else {
2344			rval = 0;
2345			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2346			if (error)
2347				goto done;
2348			if (XFS_IS_CORRUPT(mp, i != 1)) {
2349				error = -EFSCORRUPTED;
2350				goto done;
2351			}
2352			error = xfs_bmbt_update(cur, &PREV);
2353			if (error)
2354				goto done;
2355			error = xfs_btree_increment(cur, 0, &i);
2356			if (error)
2357				goto done;
2358			error = xfs_bmbt_update(cur, &RIGHT);
2359			if (error)
2360				goto done;
2361		}
2362		break;
2363
2364	case BMAP_RIGHT_FILLING:
2365		/*
2366		 * Setting the last part of a previous oldext extent to newext.
2367		 * The right neighbor is not contiguous.
2368		 */
2369		old = PREV;
2370		PREV.br_blockcount -= new->br_blockcount;
2371
2372		xfs_iext_update_extent(ip, state, icur, &PREV);
2373		xfs_iext_next(ifp, icur);
2374		xfs_iext_insert(ip, icur, new, state);
2375		ifp->if_nextents++;
2376
2377		if (cur == NULL)
2378			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2379		else {
2380			rval = XFS_ILOG_CORE;
2381			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2382			if (error)
2383				goto done;
2384			if (XFS_IS_CORRUPT(mp, i != 1)) {
2385				error = -EFSCORRUPTED;
2386				goto done;
2387			}
2388			error = xfs_bmbt_update(cur, &PREV);
2389			if (error)
2390				goto done;
2391			error = xfs_bmbt_lookup_eq(cur, new, &i);
2392			if (error)
2393				goto done;
2394			if (XFS_IS_CORRUPT(mp, i != 0)) {
2395				error = -EFSCORRUPTED;
2396				goto done;
2397			}
2398			if ((error = xfs_btree_insert(cur, &i)))
2399				goto done;
2400			if (XFS_IS_CORRUPT(mp, i != 1)) {
2401				error = -EFSCORRUPTED;
2402				goto done;
2403			}
2404		}
2405		break;
2406
2407	case 0:
2408		/*
2409		 * Setting the middle part of a previous oldext extent to
2410		 * newext.  Contiguity is impossible here.
2411		 * One extent becomes three extents.
2412		 */
2413		old = PREV;
2414		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
2415
2416		r[0] = *new;
2417		r[1].br_startoff = new_endoff;
2418		r[1].br_blockcount =
2419			old.br_startoff + old.br_blockcount - new_endoff;
2420		r[1].br_startblock = new->br_startblock + new->br_blockcount;
2421		r[1].br_state = PREV.br_state;
2422
2423		xfs_iext_update_extent(ip, state, icur, &PREV);
2424		xfs_iext_next(ifp, icur);
2425		xfs_iext_insert(ip, icur, &r[1], state);
2426		xfs_iext_insert(ip, icur, &r[0], state);
2427		ifp->if_nextents += 2;
2428
2429		if (cur == NULL)
2430			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2431		else {
2432			rval = XFS_ILOG_CORE;
2433			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2434			if (error)
2435				goto done;
2436			if (XFS_IS_CORRUPT(mp, i != 1)) {
2437				error = -EFSCORRUPTED;
2438				goto done;
2439			}
2440			/* new right extent - oldext */
2441			error = xfs_bmbt_update(cur, &r[1]);
2442			if (error)
2443				goto done;
2444			/* new left extent - oldext */
2445			cur->bc_rec.b = PREV;
2446			if ((error = xfs_btree_insert(cur, &i)))
2447				goto done;
2448			if (XFS_IS_CORRUPT(mp, i != 1)) {
2449				error = -EFSCORRUPTED;
2450				goto done;
2451			}
2452			/*
2453			 * Reset the cursor to the position of the new extent
2454			 * we are about to insert as we can't trust it after
2455			 * the previous insert.
2456			 */
2457			error = xfs_bmbt_lookup_eq(cur, new, &i);
2458			if (error)
2459				goto done;
2460			if (XFS_IS_CORRUPT(mp, i != 0)) {
2461				error = -EFSCORRUPTED;
2462				goto done;
2463			}
2464			/* new middle extent - newext */
2465			if ((error = xfs_btree_insert(cur, &i)))
2466				goto done;
2467			if (XFS_IS_CORRUPT(mp, i != 1)) {
2468				error = -EFSCORRUPTED;
2469				goto done;
2470			}
2471		}
2472		break;
2473
2474	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2475	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2476	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2477	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2478	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2479	case BMAP_LEFT_CONTIG:
2480	case BMAP_RIGHT_CONTIG:
2481		/*
2482		 * These cases are all impossible.
2483		 */
2484		ASSERT(0);
2485	}
2486
2487	/* update reverse mappings */
2488	xfs_rmap_convert_extent(mp, tp, ip, whichfork, new);
2489
2490	/* convert to a btree if necessary */
2491	if (xfs_bmap_needs_btree(ip, whichfork)) {
2492		int	tmp_logflags;	/* partial log flag return val */
2493
2494		ASSERT(cur == NULL);
2495		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
2496				&tmp_logflags, whichfork);
2497		*logflagsp |= tmp_logflags;
2498		if (error)
2499			goto done;
2500	}
2501
2502	/* clear out the allocated field, done with it now in any case. */
2503	if (cur) {
2504		cur->bc_ino.allocated = 0;
2505		*curp = cur;
2506	}
2507
2508	xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
2509done:
2510	*logflagsp |= rval;
2511	return error;
2512#undef	LEFT
2513#undef	RIGHT
2514#undef	PREV
2515}
2516
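/*
 * Worked example (illustrative): an unwritten extent at file offsets
 * [0,8) has [2,5) written to.  Neither edge of PREV is filled, so the
 * "case 0" path above runs and one extent becomes three: unwritten
 * [0,2), written [2,5) and unwritten [5,8), with two new records
 * inserted and if_nextents bumped by two.
 */
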
2517/*
2518 * Convert a hole to a delayed allocation.
2519 */
2520STATIC void
2521xfs_bmap_add_extent_hole_delay(
2522	xfs_inode_t		*ip,	/* incore inode pointer */
2523	int			whichfork,
2524	struct xfs_iext_cursor	*icur,
2525	xfs_bmbt_irec_t		*new)	/* new data to add to file extents */
2526{
2527	struct xfs_ifork	*ifp;	/* inode fork pointer */
2528	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
2529	xfs_filblks_t		newlen=0;	/* new indirect size */
2530	xfs_filblks_t		oldlen=0;	/* old indirect size */
2531	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
2532	int			state = xfs_bmap_fork_to_state(whichfork);
2533	xfs_filblks_t		temp;	 /* temp for indirect calculations */
2534
2535	ifp = XFS_IFORK_PTR(ip, whichfork);
2536	ASSERT(isnullstartblock(new->br_startblock));
2537
2538	/*
2539	 * Check and set flags if this segment has a left neighbor
2540	 */
2541	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2542		state |= BMAP_LEFT_VALID;
2543		if (isnullstartblock(left.br_startblock))
2544			state |= BMAP_LEFT_DELAY;
2545	}
2546
2547	/*
2548	 * Check and set flags if the current (right) segment exists.
2549	 * If it doesn't exist, we're converting the hole at end-of-file.
2550	 */
2551	if (xfs_iext_get_extent(ifp, icur, &right)) {
2552		state |= BMAP_RIGHT_VALID;
2553		if (isnullstartblock(right.br_startblock))
2554			state |= BMAP_RIGHT_DELAY;
2555	}
2556
2557	/*
2558	 * Set contiguity flags on the left and right neighbors.
2559	 * Don't let extents get too large, even if the pieces are contiguous.
2560	 */
2561	if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
2562	    left.br_startoff + left.br_blockcount == new->br_startoff &&
2563	    left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2564		state |= BMAP_LEFT_CONTIG;
2565
2566	if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
2567	    new->br_startoff + new->br_blockcount == right.br_startoff &&
2568	    new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2569	    (!(state & BMAP_LEFT_CONTIG) ||
2570	     (left.br_blockcount + new->br_blockcount +
2571	      right.br_blockcount <= MAXEXTLEN)))
2572		state |= BMAP_RIGHT_CONTIG;
2573
2574	/*
2575	 * Switch out based on the contiguity flags.
2576	 */
2577	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2578	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2579		/*
2580		 * New allocation is contiguous with delayed allocations
2581		 * on the left and on the right.
2582		 * Merge all three into a single extent record.
2583		 */
2584		temp = left.br_blockcount + new->br_blockcount +
2585			right.br_blockcount;
2586
2587		oldlen = startblockval(left.br_startblock) +
2588			startblockval(new->br_startblock) +
2589			startblockval(right.br_startblock);
2590		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2591					 oldlen);
2592		left.br_startblock = nullstartblock(newlen);
2593		left.br_blockcount = temp;
2594
2595		xfs_iext_remove(ip, icur, state);
2596		xfs_iext_prev(ifp, icur);
2597		xfs_iext_update_extent(ip, state, icur, &left);
2598		break;
2599
2600	case BMAP_LEFT_CONTIG:
2601		/*
2602		 * New allocation is contiguous with a delayed allocation
2603		 * on the left.
2604		 * Merge the new allocation with the left neighbor.
2605		 */
2606		temp = left.br_blockcount + new->br_blockcount;
2607
2608		oldlen = startblockval(left.br_startblock) +
2609			startblockval(new->br_startblock);
2610		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2611					 oldlen);
2612		left.br_blockcount = temp;
2613		left.br_startblock = nullstartblock(newlen);
2614
2615		xfs_iext_prev(ifp, icur);
2616		xfs_iext_update_extent(ip, state, icur, &left);
2617		break;
2618
2619	case BMAP_RIGHT_CONTIG:
2620		/*
2621		 * New allocation is contiguous with a delayed allocation
2622		 * on the right.
2623		 * Merge the new allocation with the right neighbor.
2624		 */
2625		temp = new->br_blockcount + right.br_blockcount;
2626		oldlen = startblockval(new->br_startblock) +
2627			startblockval(right.br_startblock);
2628		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2629					 oldlen);
2630		right.br_startoff = new->br_startoff;
2631		right.br_startblock = nullstartblock(newlen);
2632		right.br_blockcount = temp;
2633		xfs_iext_update_extent(ip, state, icur, &right);
2634		break;
2635
2636	case 0:
2637		/*
2638		 * New allocation is not contiguous with another
2639		 * delayed allocation.
2640		 * Insert a new entry.
2641		 */
2642		oldlen = newlen = 0;
2643		xfs_iext_insert(ip, icur, new, state);
2644		break;
2645	}
2646	if (oldlen != newlen) {
2647		ASSERT(oldlen > newlen);
2648		xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2649				 false);
2650		/*
2651		 * Nothing to do for disk quota accounting here.
2652		 */
2653		xfs_mod_delalloc(ip->i_mount, (int64_t)newlen - oldlen);
2654	}
2655}
2656
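/*
 * Worked example (illustrative): a new delalloc extent at file offsets
 * [20,24) lands between delalloc neighbours [10,20) and [24,30).  Both
 * contiguity flags are set, the three records merge into one at [10,30),
 * and the worst-case indirect reservation is recomputed for the merged
 * length; any reservation freed by the merge (oldlen - newlen) is
 * returned to the free-block counter via xfs_mod_fdblocks().
 */
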
2657/*
2658 * Convert a hole to a real allocation.
2659 */
2660STATIC int				/* error */
2661xfs_bmap_add_extent_hole_real(
2662	struct xfs_trans	*tp,
2663	struct xfs_inode	*ip,
2664	int			whichfork,
2665	struct xfs_iext_cursor	*icur,
2666	struct xfs_btree_cur	**curp,
2667	struct xfs_bmbt_irec	*new,
2668	int			*logflagsp,
2669	int			flags)
2670{
2671	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
2672	struct xfs_mount	*mp = ip->i_mount;
2673	struct xfs_btree_cur	*cur = *curp;
2674	int			error;	/* error return value */
2675	int			i;	/* temp state */
2676	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
2677	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
2678	int			rval=0;	/* return value (logging flags) */
2679	int			state = xfs_bmap_fork_to_state(whichfork);
2680	struct xfs_bmbt_irec	old;
2681
2682	ASSERT(!isnullstartblock(new->br_startblock));
2683	ASSERT(!cur || !(cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
2684
2685	XFS_STATS_INC(mp, xs_add_exlist);
2686
2687	/*
2688	 * Check and set flags if this segment has a left neighbor.
2689	 */
2690	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2691		state |= BMAP_LEFT_VALID;
2692		if (isnullstartblock(left.br_startblock))
2693			state |= BMAP_LEFT_DELAY;
2694	}
2695
2696	/*
2697	 * Check and set flags if this segment has a current value.
2698	 * Not true if we're inserting into the "hole" at eof.
2699	 */
2700	if (xfs_iext_get_extent(ifp, icur, &right)) {
2701		state |= BMAP_RIGHT_VALID;
2702		if (isnullstartblock(right.br_startblock))
2703			state |= BMAP_RIGHT_DELAY;
2704	}
2705
2706	/*
2707	 * We're inserting a real allocation between "left" and "right".
2708	 * Set the contiguity flags.  Don't let extents get too large.
2709	 */
2710	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2711	    left.br_startoff + left.br_blockcount == new->br_startoff &&
2712	    left.br_startblock + left.br_blockcount == new->br_startblock &&
2713	    left.br_state == new->br_state &&
2714	    left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2715		state |= BMAP_LEFT_CONTIG;
2716
2717	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2718	    new->br_startoff + new->br_blockcount == right.br_startoff &&
2719	    new->br_startblock + new->br_blockcount == right.br_startblock &&
2720	    new->br_state == right.br_state &&
2721	    new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2722	    (!(state & BMAP_LEFT_CONTIG) ||
2723	     left.br_blockcount + new->br_blockcount +
2724	     right.br_blockcount <= MAXEXTLEN))
2725		state |= BMAP_RIGHT_CONTIG;
2726
2727	error = 0;
2728	/*
2729	 * Select which case we're in here, and implement it.
2730	 */
2731	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2732	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2733		/*
2734		 * New allocation is contiguous with real allocations on the
2735		 * left and on the right.
2736		 * Merge all three into a single extent record.
2737		 */
2738		left.br_blockcount += new->br_blockcount + right.br_blockcount;
2739
2740		xfs_iext_remove(ip, icur, state);
2741		xfs_iext_prev(ifp, icur);
2742		xfs_iext_update_extent(ip, state, icur, &left);
2743		ifp->if_nextents--;
2744
2745		if (cur == NULL) {
2746			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2747		} else {
2748			rval = XFS_ILOG_CORE;
2749			error = xfs_bmbt_lookup_eq(cur, &right, &i);
2750			if (error)
2751				goto done;
2752			if (XFS_IS_CORRUPT(mp, i != 1)) {
2753				error = -EFSCORRUPTED;
2754				goto done;
2755			}
2756			error = xfs_btree_delete(cur, &i);
2757			if (error)
2758				goto done;
2759			if (XFS_IS_CORRUPT(mp, i != 1)) {
2760				error = -EFSCORRUPTED;
2761				goto done;
2762			}
2763			error = xfs_btree_decrement(cur, 0, &i);
2764			if (error)
2765				goto done;
2766			if (XFS_IS_CORRUPT(mp, i != 1)) {
2767				error = -EFSCORRUPTED;
2768				goto done;
2769			}
2770			error = xfs_bmbt_update(cur, &left);
2771			if (error)
2772				goto done;
2773		}
2774		break;
2775
2776	case BMAP_LEFT_CONTIG:
2777		/*
2778		 * New allocation is contiguous with a real allocation
2779		 * on the left.
2780		 * Merge the new allocation with the left neighbor.
2781		 */
2782		old = left;
2783		left.br_blockcount += new->br_blockcount;
2784
2785		xfs_iext_prev(ifp, icur);
2786		xfs_iext_update_extent(ip, state, icur, &left);
2787
2788		if (cur == NULL) {
2789			rval = xfs_ilog_fext(whichfork);
2790		} else {
2791			rval = 0;
2792			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2793			if (error)
2794				goto done;
2795			if (XFS_IS_CORRUPT(mp, i != 1)) {
2796				error = -EFSCORRUPTED;
2797				goto done;
2798			}
2799			error = xfs_bmbt_update(cur, &left);
2800			if (error)
2801				goto done;
2802		}
2803		break;
2804
2805	case BMAP_RIGHT_CONTIG:
2806		/*
2807		 * New allocation is contiguous with a real allocation
2808		 * on the right.
2809		 * Merge the new allocation with the right neighbor.
2810		 */
2811		old = right;
2812
2813		right.br_startoff = new->br_startoff;
2814		right.br_startblock = new->br_startblock;
2815		right.br_blockcount += new->br_blockcount;
2816		xfs_iext_update_extent(ip, state, icur, &right);
2817
2818		if (cur == NULL) {
2819			rval = xfs_ilog_fext(whichfork);
2820		} else {
2821			rval = 0;
2822			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2823			if (error)
2824				goto done;
2825			if (XFS_IS_CORRUPT(mp, i != 1)) {
2826				error = -EFSCORRUPTED;
2827				goto done;
2828			}
2829			error = xfs_bmbt_update(cur, &right);
2830			if (error)
2831				goto done;
2832		}
2833		break;
2834
2835	case 0:
2836		/*
2837		 * New allocation is not contiguous with another
2838		 * real allocation.
2839		 * Insert a new entry.
2840		 */
2841		xfs_iext_insert(ip, icur, new, state);
2842		ifp->if_nextents++;
2843
2844		if (cur == NULL) {
2845			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2846		} else {
2847			rval = XFS_ILOG_CORE;
2848			error = xfs_bmbt_lookup_eq(cur, new, &i);
2849			if (error)
2850				goto done;
2851			if (XFS_IS_CORRUPT(mp, i != 0)) {
2852				error = -EFSCORRUPTED;
2853				goto done;
2854			}
2855			error = xfs_btree_insert(cur, &i);
2856			if (error)
2857				goto done;
2858			if (XFS_IS_CORRUPT(mp, i != 1)) {
2859				error = -EFSCORRUPTED;
2860				goto done;
2861			}
2862		}
2863		break;
2864	}
2865
2866	/* add reverse mapping unless caller opted out */
2867	if (!(flags & XFS_BMAPI_NORMAP))
2868		xfs_rmap_map_extent(tp, ip, whichfork, new);
2869
2870	/* convert to a btree if necessary */
2871	if (xfs_bmap_needs_btree(ip, whichfork)) {
2872		int	tmp_logflags;	/* partial log flag return val */
2873
2874		ASSERT(cur == NULL);
2875		error = xfs_bmap_extents_to_btree(tp, ip, curp, 0,
2876				&tmp_logflags, whichfork);
2877		*logflagsp |= tmp_logflags;
2878		cur = *curp;
2879		if (error)
2880			goto done;
2881	}
2882
2883	/* clear out the allocated field, done with it now in any case. */
2884	if (cur)
2885		cur->bc_ino.allocated = 0;
2886
2887	xfs_bmap_check_leaf_extents(cur, ip, whichfork);
2888done:
2889	*logflagsp |= rval;
2890	return error;
2891}
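
/*
 * Illustrative sketch, not part of the original source: the contiguity
 * checks above require adjacency in both file offset space and disk block
 * space, matching extent state, and a merged length that stays within
 * MAXEXTLEN.  A hypothetical predicate for the left-neighbour case:
 */
static inline bool
xfs_bmap_left_contig_sketch(
	const struct xfs_bmbt_irec	*left,
	const struct xfs_bmbt_irec	*new)
{
	return left->br_startoff + left->br_blockcount == new->br_startoff &&
	       left->br_startblock + left->br_blockcount == new->br_startblock &&
	       left->br_state == new->br_state &&
	       left->br_blockcount + new->br_blockcount <= MAXEXTLEN;
}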
2892
2893/*
2894 * Functions used in the extent read, allocate and remove paths
2895 */
2896
2897/*
2898 * Adjust the size of the new extent based on i_extsize and rt extsize.
2899 */
2900int
2901xfs_bmap_extsize_align(
2902	xfs_mount_t	*mp,
2903	xfs_bmbt_irec_t	*gotp,		/* next extent pointer */
2904	xfs_bmbt_irec_t	*prevp,		/* previous extent pointer */
2905	xfs_extlen_t	extsz,		/* align to this extent size */
2906	int		rt,		/* is this a realtime inode? */
2907	int		eof,		/* is extent at end-of-file? */
2908	int		delay,		/* creating delalloc extent? */
2909	int		convert,	/* overwriting unwritten extent? */
2910	xfs_fileoff_t	*offp,		/* in/out: aligned offset */
2911	xfs_extlen_t	*lenp)		/* in/out: aligned length */
2912{
2913	xfs_fileoff_t	orig_off;	/* original offset */
2914	xfs_extlen_t	orig_alen;	/* original length */
2915	xfs_fileoff_t	orig_end;	/* original off+len */
2916	xfs_fileoff_t	nexto;		/* next file offset */
2917	xfs_fileoff_t	prevo;		/* previous file offset */
2918	xfs_fileoff_t	align_off;	/* temp for offset */
2919	xfs_extlen_t	align_alen;	/* temp for length */
2920	xfs_extlen_t	temp;		/* temp for calculations */
2921
2922	if (convert)
2923		return 0;
2924
2925	orig_off = align_off = *offp;
2926	orig_alen = align_alen = *lenp;
2927	orig_end = orig_off + orig_alen;
2928
2929	/*
2930	 * If this request overlaps an existing extent, then don't
2931	 * attempt to perform any additional alignment.
2932	 */
2933	if (!delay && !eof &&
2934	    (orig_off >= gotp->br_startoff) &&
2935	    (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
2936		return 0;
2937	}
2938
2939	/*
2940	 * If the file offset is unaligned vs. the extent size
2941	 * we need to align it.  This will be possible unless
2942	 * the file was previously written with a kernel that didn't
2943	 * perform this alignment, or if a truncate shot us in the
2944	 * foot.
2945	 */
2946	div_u64_rem(orig_off, extsz, &temp);
2947	if (temp) {
2948		align_alen += temp;
2949		align_off -= temp;
2950	}
2951
2952	/* Same adjustment for the end of the requested area. */
2953	temp = (align_alen % extsz);
2954	if (temp)
2955		align_alen += extsz - temp;
2956
2957	/*
2958	 * For large extent hint sizes, the aligned extent might be larger than
2959	 * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
2960	 * the length back under MAXEXTLEN. The outer allocation loops handle
2961	 * short allocation just fine, so it is safe to do this. We only want to
2962	 * do it when we are forced to, though, because it means more allocation
2963	 * operations are required.
2964	 */
2965	while (align_alen > MAXEXTLEN)
2966		align_alen -= extsz;
2967	ASSERT(align_alen <= MAXEXTLEN);
2968
2969	/*
2970	 * If the previous block overlaps with this proposed allocation
2971	 * then move the start forward without adjusting the length.
2972	 */
2973	if (prevp->br_startoff != NULLFILEOFF) {
2974		if (prevp->br_startblock == HOLESTARTBLOCK)
2975			prevo = prevp->br_startoff;
2976		else
2977			prevo = prevp->br_startoff + prevp->br_blockcount;
2978	} else
2979		prevo = 0;
2980	if (align_off != orig_off && align_off < prevo)
2981		align_off = prevo;
2982	/*
2983	 * If the next block overlaps with this proposed allocation
2984	 * then move the start back without adjusting the length,
2985	 * but not before offset 0.
2986	 * This may of course make the start overlap previous block,
2987	 * and if we hit the offset 0 limit then the next block
2988	 * can still overlap too.
2989	 */
2990	if (!eof && gotp->br_startoff != NULLFILEOFF) {
2991		if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
2992		    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
2993			nexto = gotp->br_startoff + gotp->br_blockcount;
2994		else
2995			nexto = gotp->br_startoff;
2996	} else
2997		nexto = NULLFILEOFF;
2998	if (!eof &&
2999	    align_off + align_alen != orig_end &&
3000	    align_off + align_alen > nexto)
3001		align_off = nexto > align_alen ? nexto - align_alen : 0;
3002	/*
3003	 * If we're now overlapping the next or previous extent that
3004	 * means we can't fit an extsz piece in this hole.  Just move
3005	 * the start forward to the first valid spot and set
3006	 * the length so we hit the end.
3007	 */
3008	if (align_off != orig_off && align_off < prevo)
3009		align_off = prevo;
3010	if (align_off + align_alen != orig_end &&
3011	    align_off + align_alen > nexto &&
3012	    nexto != NULLFILEOFF) {
3013		ASSERT(nexto > prevo);
3014		align_alen = nexto - align_off;
3015	}
3016
3017	/*
3018	 * If realtime, and the result isn't a multiple of the realtime
3019	 * extent size we need to remove blocks until it is.
3020	 */
3021	if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
3022		/*
3023		 * We're not covering the original request, or
3024		 * we won't be able to once we fix the length.
3025		 */
3026		if (orig_off < align_off ||
3027		    orig_end > align_off + align_alen ||
3028		    align_alen - temp < orig_alen)
3029			return -EINVAL;
3030		/*
3031		 * Try to fix it by moving the start up.
3032		 */
3033		if (align_off + temp <= orig_off) {
3034			align_alen -= temp;
3035			align_off += temp;
3036		}
3037		/*
3038		 * Try to fix it by moving the end in.
3039		 */
3040		else if (align_off + align_alen - temp >= orig_end)
3041			align_alen -= temp;
3042		/*
3043		 * Set the start to the minimum then trim the length.
3044		 */
3045		else {
3046			align_alen -= orig_off - align_off;
3047			align_off = orig_off;
3048			align_alen -= align_alen % mp->m_sb.sb_rextsize;
3049		}
3050		/*
3051		 * Result doesn't cover the request, fail it.
3052		 */
3053		if (orig_off < align_off || orig_end > align_off + align_alen)
3054			return -EINVAL;
3055	} else {
3056		ASSERT(orig_off >= align_off);
3057		/* see MAXEXTLEN handling above */
3058		ASSERT(orig_end <= align_off + align_alen ||
3059		       align_alen + extsz > MAXEXTLEN);
3060	}
3061
3062#ifdef DEBUG
3063	if (!eof && gotp->br_startoff != NULLFILEOFF)
3064		ASSERT(align_off + align_alen <= gotp->br_startoff);
3065	if (prevp->br_startoff != NULLFILEOFF)
3066		ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3067#endif
3068
3069	*lenp = align_alen;
3070	*offp = align_off;
3071	return 0;
3072}
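
/*
 * Illustrative sketch, not part of the original source: the core rounding
 * above moves the start down to an extsz boundary and the end up to one.
 * For example, with extsz = 16, a request for offset 20, length 10 becomes
 * offset 16, length 16.  A hypothetical distillation of just that step:
 */
static inline void
xfs_extsz_round_sketch(
	xfs_extlen_t	extsz,
	xfs_fileoff_t	*off,	/* in/out: rounded down to extsz */
	xfs_extlen_t	*len)	/* in/out: rounded up to cover the request */
{
	u32		temp;

	div_u64_rem(*off, extsz, &temp);
	*off -= temp;		/* move the start back to the boundary */
	*len += temp;		/* grow the length to keep the same end */
	temp = *len % extsz;
	if (temp)
		*len += extsz - temp;	/* round the end up to a boundary */
}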
3073
3074#define XFS_ALLOC_GAP_UNITS	4
3075
3076void
3077xfs_bmap_adjacent(
3078	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
3079{
3080	xfs_fsblock_t	adjust;		/* adjustment to block numbers */
3081	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
3082	xfs_mount_t	*mp;		/* mount point structure */
3083	int		nullfb;		/* true if ap->firstblock isn't set */
3084	int		rt;		/* true if inode is realtime */
3085
3086#define	ISVALID(x,y)	\
3087	(rt ? \
3088		(x) < mp->m_sb.sb_rblocks : \
3089		XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
3090		XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
3091		XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
3092
3093	mp = ap->ip->i_mount;
3094	nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
3095	rt = XFS_IS_REALTIME_INODE(ap->ip) &&
3096		(ap->datatype & XFS_ALLOC_USERDATA);
3097	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
3098							ap->tp->t_firstblock);
3099	/*
3100	 * If allocating at eof, and there's a previous real block,
3101	 * try to use its last block as our starting point.
3102	 */
3103	if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3104	    !isnullstartblock(ap->prev.br_startblock) &&
3105	    ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
3106		    ap->prev.br_startblock)) {
3107		ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3108		/*
3109		 * Adjust for the gap between prevp and us.
3110		 */
3111		adjust = ap->offset -
3112			(ap->prev.br_startoff + ap->prev.br_blockcount);
3113		if (adjust &&
3114		    ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
3115			ap->blkno += adjust;
3116	}
3117	/*
3118	 * If not at eof, then compare the two neighbor blocks.
3119	 * Figure out whether either one gives us a good starting point,
3120	 * and pick the better one.
3121	 */
3122	else if (!ap->eof) {
3123		xfs_fsblock_t	gotbno;		/* right side block number */
3124		xfs_fsblock_t	gotdiff = 0;	/* right side difference */
3125		xfs_fsblock_t	prevbno;	/* left side block number */
3126		xfs_fsblock_t	prevdiff = 0;	/* left side difference */
3127
3128		/*
3129		 * If there's a previous (left) block, select a requested
3130		 * start block based on it.
3131		 */
3132		if (ap->prev.br_startoff != NULLFILEOFF &&
3133		    !isnullstartblock(ap->prev.br_startblock) &&
3134		    (prevbno = ap->prev.br_startblock +
3135			       ap->prev.br_blockcount) &&
3136		    ISVALID(prevbno, ap->prev.br_startblock)) {
3137			/*
3138			 * Calculate gap to end of previous block.
3139			 */
3140			adjust = prevdiff = ap->offset -
3141				(ap->prev.br_startoff +
3142				 ap->prev.br_blockcount);
3143			/*
3144			 * Figure the startblock based on the previous block's
3145			 * end and the gap size.
3146			 * Heuristic!
3147			 * If the gap is large relative to the piece we're
3148			 * allocating, or using it gives us an invalid block
3149			 * number, then just use the end of the previous block.
3150			 */
3151			if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3152			    ISVALID(prevbno + prevdiff,
3153				    ap->prev.br_startblock))
3154				prevbno += adjust;
3155			else
3156				prevdiff += adjust;
3157			/*
3158			 * If the firstblock forbids it, can't use it,
3159			 * must use default.
3160			 */
3161			if (!rt && !nullfb &&
3162			    XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
3163				prevbno = NULLFSBLOCK;
3164		}
3165		/*
3166		 * No previous block or can't follow it, just default.
3167		 */
3168		else
3169			prevbno = NULLFSBLOCK;
3170		/*
3171		 * If there's a following (right) block, select a requested
3172		 * start block based on it.
3173		 */
3174		if (!isnullstartblock(ap->got.br_startblock)) {
3175			/*
3176			 * Calculate gap to start of next block.
3177			 */
3178			adjust = gotdiff = ap->got.br_startoff - ap->offset;
3179			/*
3180			 * Figure the startblock based on the next block's
3181			 * start and the gap size.
3182			 */
3183			gotbno = ap->got.br_startblock;
3184			/*
3185			 * Heuristic!
3186			 * If the gap is large relative to the piece we're
3187			 * allocating, or using it gives us an invalid block
3188			 * number, then just use the start of the next block
3189			 * offset by our length.
3190			 */
3191			if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3192			    ISVALID(gotbno - gotdiff, gotbno))
3193				gotbno -= adjust;
3194			else if (ISVALID(gotbno - ap->length, gotbno)) {
3195				gotbno -= ap->length;
3196				gotdiff += adjust - ap->length;
3197			} else
3198				gotdiff += adjust;
3199			/*
3200			 * If the firstblock forbids it, can't use it,
3201			 * must use default.
3202			 */
3203			if (!rt && !nullfb &&
3204			    XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
3205				gotbno = NULLFSBLOCK;
3206		}
3207		/*
3208		 * No next block, just default.
3209		 */
3210		else
3211			gotbno = NULLFSBLOCK;
3212		/*
3213		 * If both valid, pick the better one, else the only good
3214		 * one, else ap->blkno is already set (to 0 or the inode block).
3215		 */
3216		if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
3217			ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
3218		else if (prevbno != NULLFSBLOCK)
3219			ap->blkno = prevbno;
3220		else if (gotbno != NULLFSBLOCK)
3221			ap->blkno = gotbno;
3222	}
3223#undef ISVALID
3224}
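
/*
 * Illustrative sketch, not part of the original source: once candidate
 * start blocks have been derived from both neighbours, the final choice
 * above reduces to picking the candidate with the smaller gap penalty.
 * A hypothetical condensation of that selection:
 */
static inline xfs_fsblock_t
xfs_bmap_pick_adjacent_sketch(
	xfs_fsblock_t	prevbno,	/* candidate from the left neighbour */
	xfs_fsblock_t	prevdiff,	/* its gap penalty */
	xfs_fsblock_t	gotbno,		/* candidate from the right neighbour */
	xfs_fsblock_t	gotdiff,	/* its gap penalty */
	xfs_fsblock_t	dflt)		/* fallback if neither is usable */
{
	if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
		return prevdiff <= gotdiff ? prevbno : gotbno;
	if (prevbno != NULLFSBLOCK)
		return prevbno;
	if (gotbno != NULLFSBLOCK)
		return gotbno;
	return dflt;
}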
3225
3226static int
3227xfs_bmap_longest_free_extent(
3228	struct xfs_trans	*tp,
3229	xfs_agnumber_t		ag,
3230	xfs_extlen_t		*blen,
3231	int			*notinit)
3232{
3233	struct xfs_mount	*mp = tp->t_mountp;
3234	struct xfs_perag	*pag;
3235	xfs_extlen_t		longest;
3236	int			error = 0;
3237
3238	pag = xfs_perag_get(mp, ag);
3239	if (!pag->pagf_init) {
3240		error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
3241		if (error) {
3242			/* Couldn't lock the AGF, so skip this AG. */
3243			if (error == -EAGAIN) {
3244				*notinit = 1;
3245				error = 0;
3246			}
3247			goto out;
3248		}
3249	}
3250
3251	longest = xfs_alloc_longest_free_extent(pag,
3252				xfs_alloc_min_freelist(mp, pag),
3253				xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3254	if (*blen < longest)
3255		*blen = longest;
3256
3257out:
3258	xfs_perag_put(pag);
3259	return error;
3260}
3261
3262static void
3263xfs_bmap_select_minlen(
3264	struct xfs_bmalloca	*ap,
3265	struct xfs_alloc_arg	*args,
3266	xfs_extlen_t		*blen,
3267	int			notinit)
3268{
3269	if (notinit || *blen < ap->minlen) {
3270		/*
3271		 * Since we did a BUF_TRYLOCK above, it is possible that
3272		 * Since the AGF was only read with a trylock above, it is
3273		 * still possible that there is space for this request.
3274		args->minlen = ap->minlen;
3275	} else if (*blen < args->maxlen) {
3276		/*
3277		 * If the best seen length is less than the request length,
3278		 * use the best as the minimum.
3279		 */
3280		args->minlen = *blen;
3281	} else {
3282		/*
3283		 * Otherwise we've seen an extent as big as maxlen, use that
3284		 * as the minimum.
3285		 */
3286		args->minlen = args->maxlen;
3287	}
3288}
3289
3290STATIC int
3291xfs_bmap_btalloc_nullfb(
3292	struct xfs_bmalloca	*ap,
3293	struct xfs_alloc_arg	*args,
3294	xfs_extlen_t		*blen)
3295{
3296	struct xfs_mount	*mp = ap->ip->i_mount;
3297	xfs_agnumber_t		ag, startag;
3298	int			notinit = 0;
3299	int			error;
3300
3301	args->type = XFS_ALLOCTYPE_START_BNO;
3302	args->total = ap->total;
3303
3304	startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3305	if (startag == NULLAGNUMBER)
3306		startag = ag = 0;
3307
3308	while (*blen < args->maxlen) {
3309		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3310						     &notinit);
3311		if (error)
3312			return error;
3313
3314		if (++ag == mp->m_sb.sb_agcount)
3315			ag = 0;
3316		if (ag == startag)
3317			break;
3318	}
3319
3320	xfs_bmap_select_minlen(ap, args, blen, notinit);
3321	return 0;
3322}
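
/*
 * Illustrative sketch, not part of the original source: the loop above
 * visits every AG at most once, wrapping from the last AG back to zero
 * until it returns to the AG it started from.  The wrap step in
 * isolation:
 */
static inline xfs_agnumber_t
xfs_next_ag_sketch(
	xfs_agnumber_t	ag,
	xfs_agnumber_t	agcount)
{
	return (ag + 1 == agcount) ? 0 : ag + 1;
}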
3323
3324STATIC int
3325xfs_bmap_btalloc_filestreams(
3326	struct xfs_bmalloca	*ap,
3327	struct xfs_alloc_arg	*args,
3328	xfs_extlen_t		*blen)
3329{
3330	struct xfs_mount	*mp = ap->ip->i_mount;
3331	xfs_agnumber_t		ag;
3332	int			notinit = 0;
3333	int			error;
3334
3335	args->type = XFS_ALLOCTYPE_NEAR_BNO;
3336	args->total = ap->total;
3337
3338	ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3339	if (ag == NULLAGNUMBER)
3340		ag = 0;
3341
3342	error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
3343	if (error)
3344		return error;
3345
3346	if (*blen < args->maxlen) {
3347		error = xfs_filestream_new_ag(ap, &ag);
3348		if (error)
3349			return error;
3350
3351		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3352						     &notinit);
3353		if (error)
3354			return error;
3355
3356	}
3357
3358	xfs_bmap_select_minlen(ap, args, blen, notinit);
3359
3360	/*
3361	 * Set the failure fallback case to look in the selected AG as stream
3362	 * may have moved.
3363	 */
3364	ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
3365	return 0;
3366}
3367
3368/* Update all inode and quota accounting for the allocation we just did. */
3369static void
3370xfs_bmap_btalloc_accounting(
3371	struct xfs_bmalloca	*ap,
3372	struct xfs_alloc_arg	*args)
3373{
3374	if (ap->flags & XFS_BMAPI_COWFORK) {
3375		/*
3376		 * COW fork blocks are in-core only and thus are treated as
3377		 * in-core quota reservation (like delalloc blocks) even when
3378		 * converted to real blocks. The quota reservation is not
3379		 * accounted to disk until blocks are remapped to the data
3380		 * fork. So if these blocks were previously delalloc, we
3381		 * already have quota reservation and there's nothing to do
3382		 * yet.
3383		 */
3384		if (ap->wasdel) {
3385			xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
3386			return;
3387		}
3388
3389		/*
3390		 * Otherwise, we've allocated blocks in a hole. The transaction
3391		 * has acquired in-core quota reservation for this extent.
3392		 * Rather than account these as real blocks, however, we reduce
3393		 * the transaction quota reservation based on the allocation.
3394		 * This essentially transfers the transaction quota reservation
3395		 * to that of a delalloc extent.
3396		 */
3397		ap->ip->i_delayed_blks += args->len;
3398		xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS,
3399				-(long)args->len);
3400		return;
3401	}
3402
3403	/* data/attr fork only */
3404	ap->ip->i_nblocks += args->len;
3405	xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3406	if (ap->wasdel) {
3407		ap->ip->i_delayed_blks -= args->len;
3408		xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
3409	}
3410	xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
3411		ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT,
3412		args->len);
3413}
3414
3415static int
3416xfs_bmap_compute_alignments(
3417	struct xfs_bmalloca	*ap,
3418	struct xfs_alloc_arg	*args)
3419{
3420	struct xfs_mount	*mp = args->mp;
3421	xfs_extlen_t		align = 0; /* minimum allocation alignment */
3422	int			stripe_align = 0;
3423
3424	/* stripe alignment for allocation is determined by mount parameters */
3425	if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
3426		stripe_align = mp->m_swidth;
3427	else if (mp->m_dalign)
3428		stripe_align = mp->m_dalign;
3429
3430	if (ap->flags & XFS_BMAPI_COWFORK)
3431		align = xfs_get_cowextsz_hint(ap->ip);
3432	else if (ap->datatype & XFS_ALLOC_USERDATA)
3433		align = xfs_get_extsz_hint(ap->ip);
3434	if (align) {
3435		if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
3436					ap->eof, 0, ap->conv, &ap->offset,
3437					&ap->length))
3438			ASSERT(0);
3439		ASSERT(ap->length);
3440	}
3441
3442	/* apply extent size hints if obtained earlier */
3443	if (align) {
3444		args->prod = align;
3445		div_u64_rem(ap->offset, args->prod, &args->mod);
3446		if (args->mod)
3447			args->mod = args->prod - args->mod;
3448	} else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
3449		args->prod = 1;
3450		args->mod = 0;
3451	} else {
3452		args->prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
3453		div_u64_rem(ap->offset, args->prod, &args->mod);
3454		if (args->mod)
3455			args->mod = args->prod - args->mod;
3456	}
3457
3458	return stripe_align;
3459}
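
/*
 * Illustrative sketch, not part of the original source: args->mod as set
 * above is the distance from ap->offset up to the next args->prod
 * boundary, so that offset + mod lands on a multiple of prod.  E.g. with
 * prod = 8 and offset = 13, mod becomes 3.  A hypothetical helper:
 */
static inline u32
xfs_alloc_mod_sketch(
	xfs_fileoff_t	offset,
	u32		prod)
{
	u32		mod;

	div_u64_rem(offset, prod, &mod);
	return mod ? prod - mod : 0;
}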
3460
3461static void
3462xfs_bmap_process_allocated_extent(
3463	struct xfs_bmalloca	*ap,
3464	struct xfs_alloc_arg	*args,
3465	xfs_fileoff_t		orig_offset,
3466	xfs_extlen_t		orig_length)
3467{
3468	int			nullfb;
3469
3470	nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
3471
3472	/*
3473	 * check the allocation happened at the same or higher AG than
3474	 * the first block that was allocated.
3475	 */
3476	ASSERT(nullfb ||
3477		XFS_FSB_TO_AGNO(args->mp, ap->tp->t_firstblock) <=
3478		XFS_FSB_TO_AGNO(args->mp, args->fsbno));
3479
3480	ap->blkno = args->fsbno;
3481	if (nullfb)
3482		ap->tp->t_firstblock = args->fsbno;
3483	ap->length = args->len;
3484	/*
3485	 * If the extent size hint is active, we tried to round the
3486	 * caller's allocation request offset down to extsz and the
3487	 * length up to another extsz boundary.  If we found a free
3488	 * extent we mapped it in starting at this new offset.  If the
3489	 * newly mapped space isn't long enough to cover any of the
3490	 * range of offsets that was originally requested, move the
3491	 * mapping up so that we can fill as much of the caller's
3492	 * original request as possible.  Free space is apparently
3493	 * very fragmented so we're unlikely to be able to satisfy the
3494	 * hints anyway.
3495	 */
3496	if (ap->length <= orig_length)
3497		ap->offset = orig_offset;
3498	else if (ap->offset + ap->length < orig_offset + orig_length)
3499		ap->offset = orig_offset + orig_length - ap->length;
3500	xfs_bmap_btalloc_accounting(ap, args);
3501}
3502
3503#ifdef DEBUG
3504static int
3505xfs_bmap_exact_minlen_extent_alloc(
3506	struct xfs_bmalloca	*ap)
3507{
3508	struct xfs_mount	*mp = ap->ip->i_mount;
3509	struct xfs_alloc_arg	args = { .tp = ap->tp, .mp = mp };
3510	xfs_fileoff_t		orig_offset;
3511	xfs_extlen_t		orig_length;
3512	int			error;
3513
3514	ASSERT(ap->length);
3515
3516	if (ap->minlen != 1) {
3517		ap->blkno = NULLFSBLOCK;
3518		ap->length = 0;
3519		return 0;
3520	}
3521
3522	orig_offset = ap->offset;
3523	orig_length = ap->length;
3524
3525	args.alloc_minlen_only = 1;
3526
3527	xfs_bmap_compute_alignments(ap, &args);
3528
3529	if (ap->tp->t_firstblock == NULLFSBLOCK) {
3530		/*
3531		 * Unlike the longest extent available in an AG, we don't track
3532		 * the length of an AG's shortest extent.
3533		 * XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT is a debug only knob and
3534		 * hence we can afford to start traversing from the 0th AG since
3535		 * we need not be concerned about a drop in performance in
3536		 * "debug only" code paths.
3537		 */
3538		ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0);
3539	} else {
3540		ap->blkno = ap->tp->t_firstblock;
3541	}
3542
3543	args.fsbno = ap->blkno;
3544	args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
3545	args.type = XFS_ALLOCTYPE_FIRST_AG;
3546	args.minlen = args.maxlen = ap->minlen;
3547	args.total = ap->total;
3548
3549	args.alignment = 1;
3550	args.minalignslop = 0;
3551
3552	args.minleft = ap->minleft;
3553	args.wasdel = ap->wasdel;
3554	args.resv = XFS_AG_RESV_NONE;
3555	args.datatype = ap->datatype;
3556
3557	error = xfs_alloc_vextent(&args);
3558	if (error)
3559		return error;
3560
3561	if (args.fsbno != NULLFSBLOCK) {
3562		xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
3563			orig_length);
3564	} else {
3565		ap->blkno = NULLFSBLOCK;
3566		ap->length = 0;
3567	}
3568
3569	return 0;
3570}
3571#else
3572
3573#define xfs_bmap_exact_minlen_extent_alloc(bma) (-EFSCORRUPTED)
3574
3575#endif
3576
3577STATIC int
3578xfs_bmap_btalloc(
3579	struct xfs_bmalloca	*ap)
3580{
3581	struct xfs_mount	*mp = ap->ip->i_mount;
3582	struct xfs_alloc_arg	args = { .tp = ap->tp, .mp = mp };
3583	xfs_alloctype_t		atype = 0;
3584	xfs_agnumber_t		fb_agno;	/* ag number of ap->firstblock */
3585	xfs_agnumber_t		ag;
3586	xfs_fileoff_t		orig_offset;
3587	xfs_extlen_t		orig_length;
3588	xfs_extlen_t		blen;
3589	xfs_extlen_t		nextminlen = 0;
3590	int			nullfb; /* true if ap->firstblock isn't set */
3591	int			isaligned;
3592	int			tryagain;
3593	int			error;
3594	int			stripe_align;
3595
3596	ASSERT(ap->length);
3597	orig_offset = ap->offset;
3598	orig_length = ap->length;
3599
3600	stripe_align = xfs_bmap_compute_alignments(ap, &args);
3601
3602	nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
3603	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
3604							ap->tp->t_firstblock);
3605	if (nullfb) {
3606		if ((ap->datatype & XFS_ALLOC_USERDATA) &&
3607		    xfs_inode_is_filestream(ap->ip)) {
3608			ag = xfs_filestream_lookup_ag(ap->ip);
3609			ag = (ag != NULLAGNUMBER) ? ag : 0;
3610			ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
3611		} else {
3612			ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
3613		}
3614	} else
3615		ap->blkno = ap->tp->t_firstblock;
3616
3617	xfs_bmap_adjacent(ap);
3618
3619	/*
3620	 * If allowed, use ap->blkno; otherwise must use firstblock since
3621	 * it's in the right allocation group.
3622	 */
3623	if (!nullfb && XFS_FSB_TO_AGNO(mp, ap->blkno) != fb_agno)
3626		ap->blkno = ap->tp->t_firstblock;
3627	/*
3628	 * Normal allocation, done through xfs_alloc_vextent.
3629	 */
3630	tryagain = isaligned = 0;
3631	args.fsbno = ap->blkno;
3632	args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
3633
3634	/* Trim the allocation back to the maximum an AG can fit. */
3635	args.maxlen = min(ap->length, mp->m_ag_max_usable);
3636	blen = 0;
3637	if (nullfb) {
3638		/*
3639		 * Search for an allocation group with a single extent large
3640		 * enough for the request.  If one isn't found, then adjust
3641		 * the minimum allocation size to the largest space found.
3642		 */
3643		if ((ap->datatype & XFS_ALLOC_USERDATA) &&
3644		    xfs_inode_is_filestream(ap->ip))
3645			error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
3646		else
3647			error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
3648		if (error)
3649			return error;
3650	} else if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
3651		if (xfs_inode_is_filestream(ap->ip))
3652			args.type = XFS_ALLOCTYPE_FIRST_AG;
3653		else
3654			args.type = XFS_ALLOCTYPE_START_BNO;
3655		args.total = args.minlen = ap->minlen;
3656	} else {
3657		args.type = XFS_ALLOCTYPE_NEAR_BNO;
3658		args.total = ap->total;
3659		args.minlen = ap->minlen;
3660	}
3661
3662	/*
3663	 * If we are not low on available data blocks, and the underlying
3664	 * logical volume manager is a stripe, and the file offset is zero then
3665	 * try to allocate data blocks on stripe unit boundary. NOTE: ap->aeof
3666	 * is only set if the allocation length is >= the stripe unit and the
3667	 * allocation offset is at the end of file.
3668	 */
3669	if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) {
3670		if (!ap->offset) {
3671			args.alignment = stripe_align;
3672			atype = args.type;
3673			isaligned = 1;
3674			/*
3675			 * Adjust minlen to try and preserve alignment if we
3676			 * can't guarantee an aligned maxlen extent.
3677			 */
3678			if (blen > args.alignment &&
3679			    blen <= args.maxlen + args.alignment)
3680				args.minlen = blen - args.alignment;
3681			args.minalignslop = 0;
3682		} else {
3683			/*
3684			 * First try an exact bno allocation.
3685			 * If it fails then do a near or start bno
3686			 * allocation with alignment turned on.
3687			 */
3688			atype = args.type;
3689			tryagain = 1;
3690			args.type = XFS_ALLOCTYPE_THIS_BNO;
3691			args.alignment = 1;
3692			/*
3693			 * Compute the minlen+alignment for the
3694			 * next case.  Set slop so that the value
3695			 * of minlen+alignment+slop doesn't go up
3696			 * between the calls.
3697			 */
3698			if (blen > stripe_align && blen <= args.maxlen)
3699				nextminlen = blen - stripe_align;
3700			else
3701				nextminlen = args.minlen;
3702			if (nextminlen + stripe_align > args.minlen + 1)
3703				args.minalignslop =
3704					nextminlen + stripe_align -
3705					args.minlen - 1;
3706			else
3707				args.minalignslop = 0;
3708		}
3709	} else {
3710		args.alignment = 1;
3711		args.minalignslop = 0;
3712	}
3713	args.minleft = ap->minleft;
3714	args.wasdel = ap->wasdel;
3715	args.resv = XFS_AG_RESV_NONE;
3716	args.datatype = ap->datatype;
3717
3718	error = xfs_alloc_vextent(&args);
3719	if (error)
3720		return error;
3721
3722	if (tryagain && args.fsbno == NULLFSBLOCK) {
3723		/*
3724		 * Exact allocation failed. Now try with alignment
3725		 * turned on.
3726		 */
3727		args.type = atype;
3728		args.fsbno = ap->blkno;
3729		args.alignment = stripe_align;
3730		args.minlen = nextminlen;
3731		args.minalignslop = 0;
3732		isaligned = 1;
3733		if ((error = xfs_alloc_vextent(&args)))
3734			return error;
3735	}
3736	if (isaligned && args.fsbno == NULLFSBLOCK) {
3737		/*
3738		 * allocation failed, so turn off alignment and
3739		 * try again.
3740		 */
3741		args.type = atype;
3742		args.fsbno = ap->blkno;
3743		args.alignment = 0;
3744		if ((error = xfs_alloc_vextent(&args)))
3745			return error;
3746	}
3747	if (args.fsbno == NULLFSBLOCK && nullfb &&
3748	    args.minlen > ap->minlen) {
3749		args.minlen = ap->minlen;
3750		args.type = XFS_ALLOCTYPE_START_BNO;
3751		args.fsbno = ap->blkno;
3752		if ((error = xfs_alloc_vextent(&args)))
3753			return error;
3754	}
3755	if (args.fsbno == NULLFSBLOCK && nullfb) {
3756		args.fsbno = 0;
3757		args.type = XFS_ALLOCTYPE_FIRST_AG;
3758		args.total = ap->minlen;
3759		if ((error = xfs_alloc_vextent(&args)))
3760			return error;
3761		ap->tp->t_flags |= XFS_TRANS_LOWMODE;
3762	}
3763
3764	if (args.fsbno != NULLFSBLOCK) {
3765		xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
3766			orig_length);
3767	} else {
3768		ap->blkno = NULLFSBLOCK;
3769		ap->length = 0;
3770	}
3771	return 0;
3772}
3773
3774/* Trim extent to fit a logical block range. */
3775void
3776xfs_trim_extent(
3777	struct xfs_bmbt_irec	*irec,
3778	xfs_fileoff_t		bno,
3779	xfs_filblks_t		len)
3780{
3781	xfs_fileoff_t		distance;
3782	xfs_fileoff_t		end = bno + len;
3783
3784	if (irec->br_startoff + irec->br_blockcount <= bno ||
3785	    irec->br_startoff >= end) {
3786		irec->br_blockcount = 0;
3787		return;
3788	}
3789
3790	if (irec->br_startoff < bno) {
3791		distance = bno - irec->br_startoff;
3792		if (isnullstartblock(irec->br_startblock))
3793			irec->br_startblock = DELAYSTARTBLOCK;
3794		if (irec->br_startblock != DELAYSTARTBLOCK &&
3795		    irec->br_startblock != HOLESTARTBLOCK)
3796			irec->br_startblock += distance;
3797		irec->br_startoff += distance;
3798		irec->br_blockcount -= distance;
3799	}
3800
3801	if (end < irec->br_startoff + irec->br_blockcount) {
3802		distance = irec->br_startoff + irec->br_blockcount - end;
3803		irec->br_blockcount -= distance;
3804	}
3805}
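
/*
 * Illustrative usage, not part of the original source: trim a mapping to
 * a caller's window.  Given an extent covering file blocks [10, 30) and a
 * window of [16, 24), the result covers [16, 24) with br_startblock
 * advanced by the 6 blocks cut from the front (unless it is a hole or
 * delalloc sentinel, which carries no real block number to adjust).
 */
static inline void
xfs_trim_extent_example(struct xfs_bmbt_irec *irec)
{
	xfs_trim_extent(irec, 16, 8);	/* keep only blocks [16, 24) */
}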
3806
3807/*
3808 * Trim the returned map to the required bounds
3809 */
3810STATIC void
3811xfs_bmapi_trim_map(
3812	struct xfs_bmbt_irec	*mval,
3813	struct xfs_bmbt_irec	*got,
3814	xfs_fileoff_t		*bno,
3815	xfs_filblks_t		len,
3816	xfs_fileoff_t		obno,
3817	xfs_fileoff_t		end,
3818	int			n,
3819	int			flags)
3820{
3821	if ((flags & XFS_BMAPI_ENTIRE) ||
3822	    got->br_startoff + got->br_blockcount <= obno) {
3823		*mval = *got;
3824		if (isnullstartblock(got->br_startblock))
3825			mval->br_startblock = DELAYSTARTBLOCK;
3826		return;
3827	}
3828
3829	if (obno > *bno)
3830		*bno = obno;
3831	ASSERT((*bno >= obno) || (n == 0));
3832	ASSERT(*bno < end);
3833	mval->br_startoff = *bno;
3834	if (isnullstartblock(got->br_startblock))
3835		mval->br_startblock = DELAYSTARTBLOCK;
3836	else
3837		mval->br_startblock = got->br_startblock +
3838					(*bno - got->br_startoff);
3839	/*
3840	 * For the length, return the minimum of what we got and what we
3841	 * asked for.  We can use the len variable here because it is
3842	 * modified below, and we may have passed through here before if
3843	 * the first part of the allocation didn't overlap what was
3844	 * asked for.
3845	 */
3846	mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3847			got->br_blockcount - (*bno - got->br_startoff));
3848	mval->br_state = got->br_state;
3849	ASSERT(mval->br_blockcount <= len);
3850	return;
3851}
3852
3853/*
3854 * Update and validate the extent map to return
3855 */
3856STATIC void
3857xfs_bmapi_update_map(
3858	struct xfs_bmbt_irec	**map,
3859	xfs_fileoff_t		*bno,
3860	xfs_filblks_t		*len,
3861	xfs_fileoff_t		obno,
3862	xfs_fileoff_t		end,
3863	int			*n,
3864	int			flags)
3865{
3866	xfs_bmbt_irec_t	*mval = *map;
3867
3868	ASSERT((flags & XFS_BMAPI_ENTIRE) ||
3869	       ((mval->br_startoff + mval->br_blockcount) <= end));
3870	ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
3871	       (mval->br_startoff < obno));
3872
3873	*bno = mval->br_startoff + mval->br_blockcount;
3874	*len = end - *bno;
3875	if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
3876		/* update previous map with new information */
3877		ASSERT(mval->br_startblock == mval[-1].br_startblock);
3878		ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
3879		ASSERT(mval->br_state == mval[-1].br_state);
3880		mval[-1].br_blockcount = mval->br_blockcount;
3881		mval[-1].br_state = mval->br_state;
3882	} else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
3883		   mval[-1].br_startblock != DELAYSTARTBLOCK &&
3884		   mval[-1].br_startblock != HOLESTARTBLOCK &&
3885		   mval->br_startblock == mval[-1].br_startblock +
3886					  mval[-1].br_blockcount &&
3887		   mval[-1].br_state == mval->br_state) {
3888		ASSERT(mval->br_startoff ==
3889		       mval[-1].br_startoff + mval[-1].br_blockcount);
3890		mval[-1].br_blockcount += mval->br_blockcount;
3891	} else if (*n > 0 &&
3892		   mval->br_startblock == DELAYSTARTBLOCK &&
3893		   mval[-1].br_startblock == DELAYSTARTBLOCK &&
3894		   mval->br_startoff ==
3895		   mval[-1].br_startoff + mval[-1].br_blockcount) {
3896		mval[-1].br_blockcount += mval->br_blockcount;
3897		mval[-1].br_state = mval->br_state;
3898	} else if (!((*n == 0) &&
3899		     ((mval->br_startoff + mval->br_blockcount) <=
3900		      obno))) {
3901		mval++;
3902		(*n)++;
3903	}
3904	*map = mval;
3905}
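
/*
 * Illustrative sketch, not part of the original source: the second merge
 * case above folds a new mapping into its predecessor when the two are
 * physically contiguous real extents of the same state.  A hypothetical
 * predicate for that test:
 */
static inline bool
xfs_bmapi_maps_mergeable_sketch(
	const struct xfs_bmbt_irec	*prev,
	const struct xfs_bmbt_irec	*cur)
{
	return cur->br_startblock != DELAYSTARTBLOCK &&
	       prev->br_startblock != DELAYSTARTBLOCK &&
	       prev->br_startblock != HOLESTARTBLOCK &&
	       cur->br_startblock == prev->br_startblock +
				     prev->br_blockcount &&
	       cur->br_state == prev->br_state;
}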
3906
3907/*
3908 * Map file blocks to filesystem blocks without allocation.
3909 */
3910int
3911xfs_bmapi_read(
3912	struct xfs_inode	*ip,
3913	xfs_fileoff_t		bno,
3914	xfs_filblks_t		len,
3915	struct xfs_bmbt_irec	*mval,
3916	int			*nmap,
3917	int			flags)
3918{
3919	struct xfs_mount	*mp = ip->i_mount;
3920	int			whichfork = xfs_bmapi_whichfork(flags);
3921	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
3922	struct xfs_bmbt_irec	got;
3923	xfs_fileoff_t		obno;
3924	xfs_fileoff_t		end;
3925	struct xfs_iext_cursor	icur;
3926	int			error;
3927	bool			eof = false;
3928	int			n = 0;
3929
3930	ASSERT(*nmap >= 1);
3931	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE)));
3932	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
3933
3934	if (WARN_ON_ONCE(!ifp))
3935		return -EFSCORRUPTED;
3936
3937	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
3938	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT))
3939		return -EFSCORRUPTED;
3940
3941	if (XFS_FORCED_SHUTDOWN(mp))
3942		return -EIO;
3943
3944	XFS_STATS_INC(mp, xs_blk_mapr);
3945
3946	error = xfs_iread_extents(NULL, ip, whichfork);
3947	if (error)
3948		return error;
3949
3950	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
3951		eof = true;
3952	end = bno + len;
3953	obno = bno;
3954
3955	while (bno < end && n < *nmap) {
3956		/* Reading past eof, act as though there's a hole up to end. */
3957		if (eof)
3958			got.br_startoff = end;
3959		if (got.br_startoff > bno) {
3960			/* Reading in a hole.  */
3961			mval->br_startoff = bno;
3962			mval->br_startblock = HOLESTARTBLOCK;
3963			mval->br_blockcount =
3964				XFS_FILBLKS_MIN(len, got.br_startoff - bno);
3965			mval->br_state = XFS_EXT_NORM;
3966			bno += mval->br_blockcount;
3967			len -= mval->br_blockcount;
3968			mval++;
3969			n++;
3970			continue;
3971		}
3972
3973		/* set up the extent map to return. */
3974		xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
3975		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
3976
3977		/* If we're done, stop now. */
3978		if (bno >= end || n >= *nmap)
3979			break;
3980
3981		/* Else go on to the next record. */
3982		if (!xfs_iext_next_extent(ifp, &icur, &got))
3983			eof = true;
3984	}
3985	*nmap = n;
3986	return 0;
3987}
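
/*
 * Illustrative usage, not part of the original source: read back at most
 * one mapping for a file range without allocating anything.  Assumes the
 * caller already holds the ILOCK in shared or exclusive mode, as asserted
 * above.  The helper name is hypothetical.
 */
static inline int
xfs_bmapi_read_one_sketch(
	struct xfs_inode	*ip,
	xfs_fileoff_t		bno,
	xfs_filblks_t		len,
	struct xfs_bmbt_irec	*irec)
{
	int			nmap = 1;
	int			error;

	error = xfs_bmapi_read(ip, bno, len, irec, &nmap, 0);
	if (error)
		return error;
	/* holes come back as HOLESTARTBLOCK mappings, not as nmap == 0 */
	return 0;
}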
3988
3989/*
3990 * Add a delayed allocation extent to an inode. Blocks are reserved from the
3991 * global pool and the extent inserted into the inode in-core extent tree.
3992 *
3993 * On entry, got refers to the first extent beyond the offset of the extent to
3994 * allocate or eof is specified if no such extent exists. On return, got refers
3995 * to the extent record that was inserted to the inode fork.
3996 *
3997 * Note that the allocated extent may have been merged with contiguous extents
3998 * during insertion into the inode fork. Thus, got does not reflect the current
3999 * state of the inode fork on return. If necessary, the caller can use icur to
4000 * look up the updated record in the inode fork.
4001 */
4002int
4003xfs_bmapi_reserve_delalloc(
4004	struct xfs_inode	*ip,
4005	int			whichfork,
4006	xfs_fileoff_t		off,
4007	xfs_filblks_t		len,
4008	xfs_filblks_t		prealloc,
4009	struct xfs_bmbt_irec	*got,
4010	struct xfs_iext_cursor	*icur,
4011	int			eof)
4012{
4013	struct xfs_mount	*mp = ip->i_mount;
4014	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
4015	xfs_extlen_t		alen;
4016	xfs_extlen_t		indlen;
4017	int			error;
4018	xfs_fileoff_t		aoff = off;
4019
4020	/*
4021	 * Cap the alloc length. Keep track of prealloc so we know whether to
4022	 * tag the inode before we return.
4023	 */
4024	alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN);
4025	if (!eof)
4026		alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
4027	if (prealloc && alen >= len)
4028		prealloc = alen - len;
4029
4030	/* Figure out the extent size, adjust alen */
4031	if (whichfork == XFS_COW_FORK) {
4032		struct xfs_bmbt_irec	prev;
4033		xfs_extlen_t		extsz = xfs_get_cowextsz_hint(ip);
4034
4035		if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
4036			prev.br_startoff = NULLFILEOFF;
4037
4038		error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
4039					       1, 0, &aoff, &alen);
4040		ASSERT(!error);
4041	}
4042
4043	/*
4044	 * Make a transaction-less quota reservation for delayed allocation
4045	 * blocks.  This number gets adjusted later.  If the reservation
4046	 * fails we can return immediately, as no blocks have been allocated yet.
4047	 */
4048	error = xfs_quota_reserve_blkres(ip, alen);
4049	if (error)
4050		return error;
4051
4052	/*
4053	 * Split changing sb for alen and indlen since they could be coming
4054	 * from different places.
4055	 */
4056	indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
4057	ASSERT(indlen > 0);
4058
4059	error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
4060	if (error)
4061		goto out_unreserve_quota;
4062
4063	error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
4064	if (error)
4065		goto out_unreserve_blocks;
4066
4067
4068	ip->i_delayed_blks += alen;
4069	xfs_mod_delalloc(ip->i_mount, alen + indlen);
4070
4071	got->br_startoff = aoff;
4072	got->br_startblock = nullstartblock(indlen);
4073	got->br_blockcount = alen;
4074	got->br_state = XFS_EXT_NORM;
4075
4076	xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
4077
4078	/*
4079	 * Tag the inode if blocks were preallocated. Note that COW fork
4080	 * preallocation can occur at the start or end of the extent, even when
4081	 * prealloc == 0, so we must also check the aligned offset and length.
4082	 */
4083	if (whichfork == XFS_DATA_FORK && prealloc)
4084		xfs_inode_set_eofblocks_tag(ip);
4085	if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
4086		xfs_inode_set_cowblocks_tag(ip);
4087
4088	return 0;
4089
4090out_unreserve_blocks:
4091	xfs_mod_fdblocks(mp, alen, false);
4092out_unreserve_quota:
4093	if (XFS_IS_QUOTA_ON(mp))
4094		xfs_quota_unreserve_blkres(ip, alen);
4095	return error;
4096}
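
/*
 * Illustrative sketch, not part of the original source: a delalloc extent
 * has no real disk blocks yet, so br_startblock instead encodes the
 * worst-case indirect-block reservation via nullstartblock(), which the
 * rest of this file decodes with startblockval() and tests with
 * isnullstartblock().  Round-trip of that encoding:
 */
static inline void
xfs_delalloc_encoding_sketch(
	struct xfs_bmbt_irec	*got,
	xfs_extlen_t		indlen)
{
	got->br_startblock = nullstartblock(indlen);
	ASSERT(isnullstartblock(got->br_startblock));
	ASSERT(startblockval(got->br_startblock) == indlen);
}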
4097
4098static int
4099xfs_bmap_alloc_userdata(
4100	struct xfs_bmalloca	*bma)
4101{
4102	struct xfs_mount	*mp = bma->ip->i_mount;
4103	int			whichfork = xfs_bmapi_whichfork(bma->flags);
4104	int			error;
4105
4106	/*
4107	 * Set the data type being allocated. For the data fork, the first data
4108	 * in the file is treated differently to all other allocations. For the
4109	 * attribute fork, we only need to ensure the allocated range is not on
4110	 * the busy list.
4111	 */
4112	bma->datatype = XFS_ALLOC_NOBUSY;
4113	if (whichfork == XFS_DATA_FORK) {
4114		bma->datatype |= XFS_ALLOC_USERDATA;
4115		if (bma->offset == 0)
4116			bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
4117
4118		if (mp->m_dalign && bma->length >= mp->m_dalign) {
4119			error = xfs_bmap_isaeof(bma, whichfork);
4120			if (error)
4121				return error;
4122		}
4123
4124		if (XFS_IS_REALTIME_INODE(bma->ip))
4125			return xfs_bmap_rtalloc(bma);
4126	}
4127
4128	if (unlikely(XFS_TEST_ERROR(false, mp,
4129			XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
4130		return xfs_bmap_exact_minlen_extent_alloc(bma);
4131
4132	return xfs_bmap_btalloc(bma);
4133}
4134
4135static int
4136xfs_bmapi_allocate(
4137	struct xfs_bmalloca	*bma)
4138{
4139	struct xfs_mount	*mp = bma->ip->i_mount;
4140	int			whichfork = xfs_bmapi_whichfork(bma->flags);
4141	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4142	int			tmp_logflags = 0;
4143	int			error;
4144
4145	ASSERT(bma->length > 0);
4146
4147	/*
4148	 * For the wasdelay case, we could also just allocate the stuff asked
4149	 * for in this bmap call but that wouldn't be as good.
4150	 */
4151	if (bma->wasdel) {
4152		bma->length = (xfs_extlen_t)bma->got.br_blockcount;
4153		bma->offset = bma->got.br_startoff;
4154		if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev))
4155			bma->prev.br_startoff = NULLFILEOFF;
4156	} else {
4157		bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
4158		if (!bma->eof)
4159			bma->length = XFS_FILBLKS_MIN(bma->length,
4160					bma->got.br_startoff - bma->offset);
4161	}
4162
4163	if (bma->flags & XFS_BMAPI_CONTIG)
4164		bma->minlen = bma->length;
4165	else
4166		bma->minlen = 1;
4167
4168	if (bma->flags & XFS_BMAPI_METADATA) {
4169		if (unlikely(XFS_TEST_ERROR(false, mp,
4170				XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
4171			error = xfs_bmap_exact_minlen_extent_alloc(bma);
4172		else
4173			error = xfs_bmap_btalloc(bma);
4174	} else {
4175		error = xfs_bmap_alloc_userdata(bma);
4176	}
4177	if (error || bma->blkno == NULLFSBLOCK)
4178		return error;
4179
4180	if (bma->flags & XFS_BMAPI_ZERO) {
4181		error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
4182		if (error)
4183			return error;
4184	}
4185
4186	if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur)
4187		bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4188	/*
4189	 * Bump the number of extents we've allocated
4190	 * in this call.
4191	 */
4192	bma->nallocs++;
4193
4194	if (bma->cur)
4195		bma->cur->bc_ino.flags =
4196			bma->wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
4197
4198	bma->got.br_startoff = bma->offset;
4199	bma->got.br_startblock = bma->blkno;
4200	bma->got.br_blockcount = bma->length;
4201	bma->got.br_state = XFS_EXT_NORM;
4202
4203	if (bma->flags & XFS_BMAPI_PREALLOC)
4204		bma->got.br_state = XFS_EXT_UNWRITTEN;
4205
4206	if (bma->wasdel)
4207		error = xfs_bmap_add_extent_delay_real(bma, whichfork);
4208	else
4209		error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
4210				whichfork, &bma->icur, &bma->cur, &bma->got,
4211				&bma->logflags, bma->flags);
4212
4213	bma->logflags |= tmp_logflags;
4214	if (error)
4215		return error;
4216
4217	/*
4218	 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
4219	 * or xfs_bmap_add_extent_hole_real might have merged it into one of
4220	 * the neighbouring ones.
4221	 */
4222	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4223
4224	ASSERT(bma->got.br_startoff <= bma->offset);
4225	ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
4226	       bma->offset + bma->length);
4227	ASSERT(bma->got.br_state == XFS_EXT_NORM ||
4228	       bma->got.br_state == XFS_EXT_UNWRITTEN);
4229	return 0;
4230}
4231
4232STATIC int
4233xfs_bmapi_convert_unwritten(
4234	struct xfs_bmalloca	*bma,
4235	struct xfs_bmbt_irec	*mval,
4236	xfs_filblks_t		len,
4237	int			flags)
4238{
4239	int			whichfork = xfs_bmapi_whichfork(flags);
4240	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4241	int			tmp_logflags = 0;
4242	int			error;
4243
4244	/* check if we need to do unwritten->real conversion */
4245	if (mval->br_state == XFS_EXT_UNWRITTEN &&
4246	    (flags & XFS_BMAPI_PREALLOC))
4247		return 0;
4248
4249	/* check if we need to do real->unwritten conversion */
4250	if (mval->br_state == XFS_EXT_NORM &&
4251	    (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4252			(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4253		return 0;
4254
4255	/*
4256	 * Modify (by adding) the state flag, if writing.
4257	 */
4258	ASSERT(mval->br_blockcount <= len);
4259	if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur) {
4260		bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4261					bma->ip, whichfork);
4262	}
4263	mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4264				? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4265
4266	/*
4267	 * Before insertion into the bmbt, zero the range being converted
4268	 * if required.
4269	 */
4270	if (flags & XFS_BMAPI_ZERO) {
4271		error = xfs_zero_extent(bma->ip, mval->br_startblock,
4272					mval->br_blockcount);
4273		if (error)
4274			return error;
4275	}
4276
4277	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
4278			&bma->icur, &bma->cur, mval, &tmp_logflags);
4279	/*
4280	 * Log the inode core unconditionally in the unwritten extent conversion
4281	 * path because the conversion might not have done so (e.g., if the
4282	 * extent count hasn't changed). We need to make sure the inode is dirty
4283	 * in the transaction for the sake of fsync(), even if nothing has
4284	 * changed, because fsync() will not force the log for this transaction
4285	 * unless it sees the inode pinned.
4286	 *
4287	 * Note: If we're only converting cow fork extents, there aren't
4288	 * any on-disk updates to make, so we don't need to log anything.
4289	 */
4290	if (whichfork != XFS_COW_FORK)
4291		bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4292	if (error)
4293		return error;
4294
4295	/*
4296	 * Update our extent pointer, given that
4297	 * xfs_bmap_add_extent_unwritten_real might have merged it into one
4298	 * of the neighbouring ones.
4299	 */
4300	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4301
4302	/*
4303	 * We may have combined previously unwritten space with written space,
4304	 * so generate another request.
4305	 */
4306	if (mval->br_blockcount < len)
4307		return -EAGAIN;
4308	return 0;
4309}
4310
4311static inline xfs_extlen_t
4312xfs_bmapi_minleft(
4313	struct xfs_trans	*tp,
4314	struct xfs_inode	*ip,
4315	int			fork)
4316{
4317	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, fork);
4318
4319	if (tp && tp->t_firstblock != NULLFSBLOCK)
4320		return 0;
4321	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
4322		return 1;
4323	return be16_to_cpu(ifp->if_broot->bb_level) + 1;
4324}
4325
4326/*
4327 * Log whatever the flags say, even if error.  Otherwise we might miss detecting
4328 * a case where the data is changed, there's an error, and it's not logged so we
4329 * don't shutdown when we should.  Don't bother logging extents/btree changes if
4330 * we converted to the other format.
4331 */
4332static void
4333xfs_bmapi_finish(
4334	struct xfs_bmalloca	*bma,
4335	int			whichfork,
4336	int			error)
4337{
4338	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4339
4340	if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
4341	    ifp->if_format != XFS_DINODE_FMT_EXTENTS)
4342		bma->logflags &= ~xfs_ilog_fext(whichfork);
4343	else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
4344		 ifp->if_format != XFS_DINODE_FMT_BTREE)
4345		bma->logflags &= ~xfs_ilog_fbroot(whichfork);
4346
4347	if (bma->logflags)
4348		xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
4349	if (bma->cur)
4350		xfs_btree_del_cursor(bma->cur, error);
4351}
4352
4353/*
4354 * Map file blocks to filesystem blocks, and allocate blocks or convert the
4355 * extent state if necessary.  Detailed behaviour is controlled by the flags
4356 * parameter.  Only allocates blocks from a single allocation group, to avoid
4357 * locking problems.
4358 */
4359int
4360xfs_bmapi_write(
4361	struct xfs_trans	*tp,		/* transaction pointer */
4362	struct xfs_inode	*ip,		/* incore inode */
4363	xfs_fileoff_t		bno,		/* starting file offs. mapped */
4364	xfs_filblks_t		len,		/* length to map in file */
4365	int			flags,		/* XFS_BMAPI_... */
4366	xfs_extlen_t		total,		/* total blocks needed */
4367	struct xfs_bmbt_irec	*mval,		/* output: map values */
4368	int			*nmap)		/* i/o: mval size/count */
4369{
4370	struct xfs_bmalloca	bma = {
4371		.tp		= tp,
4372		.ip		= ip,
4373		.total		= total,
4374	};
4375	struct xfs_mount	*mp = ip->i_mount;
4376	int			whichfork = xfs_bmapi_whichfork(flags);
4377	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
4378	xfs_fileoff_t		end;		/* end of mapped file region */
4379	bool			eof = false;	/* after the end of extents */
4380	int			error;		/* error return */
4381	int			n;		/* current extent index */
4382	xfs_fileoff_t		obno;		/* old block number (offset) */
4383
4384#ifdef DEBUG
4385	xfs_fileoff_t		orig_bno;	/* original block number value */
4386	int			orig_flags;	/* original flags arg value */
4387	xfs_filblks_t		orig_len;	/* original value of len arg */
4388	struct xfs_bmbt_irec	*orig_mval;	/* original value of mval */
4389	int			orig_nmap;	/* original value of *nmap */
4390
4391	orig_bno = bno;
4392	orig_len = len;
4393	orig_flags = flags;
4394	orig_mval = mval;
4395	orig_nmap = *nmap;
4396#endif
4397
4398	ASSERT(*nmap >= 1);
4399	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4400	ASSERT(tp != NULL);
4401	ASSERT(len > 0);
4402	ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL);
4403	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4404	ASSERT(!(flags & XFS_BMAPI_REMAP));
4405
4406	/* zeroing is currently only for data extents, not metadata */
4407	ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4408			(XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4409	/*
4410	 * We can allocate unwritten extents or pre-zero allocated blocks,
4411	 * but it makes no sense to do both at once.  That would result in
4412	 * zeroing the unwritten extent twice while it remained an
4413	 * unwritten extent.
4414	 */
4415	ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4416			(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4417
4418	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4419	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4420		return -EFSCORRUPTED;
4421	}
4422
4423	if (XFS_FORCED_SHUTDOWN(mp))
4424		return -EIO;
4425
4426	XFS_STATS_INC(mp, xs_blk_mapw);
4427
4428	error = xfs_iread_extents(tp, ip, whichfork);
4429	if (error)
4430		goto error0;
4431
4432	if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
4433		eof = true;
4434	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4435		bma.prev.br_startoff = NULLFILEOFF;
4436	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4437
4438	n = 0;
4439	end = bno + len;
4440	obno = bno;
4441	while (bno < end && n < *nmap) {
4442		bool			need_alloc = false, wasdelay = false;
4443
4444		/* in hole or beyond EOF? */
4445		if (eof || bma.got.br_startoff > bno) {
4446			/*
4447			 * CoW fork conversions should /never/ hit EOF or
4448			 * holes.  There should always be something for us
4449			 * to work on.
4450			 */
4451			ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
4452			         (flags & XFS_BMAPI_COWFORK)));
4453
4454			need_alloc = true;
4455		} else if (isnullstartblock(bma.got.br_startblock)) {
4456			wasdelay = true;
4457		}
4458
4459		/*
4460		 * First, deal with the hole before the allocated space
4461		 * that we found, if any.
4462		 */
4463		if (need_alloc || wasdelay) {
4464			bma.eof = eof;
4465			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4466			bma.wasdel = wasdelay;
4467			bma.offset = bno;
4468			bma.flags = flags;
4469
4470			/*
4471			 * There's a 32/64 bit type mismatch between the
4472			 * allocation length request (which can be 64 bits in
4473			 * length) and the bma length request, which is
4474			 * xfs_extlen_t and therefore 32 bits. Hence we have to
4475			 * check for 32-bit overflows and handle them here.
4476			 */
4477			if (len > (xfs_filblks_t)MAXEXTLEN)
4478				bma.length = MAXEXTLEN;
4479			else
4480				bma.length = len;
4481
4482			ASSERT(len > 0);
4483			ASSERT(bma.length > 0);
4484			error = xfs_bmapi_allocate(&bma);
4485			if (error)
4486				goto error0;
4487			if (bma.blkno == NULLFSBLOCK)
4488				break;
4489
4490			/*
4491			 * If this is a CoW allocation, record the data in
4492			 * the refcount btree for orphan recovery.
4493			 */
4494			if (whichfork == XFS_COW_FORK)
4495				xfs_refcount_alloc_cow_extent(tp, bma.blkno,
4496						bma.length);
4497		}
4498
4499		/* Deal with the allocated space we found.  */
4500		xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
4501							end, n, flags);
4502
4503		/* Execute unwritten extent conversion if necessary */
4504		error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
4505		if (error == -EAGAIN)
4506			continue;
4507		if (error)
4508			goto error0;
4509
4510		/* update the extent map to return */
4511		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4512
4513		/*
4514		 * If we're done, stop now.  Stop once we've allocated *nmap
4515		 * extents no matter what; otherwise the transaction may get
4516		 * too big.
4517		 */
4518		if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
4519			break;
4520
4521		/* Else go on to the next record. */
4522		bma.prev = bma.got;
4523		if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
4524			eof = true;
4525	}
4526	*nmap = n;
4527
4528	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4529			whichfork);
4530	if (error)
4531		goto error0;
4532
4533	ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE ||
4534	       ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
4535	xfs_bmapi_finish(&bma, whichfork, 0);
4536	xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
4537		orig_nmap, *nmap);
4538	return 0;
4539error0:
4540	xfs_bmapi_finish(&bma, whichfork, error);
4541	return error;
4542}
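/*
 * A sketch of a typical xfs_bmapi_write() caller, with hypothetical names
 * (off, count, nimaps): allocate blocks at file offset "off" for an inode
 * already joined to a transaction, then check that a real mapping came back.
 *
 *	struct xfs_bmbt_irec	imap;
 *	int			nimaps = 1;
 *
 *	error = xfs_bmapi_write(tp, ip, off, count, XFS_BMAPI_PREALLOC,
 *			XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
 *			&imap, &nimaps);
 *	if (error)
 *		goto out_trans_cancel;
 *	ASSERT(nimaps == 1 && imap.br_startblock != NULLFSBLOCK);
 */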
4543
4544/*
4545 * Convert an existing delalloc extent to real blocks based on file offset. This
4546 * attempts to allocate the entire delalloc extent and may require multiple
4547 * invocations to allocate the target offset if a large enough physical extent
4548 * is not available.
4549 */
4550int
4551xfs_bmapi_convert_delalloc(
4552	struct xfs_inode	*ip,
4553	int			whichfork,
4554	xfs_off_t		offset,
4555	struct iomap		*iomap,
4556	unsigned int		*seq)
4557{
4558	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
4559	struct xfs_mount	*mp = ip->i_mount;
4560	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
4561	struct xfs_bmalloca	bma = { NULL };
4562	uint16_t		flags = 0;
4563	struct xfs_trans	*tp;
4564	int			error;
4565
4566	if (whichfork == XFS_COW_FORK)
4567		flags |= IOMAP_F_SHARED;
4568
4569	/*
4570	 * Space for the extent and indirect blocks was reserved when the
4571	 * delalloc extent was created, so there's no need to reserve again here.
4572	 */
4573	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
4574				XFS_TRANS_RESERVE, &tp);
4575	if (error)
4576		return error;
4577
4578	xfs_ilock(ip, XFS_ILOCK_EXCL);
4579
4580	error = xfs_iext_count_may_overflow(ip, whichfork,
4581			XFS_IEXT_ADD_NOSPLIT_CNT);
4582	if (error)
4583		goto out_trans_cancel;
4584
4585	xfs_trans_ijoin(tp, ip, 0);
4586
4587	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
4588	    bma.got.br_startoff > offset_fsb) {
4589		/*
4590		 * No extent found in the range we are trying to convert.  This
4591		 * should only happen for the COW fork, where another thread
4592		 * might have moved the extent to the data fork in the meantime.
4593		 */
4594		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
4595		error = -EAGAIN;
4596		goto out_trans_cancel;
4597	}
4598
4599	/*
4600	 * If we find a real extent here we raced with another thread converting
4601	 * the extent.  Just return the real extent at this offset.
4602	 */
4603	if (!isnullstartblock(bma.got.br_startblock)) {
4604		xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
4605		*seq = READ_ONCE(ifp->if_seq);
4606		goto out_trans_cancel;
4607	}
4608
4609	bma.tp = tp;
4610	bma.ip = ip;
4611	bma.wasdel = true;
4612	bma.offset = bma.got.br_startoff;
4613	bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN);
4614	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4615
4616	/*
4617	 * When we're converting the delalloc reservations backing dirty pages
4618	 * in the page cache, we must be careful about how we create the new
4619	 * extents:
4620	 *
4621	 * New CoW fork extents are created unwritten, turned into real extents
4622	 * when we're about to write the data to disk, and mapped into the data
4623	 * fork after the write finishes.  End of story.
4624	 *
4625	 * New data fork extents must be mapped in as unwritten and converted
4626	 * to real extents after the write succeeds to avoid exposing stale
4627	 * disk contents if we crash.
4628	 */
4629	bma.flags = XFS_BMAPI_PREALLOC;
4630	if (whichfork == XFS_COW_FORK)
4631		bma.flags |= XFS_BMAPI_COWFORK;
4632
4633	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4634		bma.prev.br_startoff = NULLFILEOFF;
4635
4636	error = xfs_bmapi_allocate(&bma);
4637	if (error)
4638		goto out_finish;
4639
4640	error = -ENOSPC;
4641	if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
4642		goto out_finish;
4643	error = -EFSCORRUPTED;
4644	if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock)))
4645		goto out_finish;
4646
4647	XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
4648	XFS_STATS_INC(mp, xs_xstrat_quick);
4649
4650	ASSERT(!isnullstartblock(bma.got.br_startblock));
4651	xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
4652	*seq = READ_ONCE(ifp->if_seq);
4653
4654	if (whichfork == XFS_COW_FORK)
4655		xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
4656
4657	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4658			whichfork);
4659	if (error)
4660		goto out_finish;
4661
4662	xfs_bmapi_finish(&bma, whichfork, 0);
4663	error = xfs_trans_commit(tp);
4664	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4665	return error;
4666
4667out_finish:
4668	xfs_bmapi_finish(&bma, whichfork, error);
4669out_trans_cancel:
4670	xfs_trans_cancel(tp);
4671	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4672	return error;
4673}
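/*
 * Because a single call may only convert part of the delalloc extent,
 * callers loop until the returned mapping covers the offset they care
 * about.  A sketch modeled on the writeback path (names hypothetical):
 *
 *	do {
 *		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
 *				&iomap, &seq);
 *		if (error)
 *			return error;
 *	} while (iomap.offset + iomap.length <= offset);
 */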
4674
4675int
4676xfs_bmapi_remap(
4677	struct xfs_trans	*tp,
4678	struct xfs_inode	*ip,
4679	xfs_fileoff_t		bno,
4680	xfs_filblks_t		len,
4681	xfs_fsblock_t		startblock,
4682	int			flags)
4683{
4684	struct xfs_mount	*mp = ip->i_mount;
4685	struct xfs_ifork	*ifp;
4686	struct xfs_btree_cur	*cur = NULL;
4687	struct xfs_bmbt_irec	got;
4688	struct xfs_iext_cursor	icur;
4689	int			whichfork = xfs_bmapi_whichfork(flags);
4690	int			logflags = 0, error;
4691
4692	ifp = XFS_IFORK_PTR(ip, whichfork);
4693	ASSERT(len > 0);
4694	ASSERT(len <= (xfs_filblks_t)MAXEXTLEN);
4695	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4696	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
4697			   XFS_BMAPI_NORMAP)));
4698	ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
4699			(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
4700
4701	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4702	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4703		return -EFSCORRUPTED;
4704	}
4705
4706	if (XFS_FORCED_SHUTDOWN(mp))
4707		return -EIO;
4708
4709	error = xfs_iread_extents(tp, ip, whichfork);
4710	if (error)
4711		return error;
4712
4713	if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
4714		/* make sure we only reflink into a hole. */
4715		ASSERT(got.br_startoff > bno);
4716		ASSERT(got.br_startoff - bno >= len);
4717	}
4718
4719	ip->i_nblocks += len;
4720	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4721
4722	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
4723		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
4724		cur->bc_ino.flags = 0;
4725	}
4726
4727	got.br_startoff = bno;
4728	got.br_startblock = startblock;
4729	got.br_blockcount = len;
4730	if (flags & XFS_BMAPI_PREALLOC)
4731		got.br_state = XFS_EXT_UNWRITTEN;
4732	else
4733		got.br_state = XFS_EXT_NORM;
4734
4735	error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
4736			&cur, &got, &logflags, flags);
4737	if (error)
4738		goto error0;
4739
4740	error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
4741
4742error0:
4743	if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS)
4744		logflags &= ~XFS_ILOG_DEXT;
4745	else if (ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
4746		logflags &= ~XFS_ILOG_DBROOT;
4747
4748	if (logflags)
4749		xfs_trans_log_inode(tp, ip, logflags);
4750	if (cur)
4751		xfs_btree_del_cursor(cur, error);
4752	return error;
4753}
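/*
 * A sketch of mapping an already-allocated physical extent into a hole with
 * xfs_bmapi_remap() (values hypothetical); this is how XFS_BMAP_MAP intents
 * are replayed by xfs_bmap_finish_one() below:
 *
 *	error = xfs_bmapi_remap(tp, ip, off_fsb, len_fsb, fsbno, 0);
 */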
4754
4755/*
4756 * When a delalloc extent is split (e.g., due to a hole punch), the original
4757 * indlen reservation must be shared across the two new extents that are left
4758 * behind.
4759 *
4760 * Given the original reservation and the worst case indlen for the two new
4761 * extents (as calculated by xfs_bmap_worst_indlen()), split the original
4762 * reservation fairly across the two new extents. If necessary, steal available
4763 * blocks from a deleted extent to make up a reservation deficiency (e.g., if
4764 * ores == 1). The number of stolen blocks is returned. The availability and
4765 * subsequent accounting of stolen blocks is the responsibility of the caller.
4766 */
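/*
 * A worked example with illustrative numbers: ores = 5, *indlen1 = 4,
 * *indlen2 = 4, avail = 2.  We steal min(8 - 5, 2) = 2 blocks, so ores
 * becomes 7, still short of the required 8.  resfactor = 700 / 8 = 87,
 * so len1 = len2 = (4 * 87) / 100 = 3, leaving one block to hand out;
 * the loop gives it to len1.  Result: *indlen1 = 4, *indlen2 = 3, and
 * 2 stolen blocks are returned.
 */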
4767static xfs_filblks_t
4768xfs_bmap_split_indlen(
4769	xfs_filblks_t			ores,		/* original res. */
4770	xfs_filblks_t			*indlen1,	/* ext1 worst indlen */
4771	xfs_filblks_t			*indlen2,	/* ext2 worst indlen */
4772	xfs_filblks_t			avail)		/* stealable blocks */
4773{
4774	xfs_filblks_t			len1 = *indlen1;
4775	xfs_filblks_t			len2 = *indlen2;
4776	xfs_filblks_t			nres = len1 + len2; /* new total res. */
4777	xfs_filblks_t			stolen = 0;
4778	xfs_filblks_t			resfactor;
4779
4780	/*
4781	 * Steal as many blocks as we can to try to satisfy the worst case
4782	 * indlen for both new extents.
4783	 */
4784	if (ores < nres && avail)
4785		stolen = XFS_FILBLKS_MIN(nres - ores, avail);
4786	ores += stolen;
4787
4788	/* nothing else to do if we've satisfied the new reservation */
4789	if (ores >= nres)
4790		return stolen;
4791
4792	/*
4793	 * We can't meet the total required reservation for the two extents.
4794	 * Calculate the percent of the overall shortage between both extents
4795	 * and apply this percentage to each of the requested indlen values.
4796	 * This distributes the shortage fairly and reduces the chances that one
4797	 * of the two extents is left with nothing when extents are repeatedly
4798	 * split.
4799	 */
4800	resfactor = (ores * 100);
4801	do_div(resfactor, nres);
4802	len1 *= resfactor;
4803	do_div(len1, 100);
4804	len2 *= resfactor;
4805	do_div(len2, 100);
4806	ASSERT(len1 + len2 <= ores);
4807	ASSERT(len1 < *indlen1 && len2 < *indlen2);
4808
4809	/*
4810	 * Hand out the remainder to each extent. If one of the two reservations
4811	 * is zero, we want to make sure that one gets a block first. The loop
4812	 * below starts with len1, so hand len2 a block right off the bat if it
4813	 * is zero.
4814	 */
4815	ores -= (len1 + len2);
4816	ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
4817	if (ores && !len2 && *indlen2) {
4818		len2++;
4819		ores--;
4820	}
4821	while (ores) {
4822		if (len1 < *indlen1) {
4823			len1++;
4824			ores--;
4825		}
4826		if (!ores)
4827			break;
4828		if (len2 < *indlen2) {
4829			len2++;
4830			ores--;
4831		}
4832	}
4833
4834	*indlen1 = len1;
4835	*indlen2 = len2;
4836
4837	return stolen;
4838}
4839
4840int
4841xfs_bmap_del_extent_delay(
4842	struct xfs_inode	*ip,
4843	int			whichfork,
4844	struct xfs_iext_cursor	*icur,
4845	struct xfs_bmbt_irec	*got,
4846	struct xfs_bmbt_irec	*del)
4847{
4848	struct xfs_mount	*mp = ip->i_mount;
4849	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
4850	struct xfs_bmbt_irec	new;
4851	int64_t			da_old, da_new, da_diff = 0;
4852	xfs_fileoff_t		del_endoff, got_endoff;
4853	xfs_filblks_t		got_indlen, new_indlen, stolen;
4854	int			state = xfs_bmap_fork_to_state(whichfork);
4855	int			error = 0;
4856	bool			isrt;
4857
4858	XFS_STATS_INC(mp, xs_del_exlist);
4859
4860	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
4861	del_endoff = del->br_startoff + del->br_blockcount;
4862	got_endoff = got->br_startoff + got->br_blockcount;
4863	da_old = startblockval(got->br_startblock);
4864	da_new = 0;
4865
4866	ASSERT(del->br_blockcount > 0);
4867	ASSERT(got->br_startoff <= del->br_startoff);
4868	ASSERT(got_endoff >= del_endoff);
4869
4870	if (isrt) {
4871		/* rt extents freed = blocks deleted / blocks per rt extent */
4872		uint64_t rtexts = del->br_blockcount;
4873		do_div(rtexts, mp->m_sb.sb_rextsize);
4874		xfs_mod_frextents(mp, rtexts);
4875	}
4876
4877	/*
4878	 * Update the inode delalloc counter now and wait to update the
4879	 * sb counters as we might have to borrow some blocks for the
4880	 * indirect block accounting.
4881	 */
4882	ASSERT(!isrt);
4883	error = xfs_quota_unreserve_blkres(ip, del->br_blockcount);
4884	if (error)
4885		return error;
4886	ip->i_delayed_blks -= del->br_blockcount;
4887
4888	if (got->br_startoff == del->br_startoff)
4889		state |= BMAP_LEFT_FILLING;
4890	if (got_endoff == del_endoff)
4891		state |= BMAP_RIGHT_FILLING;
4892
4893	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4894	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4895		/*
4896		 * Matches the whole extent.  Delete the entry.
4897		 */
4898		xfs_iext_remove(ip, icur, state);
4899		xfs_iext_prev(ifp, icur);
4900		break;
4901	case BMAP_LEFT_FILLING:
4902		/*
4903		 * Deleting the first part of the extent.
4904		 */
4905		got->br_startoff = del_endoff;
4906		got->br_blockcount -= del->br_blockcount;
4907		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4908				got->br_blockcount), da_old);
4909		got->br_startblock = nullstartblock((int)da_new);
4910		xfs_iext_update_extent(ip, state, icur, got);
4911		break;
4912	case BMAP_RIGHT_FILLING:
4913		/*
4914		 * Deleting the last part of the extent.
4915		 */
4916		got->br_blockcount = got->br_blockcount - del->br_blockcount;
4917		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4918				got->br_blockcount), da_old);
4919		got->br_startblock = nullstartblock((int)da_new);
4920		xfs_iext_update_extent(ip, state, icur, got);
4921		break;
4922	case 0:
4923		/*
4924		 * Deleting the middle of the extent.
4925		 *
4926		 * Distribute the original indlen reservation across the two new
4927		 * extents.  Steal blocks from the deleted extent if necessary.
4928		 * Stealing blocks simply fudges the fdblocks accounting below.
4929		 * Warn if either of the new indlen reservations is zero as this
4930		 * can lead to delalloc problems.
4931		 */
4932		got->br_blockcount = del->br_startoff - got->br_startoff;
4933		got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
4934
4935		new.br_blockcount = got_endoff - del_endoff;
4936		new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
4937
4938		WARN_ON_ONCE(!got_indlen || !new_indlen);
4939		stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
4940						       del->br_blockcount);
4941
4942		got->br_startblock = nullstartblock((int)got_indlen);
4943
4944		new.br_startoff = del_endoff;
4945		new.br_state = got->br_state;
4946		new.br_startblock = nullstartblock((int)new_indlen);
4947
4948		xfs_iext_update_extent(ip, state, icur, got);
4949		xfs_iext_next(ifp, icur);
4950		xfs_iext_insert(ip, icur, &new, state);
4951
4952		da_new = got_indlen + new_indlen - stolen;
4953		del->br_blockcount -= stolen;
4954		break;
4955	}
4956
4957	ASSERT(da_old >= da_new);
4958	da_diff = da_old - da_new;
4959	if (!isrt)
4960		da_diff += del->br_blockcount;
4961	if (da_diff) {
4962		xfs_mod_fdblocks(mp, da_diff, false);
4963		xfs_mod_delalloc(mp, -da_diff);
4964	}
4965	return error;
4966}
4967
4968void
4969xfs_bmap_del_extent_cow(
4970	struct xfs_inode	*ip,
4971	struct xfs_iext_cursor	*icur,
4972	struct xfs_bmbt_irec	*got,
4973	struct xfs_bmbt_irec	*del)
4974{
4975	struct xfs_mount	*mp = ip->i_mount;
4976	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
4977	struct xfs_bmbt_irec	new;
4978	xfs_fileoff_t		del_endoff, got_endoff;
4979	int			state = BMAP_COWFORK;
4980
4981	XFS_STATS_INC(mp, xs_del_exlist);
4982
4983	del_endoff = del->br_startoff + del->br_blockcount;
4984	got_endoff = got->br_startoff + got->br_blockcount;
4985
4986	ASSERT(del->br_blockcount > 0);
4987	ASSERT(got->br_startoff <= del->br_startoff);
4988	ASSERT(got_endoff >= del_endoff);
4989	ASSERT(!isnullstartblock(got->br_startblock));
4990
4991	if (got->br_startoff == del->br_startoff)
4992		state |= BMAP_LEFT_FILLING;
4993	if (got_endoff == del_endoff)
4994		state |= BMAP_RIGHT_FILLING;
4995
4996	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4997	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4998		/*
4999		 * Matches the whole extent.  Delete the entry.
5000		 */
5001		xfs_iext_remove(ip, icur, state);
5002		xfs_iext_prev(ifp, icur);
5003		break;
5004	case BMAP_LEFT_FILLING:
5005		/*
5006		 * Deleting the first part of the extent.
5007		 */
5008		got->br_startoff = del_endoff;
5009		got->br_blockcount -= del->br_blockcount;
5010		got->br_startblock = del->br_startblock + del->br_blockcount;
5011		xfs_iext_update_extent(ip, state, icur, got);
5012		break;
5013	case BMAP_RIGHT_FILLING:
5014		/*
5015		 * Deleting the last part of the extent.
5016		 */
5017		got->br_blockcount -= del->br_blockcount;
5018		xfs_iext_update_extent(ip, state, icur, got);
5019		break;
5020	case 0:
5021		/*
5022		 * Deleting the middle of the extent.
5023		 */
5024		got->br_blockcount = del->br_startoff - got->br_startoff;
5025
5026		new.br_startoff = del_endoff;
5027		new.br_blockcount = got_endoff - del_endoff;
5028		new.br_state = got->br_state;
5029		new.br_startblock = del->br_startblock + del->br_blockcount;
5030
5031		xfs_iext_update_extent(ip, state, icur, got);
5032		xfs_iext_next(ifp, icur);
5033		xfs_iext_insert(ip, icur, &new, state);
5034		break;
5035	}
5036	ip->i_delayed_blks -= del->br_blockcount;
5037}
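/*
 * The four cases above, with illustrative offsets for got = [10, +8):
 * deleting [10, +8) removes the record entirely; deleting [10, +3) leaves
 * [13, +5) (left filling); deleting [15, +3) leaves [10, +5) (right
 * filling); deleting [12, +3) leaves [10, +2) and [15, +3) (middle).
 */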
5038
5039/*
5040 * Called by xfs_bmapi to update file extent records and the btree
5041 * after removing space.
5042 */
5043STATIC int				/* error */
5044xfs_bmap_del_extent_real(
5045	xfs_inode_t		*ip,	/* incore inode pointer */
5046	xfs_trans_t		*tp,	/* current transaction pointer */
5047	struct xfs_iext_cursor	*icur,
5048	xfs_btree_cur_t		*cur,	/* if null, not a btree */
5049	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
5050	int			*logflagsp, /* inode logging flags */
5051	int			whichfork, /* data or attr fork */
5052	int			bflags)	/* bmapi flags */
5053{
5054	xfs_fsblock_t		del_endblock = 0; /* first block past del */
5055	xfs_fileoff_t		del_endoff;	/* first offset past del */
5056	int			do_fx;	/* free extent at end of routine */
5057	int			error;	/* error return value */
5058	int			flags = 0;	/* inode logging flags */
5059	struct xfs_bmbt_irec	got;	/* current extent entry */
5060	xfs_fileoff_t		got_endoff;	/* first offset past got */
5061	int			i;	/* temp state */
5062	struct xfs_ifork	*ifp;	/* inode fork pointer */
5063	xfs_mount_t		*mp;	/* mount structure */
5064	xfs_filblks_t		nblks;	/* quota/sb block count */
5065	xfs_bmbt_irec_t		new;	/* new record to be inserted */
5066	/* REFERENCED */
5067	uint			qfield;	/* quota field to update */
5068	int			state = xfs_bmap_fork_to_state(whichfork);
5069	struct xfs_bmbt_irec	old;
5070
5071	mp = ip->i_mount;
5072	XFS_STATS_INC(mp, xs_del_exlist);
5073
5074	ifp = XFS_IFORK_PTR(ip, whichfork);
5075	ASSERT(del->br_blockcount > 0);
5076	xfs_iext_get_extent(ifp, icur, &got);
5077	ASSERT(got.br_startoff <= del->br_startoff);
5078	del_endoff = del->br_startoff + del->br_blockcount;
5079	got_endoff = got.br_startoff + got.br_blockcount;
5080	ASSERT(got_endoff >= del_endoff);
5081	ASSERT(!isnullstartblock(got.br_startblock));
5082	qfield = 0;
5083	error = 0;
5084
5085	/*
5086	 * If the directory code is running with no block
5087	 * reservation, and the deleted block is in the middle of its extent,
5088	 * and the resulting insert of an extent would cause transformation to
5089	 * btree format, then reject it.  The calling code will then swap blocks
5090	 * around instead.  We have to do this now, rather than waiting for the
5091	 * conversion to btree format, since the transaction will be dirty then.
5092	 */
5093	if (tp->t_blk_res == 0 &&
5094	    ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
5095	    ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
5096	    del->br_startoff > got.br_startoff && del_endoff < got_endoff)
5097		return -ENOSPC;
5098
5099	flags = XFS_ILOG_CORE;
5100	if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
5101		xfs_filblks_t	len;
5102		xfs_extlen_t	mod;
5103
5104		len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize,
5105				  &mod);
5106		ASSERT(mod == 0);
5107
5108		if (!(bflags & XFS_BMAPI_REMAP)) {
5109			xfs_fsblock_t	bno;
5110
5111			bno = div_u64_rem(del->br_startblock,
5112					mp->m_sb.sb_rextsize, &mod);
5113			ASSERT(mod == 0);
5114
5115			error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
5116			if (error)
5117				goto done;
5118		}
5119
5120		do_fx = 0;
5121		nblks = len * mp->m_sb.sb_rextsize;
5122		qfield = XFS_TRANS_DQ_RTBCOUNT;
5123	} else {
5124		do_fx = 1;
5125		nblks = del->br_blockcount;
5126		qfield = XFS_TRANS_DQ_BCOUNT;
5127	}
5128
5129	del_endblock = del->br_startblock + del->br_blockcount;
5130	if (cur) {
5131		error = xfs_bmbt_lookup_eq(cur, &got, &i);
5132		if (error)
5133			goto done;
5134		if (XFS_IS_CORRUPT(mp, i != 1)) {
5135			error = -EFSCORRUPTED;
5136			goto done;
5137		}
5138	}
5139
5140	if (got.br_startoff == del->br_startoff)
5141		state |= BMAP_LEFT_FILLING;
5142	if (got_endoff == del_endoff)
5143		state |= BMAP_RIGHT_FILLING;
5144
5145	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5146	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5147		/*
5148		 * Matches the whole extent.  Delete the entry.
5149		 */
5150		xfs_iext_remove(ip, icur, state);
5151		xfs_iext_prev(ifp, icur);
5152		ifp->if_nextents--;
5153
5154		flags |= XFS_ILOG_CORE;
5155		if (!cur) {
5156			flags |= xfs_ilog_fext(whichfork);
5157			break;
5158		}
5159		if ((error = xfs_btree_delete(cur, &i)))
5160			goto done;
5161		if (XFS_IS_CORRUPT(mp, i != 1)) {
5162			error = -EFSCORRUPTED;
5163			goto done;
5164		}
5165		break;
5166	case BMAP_LEFT_FILLING:
5167		/*
5168		 * Deleting the first part of the extent.
5169		 */
5170		got.br_startoff = del_endoff;
5171		got.br_startblock = del_endblock;
5172		got.br_blockcount -= del->br_blockcount;
5173		xfs_iext_update_extent(ip, state, icur, &got);
5174		if (!cur) {
5175			flags |= xfs_ilog_fext(whichfork);
5176			break;
5177		}
5178		error = xfs_bmbt_update(cur, &got);
5179		if (error)
5180			goto done;
5181		break;
5182	case BMAP_RIGHT_FILLING:
5183		/*
5184		 * Deleting the last part of the extent.
5185		 */
5186		got.br_blockcount -= del->br_blockcount;
5187		xfs_iext_update_extent(ip, state, icur, &got);
5188		if (!cur) {
5189			flags |= xfs_ilog_fext(whichfork);
5190			break;
5191		}
5192		error = xfs_bmbt_update(cur, &got);
5193		if (error)
5194			goto done;
5195		break;
5196	case 0:
5197		/*
5198		 * Deleting the middle of the extent.
5199		 */
5200
5201		/*
5202		 * For directories, -ENOSPC is returned since a directory entry
5203		 * remove operation must not fail due to low extent count
5204		 * availability. -ENOSPC will be handled by higher layers of XFS
5205		 * by letting the corresponding empty Data/Free blocks linger
5206		 * until a future remove operation. Dabtree blocks would be
5207		 * swapped with the last block in the leaf space and then the
5208		 * new last block would be unmapped.
5209		 *
5210		 * The above logic also applies to the source directory entry of
5211		 * a rename operation.
5212		 */
5213		error = xfs_iext_count_may_overflow(ip, whichfork, 1);
5214		if (error) {
5215			ASSERT(S_ISDIR(VFS_I(ip)->i_mode) &&
5216				whichfork == XFS_DATA_FORK);
5217			error = -ENOSPC;
5218			goto done;
5219		}
5220
5221		old = got;
5222
5223		got.br_blockcount = del->br_startoff - got.br_startoff;
5224		xfs_iext_update_extent(ip, state, icur, &got);
5225
5226		new.br_startoff = del_endoff;
5227		new.br_blockcount = got_endoff - del_endoff;
5228		new.br_state = got.br_state;
5229		new.br_startblock = del_endblock;
5230
5231		flags |= XFS_ILOG_CORE;
5232		if (cur) {
5233			error = xfs_bmbt_update(cur, &got);
5234			if (error)
5235				goto done;
5236			error = xfs_btree_increment(cur, 0, &i);
5237			if (error)
5238				goto done;
5239			cur->bc_rec.b = new;
5240			error = xfs_btree_insert(cur, &i);
5241			if (error && error != -ENOSPC)
5242				goto done;
5243			/*
5244			 * If we get -ENOSPC back from the btree insert, it
5245			 * tried a split and we have a zero block reservation.
5246			 * Fix up our state and return the error.
5247			 */
5248			if (error == -ENOSPC) {
5249				/*
5250				 * Reset the cursor, don't trust it after any
5251				 * insert operation.
5252				 */
5253				error = xfs_bmbt_lookup_eq(cur, &got, &i);
5254				if (error)
5255					goto done;
5256				if (XFS_IS_CORRUPT(mp, i != 1)) {
5257					error = -EFSCORRUPTED;
5258					goto done;
5259				}
5260				/*
5261				 * Update the btree record back
5262				 * to the original value.
5263				 */
5264				error = xfs_bmbt_update(cur, &old);
5265				if (error)
5266					goto done;
5267				/*
5268				 * Reset the extent record back
5269				 * to the original value.
5270				 */
5271				xfs_iext_update_extent(ip, state, icur, &old);
5272				flags = 0;
5273				error = -ENOSPC;
5274				goto done;
5275			}
5276			if (XFS_IS_CORRUPT(mp, i != 1)) {
5277				error = -EFSCORRUPTED;
5278				goto done;
5279			}
5280		} else
5281			flags |= xfs_ilog_fext(whichfork);
5282
5283		ifp->if_nextents++;
5284		xfs_iext_next(ifp, icur);
5285		xfs_iext_insert(ip, icur, &new, state);
5286		break;
5287	}
5288
5289	/* remove reverse mapping */
5290	xfs_rmap_unmap_extent(tp, ip, whichfork, del);
5291
5292	/*
5293	 * If we need to, add to list of extents to delete.
5294	 */
5295	if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
5296		if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
5297			xfs_refcount_decrease_extent(tp, del);
5298		} else {
5299			__xfs_bmap_add_free(tp, del->br_startblock,
5300					del->br_blockcount, NULL,
5301					(bflags & XFS_BMAPI_NODISCARD) ||
5302					del->br_state == XFS_EXT_UNWRITTEN);
5303		}
5304	}
5305
5306	/*
5307	 * Adjust inode # blocks in the file.
5308	 */
5309	if (nblks)
5310		ip->i_nblocks -= nblks;
5311	/*
5312	 * Adjust quota data.
5313	 */
5314	if (qfield && !(bflags & XFS_BMAPI_REMAP))
5315		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5316
5317done:
5318	*logflagsp = flags;
5319	return error;
5320}
5321
5322/*
5323 * Unmap (remove) blocks from a file.
5324 * If nexts is nonzero then the number of extents to remove is limited to
5325 * that value.  If not all extents in the block range can be removed then
5326 * *done is set.
5327 */
5328int						/* error */
5329__xfs_bunmapi(
5330	struct xfs_trans	*tp,		/* transaction pointer */
5331	struct xfs_inode	*ip,		/* incore inode */
5332	xfs_fileoff_t		start,		/* first file offset deleted */
5333	xfs_filblks_t		*rlen,		/* i/o: amount remaining */
5334	int			flags,		/* misc flags */
5335	xfs_extnum_t		nexts)		/* number of extents max */
5336{
5337	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
5338	struct xfs_bmbt_irec	del;		/* extent being deleted */
5339	int			error;		/* error return value */
5340	xfs_extnum_t		extno;		/* extent number in list */
5341	struct xfs_bmbt_irec	got;		/* current extent record */
5342	struct xfs_ifork	*ifp;		/* inode fork pointer */
5343	int			isrt;		/* freeing in rt area */
5344	int			logflags;	/* transaction logging flags */
5345	xfs_extlen_t		mod;		/* rt extent offset */
5346	struct xfs_mount	*mp = ip->i_mount;
5347	int			tmp_logflags;	/* partial logging flags */
5348	int			wasdel;		/* was a delayed alloc extent */
5349	int			whichfork;	/* data or attribute fork */
5350	xfs_fsblock_t		sum;
5351	xfs_filblks_t		len = *rlen;	/* length to unmap in file */
5352	xfs_fileoff_t		max_len;
5353	xfs_fileoff_t		end;
5354	struct xfs_iext_cursor	icur;
5355	bool			done = false;
5356
5357	trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);
5358
5359	whichfork = xfs_bmapi_whichfork(flags);
5360	ASSERT(whichfork != XFS_COW_FORK);
5361	ifp = XFS_IFORK_PTR(ip, whichfork);
5362	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)))
5363		return -EFSCORRUPTED;
5364	if (XFS_FORCED_SHUTDOWN(mp))
5365		return -EIO;
5366
5367	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5368	ASSERT(len > 0);
5369	ASSERT(nexts >= 0);
5370
5371	/*
5372	 * Guesstimate how many blocks we can unmap without running the risk of
5373	 * blowing out the transaction with a mix of EFIs and reflink
5374	 * adjustments.
5375	 */
5376	if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
5377		max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
5378	else
5379		max_len = len;
5380
5381	error = xfs_iread_extents(tp, ip, whichfork);
5382	if (error)
5383		return error;
5384
5385	if (xfs_iext_count(ifp) == 0) {
5386		*rlen = 0;
5387		return 0;
5388	}
5389	XFS_STATS_INC(mp, xs_blk_unmap);
5390	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5391	end = start + len;
5392
5393	if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
5394		*rlen = 0;
5395		return 0;
5396	}
5397	end--;
5398
5399	logflags = 0;
5400	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
5401		/* the fork is in btree format, so we need a bmbt cursor */
5402		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5403		cur->bc_ino.flags = 0;
5404	} else
5405		cur = NULL;
5406
5407	if (isrt) {
5408		/*
5409		 * Synchronize by locking the bitmap inode.
5410		 */
5411		xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
5412		xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5413		xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
5414		xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
5415	}
5416
5417	extno = 0;
5418	while (end != (xfs_fileoff_t)-1 && end >= start &&
5419	       (nexts == 0 || extno < nexts) && max_len > 0) {
5420		/*
5421		 * Is the found extent after a hole in which end lives?
5422		 * Just back up to the previous extent, if so.
5423		 */
5424		if (got.br_startoff > end &&
5425		    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5426			done = true;
5427			break;
5428		}
5429		/*
5430		 * Is the last block of this extent before the range
5431		 * we're supposed to delete?  If so, we're done.
5432		 */
5433		end = XFS_FILEOFF_MIN(end,
5434			got.br_startoff + got.br_blockcount - 1);
5435		if (end < start)
5436			break;
5437		/*
5438		 * Then deal with the (possibly delayed) allocated space
5439		 * we found.
5440		 */
5441		del = got;
5442		wasdel = isnullstartblock(del.br_startblock);
5443
5444		if (got.br_startoff < start) {
5445			del.br_startoff = start;
5446			del.br_blockcount -= start - got.br_startoff;
5447			if (!wasdel)
5448				del.br_startblock += start - got.br_startoff;
5449		}
5450		if (del.br_startoff + del.br_blockcount > end + 1)
5451			del.br_blockcount = end + 1 - del.br_startoff;
5452
5453		/* How much can we safely unmap? */
5454		if (max_len < del.br_blockcount) {
5455			del.br_startoff += del.br_blockcount - max_len;
5456			if (!wasdel)
5457				del.br_startblock += del.br_blockcount - max_len;
5458			del.br_blockcount = max_len;
5459		}
5460
5461		if (!isrt)
5462			goto delete;
5463
5464		sum = del.br_startblock + del.br_blockcount;
5465		div_u64_rem(sum, mp->m_sb.sb_rextsize, &mod);
5466		if (mod) {
5467			/*
5468			 * Realtime extent not lined up at the end.
5469			 * The extent could have been split into written
5470			 * and unwritten pieces, or we could just be
5471			 * unmapping part of it.  But we can't really
5472			 * get rid of part of a realtime extent.
5473			 */
5474			if (del.br_state == XFS_EXT_UNWRITTEN) {
5475				/*
5476				 * This piece is unwritten, or we're not
5477				 * using unwritten extents.  Skip over it.
5478				 */
5479				ASSERT(end >= mod);
5480				end -= mod > del.br_blockcount ?
5481					del.br_blockcount : mod;
5482				if (end < got.br_startoff &&
5483				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5484					done = true;
5485					break;
5486				}
5487				continue;
5488			}
5489			/*
5490			 * It's written, turn it unwritten.
5491			 * This is better than zeroing it.
5492			 */
5493			ASSERT(del.br_state == XFS_EXT_NORM);
5494			ASSERT(tp->t_blk_res > 0);
5495			/*
5496			 * If this spans a realtime extent boundary,
5497			 * chop it back to the start of the one we end at.
5498			 */
5499			if (del.br_blockcount > mod) {
5500				del.br_startoff += del.br_blockcount - mod;
5501				del.br_startblock += del.br_blockcount - mod;
5502				del.br_blockcount = mod;
5503			}
5504			del.br_state = XFS_EXT_UNWRITTEN;
5505			error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5506					whichfork, &icur, &cur, &del,
5507					&logflags);
5508			if (error)
5509				goto error0;
5510			goto nodelete;
5511		}
5512		div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod);
5513		if (mod) {
5514			xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;
5515
5516			/*
5517			 * Realtime extent is lined up at the end but not
5518			 * at the front.  We'll get rid of full extents if
5519			 * we can.
5520			 */
5521			if (del.br_blockcount > off) {
5522				del.br_blockcount -= off;
5523				del.br_startoff += off;
5524				del.br_startblock += off;
5525			} else if (del.br_startoff == start &&
5526				   (del.br_state == XFS_EXT_UNWRITTEN ||
5527				    tp->t_blk_res == 0)) {
5528				/*
5529				 * Can't make it unwritten.  There isn't
5530				 * a full extent here so just skip it.
5531				 */
5532				ASSERT(end >= del.br_blockcount);
5533				end -= del.br_blockcount;
5534				if (got.br_startoff > end &&
5535				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5536					done = true;
5537					break;
5538				}
5539				continue;
5540			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
5541				struct xfs_bmbt_irec	prev;
5542				xfs_fileoff_t		unwrite_start;
5543
5544				/*
5545				 * This one is already unwritten.
5546				 * It must have a written left neighbor.
5547				 * Unwrite the killed part of that one and
5548				 * try again.
5549				 */
5550				if (!xfs_iext_prev_extent(ifp, &icur, &prev))
5551					ASSERT(0);
5552				ASSERT(prev.br_state == XFS_EXT_NORM);
5553				ASSERT(!isnullstartblock(prev.br_startblock));
5554				ASSERT(del.br_startblock ==
5555				       prev.br_startblock + prev.br_blockcount);
5556				unwrite_start = max3(start,
5557						     del.br_startoff - mod,
5558						     prev.br_startoff);
5559				mod = unwrite_start - prev.br_startoff;
5560				prev.br_startoff = unwrite_start;
5561				prev.br_startblock += mod;
5562				prev.br_blockcount -= mod;
5563				prev.br_state = XFS_EXT_UNWRITTEN;
5564				error = xfs_bmap_add_extent_unwritten_real(tp,
5565						ip, whichfork, &icur, &cur,
5566						&prev, &logflags);
5567				if (error)
5568					goto error0;
5569				goto nodelete;
5570			} else {
5571				ASSERT(del.br_state == XFS_EXT_NORM);
5572				del.br_state = XFS_EXT_UNWRITTEN;
5573				error = xfs_bmap_add_extent_unwritten_real(tp,
5574						ip, whichfork, &icur, &cur,
5575						&del, &logflags);
5576				if (error)
5577					goto error0;
5578				goto nodelete;
5579			}
5580		}
5581
5582delete:
5583		if (wasdel) {
5584			error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
5585					&got, &del);
5586		} else {
5587			error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
5588					&del, &tmp_logflags, whichfork,
5589					flags);
5590			logflags |= tmp_logflags;
5591		}
5592
5593		if (error)
5594			goto error0;
5595
5596		max_len -= del.br_blockcount;
5597		end = del.br_startoff - 1;
5598nodelete:
5599		/*
5600		 * If not done go on to the next (previous) record.
5601		 */
5602		if (end != (xfs_fileoff_t)-1 && end >= start) {
5603			if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5604			    (got.br_startoff > end &&
5605			     !xfs_iext_prev_extent(ifp, &icur, &got))) {
5606				done = true;
5607				break;
5608			}
5609			extno++;
5610		}
5611	}
5612	if (done || end == (xfs_fileoff_t)-1 || end < start)
5613		*rlen = 0;
5614	else
5615		*rlen = end - start + 1;
5616
5617	/*
5618	 * Convert to a btree if necessary.
5619	 */
5620	if (xfs_bmap_needs_btree(ip, whichfork)) {
5621		ASSERT(cur == NULL);
5622		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
5623				&tmp_logflags, whichfork);
5624		logflags |= tmp_logflags;
5625	} else {
5626		error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags,
5627			whichfork);
5628	}
5629
5630error0:
5631	/*
5632	 * Log everything.  Do this after conversion; there's no point in
5633	 * logging the extent records if we've converted to btree format.
5634	 */
5635	if ((logflags & xfs_ilog_fext(whichfork)) &&
5636	    ifp->if_format != XFS_DINODE_FMT_EXTENTS)
5637		logflags &= ~xfs_ilog_fext(whichfork);
5638	else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5639		 ifp->if_format != XFS_DINODE_FMT_BTREE)
5640		logflags &= ~xfs_ilog_fbroot(whichfork);
5641	/*
5642	 * Log the inode even in the error case; if the transaction
5643	 * is dirty we'll need to shut down the filesystem.
5644	 */
5645	if (logflags)
5646		xfs_trans_log_inode(tp, ip, logflags);
5647	if (cur) {
5648		if (!error)
5649			cur->bc_ino.allocated = 0;
5650		xfs_btree_del_cursor(cur, error);
5651	}
5652	return error;
5653}
5654
5655/* Unmap a range of a file. */
5656int
5657xfs_bunmapi(
5658	xfs_trans_t		*tp,
5659	struct xfs_inode	*ip,
5660	xfs_fileoff_t		bno,
5661	xfs_filblks_t		len,
5662	int			flags,
5663	xfs_extnum_t		nexts,
5664	int			*done)
5665{
5666	int			error;
5667
5668	error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts);
5669	*done = (len == 0);
5670	return error;
5671}
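/*
 * A sketch of how callers typically drive xfs_bunmapi() (names
 * hypothetical): unmap a few extents at a time and finish the deferred
 * work between rounds so a single transaction never grows too large.
 *
 *	int	done = 0;
 *
 *	while (!done) {
 *		error = xfs_bunmapi(tp, ip, start_fsb, len_fsb, 0,
 *				XFS_ITRUNC_MAX_EXTENTS, &done);
 *		if (error)
 *			break;
 *		error = xfs_defer_finish(&tp);
 *		if (error)
 *			break;
 *	}
 */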
5672
5673/*
5674 * Determine whether an extent shift can be accomplished by a merge with the
5675 * extent that precedes the target hole of the shift.
5676 */
5677STATIC bool
5678xfs_bmse_can_merge(
5679	struct xfs_bmbt_irec	*left,	/* preceding extent */
5680	struct xfs_bmbt_irec	*got,	/* current extent to shift */
5681	xfs_fileoff_t		shift)	/* shift fsb */
5682{
5683	xfs_fileoff_t		startoff;
5684
5685	startoff = got->br_startoff - shift;
5686
5687	/*
5688	 * The extent, once shifted, must be adjacent in-file and on-disk with
5689	 * the preceding extent.
5690	 */
5691	if ((left->br_startoff + left->br_blockcount != startoff) ||
5692	    (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5693	    (left->br_state != got->br_state) ||
5694	    (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
5695		return false;
5696
5697	return true;
5698}
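/*
 * For instance, with illustrative values: left = [0, +10) at block 100 and
 * got = [15, +5) at block 110 can merge under a shift of 5, because the
 * shifted start offset (10) abuts left in the file, block 110 abuts it on
 * disk, the states match, and the combined length (15) fits in MAXEXTLEN.
 */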
5699
5700/*
5701 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5702 * hole in the file. If an extent shift would result in the extent being fully
5703 * adjacent to the extent that currently precedes the hole, we can merge with
5704 * the preceding extent rather than do the shift.
5705 *
5706 * This function assumes the caller has verified a shift-by-merge is possible
5707 * with the provided extents via xfs_bmse_can_merge().
5708 */
5709STATIC int
5710xfs_bmse_merge(
5711	struct xfs_trans		*tp,
5712	struct xfs_inode		*ip,
5713	int				whichfork,
5714	xfs_fileoff_t			shift,		/* shift fsb */
5715	struct xfs_iext_cursor		*icur,
5716	struct xfs_bmbt_irec		*got,		/* extent to shift */
5717	struct xfs_bmbt_irec		*left,		/* preceding extent */
5718	struct xfs_btree_cur		*cur,
5719	int				*logflags)	/* output */
5720{
5721	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, whichfork);
5722	struct xfs_bmbt_irec		new;
5723	xfs_filblks_t			blockcount;
5724	int				error, i;
5725	struct xfs_mount		*mp = ip->i_mount;
5726
5727	blockcount = left->br_blockcount + got->br_blockcount;
5728
5729	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5730	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5731	ASSERT(xfs_bmse_can_merge(left, got, shift));
5732
5733	new = *left;
5734	new.br_blockcount = blockcount;
5735
5736	/*
5737	 * Update the on-disk extent count, the btree if necessary and log the
5738	 * inode.
5739	 */
5740	ifp->if_nextents--;
5741	*logflags |= XFS_ILOG_CORE;
5742	if (!cur) {
5743		*logflags |= XFS_ILOG_DEXT;
5744		goto done;
5745	}
5746
5747	/* lookup and remove the extent to merge */
5748	error = xfs_bmbt_lookup_eq(cur, got, &i);
5749	if (error)
5750		return error;
5751	if (XFS_IS_CORRUPT(mp, i != 1))
5752		return -EFSCORRUPTED;
5753
5754	error = xfs_btree_delete(cur, &i);
5755	if (error)
5756		return error;
5757	if (XFS_IS_CORRUPT(mp, i != 1))
5758		return -EFSCORRUPTED;
5759
5760	/* lookup and update size of the previous extent */
5761	error = xfs_bmbt_lookup_eq(cur, left, &i);
5762	if (error)
5763		return error;
5764	if (XFS_IS_CORRUPT(mp, i != 1))
5765		return -EFSCORRUPTED;
5766
5767	error = xfs_bmbt_update(cur, &new);
5768	if (error)
5769		return error;
5770
5771	/* change to extent format if required after extent removal */
5772	error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
5773	if (error)
5774		return error;
5775
5776done:
5777	xfs_iext_remove(ip, icur, 0);
5778	xfs_iext_prev(ifp, icur);
5779	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5780			&new);
5781
5782	/* update reverse mapping. rmap functions merge the rmaps for us */
5783	xfs_rmap_unmap_extent(tp, ip, whichfork, got);
5784	memcpy(&new, got, sizeof(new));
5785	new.br_startoff = left->br_startoff + left->br_blockcount;
5786	xfs_rmap_map_extent(tp, ip, whichfork, &new);
5787	return 0;
5788}
5789
5790static int
5791xfs_bmap_shift_update_extent(
5792	struct xfs_trans	*tp,
5793	struct xfs_inode	*ip,
5794	int			whichfork,
5795	struct xfs_iext_cursor	*icur,
5796	struct xfs_bmbt_irec	*got,
5797	struct xfs_btree_cur	*cur,
5798	int			*logflags,
5799	xfs_fileoff_t		startoff)
5800{
5801	struct xfs_mount	*mp = ip->i_mount;
5802	struct xfs_bmbt_irec	prev = *got;
5803	int			error, i;
5804
5805	*logflags |= XFS_ILOG_CORE;
5806
5807	got->br_startoff = startoff;
5808
5809	if (cur) {
5810		error = xfs_bmbt_lookup_eq(cur, &prev, &i);
5811		if (error)
5812			return error;
5813		if (XFS_IS_CORRUPT(mp, i != 1))
5814			return -EFSCORRUPTED;
5815
5816		error = xfs_bmbt_update(cur, got);
5817		if (error)
5818			return error;
5819	} else {
5820		*logflags |= XFS_ILOG_DEXT;
5821	}
5822
5823	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5824			got);
5825
5826	/* update reverse mapping */
5827	xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
5828	xfs_rmap_map_extent(tp, ip, whichfork, got);
5829	return 0;
5830}
5831
5832int
5833xfs_bmap_collapse_extents(
5834	struct xfs_trans	*tp,
5835	struct xfs_inode	*ip,
5836	xfs_fileoff_t		*next_fsb,
5837	xfs_fileoff_t		offset_shift_fsb,
5838	bool			*done)
5839{
5840	int			whichfork = XFS_DATA_FORK;
5841	struct xfs_mount	*mp = ip->i_mount;
5842	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
5843	struct xfs_btree_cur	*cur = NULL;
5844	struct xfs_bmbt_irec	got, prev;
5845	struct xfs_iext_cursor	icur;
5846	xfs_fileoff_t		new_startoff;
5847	int			error = 0;
5848	int			logflags = 0;
5849
5850	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5851	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5852		return -EFSCORRUPTED;
5853	}
5854
5855	if (XFS_FORCED_SHUTDOWN(mp))
5856		return -EIO;
5857
5858	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5859
5860	error = xfs_iread_extents(tp, ip, whichfork);
5861	if (error)
5862		return error;
5863
5864	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
5865		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5866		cur->bc_ino.flags = 0;
5867	}
5868
5869	if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5870		*done = true;
5871		goto del_cursor;
5872	}
5873	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5874		error = -EFSCORRUPTED;
5875		goto del_cursor;
5876	}
5877
5878	new_startoff = got.br_startoff - offset_shift_fsb;
5879	if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
5880		if (new_startoff < prev.br_startoff + prev.br_blockcount) {
5881			error = -EINVAL;
5882			goto del_cursor;
5883		}
5884
5885		if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
5886			error = xfs_bmse_merge(tp, ip, whichfork,
5887					offset_shift_fsb, &icur, &got, &prev,
5888					cur, &logflags);
5889			if (error)
5890				goto del_cursor;
5891			goto done;
5892		}
5893	} else {
5894		if (got.br_startoff < offset_shift_fsb) {
5895			error = -EINVAL;
5896			goto del_cursor;
5897		}
5898	}
5899
5900	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5901			cur, &logflags, new_startoff);
5902	if (error)
5903		goto del_cursor;
5904
5905done:
5906	if (!xfs_iext_next_extent(ifp, &icur, &got)) {
5907		*done = true;
5908		goto del_cursor;
5909	}
5910
5911	*next_fsb = got.br_startoff;
5912del_cursor:
5913	if (cur)
5914		xfs_btree_del_cursor(cur, error);
5915	if (logflags)
5916		xfs_trans_log_inode(tp, ip, logflags);
5917	return error;
5918}
5919
5920/* Make sure we won't be right-shifting an extent past the maximum bound. */
5921int
5922xfs_bmap_can_insert_extents(
5923	struct xfs_inode	*ip,
5924	xfs_fileoff_t		off,
5925	xfs_fileoff_t		shift)
5926{
5927	struct xfs_bmbt_irec	got;
5928	int			is_empty;
5929	int			error = 0;
5930
5931	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5932
5933	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
5934		return -EIO;
5935
5936	xfs_ilock(ip, XFS_ILOCK_EXCL);
5937	error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
5938	if (!error && !is_empty && got.br_startoff >= off &&
5939	    ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
5940		error = -EINVAL;
5941	xfs_iunlock(ip, XFS_ILOCK_EXCL);
5942
5943	return error;
5944}
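/*
 * An illustrative overflow the check above catches: if the last extent
 * starts at BMBT_STARTOFF_MASK - 2 and the shift is 8, then
 * (startoff + shift) & BMBT_STARTOFF_MASK wraps around to 5, which is
 * below the original start, so the shift is rejected with -EINVAL.
 */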
5945
5946int
5947xfs_bmap_insert_extents(
5948	struct xfs_trans	*tp,
5949	struct xfs_inode	*ip,
5950	xfs_fileoff_t		*next_fsb,
5951	xfs_fileoff_t		offset_shift_fsb,
5952	bool			*done,
5953	xfs_fileoff_t		stop_fsb)
5954{
5955	int			whichfork = XFS_DATA_FORK;
5956	struct xfs_mount	*mp = ip->i_mount;
5957	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
5958	struct xfs_btree_cur	*cur = NULL;
5959	struct xfs_bmbt_irec	got, next;
5960	struct xfs_iext_cursor	icur;
5961	xfs_fileoff_t		new_startoff;
5962	int			error = 0;
5963	int			logflags = 0;
5964
5965	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5966	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5967		return -EFSCORRUPTED;
5968	}
5969
5970	if (XFS_FORCED_SHUTDOWN(mp))
5971		return -EIO;
5972
5973	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5974
5975	error = xfs_iread_extents(tp, ip, whichfork);
5976	if (error)
5977		return error;
5978
5979	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
5980		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5981		cur->bc_ino.flags = 0;
5982	}
5983
5984	if (*next_fsb == NULLFSBLOCK) {
5985		xfs_iext_last(ifp, &icur);
5986		if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5987		    stop_fsb > got.br_startoff) {
5988			*done = true;
5989			goto del_cursor;
5990		}
5991	} else {
5992		if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5993			*done = true;
5994			goto del_cursor;
5995		}
5996	}
5997	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5998		error = -EFSCORRUPTED;
5999		goto del_cursor;
6000	}
6001
6002	if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
6003		error = -EFSCORRUPTED;
6004		goto del_cursor;
6005	}
6006
6007	new_startoff = got.br_startoff + offset_shift_fsb;
6008	if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
6009		if (new_startoff + got.br_blockcount > next.br_startoff) {
6010			error = -EINVAL;
6011			goto del_cursor;
6012		}
6013
6014		/*
6015		 * Unlike a left shift (which involves a hole punch), a right
6016		 * shift does not modify extent neighbors in any way.  We should
6017		 * never find mergeable extents in this scenario.  Check anyway
6018		 * and warn if we encounter two extents that could be one.
6019		 */
6020		if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
6021			WARN_ON_ONCE(1);
6022	}
6023
6024	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
6025			cur, &logflags, new_startoff);
6026	if (error)
6027		goto del_cursor;
6028
6029	if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
6030	    stop_fsb >= got.br_startoff + got.br_blockcount) {
6031		*done = true;
6032		goto del_cursor;
6033	}
6034
6035	*next_fsb = got.br_startoff;
6036del_cursor:
6037	if (cur)
6038		xfs_btree_del_cursor(cur, error);
6039	if (logflags)
6040		xfs_trans_log_inode(tp, ip, logflags);
6041	return error;
6042}
6043
6044/*
6045 * Split an extent into two extents such that split_fsb becomes the first
6046 * block of the new, second extent.  @split_fsb is the block at which the
6047 * extent is split.  If split_fsb lies in a hole or at the first block of
6048 * an existing extent, there is nothing to split and we return 0.
6049 */
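/*
 * For example, with illustrative numbers: splitting got = [10, +20) at
 * block 130 at split_fsb = 15 yields got = [10, +5) at block 130 and
 * new = [15, +15) at block 135.
 */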
6050int
6051xfs_bmap_split_extent(
6052	struct xfs_trans	*tp,
6053	struct xfs_inode	*ip,
6054	xfs_fileoff_t		split_fsb)
6055{
6056	int				whichfork = XFS_DATA_FORK;
6057	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, whichfork);
6058	struct xfs_btree_cur		*cur = NULL;
6059	struct xfs_bmbt_irec		got;
6060	struct xfs_bmbt_irec		new; /* split extent */
6061	struct xfs_mount		*mp = ip->i_mount;
6062	xfs_fsblock_t			gotblkcnt; /* new block count for got */
6063	struct xfs_iext_cursor		icur;
6064	int				error = 0;
6065	int				logflags = 0;
6066	int				i = 0;
6067
6068	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
6069	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
6070		return -EFSCORRUPTED;
6071	}
6072
6073	if (XFS_FORCED_SHUTDOWN(mp))
6074		return -EIO;
6075
6076	/* Read in all the extents */
6077	error = xfs_iread_extents(tp, ip, whichfork);
6078	if (error)
6079		return error;
6080
6081	/*
6082	 * If there are no extents, or split_fsb lies in a hole, we are done.
6083	 */
6084	if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
6085	    got.br_startoff >= split_fsb)
6086		return 0;
6087
6088	gotblkcnt = split_fsb - got.br_startoff;
6089	new.br_startoff = split_fsb;
6090	new.br_startblock = got.br_startblock + gotblkcnt;
6091	new.br_blockcount = got.br_blockcount - gotblkcnt;
6092	new.br_state = got.br_state;
6093
6094	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
6095		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
6096		cur->bc_ino.flags = 0;
6097		error = xfs_bmbt_lookup_eq(cur, &got, &i);
6098		if (error)
6099			goto del_cursor;
6100		if (XFS_IS_CORRUPT(mp, i != 1)) {
6101			error = -EFSCORRUPTED;
6102			goto del_cursor;
6103		}
6104	}
6105
6106	got.br_blockcount = gotblkcnt;
6107	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
6108			&got);
6109
6110	logflags = XFS_ILOG_CORE;
6111	if (cur) {
6112		error = xfs_bmbt_update(cur, &got);
6113		if (error)
6114			goto del_cursor;
6115	} else
6116		logflags |= XFS_ILOG_DEXT;
6117
6118	/* Add new extent */
6119	xfs_iext_next(ifp, &icur);
6120	xfs_iext_insert(ip, &icur, &new, 0);
6121	ifp->if_nextents++;
6122
6123	if (cur) {
6124		error = xfs_bmbt_lookup_eq(cur, &new, &i);
6125		if (error)
6126			goto del_cursor;
6127		if (XFS_IS_CORRUPT(mp, i != 0)) {
6128			error = -EFSCORRUPTED;
6129			goto del_cursor;
6130		}
6131		error = xfs_btree_insert(cur, &i);
6132		if (error)
6133			goto del_cursor;
6134		if (XFS_IS_CORRUPT(mp, i != 1)) {
6135			error = -EFSCORRUPTED;
6136			goto del_cursor;
6137		}
6138	}
6139
6140	/*
6141	 * Convert to a btree if necessary.
6142	 */
6143	if (xfs_bmap_needs_btree(ip, whichfork)) {
6144		int tmp_logflags; /* partial log flag return val */
6145
6146		ASSERT(cur == NULL);
6147		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
6148				&tmp_logflags, whichfork);
6149		logflags |= tmp_logflags;
6150	}
6151
6152del_cursor:
6153	if (cur) {
6154		cur->bc_ino.allocated = 0;
6155		xfs_btree_del_cursor(cur, error);
6156	}
6157
6158	if (logflags)
6159		xfs_trans_log_inode(tp, ip, logflags);
6160	return error;
6161}
6162
6163/* Deferred mapping is only for real extents in the data fork. */
6164static bool
6165xfs_bmap_is_update_needed(
6166	struct xfs_bmbt_irec	*bmap)
6167{
6168	return  bmap->br_startblock != HOLESTARTBLOCK &&
6169		bmap->br_startblock != DELAYSTARTBLOCK;
6170}
6171
6172/* Record a bmap intent. */
6173static int
6174__xfs_bmap_add(
6175	struct xfs_trans		*tp,
6176	enum xfs_bmap_intent_type	type,
6177	struct xfs_inode		*ip,
6178	int				whichfork,
6179	struct xfs_bmbt_irec		*bmap)
6180{
6181	struct xfs_bmap_intent		*bi;
6182
6183	trace_xfs_bmap_defer(tp->t_mountp,
6184			XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
6185			type,
6186			XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
6187			ip->i_ino, whichfork,
6188			bmap->br_startoff,
6189			bmap->br_blockcount,
6190			bmap->br_state);
6191
6192	bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_NOFS);
6193	INIT_LIST_HEAD(&bi->bi_list);
6194	bi->bi_type = type;
6195	bi->bi_owner = ip;
6196	bi->bi_whichfork = whichfork;
6197	bi->bi_bmap = *bmap;
6198
6199	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
6200	return 0;
6201}
6202
6203/* Map an extent into a file. */
6204void
6205xfs_bmap_map_extent(
6206	struct xfs_trans	*tp,
6207	struct xfs_inode	*ip,
6208	struct xfs_bmbt_irec	*PREV)
6209{
6210	if (!xfs_bmap_is_update_needed(PREV))
6211		return;
6212
6213	__xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV);
6214}
6215
6216/* Unmap an extent out of a file. */
6217void
6218xfs_bmap_unmap_extent(
6219	struct xfs_trans	*tp,
6220	struct xfs_inode	*ip,
6221	struct xfs_bmbt_irec	*PREV)
6222{
6223	if (!xfs_bmap_is_update_needed(PREV))
6224		return;
6225
6226	__xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV);
6227}
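/*
 * Both helpers above only record an intent; the actual mapping change
 * happens when the caller finishes its deferred operations, which lands
 * in xfs_bmap_finish_one() below.  A sketch (names hypothetical):
 *
 *	xfs_bmap_unmap_extent(tp, ip, &irec);
 *	error = xfs_defer_finish(&tp);
 */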
6228
6229/*
6230 * Process one of the deferred bmap operations.  On return, *blockcount
6231 * reflects any requested work that still remains to be done.
6232 */
6233int
6234xfs_bmap_finish_one(
6235	struct xfs_trans		*tp,
6236	struct xfs_inode		*ip,
6237	enum xfs_bmap_intent_type	type,
6238	int				whichfork,
6239	xfs_fileoff_t			startoff,
6240	xfs_fsblock_t			startblock,
6241	xfs_filblks_t			*blockcount,
6242	xfs_exntst_t			state)
6243{
6244	int				error = 0;
6245
6246	ASSERT(tp->t_firstblock == NULLFSBLOCK);
6247
6248	trace_xfs_bmap_deferred(tp->t_mountp,
6249			XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
6250			XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
6251			ip->i_ino, whichfork, startoff, *blockcount, state);
6252
6253	if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
6254		return -EFSCORRUPTED;
6255
6256	if (XFS_TEST_ERROR(false, tp->t_mountp,
6257			XFS_ERRTAG_BMAP_FINISH_ONE))
6258		return -EIO;
6259
6260	switch (type) {
6261	case XFS_BMAP_MAP:
6262		error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
6263				startblock, 0);
6264		*blockcount = 0;
6265		break;
6266	case XFS_BMAP_UNMAP:
6267		error = __xfs_bunmapi(tp, ip, startoff, blockcount,
6268				XFS_BMAPI_REMAP, 1);
6269		break;
6270	default:
6271		ASSERT(0);
6272		error = -EFSCORRUPTED;
6273	}
6274
6275	return error;
6276}
6277
6278/* Check that an inode's extent does not have invalid flags or bad ranges. */
6279xfs_failaddr_t
6280xfs_bmap_validate_extent(
6281	struct xfs_inode	*ip,
6282	int			whichfork,
6283	struct xfs_bmbt_irec	*irec)
6284{
6285	struct xfs_mount	*mp = ip->i_mount;
6286
6287	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
6288		return __this_address;
6289
6290	if (XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK) {
6291		if (!xfs_verify_rtext(mp, irec->br_startblock,
6292					  irec->br_blockcount))
6293			return __this_address;
6294	} else {
6295		if (!xfs_verify_fsbext(mp, irec->br_startblock,
6296					   irec->br_blockcount))
6297			return __this_address;
6298	}
6299	if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
6300		return __this_address;
6301	return NULL;
6302}