Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.15.
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4 * All Rights Reserved.
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_log_format.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_bit.h"
  13#include "xfs_sb.h"
  14#include "xfs_mount.h"
  15#include "xfs_defer.h"
  16#include "xfs_dir2.h"
  17#include "xfs_inode.h"
  18#include "xfs_btree.h"
  19#include "xfs_trans.h"
  20#include "xfs_alloc.h"
  21#include "xfs_bmap.h"
  22#include "xfs_bmap_util.h"
  23#include "xfs_bmap_btree.h"
  24#include "xfs_rtalloc.h"
  25#include "xfs_errortag.h"
  26#include "xfs_error.h"
  27#include "xfs_quota.h"
  28#include "xfs_trans_space.h"
  29#include "xfs_buf_item.h"
  30#include "xfs_trace.h"
  31#include "xfs_attr_leaf.h"
  32#include "xfs_filestream.h"
  33#include "xfs_rmap.h"
  34#include "xfs_ag_resv.h"
  35#include "xfs_refcount.h"
  36#include "xfs_icache.h"
  37#include "xfs_iomap.h"
  38
  39
  40kmem_zone_t		*xfs_bmap_free_item_zone;
  41
  42/*
  43 * Miscellaneous helper functions
  44 */
  45
  46/*
  47 * Compute and fill in the value of the maximum depth of a bmap btree
  48 * in this filesystem.  Done once, during mount.
  49 */
  50void
  51xfs_bmap_compute_maxlevels(
  52	xfs_mount_t	*mp,		/* file system mount structure */
  53	int		whichfork)	/* data or attr fork */
  54{
  55	int		level;		/* btree level */
  56	uint		maxblocks;	/* max blocks at this level */
  57	uint		maxleafents;	/* max leaf entries possible */
  58	int		maxrootrecs;	/* max records in root block */
  59	int		minleafrecs;	/* min records in leaf block */
  60	int		minnoderecs;	/* min records in node block */
  61	int		sz;		/* root block size */
  62
  63	/*
  64	 * The maximum number of extents in a file, hence the maximum number of
  65	 * leaf entries, is controlled by the size of the on-disk extent count,
  66	 * either a signed 32-bit number for the data fork, or a signed 16-bit
  67	 * number for the attr fork.
  68	 *
  69	 * Note that we can no longer assume that if we are in ATTR1 that
  70	 * the fork offset of all the inodes will be
  71	 * (xfs_default_attroffset(ip) >> 3) because we could have mounted
  72	 * with ATTR2 and then mounted back with ATTR1, keeping the
  73	 * di_forkoff's fixed but probably at various positions. Therefore,
  74	 * for both ATTR1 and ATTR2 we have to assume the worst case scenario
  75	 * of a minimum size available.
  76	 */
  77	if (whichfork == XFS_DATA_FORK) {
  78		maxleafents = MAXEXTNUM;
  79		sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
  80	} else {
  81		maxleafents = MAXAEXTNUM;
  82		sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
  83	}
  84	maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
  85	minleafrecs = mp->m_bmap_dmnr[0];
  86	minnoderecs = mp->m_bmap_dmnr[1];
  87	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
  88	for (level = 1; maxblocks > 1; level++) {
  89		if (maxblocks <= maxrootrecs)
  90			maxblocks = 1;
  91		else
  92			maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
  93	}
  94	mp->m_bm_maxlevels[whichfork] = level;
  95}
  96
  97STATIC int				/* error */
  98xfs_bmbt_lookup_eq(
  99	struct xfs_btree_cur	*cur,
 100	struct xfs_bmbt_irec	*irec,
 101	int			*stat)	/* success/failure */
 102{
 103	cur->bc_rec.b = *irec;
 104	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
 105}
 106
 107STATIC int				/* error */
 108xfs_bmbt_lookup_first(
 109	struct xfs_btree_cur	*cur,
 110	int			*stat)	/* success/failure */
 111{
 112	cur->bc_rec.b.br_startoff = 0;
 113	cur->bc_rec.b.br_startblock = 0;
 114	cur->bc_rec.b.br_blockcount = 0;
 115	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 116}
 117
 118/*
 119 * Check if the inode needs to be converted to btree format.
 120 */
 121static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
 122{
 123	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
 124
 125	return whichfork != XFS_COW_FORK &&
 126		ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
 127		ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork);
 128}
 129
 130/*
 131 * Check if the inode should be converted to extent format.
 132 */
 133static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
 134{
 135	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
 136
 137	return whichfork != XFS_COW_FORK &&
 138		ifp->if_format == XFS_DINODE_FMT_BTREE &&
 139		ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork);
 140}
 141
 142/*
 143 * Update the record referred to by cur to the value given by irec
 144 * This either works (return 0) or gets an EFSCORRUPTED error.
 145 */
 146STATIC int
 147xfs_bmbt_update(
 148	struct xfs_btree_cur	*cur,
 149	struct xfs_bmbt_irec	*irec)
 150{
 151	union xfs_btree_rec	rec;
 152
 153	xfs_bmbt_disk_set_all(&rec.bmbt, irec);
 154	return xfs_btree_update(cur, &rec);
 155}
 156
 157/*
 158 * Compute the worst-case number of indirect blocks that will be used
 159 * for ip's delayed extent of length "len".
 160 */
 161STATIC xfs_filblks_t
 162xfs_bmap_worst_indlen(
 163	xfs_inode_t	*ip,		/* incore inode pointer */
 164	xfs_filblks_t	len)		/* delayed extent length */
 165{
 166	int		level;		/* btree level number */
 167	int		maxrecs;	/* maximum record count at this level */
 168	xfs_mount_t	*mp;		/* mount structure */
 169	xfs_filblks_t	rval;		/* return value */
 170
 171	mp = ip->i_mount;
 172	maxrecs = mp->m_bmap_dmxr[0];
 173	for (level = 0, rval = 0;
 174	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
 175	     level++) {
 176		len += maxrecs - 1;
 177		do_div(len, maxrecs);
 178		rval += len;
 179		if (len == 1)
 180			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
 181				level - 1;
 182		if (level == 0)
 183			maxrecs = mp->m_bmap_dmxr[1];
 184	}
 185	return rval;
 186}
 187
 188/*
 189 * Calculate the default attribute fork offset for newly created inodes.
 190 */
 191uint
 192xfs_default_attroffset(
 193	struct xfs_inode	*ip)
 194{
 195	struct xfs_mount	*mp = ip->i_mount;
 196	uint			offset;
 197
 198	if (mp->m_sb.sb_inodesize == 256)
 199		offset = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
 200	else
 201		offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
 202
 203	ASSERT(offset < XFS_LITINO(mp));
 204	return offset;
 205}
 206
 207/*
 208 * Helper routine to reset inode di_forkoff field when switching
 209 * attribute fork from local to extent format - we reset it where
 210 * possible to make space available for inline data fork extents.
 211 */
 212STATIC void
 213xfs_bmap_forkoff_reset(
 214	xfs_inode_t	*ip,
 215	int		whichfork)
 216{
 217	if (whichfork == XFS_ATTR_FORK &&
 218	    ip->i_df.if_format != XFS_DINODE_FMT_DEV &&
 219	    ip->i_df.if_format != XFS_DINODE_FMT_BTREE) {
 220		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;
 221
 222		if (dfl_forkoff > ip->i_d.di_forkoff)
 223			ip->i_d.di_forkoff = dfl_forkoff;
 224	}
 225}
 226
 227#ifdef DEBUG
 228STATIC struct xfs_buf *
 229xfs_bmap_get_bp(
 230	struct xfs_btree_cur	*cur,
 231	xfs_fsblock_t		bno)
 232{
 233	struct xfs_log_item	*lip;
 234	int			i;
 235
 236	if (!cur)
 237		return NULL;
 238
 239	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
 240		if (!cur->bc_bufs[i])
 241			break;
 242		if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
 243			return cur->bc_bufs[i];
 244	}
 245
 246	/* Chase down all the log items to see if the bp is there */
 247	list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
 248		struct xfs_buf_log_item	*bip = (struct xfs_buf_log_item *)lip;
 249
 250		if (bip->bli_item.li_type == XFS_LI_BUF &&
 251		    XFS_BUF_ADDR(bip->bli_buf) == bno)
 252			return bip->bli_buf;
 253	}
 254
 255	return NULL;
 256}
 257
 258STATIC void
 259xfs_check_block(
 260	struct xfs_btree_block	*block,
 261	xfs_mount_t		*mp,
 262	int			root,
 263	short			sz)
 264{
 265	int			i, j, dmxr;
 266	__be64			*pp, *thispa;	/* pointer to block address */
 267	xfs_bmbt_key_t		*prevp, *keyp;
 268
 269	ASSERT(be16_to_cpu(block->bb_level) > 0);
 270
 271	prevp = NULL;
 272	for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
 273		dmxr = mp->m_bmap_dmxr[0];
 274		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
 275
 276		if (prevp) {
 277			ASSERT(be64_to_cpu(prevp->br_startoff) <
 278			       be64_to_cpu(keyp->br_startoff));
 279		}
 280		prevp = keyp;
 281
 282		/*
 283		 * Compare the block numbers to see if there are dups.
 284		 */
 285		if (root)
 286			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
 287		else
 288			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
 289
 290		for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
 291			if (root)
 292				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
 293			else
 294				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
 295			if (*thispa == *pp) {
 296				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
 297					__func__, j, i,
 298					(unsigned long long)be64_to_cpu(*thispa));
 299				xfs_err(mp, "%s: ptrs are equal in node\n",
 300					__func__);
 301				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 302			}
 303		}
 304	}
 305}
 306
 307/*
 308 * Check that the extents for the inode ip are in the right order in all
 309 * btree leaves. THis becomes prohibitively expensive for large extent count
 310 * files, so don't bother with inodes that have more than 10,000 extents in
 311 * them. The btree record ordering checks will still be done, so for such large
 312 * bmapbt constructs that is going to catch most corruptions.
 313 */
 314STATIC void
 315xfs_bmap_check_leaf_extents(
 316	xfs_btree_cur_t		*cur,	/* btree cursor or null */
 317	xfs_inode_t		*ip,		/* incore inode pointer */
 318	int			whichfork)	/* data or attr fork */
 319{
 320	struct xfs_mount	*mp = ip->i_mount;
 321	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 322	struct xfs_btree_block	*block;	/* current btree block */
 323	xfs_fsblock_t		bno;	/* block # of "block" */
 324	xfs_buf_t		*bp;	/* buffer for "block" */
 325	int			error;	/* error return value */
 326	xfs_extnum_t		i=0, j;	/* index into the extents list */
 327	int			level;	/* btree level, for checking */
 328	__be64			*pp;	/* pointer to block address */
 329	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
 330	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
 331	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
 332	int			bp_release = 0;
 333
 334	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
 335		return;
 336
 337	/* skip large extent count inodes */
 338	if (ip->i_df.if_nextents > 10000)
 339		return;
 340
 341	bno = NULLFSBLOCK;
 342	block = ifp->if_broot;
 343	/*
 344	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
 345	 */
 346	level = be16_to_cpu(block->bb_level);
 347	ASSERT(level > 0);
 348	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
 349	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
 350	bno = be64_to_cpu(*pp);
 351
 352	ASSERT(bno != NULLFSBLOCK);
 353	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
 354	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
 355
 356	/*
 357	 * Go down the tree until leaf level is reached, following the first
 358	 * pointer (leftmost) at each level.
 359	 */
 360	while (level-- > 0) {
 361		/* See if buf is in cur first */
 362		bp_release = 0;
 363		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
 364		if (!bp) {
 365			bp_release = 1;
 366			error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
 367						XFS_BMAP_BTREE_REF,
 368						&xfs_bmbt_buf_ops);
 369			if (error)
 370				goto error_norelse;
 371		}
 372		block = XFS_BUF_TO_BLOCK(bp);
 373		if (level == 0)
 374			break;
 375
 376		/*
 377		 * Check this block for basic sanity (increasing keys and
 378		 * no duplicate blocks).
 379		 */
 380
 381		xfs_check_block(block, mp, 0, 0);
 382		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 383		bno = be64_to_cpu(*pp);
 384		if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
 385			error = -EFSCORRUPTED;
 386			goto error0;
 387		}
 388		if (bp_release) {
 389			bp_release = 0;
 390			xfs_trans_brelse(NULL, bp);
 391		}
 392	}
 393
 394	/*
 395	 * Here with bp and block set to the leftmost leaf node in the tree.
 396	 */
 397	i = 0;
 398
 399	/*
 400	 * Loop over all leaf nodes checking that all extents are in the right order.
 401	 */
 402	for (;;) {
 403		xfs_fsblock_t	nextbno;
 404		xfs_extnum_t	num_recs;
 405
 406
 407		num_recs = xfs_btree_get_numrecs(block);
 408
 409		/*
 410		 * Read-ahead the next leaf block, if any.
 411		 */
 412
 413		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
 414
 415		/*
 416		 * Check all the extents to make sure they are OK.
 417		 * If we had a previous block, the last entry should
 418		 * conform with the first entry in this one.
 419		 */
 420
 421		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
 422		if (i) {
 423			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
 424			       xfs_bmbt_disk_get_blockcount(&last) <=
 425			       xfs_bmbt_disk_get_startoff(ep));
 426		}
 427		for (j = 1; j < num_recs; j++) {
 428			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
 429			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
 430			       xfs_bmbt_disk_get_blockcount(ep) <=
 431			       xfs_bmbt_disk_get_startoff(nextp));
 432			ep = nextp;
 433		}
 434
 435		last = *ep;
 436		i += num_recs;
 437		if (bp_release) {
 438			bp_release = 0;
 439			xfs_trans_brelse(NULL, bp);
 440		}
 441		bno = nextbno;
 442		/*
 443		 * If we've reached the end, stop.
 444		 */
 445		if (bno == NULLFSBLOCK)
 446			break;
 447
 448		bp_release = 0;
 449		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
 450		if (!bp) {
 451			bp_release = 1;
 452			error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
 453						XFS_BMAP_BTREE_REF,
 454						&xfs_bmbt_buf_ops);
 455			if (error)
 456				goto error_norelse;
 457		}
 458		block = XFS_BUF_TO_BLOCK(bp);
 459	}
 460
 461	return;
 462
 463error0:
 464	xfs_warn(mp, "%s: at error0", __func__);
 465	if (bp_release)
 466		xfs_trans_brelse(NULL, bp);
 467error_norelse:
 468	xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
 469		__func__, i);
 470	xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
 471	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 472	return;
 473}
 474
 475/*
 476 * Validate that the bmbt_irecs being returned from bmapi are valid
 477 * given the caller's original parameters.  Specifically check the
 478 * ranges of the returned irecs to ensure that they only extend beyond
 479 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 480 */
 481STATIC void
 482xfs_bmap_validate_ret(
 483	xfs_fileoff_t		bno,
 484	xfs_filblks_t		len,
 485	int			flags,
 486	xfs_bmbt_irec_t		*mval,
 487	int			nmap,
 488	int			ret_nmap)
 489{
 490	int			i;		/* index to map values */
 491
 492	ASSERT(ret_nmap <= nmap);
 493
 494	for (i = 0; i < ret_nmap; i++) {
 495		ASSERT(mval[i].br_blockcount > 0);
 496		if (!(flags & XFS_BMAPI_ENTIRE)) {
 497			ASSERT(mval[i].br_startoff >= bno);
 498			ASSERT(mval[i].br_blockcount <= len);
 499			ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
 500			       bno + len);
 501		} else {
 502			ASSERT(mval[i].br_startoff < bno + len);
 503			ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
 504			       bno);
 505		}
 506		ASSERT(i == 0 ||
 507		       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
 508		       mval[i].br_startoff);
 509		ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
 510		       mval[i].br_startblock != HOLESTARTBLOCK);
 511		ASSERT(mval[i].br_state == XFS_EXT_NORM ||
 512		       mval[i].br_state == XFS_EXT_UNWRITTEN);
 513	}
 514}
 515
 516#else
 517#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)		do { } while (0)
 518#define	xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)	do { } while (0)
 519#endif /* DEBUG */
 520
 521/*
 522 * bmap free list manipulation functions
 523 */
 524
 525/*
 526 * Add the extent to the list of extents to be free at transaction end.
 527 * The list is maintained sorted (by block number).
 528 */
 529void
 530__xfs_bmap_add_free(
 531	struct xfs_trans		*tp,
 532	xfs_fsblock_t			bno,
 533	xfs_filblks_t			len,
 534	const struct xfs_owner_info	*oinfo,
 535	bool				skip_discard)
 536{
 537	struct xfs_extent_free_item	*new;		/* new element */
 538#ifdef DEBUG
 539	struct xfs_mount		*mp = tp->t_mountp;
 540	xfs_agnumber_t			agno;
 541	xfs_agblock_t			agbno;
 542
 543	ASSERT(bno != NULLFSBLOCK);
 544	ASSERT(len > 0);
 545	ASSERT(len <= MAXEXTLEN);
 546	ASSERT(!isnullstartblock(bno));
 547	agno = XFS_FSB_TO_AGNO(mp, bno);
 548	agbno = XFS_FSB_TO_AGBNO(mp, bno);
 549	ASSERT(agno < mp->m_sb.sb_agcount);
 550	ASSERT(agbno < mp->m_sb.sb_agblocks);
 551	ASSERT(len < mp->m_sb.sb_agblocks);
 552	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
 553#endif
 554	ASSERT(xfs_bmap_free_item_zone != NULL);
 555
 556	new = kmem_cache_alloc(xfs_bmap_free_item_zone,
 557			       GFP_KERNEL | __GFP_NOFAIL);
 558	new->xefi_startblock = bno;
 559	new->xefi_blockcount = (xfs_extlen_t)len;
 560	if (oinfo)
 561		new->xefi_oinfo = *oinfo;
 562	else
 563		new->xefi_oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
 564	new->xefi_skip_discard = skip_discard;
 565	trace_xfs_bmap_free_defer(tp->t_mountp,
 566			XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
 567			XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
 568	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
 569}
 570
 571/*
 572 * Inode fork format manipulation functions
 573 */
 574
 575/*
 576 * Convert the inode format to extent format if it currently is in btree format,
 577 * but the extent list is small enough that it fits into the extent format.
 578 *
 579 * Since the extents are already in-core, all we have to do is give up the space
 580 * for the btree root and pitch the leaf block.
 581 */
 582STATIC int				/* error */
 583xfs_bmap_btree_to_extents(
 584	struct xfs_trans	*tp,	/* transaction pointer */
 585	struct xfs_inode	*ip,	/* incore inode pointer */
 586	struct xfs_btree_cur	*cur,	/* btree cursor */
 587	int			*logflagsp, /* inode logging flags */
 588	int			whichfork)  /* data or attr fork */
 589{
 590	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 591	struct xfs_mount	*mp = ip->i_mount;
 592	struct xfs_btree_block	*rblock = ifp->if_broot;
 593	struct xfs_btree_block	*cblock;/* child btree block */
 594	xfs_fsblock_t		cbno;	/* child block number */
 595	xfs_buf_t		*cbp;	/* child block's buffer */
 596	int			error;	/* error return value */
 597	__be64			*pp;	/* ptr to block address */
 598	struct xfs_owner_info	oinfo;
 599
 600	/* check if we actually need the extent format first: */
 601	if (!xfs_bmap_wants_extents(ip, whichfork))
 602		return 0;
 603
 604	ASSERT(cur);
 605	ASSERT(whichfork != XFS_COW_FORK);
 606	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
 607	ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
 608	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
 609	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
 610	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
 611
 612	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
 613	cbno = be64_to_cpu(*pp);
 614#ifdef DEBUG
 615	if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_btree_check_lptr(cur, cbno, 1)))
 616		return -EFSCORRUPTED;
 617#endif
 618	error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
 619				&xfs_bmbt_buf_ops);
 620	if (error)
 621		return error;
 622	cblock = XFS_BUF_TO_BLOCK(cbp);
 623	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
 624		return error;
 625	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
 626	xfs_bmap_add_free(cur->bc_tp, cbno, 1, &oinfo);
 627	ip->i_d.di_nblocks--;
 628	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 629	xfs_trans_binval(tp, cbp);
 630	if (cur->bc_bufs[0] == cbp)
 631		cur->bc_bufs[0] = NULL;
 632	xfs_iroot_realloc(ip, -1, whichfork);
 633	ASSERT(ifp->if_broot == NULL);
 634	ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
 635	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 636	*logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
 637	return 0;
 638}
 639
 640/*
 641 * Convert an extents-format file into a btree-format file.
 642 * The new file will have a root block (in the inode) and a single child block.
 643 */
 644STATIC int					/* error */
 645xfs_bmap_extents_to_btree(
 646	struct xfs_trans	*tp,		/* transaction pointer */
 647	struct xfs_inode	*ip,		/* incore inode pointer */
 648	struct xfs_btree_cur	**curp,		/* cursor returned to caller */
 649	int			wasdel,		/* converting a delayed alloc */
 650	int			*logflagsp,	/* inode logging flags */
 651	int			whichfork)	/* data or attr fork */
 652{
 653	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
 654	struct xfs_buf		*abp;		/* buffer for ablock */
 655	struct xfs_alloc_arg	args;		/* allocation arguments */
 656	struct xfs_bmbt_rec	*arp;		/* child record pointer */
 657	struct xfs_btree_block	*block;		/* btree root block */
 658	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
 659	int			error;		/* error return value */
 660	struct xfs_ifork	*ifp;		/* inode fork pointer */
 661	struct xfs_bmbt_key	*kp;		/* root block key pointer */
 662	struct xfs_mount	*mp;		/* mount structure */
 663	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
 664	struct xfs_iext_cursor	icur;
 665	struct xfs_bmbt_irec	rec;
 666	xfs_extnum_t		cnt = 0;
 667
 668	mp = ip->i_mount;
 669	ASSERT(whichfork != XFS_COW_FORK);
 670	ifp = XFS_IFORK_PTR(ip, whichfork);
 671	ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS);
 672
 673	/*
 674	 * Make space in the inode incore. This needs to be undone if we fail
 675	 * to expand the root.
 676	 */
 677	xfs_iroot_realloc(ip, 1, whichfork);
 678	ifp->if_flags |= XFS_IFBROOT;
 679
 680	/*
 681	 * Fill in the root.
 682	 */
 683	block = ifp->if_broot;
 684	xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
 685				 XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
 686				 XFS_BTREE_LONG_PTRS);
 687	/*
 688	 * Need a cursor.  Can't allocate until bb_level is filled in.
 689	 */
 690	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 691	cur->bc_ino.flags = wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
 692	/*
 693	 * Convert to a btree with two levels, one record in root.
 694	 */
 695	ifp->if_format = XFS_DINODE_FMT_BTREE;
 696	memset(&args, 0, sizeof(args));
 697	args.tp = tp;
 698	args.mp = mp;
 699	xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
 700	if (tp->t_firstblock == NULLFSBLOCK) {
 701		args.type = XFS_ALLOCTYPE_START_BNO;
 702		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
 703	} else if (tp->t_flags & XFS_TRANS_LOWMODE) {
 704		args.type = XFS_ALLOCTYPE_START_BNO;
 705		args.fsbno = tp->t_firstblock;
 706	} else {
 707		args.type = XFS_ALLOCTYPE_NEAR_BNO;
 708		args.fsbno = tp->t_firstblock;
 709	}
 710	args.minlen = args.maxlen = args.prod = 1;
 711	args.wasdel = wasdel;
 712	*logflagsp = 0;
 713	error = xfs_alloc_vextent(&args);
 714	if (error)
 715		goto out_root_realloc;
 716
 717	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
 718		error = -ENOSPC;
 719		goto out_root_realloc;
 720	}
 721
 722	/*
 723	 * Allocation can't fail, the space was reserved.
 724	 */
 725	ASSERT(tp->t_firstblock == NULLFSBLOCK ||
 726	       args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock));
 727	tp->t_firstblock = args.fsbno;
 728	cur->bc_ino.allocated++;
 729	ip->i_d.di_nblocks++;
 730	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
 731	error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
 732			XFS_FSB_TO_DADDR(mp, args.fsbno),
 733			mp->m_bsize, 0, &abp);
 734	if (error)
 735		goto out_unreserve_dquot;
 736
 737	/*
 738	 * Fill in the child block.
 739	 */
 740	abp->b_ops = &xfs_bmbt_buf_ops;
 741	ablock = XFS_BUF_TO_BLOCK(abp);
 742	xfs_btree_init_block_int(mp, ablock, abp->b_bn,
 743				XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
 744				XFS_BTREE_LONG_PTRS);
 745
 746	for_each_xfs_iext(ifp, &icur, &rec) {
 747		if (isnullstartblock(rec.br_startblock))
 748			continue;
 749		arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
 750		xfs_bmbt_disk_set_all(arp, &rec);
 751		cnt++;
 752	}
 753	ASSERT(cnt == ifp->if_nextents);
 754	xfs_btree_set_numrecs(ablock, cnt);
 755
 756	/*
 757	 * Fill in the root key and pointer.
 758	 */
 759	kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
 760	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
 761	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
 762	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
 763						be16_to_cpu(block->bb_level)));
 764	*pp = cpu_to_be64(args.fsbno);
 765
 766	/*
 767	 * Do all this logging at the end so that
 768	 * the root is at the right level.
 769	 */
 770	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
 771	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
 772	ASSERT(*curp == NULL);
 773	*curp = cur;
 774	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
 775	return 0;
 776
 777out_unreserve_dquot:
 778	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 779out_root_realloc:
 780	xfs_iroot_realloc(ip, -1, whichfork);
 781	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 782	ASSERT(ifp->if_broot == NULL);
 783	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 784
 785	return error;
 786}
 787
 788/*
 789 * Convert a local file to an extents file.
 790 * This code is out of bounds for data forks of regular files,
 791 * since the file data needs to get logged so things will stay consistent.
 792 * (The bmap-level manipulations are ok, though).
 793 */
 794void
 795xfs_bmap_local_to_extents_empty(
 796	struct xfs_trans	*tp,
 797	struct xfs_inode	*ip,
 798	int			whichfork)
 799{
 800	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 801
 802	ASSERT(whichfork != XFS_COW_FORK);
 803	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
 804	ASSERT(ifp->if_bytes == 0);
 805	ASSERT(ifp->if_nextents == 0);
 806
 807	xfs_bmap_forkoff_reset(ip, whichfork);
 808	ifp->if_flags &= ~XFS_IFINLINE;
 809	ifp->if_flags |= XFS_IFEXTENTS;
 810	ifp->if_u1.if_root = NULL;
 811	ifp->if_height = 0;
 812	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 813	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 814}
 815
 816
 817STATIC int				/* error */
 818xfs_bmap_local_to_extents(
 819	xfs_trans_t	*tp,		/* transaction pointer */
 820	xfs_inode_t	*ip,		/* incore inode pointer */
 821	xfs_extlen_t	total,		/* total blocks needed by transaction */
 822	int		*logflagsp,	/* inode logging flags */
 823	int		whichfork,
 824	void		(*init_fn)(struct xfs_trans *tp,
 825				   struct xfs_buf *bp,
 826				   struct xfs_inode *ip,
 827				   struct xfs_ifork *ifp))
 828{
 829	int		error = 0;
 830	int		flags;		/* logging flags returned */
 831	struct xfs_ifork *ifp;		/* inode fork pointer */
 832	xfs_alloc_arg_t	args;		/* allocation arguments */
 833	xfs_buf_t	*bp;		/* buffer for extent block */
 834	struct xfs_bmbt_irec rec;
 835	struct xfs_iext_cursor icur;
 836
 837	/*
 838	 * We don't want to deal with the case of keeping inode data inline yet.
 839	 * So sending the data fork of a regular inode is invalid.
 840	 */
 841	ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
 842	ifp = XFS_IFORK_PTR(ip, whichfork);
 843	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
 844
 845	if (!ifp->if_bytes) {
 846		xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
 847		flags = XFS_ILOG_CORE;
 848		goto done;
 849	}
 850
 851	flags = 0;
 852	error = 0;
 853	ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS)) == XFS_IFINLINE);
 854	memset(&args, 0, sizeof(args));
 855	args.tp = tp;
 856	args.mp = ip->i_mount;
 857	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
 858	/*
 859	 * Allocate a block.  We know we need only one, since the
 860	 * file currently fits in an inode.
 861	 */
 862	if (tp->t_firstblock == NULLFSBLOCK) {
 863		args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
 864		args.type = XFS_ALLOCTYPE_START_BNO;
 865	} else {
 866		args.fsbno = tp->t_firstblock;
 867		args.type = XFS_ALLOCTYPE_NEAR_BNO;
 868	}
 869	args.total = total;
 870	args.minlen = args.maxlen = args.prod = 1;
 871	error = xfs_alloc_vextent(&args);
 872	if (error)
 873		goto done;
 874
 875	/* Can't fail, the space was reserved. */
 876	ASSERT(args.fsbno != NULLFSBLOCK);
 877	ASSERT(args.len == 1);
 878	tp->t_firstblock = args.fsbno;
 879	error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
 880			XFS_FSB_TO_DADDR(args.mp, args.fsbno),
 881			args.mp->m_bsize, 0, &bp);
 882	if (error)
 883		goto done;
 884
 885	/*
 886	 * Initialize the block, copy the data and log the remote buffer.
 887	 *
 888	 * The callout is responsible for logging because the remote format
 889	 * might differ from the local format and thus we don't know how much to
 890	 * log here. Note that init_fn must also set the buffer log item type
 891	 * correctly.
 892	 */
 893	init_fn(tp, bp, ip, ifp);
 894
 895	/* account for the change in fork size */
 896	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
 897	xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
 898	flags |= XFS_ILOG_CORE;
 899
 900	ifp->if_u1.if_root = NULL;
 901	ifp->if_height = 0;
 902
 903	rec.br_startoff = 0;
 904	rec.br_startblock = args.fsbno;
 905	rec.br_blockcount = 1;
 906	rec.br_state = XFS_EXT_NORM;
 907	xfs_iext_first(ifp, &icur);
 908	xfs_iext_insert(ip, &icur, &rec, 0);
 909
 910	ifp->if_nextents = 1;
 911	ip->i_d.di_nblocks = 1;
 912	xfs_trans_mod_dquot_byino(tp, ip,
 913		XFS_TRANS_DQ_BCOUNT, 1L);
 914	flags |= xfs_ilog_fext(whichfork);
 915
 916done:
 917	*logflagsp = flags;
 918	return error;
 919}
 920
 921/*
 922 * Called from xfs_bmap_add_attrfork to handle btree format files.
 923 */
 924STATIC int					/* error */
 925xfs_bmap_add_attrfork_btree(
 926	xfs_trans_t		*tp,		/* transaction pointer */
 927	xfs_inode_t		*ip,		/* incore inode pointer */
 928	int			*flags)		/* inode logging flags */
 929{
 930	xfs_btree_cur_t		*cur;		/* btree cursor */
 931	int			error;		/* error return value */
 932	xfs_mount_t		*mp;		/* file system mount struct */
 933	int			stat;		/* newroot status */
 934
 935	mp = ip->i_mount;
 936	if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
 937		*flags |= XFS_ILOG_DBROOT;
 938	else {
 939		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
 940		error = xfs_bmbt_lookup_first(cur, &stat);
 941		if (error)
 942			goto error0;
 943		/* must be at least one entry */
 944		if (XFS_IS_CORRUPT(mp, stat != 1)) {
 945			error = -EFSCORRUPTED;
 946			goto error0;
 947		}
 948		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
 949			goto error0;
 950		if (stat == 0) {
 951			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 952			return -ENOSPC;
 953		}
 954		cur->bc_ino.allocated = 0;
 955		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 956	}
 957	return 0;
 958error0:
 959	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 960	return error;
 961}
 962
 963/*
 964 * Called from xfs_bmap_add_attrfork to handle extents format files.
 965 */
 966STATIC int					/* error */
 967xfs_bmap_add_attrfork_extents(
 968	struct xfs_trans	*tp,		/* transaction pointer */
 969	struct xfs_inode	*ip,		/* incore inode pointer */
 970	int			*flags)		/* inode logging flags */
 971{
 972	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
 973	int			error;		/* error return value */
 974
 975	if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <=
 976	    XFS_IFORK_DSIZE(ip))
 977		return 0;
 978	cur = NULL;
 979	error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
 980					  XFS_DATA_FORK);
 981	if (cur) {
 982		cur->bc_ino.allocated = 0;
 983		xfs_btree_del_cursor(cur, error);
 984	}
 985	return error;
 986}
 987
 988/*
 989 * Called from xfs_bmap_add_attrfork to handle local format files. Each
 990 * different data fork content type needs a different callout to do the
 991 * conversion. Some are basic and only require special block initialisation
 992 * callouts for the data formating, others (directories) are so specialised they
 993 * handle everything themselves.
 994 *
 995 * XXX (dgc): investigate whether directory conversion can use the generic
 996 * formatting callout. It should be possible - it's just a very complex
 997 * formatter.
 998 */
 999STATIC int					/* error */
1000xfs_bmap_add_attrfork_local(
1001	struct xfs_trans	*tp,		/* transaction pointer */
1002	struct xfs_inode	*ip,		/* incore inode pointer */
1003	int			*flags)		/* inode logging flags */
1004{
1005	struct xfs_da_args	dargs;		/* args for dir/attr code */
1006
1007	if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
1008		return 0;
1009
1010	if (S_ISDIR(VFS_I(ip)->i_mode)) {
1011		memset(&dargs, 0, sizeof(dargs));
1012		dargs.geo = ip->i_mount->m_dir_geo;
1013		dargs.dp = ip;
1014		dargs.total = dargs.geo->fsbcount;
1015		dargs.whichfork = XFS_DATA_FORK;
1016		dargs.trans = tp;
1017		return xfs_dir2_sf_to_block(&dargs);
1018	}
1019
1020	if (S_ISLNK(VFS_I(ip)->i_mode))
1021		return xfs_bmap_local_to_extents(tp, ip, 1, flags,
1022						 XFS_DATA_FORK,
1023						 xfs_symlink_local_to_remote);
1024
1025	/* should only be called for types that support local format data */
1026	ASSERT(0);
1027	return -EFSCORRUPTED;
1028}
1029
1030/* Set an inode attr fork off based on the format */
1031int
1032xfs_bmap_set_attrforkoff(
1033	struct xfs_inode	*ip,
1034	int			size,
1035	int			*version)
1036{
1037	switch (ip->i_df.if_format) {
1038	case XFS_DINODE_FMT_DEV:
1039		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
1040		break;
1041	case XFS_DINODE_FMT_LOCAL:
1042	case XFS_DINODE_FMT_EXTENTS:
1043	case XFS_DINODE_FMT_BTREE:
1044		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
1045		if (!ip->i_d.di_forkoff)
1046			ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
1047		else if ((ip->i_mount->m_flags & XFS_MOUNT_ATTR2) && version)
1048			*version = 2;
1049		break;
1050	default:
1051		ASSERT(0);
1052		return -EINVAL;
1053	}
1054
1055	return 0;
1056}
1057
1058/*
1059 * Convert inode from non-attributed to attributed.
1060 * Must not be in a transaction, ip must not be locked.
1061 */
1062int						/* error code */
1063xfs_bmap_add_attrfork(
1064	xfs_inode_t		*ip,		/* incore inode pointer */
1065	int			size,		/* space new attribute needs */
1066	int			rsvd)		/* xact may use reserved blks */
1067{
1068	xfs_mount_t		*mp;		/* mount structure */
1069	xfs_trans_t		*tp;		/* transaction pointer */
1070	int			blks;		/* space reservation */
1071	int			version = 1;	/* superblock attr version */
1072	int			logflags;	/* logging flags */
1073	int			error;		/* error return value */
1074
1075	ASSERT(XFS_IFORK_Q(ip) == 0);
1076
1077	mp = ip->i_mount;
1078	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1079
1080	blks = XFS_ADDAFORK_SPACE_RES(mp);
1081
1082	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0,
1083			rsvd ? XFS_TRANS_RESERVE : 0, &tp);
1084	if (error)
1085		return error;
1086
1087	xfs_ilock(ip, XFS_ILOCK_EXCL);
1088	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
1089			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
1090			XFS_QMOPT_RES_REGBLKS);
1091	if (error)
1092		goto trans_cancel;
1093	if (XFS_IFORK_Q(ip))
1094		goto trans_cancel;
1095
1096	xfs_trans_ijoin(tp, ip, 0);
1097	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1098	error = xfs_bmap_set_attrforkoff(ip, size, &version);
1099	if (error)
1100		goto trans_cancel;
1101	ASSERT(ip->i_afp == NULL);
1102
1103	ip->i_afp = kmem_cache_zalloc(xfs_ifork_zone,
1104				      GFP_KERNEL | __GFP_NOFAIL);
1105
1106	ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
1107	ip->i_afp->if_flags = XFS_IFEXTENTS;
1108	logflags = 0;
1109	switch (ip->i_df.if_format) {
1110	case XFS_DINODE_FMT_LOCAL:
1111		error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
1112		break;
1113	case XFS_DINODE_FMT_EXTENTS:
1114		error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
1115		break;
1116	case XFS_DINODE_FMT_BTREE:
1117		error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
1118		break;
1119	default:
1120		error = 0;
1121		break;
1122	}
1123	if (logflags)
1124		xfs_trans_log_inode(tp, ip, logflags);
1125	if (error)
1126		goto trans_cancel;
1127	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
1128	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
1129		bool log_sb = false;
1130
1131		spin_lock(&mp->m_sb_lock);
1132		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
1133			xfs_sb_version_addattr(&mp->m_sb);
1134			log_sb = true;
1135		}
1136		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
1137			xfs_sb_version_addattr2(&mp->m_sb);
1138			log_sb = true;
1139		}
1140		spin_unlock(&mp->m_sb_lock);
1141		if (log_sb)
1142			xfs_log_sb(tp);
1143	}
1144
1145	error = xfs_trans_commit(tp);
1146	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1147	return error;
1148
1149trans_cancel:
1150	xfs_trans_cancel(tp);
1151	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1152	return error;
1153}
1154
1155/*
1156 * Internal and external extent tree search functions.
1157 */
1158
/*
 * Walk state that xfs_iread_extents() passes to xfs_iread_bmbt_block() through
 * the private pointer of xfs_btree_visit_blocks().
 */
1159struct xfs_iread_state {
1160	struct xfs_iext_cursor	icur;	/* incore position for the next insert */
1161	xfs_extnum_t		loaded;	/* records copied into the fork so far */
1162};
1163
1164/* Stuff every bmbt record from this block into the incore extent map. */
1165static int
1166xfs_iread_bmbt_block(
1167	struct xfs_btree_cur	*cur,
1168	int			level,
1169	void			*priv)
1170{
1171	struct xfs_iread_state	*ir = priv;
1172	struct xfs_mount	*mp = cur->bc_mp;
1173	struct xfs_inode	*ip = cur->bc_ino.ip;
1174	struct xfs_btree_block	*block;
1175	struct xfs_buf		*bp;
1176	struct xfs_bmbt_rec	*frp;
1177	xfs_extnum_t		num_recs;
1178	xfs_extnum_t		j;
1179	int			whichfork = cur->bc_ino.whichfork;
1180	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1181
1182	block = xfs_btree_get_block(cur, level, &bp);
1183
1184	/* Abort if we find more records than nextents. */
1185	num_recs = xfs_btree_get_numrecs(block);
1186	if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) {
1187		xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
1188				(unsigned long long)ip->i_ino);
1189		xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
1190				sizeof(*block), __this_address);
1191		return -EFSCORRUPTED;
1192	}
1193
1194	/* Copy records into the incore cache. */
1195	frp = XFS_BMBT_REC_ADDR(mp, block, 1);
1196	for (j = 0; j < num_recs; j++, frp++, ir->loaded++) {
1197		struct xfs_bmbt_irec	new;
1198		xfs_failaddr_t		fa;
1199
1200		xfs_bmbt_disk_get_all(frp, &new);
1201		fa = xfs_bmap_validate_extent(ip, whichfork, &new);
1202		if (fa) {
1203			xfs_inode_verifier_error(ip, -EFSCORRUPTED,
1204					"xfs_iread_extents(2)", frp,
1205					sizeof(*frp), fa);
1206			return -EFSCORRUPTED;
1207		}
1208		xfs_iext_insert(ip, &ir->icur, &new,
1209				xfs_bmap_fork_to_state(whichfork));
1210		trace_xfs_read_extent(ip, &ir->icur,
1211				xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
1212		xfs_iext_next(ifp, &ir->icur);
1213	}
1214
1215	return 0;
1216}
1217
1218/*
1219 * Read in extents from a btree-format inode.
1220 */
1221int
1222xfs_iread_extents(
1223	struct xfs_trans	*tp,
1224	struct xfs_inode	*ip,
1225	int			whichfork)
1226{
1227	struct xfs_iread_state	ir;
1228	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1229	struct xfs_mount	*mp = ip->i_mount;
1230	struct xfs_btree_cur	*cur;
1231	int			error;
1232
1233	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1234
1235	if (XFS_IS_CORRUPT(mp, ifp->if_format != XFS_DINODE_FMT_BTREE)) {
1236		error = -EFSCORRUPTED;
1237		goto out;
1238	}
1239
1240	ir.loaded = 0;
1241	xfs_iext_first(ifp, &ir.icur);
1242	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
1243	error = xfs_btree_visit_blocks(cur, xfs_iread_bmbt_block,
1244			XFS_BTREE_VISIT_RECORDS, &ir);
1245	xfs_btree_del_cursor(cur, error);
1246	if (error)
1247		goto out;
1248
1249	if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
1250		error = -EFSCORRUPTED;
1251		goto out;
1252	}
1253	ASSERT(ir.loaded == xfs_iext_count(ifp));
1254
1255	ifp->if_flags |= XFS_IFEXTENTS;
1256	return 0;
1257out:
1258	xfs_iext_destroy(ifp);
1259	return error;
1260}
1261
1262/*
1263 * Returns the relative block number of the first unused block(s) in the given
1264 * fork with at least "len" logically contiguous blocks free.  This is the
1265 * lowest-address hole if the fork has holes, else the first block past the end
1266 * of fork.  Return 0 if the fork is currently local (in-inode).
1267 */
1268int						/* error */
1269xfs_bmap_first_unused(
1270	struct xfs_trans	*tp,		/* transaction pointer */
1271	struct xfs_inode	*ip,		/* incore inode */
1272	xfs_extlen_t		len,		/* size of hole to find */
1273	xfs_fileoff_t		*first_unused,	/* unused block */
1274	int			whichfork)	/* data or attr fork */
1275{
1276	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1277	struct xfs_bmbt_irec	got;
1278	struct xfs_iext_cursor	icur;
1279	xfs_fileoff_t		lastaddr = 0;
1280	xfs_fileoff_t		lowest, max;
1281	int			error;
1282
1283	if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
1284		*first_unused = 0;
1285		return 0;
1286	}
1287
1288	ASSERT(xfs_ifork_has_extents(ifp));
1289
1290	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1291		error = xfs_iread_extents(tp, ip, whichfork);
1292		if (error)
1293			return error;
1294	}
1295
1296	lowest = max = *first_unused;
1297	for_each_xfs_iext(ifp, &icur, &got) {
1298		/*
1299		 * See if the hole before this extent will work.
1300		 */
1301		if (got.br_startoff >= lowest + len &&
1302		    got.br_startoff - max >= len)
1303			break;
1304		lastaddr = got.br_startoff + got.br_blockcount;
1305		max = XFS_FILEOFF_MAX(lastaddr, lowest);
1306	}
1307
1308	*first_unused = max;
1309	return 0;
1310}
1311
1312/*
1313 * Returns the file-relative block number of the last block - 1 before
1314 * last_block (input value) in the file.
1315 * This is not based on i_size, it is based on the extent records.
1316 * Returns 0 for local files, as they do not have extent records.
1317 */
1318int						/* error */
1319xfs_bmap_last_before(
1320	struct xfs_trans	*tp,		/* transaction pointer */
1321	struct xfs_inode	*ip,		/* incore inode */
1322	xfs_fileoff_t		*last_block,	/* last block */
1323	int			whichfork)	/* data or attr fork */
1324{
1325	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1326	struct xfs_bmbt_irec	got;
1327	struct xfs_iext_cursor	icur;
1328	int			error;
1329
1330	switch (ifp->if_format) {
1331	case XFS_DINODE_FMT_LOCAL:
1332		*last_block = 0;
1333		return 0;
1334	case XFS_DINODE_FMT_BTREE:
1335	case XFS_DINODE_FMT_EXTENTS:
1336		break;
1337	default:
1338		ASSERT(0);
1339		return -EFSCORRUPTED;
1340	}
1341
1342	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1343		error = xfs_iread_extents(tp, ip, whichfork);
1344		if (error)
1345			return error;
1346	}
1347
1348	if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
1349		*last_block = 0;
1350	return 0;
1351}
1352
1353int
1354xfs_bmap_last_extent(
1355	struct xfs_trans	*tp,
1356	struct xfs_inode	*ip,
1357	int			whichfork,
1358	struct xfs_bmbt_irec	*rec,
1359	int			*is_empty)
1360{
1361	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1362	struct xfs_iext_cursor	icur;
1363	int			error;
1364
1365	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1366		error = xfs_iread_extents(tp, ip, whichfork);
1367		if (error)
1368			return error;
1369	}
1370
1371	xfs_iext_last(ifp, &icur);
1372	if (!xfs_iext_get_extent(ifp, &icur, rec))
1373		*is_empty = 1;
1374	else
1375		*is_empty = 0;
1376	return 0;
1377}
1378
1379/*
1380 * Check the last inode extent to determine whether this allocation will result
1381 * in blocks being allocated at the end of the file. When we allocate new data
1382 * blocks at the end of the file which do not start at the previous data block,
1383 * we will try to align the new blocks at stripe unit boundaries.
1384 *
1385 * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1386 * at, or past the EOF.
1387 */
1388STATIC int
1389xfs_bmap_isaeof(
1390	struct xfs_bmalloca	*bma,
1391	int			whichfork)
1392{
1393	struct xfs_bmbt_irec	rec;
1394	int			is_empty;
1395	int			error;
1396
1397	bma->aeof = false;
1398	error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1399				     &is_empty);
1400	if (error)
1401		return error;
1402
1403	if (is_empty) {
1404		bma->aeof = true;
1405		return 0;
1406	}
1407
1408	/*
1409	 * Check if we are allocation or past the last extent, or at least into
1410	 * the last delayed allocated extent.
1411	 */
1412	bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1413		(bma->offset >= rec.br_startoff &&
1414		 isnullstartblock(rec.br_startblock));
1415	return 0;
1416}
1417
1418/*
1419 * Returns the file-relative block number of the first block past eof in
1420 * the file.  This is not based on i_size, it is based on the extent records.
1421 * Returns 0 for local files, as they do not have extent records.
1422 */
1423int
1424xfs_bmap_last_offset(
1425	struct xfs_inode	*ip,
1426	xfs_fileoff_t		*last_block,
1427	int			whichfork)
1428{
1429	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1430	struct xfs_bmbt_irec	rec;
1431	int			is_empty;
1432	int			error;
1433
1434	*last_block = 0;
1435
1436	if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
1437		return 0;
1438
1439	if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp)))
1440		return -EFSCORRUPTED;
1441
1442	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1443	if (error || is_empty)
1444		return error;
1445
1446	*last_block = rec.br_startoff + rec.br_blockcount;
1447	return 0;
1448}
1449
1450/*
1451 * Returns whether the selected fork of the inode has exactly one
1452 * block or not.  For the data fork we check this matches di_size,
1453 * implying the file's range is 0..bsize-1.
1454 */
1455int					/* 1=>1 block, 0=>otherwise */
1456xfs_bmap_one_block(
1457	struct xfs_inode	*ip,		/* incore inode */
1458	int			whichfork)	/* data or attr fork */
1459{
1460	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1461	int			rval;		/* return value */
1462	struct xfs_bmbt_irec	s;		/* internal version of extent */
1463	struct xfs_iext_cursor icur;
1464
1465#ifndef DEBUG
1466	if (whichfork == XFS_DATA_FORK)
1467		return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
1468#endif	/* !DEBUG */
1469	if (ifp->if_nextents != 1)
1470		return 0;
1471	if (ifp->if_format != XFS_DINODE_FMT_EXTENTS)
1472		return 0;
1473	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1474	xfs_iext_first(ifp, &icur);
1475	xfs_iext_get_extent(ifp, &icur, &s);
1476	rval = s.br_startoff == 0 && s.br_blockcount == 1;
1477	if (rval && whichfork == XFS_DATA_FORK)
1478		ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
1479	return rval;
1480}
1481
1482/*
1483 * Extent tree manipulation functions used during allocation.
1484 */
1485
1486/*
1487 * Convert a delayed allocation to a real allocation.
 *
 * bma->got describes the newly allocated real extent and bma->icur points at
 * the delalloc record (PREV below) that contains it.  Depending on which ends
 * of PREV the new extent fills, and on whether it can be merged with a real
 * extent on its left and/or right, the incore extent list is updated and, when
 * bma->cur is set, the same change is applied to the bmap btree.
 *
 * Afterwards the difference between the old and new delalloc block counts
 * (da_old, da_new) is passed to xfs_mod_delalloc() and xfs_mod_fdblocks().
 * Inode logging flags are accumulated in bma->logflags; the per-case flags
 * (rval) are not added for the COW fork.  Must not be called for the attr
 * fork (see the ASSERT below).
 *
 * Returns 0 or a negative errno.
1488 */
1489STATIC int				/* error */
1490xfs_bmap_add_extent_delay_real(
1491	struct xfs_bmalloca	*bma,
1492	int			whichfork)
1493{
1494	struct xfs_mount	*mp = bma->ip->i_mount;
1495	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
1496	struct xfs_bmbt_irec	*new = &bma->got;
1497	int			error;	/* error return value */
1498	int			i;	/* temp state */
1499	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
1500	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
1501					/* left is 0, right is 1, prev is 2 */
1502	int			rval=0;	/* return value (logging flags) */
1503	int			state = xfs_bmap_fork_to_state(whichfork);
1504	xfs_filblks_t		da_new; /* new count del alloc blocks used */
1505	xfs_filblks_t		da_old; /* old count del alloc blocks used */
1506	xfs_filblks_t		temp=0;	/* value for da_new calculations */
1507	int			tmp_rval;	/* partial logging flags */
1508	struct xfs_bmbt_irec	old;
1509
1510	ASSERT(whichfork != XFS_ATTR_FORK);
1511	ASSERT(!isnullstartblock(new->br_startblock));
1512	ASSERT(!bma->cur ||
1513	       (bma->cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
1514
1515	XFS_STATS_INC(mp, xs_add_exlist);
1516
1517#define	LEFT		r[0]
1518#define	RIGHT		r[1]
1519#define	PREV		r[2]
1520
1521	/*
1522	 * Set up a bunch of variables to make the tests simpler.
1523	 */
1524	xfs_iext_get_extent(ifp, &bma->icur, &PREV);
1525	new_endoff = new->br_startoff + new->br_blockcount;
1526	ASSERT(isnullstartblock(PREV.br_startblock));
1527	ASSERT(PREV.br_startoff <= new->br_startoff);
1528	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1529
	/*
	 * PREV is a delalloc record (asserted above), so its startblock field
	 * encodes a block count rather than a disk address; that count is the
	 * old reservation.  da_new is filled in by the cases below.
	 */
1530	da_old = startblockval(PREV.br_startblock);
1531	da_new = 0;
1532
1533	/*
1534	 * Set flags determining what part of the previous delayed allocation
1535	 * extent is being replaced by a real allocation.
1536	 */
1537	if (PREV.br_startoff == new->br_startoff)
1538		state |= BMAP_LEFT_FILLING;
1539	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1540		state |= BMAP_RIGHT_FILLING;
1541
1542	/*
1543	 * Check and set flags if this segment has a left neighbor.
1544	 * Don't set contiguous if the combined extent would be too large.
1545	 */
1546	if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
1547		state |= BMAP_LEFT_VALID;
1548		if (isnullstartblock(LEFT.br_startblock))
1549			state |= BMAP_LEFT_DELAY;
1550	}
1551
1552	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1553	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1554	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1555	    LEFT.br_state == new->br_state &&
1556	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1557		state |= BMAP_LEFT_CONTIG;
1558
1559	/*
1560	 * Check and set flags if this segment has a right neighbor.
1561	 * Don't set contiguous if the combined extent would be too large.
1562	 * Also check for all-three-contiguous being too large.
1563	 */
1564	if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
1565		state |= BMAP_RIGHT_VALID;
1566		if (isnullstartblock(RIGHT.br_startblock))
1567			state |= BMAP_RIGHT_DELAY;
1568	}
1569
1570	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1571	    new_endoff == RIGHT.br_startoff &&
1572	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1573	    new->br_state == RIGHT.br_state &&
1574	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1575	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1576		       BMAP_RIGHT_FILLING)) !=
1577		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1578		       BMAP_RIGHT_FILLING) ||
1579	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1580			<= MAXEXTLEN))
1581		state |= BMAP_RIGHT_CONTIG;
1582
1583	error = 0;
1584	/*
1585	 * Switch out based on the FILLING and CONTIG state bits.
1586	 */
1587	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1588			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1589	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1590	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1591		/*
1592		 * Filling in all of a previously delayed allocation extent.
1593		 * The left and right neighbors are both contiguous with new.
1594		 */
1595		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
1596
1597		xfs_iext_remove(bma->ip, &bma->icur, state);
1598		xfs_iext_remove(bma->ip, &bma->icur, state);
1599		xfs_iext_prev(ifp, &bma->icur);
1600		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
		/* LEFT and RIGHT have been merged into one real extent. */
1601		ifp->if_nextents--;
1602
1603		if (bma->cur == NULL)
1604			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1605		else {
1606			rval = XFS_ILOG_CORE;
			/* btree: delete RIGHT's record, then rewrite LEFT's */
1607			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1608			if (error)
1609				goto done;
1610			if (XFS_IS_CORRUPT(mp, i != 1)) {
1611				error = -EFSCORRUPTED;
1612				goto done;
1613			}
1614			error = xfs_btree_delete(bma->cur, &i);
1615			if (error)
1616				goto done;
1617			if (XFS_IS_CORRUPT(mp, i != 1)) {
1618				error = -EFSCORRUPTED;
1619				goto done;
1620			}
1621			error = xfs_btree_decrement(bma->cur, 0, &i);
1622			if (error)
1623				goto done;
1624			if (XFS_IS_CORRUPT(mp, i != 1)) {
1625				error = -EFSCORRUPTED;
1626				goto done;
1627			}
1628			error = xfs_bmbt_update(bma->cur, &LEFT);
1629			if (error)
1630				goto done;
1631		}
1632		break;
1633
1634	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1635		/*
1636		 * Filling in all of a previously delayed allocation extent.
1637		 * The left neighbor is contiguous, the right is not.
1638		 */
		/* keep the unmodified LEFT for the btree lookup below */
1639		old = LEFT;
1640		LEFT.br_blockcount += PREV.br_blockcount;
1641
1642		xfs_iext_remove(bma->ip, &bma->icur, state);
1643		xfs_iext_prev(ifp, &bma->icur);
1644		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1645
1646		if (bma->cur == NULL)
1647			rval = XFS_ILOG_DEXT;
1648		else {
1649			rval = 0;
1650			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1651			if (error)
1652				goto done;
1653			if (XFS_IS_CORRUPT(mp, i != 1)) {
1654				error = -EFSCORRUPTED;
1655				goto done;
1656			}
1657			error = xfs_bmbt_update(bma->cur, &LEFT);
1658			if (error)
1659				goto done;
1660		}
1661		break;
1662
1663	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1664		/*
1665		 * Filling in all of a previously delayed allocation extent.
1666		 * The right neighbor is contiguous, the left is not. Take care
1667		 * with delay -> unwritten extent allocation here because the
1668		 * delalloc record we are overwriting is always written.
1669		 */
1670		PREV.br_startblock = new->br_startblock;
1671		PREV.br_blockcount += RIGHT.br_blockcount;
1672		PREV.br_state = new->br_state;
1673
1674		xfs_iext_next(ifp, &bma->icur);
1675		xfs_iext_remove(bma->ip, &bma->icur, state);
1676		xfs_iext_prev(ifp, &bma->icur);
1677		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1678
1679		if (bma->cur == NULL)
1680			rval = XFS_ILOG_DEXT;
1681		else {
1682			rval = 0;
			/* btree: RIGHT's record is rewritten as the merged PREV */
1683			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1684			if (error)
1685				goto done;
1686			if (XFS_IS_CORRUPT(mp, i != 1)) {
1687				error = -EFSCORRUPTED;
1688				goto done;
1689			}
1690			error = xfs_bmbt_update(bma->cur, &PREV);
1691			if (error)
1692				goto done;
1693		}
1694		break;
1695
1696	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1697		/*
1698		 * Filling in all of a previously delayed allocation extent.
1699		 * Neither the left nor right neighbors are contiguous with
1700		 * the new one.
1701		 */
1702		PREV.br_startblock = new->br_startblock;
1703		PREV.br_state = new->br_state;
1704		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1705		ifp->if_nextents++;
1706
1707		if (bma->cur == NULL)
1708			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1709		else {
1710			rval = XFS_ILOG_CORE;
			/* the new record must not be in the btree yet (i == 0) */
1711			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1712			if (error)
1713				goto done;
1714			if (XFS_IS_CORRUPT(mp, i != 0)) {
1715				error = -EFSCORRUPTED;
1716				goto done;
1717			}
1718			error = xfs_btree_insert(bma->cur, &i);
1719			if (error)
1720				goto done;
1721			if (XFS_IS_CORRUPT(mp, i != 1)) {
1722				error = -EFSCORRUPTED;
1723				goto done;
1724			}
1725		}
1726		break;
1727
1728	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1729		/*
1730		 * Filling in the first part of a previous delayed allocation.
1731		 * The left neighbor is contiguous.
1732		 */
1733		old = LEFT;
1734		temp = PREV.br_blockcount - new->br_blockcount;
		/* never let the new reservation exceed the old one */
1735		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1736				startblockval(PREV.br_startblock));
1737
1738		LEFT.br_blockcount += new->br_blockcount;
1739
1740		PREV.br_blockcount = temp;
1741		PREV.br_startoff += new->br_blockcount;
1742		PREV.br_startblock = nullstartblock(da_new);
1743
1744		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1745		xfs_iext_prev(ifp, &bma->icur);
1746		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1747
1748		if (bma->cur == NULL)
1749			rval = XFS_ILOG_DEXT;
1750		else {
1751			rval = 0;
1752			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1753			if (error)
1754				goto done;
1755			if (XFS_IS_CORRUPT(mp, i != 1)) {
1756				error = -EFSCORRUPTED;
1757				goto done;
1758			}
1759			error = xfs_bmbt_update(bma->cur, &LEFT);
1760			if (error)
1761				goto done;
1762		}
1763		break;
1764
1765	case BMAP_LEFT_FILLING:
1766		/*
1767		 * Filling in the first part of a previous delayed allocation.
1768		 * The left neighbor is not contiguous.
1769		 */
1770		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1771		ifp->if_nextents++;
1772
1773		if (bma->cur == NULL)
1774			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1775		else {
1776			rval = XFS_ILOG_CORE;
1777			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1778			if (error)
1779				goto done;
1780			if (XFS_IS_CORRUPT(mp, i != 0)) {
1781				error = -EFSCORRUPTED;
1782				goto done;
1783			}
1784			error = xfs_btree_insert(bma->cur, &i);
1785			if (error)
1786				goto done;
1787			if (XFS_IS_CORRUPT(mp, i != 1)) {
1788				error = -EFSCORRUPTED;
1789				goto done;
1790			}
1791		}
1792
1793		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1794			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1795					&bma->cur, 1, &tmp_rval, whichfork);
1796			rval |= tmp_rval;
1797			if (error)
1798				goto done;
1799		}
1800
		/*
		 * The reservation for the remaining delalloc blocks is capped
		 * at the old reservation minus whatever the btree cursor
		 * reports as allocated so far.
		 */
1801		temp = PREV.br_blockcount - new->br_blockcount;
1802		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1803			startblockval(PREV.br_startblock) -
1804			(bma->cur ? bma->cur->bc_ino.allocated : 0));
1805
		/* re-insert the remainder of PREV after the new real extent */
1806		PREV.br_startoff = new_endoff;
1807		PREV.br_blockcount = temp;
1808		PREV.br_startblock = nullstartblock(da_new);
1809		xfs_iext_next(ifp, &bma->icur);
1810		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1811		xfs_iext_prev(ifp, &bma->icur);
1812		break;
1813
1814	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1815		/*
1816		 * Filling in the last part of a previous delayed allocation.
1817		 * The right neighbor is contiguous with the new allocation.
1818		 */
1819		old = RIGHT;
1820		RIGHT.br_startoff = new->br_startoff;
1821		RIGHT.br_startblock = new->br_startblock;
1822		RIGHT.br_blockcount += new->br_blockcount;
1823
1824		if (bma->cur == NULL)
1825			rval = XFS_ILOG_DEXT;
1826		else {
1827			rval = 0;
1828			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1829			if (error)
1830				goto done;
1831			if (XFS_IS_CORRUPT(mp, i != 1)) {
1832				error = -EFSCORRUPTED;
1833				goto done;
1834			}
1835			error = xfs_bmbt_update(bma->cur, &RIGHT);
1836			if (error)
1837				goto done;
1838		}
1839
1840		temp = PREV.br_blockcount - new->br_blockcount;
1841		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1842			startblockval(PREV.br_startblock));
1843
1844		PREV.br_blockcount = temp;
1845		PREV.br_startblock = nullstartblock(da_new);
1846
1847		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1848		xfs_iext_next(ifp, &bma->icur);
1849		xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
1850		break;
1851
1852	case BMAP_RIGHT_FILLING:
1853		/*
1854		 * Filling in the last part of a previous delayed allocation.
1855		 * The right neighbor is not contiguous.
1856		 */
1857		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1858		ifp->if_nextents++;
1859
1860		if (bma->cur == NULL)
1861			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1862		else {
1863			rval = XFS_ILOG_CORE;
1864			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1865			if (error)
1866				goto done;
1867			if (XFS_IS_CORRUPT(mp, i != 0)) {
1868				error = -EFSCORRUPTED;
1869				goto done;
1870			}
1871			error = xfs_btree_insert(bma->cur, &i);
1872			if (error)
1873				goto done;
1874			if (XFS_IS_CORRUPT(mp, i != 1)) {
1875				error = -EFSCORRUPTED;
1876				goto done;
1877			}
1878		}
1879
1880		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1881			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1882				&bma->cur, 1, &tmp_rval, whichfork);
1883			rval |= tmp_rval;
1884			if (error)
1885				goto done;
1886		}
1887
1888		temp = PREV.br_blockcount - new->br_blockcount;
1889		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1890			startblockval(PREV.br_startblock) -
1891			(bma->cur ? bma->cur->bc_ino.allocated : 0));
1892
		/* re-insert the shortened PREV in front of the new real extent */
1893		PREV.br_startblock = nullstartblock(da_new);
1894		PREV.br_blockcount = temp;
1895		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1896		xfs_iext_next(ifp, &bma->icur);
1897		break;
1898
1899	case 0:
1900		/*
1901		 * Filling in the middle part of a previous delayed allocation.
1902		 * Contiguity is impossible here.
1903		 * This case is avoided almost all the time.
1904		 *
1905		 * We start with a delayed allocation:
1906		 *
1907		 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
1908		 *  PREV @ idx
1909		 *
1910		 * and we are allocating:
1911		 *                     +rrrrrrrrrrrrrrrrr+
1912		 *			      new
1913		 *
1914		 * and we set it up for insertion as:
1915		 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
1916		 *                            new
1917		 *  PREV @ idx          LEFT              RIGHT
1918		 *                      inserted at idx + 1
1919		 */
1920		old = PREV;
1921
1922		/* LEFT is the new middle */
1923		LEFT = *new;
1924
1925		/* RIGHT is the new right */
1926		RIGHT.br_state = PREV.br_state;
1927		RIGHT.br_startoff = new_endoff;
1928		RIGHT.br_blockcount =
1929			PREV.br_startoff + PREV.br_blockcount - new_endoff;
1930		RIGHT.br_startblock =
1931			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1932					RIGHT.br_blockcount));
1933
1934		/* truncate PREV */
1935		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
1936		PREV.br_startblock =
1937			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1938					PREV.br_blockcount));
1939		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1940
1941		xfs_iext_next(ifp, &bma->icur);
1942		xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
1943		xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
1944		ifp->if_nextents++;
1945
1946		if (bma->cur == NULL)
1947			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1948		else {
1949			rval = XFS_ILOG_CORE;
1950			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1951			if (error)
1952				goto done;
1953			if (XFS_IS_CORRUPT(mp, i != 0)) {
1954				error = -EFSCORRUPTED;
1955				goto done;
1956			}
1957			error = xfs_btree_insert(bma->cur, &i);
1958			if (error)
1959				goto done;
1960			if (XFS_IS_CORRUPT(mp, i != 1)) {
1961				error = -EFSCORRUPTED;
1962				goto done;
1963			}
1964		}
1965
1966		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1967			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1968					&bma->cur, 1, &tmp_rval, whichfork);
1969			rval |= tmp_rval;
1970			if (error)
1971				goto done;
1972		}
1973
		/* two delalloc pieces remain; the new count is their sum */
1974		da_new = startblockval(PREV.br_startblock) +
1975			 startblockval(RIGHT.br_startblock);
1976		break;
1977
1978	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1979	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1980	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1981	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1982	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1983	case BMAP_LEFT_CONTIG:
1984	case BMAP_RIGHT_CONTIG:
1985		/*
1986		 * These cases are all impossible.
1987		 */
1988		ASSERT(0);
1989	}
1990
1991	/* add reverse mapping unless caller opted out */
1992	if (!(bma->flags & XFS_BMAPI_NORMAP))
1993		xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new);
1994
1995	/* convert to a btree if necessary */
1996	if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1997		int	tmp_logflags;	/* partial log flag return val */
1998
1999		ASSERT(bma->cur == NULL);
2000		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2001				&bma->cur, da_old > 0, &tmp_logflags,
2002				whichfork);
2003		bma->logflags |= tmp_logflags;
2004		if (error)
2005			goto done;
2006	}
2007
	/* report the change in delalloc count before cursor blocks are added */
2008	if (da_new != da_old)
2009		xfs_mod_delalloc(mp, (int64_t)da_new - da_old);
2010
	/* blocks the cursor reports as allocated count against da_new too */
2011	if (bma->cur) {
2012		da_new += bma->cur->bc_ino.allocated;
2013		bma->cur->bc_ino.allocated = 0;
2014	}
2015
2016	/* adjust for changes in reserved delayed indirect blocks */
2017	if (da_new != da_old) {
2018		ASSERT(state == 0 || da_new < da_old);
2019		error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
2020				false);
2021	}
2022
2023	xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
2024done:
	/* the per-case logging flags are not propagated for the COW fork */
2025	if (whichfork != XFS_COW_FORK)
2026		bma->logflags |= rval;
2027	return error;
2028#undef	LEFT
2029#undef	RIGHT
2030#undef	PREV
2031}
2032
2033/*
2034 * Convert an unwritten allocation to a real allocation or vice versa.
2035 */
2036int					/* error */
2037xfs_bmap_add_extent_unwritten_real(
2038	struct xfs_trans	*tp,
2039	xfs_inode_t		*ip,	/* incore inode pointer */
2040	int			whichfork,
2041	struct xfs_iext_cursor	*icur,
2042	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
2043	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
2044	int			*logflagsp) /* inode logging flags */
2045{
2046	xfs_btree_cur_t		*cur;	/* btree cursor */
2047	int			error;	/* error return value */
2048	int			i;	/* temp state */
2049	struct xfs_ifork	*ifp;	/* inode fork pointer */
2050	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
2051	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
2052					/* left is 0, right is 1, prev is 2 */
2053	int			rval=0;	/* return value (logging flags) */
2054	int			state = xfs_bmap_fork_to_state(whichfork);
2055	struct xfs_mount	*mp = ip->i_mount;
2056	struct xfs_bmbt_irec	old;
2057
2058	*logflagsp = 0;
2059
2060	cur = *curp;
2061	ifp = XFS_IFORK_PTR(ip, whichfork);
2062
2063	ASSERT(!isnullstartblock(new->br_startblock));
2064
2065	XFS_STATS_INC(mp, xs_add_exlist);
2066
2067#define	LEFT		r[0]
2068#define	RIGHT		r[1]
2069#define	PREV		r[2]
2070
2071	/*
2072	 * Set up a bunch of variables to make the tests simpler.
2073	 */
2074	error = 0;
2075	xfs_iext_get_extent(ifp, icur, &PREV);
2076	ASSERT(new->br_state != PREV.br_state);
2077	new_endoff = new->br_startoff + new->br_blockcount;
2078	ASSERT(PREV.br_startoff <= new->br_startoff);
2079	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2080
2081	/*
2082	 * Set flags determining what part of the previous oldext allocation
2083	 * extent is being replaced by a newext allocation.
2084	 */
2085	if (PREV.br_startoff == new->br_startoff)
2086		state |= BMAP_LEFT_FILLING;
2087	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2088		state |= BMAP_RIGHT_FILLING;
2089
2090	/*
2091	 * Check and set flags if this segment has a left neighbor.
2092	 * Don't set contiguous if the combined extent would be too large.
2093	 */
2094	if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) {
2095		state |= BMAP_LEFT_VALID;
2096		if (isnullstartblock(LEFT.br_startblock))
2097			state |= BMAP_LEFT_DELAY;
2098	}
2099
2100	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2101	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2102	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2103	    LEFT.br_state == new->br_state &&
2104	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2105		state |= BMAP_LEFT_CONTIG;
2106
2107	/*
2108	 * Check and set flags if this segment has a right neighbor.
2109	 * Don't set contiguous if the combined extent would be too large.
2110	 * Also check for all-three-contiguous being too large.
2111	 */
2112	if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) {
2113		state |= BMAP_RIGHT_VALID;
2114		if (isnullstartblock(RIGHT.br_startblock))
2115			state |= BMAP_RIGHT_DELAY;
2116	}
2117
2118	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2119	    new_endoff == RIGHT.br_startoff &&
2120	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2121	    new->br_state == RIGHT.br_state &&
2122	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
2123	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2124		       BMAP_RIGHT_FILLING)) !=
2125		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2126		       BMAP_RIGHT_FILLING) ||
2127	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2128			<= MAXEXTLEN))
2129		state |= BMAP_RIGHT_CONTIG;
2130
2131	/*
2132	 * Switch out based on the FILLING and CONTIG state bits.
2133	 */
2134	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2135			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2136	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2137	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2138		/*
2139		 * Setting all of a previous oldext extent to newext.
2140		 * The left and right neighbors are both contiguous with new.
2141		 */
2142		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
2143
2144		xfs_iext_remove(ip, icur, state);
2145		xfs_iext_remove(ip, icur, state);
2146		xfs_iext_prev(ifp, icur);
2147		xfs_iext_update_extent(ip, state, icur, &LEFT);
2148		ifp->if_nextents -= 2;
2149		if (cur == NULL)
2150			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2151		else {
2152			rval = XFS_ILOG_CORE;
2153			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2154			if (error)
2155				goto done;
2156			if (XFS_IS_CORRUPT(mp, i != 1)) {
2157				error = -EFSCORRUPTED;
2158				goto done;
2159			}
2160			if ((error = xfs_btree_delete(cur, &i)))
2161				goto done;
2162			if (XFS_IS_CORRUPT(mp, i != 1)) {
2163				error = -EFSCORRUPTED;
2164				goto done;
2165			}
2166			if ((error = xfs_btree_decrement(cur, 0, &i)))
2167				goto done;
2168			if (XFS_IS_CORRUPT(mp, i != 1)) {
2169				error = -EFSCORRUPTED;
2170				goto done;
2171			}
2172			if ((error = xfs_btree_delete(cur, &i)))
2173				goto done;
2174			if (XFS_IS_CORRUPT(mp, i != 1)) {
2175				error = -EFSCORRUPTED;
2176				goto done;
2177			}
2178			if ((error = xfs_btree_decrement(cur, 0, &i)))
2179				goto done;
2180			if (XFS_IS_CORRUPT(mp, i != 1)) {
2181				error = -EFSCORRUPTED;
2182				goto done;
2183			}
2184			error = xfs_bmbt_update(cur, &LEFT);
2185			if (error)
2186				goto done;
2187		}
2188		break;
2189
2190	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2191		/*
2192		 * Setting all of a previous oldext extent to newext.
2193		 * The left neighbor is contiguous, the right is not.
2194		 */
2195		LEFT.br_blockcount += PREV.br_blockcount;
2196
2197		xfs_iext_remove(ip, icur, state);
2198		xfs_iext_prev(ifp, icur);
2199		xfs_iext_update_extent(ip, state, icur, &LEFT);
2200		ifp->if_nextents--;
2201		if (cur == NULL)
2202			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2203		else {
2204			rval = XFS_ILOG_CORE;
2205			error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
2206			if (error)
2207				goto done;
2208			if (XFS_IS_CORRUPT(mp, i != 1)) {
2209				error = -EFSCORRUPTED;
2210				goto done;
2211			}
2212			if ((error = xfs_btree_delete(cur, &i)))
2213				goto done;
2214			if (XFS_IS_CORRUPT(mp, i != 1)) {
2215				error = -EFSCORRUPTED;
2216				goto done;
2217			}
2218			if ((error = xfs_btree_decrement(cur, 0, &i)))
2219				goto done;
2220			if (XFS_IS_CORRUPT(mp, i != 1)) {
2221				error = -EFSCORRUPTED;
2222				goto done;
2223			}
2224			error = xfs_bmbt_update(cur, &LEFT);
2225			if (error)
2226				goto done;
2227		}
2228		break;
2229
2230	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2231		/*
2232		 * Setting all of a previous oldext extent to newext.
2233		 * The right neighbor is contiguous, the left is not.
2234		 */
2235		PREV.br_blockcount += RIGHT.br_blockcount;
2236		PREV.br_state = new->br_state;
2237
2238		xfs_iext_next(ifp, icur);
2239		xfs_iext_remove(ip, icur, state);
2240		xfs_iext_prev(ifp, icur);
2241		xfs_iext_update_extent(ip, state, icur, &PREV);
2242		ifp->if_nextents--;
2243
2244		if (cur == NULL)
2245			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2246		else {
2247			rval = XFS_ILOG_CORE;
2248			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2249			if (error)
2250				goto done;
2251			if (XFS_IS_CORRUPT(mp, i != 1)) {
2252				error = -EFSCORRUPTED;
2253				goto done;
2254			}
2255			if ((error = xfs_btree_delete(cur, &i)))
2256				goto done;
2257			if (XFS_IS_CORRUPT(mp, i != 1)) {
2258				error = -EFSCORRUPTED;
2259				goto done;
2260			}
2261			if ((error = xfs_btree_decrement(cur, 0, &i)))
2262				goto done;
2263			if (XFS_IS_CORRUPT(mp, i != 1)) {
2264				error = -EFSCORRUPTED;
2265				goto done;
2266			}
2267			error = xfs_bmbt_update(cur, &PREV);
2268			if (error)
2269				goto done;
2270		}
2271		break;
2272
2273	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2274		/*
2275		 * Setting all of a previous oldext extent to newext.
2276		 * Neither the left nor right neighbors are contiguous with
2277		 * the new one.
2278		 */
2279		PREV.br_state = new->br_state;
2280		xfs_iext_update_extent(ip, state, icur, &PREV);
2281
2282		if (cur == NULL)
2283			rval = XFS_ILOG_DEXT;
2284		else {
2285			rval = 0;
2286			error = xfs_bmbt_lookup_eq(cur, new, &i);
2287			if (error)
2288				goto done;
2289			if (XFS_IS_CORRUPT(mp, i != 1)) {
2290				error = -EFSCORRUPTED;
2291				goto done;
2292			}
2293			error = xfs_bmbt_update(cur, &PREV);
2294			if (error)
2295				goto done;
2296		}
2297		break;
2298
2299	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2300		/*
2301		 * Setting the first part of a previous oldext extent to newext.
2302		 * The left neighbor is contiguous.
2303		 */
2304		LEFT.br_blockcount += new->br_blockcount;
2305
2306		old = PREV;
2307		PREV.br_startoff += new->br_blockcount;
2308		PREV.br_startblock += new->br_blockcount;
2309		PREV.br_blockcount -= new->br_blockcount;
2310
2311		xfs_iext_update_extent(ip, state, icur, &PREV);
2312		xfs_iext_prev(ifp, icur);
2313		xfs_iext_update_extent(ip, state, icur, &LEFT);
2314
2315		if (cur == NULL)
2316			rval = XFS_ILOG_DEXT;
2317		else {
2318			rval = 0;
2319			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2320			if (error)
2321				goto done;
2322			if (XFS_IS_CORRUPT(mp, i != 1)) {
2323				error = -EFSCORRUPTED;
2324				goto done;
2325			}
2326			error = xfs_bmbt_update(cur, &PREV);
2327			if (error)
2328				goto done;
2329			error = xfs_btree_decrement(cur, 0, &i);
2330			if (error)
2331				goto done;
2332			error = xfs_bmbt_update(cur, &LEFT);
2333			if (error)
2334				goto done;
2335		}
2336		break;
2337
2338	case BMAP_LEFT_FILLING:
2339		/*
2340		 * Setting the first part of a previous oldext extent to newext.
2341		 * The left neighbor is not contiguous.
2342		 */
2343		old = PREV;
2344		PREV.br_startoff += new->br_blockcount;
2345		PREV.br_startblock += new->br_blockcount;
2346		PREV.br_blockcount -= new->br_blockcount;
2347
2348		xfs_iext_update_extent(ip, state, icur, &PREV);
2349		xfs_iext_insert(ip, icur, new, state);
2350		ifp->if_nextents++;
2351
2352		if (cur == NULL)
2353			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2354		else {
2355			rval = XFS_ILOG_CORE;
2356			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2357			if (error)
2358				goto done;
2359			if (XFS_IS_CORRUPT(mp, i != 1)) {
2360				error = -EFSCORRUPTED;
2361				goto done;
2362			}
2363			error = xfs_bmbt_update(cur, &PREV);
2364			if (error)
2365				goto done;
2366			cur->bc_rec.b = *new;
2367			if ((error = xfs_btree_insert(cur, &i)))
2368				goto done;
2369			if (XFS_IS_CORRUPT(mp, i != 1)) {
2370				error = -EFSCORRUPTED;
2371				goto done;
2372			}
2373		}
2374		break;
2375
2376	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2377		/*
2378		 * Setting the last part of a previous oldext extent to newext.
2379		 * The right neighbor is contiguous with the new allocation.
2380		 */
2381		old = PREV;
2382		PREV.br_blockcount -= new->br_blockcount;
2383
2384		RIGHT.br_startoff = new->br_startoff;
2385		RIGHT.br_startblock = new->br_startblock;
2386		RIGHT.br_blockcount += new->br_blockcount;
2387
2388		xfs_iext_update_extent(ip, state, icur, &PREV);
2389		xfs_iext_next(ifp, icur);
2390		xfs_iext_update_extent(ip, state, icur, &RIGHT);
2391
2392		if (cur == NULL)
2393			rval = XFS_ILOG_DEXT;
2394		else {
2395			rval = 0;
2396			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2397			if (error)
2398				goto done;
2399			if (XFS_IS_CORRUPT(mp, i != 1)) {
2400				error = -EFSCORRUPTED;
2401				goto done;
2402			}
2403			error = xfs_bmbt_update(cur, &PREV);
2404			if (error)
2405				goto done;
2406			error = xfs_btree_increment(cur, 0, &i);
2407			if (error)
2408				goto done;
2409			error = xfs_bmbt_update(cur, &RIGHT);
2410			if (error)
2411				goto done;
2412		}
2413		break;
2414
2415	case BMAP_RIGHT_FILLING:
2416		/*
2417		 * Setting the last part of a previous oldext extent to newext.
2418		 * The right neighbor is not contiguous.
2419		 */
2420		old = PREV;
2421		PREV.br_blockcount -= new->br_blockcount;
2422
2423		xfs_iext_update_extent(ip, state, icur, &PREV);
2424		xfs_iext_next(ifp, icur);
2425		xfs_iext_insert(ip, icur, new, state);
2426		ifp->if_nextents++;
2427
2428		if (cur == NULL)
2429			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2430		else {
2431			rval = XFS_ILOG_CORE;
2432			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2433			if (error)
2434				goto done;
2435			if (XFS_IS_CORRUPT(mp, i != 1)) {
2436				error = -EFSCORRUPTED;
2437				goto done;
2438			}
2439			error = xfs_bmbt_update(cur, &PREV);
2440			if (error)
2441				goto done;
2442			error = xfs_bmbt_lookup_eq(cur, new, &i);
2443			if (error)
2444				goto done;
2445			if (XFS_IS_CORRUPT(mp, i != 0)) {
2446				error = -EFSCORRUPTED;
2447				goto done;
2448			}
2449			if ((error = xfs_btree_insert(cur, &i)))
2450				goto done;
2451			if (XFS_IS_CORRUPT(mp, i != 1)) {
2452				error = -EFSCORRUPTED;
2453				goto done;
2454			}
2455		}
2456		break;
2457
2458	case 0:
2459		/*
2460		 * Setting the middle part of a previous oldext extent to
2461		 * newext.  Contiguity is impossible here.
2462		 * One extent becomes three extents.
2463		 */
2464		old = PREV;
2465		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
2466
2467		r[0] = *new;
2468		r[1].br_startoff = new_endoff;
2469		r[1].br_blockcount =
2470			old.br_startoff + old.br_blockcount - new_endoff;
2471		r[1].br_startblock = new->br_startblock + new->br_blockcount;
2472		r[1].br_state = PREV.br_state;
2473
2474		xfs_iext_update_extent(ip, state, icur, &PREV);
2475		xfs_iext_next(ifp, icur);
2476		xfs_iext_insert(ip, icur, &r[1], state);
2477		xfs_iext_insert(ip, icur, &r[0], state);
2478		ifp->if_nextents += 2;
2479
2480		if (cur == NULL)
2481			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2482		else {
2483			rval = XFS_ILOG_CORE;
2484			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2485			if (error)
2486				goto done;
2487			if (XFS_IS_CORRUPT(mp, i != 1)) {
2488				error = -EFSCORRUPTED;
2489				goto done;
2490			}
2491			/* new right extent - oldext */
2492			error = xfs_bmbt_update(cur, &r[1]);
2493			if (error)
2494				goto done;
2495			/* new left extent - oldext */
2496			cur->bc_rec.b = PREV;
2497			if ((error = xfs_btree_insert(cur, &i)))
2498				goto done;
2499			if (XFS_IS_CORRUPT(mp, i != 1)) {
2500				error = -EFSCORRUPTED;
2501				goto done;
2502			}
2503			/*
2504			 * Reset the cursor to the position of the new extent
2505			 * we are about to insert as we can't trust it after
2506			 * the previous insert.
2507			 */
2508			error = xfs_bmbt_lookup_eq(cur, new, &i);
2509			if (error)
2510				goto done;
2511			if (XFS_IS_CORRUPT(mp, i != 0)) {
2512				error = -EFSCORRUPTED;
2513				goto done;
2514			}
2515			/* new middle extent - newext */
2516			if ((error = xfs_btree_insert(cur, &i)))
2517				goto done;
2518			if (XFS_IS_CORRUPT(mp, i != 1)) {
2519				error = -EFSCORRUPTED;
2520				goto done;
2521			}
2522		}
2523		break;
2524
2525	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2526	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2527	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2528	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2529	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2530	case BMAP_LEFT_CONTIG:
2531	case BMAP_RIGHT_CONTIG:
2532		/*
2533		 * These cases are all impossible.
2534		 */
2535		ASSERT(0);
2536	}
2537
2538	/* update reverse mappings */
2539	xfs_rmap_convert_extent(mp, tp, ip, whichfork, new);
2540
2541	/* convert to a btree if necessary */
2542	if (xfs_bmap_needs_btree(ip, whichfork)) {
2543		int	tmp_logflags;	/* partial log flag return val */
2544
2545		ASSERT(cur == NULL);
2546		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
2547				&tmp_logflags, whichfork);
2548		*logflagsp |= tmp_logflags;
2549		if (error)
2550			goto done;
2551	}
2552
2553	/* clear out the allocated field, done with it now in any case. */
2554	if (cur) {
2555		cur->bc_ino.allocated = 0;
2556		*curp = cur;
2557	}
2558
2559	xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
2560done:
2561	*logflagsp |= rval;
2562	return error;
2563#undef	LEFT
2564#undef	RIGHT
2565#undef	PREV
2566}
2567
2568/*
2569 * Convert a hole to a delayed allocation.
2570 */
2571STATIC void
2572xfs_bmap_add_extent_hole_delay(
2573	xfs_inode_t		*ip,	/* incore inode pointer */
2574	int			whichfork,
2575	struct xfs_iext_cursor	*icur,
2576	xfs_bmbt_irec_t		*new)	/* new data to add to file extents */
2577{
2578	struct xfs_ifork	*ifp;	/* inode fork pointer */
2579	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
2580	xfs_filblks_t		newlen=0;	/* new indirect size */
2581	xfs_filblks_t		oldlen=0;	/* old indirect size */
2582	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
2583	int			state = xfs_bmap_fork_to_state(whichfork);
2584	xfs_filblks_t		temp;	 /* temp for indirect calculations */
2585
2586	ifp = XFS_IFORK_PTR(ip, whichfork);
2587	ASSERT(isnullstartblock(new->br_startblock));
2588
2589	/*
2590	 * Check and set flags if this segment has a left neighbor
2591	 */
2592	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2593		state |= BMAP_LEFT_VALID;
2594		if (isnullstartblock(left.br_startblock))
2595			state |= BMAP_LEFT_DELAY;
2596	}
2597
2598	/*
2599	 * Check and set flags if the current (right) segment exists.
2600	 * If it doesn't exist, we're converting the hole at end-of-file.
2601	 */
2602	if (xfs_iext_get_extent(ifp, icur, &right)) {
2603		state |= BMAP_RIGHT_VALID;
2604		if (isnullstartblock(right.br_startblock))
2605			state |= BMAP_RIGHT_DELAY;
2606	}
2607
2608	/*
2609	 * Set contiguity flags on the left and right neighbors.
2610	 * Don't let extents get too large, even if the pieces are contiguous.
2611	 */
2612	if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
2613	    left.br_startoff + left.br_blockcount == new->br_startoff &&
2614	    left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2615		state |= BMAP_LEFT_CONTIG;
2616
2617	if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
2618	    new->br_startoff + new->br_blockcount == right.br_startoff &&
2619	    new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2620	    (!(state & BMAP_LEFT_CONTIG) ||
2621	     (left.br_blockcount + new->br_blockcount +
2622	      right.br_blockcount <= MAXEXTLEN)))
2623		state |= BMAP_RIGHT_CONTIG;
2624
2625	/*
2626	 * Switch out based on the contiguity flags.
2627	 */
2628	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2629	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2630		/*
2631		 * New allocation is contiguous with delayed allocations
2632		 * on the left and on the right.
2633		 * Merge all three into a single extent record.
2634		 */
2635		temp = left.br_blockcount + new->br_blockcount +
2636			right.br_blockcount;
2637
2638		oldlen = startblockval(left.br_startblock) +
2639			startblockval(new->br_startblock) +
2640			startblockval(right.br_startblock);
2641		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2642					 oldlen);
2643		left.br_startblock = nullstartblock(newlen);
2644		left.br_blockcount = temp;
2645
2646		xfs_iext_remove(ip, icur, state);
2647		xfs_iext_prev(ifp, icur);
2648		xfs_iext_update_extent(ip, state, icur, &left);
2649		break;
2650
2651	case BMAP_LEFT_CONTIG:
2652		/*
2653		 * New allocation is contiguous with a delayed allocation
2654		 * on the left.
2655		 * Merge the new allocation with the left neighbor.
2656		 */
2657		temp = left.br_blockcount + new->br_blockcount;
2658
2659		oldlen = startblockval(left.br_startblock) +
2660			startblockval(new->br_startblock);
2661		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2662					 oldlen);
2663		left.br_blockcount = temp;
2664		left.br_startblock = nullstartblock(newlen);
2665
2666		xfs_iext_prev(ifp, icur);
2667		xfs_iext_update_extent(ip, state, icur, &left);
2668		break;
2669
2670	case BMAP_RIGHT_CONTIG:
2671		/*
2672		 * New allocation is contiguous with a delayed allocation
2673		 * on the right.
2674		 * Merge the new allocation with the right neighbor.
2675		 */
2676		temp = new->br_blockcount + right.br_blockcount;
2677		oldlen = startblockval(new->br_startblock) +
2678			startblockval(right.br_startblock);
2679		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2680					 oldlen);
2681		right.br_startoff = new->br_startoff;
2682		right.br_startblock = nullstartblock(newlen);
2683		right.br_blockcount = temp;
2684		xfs_iext_update_extent(ip, state, icur, &right);
2685		break;
2686
2687	case 0:
2688		/*
2689		 * New allocation is not contiguous with another
2690		 * delayed allocation.
2691		 * Insert a new entry.
2692		 */
2693		oldlen = newlen = 0;
2694		xfs_iext_insert(ip, icur, new, state);
2695		break;
2696	}
2697	if (oldlen != newlen) {
2698		ASSERT(oldlen > newlen);
2699		xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2700				 false);
2701		/*
2702		 * Nothing to do for disk quota accounting here.
2703		 */
2704		xfs_mod_delalloc(ip->i_mount, (int64_t)newlen - oldlen);
2705	}
2706}
2707
2708/*
2709 * Convert a hole to a real allocation.
2710 */
2711STATIC int				/* error */
2712xfs_bmap_add_extent_hole_real(
2713	struct xfs_trans	*tp,
2714	struct xfs_inode	*ip,
2715	int			whichfork,
2716	struct xfs_iext_cursor	*icur,
2717	struct xfs_btree_cur	**curp,
2718	struct xfs_bmbt_irec	*new,
2719	int			*logflagsp,
2720	int			flags)
2721{
2722	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
2723	struct xfs_mount	*mp = ip->i_mount;
2724	struct xfs_btree_cur	*cur = *curp;
2725	int			error;	/* error return value */
2726	int			i;	/* temp state */
2727	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
2728	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
2729	int			rval=0;	/* return value (logging flags) */
2730	int			state = xfs_bmap_fork_to_state(whichfork);
2731	struct xfs_bmbt_irec	old;
2732
2733	ASSERT(!isnullstartblock(new->br_startblock));
2734	ASSERT(!cur || !(cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
2735
2736	XFS_STATS_INC(mp, xs_add_exlist);
2737
2738	/*
2739	 * Check and set flags if this segment has a left neighbor.
2740	 */
2741	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2742		state |= BMAP_LEFT_VALID;
2743		if (isnullstartblock(left.br_startblock))
2744			state |= BMAP_LEFT_DELAY;
2745	}
2746
2747	/*
2748	 * Check and set flags if this segment has a current value.
2749	 * Not true if we're inserting into the "hole" at eof.
2750	 */
2751	if (xfs_iext_get_extent(ifp, icur, &right)) {
2752		state |= BMAP_RIGHT_VALID;
2753		if (isnullstartblock(right.br_startblock))
2754			state |= BMAP_RIGHT_DELAY;
2755	}
2756
2757	/*
2758	 * We're inserting a real allocation between "left" and "right".
2759	 * Set the contiguity flags.  Don't let extents get too large.
2760	 */
2761	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2762	    left.br_startoff + left.br_blockcount == new->br_startoff &&
2763	    left.br_startblock + left.br_blockcount == new->br_startblock &&
2764	    left.br_state == new->br_state &&
2765	    left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2766		state |= BMAP_LEFT_CONTIG;
2767
2768	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2769	    new->br_startoff + new->br_blockcount == right.br_startoff &&
2770	    new->br_startblock + new->br_blockcount == right.br_startblock &&
2771	    new->br_state == right.br_state &&
2772	    new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2773	    (!(state & BMAP_LEFT_CONTIG) ||
2774	     left.br_blockcount + new->br_blockcount +
2775	     right.br_blockcount <= MAXEXTLEN))
2776		state |= BMAP_RIGHT_CONTIG;
2777
2778	error = 0;
2779	/*
2780	 * Select which case we're in here, and implement it.
2781	 */
2782	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2783	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2784		/*
2785		 * New allocation is contiguous with real allocations on the
2786		 * left and on the right.
2787		 * Merge all three into a single extent record.
2788		 */
2789		left.br_blockcount += new->br_blockcount + right.br_blockcount;
2790
2791		xfs_iext_remove(ip, icur, state);
2792		xfs_iext_prev(ifp, icur);
2793		xfs_iext_update_extent(ip, state, icur, &left);
2794		ifp->if_nextents--;
2795
2796		if (cur == NULL) {
2797			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2798		} else {
2799			rval = XFS_ILOG_CORE;
2800			error = xfs_bmbt_lookup_eq(cur, &right, &i);
2801			if (error)
2802				goto done;
2803			if (XFS_IS_CORRUPT(mp, i != 1)) {
2804				error = -EFSCORRUPTED;
2805				goto done;
2806			}
2807			error = xfs_btree_delete(cur, &i);
2808			if (error)
2809				goto done;
2810			if (XFS_IS_CORRUPT(mp, i != 1)) {
2811				error = -EFSCORRUPTED;
2812				goto done;
2813			}
2814			error = xfs_btree_decrement(cur, 0, &i);
2815			if (error)
2816				goto done;
2817			if (XFS_IS_CORRUPT(mp, i != 1)) {
2818				error = -EFSCORRUPTED;
2819				goto done;
2820			}
2821			error = xfs_bmbt_update(cur, &left);
2822			if (error)
2823				goto done;
2824		}
2825		break;
2826
2827	case BMAP_LEFT_CONTIG:
2828		/*
2829		 * New allocation is contiguous with a real allocation
2830		 * on the left.
2831		 * Merge the new allocation with the left neighbor.
2832		 */
2833		old = left;
2834		left.br_blockcount += new->br_blockcount;
2835
2836		xfs_iext_prev(ifp, icur);
2837		xfs_iext_update_extent(ip, state, icur, &left);
2838
2839		if (cur == NULL) {
2840			rval = xfs_ilog_fext(whichfork);
2841		} else {
2842			rval = 0;
2843			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2844			if (error)
2845				goto done;
2846			if (XFS_IS_CORRUPT(mp, i != 1)) {
2847				error = -EFSCORRUPTED;
2848				goto done;
2849			}
2850			error = xfs_bmbt_update(cur, &left);
2851			if (error)
2852				goto done;
2853		}
2854		break;
2855
2856	case BMAP_RIGHT_CONTIG:
2857		/*
2858		 * New allocation is contiguous with a real allocation
2859		 * on the right.
2860		 * Merge the new allocation with the right neighbor.
2861		 */
2862		old = right;
2863
2864		right.br_startoff = new->br_startoff;
2865		right.br_startblock = new->br_startblock;
2866		right.br_blockcount += new->br_blockcount;
2867		xfs_iext_update_extent(ip, state, icur, &right);
2868
2869		if (cur == NULL) {
2870			rval = xfs_ilog_fext(whichfork);
2871		} else {
2872			rval = 0;
2873			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2874			if (error)
2875				goto done;
2876			if (XFS_IS_CORRUPT(mp, i != 1)) {
2877				error = -EFSCORRUPTED;
2878				goto done;
2879			}
2880			error = xfs_bmbt_update(cur, &right);
2881			if (error)
2882				goto done;
2883		}
2884		break;
2885
2886	case 0:
2887		/*
2888		 * New allocation is not contiguous with another
2889		 * real allocation.
2890		 * Insert a new entry.
2891		 */
2892		xfs_iext_insert(ip, icur, new, state);
2893		ifp->if_nextents++;
2894
2895		if (cur == NULL) {
2896			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2897		} else {
2898			rval = XFS_ILOG_CORE;
2899			error = xfs_bmbt_lookup_eq(cur, new, &i);
2900			if (error)
2901				goto done;
2902			if (XFS_IS_CORRUPT(mp, i != 0)) {
2903				error = -EFSCORRUPTED;
2904				goto done;
2905			}
2906			error = xfs_btree_insert(cur, &i);
2907			if (error)
2908				goto done;
2909			if (XFS_IS_CORRUPT(mp, i != 1)) {
2910				error = -EFSCORRUPTED;
2911				goto done;
2912			}
2913		}
2914		break;
2915	}
2916
2917	/* add reverse mapping unless caller opted out */
2918	if (!(flags & XFS_BMAPI_NORMAP))
2919		xfs_rmap_map_extent(tp, ip, whichfork, new);
2920
2921	/* convert to a btree if necessary */
2922	if (xfs_bmap_needs_btree(ip, whichfork)) {
2923		int	tmp_logflags;	/* partial log flag return val */
2924
2925		ASSERT(cur == NULL);
2926		error = xfs_bmap_extents_to_btree(tp, ip, curp, 0,
2927				&tmp_logflags, whichfork);
2928		*logflagsp |= tmp_logflags;
2929		cur = *curp;
2930		if (error)
2931			goto done;
2932	}
2933
2934	/* clear out the allocated field, done with it now in any case. */
2935	if (cur)
2936		cur->bc_ino.allocated = 0;
2937
2938	xfs_bmap_check_leaf_extents(cur, ip, whichfork);
2939done:
2940	*logflagsp |= rval;
2941	return error;
2942}
2943
2944/*
2945 * Functions used in the extent read, allocate and remove paths
2946 */
2947
2948/*
2949 * Adjust the size of the new extent based on di_extsize and rt extsize.
2950 */
2951int
2952xfs_bmap_extsize_align(
2953	xfs_mount_t	*mp,
2954	xfs_bmbt_irec_t	*gotp,		/* next extent pointer */
2955	xfs_bmbt_irec_t	*prevp,		/* previous extent pointer */
2956	xfs_extlen_t	extsz,		/* align to this extent size */
2957	int		rt,		/* is this a realtime inode? */
2958	int		eof,		/* is extent at end-of-file? */
2959	int		delay,		/* creating delalloc extent? */
2960	int		convert,	/* overwriting unwritten extent? */
2961	xfs_fileoff_t	*offp,		/* in/out: aligned offset */
2962	xfs_extlen_t	*lenp)		/* in/out: aligned length */
2963{
2964	xfs_fileoff_t	orig_off;	/* original offset */
2965	xfs_extlen_t	orig_alen;	/* original length */
2966	xfs_fileoff_t	orig_end;	/* original off+len */
2967	xfs_fileoff_t	nexto;		/* next file offset */
2968	xfs_fileoff_t	prevo;		/* previous file offset */
2969	xfs_fileoff_t	align_off;	/* temp for offset */
2970	xfs_extlen_t	align_alen;	/* temp for length */
2971	xfs_extlen_t	temp;		/* temp for calculations */
2972
2973	if (convert)
2974		return 0;
2975
2976	orig_off = align_off = *offp;
2977	orig_alen = align_alen = *lenp;
2978	orig_end = orig_off + orig_alen;
2979
2980	/*
2981	 * If this request overlaps an existing extent, then don't
2982	 * attempt to perform any additional alignment.
2983	 */
2984	if (!delay && !eof &&
2985	    (orig_off >= gotp->br_startoff) &&
2986	    (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
2987		return 0;
2988	}
2989
2990	/*
2991	 * If the file offset is unaligned vs. the extent size
2992	 * we need to align it.  This will be possible unless
2993	 * the file was previously written with a kernel that didn't
2994	 * perform this alignment, or if a truncate shot us in the
2995	 * foot.
2996	 */
2997	div_u64_rem(orig_off, extsz, &temp);
2998	if (temp) {
2999		align_alen += temp;
3000		align_off -= temp;
3001	}
3002
3003	/* Same adjustment for the end of the requested area. */
3004	temp = (align_alen % extsz);
3005	if (temp)
3006		align_alen += extsz - temp;
3007
3008	/*
3009	 * For large extent hint sizes, the aligned extent might be larger than
3010	 * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
3011	 * the length back under MAXEXTLEN. The outer allocation loops handle
3012	 * short allocation just fine, so it is safe to do this. We only want to
3013	 * do it when we are forced to, though, because it means more allocation
3014	 * operations are required.
3015	 */
3016	while (align_alen > MAXEXTLEN)
3017		align_alen -= extsz;
3018	ASSERT(align_alen <= MAXEXTLEN);
3019
3020	/*
3021	 * If the previous block overlaps with this proposed allocation
3022	 * then move the start forward without adjusting the length.
3023	 */
3024	if (prevp->br_startoff != NULLFILEOFF) {
3025		if (prevp->br_startblock == HOLESTARTBLOCK)
3026			prevo = prevp->br_startoff;
3027		else
3028			prevo = prevp->br_startoff + prevp->br_blockcount;
3029	} else
3030		prevo = 0;
3031	if (align_off != orig_off && align_off < prevo)
3032		align_off = prevo;
3033	/*
3034	 * If the next block overlaps with this proposed allocation
3035	 * then move the start back without adjusting the length,
3036	 * but not before offset 0.
3037	 * This may of course make the start overlap previous block,
3038	 * and if we hit the offset 0 limit then the next block
3039	 * can still overlap too.
3040	 */
3041	if (!eof && gotp->br_startoff != NULLFILEOFF) {
3042		if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
3043		    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
3044			nexto = gotp->br_startoff + gotp->br_blockcount;
3045		else
3046			nexto = gotp->br_startoff;
3047	} else
3048		nexto = NULLFILEOFF;
3049	if (!eof &&
3050	    align_off + align_alen != orig_end &&
3051	    align_off + align_alen > nexto)
3052		align_off = nexto > align_alen ? nexto - align_alen : 0;
3053	/*
3054	 * If we're now overlapping the next or previous extent that
3055	 * means we can't fit an extsz piece in this hole.  Just move
3056	 * the start forward to the first valid spot and set
3057	 * the length so we hit the end.
3058	 */
3059	if (align_off != orig_off && align_off < prevo)
3060		align_off = prevo;
3061	if (align_off + align_alen != orig_end &&
3062	    align_off + align_alen > nexto &&
3063	    nexto != NULLFILEOFF) {
3064		ASSERT(nexto > prevo);
3065		align_alen = nexto - align_off;
3066	}
3067
3068	/*
3069	 * If realtime, and the result isn't a multiple of the realtime
3070	 * extent size we need to remove blocks until it is.
3071	 */
3072	if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
3073		/*
3074		 * We're not covering the original request, or
3075		 * we won't be able to once we fix the length.
3076		 */
3077		if (orig_off < align_off ||
3078		    orig_end > align_off + align_alen ||
3079		    align_alen - temp < orig_alen)
3080			return -EINVAL;
3081		/*
3082		 * Try to fix it by moving the start up.
3083		 */
3084		if (align_off + temp <= orig_off) {
3085			align_alen -= temp;
3086			align_off += temp;
3087		}
3088		/*
3089		 * Try to fix it by moving the end in.
3090		 */
3091		else if (align_off + align_alen - temp >= orig_end)
3092			align_alen -= temp;
3093		/*
3094		 * Set the start to the minimum then trim the length.
3095		 */
3096		else {
3097			align_alen -= orig_off - align_off;
3098			align_off = orig_off;
3099			align_alen -= align_alen % mp->m_sb.sb_rextsize;
3100		}
3101		/*
3102		 * Result doesn't cover the request, fail it.
3103		 */
3104		if (orig_off < align_off || orig_end > align_off + align_alen)
3105			return -EINVAL;
3106	} else {
3107		ASSERT(orig_off >= align_off);
3108		/* see MAXEXTLEN handling above */
3109		ASSERT(orig_end <= align_off + align_alen ||
3110		       align_alen + extsz > MAXEXTLEN);
3111	}
3112
3113#ifdef DEBUG
3114	if (!eof && gotp->br_startoff != NULLFILEOFF)
3115		ASSERT(align_off + align_alen <= gotp->br_startoff);
3116	if (prevp->br_startoff != NULLFILEOFF)
3117		ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3118#endif
3119
3120	*lenp = align_alen;
3121	*offp = align_off;
3122	return 0;
3123}
3124
/*
 * Gap threshold multiplier used by xfs_bmap_adjacent(): the hole between
 * the requested file offset and a neighbouring extent is only used to
 * offset the target block when it is no larger than this many times the
 * requested allocation length (ap->length).
 */
#define XFS_ALLOC_GAP_UNITS	4
3126
/*
 * Choose a target start block for a new allocation from the extents that
 * neighbour the requested file offset.
 *
 * ap->blkno holds the caller's default target on entry.  It is replaced
 * only when ap->prev and/or ap->got describe a real (non-null startblock)
 * extent that yields a candidate passing the ISVALID() check below.  In
 * the not-at-EOF case a candidate is additionally discarded when the
 * allocation is not realtime, tp->t_firstblock is set, and the candidate
 * is not in t_firstblock's AG.  If no candidate survives, ap->blkno is
 * left untouched.
 */
void
xfs_bmap_adjacent(
	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
{
	xfs_fsblock_t	adjust;		/* adjustment to block numbers */
	xfs_agnumber_t	fb_agno;	/* ag number of tp->t_firstblock */
	xfs_mount_t	*mp;		/* mount point structure */
	int		nullfb;		/* true if tp->t_firstblock isn't set */
	int		rt;		/* true if realtime inode and user data */

	/*
	 * ISVALID(x, y): is block x an acceptable target relative to the
	 * reference block y?  For realtime, x only has to be below
	 * sb_rblocks.  Otherwise x must map to the same AG number as y,
	 * that AG number must be below sb_agcount, and XFS_FSB_TO_AGBNO(x)
	 * must be below sb_agblocks.  Uses the locals rt and mp.
	 */
#define	ISVALID(x,y)	\
	(rt ? \
		(x) < mp->m_sb.sb_rblocks : \
		XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
		XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
		XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)

	mp = ap->ip->i_mount;
	nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
	rt = XFS_IS_REALTIME_INODE(ap->ip) &&
		(ap->datatype & XFS_ALLOC_USERDATA);
	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
							ap->tp->t_firstblock);
	/*
	 * If allocating at eof, and there's a previous real block,
	 * try to use its last block as our starting point.
	 */
	if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
	    !isnullstartblock(ap->prev.br_startblock) &&
	    ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
		    ap->prev.br_startblock)) {
		ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
		/*
		 * Adjust for the gap between prevp and us.
		 * The gap is only applied if the shifted block is still
		 * valid; otherwise the block right after prev is kept.
		 */
		adjust = ap->offset -
			(ap->prev.br_startoff + ap->prev.br_blockcount);
		if (adjust &&
		    ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
			ap->blkno += adjust;
	}
	/*
	 * If not at eof, then compare the two neighbor blocks.
	 * Figure out whether either one gives us a good starting point,
	 * and pick the better one.
	 */
	else if (!ap->eof) {
		xfs_fsblock_t	gotbno;		/* right side block number */
		xfs_fsblock_t	gotdiff=0;	/* right side difference */
		xfs_fsblock_t	prevbno;	/* left side block number */
		xfs_fsblock_t	prevdiff=0;	/* left side difference */

		/*
		 * If there's a previous (left) block, select a requested
		 * start block based on it.
		 * Note the assignment inside the condition: prevbno is set
		 * to the block following prev, and the test also fails if
		 * that value is zero.
		 */
		if (ap->prev.br_startoff != NULLFILEOFF &&
		    !isnullstartblock(ap->prev.br_startblock) &&
		    (prevbno = ap->prev.br_startblock +
			       ap->prev.br_blockcount) &&
		    ISVALID(prevbno, ap->prev.br_startblock)) {
			/*
			 * Calculate gap to end of previous block.
			 */
			adjust = prevdiff = ap->offset -
				(ap->prev.br_startoff +
				 ap->prev.br_blockcount);
			/*
			 * Figure the startblock based on the previous block's
			 * end and the gap size.
			 * Heuristic!
			 * If the gap is large relative to the piece we're
			 * allocating, or using it gives us an invalid block
			 * number, then just use the end of the previous block.
			 * In that fallback case prevdiff is doubled (adjust
			 * equals prevdiff here), which makes this candidate
			 * less likely to win the comparison against gotdiff
			 * at the end of the function.
			 */
			if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
			    ISVALID(prevbno + prevdiff,
				    ap->prev.br_startblock))
				prevbno += adjust;
			else
				prevdiff += adjust;
			/*
			 * If the firstblock forbids it, can't use it,
			 * must use default.
			 */
			if (!rt && !nullfb &&
			    XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
				prevbno = NULLFSBLOCK;
		}
		/*
		 * No previous block or can't follow it, just default.
		 */
		else
			prevbno = NULLFSBLOCK;
		/*
		 * If there's a following (right) block, select a requested
		 * start block based on it.
		 */
		if (!isnullstartblock(ap->got.br_startblock)) {
			/*
			 * Calculate gap to start of next block.
			 */
			adjust = gotdiff = ap->got.br_startoff - ap->offset;
			/*
			 * Figure the startblock based on the next block's
			 * start and the gap size.
			 */
			gotbno = ap->got.br_startblock;
			/*
			 * Heuristic!
			 * If the gap is large relative to the piece we're
			 * allocating, or using it gives us an invalid block
			 * number, then just use the start of the next block
			 * offset by our length.
			 * In both fallback cases gotdiff is increased by the
			 * gap again (less ap->length when the target could be
			 * moved back by the length), penalising this candidate
			 * in the final comparison.
			 */
			if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
			    ISVALID(gotbno - gotdiff, gotbno))
				gotbno -= adjust;
			else if (ISVALID(gotbno - ap->length, gotbno)) {
				gotbno -= ap->length;
				gotdiff += adjust - ap->length;
			} else
				gotdiff += adjust;
			/*
			 * If the firstblock forbids it, can't use it,
			 * must use default.
			 */
			if (!rt && !nullfb &&
			    XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
				gotbno = NULLFSBLOCK;
		}
		/*
		 * No next block, just default.
		 */
		else
			gotbno = NULLFSBLOCK;
		/*
		 * If both valid, pick the better one, else the only good
		 * one, else ap->blkno is already set (to 0 or the inode block).
		 * Ties (prevdiff == gotdiff) go to the left candidate.
		 */
		if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
			ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
		else if (prevbno != NULLFSBLOCK)
			ap->blkno = prevbno;
		else if (gotbno != NULLFSBLOCK)
			ap->blkno = gotbno;
	}
#undef ISVALID
}
3276
3277static int
3278xfs_bmap_longest_free_extent(
3279	struct xfs_trans	*tp,
3280	xfs_agnumber_t		ag,
3281	xfs_extlen_t		*blen,
3282	int			*notinit)
3283{
3284	struct xfs_mount	*mp = tp->t_mountp;
3285	struct xfs_perag	*pag;
3286	xfs_extlen_t		longest;
3287	int			error = 0;
3288
3289	pag = xfs_perag_get(mp, ag);
3290	if (!pag->pagf_init) {
3291		error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
3292		if (error) {
3293			/* Couldn't lock the AGF, so skip this AG. */
3294			if (error == -EAGAIN) {
3295				*notinit = 1;
3296				error = 0;
3297			}
3298			goto out;
3299		}
3300	}
3301
3302	longest = xfs_alloc_longest_free_extent(pag,
3303				xfs_alloc_min_freelist(mp, pag),
3304				xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3305	if (*blen < longest)
3306		*blen = longest;
3307
3308out:
3309	xfs_perag_put(pag);
3310	return error;
3311}
3312
3313static void
3314xfs_bmap_select_minlen(
3315	struct xfs_bmalloca	*ap,
3316	struct xfs_alloc_arg	*args,
3317	xfs_extlen_t		*blen,
3318	int			notinit)
3319{
3320	if (notinit || *blen < ap->minlen) {
3321		/*
3322		 * Since we did a BUF_TRYLOCK above, it is possible that
3323		 * there is space for this request.
3324		 */
3325		args->minlen = ap->minlen;
3326	} else if (*blen < args->maxlen) {
3327		/*
3328		 * If the best seen length is less than the request length,
3329		 * use the best as the minimum.
3330		 */
3331		args->minlen = *blen;
3332	} else {
3333		/*
3334		 * Otherwise we've seen an extent as big as maxlen, use that
3335		 * as the minimum.
3336		 */
3337		args->minlen = args->maxlen;
3338	}
3339}
3340
3341STATIC int
3342xfs_bmap_btalloc_nullfb(
3343	struct xfs_bmalloca	*ap,
3344	struct xfs_alloc_arg	*args,
3345	xfs_extlen_t		*blen)
3346{
3347	struct xfs_mount	*mp = ap->ip->i_mount;
3348	xfs_agnumber_t		ag, startag;
3349	int			notinit = 0;
3350	int			error;
3351
3352	args->type = XFS_ALLOCTYPE_START_BNO;
3353	args->total = ap->total;
3354
3355	startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3356	if (startag == NULLAGNUMBER)
3357		startag = ag = 0;
3358
3359	while (*blen < args->maxlen) {
3360		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3361						     &notinit);
3362		if (error)
3363			return error;
3364
3365		if (++ag == mp->m_sb.sb_agcount)
3366			ag = 0;
3367		if (ag == startag)
3368			break;
3369	}
3370
3371	xfs_bmap_select_minlen(ap, args, blen, notinit);
3372	return 0;
3373}
3374
3375STATIC int
3376xfs_bmap_btalloc_filestreams(
3377	struct xfs_bmalloca	*ap,
3378	struct xfs_alloc_arg	*args,
3379	xfs_extlen_t		*blen)
3380{
3381	struct xfs_mount	*mp = ap->ip->i_mount;
3382	xfs_agnumber_t		ag;
3383	int			notinit = 0;
3384	int			error;
3385
3386	args->type = XFS_ALLOCTYPE_NEAR_BNO;
3387	args->total = ap->total;
3388
3389	ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3390	if (ag == NULLAGNUMBER)
3391		ag = 0;
3392
3393	error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
3394	if (error)
3395		return error;
3396
3397	if (*blen < args->maxlen) {
3398		error = xfs_filestream_new_ag(ap, &ag);
3399		if (error)
3400			return error;
3401
3402		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3403						     &notinit);
3404		if (error)
3405			return error;
3406
3407	}
3408
3409	xfs_bmap_select_minlen(ap, args, blen, notinit);
3410
3411	/*
3412	 * Set the failure fallback case to look in the selected AG as stream
3413	 * may have moved.
3414	 */
3415	ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
3416	return 0;
3417}
3418
3419/* Update all inode and quota accounting for the allocation we just did. */
3420static void
3421xfs_bmap_btalloc_accounting(
3422	struct xfs_bmalloca	*ap,
3423	struct xfs_alloc_arg	*args)
3424{
3425	if (ap->flags & XFS_BMAPI_COWFORK) {
3426		/*
3427		 * COW fork blocks are in-core only and thus are treated as
3428		 * in-core quota reservation (like delalloc blocks) even when
3429		 * converted to real blocks. The quota reservation is not
3430		 * accounted to disk until blocks are remapped to the data
3431		 * fork. So if these blocks were previously delalloc, we
3432		 * already have quota reservation and there's nothing to do
3433		 * yet.
3434		 */
3435		if (ap->wasdel) {
3436			xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
3437			return;
3438		}
3439
3440		/*
3441		 * Otherwise, we've allocated blocks in a hole. The transaction
3442		 * has acquired in-core quota reservation for this extent.
3443		 * Rather than account these as real blocks, however, we reduce
3444		 * the transaction quota reservation based on the allocation.
3445		 * This essentially transfers the transaction quota reservation
3446		 * to that of a delalloc extent.
3447		 */
3448		ap->ip->i_delayed_blks += args->len;
3449		xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS,
3450				-(long)args->len);
3451		return;
3452	}
3453
3454	/* data/attr fork only */
3455	ap->ip->i_d.di_nblocks += args->len;
3456	xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3457	if (ap->wasdel) {
3458		ap->ip->i_delayed_blks -= args->len;
3459		xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
3460	}
3461	xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
3462		ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT,
3463		args->len);
3464}
3465
/*
 * Allocate an extent for the request described by @ap through
 * xfs_alloc_vextent().
 *
 * Steps, in order:
 *   1. Optionally widen ap->offset/ap->length to the extent size hint.
 *   2. Pick a target block (ap->blkno) and refine it with
 *      xfs_bmap_adjacent().
 *   3. Fill in a struct xfs_alloc_arg (type, minlen, maxlen, prod/mod,
 *      alignment) according to whether tp->t_firstblock is set, whether
 *      the transaction is in XFS_TRANS_LOWMODE, and ap->aeof.
 *   4. Call xfs_alloc_vextent(), retrying with progressively relaxed
 *      parameters while it keeps returning NULLFSBLOCK.
 *
 * On success with space found, ap->blkno, ap->length and (when a hint
 * changed it) ap->offset describe the new extent, tp->t_firstblock is
 * set if it was not already, and xfs_bmap_btalloc_accounting() has been
 * called.  If nothing could be allocated, 0 is returned with
 * ap->blkno == NULLFSBLOCK and ap->length == 0.  A non-zero return is an
 * error passed up from the AG scan helpers or from xfs_alloc_vextent().
 */
STATIC int
xfs_bmap_btalloc(
	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
{
	xfs_mount_t	*mp;		/* mount point structure */
	xfs_alloctype_t	atype = 0;	/* type for allocation routines */
	xfs_extlen_t	align = 0;	/* minimum allocation alignment */
	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
	xfs_agnumber_t	ag;
	xfs_alloc_arg_t	args;
	xfs_fileoff_t	orig_offset;
	xfs_extlen_t	orig_length;
	xfs_extlen_t	blen;
	xfs_extlen_t	nextminlen = 0;
	int		nullfb;		/* true if ap->firstblock isn't set */
	int		isaligned;
	int		tryagain;
	int		error;
	int		stripe_align;

	ASSERT(ap->length);
	/* Remember the caller's range; the hint code below may widen it. */
	orig_offset = ap->offset;
	orig_length = ap->length;

	mp = ap->ip->i_mount;

	/* stripe alignment for allocation is determined by mount parameters */
	stripe_align = 0;
	if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
		stripe_align = mp->m_swidth;
	else if (mp->m_dalign)
		stripe_align = mp->m_dalign;

	/*
	 * Extent size hint: the COW hint for COW fork allocations, the
	 * regular hint for user data, none (align == 0) otherwise.
	 * NOTE(review): the return value of xfs_bmap_extsize_align() is
	 * only checked through ASSERT() here - confirm that is intended
	 * for builds where ASSERT() compiles to nothing.
	 */
	if (ap->flags & XFS_BMAPI_COWFORK)
		align = xfs_get_cowextsz_hint(ap->ip);
	else if (ap->datatype & XFS_ALLOC_USERDATA)
		align = xfs_get_extsz_hint(ap->ip);
	if (align) {
		error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
						align, 0, ap->eof, 0, ap->conv,
						&ap->offset, &ap->length);
		ASSERT(!error);
		ASSERT(ap->length);
	}


	/*
	 * Default target block: with no first block yet, start of the
	 * filestream AG (AG 0 if none is associated) for filestream user
	 * data, or the inode's own block; otherwise the transaction's
	 * first block.
	 */
	nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
							ap->tp->t_firstblock);
	if (nullfb) {
		if ((ap->datatype & XFS_ALLOC_USERDATA) &&
		    xfs_inode_is_filestream(ap->ip)) {
			ag = xfs_filestream_lookup_ag(ap->ip);
			ag = (ag != NULLAGNUMBER) ? ag : 0;
			ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
		} else {
			ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
		}
	} else
		ap->blkno = ap->tp->t_firstblock;

	xfs_bmap_adjacent(ap);

	/*
	 * If allowed, use ap->blkno; otherwise must use firstblock since
	 * it's in the right allocation group.
	 */
	if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
		;
	else
		ap->blkno = ap->tp->t_firstblock;
	/*
	 * Normal allocation, done through xfs_alloc_vextent.
	 */
	tryagain = isaligned = 0;
	memset(&args, 0, sizeof(args));
	args.tp = ap->tp;
	args.mp = mp;
	args.fsbno = ap->blkno;
	args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;

	/* Trim the allocation back to the maximum an AG can fit. */
	args.maxlen = min(ap->length, mp->m_ag_max_usable);
	/* blen: longest free extent seen by the AG scan; stays 0 if no scan. */
	blen = 0;
	if (nullfb) {
		/*
		 * Search for an allocation group with a single extent large
		 * enough for the request.  If one isn't found, then adjust
		 * the minimum allocation size to the largest space found.
		 */
		if ((ap->datatype & XFS_ALLOC_USERDATA) &&
		    xfs_inode_is_filestream(ap->ip))
			error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
		else
			error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
		if (error)
			return error;
	} else if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
		/* Low space mode: only ask for the minimum length in total. */
		if (xfs_inode_is_filestream(ap->ip))
			args.type = XFS_ALLOCTYPE_FIRST_AG;
		else
			args.type = XFS_ALLOCTYPE_START_BNO;
		args.total = args.minlen = ap->minlen;
	} else {
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.total = ap->total;
		args.minlen = ap->minlen;
	}
	/*
	 * apply extent size hints if obtained earlier
	 *
	 * args.prod is the length granularity and args.mod is derived from
	 * the remainder of ap->offset so that it is (prod - offset % prod),
	 * or 0 when the offset is already a multiple of prod.  Without a
	 * hint the granularity is one page worth of blocks, or a single
	 * block when the block size is at least PAGE_SIZE.
	 */
	if (align) {
		args.prod = align;
		div_u64_rem(ap->offset, args.prod, &args.mod);
		if (args.mod)
			args.mod = args.prod - args.mod;
	} else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
		args.prod = 1;
		args.mod = 0;
	} else {
		args.prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
		div_u64_rem(ap->offset, args.prod, &args.mod);
		if (args.mod)
			args.mod = args.prod - args.mod;
	}
	/*
	 * If we are not low on available data blocks, and the underlying
	 * logical volume manager is a stripe, and the file offset is zero then
	 * try to allocate data blocks on stripe unit boundary. NOTE: ap->aeof
	 * is only set if the allocation length is >= the stripe unit and the
	 * allocation offset is at the end of file.
	 */
	if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) {
		if (!ap->offset) {
			/*
			 * Offset zero: go straight to a stripe aligned
			 * allocation.  atype remembers the allocation type
			 * for the unaligned retry further down.
			 */
			args.alignment = stripe_align;
			atype = args.type;
			isaligned = 1;
			/*
			 * Adjust minlen to try and preserve alignment if we
			 * can't guarantee an aligned maxlen extent.
			 */
			if (blen > args.alignment &&
			    blen <= args.maxlen + args.alignment)
				args.minlen = blen - args.alignment;
			args.minalignslop = 0;
		} else {
			/*
			 * First try an exact bno allocation.
			 * If it fails then do a near or start bno
			 * allocation with alignment turned on.
			 */
			atype = args.type;
			tryagain = 1;
			args.type = XFS_ALLOCTYPE_THIS_BNO;
			args.alignment = 1;
			/*
			 * Compute the minlen+alignment for the
			 * next case.  Set slop so that the value
			 * of minlen+alignment+slop doesn't go up
			 * between the calls.
			 */
			if (blen > stripe_align && blen <= args.maxlen)
				nextminlen = blen - stripe_align;
			else
				nextminlen = args.minlen;
			if (nextminlen + stripe_align > args.minlen + 1)
				args.minalignslop =
					nextminlen + stripe_align -
					args.minlen - 1;
			else
				args.minalignslop = 0;
		}
	} else {
		args.alignment = 1;
		args.minalignslop = 0;
	}
	args.minleft = ap->minleft;
	args.wasdel = ap->wasdel;
	args.resv = XFS_AG_RESV_NONE;
	args.datatype = ap->datatype;

	/*
	 * Allocation attempts.  Each retry below only runs while the
	 * previous call came back with args.fsbno == NULLFSBLOCK, and each
	 * reuses the same args structure with a few fields reset:
	 *   1. the request as set up above;
	 *   2. if that was an exact-bno try: stripe aligned, original type;
	 *   3. if an aligned try failed: same again without alignment;
	 *   4. if minlen was raised above ap->minlen: START_BNO with the
	 *      caller's minlen (nullfb only);
	 *   5. FIRST_AG from block 0 with total reduced to ap->minlen
	 *      (nullfb only); this also switches the transaction into
	 *      XFS_TRANS_LOWMODE.
	 */
	error = xfs_alloc_vextent(&args);
	if (error)
		return error;

	if (tryagain && args.fsbno == NULLFSBLOCK) {
		/*
		 * Exact allocation failed. Now try with alignment
		 * turned on.
		 */
		args.type = atype;
		args.fsbno = ap->blkno;
		args.alignment = stripe_align;
		args.minlen = nextminlen;
		args.minalignslop = 0;
		isaligned = 1;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}
	if (isaligned && args.fsbno == NULLFSBLOCK) {
		/*
		 * allocation failed, so turn off alignment and
		 * try again.
		 */
		args.type = atype;
		args.fsbno = ap->blkno;
		args.alignment = 0;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}
	if (args.fsbno == NULLFSBLOCK && nullfb &&
	    args.minlen > ap->minlen) {
		args.minlen = ap->minlen;
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = ap->blkno;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}
	if (args.fsbno == NULLFSBLOCK && nullfb) {
		args.fsbno = 0;
		args.type = XFS_ALLOCTYPE_FIRST_AG;
		args.total = ap->minlen;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
		/* Set whether or not this last attempt found space. */
		ap->tp->t_flags |= XFS_TRANS_LOWMODE;
	}
	if (args.fsbno != NULLFSBLOCK) {
		/*
		 * check the allocation happened at the same or higher AG than
		 * the first block that was allocated.
		 */
		ASSERT(ap->tp->t_firstblock == NULLFSBLOCK ||
		       XFS_FSB_TO_AGNO(mp, ap->tp->t_firstblock) <=
		       XFS_FSB_TO_AGNO(mp, args.fsbno));

		ap->blkno = args.fsbno;
		if (ap->tp->t_firstblock == NULLFSBLOCK)
			ap->tp->t_firstblock = args.fsbno;
		ASSERT(nullfb || fb_agno <= args.agno);
		ap->length = args.len;
		/*
		 * If the extent size hint is active, we tried to round the
		 * caller's allocation request offset down to extsz and the
		 * length up to another extsz boundary.  If we found a free
		 * extent we mapped it in starting at this new offset.  If the
		 * newly mapped space isn't long enough to cover any of the
		 * range of offsets that was originally requested, move the
		 * mapping up so that we can fill as much of the caller's
		 * original request as possible.  Free space is apparently
		 * very fragmented so we're unlikely to be able to satisfy the
		 * hints anyway.
		 */
		if (ap->length <= orig_length)
			ap->offset = orig_offset;
		else if (ap->offset + ap->length < orig_offset + orig_length)
			ap->offset = orig_offset + orig_length - ap->length;
		xfs_bmap_btalloc_accounting(ap, &args);
	} else {
		/* Nothing found: report an empty result, not an error. */
		ap->blkno = NULLFSBLOCK;
		ap->length = 0;
	}
	return 0;
}
3727
3728/* Trim extent to fit a logical block range. */
3729void
3730xfs_trim_extent(
3731	struct xfs_bmbt_irec	*irec,
3732	xfs_fileoff_t		bno,
3733	xfs_filblks_t		len)
3734{
3735	xfs_fileoff_t		distance;
3736	xfs_fileoff_t		end = bno + len;
3737
3738	if (irec->br_startoff + irec->br_blockcount <= bno ||
3739	    irec->br_startoff >= end) {
3740		irec->br_blockcount = 0;
3741		return;
3742	}
3743
3744	if (irec->br_startoff < bno) {
3745		distance = bno - irec->br_startoff;
3746		if (isnullstartblock(irec->br_startblock))
3747			irec->br_startblock = DELAYSTARTBLOCK;
3748		if (irec->br_startblock != DELAYSTARTBLOCK &&
3749		    irec->br_startblock != HOLESTARTBLOCK)
3750			irec->br_startblock += distance;
3751		irec->br_startoff += distance;
3752		irec->br_blockcount -= distance;
3753	}
3754
3755	if (end < irec->br_startoff + irec->br_blockcount) {
3756		distance = irec->br_startoff + irec->br_blockcount - end;
3757		irec->br_blockcount -= distance;
3758	}
3759}
3760
3761/*
3762 * Trim the returned map to the required bounds
3763 */
3764STATIC void
3765xfs_bmapi_trim_map(
3766	struct xfs_bmbt_irec	*mval,
3767	struct xfs_bmbt_irec	*got,
3768	xfs_fileoff_t		*bno,
3769	xfs_filblks_t		len,
3770	xfs_fileoff_t		obno,
3771	xfs_fileoff_t		end,
3772	int			n,
3773	int			flags)
3774{
3775	if ((flags & XFS_BMAPI_ENTIRE) ||
3776	    got->br_startoff + got->br_blockcount <= obno) {
3777		*mval = *got;
3778		if (isnullstartblock(got->br_startblock))
3779			mval->br_startblock = DELAYSTARTBLOCK;
3780		return;
3781	}
3782
3783	if (obno > *bno)
3784		*bno = obno;
3785	ASSERT((*bno >= obno) || (n == 0));
3786	ASSERT(*bno < end);
3787	mval->br_startoff = *bno;
3788	if (isnullstartblock(got->br_startblock))
3789		mval->br_startblock = DELAYSTARTBLOCK;
3790	else
3791		mval->br_startblock = got->br_startblock +
3792					(*bno - got->br_startoff);
3793	/*
3794	 * Return the minimum of what we got and what we asked for for
3795	 * the length.  We can use the len variable here because it is
3796	 * modified below and we could have been there before coming
3797	 * here if the first part of the allocation didn't overlap what
3798	 * was asked for.
3799	 */
3800	mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3801			got->br_blockcount - (*bno - got->br_startoff));
3802	mval->br_state = got->br_state;
3803	ASSERT(mval->br_blockcount <= len);
3804	return;
3805}
3806
/*
 * Update and validate the extent map to return
 *
 * *map points at the mapping that was just filled in (xfs_bmapi_read()
 * calls this right after xfs_bmapi_trim_map()), and *n is the number of
 * entries already accepted before it.
 *
 * *bno is advanced to the end of that mapping and *len is recomputed as
 * end - *bno.  The mapping is then either merged into the preceding
 * array entry, in which case *map and *n stay as they are and the slot
 * is reused by the next call, or accepted as a new entry, in which case
 * *map and *n are both advanced by one.
 */
STATIC void
xfs_bmapi_update_map(
	struct xfs_bmbt_irec	**map,
	xfs_fileoff_t		*bno,
	xfs_filblks_t		*len,
	xfs_fileoff_t		obno,
	xfs_fileoff_t		end,
	int			*n,
	int			flags)
{
	xfs_bmbt_irec_t	*mval = *map;

	ASSERT((flags & XFS_BMAPI_ENTIRE) ||
	       ((mval->br_startoff + mval->br_blockcount) <= end));
	ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
	       (mval->br_startoff < obno));

	*bno = mval->br_startoff + mval->br_blockcount;
	*len = end - *bno;
	/* mval[-1] is only looked at when a previous entry exists (*n > 0). */
	if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
		/* update previous map with new information */
		ASSERT(mval->br_startblock == mval[-1].br_startblock);
		ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
		ASSERT(mval->br_state == mval[-1].br_state);
		mval[-1].br_blockcount = mval->br_blockcount;
		mval[-1].br_state = mval->br_state;
	} else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
		   mval[-1].br_startblock != DELAYSTARTBLOCK &&
		   mval[-1].br_startblock != HOLESTARTBLOCK &&
		   mval->br_startblock == mval[-1].br_startblock +
					  mval[-1].br_blockcount &&
		   mval[-1].br_state == mval->br_state) {
		/*
		 * Neither entry is delalloc, the previous one is not a hole,
		 * the new one starts at the block right after the previous
		 * one and the states match: extend the previous entry.
		 */
		ASSERT(mval->br_startoff ==
		       mval[-1].br_startoff + mval[-1].br_blockcount);
		mval[-1].br_blockcount += mval->br_blockcount;
	} else if (*n > 0 &&
		   mval->br_startblock == DELAYSTARTBLOCK &&
		   mval[-1].br_startblock == DELAYSTARTBLOCK &&
		   mval->br_startoff ==
		   mval[-1].br_startoff + mval[-1].br_blockcount) {
		/*
		 * Two delalloc entries adjacent in file offset: extend the
		 * previous entry and take over the new entry's state.
		 */
		mval[-1].br_blockcount += mval->br_blockcount;
		mval[-1].br_state = mval->br_state;
	} else if (!((*n == 0) &&
		     ((mval->br_startoff + mval->br_blockcount) <=
		      obno))) {
		/*
		 * Keep it as a new entry - unless it is the very first
		 * mapping and it ends at or before the originally requested
		 * offset, in which case it is dropped by not advancing.
		 */
		mval++;
		(*n)++;
	}
	*map = mval;
}
3860
3861/*
3862 * Map file blocks to filesystem blocks without allocation.
3863 */
3864int
3865xfs_bmapi_read(
3866	struct xfs_inode	*ip,
3867	xfs_fileoff_t		bno,
3868	xfs_filblks_t		len,
3869	struct xfs_bmbt_irec	*mval,
3870	int			*nmap,
3871	int			flags)
3872{
3873	struct xfs_mount	*mp = ip->i_mount;
3874	int			whichfork = xfs_bmapi_whichfork(flags);
3875	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
3876	struct xfs_bmbt_irec	got;
3877	xfs_fileoff_t		obno;
3878	xfs_fileoff_t		end;
3879	struct xfs_iext_cursor	icur;
3880	int			error;
3881	bool			eof = false;
3882	int			n = 0;
3883
3884	ASSERT(*nmap >= 1);
3885	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE)));
3886	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
3887
3888	if (WARN_ON_ONCE(!ifp))
3889		return -EFSCORRUPTED;
3890
3891	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
3892	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT))
3893		return -EFSCORRUPTED;
3894
3895	if (XFS_FORCED_SHUTDOWN(mp))
3896		return -EIO;
3897
3898	XFS_STATS_INC(mp, xs_blk_mapr);
3899
3900	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
3901		error = xfs_iread_extents(NULL, ip, whichfork);
3902		if (error)
3903			return error;
3904	}
3905
3906	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
3907		eof = true;
3908	end = bno + len;
3909	obno = bno;
3910
3911	while (bno < end && n < *nmap) {
3912		/* Reading past eof, act as though there's a hole up to end. */
3913		if (eof)
3914			got.br_startoff = end;
3915		if (got.br_startoff > bno) {
3916			/* Reading in a hole.  */
3917			mval->br_startoff = bno;
3918			mval->br_startblock = HOLESTARTBLOCK;
3919			mval->br_blockcount =
3920				XFS_FILBLKS_MIN(len, got.br_startoff - bno);
3921			mval->br_state = XFS_EXT_NORM;
3922			bno += mval->br_blockcount;
3923			len -= mval->br_blockcount;
3924			mval++;
3925			n++;
3926			continue;
3927		}
3928
3929		/* set up the extent map to return. */
3930		xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
3931		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
3932
3933		/* If we're done, stop now. */
3934		if (bno >= end || n >= *nmap)
3935			break;
3936
3937		/* Else go on to the next record. */
3938		if (!xfs_iext_next_extent(ifp, &icur, &got))
3939			eof = true;
3940	}
3941	*nmap = n;
3942	return 0;
3943}
3944
/*
 * Add a delayed allocation extent to an inode. Blocks are reserved from the
 * global pool and the extent inserted into the inode in-core extent tree.
 *
 * ip:		inode to add the extent to
 * whichfork:	fork to insert into; extent size alignment is only applied
 *		for XFS_COW_FORK
 * off, len:	file block range that must be covered
 * prealloc:	extra blocks to reserve beyond len
 * got:		see below
 * icur:	extent cursor, passed on to the insertion routine
 * eof:		non-zero if there is no extent beyond off
 *
 * On entry, got refers to the first extent beyond the offset of the extent to
 * allocate or eof is specified if no such extent exists. On return, got refers
 * to the extent record that was inserted to the inode fork.
 *
 * Note that the allocated extent may have been merged with contiguous extents
 * during insertion into the inode fork. Thus, got does not reflect the current
 * state of the inode fork on return.
 * NOTE(review): this comment used to tell callers to look the updated record
 * up via "lastx", which is not an argument of this function; icur is
 * presumably what is meant - confirm.
 *
 * Returns 0 on success, or the error from the quota reservation or from
 * xfs_mod_fdblocks(); on error every reservation made here has been undone.
 */
int
xfs_bmapi_reserve_delalloc(
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_fileoff_t		off,
	xfs_filblks_t		len,
	xfs_filblks_t		prealloc,
	struct xfs_bmbt_irec	*got,
	struct xfs_iext_cursor	*icur,
	int			eof)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	xfs_extlen_t		alen;
	xfs_extlen_t		indlen;
	int			error;
	xfs_fileoff_t		aoff = off;

	/*
	 * Cap the alloc length. Keep track of prealloc so we know whether to
	 * tag the inode before we return.
	 * The length is limited to MAXEXTLEN and, unless at eof, to the gap
	 * in front of the next extent; prealloc is then reduced to whatever
	 * part of it still fits.
	 */
	alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN);
	if (!eof)
		alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
	if (prealloc && alen >= len)
		prealloc = alen - len;

	/* Figure out the extent size, adjust alen */
	if (whichfork == XFS_COW_FORK) {
		struct xfs_bmbt_irec	prev;
		xfs_extlen_t		extsz = xfs_get_cowextsz_hint(ip);

		if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
			prev.br_startoff = NULLFILEOFF;

		/* May move aoff down and grow alen to honour the COW hint. */
		error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
					       1, 0, &aoff, &alen);
		ASSERT(!error);
	}

	/*
	 * Make a transaction-less quota reservation for delayed allocation
	 * blocks.  This number gets adjusted later.  Nothing else has been
	 * reserved at this point, so on failure we can simply return.
	 */
	error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
						XFS_QMOPT_RES_REGBLKS);
	if (error)
		return error;

	/*
	 * Split changing sb for alen and indlen since they could be coming
	 * from different places.
	 * indlen is the worst case number of indirect blocks for alen, as
	 * computed by xfs_bmap_worst_indlen().
	 */
	indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
	ASSERT(indlen > 0);

	error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
	if (error)
		goto out_unreserve_quota;

	error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
	if (error)
		goto out_unreserve_blocks;


	/* All reservations succeeded: account for them and build the extent. */
	ip->i_delayed_blks += alen;
	xfs_mod_delalloc(ip->i_mount, alen + indlen);

	got->br_startoff = aoff;
	got->br_startblock = nullstartblock(indlen);
	got->br_blockcount = alen;
	got->br_state = XFS_EXT_NORM;

	xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);

	/*
	 * Tag the inode if blocks were preallocated. Note that COW fork
	 * preallocation can occur at the start or end of the extent, even when
	 * prealloc == 0, so we must also check the aligned offset and length.
	 */
	if (whichfork == XFS_DATA_FORK && prealloc)
		xfs_inode_set_eofblocks_tag(ip);
	if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
		xfs_inode_set_cowblocks_tag(ip);

	return 0;

	/* Unwind in reverse order: first the alen blocks, then the quota. */
out_unreserve_blocks:
	xfs_mod_fdblocks(mp, alen, false);
out_unreserve_quota:
	if (XFS_IS_QUOTA_ON(mp))
		xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0,
						XFS_QMOPT_RES_REGBLKS);
	return error;
}
4055
4056static int
4057xfs_bmap_alloc_userdata(
4058	struct xfs_bmalloca	*bma)
4059{
4060	struct xfs_mount	*mp = bma->ip->i_mount;
4061	int			whichfork = xfs_bmapi_whichfork(bma->flags);
4062	int			error;
4063
4064	/*
4065	 * Set the data type being allocated. For the data fork, the first data
4066	 * in the file is treated differently to all other allocations. For the
4067	 * attribute fork, we only need to ensure the allocated range is not on
4068	 * the busy list.
4069	 */
4070	bma->datatype = XFS_ALLOC_NOBUSY;
4071	if (whichfork == XFS_DATA_FORK) {
4072		bma->datatype |= XFS_ALLOC_USERDATA;
4073		if (bma->offset == 0)
4074			bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
4075
4076		if (mp->m_dalign && bma->length >= mp->m_dalign) {
4077			error = xfs_bmap_isaeof(bma, whichfork);
4078			if (error)
4079				return error;
4080		}
4081
4082		if (XFS_IS_REALTIME_INODE(bma->ip))
4083			return xfs_bmap_rtalloc(bma);
4084	}
4085
4086	return xfs_bmap_btalloc(bma);
4087}
4088
4089static int
4090xfs_bmapi_allocate(
4091	struct xfs_bmalloca	*bma)
4092{
4093	struct xfs_mount	*mp = bma->ip->i_mount;
4094	int			whichfork = xfs_bmapi_whichfork(bma->flags);
4095	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4096	int			tmp_logflags = 0;
4097	int			error;
4098
4099	ASSERT(bma->length > 0);
4100
4101	/*
4102	 * For the wasdelay case, we could also just allocate the stuff asked
4103	 * for in this bmap call but that wouldn't be as good.
4104	 */
4105	if (bma->wasdel) {
4106		bma->length = (xfs_extlen_t)bma->got.br_blockcount;
4107		bma->offset = bma->got.br_startoff;
4108		if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev))
4109			bma->prev.br_startoff = NULLFILEOFF;
4110	} else {
4111		bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
4112		if (!bma->eof)
4113			bma->length = XFS_FILBLKS_MIN(bma->length,
4114					bma->got.br_startoff - bma->offset);
4115	}
4116
4117	if (bma->flags & XFS_BMAPI_CONTIG)
4118		bma->minlen = bma->length;
4119	else
4120		bma->minlen = 1;
4121
4122	if (bma->flags & XFS_BMAPI_METADATA)
4123		error = xfs_bmap_btalloc(bma);
4124	else
4125		error = xfs_bmap_alloc_userdata(bma);
4126	if (error || bma->blkno == NULLFSBLOCK)
4127		return error;
4128
4129	if (bma->flags & XFS_BMAPI_ZERO) {
4130		error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
4131		if (error)
4132			return error;
4133	}
4134
4135	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur)
4136		bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4137	/*
4138	 * Bump the number of extents we've allocated
4139	 * in this call.
4140	 */
4141	bma->nallocs++;
4142
4143	if (bma->cur)
4144		bma->cur->bc_ino.flags =
4145			bma->wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
4146
4147	bma->got.br_startoff = bma->offset;
4148	bma->got.br_startblock = bma->blkno;
4149	bma->got.br_blockcount = bma->length;
4150	bma->got.br_state = XFS_EXT_NORM;
4151
4152	if (bma->flags & XFS_BMAPI_PREALLOC)
4153		bma->got.br_state = XFS_EXT_UNWRITTEN;
4154
4155	if (bma->wasdel)
4156		error = xfs_bmap_add_extent_delay_real(bma, whichfork);
4157	else
4158		error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
4159				whichfork, &bma->icur, &bma->cur, &bma->got,
4160				&bma->logflags, bma->flags);
4161
4162	bma->logflags |= tmp_logflags;
4163	if (error)
4164		return error;
4165
4166	/*
4167	 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
4168	 * or xfs_bmap_add_extent_hole_real might have merged it into one of
4169	 * the neighbouring ones.
4170	 */
4171	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4172
4173	ASSERT(bma->got.br_startoff <= bma->offset);
4174	ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
4175	       bma->offset + bma->length);
4176	ASSERT(bma->got.br_state == XFS_EXT_NORM ||
4177	       bma->got.br_state == XFS_EXT_UNWRITTEN);
4178	return 0;
4179}
4180
4181STATIC int
4182xfs_bmapi_convert_unwritten(
4183	struct xfs_bmalloca	*bma,
4184	struct xfs_bmbt_irec	*mval,
4185	xfs_filblks_t		len,
4186	int			flags)
4187{
4188	int			whichfork = xfs_bmapi_whichfork(flags);
4189	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4190	int			tmp_logflags = 0;
4191	int			error;
4192
4193	/* check if we need to do unwritten->real conversion */
4194	if (mval->br_state == XFS_EXT_UNWRITTEN &&
4195	    (flags & XFS_BMAPI_PREALLOC))
4196		return 0;
4197
4198	/* check if we need to do real->unwritten conversion */
4199	if (mval->br_state == XFS_EXT_NORM &&
4200	    (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4201			(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4202		return 0;
4203
4204	/*
4205	 * Modify (by adding) the state flag, if writing.
4206	 */
4207	ASSERT(mval->br_blockcount <= len);
4208	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4209		bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4210					bma->ip, whichfork);
4211	}
4212	mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4213				? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4214
4215	/*
4216	 * Before insertion into the bmbt, zero the range being converted
4217	 * if required.
4218	 */
4219	if (flags & XFS_BMAPI_ZERO) {
4220		error = xfs_zero_extent(bma->ip, mval->br_startblock,
4221					mval->br_blockcount);
4222		if (error)
4223			return error;
4224	}
4225
4226	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
4227			&bma->icur, &bma->cur, mval, &tmp_logflags);
4228	/*
4229	 * Log the inode core unconditionally in the unwritten extent conversion
4230	 * path because the conversion might not have done so (e.g., if the
4231	 * extent count hasn't changed). We need to make sure the inode is dirty
4232	 * in the transaction for the sake of fsync(), even if nothing has
4233	 * changed, because fsync() will not force the log for this transaction
4234	 * unless it sees the inode pinned.
4235	 *
4236	 * Note: If we're only converting cow fork extents, there aren't
4237	 * any on-disk updates to make, so we don't need to log anything.
4238	 */
4239	if (whichfork != XFS_COW_FORK)
4240		bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4241	if (error)
4242		return error;
4243
4244	/*
4245	 * Update our extent pointer, given that
4246	 * xfs_bmap_add_extent_unwritten_real might have merged it into one
4247	 * of the neighbouring ones.
4248	 */
4249	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4250
4251	/*
4252	 * We may have combined previously unwritten space with written space,
4253	 * so generate another request.
4254	 */
4255	if (mval->br_blockcount < len)
4256		return -EAGAIN;
4257	return 0;
4258}
4259
4260static inline xfs_extlen_t
4261xfs_bmapi_minleft(
4262	struct xfs_trans	*tp,
4263	struct xfs_inode	*ip,
4264	int			fork)
4265{
4266	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, fork);
4267
4268	if (tp && tp->t_firstblock != NULLFSBLOCK)
4269		return 0;
4270	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
4271		return 1;
4272	return be16_to_cpu(ifp->if_broot->bb_level) + 1;
4273}
4274
4275/*
4276 * Log whatever the flags say, even if error.  Otherwise we might miss detecting
4277 * a case where the data is changed, there's an error, and it's not logged so we
4278 * don't shutdown when we should.  Don't bother logging extents/btree changes if
4279 * we converted to the other format.
4280 */
4281static void
4282xfs_bmapi_finish(
4283	struct xfs_bmalloca	*bma,
4284	int			whichfork,
4285	int			error)
4286{
4287	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4288
4289	if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
4290	    ifp->if_format != XFS_DINODE_FMT_EXTENTS)
4291		bma->logflags &= ~xfs_ilog_fext(whichfork);
4292	else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
4293		 ifp->if_format != XFS_DINODE_FMT_BTREE)
4294		bma->logflags &= ~xfs_ilog_fbroot(whichfork);
4295
4296	if (bma->logflags)
4297		xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
4298	if (bma->cur)
4299		xfs_btree_del_cursor(bma->cur, error);
4300}
4301
/*
 * Map file blocks to filesystem blocks, and allocate blocks or convert the
 * extent state if necessary.  Detailed behaviour is controlled by the flags
 * parameter.  Only allocates blocks from a single allocation group, to avoid
 * locking problems.
 *
 * The caller must pass a transaction and hold XFS_ILOCK_EXCL on the inode
 * (both are asserted below).  On entry *nmap is the capacity of @mval; on
 * successful return it is the number of mappings actually filled in.
 *
 * Returns 0 on success, -EFSCORRUPTED if the fork is not in an extent-bearing
 * format (or the BMAPIFORMAT error tag fires), -EIO if the filesystem has
 * been shut down, or a negative errno from one of the helpers.
 */
int
xfs_bmapi_write(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_fileoff_t		bno,		/* starting file offs. mapped */
	xfs_filblks_t		len,		/* length to map in file */
	int			flags,		/* XFS_BMAPI_... */
	xfs_extlen_t		total,		/* total blocks needed */
	struct xfs_bmbt_irec	*mval,		/* output: map values */
	int			*nmap)		/* i/o: mval size/count */
{
	struct xfs_bmalloca	bma = {
		.tp		= tp,
		.ip		= ip,
		.total		= total,
	};
	struct xfs_mount	*mp = ip->i_mount;
	int			whichfork = xfs_bmapi_whichfork(flags);
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	xfs_fileoff_t		end;		/* end of mapped file region */
	bool			eof = false;	/* after the end of extents */
	int			error;		/* error return */
	int			n;		/* current extent index */
	xfs_fileoff_t		obno;		/* old block number (offset) */

#ifdef DEBUG
	/* Snapshot of the arguments for xfs_bmap_validate_ret() at the end. */
	xfs_fileoff_t		orig_bno;	/* original block number value */
	int			orig_flags;	/* original flags arg value */
	xfs_filblks_t		orig_len;	/* original value of len arg */
	struct xfs_bmbt_irec	*orig_mval;	/* original value of mval */
	int			orig_nmap;	/* original value of *nmap */

	orig_bno = bno;
	orig_len = len;
	orig_flags = flags;
	orig_mval = mval;
	orig_nmap = *nmap;
#endif

	ASSERT(*nmap >= 1);
	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
	ASSERT(tp != NULL);
	ASSERT(len > 0);
	ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL);
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(!(flags & XFS_BMAPI_REMAP));

	/* zeroing is currently only for data extents, not metadata */
	ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
			(XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
	/*
	 * we can allocate unwritten extents or pre-zero allocated blocks,
	 * but it makes no sense to do both at once. This would result in
	 * zeroing the unwritten extent twice, but it still being an
	 * unwritten extent....
	 */
	ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
			(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));

	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	XFS_STATS_INC(mp, xs_blk_mapw);

	/* Read the extent list into memory if that has not happened yet. */
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			goto error0;
	}

	/*
	 * Position the cursor at the extent for bno; a failed lookup means we
	 * are past the last extent.  Also remember the previous extent, if
	 * there is one.
	 */
	if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
		eof = true;
	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
		bma.prev.br_startoff = NULLFILEOFF;
	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);

	n = 0;
	end = bno + len;
	obno = bno;
	while (bno < end && n < *nmap) {
		bool			need_alloc = false, wasdelay = false;

		/* in hole or beyond EOF? */
		if (eof || bma.got.br_startoff > bno) {
			/*
			 * CoW fork conversions should /never/ hit EOF or
			 * holes.  There should always be something for us
			 * to work on.
			 */
			ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
			         (flags & XFS_BMAPI_COWFORK)));

			need_alloc = true;
		} else if (isnullstartblock(bma.got.br_startblock)) {
			/* A null start block marks a delayed allocation. */
			wasdelay = true;
		}

		/*
		 * First, deal with the hole before the allocated space
		 * that we found, if any.
		 */
		if (need_alloc || wasdelay) {
			bma.eof = eof;
			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
			bma.wasdel = wasdelay;
			bma.offset = bno;
			bma.flags = flags;

			/*
			 * There's a 32/64 bit type mismatch between the
			 * allocation length request (which can be 64 bits in
			 * length) and the bma length request, which is
			 * xfs_extlen_t and therefore 32 bits. Hence we have to
			 * check for 32-bit overflows and handle them here.
			 */
			if (len > (xfs_filblks_t)MAXEXTLEN)
				bma.length = MAXEXTLEN;
			else
				bma.length = len;

			ASSERT(len > 0);
			ASSERT(bma.length > 0);
			error = xfs_bmapi_allocate(&bma);
			if (error)
				goto error0;
			/* No blocks were allocated: return what we have. */
			if (bma.blkno == NULLFSBLOCK)
				break;

			/*
			 * If this is a CoW allocation, record the data in
			 * the refcount btree for orphan recovery.
			 */
			if (whichfork == XFS_COW_FORK)
				xfs_refcount_alloc_cow_extent(tp, bma.blkno,
						bma.length);
		}

		/* Deal with the allocated space we found.  */
		xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
							end, n, flags);

		/*
		 * Execute unwritten extent conversion if necessary.  -EAGAIN
		 * means the conversion did not cover the whole request, so
		 * run the loop body again without advancing.
		 */
		error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
		if (error == -EAGAIN)
			continue;
		if (error)
			goto error0;

		/* update the extent map to return */
		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);

		/*
		 * If we're done, stop now.  Stop when we've allocated
		 * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
		 * the transaction may get too big.
		 */
		if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
			break;

		/* Else go on to the next record. */
		bma.prev = bma.got;
		if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
			eof = true;
	}
	*nmap = n;

	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
			whichfork);
	if (error)
		goto error0;

	ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE ||
	       ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
	xfs_bmapi_finish(&bma, whichfork, 0);
	xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
		orig_nmap, *nmap);
	return 0;
error0:
	/* Log whatever was dirtied and free the cursor, even on failure. */
	xfs_bmapi_finish(&bma, whichfork, error);
	return error;
}
4494
4495/*
4496 * Convert an existing delalloc extent to real blocks based on file offset. This
4497 * attempts to allocate the entire delalloc extent and may require multiple
4498 * invocations to allocate the target offset if a large enough physical extent
4499 * is not available.
4500 */
4501int
4502xfs_bmapi_convert_delalloc(
4503	struct xfs_inode	*ip,
4504	int			whichfork,
4505	xfs_off_t		offset,
4506	struct iomap		*iomap,
4507	unsigned int		*seq)
4508{
4509	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
4510	struct xfs_mount	*mp = ip->i_mount;
4511	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
4512	struct xfs_bmalloca	bma = { NULL };
4513	uint16_t		flags = 0;
4514	struct xfs_trans	*tp;
4515	int			error;
4516
4517	if (whichfork == XFS_COW_FORK)
4518		flags |= IOMAP_F_SHARED;
4519
4520	/*
4521	 * Space for the extent and indirect blocks was reserved when the
4522	 * delalloc extent was created so there's no need to do so here.
4523	 */
4524	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
4525				XFS_TRANS_RESERVE, &tp);
4526	if (error)
4527		return error;
4528
4529	xfs_ilock(ip, XFS_ILOCK_EXCL);
4530	xfs_trans_ijoin(tp, ip, 0);
4531
4532	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
4533	    bma.got.br_startoff > offset_fsb) {
4534		/*
4535		 * No extent found in the range we are trying to convert.  This
4536		 * should only happen for the COW fork, where another thread
4537		 * might have moved the extent to the data fork in the meantime.
4538		 */
4539		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
4540		error = -EAGAIN;
4541		goto out_trans_cancel;
4542	}
4543
4544	/*
4545	 * If we find a real extent here we raced with another thread converting
4546	 * the extent.  Just return the real extent at this offset.
4547	 */
4548	if (!isnullstartblock(bma.got.br_startblock)) {
4549		xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
4550		*seq = READ_ONCE(ifp->if_seq);
4551		goto out_trans_cancel;
4552	}
4553
4554	bma.tp = tp;
4555	bma.ip = ip;
4556	bma.wasdel = true;
4557	bma.offset = bma.got.br_startoff;
4558	bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN);
4559	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4560
4561	/*
4562	 * When we're converting the delalloc reservations backing dirty pages
4563	 * in the page cache, we must be careful about how we create the new
4564	 * extents:
4565	 *
4566	 * New CoW fork extents are created unwritten, turned into real extents
4567	 * when we're about to write the data to disk, and mapped into the data
4568	 * fork after the write finishes.  End of story.
4569	 *
4570	 * New data fork extents must be mapped in as unwritten and converted
4571	 * to real extents after the write succeeds to avoid exposing stale
4572	 * disk contents if we crash.
4573	 */
4574	bma.flags = XFS_BMAPI_PREALLOC;
4575	if (whichfork == XFS_COW_FORK)
4576		bma.flags |= XFS_BMAPI_COWFORK;
4577
4578	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4579		bma.prev.br_startoff = NULLFILEOFF;
4580
4581	error = xfs_bmapi_allocate(&bma);
4582	if (error)
4583		goto out_finish;
4584
4585	error = -ENOSPC;
4586	if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
4587		goto out_finish;
4588	error = -EFSCORRUPTED;
4589	if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock)))
4590		goto out_finish;
4591
4592	XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
4593	XFS_STATS_INC(mp, xs_xstrat_quick);
4594
4595	ASSERT(!isnullstartblock(bma.got.br_startblock));
4596	xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
4597	*seq = READ_ONCE(ifp->if_seq);
4598
4599	if (whichfork == XFS_COW_FORK)
4600		xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
4601
4602	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4603			whichfork);
4604	if (error)
4605		goto out_finish;
4606
4607	xfs_bmapi_finish(&bma, whichfork, 0);
4608	error = xfs_trans_commit(tp);
4609	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4610	return error;
4611
4612out_finish:
4613	xfs_bmapi_finish(&bma, whichfork, error);
4614out_trans_cancel:
4615	xfs_trans_cancel(tp);
4616	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4617	return error;
4618}
4619
4620int
4621xfs_bmapi_remap(
4622	struct xfs_trans	*tp,
4623	struct xfs_inode	*ip,
4624	xfs_fileoff_t		bno,
4625	xfs_filblks_t		len,
4626	xfs_fsblock_t		startblock,
4627	int			flags)
4628{
4629	struct xfs_mount	*mp = ip->i_mount;
4630	struct xfs_ifork	*ifp;
4631	struct xfs_btree_cur	*cur = NULL;
4632	struct xfs_bmbt_irec	got;
4633	struct xfs_iext_cursor	icur;
4634	int			whichfork = xfs_bmapi_whichfork(flags);
4635	int			logflags = 0, error;
4636
4637	ifp = XFS_IFORK_PTR(ip, whichfork);
4638	ASSERT(len > 0);
4639	ASSERT(len <= (xfs_filblks_t)MAXEXTLEN);
4640	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4641	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
4642			   XFS_BMAPI_NORMAP)));
4643	ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
4644			(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
4645
4646	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4647	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4648		return -EFSCORRUPTED;
4649	}
4650
4651	if (XFS_FORCED_SHUTDOWN(mp))
4652		return -EIO;
4653
4654	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4655		error = xfs_iread_extents(tp, ip, whichfork);
4656		if (error)
4657			return error;
4658	}
4659
4660	if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
4661		/* make sure we only reflink into a hole. */
4662		ASSERT(got.br_startoff > bno);
4663		ASSERT(got.br_startoff - bno >= len);
4664	}
4665
4666	ip->i_d.di_nblocks += len;
4667	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4668
4669	if (ifp->if_flags & XFS_IFBROOT) {
4670		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
4671		cur->bc_ino.flags = 0;
4672	}
4673
4674	got.br_startoff = bno;
4675	got.br_startblock = startblock;
4676	got.br_blockcount = len;
4677	if (flags & XFS_BMAPI_PREALLOC)
4678		got.br_state = XFS_EXT_UNWRITTEN;
4679	else
4680		got.br_state = XFS_EXT_NORM;
4681
4682	error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
4683			&cur, &got, &logflags, flags);
4684	if (error)
4685		goto error0;
4686
4687	error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
4688
4689error0:
4690	if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS)
4691		logflags &= ~XFS_ILOG_DEXT;
4692	else if (ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
4693		logflags &= ~XFS_ILOG_DBROOT;
4694
4695	if (logflags)
4696		xfs_trans_log_inode(tp, ip, logflags);
4697	if (cur)
4698		xfs_btree_del_cursor(cur, error);
4699	return error;
4700}
4701
4702/*
4703 * When a delalloc extent is split (e.g., due to a hole punch), the original
4704 * indlen reservation must be shared across the two new extents that are left
4705 * behind.
4706 *
4707 * Given the original reservation and the worst case indlen for the two new
4708 * extents (as calculated by xfs_bmap_worst_indlen()), split the original
4709 * reservation fairly across the two new extents. If necessary, steal available
4710 * blocks from a deleted extent to make up a reservation deficiency (e.g., if
4711 * ores == 1). The number of stolen blocks is returned. The availability and
4712 * subsequent accounting of stolen blocks is the responsibility of the caller.
4713 */
4714static xfs_filblks_t
4715xfs_bmap_split_indlen(
4716	xfs_filblks_t			ores,		/* original res. */
4717	xfs_filblks_t			*indlen1,	/* ext1 worst indlen */
4718	xfs_filblks_t			*indlen2,	/* ext2 worst indlen */
4719	xfs_filblks_t			avail)		/* stealable blocks */
4720{
4721	xfs_filblks_t			len1 = *indlen1;
4722	xfs_filblks_t			len2 = *indlen2;
4723	xfs_filblks_t			nres = len1 + len2; /* new total res. */
4724	xfs_filblks_t			stolen = 0;
4725	xfs_filblks_t			resfactor;
4726
4727	/*
4728	 * Steal as many blocks as we can to try and satisfy the worst case
4729	 * indlen for both new extents.
4730	 */
4731	if (ores < nres && avail)
4732		stolen = XFS_FILBLKS_MIN(nres - ores, avail);
4733	ores += stolen;
4734
4735	 /* nothing else to do if we've satisfied the new reservation */
4736	if (ores >= nres)
4737		return stolen;
4738
4739	/*
4740	 * We can't meet the total required reservation for the two extents.
4741	 * Calculate the percent of the overall shortage between both extents
4742	 * and apply this percentage to each of the requested indlen values.
4743	 * This distributes the shortage fairly and reduces the chances that one
4744	 * of the two extents is left with nothing when extents are repeatedly
4745	 * split.
4746	 */
4747	resfactor = (ores * 100);
4748	do_div(resfactor, nres);
4749	len1 *= resfactor;
4750	do_div(len1, 100);
4751	len2 *= resfactor;
4752	do_div(len2, 100);
4753	ASSERT(len1 + len2 <= ores);
4754	ASSERT(len1 < *indlen1 && len2 < *indlen2);
4755
4756	/*
4757	 * Hand out the remainder to each extent. If one of the two reservations
4758	 * is zero, we want to make sure that one gets a block first. The loop
4759	 * below starts with len1, so hand len2 a block right off the bat if it
4760	 * is zero.
4761	 */
4762	ores -= (len1 + len2);
4763	ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
4764	if (ores && !len2 && *indlen2) {
4765		len2++;
4766		ores--;
4767	}
4768	while (ores) {
4769		if (len1 < *indlen1) {
4770			len1++;
4771			ores--;
4772		}
4773		if (!ores)
4774			break;
4775		if (len2 < *indlen2) {
4776			len2++;
4777			ores--;
4778		}
4779	}
4780
4781	*indlen1 = len1;
4782	*indlen2 = len2;
4783
4784	return stolen;
4785}
4786
4787int
4788xfs_bmap_del_extent_delay(
4789	struct xfs_inode	*ip,
4790	int			whichfork,
4791	struct xfs_iext_cursor	*icur,
4792	struct xfs_bmbt_irec	*got,
4793	struct xfs_bmbt_irec	*del)
4794{
4795	struct xfs_mount	*mp = ip->i_mount;
4796	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
4797	struct xfs_bmbt_irec	new;
4798	int64_t			da_old, da_new, da_diff = 0;
4799	xfs_fileoff_t		del_endoff, got_endoff;
4800	xfs_filblks_t		got_indlen, new_indlen, stolen;
4801	int			state = xfs_bmap_fork_to_state(whichfork);
4802	int			error = 0;
4803	bool			isrt;
4804
4805	XFS_STATS_INC(mp, xs_del_exlist);
4806
4807	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
4808	del_endoff = del->br_startoff + del->br_blockcount;
4809	got_endoff = got->br_startoff + got->br_blockcount;
4810	da_old = startblockval(got->br_startblock);
4811	da_new = 0;
4812
4813	ASSERT(del->br_blockcount > 0);
4814	ASSERT(got->br_startoff <= del->br_startoff);
4815	ASSERT(got_endoff >= del_endoff);
4816
4817	if (isrt) {
4818		uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
4819
4820		do_div(rtexts, mp->m_sb.sb_rextsize);
4821		xfs_mod_frextents(mp, rtexts);
4822	}
4823
4824	/*
4825	 * Update the inode delalloc counter now and wait to update the
4826	 * sb counters as we might have to borrow some blocks for the
4827	 * indirect block accounting.
4828	 */
4829	error = xfs_trans_reserve_quota_nblks(NULL, ip,
4830			-((long)del->br_blockcount), 0,
4831			isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4832	if (error)
4833		return error;
4834	ip->i_delayed_blks -= del->br_blockcount;
4835
4836	if (got->br_startoff == del->br_startoff)
4837		state |= BMAP_LEFT_FILLING;
4838	if (got_endoff == del_endoff)
4839		state |= BMAP_RIGHT_FILLING;
4840
4841	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4842	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4843		/*
4844		 * Matches the whole extent.  Delete the entry.
4845		 */
4846		xfs_iext_remove(ip, icur, state);
4847		xfs_iext_prev(ifp, icur);
4848		break;
4849	case BMAP_LEFT_FILLING:
4850		/*
4851		 * Deleting the first part of the extent.
4852		 */
4853		got->br_startoff = del_endoff;
4854		got->br_blockcount -= del->br_blockcount;
4855		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4856				got->br_blockcount), da_old);
4857		got->br_startblock = nullstartblock((int)da_new);
4858		xfs_iext_update_extent(ip, state, icur, got);
4859		break;
4860	case BMAP_RIGHT_FILLING:
4861		/*
4862		 * Deleting the last part of the extent.
4863		 */
4864		got->br_blockcount = got->br_blockcount - del->br_blockcount;
4865		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4866				got->br_blockcount), da_old);
4867		got->br_startblock = nullstartblock((int)da_new);
4868		xfs_iext_update_extent(ip, state, icur, got);
4869		break;
4870	case 0:
4871		/*
4872		 * Deleting the middle of the extent.
4873		 *
4874		 * Distribute the original indlen reservation across the two new
4875		 * extents.  Steal blocks from the deleted extent if necessary.
4876		 * Stealing blocks simply fudges the fdblocks accounting below.
4877		 * Warn if either of the new indlen reservations is zero as this
4878		 * can lead to delalloc problems.
4879		 */
4880		got->br_blockcount = del->br_startoff - got->br_startoff;
4881		got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
4882
4883		new.br_blockcount = got_endoff - del_endoff;
4884		new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
4885
4886		WARN_ON_ONCE(!got_indlen || !new_indlen);
4887		stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
4888						       del->br_blockcount);
4889
4890		got->br_startblock = nullstartblock((int)got_indlen);
4891
4892		new.br_startoff = del_endoff;
4893		new.br_state = got->br_state;
4894		new.br_startblock = nullstartblock((int)new_indlen);
4895
4896		xfs_iext_update_extent(ip, state, icur, got);
4897		xfs_iext_next(ifp, icur);
4898		xfs_iext_insert(ip, icur, &new, state);
4899
4900		da_new = got_indlen + new_indlen - stolen;
4901		del->br_blockcount -= stolen;
4902		break;
4903	}
4904
4905	ASSERT(da_old >= da_new);
4906	da_diff = da_old - da_new;
4907	if (!isrt)
4908		da_diff += del->br_blockcount;
4909	if (da_diff) {
4910		xfs_mod_fdblocks(mp, da_diff, false);
4911		xfs_mod_delalloc(mp, -da_diff);
4912	}
4913	return error;
4914}
4915
4916void
4917xfs_bmap_del_extent_cow(
4918	struct xfs_inode	*ip,
4919	struct xfs_iext_cursor	*icur,
4920	struct xfs_bmbt_irec	*got,
4921	struct xfs_bmbt_irec	*del)
4922{
4923	struct xfs_mount	*mp = ip->i_mount;
4924	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
4925	struct xfs_bmbt_irec	new;
4926	xfs_fileoff_t		del_endoff, got_endoff;
4927	int			state = BMAP_COWFORK;
4928
4929	XFS_STATS_INC(mp, xs_del_exlist);
4930
4931	del_endoff = del->br_startoff + del->br_blockcount;
4932	got_endoff = got->br_startoff + got->br_blockcount;
4933
4934	ASSERT(del->br_blockcount > 0);
4935	ASSERT(got->br_startoff <= del->br_startoff);
4936	ASSERT(got_endoff >= del_endoff);
4937	ASSERT(!isnullstartblock(got->br_startblock));
4938
4939	if (got->br_startoff == del->br_startoff)
4940		state |= BMAP_LEFT_FILLING;
4941	if (got_endoff == del_endoff)
4942		state |= BMAP_RIGHT_FILLING;
4943
4944	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4945	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4946		/*
4947		 * Matches the whole extent.  Delete the entry.
4948		 */
4949		xfs_iext_remove(ip, icur, state);
4950		xfs_iext_prev(ifp, icur);
4951		break;
4952	case BMAP_LEFT_FILLING:
4953		/*
4954		 * Deleting the first part of the extent.
4955		 */
4956		got->br_startoff = del_endoff;
4957		got->br_blockcount -= del->br_blockcount;
4958		got->br_startblock = del->br_startblock + del->br_blockcount;
4959		xfs_iext_update_extent(ip, state, icur, got);
4960		break;
4961	case BMAP_RIGHT_FILLING:
4962		/*
4963		 * Deleting the last part of the extent.
4964		 */
4965		got->br_blockcount -= del->br_blockcount;
4966		xfs_iext_update_extent(ip, state, icur, got);
4967		break;
4968	case 0:
4969		/*
4970		 * Deleting the middle of the extent.
4971		 */
4972		got->br_blockcount = del->br_startoff - got->br_startoff;
4973
4974		new.br_startoff = del_endoff;
4975		new.br_blockcount = got_endoff - del_endoff;
4976		new.br_state = got->br_state;
4977		new.br_startblock = del->br_startblock + del->br_blockcount;
4978
4979		xfs_iext_update_extent(ip, state, icur, got);
4980		xfs_iext_next(ifp, icur);
4981		xfs_iext_insert(ip, icur, &new, state);
4982		break;
4983	}
4984	ip->i_delayed_blks -= del->br_blockcount;
4985}
4986
4987/*
4988 * Called by xfs_bmapi to update file extent records and the btree
4989 * after removing space.
4990 */
4991STATIC int				/* error */
4992xfs_bmap_del_extent_real(
4993	xfs_inode_t		*ip,	/* incore inode pointer */
4994	xfs_trans_t		*tp,	/* current transaction pointer */
4995	struct xfs_iext_cursor	*icur,
4996	xfs_btree_cur_t		*cur,	/* if null, not a btree */
4997	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
4998	int			*logflagsp, /* inode logging flags */
4999	int			whichfork, /* data or attr fork */
5000	int			bflags)	/* bmapi flags */
5001{
5002	xfs_fsblock_t		del_endblock=0;	/* first block past del */
5003	xfs_fileoff_t		del_endoff;	/* first offset past del */
5004	int			do_fx;	/* free extent at end of routine */
5005	int			error;	/* error return value */
5006	int			flags = 0;/* inode logging flags */
5007	struct xfs_bmbt_irec	got;	/* current extent entry */
5008	xfs_fileoff_t		got_endoff;	/* first offset past got */
5009	int			i;	/* temp state */
5010	struct xfs_ifork	*ifp;	/* inode fork pointer */
5011	xfs_mount_t		*mp;	/* mount structure */
5012	xfs_filblks_t		nblks;	/* quota/sb block count */
5013	xfs_bmbt_irec_t		new;	/* new record to be inserted */
5014	/* REFERENCED */
5015	uint			qfield;	/* quota field to update */
5016	int			state = xfs_bmap_fork_to_state(whichfork);
5017	struct xfs_bmbt_irec	old;
5018
5019	mp = ip->i_mount;
5020	XFS_STATS_INC(mp, xs_del_exlist);
5021
5022	ifp = XFS_IFORK_PTR(ip, whichfork);
5023	ASSERT(del->br_blockcount > 0);
5024	xfs_iext_get_extent(ifp, icur, &got);
5025	ASSERT(got.br_startoff <= del->br_startoff);
5026	del_endoff = del->br_startoff + del->br_blockcount;
5027	got_endoff = got.br_startoff + got.br_blockcount;
5028	ASSERT(got_endoff >= del_endoff);
5029	ASSERT(!isnullstartblock(got.br_startblock));
5030	qfield = 0;
5031	error = 0;
5032
5033	/*
5034	 * If it's the case where the directory code is running with no block
5035	 * reservation, and the deleted block is in the middle of its extent,
5036	 * and the resulting insert of an extent would cause transformation to
5037	 * btree format, then reject it.  The calling code will then swap blocks
5038	 * around instead.  We have to do this now, rather than waiting for the
5039	 * conversion to btree format, since the transaction will be dirty then.
5040	 */
5041	if (tp->t_blk_res == 0 &&
5042	    ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
5043	    ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
5044	    del->br_startoff > got.br_startoff && del_endoff < got_endoff)
5045		return -ENOSPC;
5046
5047	flags = XFS_ILOG_CORE;
5048	if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
5049		xfs_fsblock_t	bno;
5050		xfs_filblks_t	len;
5051		xfs_extlen_t	mod;
5052
5053		bno = div_u64_rem(del->br_startblock, mp->m_sb.sb_rextsize,
5054				  &mod);
5055		ASSERT(mod == 0);
5056		len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize,
5057				  &mod);
5058		ASSERT(mod == 0);
5059
5060		error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
5061		if (error)
5062			goto done;
5063		do_fx = 0;
5064		nblks = len * mp->m_sb.sb_rextsize;
5065		qfield = XFS_TRANS_DQ_RTBCOUNT;
5066	} else {
5067		do_fx = 1;
5068		nblks = del->br_blockcount;
5069		qfield = XFS_TRANS_DQ_BCOUNT;
5070	}
5071
5072	del_endblock = del->br_startblock + del->br_blockcount;
5073	if (cur) {
5074		error = xfs_bmbt_lookup_eq(cur, &got, &i);
5075		if (error)
5076			goto done;
5077		if (XFS_IS_CORRUPT(mp, i != 1)) {
5078			error = -EFSCORRUPTED;
5079			goto done;
5080		}
5081	}
5082
5083	if (got.br_startoff == del->br_startoff)
5084		state |= BMAP_LEFT_FILLING;
5085	if (got_endoff == del_endoff)
5086		state |= BMAP_RIGHT_FILLING;
5087
5088	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5089	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5090		/*
5091		 * Matches the whole extent.  Delete the entry.
5092		 */
5093		xfs_iext_remove(ip, icur, state);
5094		xfs_iext_prev(ifp, icur);
5095		ifp->if_nextents--;
5096
5097		flags |= XFS_ILOG_CORE;
5098		if (!cur) {
5099			flags |= xfs_ilog_fext(whichfork);
5100			break;
5101		}
5102		if ((error = xfs_btree_delete(cur, &i)))
5103			goto done;
5104		if (XFS_IS_CORRUPT(mp, i != 1)) {
5105			error = -EFSCORRUPTED;
5106			goto done;
5107		}
5108		break;
5109	case BMAP_LEFT_FILLING:
5110		/*
5111		 * Deleting the first part of the extent.
5112		 */
5113		got.br_startoff = del_endoff;
5114		got.br_startblock = del_endblock;
5115		got.br_blockcount -= del->br_blockcount;
5116		xfs_iext_update_extent(ip, state, icur, &got);
5117		if (!cur) {
5118			flags |= xfs_ilog_fext(whichfork);
5119			break;
5120		}
5121		error = xfs_bmbt_update(cur, &got);
5122		if (error)
5123			goto done;
5124		break;
5125	case BMAP_RIGHT_FILLING:
5126		/*
5127		 * Deleting the last part of the extent.
5128		 */
5129		got.br_blockcount -= del->br_blockcount;
5130		xfs_iext_update_extent(ip, state, icur, &got);
5131		if (!cur) {
5132			flags |= xfs_ilog_fext(whichfork);
5133			break;
5134		}
5135		error = xfs_bmbt_update(cur, &got);
5136		if (error)
5137			goto done;
5138		break;
5139	case 0:
5140		/*
5141		 * Deleting the middle of the extent.
5142		 */
5143		old = got;
5144
5145		got.br_blockcount = del->br_startoff - got.br_startoff;
5146		xfs_iext_update_extent(ip, state, icur, &got);
5147
5148		new.br_startoff = del_endoff;
5149		new.br_blockcount = got_endoff - del_endoff;
5150		new.br_state = got.br_state;
5151		new.br_startblock = del_endblock;
5152
5153		flags |= XFS_ILOG_CORE;
5154		if (cur) {
5155			error = xfs_bmbt_update(cur, &got);
5156			if (error)
5157				goto done;
5158			error = xfs_btree_increment(cur, 0, &i);
5159			if (error)
5160				goto done;
5161			cur->bc_rec.b = new;
5162			error = xfs_btree_insert(cur, &i);
5163			if (error && error != -ENOSPC)
5164				goto done;
5165			/*
5166			 * If get no-space back from btree insert, it tried a
5167			 * split, and we have a zero block reservation.  Fix up
5168			 * our state and return the error.
5169			 */
5170			if (error == -ENOSPC) {
5171				/*
5172				 * Reset the cursor, don't trust it after any
5173				 * insert operation.
5174				 */
5175				error = xfs_bmbt_lookup_eq(cur, &got, &i);
5176				if (error)
5177					goto done;
5178				if (XFS_IS_CORRUPT(mp, i != 1)) {
5179					error = -EFSCORRUPTED;
5180					goto done;
5181				}
5182				/*
5183				 * Update the btree record back
5184				 * to the original value.
5185				 */
5186				error = xfs_bmbt_update(cur, &old);
5187				if (error)
5188					goto done;
5189				/*
5190				 * Reset the extent record back
5191				 * to the original value.
5192				 */
5193				xfs_iext_update_extent(ip, state, icur, &old);
5194				flags = 0;
5195				error = -ENOSPC;
5196				goto done;
5197			}
5198			if (XFS_IS_CORRUPT(mp, i != 1)) {
5199				error = -EFSCORRUPTED;
5200				goto done;
5201			}
5202		} else
5203			flags |= xfs_ilog_fext(whichfork);
5204
5205		ifp->if_nextents++;
5206		xfs_iext_next(ifp, icur);
5207		xfs_iext_insert(ip, icur, &new, state);
5208		break;
5209	}
5210
5211	/* remove reverse mapping */
5212	xfs_rmap_unmap_extent(tp, ip, whichfork, del);
5213
5214	/*
5215	 * If we need to, add to list of extents to delete.
5216	 */
5217	if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
5218		if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
5219			xfs_refcount_decrease_extent(tp, del);
5220		} else {
5221			__xfs_bmap_add_free(tp, del->br_startblock,
5222					del->br_blockcount, NULL,
5223					(bflags & XFS_BMAPI_NODISCARD) ||
5224					del->br_state == XFS_EXT_UNWRITTEN);
5225		}
5226	}
5227
5228	/*
5229	 * Adjust inode # blocks in the file.
5230	 */
5231	if (nblks)
5232		ip->i_d.di_nblocks -= nblks;
5233	/*
5234	 * Adjust quota data.
5235	 */
5236	if (qfield && !(bflags & XFS_BMAPI_REMAP))
5237		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5238
5239done:
5240	*logflagsp = flags;
5241	return error;
5242}
5243
5244/*
5245 * Unmap (remove) blocks from a file.
5246 * If nexts is nonzero then the number of extents to remove is limited to
5247 * that value.  If not all extents in the block range can be removed then
5248 * *done is set.
5249 */
5250int						/* error */
5251__xfs_bunmapi(
5252	struct xfs_trans	*tp,		/* transaction pointer */
5253	struct xfs_inode	*ip,		/* incore inode */
5254	xfs_fileoff_t		start,		/* first file offset deleted */
5255	xfs_filblks_t		*rlen,		/* i/o: amount remaining */
5256	int			flags,		/* misc flags */
5257	xfs_extnum_t		nexts)		/* number of extents max */
5258{
5259	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
5260	struct xfs_bmbt_irec	del;		/* extent being deleted */
5261	int			error;		/* error return value */
5262	xfs_extnum_t		extno;		/* extent number in list */
5263	struct xfs_bmbt_irec	got;		/* current extent record */
5264	struct xfs_ifork	*ifp;		/* inode fork pointer */
5265	int			isrt;		/* freeing in rt area */
5266	int			logflags;	/* transaction logging flags */
5267	xfs_extlen_t		mod;		/* rt extent offset */
5268	struct xfs_mount	*mp = ip->i_mount;
5269	int			tmp_logflags;	/* partial logging flags */
5270	int			wasdel;		/* was a delayed alloc extent */
5271	int			whichfork;	/* data or attribute fork */
5272	xfs_fsblock_t		sum;
5273	xfs_filblks_t		len = *rlen;	/* length to unmap in file */
5274	xfs_fileoff_t		max_len;
5275	xfs_agnumber_t		prev_agno = NULLAGNUMBER, agno;
5276	xfs_fileoff_t		end;
5277	struct xfs_iext_cursor	icur;
5278	bool			done = false;
5279
5280	trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);
5281
5282	whichfork = xfs_bmapi_whichfork(flags);
5283	ASSERT(whichfork != XFS_COW_FORK);
5284	ifp = XFS_IFORK_PTR(ip, whichfork);
5285	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)))
5286		return -EFSCORRUPTED;
5287	if (XFS_FORCED_SHUTDOWN(mp))
5288		return -EIO;
5289
5290	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5291	ASSERT(len > 0);
5292	ASSERT(nexts >= 0);
5293
5294	/*
5295	 * Guesstimate how many blocks we can unmap without running the risk of
5296	 * blowing out the transaction with a mix of EFIs and reflink
5297	 * adjustments.
5298	 */
5299	if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
5300		max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
5301	else
5302		max_len = len;
5303
5304	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5305	    (error = xfs_iread_extents(tp, ip, whichfork)))
5306		return error;
5307	if (xfs_iext_count(ifp) == 0) {
5308		*rlen = 0;
5309		return 0;
5310	}
5311	XFS_STATS_INC(mp, xs_blk_unmap);
5312	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5313	end = start + len;
5314
5315	if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
5316		*rlen = 0;
5317		return 0;
5318	}
5319	end--;
5320
5321	logflags = 0;
5322	if (ifp->if_flags & XFS_IFBROOT) {
5323		ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
5324		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5325		cur->bc_ino.flags = 0;
5326	} else
5327		cur = NULL;
5328
5329	if (isrt) {
5330		/*
5331		 * Synchronize by locking the bitmap inode.
5332		 */
5333		xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
5334		xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5335		xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
5336		xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
5337	}
5338
5339	extno = 0;
5340	while (end != (xfs_fileoff_t)-1 && end >= start &&
5341	       (nexts == 0 || extno < nexts) && max_len > 0) {
5342		/*
5343		 * Is the found extent after a hole in which end lives?
5344		 * Just back up to the previous extent, if so.
5345		 */
5346		if (got.br_startoff > end &&
5347		    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5348			done = true;
5349			break;
5350		}
5351		/*
5352		 * Is the last block of this extent before the range
5353		 * we're supposed to delete?  If so, we're done.
5354		 */
5355		end = XFS_FILEOFF_MIN(end,
5356			got.br_startoff + got.br_blockcount - 1);
5357		if (end < start)
5358			break;
5359		/*
5360		 * Then deal with the (possibly delayed) allocated space
5361		 * we found.
5362		 */
5363		del = got;
5364		wasdel = isnullstartblock(del.br_startblock);
5365
5366		/*
5367		 * Make sure we don't touch multiple AGF headers out of order
5368		 * in a single transaction, as that could cause AB-BA deadlocks.
5369		 */
5370		if (!wasdel && !isrt) {
5371			agno = XFS_FSB_TO_AGNO(mp, del.br_startblock);
5372			if (prev_agno != NULLAGNUMBER && prev_agno > agno)
5373				break;
5374			prev_agno = agno;
5375		}
5376		if (got.br_startoff < start) {
5377			del.br_startoff = start;
5378			del.br_blockcount -= start - got.br_startoff;
5379			if (!wasdel)
5380				del.br_startblock += start - got.br_startoff;
5381		}
5382		if (del.br_startoff + del.br_blockcount > end + 1)
5383			del.br_blockcount = end + 1 - del.br_startoff;
5384
5385		/* How much can we safely unmap? */
5386		if (max_len < del.br_blockcount) {
5387			del.br_startoff += del.br_blockcount - max_len;
5388			if (!wasdel)
5389				del.br_startblock += del.br_blockcount - max_len;
5390			del.br_blockcount = max_len;
5391		}
5392
5393		if (!isrt)
5394			goto delete;
5395
5396		sum = del.br_startblock + del.br_blockcount;
5397		div_u64_rem(sum, mp->m_sb.sb_rextsize, &mod);
5398		if (mod) {
5399			/*
5400			 * Realtime extent not lined up at the end.
5401			 * The extent could have been split into written
5402			 * and unwritten pieces, or we could just be
5403			 * unmapping part of it.  But we can't really
5404			 * get rid of part of a realtime extent.
5405			 */
5406			if (del.br_state == XFS_EXT_UNWRITTEN) {
5407				/*
5408				 * This piece is unwritten, or we're not
5409				 * using unwritten extents.  Skip over it.
5410				 */
5411				ASSERT(end >= mod);
5412				end -= mod > del.br_blockcount ?
5413					del.br_blockcount : mod;
5414				if (end < got.br_startoff &&
5415				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5416					done = true;
5417					break;
5418				}
5419				continue;
5420			}
5421			/*
5422			 * It's written, turn it unwritten.
5423			 * This is better than zeroing it.
5424			 */
5425			ASSERT(del.br_state == XFS_EXT_NORM);
5426			ASSERT(tp->t_blk_res > 0);
5427			/*
5428			 * If this spans a realtime extent boundary,
5429			 * chop it back to the start of the one we end at.
5430			 */
5431			if (del.br_blockcount > mod) {
5432				del.br_startoff += del.br_blockcount - mod;
5433				del.br_startblock += del.br_blockcount - mod;
5434				del.br_blockcount = mod;
5435			}
5436			del.br_state = XFS_EXT_UNWRITTEN;
5437			error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5438					whichfork, &icur, &cur, &del,
5439					&logflags);
5440			if (error)
5441				goto error0;
5442			goto nodelete;
5443		}
5444		div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod);
5445		if (mod) {
5446			xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;
5447
5448			/*
5449			 * Realtime extent is lined up at the end but not
5450			 * at the front.  We'll get rid of full extents if
5451			 * we can.
5452			 */
5453			if (del.br_blockcount > off) {
5454				del.br_blockcount -= off;
5455				del.br_startoff += off;
5456				del.br_startblock += off;
5457			} else if (del.br_startoff == start &&
5458				   (del.br_state == XFS_EXT_UNWRITTEN ||
5459				    tp->t_blk_res == 0)) {
5460				/*
5461				 * Can't make it unwritten.  There isn't
5462				 * a full extent here so just skip it.
5463				 */
5464				ASSERT(end >= del.br_blockcount);
5465				end -= del.br_blockcount;
5466				if (got.br_startoff > end &&
5467				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5468					done = true;
5469					break;
5470				}
5471				continue;
5472			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
5473				struct xfs_bmbt_irec	prev;
5474				xfs_fileoff_t		unwrite_start;
5475
5476				/*
5477				 * This one is already unwritten.
5478				 * It must have a written left neighbor.
5479				 * Unwrite the killed part of that one and
5480				 * try again.
5481				 */
5482				if (!xfs_iext_prev_extent(ifp, &icur, &prev))
5483					ASSERT(0);
5484				ASSERT(prev.br_state == XFS_EXT_NORM);
5485				ASSERT(!isnullstartblock(prev.br_startblock));
5486				ASSERT(del.br_startblock ==
5487				       prev.br_startblock + prev.br_blockcount);
5488				unwrite_start = max3(start,
5489						     del.br_startoff - mod,
5490						     prev.br_startoff);
5491				mod = unwrite_start - prev.br_startoff;
5492				prev.br_startoff = unwrite_start;
5493				prev.br_startblock += mod;
5494				prev.br_blockcount -= mod;
5495				prev.br_state = XFS_EXT_UNWRITTEN;
5496				error = xfs_bmap_add_extent_unwritten_real(tp,
5497						ip, whichfork, &icur, &cur,
5498						&prev, &logflags);
5499				if (error)
5500					goto error0;
5501				goto nodelete;
5502			} else {
5503				ASSERT(del.br_state == XFS_EXT_NORM);
5504				del.br_state = XFS_EXT_UNWRITTEN;
5505				error = xfs_bmap_add_extent_unwritten_real(tp,
5506						ip, whichfork, &icur, &cur,
5507						&del, &logflags);
5508				if (error)
5509					goto error0;
5510				goto nodelete;
5511			}
5512		}
5513
5514delete:
5515		if (wasdel) {
5516			error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
5517					&got, &del);
5518		} else {
5519			error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
5520					&del, &tmp_logflags, whichfork,
5521					flags);
5522			logflags |= tmp_logflags;
5523		}
5524
5525		if (error)
5526			goto error0;
5527
5528		max_len -= del.br_blockcount;
5529		end = del.br_startoff - 1;
5530nodelete:
5531		/*
5532		 * If not done go on to the next (previous) record.
5533		 */
5534		if (end != (xfs_fileoff_t)-1 && end >= start) {
5535			if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5536			    (got.br_startoff > end &&
5537			     !xfs_iext_prev_extent(ifp, &icur, &got))) {
5538				done = true;
5539				break;
5540			}
5541			extno++;
5542		}
5543	}
5544	if (done || end == (xfs_fileoff_t)-1 || end < start)
5545		*rlen = 0;
5546	else
5547		*rlen = end - start + 1;
5548
5549	/*
5550	 * Convert to a btree if necessary.
5551	 */
5552	if (xfs_bmap_needs_btree(ip, whichfork)) {
5553		ASSERT(cur == NULL);
5554		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
5555				&tmp_logflags, whichfork);
5556		logflags |= tmp_logflags;
5557	} else {
5558		error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags,
5559			whichfork);
5560	}
5561
5562error0:
5563	/*
5564	 * Log everything.  Do this after conversion, there's no point in
5565	 * logging the extent records if we've converted to btree format.
5566	 */
5567	if ((logflags & xfs_ilog_fext(whichfork)) &&
5568	    ifp->if_format != XFS_DINODE_FMT_EXTENTS)
5569		logflags &= ~xfs_ilog_fext(whichfork);
5570	else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5571		 ifp->if_format != XFS_DINODE_FMT_BTREE)
5572		logflags &= ~xfs_ilog_fbroot(whichfork);
5573	/*
5574	 * Log inode even in the error case, if the transaction
5575	 * is dirty we'll need to shut down the filesystem.
5576	 */
5577	if (logflags)
5578		xfs_trans_log_inode(tp, ip, logflags);
5579	if (cur) {
5580		if (!error)
5581			cur->bc_ino.allocated = 0;
5582		xfs_btree_del_cursor(cur, error);
5583	}
5584	return error;
5585}
5586
5587/* Unmap a range of a file. */
5588int
5589xfs_bunmapi(
5590	xfs_trans_t		*tp,
5591	struct xfs_inode	*ip,
5592	xfs_fileoff_t		bno,
5593	xfs_filblks_t		len,
5594	int			flags,
5595	xfs_extnum_t		nexts,
5596	int			*done)
5597{
5598	int			error;
5599
5600	error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts);
5601	*done = (len == 0);
5602	return error;
5603}
5604
5605/*
5606 * Determine whether an extent shift can be accomplished by a merge with the
5607 * extent that precedes the target hole of the shift.
5608 */
5609STATIC bool
5610xfs_bmse_can_merge(
5611	struct xfs_bmbt_irec	*left,	/* preceding extent */
5612	struct xfs_bmbt_irec	*got,	/* current extent to shift */
5613	xfs_fileoff_t		shift)	/* shift fsb */
5614{
5615	xfs_fileoff_t		startoff;
5616
5617	startoff = got->br_startoff - shift;
5618
5619	/*
5620	 * The extent, once shifted, must be adjacent in-file and on-disk with
5621	 * the preceding extent.
5622	 */
5623	if ((left->br_startoff + left->br_blockcount != startoff) ||
5624	    (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5625	    (left->br_state != got->br_state) ||
5626	    (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
5627		return false;
5628
5629	return true;
5630}
5631
5632/*
5633 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5634 * hole in the file. If an extent shift would result in the extent being fully
5635 * adjacent to the extent that currently precedes the hole, we can merge with
5636 * the preceding extent rather than do the shift.
5637 *
5638 * This function assumes the caller has verified a shift-by-merge is possible
5639 * with the provided extents via xfs_bmse_can_merge().
5640 */
5641STATIC int
5642xfs_bmse_merge(
5643	struct xfs_trans		*tp,
5644	struct xfs_inode		*ip,
5645	int				whichfork,
5646	xfs_fileoff_t			shift,		/* shift fsb */
5647	struct xfs_iext_cursor		*icur,
5648	struct xfs_bmbt_irec		*got,		/* extent to shift */
5649	struct xfs_bmbt_irec		*left,		/* preceding extent */
5650	struct xfs_btree_cur		*cur,
5651	int				*logflags)	/* output */
5652{
5653	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, whichfork);
5654	struct xfs_bmbt_irec		new;
5655	xfs_filblks_t			blockcount;
5656	int				error, i;
5657	struct xfs_mount		*mp = ip->i_mount;
5658
5659	blockcount = left->br_blockcount + got->br_blockcount;
5660
5661	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5662	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5663	ASSERT(xfs_bmse_can_merge(left, got, shift));
5664
5665	new = *left;
5666	new.br_blockcount = blockcount;
5667
5668	/*
5669	 * Update the on-disk extent count, the btree if necessary and log the
5670	 * inode.
5671	 */
5672	ifp->if_nextents--;
5673	*logflags |= XFS_ILOG_CORE;
5674	if (!cur) {
5675		*logflags |= XFS_ILOG_DEXT;
5676		goto done;
5677	}
5678
5679	/* lookup and remove the extent to merge */
5680	error = xfs_bmbt_lookup_eq(cur, got, &i);
5681	if (error)
5682		return error;
5683	if (XFS_IS_CORRUPT(mp, i != 1))
5684		return -EFSCORRUPTED;
5685
5686	error = xfs_btree_delete(cur, &i);
5687	if (error)
5688		return error;
5689	if (XFS_IS_CORRUPT(mp, i != 1))
5690		return -EFSCORRUPTED;
5691
5692	/* lookup and update size of the previous extent */
5693	error = xfs_bmbt_lookup_eq(cur, left, &i);
5694	if (error)
5695		return error;
5696	if (XFS_IS_CORRUPT(mp, i != 1))
5697		return -EFSCORRUPTED;
5698
5699	error = xfs_bmbt_update(cur, &new);
5700	if (error)
5701		return error;
5702
5703	/* change to extent format if required after extent removal */
5704	error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
5705	if (error)
5706		return error;
5707
5708done:
5709	xfs_iext_remove(ip, icur, 0);
5710	xfs_iext_prev(ifp, icur);
5711	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5712			&new);
5713
5714	/* update reverse mapping. rmap functions merge the rmaps for us */
5715	xfs_rmap_unmap_extent(tp, ip, whichfork, got);
5716	memcpy(&new, got, sizeof(new));
5717	new.br_startoff = left->br_startoff + left->br_blockcount;
5718	xfs_rmap_map_extent(tp, ip, whichfork, &new);
5719	return 0;
5720}
5721
5722static int
5723xfs_bmap_shift_update_extent(
5724	struct xfs_trans	*tp,
5725	struct xfs_inode	*ip,
5726	int			whichfork,
5727	struct xfs_iext_cursor	*icur,
5728	struct xfs_bmbt_irec	*got,
5729	struct xfs_btree_cur	*cur,
5730	int			*logflags,
5731	xfs_fileoff_t		startoff)
5732{
5733	struct xfs_mount	*mp = ip->i_mount;
5734	struct xfs_bmbt_irec	prev = *got;
5735	int			error, i;
5736
5737	*logflags |= XFS_ILOG_CORE;
5738
5739	got->br_startoff = startoff;
5740
5741	if (cur) {
5742		error = xfs_bmbt_lookup_eq(cur, &prev, &i);
5743		if (error)
5744			return error;
5745		if (XFS_IS_CORRUPT(mp, i != 1))
5746			return -EFSCORRUPTED;
5747
5748		error = xfs_bmbt_update(cur, got);
5749		if (error)
5750			return error;
5751	} else {
5752		*logflags |= XFS_ILOG_DEXT;
5753	}
5754
5755	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5756			got);
5757
5758	/* update reverse mapping */
5759	xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
5760	xfs_rmap_map_extent(tp, ip, whichfork, got);
5761	return 0;
5762}
5763
5764int
5765xfs_bmap_collapse_extents(
5766	struct xfs_trans	*tp,
5767	struct xfs_inode	*ip,
5768	xfs_fileoff_t		*next_fsb,
5769	xfs_fileoff_t		offset_shift_fsb,
5770	bool			*done)
5771{
5772	int			whichfork = XFS_DATA_FORK;
5773	struct xfs_mount	*mp = ip->i_mount;
5774	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
5775	struct xfs_btree_cur	*cur = NULL;
5776	struct xfs_bmbt_irec	got, prev;
5777	struct xfs_iext_cursor	icur;
5778	xfs_fileoff_t		new_startoff;
5779	int			error = 0;
5780	int			logflags = 0;
5781
5782	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5783	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5784		return -EFSCORRUPTED;
5785	}
5786
5787	if (XFS_FORCED_SHUTDOWN(mp))
5788		return -EIO;
5789
5790	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5791
5792	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5793		error = xfs_iread_extents(tp, ip, whichfork);
5794		if (error)
5795			return error;
5796	}
5797
5798	if (ifp->if_flags & XFS_IFBROOT) {
5799		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5800		cur->bc_ino.flags = 0;
5801	}
5802
5803	if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5804		*done = true;
5805		goto del_cursor;
5806	}
5807	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5808		error = -EFSCORRUPTED;
5809		goto del_cursor;
5810	}
5811
5812	new_startoff = got.br_startoff - offset_shift_fsb;
5813	if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
5814		if (new_startoff < prev.br_startoff + prev.br_blockcount) {
5815			error = -EINVAL;
5816			goto del_cursor;
5817		}
5818
5819		if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
5820			error = xfs_bmse_merge(tp, ip, whichfork,
5821					offset_shift_fsb, &icur, &got, &prev,
5822					cur, &logflags);
5823			if (error)
5824				goto del_cursor;
5825			goto done;
5826		}
5827	} else {
5828		if (got.br_startoff < offset_shift_fsb) {
5829			error = -EINVAL;
5830			goto del_cursor;
5831		}
5832	}
5833
5834	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5835			cur, &logflags, new_startoff);
5836	if (error)
5837		goto del_cursor;
5838
5839done:
5840	if (!xfs_iext_next_extent(ifp, &icur, &got)) {
5841		*done = true;
5842		goto del_cursor;
5843	}
5844
5845	*next_fsb = got.br_startoff;
5846del_cursor:
5847	if (cur)
5848		xfs_btree_del_cursor(cur, error);
5849	if (logflags)
5850		xfs_trans_log_inode(tp, ip, logflags);
5851	return error;
5852}
5853
5854/* Make sure we won't be right-shifting an extent past the maximum bound. */
5855int
5856xfs_bmap_can_insert_extents(
5857	struct xfs_inode	*ip,
5858	xfs_fileoff_t		off,
5859	xfs_fileoff_t		shift)
5860{
5861	struct xfs_bmbt_irec	got;
5862	int			is_empty;
5863	int			error = 0;
5864
5865	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5866
5867	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
5868		return -EIO;
5869
5870	xfs_ilock(ip, XFS_ILOCK_EXCL);
5871	error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
5872	if (!error && !is_empty && got.br_startoff >= off &&
5873	    ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
5874		error = -EINVAL;
5875	xfs_iunlock(ip, XFS_ILOCK_EXCL);
5876
5877	return error;
5878}
5879
5880int
5881xfs_bmap_insert_extents(
5882	struct xfs_trans	*tp,
5883	struct xfs_inode	*ip,
5884	xfs_fileoff_t		*next_fsb,
5885	xfs_fileoff_t		offset_shift_fsb,
5886	bool			*done,
5887	xfs_fileoff_t		stop_fsb)
5888{
5889	int			whichfork = XFS_DATA_FORK;
5890	struct xfs_mount	*mp = ip->i_mount;
5891	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
5892	struct xfs_btree_cur	*cur = NULL;
5893	struct xfs_bmbt_irec	got, next;
5894	struct xfs_iext_cursor	icur;
5895	xfs_fileoff_t		new_startoff;
5896	int			error = 0;
5897	int			logflags = 0;
5898
5899	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5900	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5901		return -EFSCORRUPTED;
5902	}
5903
5904	if (XFS_FORCED_SHUTDOWN(mp))
5905		return -EIO;
5906
5907	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5908
5909	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5910		error = xfs_iread_extents(tp, ip, whichfork);
5911		if (error)
5912			return error;
5913	}
5914
5915	if (ifp->if_flags & XFS_IFBROOT) {
5916		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5917		cur->bc_ino.flags = 0;
5918	}
5919
5920	if (*next_fsb == NULLFSBLOCK) {
5921		xfs_iext_last(ifp, &icur);
5922		if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5923		    stop_fsb > got.br_startoff) {
5924			*done = true;
5925			goto del_cursor;
5926		}
5927	} else {
5928		if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5929			*done = true;
5930			goto del_cursor;
5931		}
5932	}
5933	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5934		error = -EFSCORRUPTED;
5935		goto del_cursor;
5936	}
5937
5938	if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
5939		error = -EFSCORRUPTED;
5940		goto del_cursor;
5941	}
5942
5943	new_startoff = got.br_startoff + offset_shift_fsb;
5944	if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
5945		if (new_startoff + got.br_blockcount > next.br_startoff) {
5946			error = -EINVAL;
5947			goto del_cursor;
5948		}
5949
5950		/*
5951		 * Unlike a left shift (which involves a hole punch), a right
5952		 * shift does not modify extent neighbors in any way.  We should
5953		 * never find mergeable extents in this scenario.  Check anyways
5954		 * and warn if we encounter two extents that could be one.
5955		 */
5956		if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
5957			WARN_ON_ONCE(1);
5958	}
5959
5960	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5961			cur, &logflags, new_startoff);
5962	if (error)
5963		goto del_cursor;
5964
5965	if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
5966	    stop_fsb >= got.br_startoff + got.br_blockcount) {
5967		*done = true;
5968		goto del_cursor;
5969	}
5970
5971	*next_fsb = got.br_startoff;
5972del_cursor:
5973	if (cur)
5974		xfs_btree_del_cursor(cur, error);
5975	if (logflags)
5976		xfs_trans_log_inode(tp, ip, logflags);
5977	return error;
5978}
5979
5980/*
5981 * Splits an extent into two extents at split_fsb block such that it is the
5982 * first block of the current_ext. @ext is a target extent to be split.
5983 * @split_fsb is a block where the extents is split.  If split_fsb lies in a
5984 * hole or the first block of extents, just return 0.
5985 */
5986int
5987xfs_bmap_split_extent(
5988	struct xfs_trans	*tp,
5989	struct xfs_inode	*ip,
5990	xfs_fileoff_t		split_fsb)
5991{
5992	int				whichfork = XFS_DATA_FORK;
5993	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, whichfork);
5994	struct xfs_btree_cur		*cur = NULL;
5995	struct xfs_bmbt_irec		got;
5996	struct xfs_bmbt_irec		new; /* split extent */
5997	struct xfs_mount		*mp = ip->i_mount;
5998	xfs_fsblock_t			gotblkcnt; /* new block count for got */
5999	struct xfs_iext_cursor		icur;
6000	int				error = 0;
6001	int				logflags = 0;
6002	int				i = 0;
6003
6004	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
6005	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
6006		return -EFSCORRUPTED;
6007	}
6008
6009	if (XFS_FORCED_SHUTDOWN(mp))
6010		return -EIO;
6011
6012	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
6013		/* Read in all the extents */
6014		error = xfs_iread_extents(tp, ip, whichfork);
6015		if (error)
6016			return error;
6017	}
6018
6019	/*
6020	 * If there are not extents, or split_fsb lies in a hole we are done.
6021	 */
6022	if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
6023	    got.br_startoff >= split_fsb)
6024		return 0;
6025
6026	gotblkcnt = split_fsb - got.br_startoff;
6027	new.br_startoff = split_fsb;
6028	new.br_startblock = got.br_startblock + gotblkcnt;
6029	new.br_blockcount = got.br_blockcount - gotblkcnt;
6030	new.br_state = got.br_state;
6031
6032	if (ifp->if_flags & XFS_IFBROOT) {
6033		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
6034		cur->bc_ino.flags = 0;
6035		error = xfs_bmbt_lookup_eq(cur, &got, &i);
6036		if (error)
6037			goto del_cursor;
6038		if (XFS_IS_CORRUPT(mp, i != 1)) {
6039			error = -EFSCORRUPTED;
6040			goto del_cursor;
6041		}
6042	}
6043
6044	got.br_blockcount = gotblkcnt;
6045	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
6046			&got);
6047
6048	logflags = XFS_ILOG_CORE;
6049	if (cur) {
6050		error = xfs_bmbt_update(cur, &got);
6051		if (error)
6052			goto del_cursor;
6053	} else
6054		logflags |= XFS_ILOG_DEXT;
6055
6056	/* Add new extent */
6057	xfs_iext_next(ifp, &icur);
6058	xfs_iext_insert(ip, &icur, &new, 0);
6059	ifp->if_nextents++;
6060
6061	if (cur) {
6062		error = xfs_bmbt_lookup_eq(cur, &new, &i);
6063		if (error)
6064			goto del_cursor;
6065		if (XFS_IS_CORRUPT(mp, i != 0)) {
6066			error = -EFSCORRUPTED;
6067			goto del_cursor;
6068		}
6069		error = xfs_btree_insert(cur, &i);
6070		if (error)
6071			goto del_cursor;
6072		if (XFS_IS_CORRUPT(mp, i != 1)) {
6073			error = -EFSCORRUPTED;
6074			goto del_cursor;
6075		}
6076	}
6077
6078	/*
6079	 * Convert to a btree if necessary.
6080	 */
6081	if (xfs_bmap_needs_btree(ip, whichfork)) {
6082		int tmp_logflags; /* partial log flag return val */
6083
6084		ASSERT(cur == NULL);
6085		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
6086				&tmp_logflags, whichfork);
6087		logflags |= tmp_logflags;
6088	}
6089
6090del_cursor:
6091	if (cur) {
6092		cur->bc_ino.allocated = 0;
6093		xfs_btree_del_cursor(cur, error);
6094	}
6095
6096	if (logflags)
6097		xfs_trans_log_inode(tp, ip, logflags);
6098	return error;
6099}
6100
6101/* Deferred mapping is only for real extents in the data fork. */
6102static bool
6103xfs_bmap_is_update_needed(
6104	struct xfs_bmbt_irec	*bmap)
6105{
6106	return  bmap->br_startblock != HOLESTARTBLOCK &&
6107		bmap->br_startblock != DELAYSTARTBLOCK;
6108}
6109
6110/* Record a bmap intent. */
6111static int
6112__xfs_bmap_add(
6113	struct xfs_trans		*tp,
6114	enum xfs_bmap_intent_type	type,
6115	struct xfs_inode		*ip,
6116	int				whichfork,
6117	struct xfs_bmbt_irec		*bmap)
6118{
6119	struct xfs_bmap_intent		*bi;
6120
6121	trace_xfs_bmap_defer(tp->t_mountp,
6122			XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
6123			type,
6124			XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
6125			ip->i_ino, whichfork,
6126			bmap->br_startoff,
6127			bmap->br_blockcount,
6128			bmap->br_state);
6129
6130	bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_NOFS);
6131	INIT_LIST_HEAD(&bi->bi_list);
6132	bi->bi_type = type;
6133	bi->bi_owner = ip;
6134	bi->bi_whichfork = whichfork;
6135	bi->bi_bmap = *bmap;
6136
6137	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
6138	return 0;
6139}
6140
6141/* Map an extent into a file. */
6142void
6143xfs_bmap_map_extent(
6144	struct xfs_trans	*tp,
6145	struct xfs_inode	*ip,
6146	struct xfs_bmbt_irec	*PREV)
6147{
6148	if (!xfs_bmap_is_update_needed(PREV))
6149		return;
6150
6151	__xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV);
6152}
6153
6154/* Unmap an extent out of a file. */
6155void
6156xfs_bmap_unmap_extent(
6157	struct xfs_trans	*tp,
6158	struct xfs_inode	*ip,
6159	struct xfs_bmbt_irec	*PREV)
6160{
6161	if (!xfs_bmap_is_update_needed(PREV))
6162		return;
6163
6164	__xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV);
6165}
6166
6167/*
6168 * Process one of the deferred bmap operations.  We pass back the
6169 * btree cursor to maintain our lock on the bmapbt between calls.
6170 */
6171int
6172xfs_bmap_finish_one(
6173	struct xfs_trans		*tp,
6174	struct xfs_inode		*ip,
6175	enum xfs_bmap_intent_type	type,
6176	int				whichfork,
6177	xfs_fileoff_t			startoff,
6178	xfs_fsblock_t			startblock,
6179	xfs_filblks_t			*blockcount,
6180	xfs_exntst_t			state)
6181{
6182	int				error = 0;
6183
6184	ASSERT(tp->t_firstblock == NULLFSBLOCK);
6185
6186	trace_xfs_bmap_deferred(tp->t_mountp,
6187			XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
6188			XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
6189			ip->i_ino, whichfork, startoff, *blockcount, state);
6190
6191	if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
6192		return -EFSCORRUPTED;
6193
6194	if (XFS_TEST_ERROR(false, tp->t_mountp,
6195			XFS_ERRTAG_BMAP_FINISH_ONE))
6196		return -EIO;
6197
6198	switch (type) {
6199	case XFS_BMAP_MAP:
6200		error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
6201				startblock, 0);
6202		*blockcount = 0;
6203		break;
6204	case XFS_BMAP_UNMAP:
6205		error = __xfs_bunmapi(tp, ip, startoff, blockcount,
6206				XFS_BMAPI_REMAP, 1);
6207		break;
6208	default:
6209		ASSERT(0);
6210		error = -EFSCORRUPTED;
6211	}
6212
6213	return error;
6214}
6215
6216/* Check that an inode's extent does not have invalid flags or bad ranges. */
6217xfs_failaddr_t
6218xfs_bmap_validate_extent(
6219	struct xfs_inode	*ip,
6220	int			whichfork,
6221	struct xfs_bmbt_irec	*irec)
6222{
6223	struct xfs_mount	*mp = ip->i_mount;
6224	xfs_fsblock_t		endfsb;
6225	bool			isrt;
6226
6227	isrt = XFS_IS_REALTIME_INODE(ip);
6228	endfsb = irec->br_startblock + irec->br_blockcount - 1;
6229	if (isrt && whichfork == XFS_DATA_FORK) {
6230		if (!xfs_verify_rtbno(mp, irec->br_startblock))
6231			return __this_address;
6232		if (!xfs_verify_rtbno(mp, endfsb))
6233			return __this_address;
6234	} else {
6235		if (!xfs_verify_fsbno(mp, irec->br_startblock))
6236			return __this_address;
6237		if (!xfs_verify_fsbno(mp, endfsb))
6238			return __this_address;
6239		if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
6240		    XFS_FSB_TO_AGNO(mp, endfsb))
6241			return __this_address;
6242	}
6243	if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
6244		return __this_address;
6245	return NULL;
6246}