Linux Audio

Check our new training course

Loading...
v6.8
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4 * All Rights Reserved.
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_log_format.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_bit.h"
  13#include "xfs_sb.h"
  14#include "xfs_mount.h"
  15#include "xfs_defer.h"
  16#include "xfs_dir2.h"
  17#include "xfs_inode.h"
  18#include "xfs_btree.h"
  19#include "xfs_trans.h"
  20#include "xfs_alloc.h"
  21#include "xfs_bmap.h"
  22#include "xfs_bmap_util.h"
  23#include "xfs_bmap_btree.h"
  24#include "xfs_rtbitmap.h"
  25#include "xfs_errortag.h"
  26#include "xfs_error.h"
  27#include "xfs_quota.h"
  28#include "xfs_trans_space.h"
  29#include "xfs_buf_item.h"
  30#include "xfs_trace.h"
  31#include "xfs_attr_leaf.h"
  32#include "xfs_filestream.h"
  33#include "xfs_rmap.h"
  34#include "xfs_ag.h"
  35#include "xfs_ag_resv.h"
  36#include "xfs_refcount.h"
  37#include "xfs_icache.h"
  38#include "xfs_iomap.h"
  39
  /*
   * NOTE(review): presumably the slab cache that bmap intent items are
   * allocated from; its users are outside this view - confirm.
   */
  40struct kmem_cache		*xfs_bmap_intent_cache;
 
  41
  42/*
  43 * Miscellaneous helper functions
  44 */
  45
  46/*
  47 * Compute and fill in the value of the maximum depth of a bmap btree
  48 * in this filesystem.  Done once, during mount.
  49 */
  50void
  51xfs_bmap_compute_maxlevels(
  52	xfs_mount_t	*mp,		/* file system mount structure */
  53	int		whichfork)	/* data or attr fork */
  54{
  55	uint64_t	maxblocks;	/* max blocks at this level */
  56	xfs_extnum_t	maxleafents;	/* max leaf entries possible */
  57	int		level;		/* btree level */
 
 
  58	int		maxrootrecs;	/* max records in root block */
  59	int		minleafrecs;	/* min records in leaf block */
  60	int		minnoderecs;	/* min records in node block */
  61	int		sz;		/* root block size */
  62
  63	/*
  64	 * The maximum number of extents in a fork, hence the maximum number of
  65	 * leaf entries, is controlled by the size of the on-disk extent count.
 
 
  66	 *
  67	 * Note that we can no longer assume that if we are in ATTR1 that the
  68	 * fork offset of all the inodes will be
  69	 * (xfs_default_attroffset(ip) >> 3) because we could have mounted with
  70	 * ATTR2 and then mounted back with ATTR1, keeping the i_forkoff's fixed
  71	 * but probably at various positions. Therefore, for both ATTR1 and
  72	 * ATTR2 we have to assume the worst case scenario of a minimum size
  73	 * available.
  74	 */
  75	maxleafents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp),
  76				whichfork);
  77	if (whichfork == XFS_DATA_FORK)
  78		sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
  79	else
 
  80		sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
  81
  82	maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
  83	minleafrecs = mp->m_bmap_dmnr[0];
  84	minnoderecs = mp->m_bmap_dmnr[1];
  85	maxblocks = howmany_64(maxleafents, minleafrecs);
  86	for (level = 1; maxblocks > 1; level++) {
  87		if (maxblocks <= maxrootrecs)
  88			maxblocks = 1;
  89		else
  90			maxblocks = howmany_64(maxblocks, minnoderecs);
  91	}
  92	mp->m_bm_maxlevels[whichfork] = level;
  93	ASSERT(mp->m_bm_maxlevels[whichfork] <= xfs_bmbt_maxlevels_ondisk());
  94}
  95
  96unsigned int
  97xfs_bmap_compute_attr_offset(
  98	struct xfs_mount	*mp)
  99{
 100	if (mp->m_sb.sb_inodesize == 256)
 101		return XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
 102	return XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
 103}
 104
 105STATIC int				/* error */
 106xfs_bmbt_lookup_eq(
 107	struct xfs_btree_cur	*cur,
 108	struct xfs_bmbt_irec	*irec,
 109	int			*stat)	/* success/failure */
 110{
 111	cur->bc_rec.b = *irec;
 112	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
 113}
 114
 115STATIC int				/* error */
 116xfs_bmbt_lookup_first(
 117	struct xfs_btree_cur	*cur,
 118	int			*stat)	/* success/failure */
 119{
 120	cur->bc_rec.b.br_startoff = 0;
 121	cur->bc_rec.b.br_startblock = 0;
 122	cur->bc_rec.b.br_blockcount = 0;
 123	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 124}
 125
 126/*
 127 * Check if the inode needs to be converted to btree format.
 128 */
 129static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
 130{
 131	struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
 132
 133	return whichfork != XFS_COW_FORK &&
 134		ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
 135		ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork);
 136}
 137
 138/*
 139 * Check if the inode should be converted to extent format.
 140 */
 141static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
 142{
 143	struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
 144
 145	return whichfork != XFS_COW_FORK &&
 146		ifp->if_format == XFS_DINODE_FMT_BTREE &&
 147		ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork);
 148}
 149
 150/*
 151 * Update the record referred to by cur to the value given by irec
 152 * This either works (return 0) or gets an EFSCORRUPTED error.
 153 */
 154STATIC int
 155xfs_bmbt_update(
 156	struct xfs_btree_cur	*cur,
 157	struct xfs_bmbt_irec	*irec)
 158{
 159	union xfs_btree_rec	rec;
 160
 161	xfs_bmbt_disk_set_all(&rec.bmbt, irec);
 162	return xfs_btree_update(cur, &rec);
 163}
 164
 165/*
 166 * Compute the worst-case number of indirect blocks that will be used
 167 * for ip's delayed extent of length "len".
 168 */
 169STATIC xfs_filblks_t
 170xfs_bmap_worst_indlen(
 171	xfs_inode_t	*ip,		/* incore inode pointer */
 172	xfs_filblks_t	len)		/* delayed extent length */
 173{
 174	int		level;		/* btree level number */
 175	int		maxrecs;	/* maximum record count at this level */
 176	xfs_mount_t	*mp;		/* mount structure */
 177	xfs_filblks_t	rval;		/* return value */
 178
 179	mp = ip->i_mount;
 180	maxrecs = mp->m_bmap_dmxr[0];
 181	for (level = 0, rval = 0;
 182	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
 183	     level++) {
 184		len += maxrecs - 1;
 185		do_div(len, maxrecs);
 186		rval += len;
 187		if (len == 1)
 188			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
 189				level - 1;
 190		if (level == 0)
 191			maxrecs = mp->m_bmap_dmxr[1];
 192	}
 193	return rval;
 194}
 195
 196/*
 197 * Calculate the default attribute fork offset for newly created inodes.
 198 */
 199uint
 200xfs_default_attroffset(
 201	struct xfs_inode	*ip)
 202{
 203	if (ip->i_df.if_format == XFS_DINODE_FMT_DEV)
 204		return roundup(sizeof(xfs_dev_t), 8);
 205	return M_IGEO(ip->i_mount)->attr_fork_offset;
 
 
 
 
 
 
 
 206}
 207
 208/*
 209 * Helper routine to reset inode i_forkoff field when switching attribute fork
 210 * from local to extent format - we reset it where possible to make space
 211 * available for inline data fork extents.
 212 */
 213STATIC void
 214xfs_bmap_forkoff_reset(
 215	xfs_inode_t	*ip,
 216	int		whichfork)
 217{
 218	if (whichfork == XFS_ATTR_FORK &&
 219	    ip->i_df.if_format != XFS_DINODE_FMT_DEV &&
 220	    ip->i_df.if_format != XFS_DINODE_FMT_BTREE) {
 221		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;
 222
 223		if (dfl_forkoff > ip->i_forkoff)
 224			ip->i_forkoff = dfl_forkoff;
 225	}
 226}
 227
 228#ifdef DEBUG
 229STATIC struct xfs_buf *
 230xfs_bmap_get_bp(
 231	struct xfs_btree_cur	*cur,
 232	xfs_fsblock_t		bno)
 233{
 234	struct xfs_log_item	*lip;
 235	int			i;
 236
 237	if (!cur)
 238		return NULL;
 239
 240	for (i = 0; i < cur->bc_maxlevels; i++) {
 241		if (!cur->bc_levels[i].bp)
 242			break;
 243		if (xfs_buf_daddr(cur->bc_levels[i].bp) == bno)
 244			return cur->bc_levels[i].bp;
 245	}
 246
 247	/* Chase down all the log items to see if the bp is there */
 248	list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
 249		struct xfs_buf_log_item	*bip = (struct xfs_buf_log_item *)lip;
 250
 251		if (bip->bli_item.li_type == XFS_LI_BUF &&
 252		    xfs_buf_daddr(bip->bli_buf) == bno)
 253			return bip->bli_buf;
 254	}
 255
 256	return NULL;
 257}
 258
 259STATIC void
 260xfs_check_block(
 261	struct xfs_btree_block	*block,
 262	xfs_mount_t		*mp,
 263	int			root,
 264	short			sz)
 265{
 266	int			i, j, dmxr;
 267	__be64			*pp, *thispa;	/* pointer to block address */
 268	xfs_bmbt_key_t		*prevp, *keyp;
 269
 270	ASSERT(be16_to_cpu(block->bb_level) > 0);
 271
 272	prevp = NULL;
 273	for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
 274		dmxr = mp->m_bmap_dmxr[0];
 275		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
 276
 277		if (prevp) {
 278			ASSERT(be64_to_cpu(prevp->br_startoff) <
 279			       be64_to_cpu(keyp->br_startoff));
 280		}
 281		prevp = keyp;
 282
 283		/*
 284		 * Compare the block numbers to see if there are dups.
 285		 */
 286		if (root)
 287			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
 288		else
 289			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
 290
 291		for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
 292			if (root)
 293				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
 294			else
 295				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
 296			if (*thispa == *pp) {
 297				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %lld",
 298					__func__, j, i,
 299					(unsigned long long)be64_to_cpu(*thispa));
 300				xfs_err(mp, "%s: ptrs are equal in node\n",
 301					__func__);
 302				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 303			}
 304		}
 305	}
 306}
 307
 308/*
 309 * Check that the extents for the inode ip are in the right order in all
 310 * btree leaves. THis becomes prohibitively expensive for large extent count
 311 * files, so don't bother with inodes that have more than 10,000 extents in
 312 * them. The btree record ordering checks will still be done, so for such large
 313 * bmapbt constructs that is going to catch most corruptions.
 314 */
 315STATIC void
 316xfs_bmap_check_leaf_extents(
 317	struct xfs_btree_cur	*cur,	/* btree cursor or null */
 318	xfs_inode_t		*ip,		/* incore inode pointer */
 319	int			whichfork)	/* data or attr fork */
 320{
 321	struct xfs_mount	*mp = ip->i_mount;
 322	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
 323	struct xfs_btree_block	*block;	/* current btree block */
 324	xfs_fsblock_t		bno;	/* block # of "block" */
 325	struct xfs_buf		*bp;	/* buffer for "block" */
 326	int			error;	/* error return value */
 327	xfs_extnum_t		i=0, j;	/* index into the extents list */
 328	int			level;	/* btree level, for checking */
 329	__be64			*pp;	/* pointer to block address */
 330	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
 331	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
 332	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
 333	int			bp_release = 0;
 334
 335	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
 336		return;
 337
 338	/* skip large extent count inodes */
 339	if (ip->i_df.if_nextents > 10000)
 340		return;
 341
 342	bno = NULLFSBLOCK;
 343	block = ifp->if_broot;
 344	/*
 345	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
 346	 */
 347	level = be16_to_cpu(block->bb_level);
 348	ASSERT(level > 0);
 349	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
 350	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
 351	bno = be64_to_cpu(*pp);
 352
 353	ASSERT(bno != NULLFSBLOCK);
 354	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
 355	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
 356
 357	/*
 358	 * Go down the tree until leaf level is reached, following the first
 359	 * pointer (leftmost) at each level.
 360	 */
 361	while (level-- > 0) {
 362		/* See if buf is in cur first */
 363		bp_release = 0;
 364		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
 365		if (!bp) {
 366			bp_release = 1;
 367			error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
 368						XFS_BMAP_BTREE_REF,
 369						&xfs_bmbt_buf_ops);
 370			if (error)
 371				goto error_norelse;
 372		}
 373		block = XFS_BUF_TO_BLOCK(bp);
 374		if (level == 0)
 375			break;
 376
 377		/*
 378		 * Check this block for basic sanity (increasing keys and
 379		 * no duplicate blocks).
 380		 */
 381
 382		xfs_check_block(block, mp, 0, 0);
 383		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 384		bno = be64_to_cpu(*pp);
 385		if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
 386			error = -EFSCORRUPTED;
 387			goto error0;
 388		}
 389		if (bp_release) {
 390			bp_release = 0;
 391			xfs_trans_brelse(NULL, bp);
 392		}
 393	}
 394
 395	/*
 396	 * Here with bp and block set to the leftmost leaf node in the tree.
 397	 */
 398	i = 0;
 399
 400	/*
 401	 * Loop over all leaf nodes checking that all extents are in the right order.
 402	 */
 403	for (;;) {
 404		xfs_fsblock_t	nextbno;
 405		xfs_extnum_t	num_recs;
 406
 407
 408		num_recs = xfs_btree_get_numrecs(block);
 409
 410		/*
 411		 * Read-ahead the next leaf block, if any.
 412		 */
 413
 414		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
 415
 416		/*
 417		 * Check all the extents to make sure they are OK.
 418		 * If we had a previous block, the last entry should
 419		 * conform with the first entry in this one.
 420		 */
 421
 422		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
 423		if (i) {
 424			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
 425			       xfs_bmbt_disk_get_blockcount(&last) <=
 426			       xfs_bmbt_disk_get_startoff(ep));
 427		}
 428		for (j = 1; j < num_recs; j++) {
 429			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
 430			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
 431			       xfs_bmbt_disk_get_blockcount(ep) <=
 432			       xfs_bmbt_disk_get_startoff(nextp));
 433			ep = nextp;
 434		}
 435
 436		last = *ep;
 437		i += num_recs;
 438		if (bp_release) {
 439			bp_release = 0;
 440			xfs_trans_brelse(NULL, bp);
 441		}
 442		bno = nextbno;
 443		/*
 444		 * If we've reached the end, stop.
 445		 */
 446		if (bno == NULLFSBLOCK)
 447			break;
 448
 449		bp_release = 0;
 450		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
 451		if (!bp) {
 452			bp_release = 1;
 453			error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
 454						XFS_BMAP_BTREE_REF,
 455						&xfs_bmbt_buf_ops);
 456			if (error)
 457				goto error_norelse;
 458		}
 459		block = XFS_BUF_TO_BLOCK(bp);
 460	}
 461
 462	return;
 463
 464error0:
 465	xfs_warn(mp, "%s: at error0", __func__);
 466	if (bp_release)
 467		xfs_trans_brelse(NULL, bp);
 468error_norelse:
 469	xfs_warn(mp, "%s: BAD after btree leaves for %llu extents",
 470		__func__, i);
 471	xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
 472	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 473	return;
 474}
 475
 476/*
 477 * Validate that the bmbt_irecs being returned from bmapi are valid
 478 * given the caller's original parameters.  Specifically check the
 479 * ranges of the returned irecs to ensure that they only extend beyond
 480 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 481 */
 482STATIC void
 483xfs_bmap_validate_ret(
 484	xfs_fileoff_t		bno,
 485	xfs_filblks_t		len,
 486	uint32_t		flags,
 487	xfs_bmbt_irec_t		*mval,
 488	int			nmap,
 489	int			ret_nmap)
 490{
 491	int			i;		/* index to map values */
 492
 493	ASSERT(ret_nmap <= nmap);
 494
 495	for (i = 0; i < ret_nmap; i++) {
 496		ASSERT(mval[i].br_blockcount > 0);
 497		if (!(flags & XFS_BMAPI_ENTIRE)) {
 498			ASSERT(mval[i].br_startoff >= bno);
 499			ASSERT(mval[i].br_blockcount <= len);
 500			ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
 501			       bno + len);
 502		} else {
 503			ASSERT(mval[i].br_startoff < bno + len);
 504			ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
 505			       bno);
 506		}
 507		ASSERT(i == 0 ||
 508		       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
 509		       mval[i].br_startoff);
 510		ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
 511		       mval[i].br_startblock != HOLESTARTBLOCK);
 512		ASSERT(mval[i].br_state == XFS_EXT_NORM ||
 513		       mval[i].br_state == XFS_EXT_UNWRITTEN);
 514	}
 515}
 516
 517#else
 /* Non-DEBUG builds: both debug checkers compile away to empty statements. */
 518#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)		do { } while (0)
 519#define	xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)	do { } while (0)
 520#endif /* DEBUG */
 521
 522/*
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 523 * Inode fork format manipulation functions
 524 */
 525
 526/*
 527 * Convert the inode format to extent format if it currently is in btree format,
 528 * but the extent list is small enough that it fits into the extent format.
 529 *
 530 * Since the extents are already in-core, all we have to do is give up the space
 531 * for the btree root and pitch the leaf block.
 532 */
 533STATIC int				/* error */
 534xfs_bmap_btree_to_extents(
 535	struct xfs_trans	*tp,	/* transaction pointer */
 536	struct xfs_inode	*ip,	/* incore inode pointer */
 537	struct xfs_btree_cur	*cur,	/* btree cursor */
 538	int			*logflagsp, /* inode logging flags */
 539	int			whichfork)  /* data or attr fork */
 540{
 541	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
 542	struct xfs_mount	*mp = ip->i_mount;
 543	struct xfs_btree_block	*rblock = ifp->if_broot;
 544	struct xfs_btree_block	*cblock;/* child btree block */
 545	xfs_fsblock_t		cbno;	/* child block number */
 546	struct xfs_buf		*cbp;	/* child block's buffer */
 547	int			error;	/* error return value */
 548	__be64			*pp;	/* ptr to block address */
 549	struct xfs_owner_info	oinfo;
 550
 551	/* check if we actually need the extent format first: */
 552	if (!xfs_bmap_wants_extents(ip, whichfork))
 553		return 0;
 554
 555	ASSERT(cur);
 556	ASSERT(whichfork != XFS_COW_FORK);
 
 557	ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
 558	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
 559	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
 560	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
 561
 562	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
 563	cbno = be64_to_cpu(*pp);
 564#ifdef DEBUG
 565	if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_btree_check_lptr(cur, cbno, 1)))
 566		return -EFSCORRUPTED;
 567#endif
 568	error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
 569				&xfs_bmbt_buf_ops);
 570	if (error)
 571		return error;
 572	cblock = XFS_BUF_TO_BLOCK(cbp);
 573	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
 574		return error;
 575
 576	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
 577	error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
 578			XFS_AG_RESV_NONE, false);
 579	if (error)
 580		return error;
 581
 582	ip->i_nblocks--;
 583	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 584	xfs_trans_binval(tp, cbp);
 585	if (cur->bc_levels[0].bp == cbp)
 586		cur->bc_levels[0].bp = NULL;
 587	xfs_iroot_realloc(ip, -1, whichfork);
 588	ASSERT(ifp->if_broot == NULL);
 
 589	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 590	*logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
 591	return 0;
 592}
 593
 594/*
 595 * Convert an extents-format file into a btree-format file.
 596 * The new file will have a root block (in the inode) and a single child block.
 597 */
 598STATIC int					/* error */
 599xfs_bmap_extents_to_btree(
 600	struct xfs_trans	*tp,		/* transaction pointer */
 601	struct xfs_inode	*ip,		/* incore inode pointer */
 602	struct xfs_btree_cur	**curp,		/* cursor returned to caller */
 603	int			wasdel,		/* converting a delayed alloc */
 604	int			*logflagsp,	/* inode logging flags */
 605	int			whichfork)	/* data or attr fork */
 606{
 607	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
 608	struct xfs_buf		*abp;		/* buffer for ablock */
 609	struct xfs_alloc_arg	args;		/* allocation arguments */
 610	struct xfs_bmbt_rec	*arp;		/* child record pointer */
 611	struct xfs_btree_block	*block;		/* btree root block */
 612	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
 613	int			error;		/* error return value */
 614	struct xfs_ifork	*ifp;		/* inode fork pointer */
 615	struct xfs_bmbt_key	*kp;		/* root block key pointer */
 616	struct xfs_mount	*mp;		/* mount structure */
 617	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
 618	struct xfs_iext_cursor	icur;
 619	struct xfs_bmbt_irec	rec;
 620	xfs_extnum_t		cnt = 0;
 621
 622	mp = ip->i_mount;
 623	ASSERT(whichfork != XFS_COW_FORK);
 624	ifp = xfs_ifork_ptr(ip, whichfork);
 625	ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS);
 626
 627	/*
 628	 * Make space in the inode incore. This needs to be undone if we fail
 629	 * to expand the root.
 630	 */
 631	xfs_iroot_realloc(ip, 1, whichfork);
 
 632
 633	/*
 634	 * Fill in the root.
 635	 */
 636	block = ifp->if_broot;
 637	xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
 638				 XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
 639				 XFS_BTREE_LONG_PTRS);
 640	/*
 641	 * Need a cursor.  Can't allocate until bb_level is filled in.
 642	 */
 643	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 644	cur->bc_ino.flags = wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
 645	/*
 646	 * Convert to a btree with two levels, one record in root.
 647	 */
 648	ifp->if_format = XFS_DINODE_FMT_BTREE;
 649	memset(&args, 0, sizeof(args));
 650	args.tp = tp;
 651	args.mp = mp;
 652	xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
 653
 
 
 
 
 
 
 
 
 
 654	args.minlen = args.maxlen = args.prod = 1;
 655	args.wasdel = wasdel;
 656	*logflagsp = 0;
 657	error = xfs_alloc_vextent_start_ag(&args,
 658				XFS_INO_TO_FSB(mp, ip->i_ino));
 659	if (error)
 660		goto out_root_realloc;
 661
 662	/*
 663	 * Allocation can't fail, the space was reserved.
 664	 */
 665	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
 666		error = -ENOSPC;
 667		goto out_root_realloc;
 668	}
 669
 
 
 
 
 
 
 670	cur->bc_ino.allocated++;
 671	ip->i_nblocks++;
 672	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
 673	error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
 674			XFS_FSB_TO_DADDR(mp, args.fsbno),
 675			mp->m_bsize, 0, &abp);
 676	if (error)
 677		goto out_unreserve_dquot;
 678
 679	/*
 680	 * Fill in the child block.
 681	 */
 682	abp->b_ops = &xfs_bmbt_buf_ops;
 683	ablock = XFS_BUF_TO_BLOCK(abp);
 684	xfs_btree_init_block_int(mp, ablock, xfs_buf_daddr(abp),
 685				XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
 686				XFS_BTREE_LONG_PTRS);
 687
 688	for_each_xfs_iext(ifp, &icur, &rec) {
 689		if (isnullstartblock(rec.br_startblock))
 690			continue;
 691		arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
 692		xfs_bmbt_disk_set_all(arp, &rec);
 693		cnt++;
 694	}
 695	ASSERT(cnt == ifp->if_nextents);
 696	xfs_btree_set_numrecs(ablock, cnt);
 697
 698	/*
 699	 * Fill in the root key and pointer.
 700	 */
 701	kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
 702	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
 703	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
 704	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
 705						be16_to_cpu(block->bb_level)));
 706	*pp = cpu_to_be64(args.fsbno);
 707
 708	/*
 709	 * Do all this logging at the end so that
 710	 * the root is at the right level.
 711	 */
 712	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
 713	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
 714	ASSERT(*curp == NULL);
 715	*curp = cur;
 716	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
 717	return 0;
 718
 719out_unreserve_dquot:
 720	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 721out_root_realloc:
 722	xfs_iroot_realloc(ip, -1, whichfork);
 723	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 724	ASSERT(ifp->if_broot == NULL);
 725	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 726
 727	return error;
 728}
 729
 730/*
 731 * Convert a local file to an extents file.
 732 * This code is out of bounds for data forks of regular files,
 733 * since the file data needs to get logged so things will stay consistent.
 734 * (The bmap-level manipulations are ok, though).
 735 */
 736void
 737xfs_bmap_local_to_extents_empty(
 738	struct xfs_trans	*tp,
 739	struct xfs_inode	*ip,
 740	int			whichfork)
 741{
 742	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
 743
 744	ASSERT(whichfork != XFS_COW_FORK);
 745	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
 746	ASSERT(ifp->if_bytes == 0);
 747	ASSERT(ifp->if_nextents == 0);
 748
 749	xfs_bmap_forkoff_reset(ip, whichfork);
 750	ifp->if_data = NULL;
 
 
 751	ifp->if_height = 0;
 752	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 753	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 754}
 755
 756
 757STATIC int				/* error */
 758xfs_bmap_local_to_extents(
 759	xfs_trans_t	*tp,		/* transaction pointer */
 760	xfs_inode_t	*ip,		/* incore inode pointer */
 761	xfs_extlen_t	total,		/* total blocks needed by transaction */
 762	int		*logflagsp,	/* inode logging flags */
 763	int		whichfork,
 764	void		(*init_fn)(struct xfs_trans *tp,
 765				   struct xfs_buf *bp,
 766				   struct xfs_inode *ip,
 767				   struct xfs_ifork *ifp))
 768{
 769	int		error = 0;
 770	int		flags;		/* logging flags returned */
 771	struct xfs_ifork *ifp;		/* inode fork pointer */
 772	xfs_alloc_arg_t	args;		/* allocation arguments */
 773	struct xfs_buf	*bp;		/* buffer for extent block */
 774	struct xfs_bmbt_irec rec;
 775	struct xfs_iext_cursor icur;
 776
 777	/*
 778	 * We don't want to deal with the case of keeping inode data inline yet.
 779	 * So sending the data fork of a regular inode is invalid.
 780	 */
 781	ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
 782	ifp = xfs_ifork_ptr(ip, whichfork);
 783	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
 784
 785	if (!ifp->if_bytes) {
 786		xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
 787		flags = XFS_ILOG_CORE;
 788		goto done;
 789	}
 790
 791	flags = 0;
 792	error = 0;
 
 793	memset(&args, 0, sizeof(args));
 794	args.tp = tp;
 795	args.mp = ip->i_mount;
 796	args.total = total;
 797	args.minlen = args.maxlen = args.prod = 1;
 798	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
 799
 800	/*
 801	 * Allocate a block.  We know we need only one, since the
 802	 * file currently fits in an inode.
 803	 */
 
 
 
 
 
 
 
 804	args.total = total;
 805	args.minlen = args.maxlen = args.prod = 1;
 806	error = xfs_alloc_vextent_start_ag(&args,
 807			XFS_INO_TO_FSB(args.mp, ip->i_ino));
 808	if (error)
 809		goto done;
 810
 811	/* Can't fail, the space was reserved. */
 812	ASSERT(args.fsbno != NULLFSBLOCK);
 813	ASSERT(args.len == 1);
 
 814	error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
 815			XFS_FSB_TO_DADDR(args.mp, args.fsbno),
 816			args.mp->m_bsize, 0, &bp);
 817	if (error)
 818		goto done;
 819
 820	/*
 821	 * Initialize the block, copy the data and log the remote buffer.
 822	 *
 823	 * The callout is responsible for logging because the remote format
 824	 * might differ from the local format and thus we don't know how much to
 825	 * log here. Note that init_fn must also set the buffer log item type
 826	 * correctly.
 827	 */
 828	init_fn(tp, bp, ip, ifp);
 829
 830	/* account for the change in fork size */
 831	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
 832	xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
 833	flags |= XFS_ILOG_CORE;
 834
 835	ifp->if_data = NULL;
 836	ifp->if_height = 0;
 837
 838	rec.br_startoff = 0;
 839	rec.br_startblock = args.fsbno;
 840	rec.br_blockcount = 1;
 841	rec.br_state = XFS_EXT_NORM;
 842	xfs_iext_first(ifp, &icur);
 843	xfs_iext_insert(ip, &icur, &rec, 0);
 844
 845	ifp->if_nextents = 1;
 846	ip->i_nblocks = 1;
 847	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
 
 848	flags |= xfs_ilog_fext(whichfork);
 849
 850done:
 851	*logflagsp = flags;
 852	return error;
 853}
 854
 855/*
 856 * Called from xfs_bmap_add_attrfork to handle btree format files.
 857 */
 858STATIC int					/* error */
 859xfs_bmap_add_attrfork_btree(
 860	xfs_trans_t		*tp,		/* transaction pointer */
 861	xfs_inode_t		*ip,		/* incore inode pointer */
 862	int			*flags)		/* inode logging flags */
 863{
 864	struct xfs_btree_block	*block = ip->i_df.if_broot;
 865	struct xfs_btree_cur	*cur;		/* btree cursor */
 866	int			error;		/* error return value */
 867	xfs_mount_t		*mp;		/* file system mount struct */
 868	int			stat;		/* newroot status */
 869
 870	mp = ip->i_mount;
 871
 872	if (XFS_BMAP_BMDR_SPACE(block) <= xfs_inode_data_fork_size(ip))
 873		*flags |= XFS_ILOG_DBROOT;
 874	else {
 875		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
 876		error = xfs_bmbt_lookup_first(cur, &stat);
 877		if (error)
 878			goto error0;
 879		/* must be at least one entry */
 880		if (XFS_IS_CORRUPT(mp, stat != 1)) {
 881			error = -EFSCORRUPTED;
 882			goto error0;
 883		}
 884		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
 885			goto error0;
 886		if (stat == 0) {
 887			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 888			return -ENOSPC;
 889		}
 890		cur->bc_ino.allocated = 0;
 891		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 892	}
 893	return 0;
 894error0:
 895	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 896	return error;
 897}
 898
 899/*
 900 * Called from xfs_bmap_add_attrfork to handle extents format files.
 901 */
 902STATIC int					/* error */
 903xfs_bmap_add_attrfork_extents(
 904	struct xfs_trans	*tp,		/* transaction pointer */
 905	struct xfs_inode	*ip,		/* incore inode pointer */
 906	int			*flags)		/* inode logging flags */
 907{
 908	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
 909	int			error;		/* error return value */
 910
 911	if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <=
 912	    xfs_inode_data_fork_size(ip))
 913		return 0;
 914	cur = NULL;
 915	error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
 916					  XFS_DATA_FORK);
 917	if (cur) {
 918		cur->bc_ino.allocated = 0;
 919		xfs_btree_del_cursor(cur, error);
 920	}
 921	return error;
 922}
 923
 924/*
 925 * Called from xfs_bmap_add_attrfork to handle local format files. Each
 926 * different data fork content type needs a different callout to do the
 927 * conversion. Some are basic and only require special block initialisation
 928 * callouts for the data formating, others (directories) are so specialised they
 929 * handle everything themselves.
 930 *
 931 * XXX (dgc): investigate whether directory conversion can use the generic
 932 * formatting callout. It should be possible - it's just a very complex
 933 * formatter.
 934 */
 935STATIC int					/* error */
 936xfs_bmap_add_attrfork_local(
 937	struct xfs_trans	*tp,		/* transaction pointer */
 938	struct xfs_inode	*ip,		/* incore inode pointer */
 939	int			*flags)		/* inode logging flags */
 940{
 941	struct xfs_da_args	dargs;		/* args for dir/attr code */
 942
 943	if (ip->i_df.if_bytes <= xfs_inode_data_fork_size(ip))
 944		return 0;
 945
 946	if (S_ISDIR(VFS_I(ip)->i_mode)) {
 947		memset(&dargs, 0, sizeof(dargs));
 948		dargs.geo = ip->i_mount->m_dir_geo;
 949		dargs.dp = ip;
 950		dargs.total = dargs.geo->fsbcount;
 951		dargs.whichfork = XFS_DATA_FORK;
 952		dargs.trans = tp;
 953		return xfs_dir2_sf_to_block(&dargs);
 954	}
 955
 956	if (S_ISLNK(VFS_I(ip)->i_mode))
 957		return xfs_bmap_local_to_extents(tp, ip, 1, flags,
 958						 XFS_DATA_FORK,
 959						 xfs_symlink_local_to_remote);
 960
 961	/* should only be called for types that support local format data */
 962	ASSERT(0);
 963	return -EFSCORRUPTED;
 964}
 965
 966/*
 967 * Set an inode attr fork offset based on the format of the data fork.
 968 */
 969static int
 970xfs_bmap_set_attrforkoff(
 971	struct xfs_inode	*ip,
 972	int			size,
 973	int			*version)
 974{
 975	int			default_size = xfs_default_attroffset(ip) >> 3;
 976
 977	switch (ip->i_df.if_format) {
 978	case XFS_DINODE_FMT_DEV:
 979		ip->i_forkoff = default_size;
 980		break;
 981	case XFS_DINODE_FMT_LOCAL:
 982	case XFS_DINODE_FMT_EXTENTS:
 983	case XFS_DINODE_FMT_BTREE:
 984		ip->i_forkoff = xfs_attr_shortform_bytesfit(ip, size);
 985		if (!ip->i_forkoff)
 986			ip->i_forkoff = default_size;
 987		else if (xfs_has_attr2(ip->i_mount) && version)
 988			*version = 2;
 989		break;
 990	default:
 991		ASSERT(0);
 992		return -EINVAL;
 993	}
 994
 995	return 0;
 996}
 997
 998/*
 999 * Convert inode from non-attributed to attributed.
1000 * Must not be in a transaction, ip must not be locked.
1001 */
1002int						/* error code */
1003xfs_bmap_add_attrfork(
1004	xfs_inode_t		*ip,		/* incore inode pointer */
1005	int			size,		/* space new attribute needs */
1006	int			rsvd)		/* xact may use reserved blks */
1007{
1008	xfs_mount_t		*mp;		/* mount structure */
1009	xfs_trans_t		*tp;		/* transaction pointer */
1010	int			blks;		/* space reservation */
1011	int			version = 1;	/* superblock attr version */
1012	int			logflags;	/* logging flags */
1013	int			error;		/* error return value */
1014
1015	ASSERT(xfs_inode_has_attr_fork(ip) == 0);
1016
1017	mp = ip->i_mount;
1018	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1019
1020	blks = XFS_ADDAFORK_SPACE_RES(mp);
1021
1022	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_addafork, blks, 0,
1023			rsvd, &tp);
1024	if (error)
1025		return error;
1026	if (xfs_inode_has_attr_fork(ip))
 
 
 
 
 
 
 
1027		goto trans_cancel;
1028
 
1029	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1030	error = xfs_bmap_set_attrforkoff(ip, size, &version);
1031	if (error)
1032		goto trans_cancel;
 
 
 
 
1033
1034	xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
 
1035	logflags = 0;
1036	switch (ip->i_df.if_format) {
1037	case XFS_DINODE_FMT_LOCAL:
1038		error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
1039		break;
1040	case XFS_DINODE_FMT_EXTENTS:
1041		error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
1042		break;
1043	case XFS_DINODE_FMT_BTREE:
1044		error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
1045		break;
1046	default:
1047		error = 0;
1048		break;
1049	}
1050	if (logflags)
1051		xfs_trans_log_inode(tp, ip, logflags);
1052	if (error)
1053		goto trans_cancel;
1054	if (!xfs_has_attr(mp) ||
1055	   (!xfs_has_attr2(mp) && version == 2)) {
1056		bool log_sb = false;
1057
1058		spin_lock(&mp->m_sb_lock);
1059		if (!xfs_has_attr(mp)) {
1060			xfs_add_attr(mp);
1061			log_sb = true;
1062		}
1063		if (!xfs_has_attr2(mp) && version == 2) {
1064			xfs_add_attr2(mp);
1065			log_sb = true;
1066		}
1067		spin_unlock(&mp->m_sb_lock);
1068		if (log_sb)
1069			xfs_log_sb(tp);
1070	}
1071
1072	error = xfs_trans_commit(tp);
1073	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1074	return error;
1075
1076trans_cancel:
1077	xfs_trans_cancel(tp);
1078	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1079	return error;
1080}
1081
1082/*
1083 * Internal and external extent tree search functions.
1084 */
1085
/*
 * Walk state that xfs_iread_extents() passes, via the opaque priv pointer,
 * to xfs_iread_bmbt_block() for every btree block visited.
 */
1086struct xfs_iread_state {
	/* incore extent list cursor: where the next record is inserted */
1087	struct xfs_iext_cursor	icur;
	/* records inserted so far; compared against ifp->if_nextents */
1088	xfs_extnum_t		loaded;
1089};
1090
1091int
1092xfs_bmap_complain_bad_rec(
1093	struct xfs_inode		*ip,
1094	int				whichfork,
1095	xfs_failaddr_t			fa,
1096	const struct xfs_bmbt_irec	*irec)
1097{
1098	struct xfs_mount		*mp = ip->i_mount;
1099	const char			*forkname;
1100
1101	switch (whichfork) {
1102	case XFS_DATA_FORK:	forkname = "data"; break;
1103	case XFS_ATTR_FORK:	forkname = "attr"; break;
1104	case XFS_COW_FORK:	forkname = "CoW"; break;
1105	default:		forkname = "???"; break;
1106	}
1107
1108	xfs_warn(mp,
1109 "Bmap BTree record corruption in inode 0x%llx %s fork detected at %pS!",
1110				ip->i_ino, forkname, fa);
1111	xfs_warn(mp,
1112		"Offset 0x%llx, start block 0x%llx, block count 0x%llx state 0x%x",
1113		irec->br_startoff, irec->br_startblock, irec->br_blockcount,
1114		irec->br_state);
1115
1116	return -EFSCORRUPTED;
1117}
1118
1119/* Stuff every bmbt record from this block into the incore extent map. */
1120static int
1121xfs_iread_bmbt_block(
1122	struct xfs_btree_cur	*cur,
1123	int			level,
1124	void			*priv)
1125{
1126	struct xfs_iread_state	*ir = priv;
1127	struct xfs_mount	*mp = cur->bc_mp;
1128	struct xfs_inode	*ip = cur->bc_ino.ip;
1129	struct xfs_btree_block	*block;
1130	struct xfs_buf		*bp;
1131	struct xfs_bmbt_rec	*frp;
1132	xfs_extnum_t		num_recs;
1133	xfs_extnum_t		j;
1134	int			whichfork = cur->bc_ino.whichfork;
1135	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1136
1137	block = xfs_btree_get_block(cur, level, &bp);
1138
1139	/* Abort if we find more records than nextents. */
1140	num_recs = xfs_btree_get_numrecs(block);
1141	if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) {
1142		xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
1143				(unsigned long long)ip->i_ino);
1144		xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
1145				sizeof(*block), __this_address);
1146		return -EFSCORRUPTED;
1147	}
1148
1149	/* Copy records into the incore cache. */
1150	frp = XFS_BMBT_REC_ADDR(mp, block, 1);
1151	for (j = 0; j < num_recs; j++, frp++, ir->loaded++) {
1152		struct xfs_bmbt_irec	new;
1153		xfs_failaddr_t		fa;
1154
1155		xfs_bmbt_disk_get_all(frp, &new);
1156		fa = xfs_bmap_validate_extent(ip, whichfork, &new);
1157		if (fa) {
1158			xfs_inode_verifier_error(ip, -EFSCORRUPTED,
1159					"xfs_iread_extents(2)", frp,
1160					sizeof(*frp), fa);
1161			return xfs_bmap_complain_bad_rec(ip, whichfork, fa,
1162					&new);
1163		}
1164		xfs_iext_insert(ip, &ir->icur, &new,
1165				xfs_bmap_fork_to_state(whichfork));
1166		trace_xfs_read_extent(ip, &ir->icur,
1167				xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
1168		xfs_iext_next(ifp, &ir->icur);
1169	}
1170
1171	return 0;
1172}
1173
1174/*
1175 * Read in extents from a btree-format inode.
1176 */
1177int
1178xfs_iread_extents(
1179	struct xfs_trans	*tp,
1180	struct xfs_inode	*ip,
1181	int			whichfork)
1182{
1183	struct xfs_iread_state	ir;
1184	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1185	struct xfs_mount	*mp = ip->i_mount;
1186	struct xfs_btree_cur	*cur;
1187	int			error;
1188
1189	if (!xfs_need_iread_extents(ifp))
1190		return 0;
1191
1192	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1193
 
 
 
 
 
1194	ir.loaded = 0;
1195	xfs_iext_first(ifp, &ir.icur);
1196	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
1197	error = xfs_btree_visit_blocks(cur, xfs_iread_bmbt_block,
1198			XFS_BTREE_VISIT_RECORDS, &ir);
1199	xfs_btree_del_cursor(cur, error);
1200	if (error)
1201		goto out;
1202
1203	if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
1204		error = -EFSCORRUPTED;
1205		goto out;
1206	}
1207	ASSERT(ir.loaded == xfs_iext_count(ifp));
1208	/*
1209	 * Use release semantics so that we can use acquire semantics in
1210	 * xfs_need_iread_extents and be guaranteed to see a valid mapping tree
1211	 * after that load.
1212	 */
1213	smp_store_release(&ifp->if_needextents, 0);
1214	return 0;
1215out:
1216	xfs_iext_destroy(ifp);
1217	return error;
1218}
1219
1220/*
1221 * Returns the relative block number of the first unused block(s) in the given
1222 * fork with at least "len" logically contiguous blocks free.  This is the
1223 * lowest-address hole if the fork has holes, else the first block past the end
1224 * of fork.  Return 0 if the fork is currently local (in-inode).
1225 */
1226int						/* error */
1227xfs_bmap_first_unused(
1228	struct xfs_trans	*tp,		/* transaction pointer */
1229	struct xfs_inode	*ip,		/* incore inode */
1230	xfs_extlen_t		len,		/* size of hole to find */
1231	xfs_fileoff_t		*first_unused,	/* unused block */
1232	int			whichfork)	/* data or attr fork */
1233{
1234	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1235	struct xfs_bmbt_irec	got;
1236	struct xfs_iext_cursor	icur;
1237	xfs_fileoff_t		lastaddr = 0;
1238	xfs_fileoff_t		lowest, max;
1239	int			error;
1240
1241	if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
1242		*first_unused = 0;
1243		return 0;
1244	}
1245
1246	ASSERT(xfs_ifork_has_extents(ifp));
1247
1248	error = xfs_iread_extents(tp, ip, whichfork);
1249	if (error)
1250		return error;
 
 
1251
1252	lowest = max = *first_unused;
1253	for_each_xfs_iext(ifp, &icur, &got) {
1254		/*
1255		 * See if the hole before this extent will work.
1256		 */
1257		if (got.br_startoff >= lowest + len &&
1258		    got.br_startoff - max >= len)
1259			break;
1260		lastaddr = got.br_startoff + got.br_blockcount;
1261		max = XFS_FILEOFF_MAX(lastaddr, lowest);
1262	}
1263
1264	*first_unused = max;
1265	return 0;
1266}
1267
1268/*
1269 * Returns the file-relative block number of the last block - 1 before
1270 * last_block (input value) in the file.
1271 * This is not based on i_size, it is based on the extent records.
1272 * Returns 0 for local files, as they do not have extent records.
1273 */
1274int						/* error */
1275xfs_bmap_last_before(
1276	struct xfs_trans	*tp,		/* transaction pointer */
1277	struct xfs_inode	*ip,		/* incore inode */
1278	xfs_fileoff_t		*last_block,	/* last block */
1279	int			whichfork)	/* data or attr fork */
1280{
1281	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1282	struct xfs_bmbt_irec	got;
1283	struct xfs_iext_cursor	icur;
1284	int			error;
1285
1286	switch (ifp->if_format) {
1287	case XFS_DINODE_FMT_LOCAL:
1288		*last_block = 0;
1289		return 0;
1290	case XFS_DINODE_FMT_BTREE:
1291	case XFS_DINODE_FMT_EXTENTS:
1292		break;
1293	default:
1294		ASSERT(0);
1295		return -EFSCORRUPTED;
1296	}
1297
1298	error = xfs_iread_extents(tp, ip, whichfork);
1299	if (error)
1300		return error;
 
 
1301
1302	if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
1303		*last_block = 0;
1304	return 0;
1305}
1306
1307int
1308xfs_bmap_last_extent(
1309	struct xfs_trans	*tp,
1310	struct xfs_inode	*ip,
1311	int			whichfork,
1312	struct xfs_bmbt_irec	*rec,
1313	int			*is_empty)
1314{
1315	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1316	struct xfs_iext_cursor	icur;
1317	int			error;
1318
1319	error = xfs_iread_extents(tp, ip, whichfork);
1320	if (error)
1321		return error;
 
 
1322
1323	xfs_iext_last(ifp, &icur);
1324	if (!xfs_iext_get_extent(ifp, &icur, rec))
1325		*is_empty = 1;
1326	else
1327		*is_empty = 0;
1328	return 0;
1329}
1330
1331/*
1332 * Check the last inode extent to determine whether this allocation will result
1333 * in blocks being allocated at the end of the file. When we allocate new data
1334 * blocks at the end of the file which do not start at the previous data block,
1335 * we will try to align the new blocks at stripe unit boundaries.
1336 *
1337 * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1338 * at, or past the EOF.
1339 */
1340STATIC int
1341xfs_bmap_isaeof(
1342	struct xfs_bmalloca	*bma,
1343	int			whichfork)
1344{
1345	struct xfs_bmbt_irec	rec;
1346	int			is_empty;
1347	int			error;
1348
1349	bma->aeof = false;
1350	error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1351				     &is_empty);
1352	if (error)
1353		return error;
1354
1355	if (is_empty) {
1356		bma->aeof = true;
1357		return 0;
1358	}
1359
1360	/*
1361	 * Check if we are allocation or past the last extent, or at least into
1362	 * the last delayed allocated extent.
1363	 */
1364	bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1365		(bma->offset >= rec.br_startoff &&
1366		 isnullstartblock(rec.br_startblock));
1367	return 0;
1368}
1369
1370/*
1371 * Returns the file-relative block number of the first block past eof in
1372 * the file.  This is not based on i_size, it is based on the extent records.
1373 * Returns 0 for local files, as they do not have extent records.
1374 */
1375int
1376xfs_bmap_last_offset(
1377	struct xfs_inode	*ip,
1378	xfs_fileoff_t		*last_block,
1379	int			whichfork)
1380{
1381	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1382	struct xfs_bmbt_irec	rec;
1383	int			is_empty;
1384	int			error;
1385
1386	*last_block = 0;
1387
1388	if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
1389		return 0;
1390
1391	if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp)))
1392		return -EFSCORRUPTED;
1393
1394	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1395	if (error || is_empty)
1396		return error;
1397
1398	*last_block = rec.br_startoff + rec.br_blockcount;
1399	return 0;
1400}
1401
1402/*
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1403 * Extent tree manipulation functions used during allocation.
1404 */
1405
1406/*
1407 * Convert a delayed allocation to a real allocation.
 *
 * The new real extent is bma->got (called "new" below); the asserts require
 * that it has a real start block, that the record at bma->icur ("PREV") is
 * a delayed allocation fully covering it, and that whichfork is not the
 * attr fork.  The incore extent list is always updated; the bmap btree is
 * updated as well when bma->cur is non-NULL.
 *
 * Returns 0 or a negative errno.  On every exit the logging flags collected
 * in rval are ORed into bma->logflags, except for the CoW fork.
1408 */
1409STATIC int				/* error */
1410xfs_bmap_add_extent_delay_real(
1411	struct xfs_bmalloca	*bma,
1412	int			whichfork)
1413{
1414	struct xfs_mount	*mp = bma->ip->i_mount;
1415	struct xfs_ifork	*ifp = xfs_ifork_ptr(bma->ip, whichfork);
1416	struct xfs_bmbt_irec	*new = &bma->got;
1417	int			error;	/* error return value */
1418	int			i;	/* temp state */
1419	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
1420	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
1421					/* left is 0, right is 1, prev is 2 */
1422	int			rval=0;	/* return value (logging flags) */
1423	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
1424	xfs_filblks_t		da_new; /* new count del alloc blocks used */
1425	xfs_filblks_t		da_old; /* old count del alloc blocks used */
1426	xfs_filblks_t		temp=0;	/* value for da_new calculations */
1427	int			tmp_rval;	/* partial logging flags */
1428	struct xfs_bmbt_irec	old;
1429
1430	ASSERT(whichfork != XFS_ATTR_FORK);
1431	ASSERT(!isnullstartblock(new->br_startblock));
1432	ASSERT(!bma->cur ||
1433	       (bma->cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
1434
1435	XFS_STATS_INC(mp, xs_add_exlist);
1436
1437#define	LEFT		r[0]
1438#define	RIGHT		r[1]
1439#define	PREV		r[2]
1440
1441	/*
1442	 * Set up a bunch of variables to make the tests simpler.
1443	 */
1444	xfs_iext_get_extent(ifp, &bma->icur, &PREV);
1445	new_endoff = new->br_startoff + new->br_blockcount;
1446	ASSERT(isnullstartblock(PREV.br_startblock));
1447	ASSERT(PREV.br_startoff <= new->br_startoff);
1448	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1449
	/*
	 * da_old is the block count carried in PREV's null start block;
	 * da_new is recomputed below for whatever delalloc remains and stays
	 * 0 when PREV is consumed completely.
	 */
1450	da_old = startblockval(PREV.br_startblock);
1451	da_new = 0;
1452
1453	/*
1454	 * Set flags determining what part of the previous delayed allocation
1455	 * extent is being replaced by a real allocation.
1456	 */
1457	if (PREV.br_startoff == new->br_startoff)
1458		state |= BMAP_LEFT_FILLING;
1459	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1460		state |= BMAP_RIGHT_FILLING;
1461
1462	/*
1463	 * Check and set flags if this segment has a left neighbor.
1464	 * Don't set contiguous if the combined extent would be too large.
1465	 */
1466	if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
1467		state |= BMAP_LEFT_VALID;
1468		if (isnullstartblock(LEFT.br_startblock))
1469			state |= BMAP_LEFT_DELAY;
1470	}
1471
1472	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1473	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1474	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1475	    LEFT.br_state == new->br_state &&
1476	    LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
1477		state |= BMAP_LEFT_CONTIG;
1478
1479	/*
1480	 * Check and set flags if this segment has a right neighbor.
1481	 * Don't set contiguous if the combined extent would be too large.
1482	 * Also check for all-three-contiguous being too large.
1483	 */
1484	if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
1485		state |= BMAP_RIGHT_VALID;
1486		if (isnullstartblock(RIGHT.br_startblock))
1487			state |= BMAP_RIGHT_DELAY;
1488	}
1489
1490	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1491	    new_endoff == RIGHT.br_startoff &&
1492	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1493	    new->br_state == RIGHT.br_state &&
1494	    new->br_blockcount + RIGHT.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
1495	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1496		       BMAP_RIGHT_FILLING)) !=
1497		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1498		       BMAP_RIGHT_FILLING) ||
1499	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1500			<= XFS_MAX_BMBT_EXTLEN))
1501		state |= BMAP_RIGHT_CONTIG;
1502
1503	error = 0;
1504	/*
1505	 * Switch out based on the FILLING and CONTIG state bits.
1506	 */
1507	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1508			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1509	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1510	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1511		/*
1512		 * Filling in all of a previously delayed allocation extent.
1513		 * The left and right neighbors are both contiguous with new.
1514		 */
1515		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
1516
1517		xfs_iext_remove(bma->ip, &bma->icur, state);
1518		xfs_iext_remove(bma->ip, &bma->icur, state);
1519		xfs_iext_prev(ifp, &bma->icur);
1520		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1521		ifp->if_nextents--;
1522
1523		if (bma->cur == NULL)
1524			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1525		else {
1526			rval = XFS_ILOG_CORE;
1527			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1528			if (error)
1529				goto done;
1530			if (XFS_IS_CORRUPT(mp, i != 1)) {
1531				error = -EFSCORRUPTED;
1532				goto done;
1533			}
1534			error = xfs_btree_delete(bma->cur, &i);
1535			if (error)
1536				goto done;
1537			if (XFS_IS_CORRUPT(mp, i != 1)) {
1538				error = -EFSCORRUPTED;
1539				goto done;
1540			}
1541			error = xfs_btree_decrement(bma->cur, 0, &i);
1542			if (error)
1543				goto done;
1544			if (XFS_IS_CORRUPT(mp, i != 1)) {
1545				error = -EFSCORRUPTED;
1546				goto done;
1547			}
1548			error = xfs_bmbt_update(bma->cur, &LEFT);
1549			if (error)
1550				goto done;
1551		}
1552		break;
1553
1554	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1555		/*
1556		 * Filling in all of a previously delayed allocation extent.
1557		 * The left neighbor is contiguous, the right is not.
1558		 */
1559		old = LEFT;
1560		LEFT.br_blockcount += PREV.br_blockcount;
1561
1562		xfs_iext_remove(bma->ip, &bma->icur, state);
1563		xfs_iext_prev(ifp, &bma->icur);
1564		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1565
1566		if (bma->cur == NULL)
1567			rval = XFS_ILOG_DEXT;
1568		else {
1569			rval = 0;
1570			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1571			if (error)
1572				goto done;
1573			if (XFS_IS_CORRUPT(mp, i != 1)) {
1574				error = -EFSCORRUPTED;
1575				goto done;
1576			}
1577			error = xfs_bmbt_update(bma->cur, &LEFT);
1578			if (error)
1579				goto done;
1580		}
1581		break;
1582
1583	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1584		/*
1585		 * Filling in all of a previously delayed allocation extent.
1586		 * The right neighbor is contiguous, the left is not. Take care
1587		 * with delay -> unwritten extent allocation here because the
1588		 * delalloc record we are overwriting is always written.
1589		 */
1590		PREV.br_startblock = new->br_startblock;
1591		PREV.br_blockcount += RIGHT.br_blockcount;
1592		PREV.br_state = new->br_state;
1593
1594		xfs_iext_next(ifp, &bma->icur);
1595		xfs_iext_remove(bma->ip, &bma->icur, state);
1596		xfs_iext_prev(ifp, &bma->icur);
1597		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1598
1599		if (bma->cur == NULL)
1600			rval = XFS_ILOG_DEXT;
1601		else {
1602			rval = 0;
1603			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1604			if (error)
1605				goto done;
1606			if (XFS_IS_CORRUPT(mp, i != 1)) {
1607				error = -EFSCORRUPTED;
1608				goto done;
1609			}
1610			error = xfs_bmbt_update(bma->cur, &PREV);
1611			if (error)
1612				goto done;
1613		}
1614		break;
1615
1616	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1617		/*
1618		 * Filling in all of a previously delayed allocation extent.
1619		 * Neither the left nor right neighbors are contiguous with
1620		 * the new one.
1621		 */
1622		PREV.br_startblock = new->br_startblock;
1623		PREV.br_state = new->br_state;
1624		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1625		ifp->if_nextents++;
1626
1627		if (bma->cur == NULL)
1628			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1629		else {
1630			rval = XFS_ILOG_CORE;
1631			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1632			if (error)
1633				goto done;
1634			if (XFS_IS_CORRUPT(mp, i != 0)) {
1635				error = -EFSCORRUPTED;
1636				goto done;
1637			}
1638			error = xfs_btree_insert(bma->cur, &i);
1639			if (error)
1640				goto done;
1641			if (XFS_IS_CORRUPT(mp, i != 1)) {
1642				error = -EFSCORRUPTED;
1643				goto done;
1644			}
1645		}
1646		break;
1647
1648	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1649		/*
1650		 * Filling in the first part of a previous delayed allocation.
1651		 * The left neighbor is contiguous.
1652		 */
1653		old = LEFT;
1654		temp = PREV.br_blockcount - new->br_blockcount;
1655		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1656				startblockval(PREV.br_startblock));
1657
1658		LEFT.br_blockcount += new->br_blockcount;
1659
1660		PREV.br_blockcount = temp;
1661		PREV.br_startoff += new->br_blockcount;
1662		PREV.br_startblock = nullstartblock(da_new);
1663
1664		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1665		xfs_iext_prev(ifp, &bma->icur);
1666		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1667
1668		if (bma->cur == NULL)
1669			rval = XFS_ILOG_DEXT;
1670		else {
1671			rval = 0;
1672			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1673			if (error)
1674				goto done;
1675			if (XFS_IS_CORRUPT(mp, i != 1)) {
1676				error = -EFSCORRUPTED;
1677				goto done;
1678			}
1679			error = xfs_bmbt_update(bma->cur, &LEFT);
1680			if (error)
1681				goto done;
1682		}
1683		break;
1684
1685	case BMAP_LEFT_FILLING:
1686		/*
1687		 * Filling in the first part of a previous delayed allocation.
1688		 * The left neighbor is not contiguous.
1689		 */
1690		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1691		ifp->if_nextents++;
1692
1693		if (bma->cur == NULL)
1694			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1695		else {
1696			rval = XFS_ILOG_CORE;
1697			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1698			if (error)
1699				goto done;
1700			if (XFS_IS_CORRUPT(mp, i != 0)) {
1701				error = -EFSCORRUPTED;
1702				goto done;
1703			}
1704			error = xfs_btree_insert(bma->cur, &i);
1705			if (error)
1706				goto done;
1707			if (XFS_IS_CORRUPT(mp, i != 1)) {
1708				error = -EFSCORRUPTED;
1709				goto done;
1710			}
1711		}
1712
1713		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1714			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1715					&bma->cur, 1, &tmp_rval, whichfork);
1716			rval |= tmp_rval;
1717			if (error)
1718				goto done;
1719		}
1720
		/*
		 * The remaining delalloc piece gets the smaller of the
		 * worst-case indlen for its new length and what PREV held,
		 * less any blocks the btree cursor reports as allocated.
		 */
1721		temp = PREV.br_blockcount - new->br_blockcount;
1722		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1723			startblockval(PREV.br_startblock) -
1724			(bma->cur ? bma->cur->bc_ino.allocated : 0));
1725
1726		PREV.br_startoff = new_endoff;
1727		PREV.br_blockcount = temp;
1728		PREV.br_startblock = nullstartblock(da_new);
1729		xfs_iext_next(ifp, &bma->icur);
1730		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1731		xfs_iext_prev(ifp, &bma->icur);
1732		break;
1733
1734	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1735		/*
1736		 * Filling in the last part of a previous delayed allocation.
1737		 * The right neighbor is contiguous with the new allocation.
1738		 */
1739		old = RIGHT;
1740		RIGHT.br_startoff = new->br_startoff;
1741		RIGHT.br_startblock = new->br_startblock;
1742		RIGHT.br_blockcount += new->br_blockcount;
1743
1744		if (bma->cur == NULL)
1745			rval = XFS_ILOG_DEXT;
1746		else {
1747			rval = 0;
1748			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1749			if (error)
1750				goto done;
1751			if (XFS_IS_CORRUPT(mp, i != 1)) {
1752				error = -EFSCORRUPTED;
1753				goto done;
1754			}
1755			error = xfs_bmbt_update(bma->cur, &RIGHT);
1756			if (error)
1757				goto done;
1758		}
1759
1760		temp = PREV.br_blockcount - new->br_blockcount;
1761		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1762			startblockval(PREV.br_startblock));
1763
1764		PREV.br_blockcount = temp;
1765		PREV.br_startblock = nullstartblock(da_new);
1766
1767		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1768		xfs_iext_next(ifp, &bma->icur);
1769		xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
1770		break;
1771
1772	case BMAP_RIGHT_FILLING:
1773		/*
1774		 * Filling in the last part of a previous delayed allocation.
1775		 * The right neighbor is not contiguous.
1776		 */
1777		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1778		ifp->if_nextents++;
1779
1780		if (bma->cur == NULL)
1781			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1782		else {
1783			rval = XFS_ILOG_CORE;
1784			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1785			if (error)
1786				goto done;
1787			if (XFS_IS_CORRUPT(mp, i != 0)) {
1788				error = -EFSCORRUPTED;
1789				goto done;
1790			}
1791			error = xfs_btree_insert(bma->cur, &i);
1792			if (error)
1793				goto done;
1794			if (XFS_IS_CORRUPT(mp, i != 1)) {
1795				error = -EFSCORRUPTED;
1796				goto done;
1797			}
1798		}
1799
1800		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1801			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1802				&bma->cur, 1, &tmp_rval, whichfork);
1803			rval |= tmp_rval;
1804			if (error)
1805				goto done;
1806		}
1807
1808		temp = PREV.br_blockcount - new->br_blockcount;
1809		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1810			startblockval(PREV.br_startblock) -
1811			(bma->cur ? bma->cur->bc_ino.allocated : 0));
1812
1813		PREV.br_startblock = nullstartblock(da_new);
1814		PREV.br_blockcount = temp;
1815		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1816		xfs_iext_next(ifp, &bma->icur);
1817		break;
1818
1819	case 0:
1820		/*
1821		 * Filling in the middle part of a previous delayed allocation.
1822		 * Contiguity is impossible here.
1823		 * This case is avoided almost all the time.
1824		 *
1825		 * We start with a delayed allocation:
1826		 *
1827		 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
1828		 *  PREV @ idx
1829		 *
1830	         * and we are allocating:
1831		 *                     +rrrrrrrrrrrrrrrrr+
1832		 *			      new
1833		 *
1834		 * and we set it up for insertion as:
1835		 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
1836		 *                            new
1837		 *  PREV @ idx          LEFT              RIGHT
1838		 *                      inserted at idx + 1
1839		 */
1840		old = PREV;
1841
1842		/* LEFT is the new middle */
1843		LEFT = *new;
1844
1845		/* RIGHT is the new right */
1846		RIGHT.br_state = PREV.br_state;
1847		RIGHT.br_startoff = new_endoff;
1848		RIGHT.br_blockcount =
1849			PREV.br_startoff + PREV.br_blockcount - new_endoff;
1850		RIGHT.br_startblock =
1851			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1852					RIGHT.br_blockcount));
1853
1854		/* truncate PREV */
1855		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
1856		PREV.br_startblock =
1857			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1858					PREV.br_blockcount));
1859		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1860
1861		xfs_iext_next(ifp, &bma->icur);
1862		xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
1863		xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
1864		ifp->if_nextents++;
1865
1866		if (bma->cur == NULL)
1867			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1868		else {
1869			rval = XFS_ILOG_CORE;
1870			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1871			if (error)
1872				goto done;
1873			if (XFS_IS_CORRUPT(mp, i != 0)) {
1874				error = -EFSCORRUPTED;
1875				goto done;
1876			}
1877			error = xfs_btree_insert(bma->cur, &i);
1878			if (error)
1879				goto done;
1880			if (XFS_IS_CORRUPT(mp, i != 1)) {
1881				error = -EFSCORRUPTED;
1882				goto done;
1883			}
1884		}
1885
1886		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1887			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1888					&bma->cur, 1, &tmp_rval, whichfork);
1889			rval |= tmp_rval;
1890			if (error)
1891				goto done;
1892		}
1893
1894		da_new = startblockval(PREV.br_startblock) +
1895			 startblockval(RIGHT.br_startblock);
1896		break;
1897
	/*
	 * The nine cases above plus the seven listed below enumerate all 16
	 * combinations of the four bits switched on, so there is no default
	 * label.
	 */
1898	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1899	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1900	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1901	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1902	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1903	case BMAP_LEFT_CONTIG:
1904	case BMAP_RIGHT_CONTIG:
1905		/*
1906		 * These cases are all impossible.
1907		 */
1908		ASSERT(0);
1909	}
1910
1911	/* add reverse mapping unless caller opted out */
1912	if (!(bma->flags & XFS_BMAPI_NORMAP))
1913		xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new);
1914
1915	/* convert to a btree if necessary */
1916	if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1917		int	tmp_logflags;	/* partial log flag return val */
1918
1919		ASSERT(bma->cur == NULL);
1920		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1921				&bma->cur, da_old > 0, &tmp_logflags,
1922				whichfork);
1923		bma->logflags |= tmp_logflags;
1924		if (error)
1925			goto done;
1926	}
1927
	/*
	 * Accounting, in this order: xfs_mod_delalloc() gets the signed
	 * change da_new - da_old; then blocks the btree cursor reports as
	 * allocated are folded into da_new and the cursor's count is
	 * cleared; finally xfs_mod_fdblocks() is called with da_old - da_new.
	 * NOTE(review): the result of xfs_mod_fdblocks() becomes the return
	 * value of this function.
	 */
1928	if (da_new != da_old)
1929		xfs_mod_delalloc(mp, (int64_t)da_new - da_old);
1930
1931	if (bma->cur) {
1932		da_new += bma->cur->bc_ino.allocated;
1933		bma->cur->bc_ino.allocated = 0;
1934	}
1935
1936	/* adjust for changes in reserved delayed indirect blocks */
1937	if (da_new != da_old) {
1938		ASSERT(state == 0 || da_new < da_old);
1939		error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
1940				false);
1941	}
1942
1943	xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
	/*
	 * Error exits jump here, skipping whatever part of the rmap update,
	 * btree conversion and accounting above had not run yet; the logging
	 * flags gathered so far are still handed back to the caller.
	 */
1944done:
1945	if (whichfork != XFS_COW_FORK)
1946		bma->logflags |= rval;
1947	return error;
1948#undef	LEFT
1949#undef	RIGHT
1950#undef	PREV
1951}
1952
1953/*
1954 * Convert an unwritten allocation to a real allocation or vice versa.
1955 */
1956int					/* error */
1957xfs_bmap_add_extent_unwritten_real(
1958	struct xfs_trans	*tp,
1959	xfs_inode_t		*ip,	/* incore inode pointer */
1960	int			whichfork,
1961	struct xfs_iext_cursor	*icur,
1962	struct xfs_btree_cur	**curp,	/* if *curp is null, not a btree */
1963	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
1964	int			*logflagsp) /* inode logging flags */
1965{
1966	struct xfs_btree_cur	*cur;	/* btree cursor */
1967	int			error;	/* error return value */
1968	int			i;	/* temp state */
1969	struct xfs_ifork	*ifp;	/* inode fork pointer */
1970	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
1971	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
1972					/* left is 0, right is 1, prev is 2 */
1973	int			rval=0;	/* return value (logging flags) */
1974	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
1975	struct xfs_mount	*mp = ip->i_mount;
1976	struct xfs_bmbt_irec	old;
1977
1978	*logflagsp = 0;
1979
1980	cur = *curp;
1981	ifp = xfs_ifork_ptr(ip, whichfork);
1982
1983	ASSERT(!isnullstartblock(new->br_startblock));
1984
1985	XFS_STATS_INC(mp, xs_add_exlist);
1986
1987#define	LEFT		r[0]
1988#define	RIGHT		r[1]
1989#define	PREV		r[2]
1990
1991	/*
1992	 * Set up a bunch of variables to make the tests simpler.
1993	 */
1994	error = 0;
1995	xfs_iext_get_extent(ifp, icur, &PREV);
1996	ASSERT(new->br_state != PREV.br_state);
1997	new_endoff = new->br_startoff + new->br_blockcount;
1998	ASSERT(PREV.br_startoff <= new->br_startoff);
1999	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2000
2001	/*
2002	 * Set flags determining what part of the previous oldext allocation
2003	 * extent is being replaced by a newext allocation.
2004	 */
2005	if (PREV.br_startoff == new->br_startoff)
2006		state |= BMAP_LEFT_FILLING;
2007	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2008		state |= BMAP_RIGHT_FILLING;
2009
2010	/*
2011	 * Check and set flags if this segment has a left neighbor.
2012	 * Don't set contiguous if the combined extent would be too large.
2013	 */
2014	if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) {
2015		state |= BMAP_LEFT_VALID;
2016		if (isnullstartblock(LEFT.br_startblock))
2017			state |= BMAP_LEFT_DELAY;
2018	}
2019
2020	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2021	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2022	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2023	    LEFT.br_state == new->br_state &&
2024	    LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
2025		state |= BMAP_LEFT_CONTIG;
2026
2027	/*
2028	 * Check and set flags if this segment has a right neighbor.
2029	 * Don't set contiguous if the combined extent would be too large.
2030	 * Also check for all-three-contiguous being too large.
2031	 */
2032	if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) {
2033		state |= BMAP_RIGHT_VALID;
2034		if (isnullstartblock(RIGHT.br_startblock))
2035			state |= BMAP_RIGHT_DELAY;
2036	}
2037
2038	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2039	    new_endoff == RIGHT.br_startoff &&
2040	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2041	    new->br_state == RIGHT.br_state &&
2042	    new->br_blockcount + RIGHT.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
2043	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2044		       BMAP_RIGHT_FILLING)) !=
2045		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2046		       BMAP_RIGHT_FILLING) ||
2047	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2048			<= XFS_MAX_BMBT_EXTLEN))
2049		state |= BMAP_RIGHT_CONTIG;
2050
2051	/*
2052	 * Switch out based on the FILLING and CONTIG state bits.
2053	 */
2054	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2055			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2056	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2057	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2058		/*
2059		 * Setting all of a previous oldext extent to newext.
2060		 * The left and right neighbors are both contiguous with new.
2061		 */
2062		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
2063
2064		xfs_iext_remove(ip, icur, state);
2065		xfs_iext_remove(ip, icur, state);
2066		xfs_iext_prev(ifp, icur);
2067		xfs_iext_update_extent(ip, state, icur, &LEFT);
2068		ifp->if_nextents -= 2;
2069		if (cur == NULL)
2070			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2071		else {
2072			rval = XFS_ILOG_CORE;
2073			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2074			if (error)
2075				goto done;
2076			if (XFS_IS_CORRUPT(mp, i != 1)) {
2077				error = -EFSCORRUPTED;
2078				goto done;
2079			}
2080			if ((error = xfs_btree_delete(cur, &i)))
2081				goto done;
2082			if (XFS_IS_CORRUPT(mp, i != 1)) {
2083				error = -EFSCORRUPTED;
2084				goto done;
2085			}
2086			if ((error = xfs_btree_decrement(cur, 0, &i)))
2087				goto done;
2088			if (XFS_IS_CORRUPT(mp, i != 1)) {
2089				error = -EFSCORRUPTED;
2090				goto done;
2091			}
2092			if ((error = xfs_btree_delete(cur, &i)))
2093				goto done;
2094			if (XFS_IS_CORRUPT(mp, i != 1)) {
2095				error = -EFSCORRUPTED;
2096				goto done;
2097			}
2098			if ((error = xfs_btree_decrement(cur, 0, &i)))
2099				goto done;
2100			if (XFS_IS_CORRUPT(mp, i != 1)) {
2101				error = -EFSCORRUPTED;
2102				goto done;
2103			}
2104			error = xfs_bmbt_update(cur, &LEFT);
2105			if (error)
2106				goto done;
2107		}
2108		break;
2109
2110	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2111		/*
2112		 * Setting all of a previous oldext extent to newext.
2113		 * The left neighbor is contiguous, the right is not.
2114		 */
2115		LEFT.br_blockcount += PREV.br_blockcount;
2116
2117		xfs_iext_remove(ip, icur, state);
2118		xfs_iext_prev(ifp, icur);
2119		xfs_iext_update_extent(ip, state, icur, &LEFT);
2120		ifp->if_nextents--;
2121		if (cur == NULL)
2122			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2123		else {
2124			rval = XFS_ILOG_CORE;
2125			error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
2126			if (error)
2127				goto done;
2128			if (XFS_IS_CORRUPT(mp, i != 1)) {
2129				error = -EFSCORRUPTED;
2130				goto done;
2131			}
2132			if ((error = xfs_btree_delete(cur, &i)))
2133				goto done;
2134			if (XFS_IS_CORRUPT(mp, i != 1)) {
2135				error = -EFSCORRUPTED;
2136				goto done;
2137			}
2138			if ((error = xfs_btree_decrement(cur, 0, &i)))
2139				goto done;
2140			if (XFS_IS_CORRUPT(mp, i != 1)) {
2141				error = -EFSCORRUPTED;
2142				goto done;
2143			}
2144			error = xfs_bmbt_update(cur, &LEFT);
2145			if (error)
2146				goto done;
2147		}
2148		break;
2149
2150	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2151		/*
2152		 * Setting all of a previous oldext extent to newext.
2153		 * The right neighbor is contiguous, the left is not.
2154		 */
2155		PREV.br_blockcount += RIGHT.br_blockcount;
2156		PREV.br_state = new->br_state;
2157
2158		xfs_iext_next(ifp, icur);
2159		xfs_iext_remove(ip, icur, state);
2160		xfs_iext_prev(ifp, icur);
2161		xfs_iext_update_extent(ip, state, icur, &PREV);
2162		ifp->if_nextents--;
2163
2164		if (cur == NULL)
2165			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2166		else {
2167			rval = XFS_ILOG_CORE;
2168			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2169			if (error)
2170				goto done;
2171			if (XFS_IS_CORRUPT(mp, i != 1)) {
2172				error = -EFSCORRUPTED;
2173				goto done;
2174			}
2175			if ((error = xfs_btree_delete(cur, &i)))
2176				goto done;
2177			if (XFS_IS_CORRUPT(mp, i != 1)) {
2178				error = -EFSCORRUPTED;
2179				goto done;
2180			}
2181			if ((error = xfs_btree_decrement(cur, 0, &i)))
2182				goto done;
2183			if (XFS_IS_CORRUPT(mp, i != 1)) {
2184				error = -EFSCORRUPTED;
2185				goto done;
2186			}
2187			error = xfs_bmbt_update(cur, &PREV);
2188			if (error)
2189				goto done;
2190		}
2191		break;
2192
2193	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2194		/*
2195		 * Setting all of a previous oldext extent to newext.
2196		 * Neither the left nor right neighbors are contiguous with
2197		 * the new one.
2198		 */
2199		PREV.br_state = new->br_state;
2200		xfs_iext_update_extent(ip, state, icur, &PREV);
2201
2202		if (cur == NULL)
2203			rval = XFS_ILOG_DEXT;
2204		else {
2205			rval = 0;
2206			error = xfs_bmbt_lookup_eq(cur, new, &i);
2207			if (error)
2208				goto done;
2209			if (XFS_IS_CORRUPT(mp, i != 1)) {
2210				error = -EFSCORRUPTED;
2211				goto done;
2212			}
2213			error = xfs_bmbt_update(cur, &PREV);
2214			if (error)
2215				goto done;
2216		}
2217		break;
2218
2219	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2220		/*
2221		 * Setting the first part of a previous oldext extent to newext.
2222		 * The left neighbor is contiguous.
2223		 */
2224		LEFT.br_blockcount += new->br_blockcount;
2225
2226		old = PREV;
2227		PREV.br_startoff += new->br_blockcount;
2228		PREV.br_startblock += new->br_blockcount;
2229		PREV.br_blockcount -= new->br_blockcount;
2230
2231		xfs_iext_update_extent(ip, state, icur, &PREV);
2232		xfs_iext_prev(ifp, icur);
2233		xfs_iext_update_extent(ip, state, icur, &LEFT);
2234
2235		if (cur == NULL)
2236			rval = XFS_ILOG_DEXT;
2237		else {
2238			rval = 0;
2239			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2240			if (error)
2241				goto done;
2242			if (XFS_IS_CORRUPT(mp, i != 1)) {
2243				error = -EFSCORRUPTED;
2244				goto done;
2245			}
2246			error = xfs_bmbt_update(cur, &PREV);
2247			if (error)
2248				goto done;
2249			error = xfs_btree_decrement(cur, 0, &i);
2250			if (error)
2251				goto done;
2252			error = xfs_bmbt_update(cur, &LEFT);
2253			if (error)
2254				goto done;
2255		}
2256		break;
2257
2258	case BMAP_LEFT_FILLING:
2259		/*
2260		 * Setting the first part of a previous oldext extent to newext.
2261		 * The left neighbor is not contiguous.
2262		 */
2263		old = PREV;
2264		PREV.br_startoff += new->br_blockcount;
2265		PREV.br_startblock += new->br_blockcount;
2266		PREV.br_blockcount -= new->br_blockcount;
2267
2268		xfs_iext_update_extent(ip, state, icur, &PREV);
2269		xfs_iext_insert(ip, icur, new, state);
2270		ifp->if_nextents++;
2271
2272		if (cur == NULL)
2273			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2274		else {
2275			rval = XFS_ILOG_CORE;
2276			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2277			if (error)
2278				goto done;
2279			if (XFS_IS_CORRUPT(mp, i != 1)) {
2280				error = -EFSCORRUPTED;
2281				goto done;
2282			}
2283			error = xfs_bmbt_update(cur, &PREV);
2284			if (error)
2285				goto done;
2286			cur->bc_rec.b = *new;
2287			if ((error = xfs_btree_insert(cur, &i)))
2288				goto done;
2289			if (XFS_IS_CORRUPT(mp, i != 1)) {
2290				error = -EFSCORRUPTED;
2291				goto done;
2292			}
2293		}
2294		break;
2295
2296	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2297		/*
2298		 * Setting the last part of a previous oldext extent to newext.
2299		 * The right neighbor is contiguous with the new allocation.
2300		 */
2301		old = PREV;
2302		PREV.br_blockcount -= new->br_blockcount;
2303
2304		RIGHT.br_startoff = new->br_startoff;
2305		RIGHT.br_startblock = new->br_startblock;
2306		RIGHT.br_blockcount += new->br_blockcount;
2307
2308		xfs_iext_update_extent(ip, state, icur, &PREV);
2309		xfs_iext_next(ifp, icur);
2310		xfs_iext_update_extent(ip, state, icur, &RIGHT);
2311
2312		if (cur == NULL)
2313			rval = XFS_ILOG_DEXT;
2314		else {
2315			rval = 0;
2316			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2317			if (error)
2318				goto done;
2319			if (XFS_IS_CORRUPT(mp, i != 1)) {
2320				error = -EFSCORRUPTED;
2321				goto done;
2322			}
2323			error = xfs_bmbt_update(cur, &PREV);
2324			if (error)
2325				goto done;
2326			error = xfs_btree_increment(cur, 0, &i);
2327			if (error)
2328				goto done;
2329			error = xfs_bmbt_update(cur, &RIGHT);
2330			if (error)
2331				goto done;
2332		}
2333		break;
2334
2335	case BMAP_RIGHT_FILLING:
2336		/*
2337		 * Setting the last part of a previous oldext extent to newext.
2338		 * The right neighbor is not contiguous.
2339		 */
2340		old = PREV;
2341		PREV.br_blockcount -= new->br_blockcount;
2342
2343		xfs_iext_update_extent(ip, state, icur, &PREV);
2344		xfs_iext_next(ifp, icur);
2345		xfs_iext_insert(ip, icur, new, state);
2346		ifp->if_nextents++;
2347
2348		if (cur == NULL)
2349			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2350		else {
2351			rval = XFS_ILOG_CORE;
2352			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2353			if (error)
2354				goto done;
2355			if (XFS_IS_CORRUPT(mp, i != 1)) {
2356				error = -EFSCORRUPTED;
2357				goto done;
2358			}
2359			error = xfs_bmbt_update(cur, &PREV);
2360			if (error)
2361				goto done;
2362			error = xfs_bmbt_lookup_eq(cur, new, &i);
2363			if (error)
2364				goto done;
2365			if (XFS_IS_CORRUPT(mp, i != 0)) {
2366				error = -EFSCORRUPTED;
2367				goto done;
2368			}
2369			if ((error = xfs_btree_insert(cur, &i)))
2370				goto done;
2371			if (XFS_IS_CORRUPT(mp, i != 1)) {
2372				error = -EFSCORRUPTED;
2373				goto done;
2374			}
2375		}
2376		break;
2377
2378	case 0:
2379		/*
2380		 * Setting the middle part of a previous oldext extent to
2381		 * newext.  Contiguity is impossible here.
2382		 * One extent becomes three extents.
2383		 */
2384		old = PREV;
2385		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
2386
2387		r[0] = *new;
2388		r[1].br_startoff = new_endoff;
2389		r[1].br_blockcount =
2390			old.br_startoff + old.br_blockcount - new_endoff;
2391		r[1].br_startblock = new->br_startblock + new->br_blockcount;
2392		r[1].br_state = PREV.br_state;
2393
2394		xfs_iext_update_extent(ip, state, icur, &PREV);
2395		xfs_iext_next(ifp, icur);
2396		xfs_iext_insert(ip, icur, &r[1], state);
2397		xfs_iext_insert(ip, icur, &r[0], state);
2398		ifp->if_nextents += 2;
2399
2400		if (cur == NULL)
2401			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2402		else {
2403			rval = XFS_ILOG_CORE;
2404			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2405			if (error)
2406				goto done;
2407			if (XFS_IS_CORRUPT(mp, i != 1)) {
2408				error = -EFSCORRUPTED;
2409				goto done;
2410			}
2411			/* new right extent - oldext */
2412			error = xfs_bmbt_update(cur, &r[1]);
2413			if (error)
2414				goto done;
2415			/* new left extent - oldext */
2416			cur->bc_rec.b = PREV;
2417			if ((error = xfs_btree_insert(cur, &i)))
2418				goto done;
2419			if (XFS_IS_CORRUPT(mp, i != 1)) {
2420				error = -EFSCORRUPTED;
2421				goto done;
2422			}
2423			/*
2424			 * Reset the cursor to the position of the new extent
2425			 * we are about to insert as we can't trust it after
2426			 * the previous insert.
2427			 */
2428			error = xfs_bmbt_lookup_eq(cur, new, &i);
2429			if (error)
2430				goto done;
2431			if (XFS_IS_CORRUPT(mp, i != 0)) {
2432				error = -EFSCORRUPTED;
2433				goto done;
2434			}
2435			/* new middle extent - newext */
2436			if ((error = xfs_btree_insert(cur, &i)))
2437				goto done;
2438			if (XFS_IS_CORRUPT(mp, i != 1)) {
2439				error = -EFSCORRUPTED;
2440				goto done;
2441			}
2442		}
2443		break;
2444
2445	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2446	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2447	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2448	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2449	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2450	case BMAP_LEFT_CONTIG:
2451	case BMAP_RIGHT_CONTIG:
2452		/*
2453		 * These cases are all impossible.
2454		 */
2455		ASSERT(0);
2456	}
2457
2458	/* update reverse mappings */
2459	xfs_rmap_convert_extent(mp, tp, ip, whichfork, new);
2460
2461	/* convert to a btree if necessary */
2462	if (xfs_bmap_needs_btree(ip, whichfork)) {
2463		int	tmp_logflags;	/* partial log flag return val */
2464
2465		ASSERT(cur == NULL);
2466		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
2467				&tmp_logflags, whichfork);
2468		*logflagsp |= tmp_logflags;
2469		if (error)
2470			goto done;
2471	}
2472
2473	/* clear out the allocated field, done with it now in any case. */
2474	if (cur) {
2475		cur->bc_ino.allocated = 0;
2476		*curp = cur;
2477	}
2478
2479	xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
2480done:
2481	*logflagsp |= rval;
2482	return error;
2483#undef	LEFT
2484#undef	RIGHT
2485#undef	PREV
2486}
2487
2488/*
2489 * Convert a hole to a delayed allocation.
2490 */
2491STATIC void
2492xfs_bmap_add_extent_hole_delay(
2493	xfs_inode_t		*ip,	/* incore inode pointer */
2494	int			whichfork,
2495	struct xfs_iext_cursor	*icur,
2496	xfs_bmbt_irec_t		*new)	/* new data to add to file extents */
2497{
2498	struct xfs_ifork	*ifp;	/* inode fork pointer */
2499	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
2500	xfs_filblks_t		newlen=0;	/* new indirect size */
2501	xfs_filblks_t		oldlen=0;	/* old indirect size */
2502	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
2503	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
2504	xfs_filblks_t		temp;	 /* temp for indirect calculations */
2505
2506	ifp = xfs_ifork_ptr(ip, whichfork);
2507	ASSERT(isnullstartblock(new->br_startblock));
2508
2509	/*
2510	 * Check and set flags if this segment has a left neighbor
2511	 */
2512	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2513		state |= BMAP_LEFT_VALID;
2514		if (isnullstartblock(left.br_startblock))
2515			state |= BMAP_LEFT_DELAY;
2516	}
2517
2518	/*
2519	 * Check and set flags if the current (right) segment exists.
2520	 * If it doesn't exist, we're converting the hole at end-of-file.
2521	 */
2522	if (xfs_iext_get_extent(ifp, icur, &right)) {
2523		state |= BMAP_RIGHT_VALID;
2524		if (isnullstartblock(right.br_startblock))
2525			state |= BMAP_RIGHT_DELAY;
2526	}
2527
2528	/*
2529	 * Set contiguity flags on the left and right neighbors.
2530	 * Don't let extents get too large, even if the pieces are contiguous.
2531	 */
2532	if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
2533	    left.br_startoff + left.br_blockcount == new->br_startoff &&
2534	    left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
2535		state |= BMAP_LEFT_CONTIG;
2536
2537	if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
2538	    new->br_startoff + new->br_blockcount == right.br_startoff &&
2539	    new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
2540	    (!(state & BMAP_LEFT_CONTIG) ||
2541	     (left.br_blockcount + new->br_blockcount +
2542	      right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)))
2543		state |= BMAP_RIGHT_CONTIG;
2544
2545	/*
2546	 * Switch out based on the contiguity flags.
2547	 */
2548	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2549	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2550		/*
2551		 * New allocation is contiguous with delayed allocations
2552		 * on the left and on the right.
2553		 * Merge all three into a single extent record.
2554		 */
2555		temp = left.br_blockcount + new->br_blockcount +
2556			right.br_blockcount;
2557
2558		oldlen = startblockval(left.br_startblock) +
2559			startblockval(new->br_startblock) +
2560			startblockval(right.br_startblock);
2561		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2562					 oldlen);
2563		left.br_startblock = nullstartblock(newlen);
2564		left.br_blockcount = temp;
2565
2566		xfs_iext_remove(ip, icur, state);
2567		xfs_iext_prev(ifp, icur);
2568		xfs_iext_update_extent(ip, state, icur, &left);
2569		break;
2570
2571	case BMAP_LEFT_CONTIG:
2572		/*
2573		 * New allocation is contiguous with a delayed allocation
2574		 * on the left.
2575		 * Merge the new allocation with the left neighbor.
2576		 */
2577		temp = left.br_blockcount + new->br_blockcount;
2578
2579		oldlen = startblockval(left.br_startblock) +
2580			startblockval(new->br_startblock);
2581		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2582					 oldlen);
2583		left.br_blockcount = temp;
2584		left.br_startblock = nullstartblock(newlen);
2585
2586		xfs_iext_prev(ifp, icur);
2587		xfs_iext_update_extent(ip, state, icur, &left);
2588		break;
2589
2590	case BMAP_RIGHT_CONTIG:
2591		/*
2592		 * New allocation is contiguous with a delayed allocation
2593		 * on the right.
2594		 * Merge the new allocation with the right neighbor.
2595		 */
2596		temp = new->br_blockcount + right.br_blockcount;
2597		oldlen = startblockval(new->br_startblock) +
2598			startblockval(right.br_startblock);
2599		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2600					 oldlen);
2601		right.br_startoff = new->br_startoff;
2602		right.br_startblock = nullstartblock(newlen);
2603		right.br_blockcount = temp;
2604		xfs_iext_update_extent(ip, state, icur, &right);
2605		break;
2606
2607	case 0:
2608		/*
2609		 * New allocation is not contiguous with another
2610		 * delayed allocation.
2611		 * Insert a new entry.
2612		 */
2613		oldlen = newlen = 0;
2614		xfs_iext_insert(ip, icur, new, state);
2615		break;
2616	}
2617	if (oldlen != newlen) {
2618		ASSERT(oldlen > newlen);
2619		xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2620				 false);
2621		/*
2622		 * Nothing to do for disk quota accounting here.
2623		 */
2624		xfs_mod_delalloc(ip->i_mount, (int64_t)newlen - oldlen);
2625	}
2626}
2627
2628/*
2629 * Convert a hole to a real allocation.
2630 */
2631STATIC int				/* error */
2632xfs_bmap_add_extent_hole_real(
2633	struct xfs_trans	*tp,
2634	struct xfs_inode	*ip,
2635	int			whichfork,
2636	struct xfs_iext_cursor	*icur,
2637	struct xfs_btree_cur	**curp,
2638	struct xfs_bmbt_irec	*new,
2639	int			*logflagsp,
2640	uint32_t		flags)
2641{
2642	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
2643	struct xfs_mount	*mp = ip->i_mount;
2644	struct xfs_btree_cur	*cur = *curp;
2645	int			error;	/* error return value */
2646	int			i;	/* temp state */
2647	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
2648	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
2649	int			rval=0;	/* return value (logging flags) */
2650	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
2651	struct xfs_bmbt_irec	old;
2652
2653	ASSERT(!isnullstartblock(new->br_startblock));
2654	ASSERT(!cur || !(cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
2655
2656	XFS_STATS_INC(mp, xs_add_exlist);
2657
2658	/*
2659	 * Check and set flags if this segment has a left neighbor.
2660	 */
2661	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2662		state |= BMAP_LEFT_VALID;
2663		if (isnullstartblock(left.br_startblock))
2664			state |= BMAP_LEFT_DELAY;
2665	}
2666
2667	/*
2668	 * Check and set flags if this segment has a current value.
2669	 * Not true if we're inserting into the "hole" at eof.
2670	 */
2671	if (xfs_iext_get_extent(ifp, icur, &right)) {
2672		state |= BMAP_RIGHT_VALID;
2673		if (isnullstartblock(right.br_startblock))
2674			state |= BMAP_RIGHT_DELAY;
2675	}
2676
2677	/*
2678	 * We're inserting a real allocation between "left" and "right".
2679	 * Set the contiguity flags.  Don't let extents get too large.
2680	 */
2681	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2682	    left.br_startoff + left.br_blockcount == new->br_startoff &&
2683	    left.br_startblock + left.br_blockcount == new->br_startblock &&
2684	    left.br_state == new->br_state &&
2685	    left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
2686		state |= BMAP_LEFT_CONTIG;
2687
2688	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2689	    new->br_startoff + new->br_blockcount == right.br_startoff &&
2690	    new->br_startblock + new->br_blockcount == right.br_startblock &&
2691	    new->br_state == right.br_state &&
2692	    new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
2693	    (!(state & BMAP_LEFT_CONTIG) ||
2694	     left.br_blockcount + new->br_blockcount +
2695	     right.br_blockcount <= XFS_MAX_BMBT_EXTLEN))
2696		state |= BMAP_RIGHT_CONTIG;
2697
2698	error = 0;
2699	/*
2700	 * Select which case we're in here, and implement it.
2701	 */
2702	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2703	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2704		/*
2705		 * New allocation is contiguous with real allocations on the
2706		 * left and on the right.
2707		 * Merge all three into a single extent record.
2708		 */
2709		left.br_blockcount += new->br_blockcount + right.br_blockcount;
2710
2711		xfs_iext_remove(ip, icur, state);
2712		xfs_iext_prev(ifp, icur);
2713		xfs_iext_update_extent(ip, state, icur, &left);
2714		ifp->if_nextents--;
2715
2716		if (cur == NULL) {
2717			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2718		} else {
2719			rval = XFS_ILOG_CORE;
2720			error = xfs_bmbt_lookup_eq(cur, &right, &i);
2721			if (error)
2722				goto done;
2723			if (XFS_IS_CORRUPT(mp, i != 1)) {
2724				error = -EFSCORRUPTED;
2725				goto done;
2726			}
2727			error = xfs_btree_delete(cur, &i);
2728			if (error)
2729				goto done;
2730			if (XFS_IS_CORRUPT(mp, i != 1)) {
2731				error = -EFSCORRUPTED;
2732				goto done;
2733			}
2734			error = xfs_btree_decrement(cur, 0, &i);
2735			if (error)
2736				goto done;
2737			if (XFS_IS_CORRUPT(mp, i != 1)) {
2738				error = -EFSCORRUPTED;
2739				goto done;
2740			}
2741			error = xfs_bmbt_update(cur, &left);
2742			if (error)
2743				goto done;
2744		}
2745		break;
2746
2747	case BMAP_LEFT_CONTIG:
2748		/*
2749		 * New allocation is contiguous with a real allocation
2750		 * on the left.
2751		 * Merge the new allocation with the left neighbor.
2752		 */
2753		old = left;
2754		left.br_blockcount += new->br_blockcount;
2755
2756		xfs_iext_prev(ifp, icur);
2757		xfs_iext_update_extent(ip, state, icur, &left);
2758
2759		if (cur == NULL) {
2760			rval = xfs_ilog_fext(whichfork);
2761		} else {
2762			rval = 0;
2763			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2764			if (error)
2765				goto done;
2766			if (XFS_IS_CORRUPT(mp, i != 1)) {
2767				error = -EFSCORRUPTED;
2768				goto done;
2769			}
2770			error = xfs_bmbt_update(cur, &left);
2771			if (error)
2772				goto done;
2773		}
2774		break;
2775
2776	case BMAP_RIGHT_CONTIG:
2777		/*
2778		 * New allocation is contiguous with a real allocation
2779		 * on the right.
2780		 * Merge the new allocation with the right neighbor.
2781		 */
2782		old = right;
2783
2784		right.br_startoff = new->br_startoff;
2785		right.br_startblock = new->br_startblock;
2786		right.br_blockcount += new->br_blockcount;
2787		xfs_iext_update_extent(ip, state, icur, &right);
2788
2789		if (cur == NULL) {
2790			rval = xfs_ilog_fext(whichfork);
2791		} else {
2792			rval = 0;
2793			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2794			if (error)
2795				goto done;
2796			if (XFS_IS_CORRUPT(mp, i != 1)) {
2797				error = -EFSCORRUPTED;
2798				goto done;
2799			}
2800			error = xfs_bmbt_update(cur, &right);
2801			if (error)
2802				goto done;
2803		}
2804		break;
2805
2806	case 0:
2807		/*
2808		 * New allocation is not contiguous with another
2809		 * real allocation.
2810		 * Insert a new entry.
2811		 */
2812		xfs_iext_insert(ip, icur, new, state);
2813		ifp->if_nextents++;
2814
2815		if (cur == NULL) {
2816			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2817		} else {
2818			rval = XFS_ILOG_CORE;
2819			error = xfs_bmbt_lookup_eq(cur, new, &i);
2820			if (error)
2821				goto done;
2822			if (XFS_IS_CORRUPT(mp, i != 0)) {
2823				error = -EFSCORRUPTED;
2824				goto done;
2825			}
2826			error = xfs_btree_insert(cur, &i);
2827			if (error)
2828				goto done;
2829			if (XFS_IS_CORRUPT(mp, i != 1)) {
2830				error = -EFSCORRUPTED;
2831				goto done;
2832			}
2833		}
2834		break;
2835	}
2836
2837	/* add reverse mapping unless caller opted out */
2838	if (!(flags & XFS_BMAPI_NORMAP))
2839		xfs_rmap_map_extent(tp, ip, whichfork, new);
2840
2841	/* convert to a btree if necessary */
2842	if (xfs_bmap_needs_btree(ip, whichfork)) {
2843		int	tmp_logflags;	/* partial log flag return val */
2844
2845		ASSERT(cur == NULL);
2846		error = xfs_bmap_extents_to_btree(tp, ip, curp, 0,
2847				&tmp_logflags, whichfork);
2848		*logflagsp |= tmp_logflags;
2849		cur = *curp;
2850		if (error)
2851			goto done;
2852	}
2853
2854	/* clear out the allocated field, done with it now in any case. */
2855	if (cur)
2856		cur->bc_ino.allocated = 0;
2857
2858	xfs_bmap_check_leaf_extents(cur, ip, whichfork);
2859done:
2860	*logflagsp |= rval;
2861	return error;
2862}
2863
2864/*
2865 * Functions used in the extent read, allocate and remove paths
2866 */
2867
2868/*
2869 * Adjust the size of the new extent based on i_extsize and rt extsize.
2870 */
2871int
2872xfs_bmap_extsize_align(
2873	xfs_mount_t	*mp,
2874	xfs_bmbt_irec_t	*gotp,		/* next extent pointer */
2875	xfs_bmbt_irec_t	*prevp,		/* previous extent pointer */
2876	xfs_extlen_t	extsz,		/* align to this extent size */
2877	int		rt,		/* is this a realtime inode? */
2878	int		eof,		/* is extent at end-of-file? */
2879	int		delay,		/* creating delalloc extent? */
2880	int		convert,	/* overwriting unwritten extent? */
2881	xfs_fileoff_t	*offp,		/* in/out: aligned offset */
2882	xfs_extlen_t	*lenp)		/* in/out: aligned length */
2883{
2884	xfs_fileoff_t	orig_off;	/* original offset */
2885	xfs_extlen_t	orig_alen;	/* original length */
2886	xfs_fileoff_t	orig_end;	/* original off+len */
2887	xfs_fileoff_t	nexto;		/* next file offset */
2888	xfs_fileoff_t	prevo;		/* previous file offset */
2889	xfs_fileoff_t	align_off;	/* temp for offset */
2890	xfs_extlen_t	align_alen;	/* temp for length */
2891	xfs_extlen_t	temp;		/* temp for calculations */
2892
2893	if (convert)
2894		return 0;
2895
2896	orig_off = align_off = *offp;
2897	orig_alen = align_alen = *lenp;
2898	orig_end = orig_off + orig_alen;
2899
2900	/*
2901	 * If this request overlaps an existing extent, then don't
2902	 * attempt to perform any additional alignment.
2903	 */
2904	if (!delay && !eof &&
2905	    (orig_off >= gotp->br_startoff) &&
2906	    (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
2907		return 0;
2908	}
2909
2910	/*
2911	 * If the file offset is unaligned vs. the extent size
2912	 * we need to align it.  This will be possible unless
2913	 * the file was previously written with a kernel that didn't
2914	 * perform this alignment, or if a truncate shot us in the
2915	 * foot.
2916	 */
2917	div_u64_rem(orig_off, extsz, &temp);
2918	if (temp) {
2919		align_alen += temp;
2920		align_off -= temp;
2921	}
2922
2923	/* Same adjustment for the end of the requested area. */
2924	temp = (align_alen % extsz);
2925	if (temp)
2926		align_alen += extsz - temp;
2927
2928	/*
2929	 * For large extent hint sizes, the aligned extent might be larger than
2930	 * XFS_BMBT_MAX_EXTLEN. In that case, reduce the size by an extsz so
2931	 * that it pulls the length back under XFS_BMBT_MAX_EXTLEN. The outer
2932	 * allocation loops handle short allocation just fine, so it is safe to
2933	 * do this. We only want to do it when we are forced to, though, because
2934	 * it means more allocation operations are required.
2935	 */
2936	while (align_alen > XFS_MAX_BMBT_EXTLEN)
2937		align_alen -= extsz;
2938	ASSERT(align_alen <= XFS_MAX_BMBT_EXTLEN);
2939
2940	/*
2941	 * If the previous block overlaps with this proposed allocation
2942	 * then move the start forward without adjusting the length.
2943	 */
2944	if (prevp->br_startoff != NULLFILEOFF) {
2945		if (prevp->br_startblock == HOLESTARTBLOCK)
2946			prevo = prevp->br_startoff;
2947		else
2948			prevo = prevp->br_startoff + prevp->br_blockcount;
2949	} else
2950		prevo = 0;
2951	if (align_off != orig_off && align_off < prevo)
2952		align_off = prevo;
2953	/*
2954	 * If the next block overlaps with this proposed allocation
2955	 * then move the start back without adjusting the length,
2956	 * but not before offset 0.
2957	 * This may of course make the start overlap previous block,
2958	 * and if we hit the offset 0 limit then the next block
2959	 * can still overlap too.
2960	 */
2961	if (!eof && gotp->br_startoff != NULLFILEOFF) {
2962		if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
2963		    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
2964			nexto = gotp->br_startoff + gotp->br_blockcount;
2965		else
2966			nexto = gotp->br_startoff;
2967	} else
2968		nexto = NULLFILEOFF;
2969	if (!eof &&
2970	    align_off + align_alen != orig_end &&
2971	    align_off + align_alen > nexto)
2972		align_off = nexto > align_alen ? nexto - align_alen : 0;
2973	/*
2974	 * If we're now overlapping the next or previous extent that
2975	 * means we can't fit an extsz piece in this hole.  Just move
2976	 * the start forward to the first valid spot and set
2977	 * the length so we hit the end.
2978	 */
2979	if (align_off != orig_off && align_off < prevo)
2980		align_off = prevo;
2981	if (align_off + align_alen != orig_end &&
2982	    align_off + align_alen > nexto &&
2983	    nexto != NULLFILEOFF) {
2984		ASSERT(nexto > prevo);
2985		align_alen = nexto - align_off;
2986	}
2987
2988	/*
2989	 * If realtime, and the result isn't a multiple of the realtime
2990	 * extent size we need to remove blocks until it is.
2991	 */
2992	if (rt && (temp = xfs_extlen_to_rtxmod(mp, align_alen))) {
2993		/*
2994		 * We're not covering the original request, or
2995		 * we won't be able to once we fix the length.
2996		 */
2997		if (orig_off < align_off ||
2998		    orig_end > align_off + align_alen ||
2999		    align_alen - temp < orig_alen)
3000			return -EINVAL;
3001		/*
3002		 * Try to fix it by moving the start up.
3003		 */
3004		if (align_off + temp <= orig_off) {
3005			align_alen -= temp;
3006			align_off += temp;
3007		}
3008		/*
3009		 * Try to fix it by moving the end in.
3010		 */
3011		else if (align_off + align_alen - temp >= orig_end)
3012			align_alen -= temp;
3013		/*
3014		 * Set the start to the minimum then trim the length.
3015		 */
3016		else {
3017			align_alen -= orig_off - align_off;
3018			align_off = orig_off;
3019			align_alen -= xfs_extlen_to_rtxmod(mp, align_alen);
3020		}
3021		/*
3022		 * Result doesn't cover the request, fail it.
3023		 */
3024		if (orig_off < align_off || orig_end > align_off + align_alen)
3025			return -EINVAL;
3026	} else {
3027		ASSERT(orig_off >= align_off);
3028		/* see XFS_BMBT_MAX_EXTLEN handling above */
3029		ASSERT(orig_end <= align_off + align_alen ||
3030		       align_alen + extsz > XFS_MAX_BMBT_EXTLEN);
3031	}
3032
3033#ifdef DEBUG
3034	if (!eof && gotp->br_startoff != NULLFILEOFF)
3035		ASSERT(align_off + align_alen <= gotp->br_startoff);
3036	if (prevp->br_startoff != NULLFILEOFF)
3037		ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3038#endif
3039
3040	*lenp = align_alen;
3041	*offp = align_off;
3042	return 0;
3043}
3044
3045#define XFS_ALLOC_GAP_UNITS	4
3046
3047/* returns true if ap->blkno was modified */
3048bool
3049xfs_bmap_adjacent(
3050	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
3051{
3052	xfs_fsblock_t	adjust;		/* adjustment to block numbers */
 
3053	xfs_mount_t	*mp;		/* mount point structure */
 
3054	int		rt;		/* true if inode is realtime */
3055
3056#define	ISVALID(x,y)	\
3057	(rt ? \
3058		(x) < mp->m_sb.sb_rblocks : \
3059		XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
3060		XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
3061		XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
3062
3063	mp = ap->ip->i_mount;
 
3064	rt = XFS_IS_REALTIME_INODE(ap->ip) &&
3065		(ap->datatype & XFS_ALLOC_USERDATA);
 
 
3066	/*
3067	 * If allocating at eof, and there's a previous real block,
3068	 * try to use its last block as our starting point.
3069	 */
3070	if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3071	    !isnullstartblock(ap->prev.br_startblock) &&
3072	    ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
3073		    ap->prev.br_startblock)) {
3074		ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3075		/*
3076		 * Adjust for the gap between prevp and us.
3077		 */
3078		adjust = ap->offset -
3079			(ap->prev.br_startoff + ap->prev.br_blockcount);
3080		if (adjust &&
3081		    ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
3082			ap->blkno += adjust;
3083		return true;
3084	}
3085	/*
3086	 * If not at eof, then compare the two neighbor blocks.
3087	 * Figure out whether either one gives us a good starting point,
3088	 * and pick the better one.
3089	 */
3090	if (!ap->eof) {
3091		xfs_fsblock_t	gotbno;		/* right side block number */
3092		xfs_fsblock_t	gotdiff=0;	/* right side difference */
3093		xfs_fsblock_t	prevbno;	/* left side block number */
3094		xfs_fsblock_t	prevdiff=0;	/* left side difference */
3095
3096		/*
3097		 * If there's a previous (left) block, select a requested
3098		 * start block based on it.
3099		 */
3100		if (ap->prev.br_startoff != NULLFILEOFF &&
3101		    !isnullstartblock(ap->prev.br_startblock) &&
3102		    (prevbno = ap->prev.br_startblock +
3103			       ap->prev.br_blockcount) &&
3104		    ISVALID(prevbno, ap->prev.br_startblock)) {
3105			/*
3106			 * Calculate gap to end of previous block.
3107			 */
3108			adjust = prevdiff = ap->offset -
3109				(ap->prev.br_startoff +
3110				 ap->prev.br_blockcount);
3111			/*
3112			 * Figure the startblock based on the previous block's
3113			 * end and the gap size.
3114			 * Heuristic!
3115			 * If the gap is large relative to the piece we're
3116			 * allocating, or using it gives us an invalid block
3117			 * number, then just use the end of the previous block.
3118			 */
3119			if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3120			    ISVALID(prevbno + prevdiff,
3121				    ap->prev.br_startblock))
3122				prevbno += adjust;
3123			else
3124				prevdiff += adjust;
 
 
 
 
 
 
 
3125		}
3126		/*
3127		 * No previous block or can't follow it, just default.
3128		 */
3129		else
3130			prevbno = NULLFSBLOCK;
3131		/*
3132		 * If there's a following (right) block, select a requested
3133		 * start block based on it.
3134		 */
3135		if (!isnullstartblock(ap->got.br_startblock)) {
3136			/*
3137			 * Calculate gap to start of next block.
3138			 */
3139			adjust = gotdiff = ap->got.br_startoff - ap->offset;
3140			/*
3141			 * Figure the startblock based on the next block's
3142			 * start and the gap size.
3143			 */
3144			gotbno = ap->got.br_startblock;
3145			/*
3146			 * Heuristic!
3147			 * If the gap is large relative to the piece we're
3148			 * allocating, or using it gives us an invalid block
3149			 * number, then just use the start of the next block
3150			 * offset by our length.
3151			 */
3152			if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3153			    ISVALID(gotbno - gotdiff, gotbno))
3154				gotbno -= adjust;
3155			else if (ISVALID(gotbno - ap->length, gotbno)) {
3156				gotbno -= ap->length;
3157				gotdiff += adjust - ap->length;
3158			} else
3159				gotdiff += adjust;
 
 
 
 
 
 
 
3160		}
3161		/*
3162		 * No next block, just default.
3163		 */
3164		else
3165			gotbno = NULLFSBLOCK;
3166		/*
3167		 * If both valid, pick the better one, else the only good
3168		 * one, else ap->blkno is already set (to 0 or the inode block).
3169		 */
3170		if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) {
3171			ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
3172			return true;
3173		}
3174		if (prevbno != NULLFSBLOCK) {
3175			ap->blkno = prevbno;
3176			return true;
3177		}
3178		if (gotbno != NULLFSBLOCK) {
3179			ap->blkno = gotbno;
3180			return true;
3181		}
3182	}
3183#undef ISVALID
3184	return false;
3185}
3186
3187int
3188xfs_bmap_longest_free_extent(
3189	struct xfs_perag	*pag,
3190	struct xfs_trans	*tp,
3191	xfs_extlen_t		*blen)
 
 
3192{
 
 
3193	xfs_extlen_t		longest;
3194	int			error = 0;
3195
3196	if (!xfs_perag_initialised_agf(pag)) {
3197		error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_TRYLOCK,
3198				NULL);
3199		if (error)
3200			return error;
 
 
 
 
 
 
3201	}
3202
3203	longest = xfs_alloc_longest_free_extent(pag,
3204				xfs_alloc_min_freelist(pag->pag_mount, pag),
3205				xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3206	if (*blen < longest)
3207		*blen = longest;
3208
3209	return 0;
 
 
3210}
3211
3212static xfs_extlen_t
3213xfs_bmap_select_minlen(
3214	struct xfs_bmalloca	*ap,
3215	struct xfs_alloc_arg	*args,
3216	xfs_extlen_t		blen)
 
3217{
3218
3219	/*
3220	 * Since we used XFS_ALLOC_FLAG_TRYLOCK in _longest_free_extent(), it is
3221	 * possible that there is enough contiguous free space for this request.
3222	 */
3223	if (blen < ap->minlen)
3224		return ap->minlen;
3225
3226	/*
3227	 * If the best seen length is less than the request length,
3228	 * use the best as the minimum, otherwise we've got the maxlen we
3229	 * were asked for.
3230	 */
3231	if (blen < args->maxlen)
3232		return blen;
3233	return args->maxlen;
 
 
 
3234}
3235
3236static int
3237xfs_bmap_btalloc_select_lengths(
3238	struct xfs_bmalloca	*ap,
3239	struct xfs_alloc_arg	*args,
3240	xfs_extlen_t		*blen)
3241{
3242	struct xfs_mount	*mp = args->mp;
3243	struct xfs_perag	*pag;
3244	xfs_agnumber_t		agno, startag;
3245	int			error = 0;
3246
3247	if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
3248		args->total = ap->minlen;
3249		args->minlen = ap->minlen;
3250		return 0;
3251	}
3252
 
3253	args->total = ap->total;
3254	startag = XFS_FSB_TO_AGNO(mp, ap->blkno);
 
3255	if (startag == NULLAGNUMBER)
3256		startag = 0;
3257
3258	*blen = 0;
3259	for_each_perag_wrap(mp, startag, agno, pag) {
3260		error = xfs_bmap_longest_free_extent(pag, args->tp, blen);
3261		if (error && error != -EAGAIN)
3262			break;
3263		error = 0;
3264		if (*blen >= args->maxlen)
 
 
3265			break;
3266	}
3267	if (pag)
3268		xfs_perag_rele(pag);
3269
3270	args->minlen = xfs_bmap_select_minlen(ap, args, *blen);
3271	return error;
3272}
3273
3274/* Update all inode and quota accounting for the allocation we just did. */
3275void
3276xfs_bmap_alloc_account(
3277	struct xfs_bmalloca	*ap)
 
3278{
3279	bool			isrt = XFS_IS_REALTIME_INODE(ap->ip) &&
3280					!(ap->flags & XFS_BMAPI_ATTRFORK);
3281	uint			fld;
 
 
 
 
3282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3283	if (ap->flags & XFS_BMAPI_COWFORK) {
3284		/*
3285		 * COW fork blocks are in-core only and thus are treated as
3286		 * in-core quota reservation (like delalloc blocks) even when
3287		 * converted to real blocks. The quota reservation is not
3288		 * accounted to disk until blocks are remapped to the data
3289		 * fork. So if these blocks were previously delalloc, we
3290		 * already have quota reservation and there's nothing to do
3291		 * yet.
3292		 */
3293		if (ap->wasdel) {
3294			xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)ap->length);
3295			return;
3296		}
3297
3298		/*
3299		 * Otherwise, we've allocated blocks in a hole. The transaction
3300		 * has acquired in-core quota reservation for this extent.
3301		 * Rather than account these as real blocks, however, we reduce
3302		 * the transaction quota reservation based on the allocation.
3303		 * This essentially transfers the transaction quota reservation
3304		 * to that of a delalloc extent.
3305		 */
3306		ap->ip->i_delayed_blks += ap->length;
3307		xfs_trans_mod_dquot_byino(ap->tp, ap->ip, isrt ?
3308				XFS_TRANS_DQ_RES_RTBLKS : XFS_TRANS_DQ_RES_BLKS,
3309				-(long)ap->length);
3310		return;
3311	}
3312
3313	/* data/attr fork only */
3314	ap->ip->i_nblocks += ap->length;
3315	xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3316	if (ap->wasdel) {
3317		ap->ip->i_delayed_blks -= ap->length;
3318		xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)ap->length);
3319		fld = isrt ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_DELBCOUNT;
3320	} else {
3321		fld = isrt ? XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
3322	}
3323
3324	xfs_trans_mod_dquot_byino(ap->tp, ap->ip, fld, ap->length);
 
3325}
3326
3327static int
3328xfs_bmap_compute_alignments(
3329	struct xfs_bmalloca	*ap,
3330	struct xfs_alloc_arg	*args)
3331{
3332	struct xfs_mount	*mp = args->mp;
3333	xfs_extlen_t		align = 0; /* minimum allocation alignment */
3334	int			stripe_align = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3335
3336	/* stripe alignment for allocation is determined by mount parameters */
3337	if (mp->m_swidth && xfs_has_swalloc(mp))
 
3338		stripe_align = mp->m_swidth;
3339	else if (mp->m_dalign)
3340		stripe_align = mp->m_dalign;
3341
3342	if (ap->flags & XFS_BMAPI_COWFORK)
3343		align = xfs_get_cowextsz_hint(ap->ip);
3344	else if (ap->datatype & XFS_ALLOC_USERDATA)
3345		align = xfs_get_extsz_hint(ap->ip);
3346	if (align) {
3347		if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
3348					ap->eof, 0, ap->conv, &ap->offset,
3349					&ap->length))
3350			ASSERT(0);
3351		ASSERT(ap->length);
3352	}
3353
3354	/* apply extent size hints if obtained earlier */
3355	if (align) {
3356		args->prod = align;
3357		div_u64_rem(ap->offset, args->prod, &args->mod);
3358		if (args->mod)
3359			args->mod = args->prod - args->mod;
3360	} else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
3361		args->prod = 1;
3362		args->mod = 0;
3363	} else {
3364		args->prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
3365		div_u64_rem(ap->offset, args->prod, &args->mod);
3366		if (args->mod)
3367			args->mod = args->prod - args->mod;
3368	}
3369
3370	return stripe_align;
3371}
3372
3373static void
3374xfs_bmap_process_allocated_extent(
3375	struct xfs_bmalloca	*ap,
3376	struct xfs_alloc_arg	*args,
3377	xfs_fileoff_t		orig_offset,
3378	xfs_extlen_t		orig_length)
3379{
3380	ap->blkno = args->fsbno;
3381	ap->length = args->len;
3382	/*
3383	 * If the extent size hint is active, we tried to round the
3384	 * caller's allocation request offset down to extsz and the
3385	 * length up to another extsz boundary.  If we found a free
3386	 * extent we mapped it in starting at this new offset.  If the
3387	 * newly mapped space isn't long enough to cover any of the
3388	 * range of offsets that was originally requested, move the
3389	 * mapping up so that we can fill as much of the caller's
3390	 * original request as possible.  Free space is apparently
3391	 * very fragmented so we're unlikely to be able to satisfy the
3392	 * hints anyway.
3393	 */
3394	if (ap->length <= orig_length)
3395		ap->offset = orig_offset;
3396	else if (ap->offset + ap->length < orig_offset + orig_length)
3397		ap->offset = orig_offset + orig_length - ap->length;
3398	xfs_bmap_alloc_account(ap);
3399}
3400
#ifdef DEBUG
/*
 * Debug-only allocation path that asks the allocator for an extent of exactly
 * ap->minlen blocks.
 *
 * Only ap->minlen == 1 is supported; for any other value the function reports
 * "nothing allocated" (ap->blkno = NULLFSBLOCK, ap->length = 0) and returns 0.
 * Returns 0 or the error from the allocator.
 */
static int
xfs_bmap_exact_minlen_extent_alloc(
	struct xfs_bmalloca	*ap)
{
	struct xfs_mount	*mp = ap->ip->i_mount;
	struct xfs_alloc_arg	args = {
		.tp			= ap->tp,
		.mp			= mp,
		.alloc_minlen_only	= 1,
		.oinfo			= XFS_RMAP_OINFO_SKIP_UPDATE,
		.minlen			= ap->minlen,
		.maxlen			= ap->minlen,
		.total			= ap->total,
		.alignment		= 1,
		.minalignslop		= 0,
		.minleft		= ap->minleft,
		.wasdel			= ap->wasdel,
		.resv			= XFS_AG_RESV_NONE,
		.datatype		= ap->datatype,
	};
	xfs_fileoff_t		orig_offset;
	xfs_extlen_t		orig_length;
	int			error;

	ASSERT(ap->length);

	if (ap->minlen != 1) {
		ap->blkno = NULLFSBLOCK;
		ap->length = 0;
		return 0;
	}

	/* Remember the request before alignment may change it. */
	orig_offset = ap->offset;
	orig_length = ap->length;

	xfs_bmap_compute_alignments(ap, &args);

	/*
	 * Unlike the longest free extent, the shortest free extent of an AG
	 * is not tracked anywhere.  XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT is a
	 * debug-only knob, so performance is of no concern here and the
	 * search can simply start at the first block of AG 0.
	 */
	ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0);

	error = xfs_alloc_vextent_first_ag(&args, ap->blkno);
	if (error)
		return error;

	if (args.fsbno == NULLFSBLOCK) {
		ap->blkno = NULLFSBLOCK;
		ap->length = 0;
		return 0;
	}

	xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
			orig_length);
	return 0;
}
#else

/* Without DEBUG the exact minlen path does not exist. */
#define xfs_bmap_exact_minlen_extent_alloc(bma) (-EFSCORRUPTED)

#endif
3468
3469/*
3470 * If we are not low on available data blocks and we are allocating at
3471 * EOF, optimise allocation for contiguous file extension and/or stripe
3472 * alignment of the new extent.
3473 *
3474 * NOTE: ap->aeof is only set if the allocation length is >= the
3475 * stripe unit and the allocation offset is at the end of file.
3476 */
3477static int
3478xfs_bmap_btalloc_at_eof(
3479	struct xfs_bmalloca	*ap,
3480	struct xfs_alloc_arg	*args,
3481	xfs_extlen_t		blen,
3482	int			stripe_align,
3483	bool			ag_only)
3484{
3485	struct xfs_mount	*mp = args->mp;
3486	struct xfs_perag	*caller_pag = args->pag;
3487	int			error;
3488
3489	/*
3490	 * If there are already extents in the file, try an exact EOF block
3491	 * allocation to extend the file as a contiguous extent. If that fails,
3492	 * or it's the first allocation in a file, just try for a stripe aligned
3493	 * allocation.
3494	 */
3495	if (ap->offset) {
3496		xfs_extlen_t	nextminlen = 0;
3497
3498		/*
3499		 * Compute the minlen+alignment for the next case.  Set slop so
3500		 * that the value of minlen+alignment+slop doesn't go up between
3501		 * the calls.
3502		 */
3503		args->alignment = 1;
3504		if (blen > stripe_align && blen <= args->maxlen)
3505			nextminlen = blen - stripe_align;
3506		else
3507			nextminlen = args->minlen;
3508		if (nextminlen + stripe_align > args->minlen + 1)
3509			args->minalignslop = nextminlen + stripe_align -
3510					args->minlen - 1;
3511		else
3512			args->minalignslop = 0;
3513
3514		if (!caller_pag)
3515			args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno));
3516		error = xfs_alloc_vextent_exact_bno(args, ap->blkno);
3517		if (!caller_pag) {
3518			xfs_perag_put(args->pag);
3519			args->pag = NULL;
3520		}
3521		if (error)
3522			return error;
3523
3524		if (args->fsbno != NULLFSBLOCK)
3525			return 0;
3526		/*
3527		 * Exact allocation failed. Reset to try an aligned allocation
3528		 * according to the original allocation specification.
3529		 */
3530		args->alignment = stripe_align;
3531		args->minlen = nextminlen;
3532		args->minalignslop = 0;
3533	} else {
 
 
 
 
 
 
3534		/*
3535		 * Adjust minlen to try and preserve alignment if we
3536		 * can't guarantee an aligned maxlen extent.
3537		 */
3538		args->alignment = stripe_align;
3539		if (blen > args->alignment &&
3540		    blen <= args->maxlen + args->alignment)
3541			args->minlen = blen - args->alignment;
3542		args->minalignslop = 0;
3543	}
3544
3545	if (ag_only) {
3546		error = xfs_alloc_vextent_near_bno(args, ap->blkno);
3547	} else {
3548		args->pag = NULL;
3549		error = xfs_alloc_vextent_start_ag(args, ap->blkno);
3550		ASSERT(args->pag == NULL);
3551		args->pag = caller_pag;
3552	}
3553	if (error)
3554		return error;
3555
3556	if (args->fsbno != NULLFSBLOCK)
3557		return 0;
3558
3559	/*
3560	 * Allocation failed, so turn return the allocation args to their
3561	 * original non-aligned state so the caller can proceed on allocation
3562	 * failure as if this function was never called.
3563	 */
3564	args->alignment = 1;
3565	return 0;
3566}
3567
3568/*
3569 * We have failed multiple allocation attempts so now are in a low space
3570 * allocation situation. Try a locality first full filesystem minimum length
3571 * allocation whilst still maintaining necessary total block reservation
3572 * requirements.
3573 *
3574 * If that fails, we are now critically low on space, so perform a last resort
3575 * allocation attempt: no reserve, no locality, blocking, minimum length, full
3576 * filesystem free space scan. We also indicate to future allocations in this
3577 * transaction that we are critically low on space so they don't waste time on
3578 * allocation modes that are unlikely to succeed.
3579 */
3580int
3581xfs_bmap_btalloc_low_space(
3582	struct xfs_bmalloca	*ap,
3583	struct xfs_alloc_arg	*args)
3584{
3585	int			error;
3586
3587	if (args->minlen > ap->minlen) {
3588		args->minlen = ap->minlen;
3589		error = xfs_alloc_vextent_start_ag(args, ap->blkno);
3590		if (error || args->fsbno != NULLFSBLOCK)
3591			return error;
3592	}
3593
3594	/* Last ditch attempt before failure is declared. */
3595	args->total = ap->minlen;
3596	error = xfs_alloc_vextent_first_ag(args, 0);
3597	if (error)
3598		return error;
3599	ap->tp->t_flags |= XFS_TRANS_LOWMODE;
3600	return 0;
3601}
3602
3603static int
3604xfs_bmap_btalloc_filestreams(
3605	struct xfs_bmalloca	*ap,
3606	struct xfs_alloc_arg	*args,
3607	int			stripe_align)
3608{
3609	xfs_extlen_t		blen = 0;
3610	int			error = 0;
3611
3612
3613	error = xfs_filestream_select_ag(ap, args, &blen);
3614	if (error)
3615		return error;
3616	ASSERT(args->pag);
3617
3618	/*
3619	 * If we are in low space mode, then optimal allocation will fail so
3620	 * prepare for minimal allocation and jump to the low space algorithm
3621	 * immediately.
3622	 */
3623	if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
3624		args->minlen = ap->minlen;
3625		ASSERT(args->fsbno == NULLFSBLOCK);
3626		goto out_low_space;
3627	}
3628
3629	args->minlen = xfs_bmap_select_minlen(ap, args, blen);
3630	if (ap->aeof)
3631		error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
3632				true);
3633
3634	if (!error && args->fsbno == NULLFSBLOCK)
3635		error = xfs_alloc_vextent_near_bno(args, ap->blkno);
3636
3637out_low_space:
3638	/*
3639	 * We are now done with the perag reference for the filestreams
3640	 * association provided by xfs_filestream_select_ag(). Release it now as
3641	 * we've either succeeded, had a fatal error or we are out of space and
3642	 * need to do a full filesystem scan for free space which will take it's
3643	 * own references.
3644	 */
3645	xfs_perag_rele(args->pag);
3646	args->pag = NULL;
3647	if (error || args->fsbno != NULLFSBLOCK)
3648		return error;
3649
3650	return xfs_bmap_btalloc_low_space(ap, args);
3651}
3652
3653static int
3654xfs_bmap_btalloc_best_length(
3655	struct xfs_bmalloca	*ap,
3656	struct xfs_alloc_arg	*args,
3657	int			stripe_align)
3658{
3659	xfs_extlen_t		blen = 0;
3660	int			error;
3661
3662	ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino);
3663	xfs_bmap_adjacent(ap);
3664
3665	/*
3666	 * Search for an allocation group with a single extent large enough for
3667	 * the request.  If one isn't found, then adjust the minimum allocation
3668	 * size to the largest space found.
3669	 */
3670	error = xfs_bmap_btalloc_select_lengths(ap, args, &blen);
3671	if (error)
3672		return error;
3673
3674	/*
3675	 * Don't attempt optimal EOF allocation if previous allocations barely
3676	 * succeeded due to being near ENOSPC. It is highly unlikely we'll get
3677	 * optimal or even aligned allocations in this case, so don't waste time
3678	 * trying.
3679	 */
3680	if (ap->aeof && !(ap->tp->t_flags & XFS_TRANS_LOWMODE)) {
3681		error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
3682				false);
3683		if (error || args->fsbno != NULLFSBLOCK)
3684			return error;
 
3685	}
3686
3687	error = xfs_alloc_vextent_start_ag(args, ap->blkno);
3688	if (error || args->fsbno != NULLFSBLOCK)
3689		return error;
3690
3691	return xfs_bmap_btalloc_low_space(ap, args);
3692}
3693
3694static int
3695xfs_bmap_btalloc(
3696	struct xfs_bmalloca	*ap)
3697{
3698	struct xfs_mount	*mp = ap->ip->i_mount;
3699	struct xfs_alloc_arg	args = {
3700		.tp		= ap->tp,
3701		.mp		= mp,
3702		.fsbno		= NULLFSBLOCK,
3703		.oinfo		= XFS_RMAP_OINFO_SKIP_UPDATE,
3704		.minleft	= ap->minleft,
3705		.wasdel		= ap->wasdel,
3706		.resv		= XFS_AG_RESV_NONE,
3707		.datatype	= ap->datatype,
3708		.alignment	= 1,
3709		.minalignslop	= 0,
3710	};
3711	xfs_fileoff_t		orig_offset;
3712	xfs_extlen_t		orig_length;
3713	int			error;
3714	int			stripe_align;
3715
3716	ASSERT(ap->length);
3717	orig_offset = ap->offset;
3718	orig_length = ap->length;
3719
3720	stripe_align = xfs_bmap_compute_alignments(ap, &args);
3721
3722	/* Trim the allocation back to the maximum an AG can fit. */
3723	args.maxlen = min(ap->length, mp->m_ag_max_usable);
3724
3725	if ((ap->datatype & XFS_ALLOC_USERDATA) &&
3726	    xfs_inode_is_filestream(ap->ip))
3727		error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align);
3728	else
3729		error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align);
3730	if (error)
3731		return error;
3732
3733	if (args.fsbno != NULLFSBLOCK) {
3734		xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
3735			orig_length);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3736	} else {
3737		ap->blkno = NULLFSBLOCK;
3738		ap->length = 0;
3739	}
3740	return 0;
3741}
3742
3743/* Trim extent to fit a logical block range. */
3744void
3745xfs_trim_extent(
3746	struct xfs_bmbt_irec	*irec,
3747	xfs_fileoff_t		bno,
3748	xfs_filblks_t		len)
3749{
3750	xfs_fileoff_t		distance;
3751	xfs_fileoff_t		end = bno + len;
3752
3753	if (irec->br_startoff + irec->br_blockcount <= bno ||
3754	    irec->br_startoff >= end) {
3755		irec->br_blockcount = 0;
3756		return;
3757	}
3758
3759	if (irec->br_startoff < bno) {
3760		distance = bno - irec->br_startoff;
3761		if (isnullstartblock(irec->br_startblock))
3762			irec->br_startblock = DELAYSTARTBLOCK;
3763		if (irec->br_startblock != DELAYSTARTBLOCK &&
3764		    irec->br_startblock != HOLESTARTBLOCK)
3765			irec->br_startblock += distance;
3766		irec->br_startoff += distance;
3767		irec->br_blockcount -= distance;
3768	}
3769
3770	if (end < irec->br_startoff + irec->br_blockcount) {
3771		distance = irec->br_startoff + irec->br_blockcount - end;
3772		irec->br_blockcount -= distance;
3773	}
3774}
3775
3776/*
3777 * Trim the returned map to the required bounds
3778 */
3779STATIC void
3780xfs_bmapi_trim_map(
3781	struct xfs_bmbt_irec	*mval,
3782	struct xfs_bmbt_irec	*got,
3783	xfs_fileoff_t		*bno,
3784	xfs_filblks_t		len,
3785	xfs_fileoff_t		obno,
3786	xfs_fileoff_t		end,
3787	int			n,
3788	uint32_t		flags)
3789{
3790	if ((flags & XFS_BMAPI_ENTIRE) ||
3791	    got->br_startoff + got->br_blockcount <= obno) {
3792		*mval = *got;
3793		if (isnullstartblock(got->br_startblock))
3794			mval->br_startblock = DELAYSTARTBLOCK;
3795		return;
3796	}
3797
3798	if (obno > *bno)
3799		*bno = obno;
3800	ASSERT((*bno >= obno) || (n == 0));
3801	ASSERT(*bno < end);
3802	mval->br_startoff = *bno;
3803	if (isnullstartblock(got->br_startblock))
3804		mval->br_startblock = DELAYSTARTBLOCK;
3805	else
3806		mval->br_startblock = got->br_startblock +
3807					(*bno - got->br_startoff);
3808	/*
3809	 * Return the minimum of what we got and what we asked for for
3810	 * the length.  We can use the len variable here because it is
3811	 * modified below and we could have been there before coming
3812	 * here if the first part of the allocation didn't overlap what
3813	 * was asked for.
3814	 */
3815	mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3816			got->br_blockcount - (*bno - got->br_startoff));
3817	mval->br_state = got->br_state;
3818	ASSERT(mval->br_blockcount <= len);
3819	return;
3820}
3821
3822/*
3823 * Update and validate the extent map to return
3824 */
3825STATIC void
3826xfs_bmapi_update_map(
3827	struct xfs_bmbt_irec	**map,
3828	xfs_fileoff_t		*bno,
3829	xfs_filblks_t		*len,
3830	xfs_fileoff_t		obno,
3831	xfs_fileoff_t		end,
3832	int			*n,
3833	uint32_t		flags)
3834{
3835	xfs_bmbt_irec_t	*mval = *map;
3836
3837	ASSERT((flags & XFS_BMAPI_ENTIRE) ||
3838	       ((mval->br_startoff + mval->br_blockcount) <= end));
3839	ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
3840	       (mval->br_startoff < obno));
3841
3842	*bno = mval->br_startoff + mval->br_blockcount;
3843	*len = end - *bno;
3844	if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
3845		/* update previous map with new information */
3846		ASSERT(mval->br_startblock == mval[-1].br_startblock);
3847		ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
3848		ASSERT(mval->br_state == mval[-1].br_state);
3849		mval[-1].br_blockcount = mval->br_blockcount;
3850		mval[-1].br_state = mval->br_state;
3851	} else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
3852		   mval[-1].br_startblock != DELAYSTARTBLOCK &&
3853		   mval[-1].br_startblock != HOLESTARTBLOCK &&
3854		   mval->br_startblock == mval[-1].br_startblock +
3855					  mval[-1].br_blockcount &&
3856		   mval[-1].br_state == mval->br_state) {
3857		ASSERT(mval->br_startoff ==
3858		       mval[-1].br_startoff + mval[-1].br_blockcount);
3859		mval[-1].br_blockcount += mval->br_blockcount;
3860	} else if (*n > 0 &&
3861		   mval->br_startblock == DELAYSTARTBLOCK &&
3862		   mval[-1].br_startblock == DELAYSTARTBLOCK &&
3863		   mval->br_startoff ==
3864		   mval[-1].br_startoff + mval[-1].br_blockcount) {
3865		mval[-1].br_blockcount += mval->br_blockcount;
3866		mval[-1].br_state = mval->br_state;
3867	} else if (!((*n == 0) &&
3868		     ((mval->br_startoff + mval->br_blockcount) <=
3869		      obno))) {
3870		mval++;
3871		(*n)++;
3872	}
3873	*map = mval;
3874}
3875
3876/*
3877 * Map file blocks to filesystem blocks without allocation.
3878 */
3879int
3880xfs_bmapi_read(
3881	struct xfs_inode	*ip,
3882	xfs_fileoff_t		bno,
3883	xfs_filblks_t		len,
3884	struct xfs_bmbt_irec	*mval,
3885	int			*nmap,
3886	uint32_t		flags)
3887{
3888	struct xfs_mount	*mp = ip->i_mount;
3889	int			whichfork = xfs_bmapi_whichfork(flags);
3890	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
3891	struct xfs_bmbt_irec	got;
3892	xfs_fileoff_t		obno;
3893	xfs_fileoff_t		end;
3894	struct xfs_iext_cursor	icur;
3895	int			error;
3896	bool			eof = false;
3897	int			n = 0;
3898
3899	ASSERT(*nmap >= 1);
3900	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE)));
3901	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
3902
3903	if (WARN_ON_ONCE(!ifp))
3904		return -EFSCORRUPTED;
3905
3906	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
3907	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT))
3908		return -EFSCORRUPTED;
3909
3910	if (xfs_is_shutdown(mp))
3911		return -EIO;
3912
3913	XFS_STATS_INC(mp, xs_blk_mapr);
3914
3915	error = xfs_iread_extents(NULL, ip, whichfork);
3916	if (error)
3917		return error;
 
 
3918
3919	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
3920		eof = true;
3921	end = bno + len;
3922	obno = bno;
3923
3924	while (bno < end && n < *nmap) {
3925		/* Reading past eof, act as though there's a hole up to end. */
3926		if (eof)
3927			got.br_startoff = end;
3928		if (got.br_startoff > bno) {
3929			/* Reading in a hole.  */
3930			mval->br_startoff = bno;
3931			mval->br_startblock = HOLESTARTBLOCK;
3932			mval->br_blockcount =
3933				XFS_FILBLKS_MIN(len, got.br_startoff - bno);
3934			mval->br_state = XFS_EXT_NORM;
3935			bno += mval->br_blockcount;
3936			len -= mval->br_blockcount;
3937			mval++;
3938			n++;
3939			continue;
3940		}
3941
3942		/* set up the extent map to return. */
3943		xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
3944		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
3945
3946		/* If we're done, stop now. */
3947		if (bno >= end || n >= *nmap)
3948			break;
3949
3950		/* Else go on to the next record. */
3951		if (!xfs_iext_next_extent(ifp, &icur, &got))
3952			eof = true;
3953	}
3954	*nmap = n;
3955	return 0;
3956}
3957
3958/*
3959 * Add a delayed allocation extent to an inode. Blocks are reserved from the
3960 * global pool and the extent inserted into the inode in-core extent tree.
3961 *
3962 * On entry, got refers to the first extent beyond the offset of the extent to
3963 * allocate or eof is specified if no such extent exists. On return, got refers
3964 * to the extent record that was inserted to the inode fork.
3965 *
3966 * Note that the allocated extent may have been merged with contiguous extents
3967 * during insertion into the inode fork. Thus, got does not reflect the current
3968 * state of the inode fork on return. If necessary, the caller can use lastx to
3969 * look up the updated record in the inode fork.
3970 */
3971int
3972xfs_bmapi_reserve_delalloc(
3973	struct xfs_inode	*ip,
3974	int			whichfork,
3975	xfs_fileoff_t		off,
3976	xfs_filblks_t		len,
3977	xfs_filblks_t		prealloc,
3978	struct xfs_bmbt_irec	*got,
3979	struct xfs_iext_cursor	*icur,
3980	int			eof)
3981{
3982	struct xfs_mount	*mp = ip->i_mount;
3983	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
3984	xfs_extlen_t		alen;
3985	xfs_extlen_t		indlen;
3986	int			error;
3987	xfs_fileoff_t		aoff = off;
3988
3989	/*
3990	 * Cap the alloc length. Keep track of prealloc so we know whether to
3991	 * tag the inode before we return.
3992	 */
3993	alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN);
3994	if (!eof)
3995		alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
3996	if (prealloc && alen >= len)
3997		prealloc = alen - len;
3998
3999	/* Figure out the extent size, adjust alen */
4000	if (whichfork == XFS_COW_FORK) {
4001		struct xfs_bmbt_irec	prev;
4002		xfs_extlen_t		extsz = xfs_get_cowextsz_hint(ip);
4003
4004		if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
4005			prev.br_startoff = NULLFILEOFF;
4006
4007		error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
4008					       1, 0, &aoff, &alen);
4009		ASSERT(!error);
4010	}
4011
4012	/*
4013	 * Make a transaction-less quota reservation for delayed allocation
4014	 * blocks.  This number gets adjusted later.  We return if we haven't
4015	 * allocated blocks already inside this loop.
4016	 */
4017	error = xfs_quota_reserve_blkres(ip, alen);
 
4018	if (error)
4019		return error;
4020
4021	/*
4022	 * Split changing sb for alen and indlen since they could be coming
4023	 * from different places.
4024	 */
4025	indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
4026	ASSERT(indlen > 0);
4027
4028	error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
4029	if (error)
4030		goto out_unreserve_quota;
4031
4032	error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
4033	if (error)
4034		goto out_unreserve_blocks;
4035
4036
4037	ip->i_delayed_blks += alen;
4038	xfs_mod_delalloc(ip->i_mount, alen + indlen);
4039
4040	got->br_startoff = aoff;
4041	got->br_startblock = nullstartblock(indlen);
4042	got->br_blockcount = alen;
4043	got->br_state = XFS_EXT_NORM;
4044
4045	xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
4046
4047	/*
4048	 * Tag the inode if blocks were preallocated. Note that COW fork
4049	 * preallocation can occur at the start or end of the extent, even when
4050	 * prealloc == 0, so we must also check the aligned offset and length.
4051	 */
4052	if (whichfork == XFS_DATA_FORK && prealloc)
4053		xfs_inode_set_eofblocks_tag(ip);
4054	if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
4055		xfs_inode_set_cowblocks_tag(ip);
4056
4057	return 0;
4058
4059out_unreserve_blocks:
4060	xfs_mod_fdblocks(mp, alen, false);
4061out_unreserve_quota:
4062	if (XFS_IS_QUOTA_ON(mp))
4063		xfs_quota_unreserve_blkres(ip, alen);
 
4064	return error;
4065}
4066
4067static int
4068xfs_bmap_alloc_userdata(
4069	struct xfs_bmalloca	*bma)
4070{
4071	struct xfs_mount	*mp = bma->ip->i_mount;
4072	int			whichfork = xfs_bmapi_whichfork(bma->flags);
4073	int			error;
4074
4075	/*
4076	 * Set the data type being allocated. For the data fork, the first data
4077	 * in the file is treated differently to all other allocations. For the
4078	 * attribute fork, we only need to ensure the allocated range is not on
4079	 * the busy list.
4080	 */
4081	bma->datatype = XFS_ALLOC_NOBUSY;
4082	if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
4083		bma->datatype |= XFS_ALLOC_USERDATA;
4084		if (bma->offset == 0)
4085			bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
4086
4087		if (mp->m_dalign && bma->length >= mp->m_dalign) {
4088			error = xfs_bmap_isaeof(bma, whichfork);
4089			if (error)
4090				return error;
4091		}
4092
4093		if (XFS_IS_REALTIME_INODE(bma->ip))
4094			return xfs_bmap_rtalloc(bma);
4095	}
4096
4097	if (unlikely(XFS_TEST_ERROR(false, mp,
4098			XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
4099		return xfs_bmap_exact_minlen_extent_alloc(bma);
4100
4101	return xfs_bmap_btalloc(bma);
4102}
4103
4104static int
4105xfs_bmapi_allocate(
4106	struct xfs_bmalloca	*bma)
4107{
4108	struct xfs_mount	*mp = bma->ip->i_mount;
4109	int			whichfork = xfs_bmapi_whichfork(bma->flags);
4110	struct xfs_ifork	*ifp = xfs_ifork_ptr(bma->ip, whichfork);
4111	int			tmp_logflags = 0;
4112	int			error;
4113
4114	ASSERT(bma->length > 0);
4115
4116	/*
4117	 * For the wasdelay case, we could also just allocate the stuff asked
4118	 * for in this bmap call but that wouldn't be as good.
4119	 */
4120	if (bma->wasdel) {
4121		bma->length = (xfs_extlen_t)bma->got.br_blockcount;
4122		bma->offset = bma->got.br_startoff;
4123		if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev))
4124			bma->prev.br_startoff = NULLFILEOFF;
4125	} else {
4126		bma->length = XFS_FILBLKS_MIN(bma->length, XFS_MAX_BMBT_EXTLEN);
4127		if (!bma->eof)
4128			bma->length = XFS_FILBLKS_MIN(bma->length,
4129					bma->got.br_startoff - bma->offset);
4130	}
4131
4132	if (bma->flags & XFS_BMAPI_CONTIG)
4133		bma->minlen = bma->length;
4134	else
4135		bma->minlen = 1;
4136
4137	if (bma->flags & XFS_BMAPI_METADATA) {
4138		if (unlikely(XFS_TEST_ERROR(false, mp,
4139				XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
4140			error = xfs_bmap_exact_minlen_extent_alloc(bma);
4141		else
4142			error = xfs_bmap_btalloc(bma);
4143	} else {
4144		error = xfs_bmap_alloc_userdata(bma);
4145	}
4146	if (error || bma->blkno == NULLFSBLOCK)
4147		return error;
4148
4149	if (bma->flags & XFS_BMAPI_ZERO) {
4150		error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
4151		if (error)
4152			return error;
4153	}
4154
4155	if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur)
4156		bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4157	/*
4158	 * Bump the number of extents we've allocated
4159	 * in this call.
4160	 */
4161	bma->nallocs++;
4162
4163	if (bma->cur)
4164		bma->cur->bc_ino.flags =
4165			bma->wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
4166
4167	bma->got.br_startoff = bma->offset;
4168	bma->got.br_startblock = bma->blkno;
4169	bma->got.br_blockcount = bma->length;
4170	bma->got.br_state = XFS_EXT_NORM;
4171
4172	if (bma->flags & XFS_BMAPI_PREALLOC)
4173		bma->got.br_state = XFS_EXT_UNWRITTEN;
4174
4175	if (bma->wasdel)
4176		error = xfs_bmap_add_extent_delay_real(bma, whichfork);
4177	else
4178		error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
4179				whichfork, &bma->icur, &bma->cur, &bma->got,
4180				&bma->logflags, bma->flags);
4181
4182	bma->logflags |= tmp_logflags;
4183	if (error)
4184		return error;
4185
4186	/*
4187	 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
4188	 * or xfs_bmap_add_extent_hole_real might have merged it into one of
4189	 * the neighbouring ones.
4190	 */
4191	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4192
4193	ASSERT(bma->got.br_startoff <= bma->offset);
4194	ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
4195	       bma->offset + bma->length);
4196	ASSERT(bma->got.br_state == XFS_EXT_NORM ||
4197	       bma->got.br_state == XFS_EXT_UNWRITTEN);
4198	return 0;
4199}
4200
4201STATIC int
4202xfs_bmapi_convert_unwritten(
4203	struct xfs_bmalloca	*bma,
4204	struct xfs_bmbt_irec	*mval,
4205	xfs_filblks_t		len,
4206	uint32_t		flags)
4207{
4208	int			whichfork = xfs_bmapi_whichfork(flags);
4209	struct xfs_ifork	*ifp = xfs_ifork_ptr(bma->ip, whichfork);
4210	int			tmp_logflags = 0;
4211	int			error;
4212
4213	/* check if we need to do unwritten->real conversion */
4214	if (mval->br_state == XFS_EXT_UNWRITTEN &&
4215	    (flags & XFS_BMAPI_PREALLOC))
4216		return 0;
4217
4218	/* check if we need to do real->unwritten conversion */
4219	if (mval->br_state == XFS_EXT_NORM &&
4220	    (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4221			(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4222		return 0;
4223
4224	/*
4225	 * Modify (by adding) the state flag, if writing.
4226	 */
4227	ASSERT(mval->br_blockcount <= len);
4228	if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur) {
4229		bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4230					bma->ip, whichfork);
4231	}
4232	mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4233				? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4234
4235	/*
4236	 * Before insertion into the bmbt, zero the range being converted
4237	 * if required.
4238	 */
4239	if (flags & XFS_BMAPI_ZERO) {
4240		error = xfs_zero_extent(bma->ip, mval->br_startblock,
4241					mval->br_blockcount);
4242		if (error)
4243			return error;
4244	}
4245
4246	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
4247			&bma->icur, &bma->cur, mval, &tmp_logflags);
4248	/*
4249	 * Log the inode core unconditionally in the unwritten extent conversion
4250	 * path because the conversion might not have done so (e.g., if the
4251	 * extent count hasn't changed). We need to make sure the inode is dirty
4252	 * in the transaction for the sake of fsync(), even if nothing has
4253	 * changed, because fsync() will not force the log for this transaction
4254	 * unless it sees the inode pinned.
4255	 *
4256	 * Note: If we're only converting cow fork extents, there aren't
4257	 * any on-disk updates to make, so we don't need to log anything.
4258	 */
4259	if (whichfork != XFS_COW_FORK)
4260		bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4261	if (error)
4262		return error;
4263
4264	/*
4265	 * Update our extent pointer, given that
4266	 * xfs_bmap_add_extent_unwritten_real might have merged it into one
4267	 * of the neighbouring ones.
4268	 */
4269	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4270
4271	/*
4272	 * We may have combined previously unwritten space with written space,
4273	 * so generate another request.
4274	 */
4275	if (mval->br_blockcount < len)
4276		return -EAGAIN;
4277	return 0;
4278}
4279
4280xfs_extlen_t
4281xfs_bmapi_minleft(
4282	struct xfs_trans	*tp,
4283	struct xfs_inode	*ip,
4284	int			fork)
4285{
4286	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, fork);
4287
4288	if (tp && tp->t_highest_agno != NULLAGNUMBER)
4289		return 0;
4290	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
4291		return 1;
4292	return be16_to_cpu(ifp->if_broot->bb_level) + 1;
4293}
4294
4295/*
4296 * Log whatever the flags say, even if error.  Otherwise we might miss detecting
4297 * a case where the data is changed, there's an error, and it's not logged so we
4298 * don't shutdown when we should.  Don't bother logging extents/btree changes if
4299 * we converted to the other format.
4300 */
4301static void
4302xfs_bmapi_finish(
4303	struct xfs_bmalloca	*bma,
4304	int			whichfork,
4305	int			error)
4306{
4307	struct xfs_ifork	*ifp = xfs_ifork_ptr(bma->ip, whichfork);
4308
4309	if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
4310	    ifp->if_format != XFS_DINODE_FMT_EXTENTS)
4311		bma->logflags &= ~xfs_ilog_fext(whichfork);
4312	else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
4313		 ifp->if_format != XFS_DINODE_FMT_BTREE)
4314		bma->logflags &= ~xfs_ilog_fbroot(whichfork);
4315
4316	if (bma->logflags)
4317		xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
4318	if (bma->cur)
4319		xfs_btree_del_cursor(bma->cur, error);
4320}
4321
4322/*
4323 * Map file blocks to filesystem blocks, and allocate blocks or convert the
4324 * extent state if necessary.  Details behaviour is controlled by the flags
4325 * parameter.  Only allocates blocks from a single allocation group, to avoid
4326 * locking problems.
4327 */
4328int
4329xfs_bmapi_write(
4330	struct xfs_trans	*tp,		/* transaction pointer */
4331	struct xfs_inode	*ip,		/* incore inode */
4332	xfs_fileoff_t		bno,		/* starting file offs. mapped */
4333	xfs_filblks_t		len,		/* length to map in file */
4334	uint32_t		flags,		/* XFS_BMAPI_... */
4335	xfs_extlen_t		total,		/* total blocks needed */
4336	struct xfs_bmbt_irec	*mval,		/* output: map values */
4337	int			*nmap)		/* i/o: mval size/count */
4338{
4339	struct xfs_bmalloca	bma = {
4340		.tp		= tp,
4341		.ip		= ip,
4342		.total		= total,
4343	};
4344	struct xfs_mount	*mp = ip->i_mount;
4345	int			whichfork = xfs_bmapi_whichfork(flags);
4346	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
4347	xfs_fileoff_t		end;		/* end of mapped file region */
4348	bool			eof = false;	/* after the end of extents */
4349	int			error;		/* error return */
4350	int			n;		/* current extent index */
4351	xfs_fileoff_t		obno;		/* old block number (offset) */
4352
4353#ifdef DEBUG
4354	xfs_fileoff_t		orig_bno;	/* original block number value */
4355	int			orig_flags;	/* original flags arg value */
4356	xfs_filblks_t		orig_len;	/* original value of len arg */
4357	struct xfs_bmbt_irec	*orig_mval;	/* original value of mval */
4358	int			orig_nmap;	/* original value of *nmap */
4359
4360	orig_bno = bno;
4361	orig_len = len;
4362	orig_flags = flags;
4363	orig_mval = mval;
4364	orig_nmap = *nmap;
4365#endif
4366
4367	ASSERT(*nmap >= 1);
4368	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4369	ASSERT(tp != NULL);
4370	ASSERT(len > 0);
4371	ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL);
4372	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4373	ASSERT(!(flags & XFS_BMAPI_REMAP));
4374
4375	/* zeroing is for currently only for data extents, not metadata */
4376	ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4377			(XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4378	/*
4379	 * we can allocate unwritten extents or pre-zero allocated blocks,
4380	 * but it makes no sense to do both at once. This would result in
4381	 * zeroing the unwritten extent twice, but it still being an
4382	 * unwritten extent....
4383	 */
4384	ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4385			(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4386
4387	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4388	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4389		return -EFSCORRUPTED;
4390	}
4391
4392	if (xfs_is_shutdown(mp))
4393		return -EIO;
4394
4395	XFS_STATS_INC(mp, xs_blk_mapw);
4396
4397	error = xfs_iread_extents(tp, ip, whichfork);
4398	if (error)
4399		goto error0;
 
 
4400
4401	if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
4402		eof = true;
4403	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4404		bma.prev.br_startoff = NULLFILEOFF;
4405	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4406
4407	n = 0;
4408	end = bno + len;
4409	obno = bno;
4410	while (bno < end && n < *nmap) {
4411		bool			need_alloc = false, wasdelay = false;
4412
4413		/* in hole or beyond EOF? */
4414		if (eof || bma.got.br_startoff > bno) {
4415			/*
4416			 * CoW fork conversions should /never/ hit EOF or
4417			 * holes.  There should always be something for us
4418			 * to work on.
4419			 */
4420			ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
4421			         (flags & XFS_BMAPI_COWFORK)));
4422
4423			need_alloc = true;
4424		} else if (isnullstartblock(bma.got.br_startblock)) {
4425			wasdelay = true;
4426		}
4427
4428		/*
4429		 * First, deal with the hole before the allocated space
4430		 * that we found, if any.
4431		 */
4432		if (need_alloc || wasdelay) {
4433			bma.eof = eof;
4434			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4435			bma.wasdel = wasdelay;
4436			bma.offset = bno;
4437			bma.flags = flags;
4438
4439			/*
4440			 * There's a 32/64 bit type mismatch between the
4441			 * allocation length request (which can be 64 bits in
4442			 * length) and the bma length request, which is
4443			 * xfs_extlen_t and therefore 32 bits. Hence we have to
4444			 * check for 32-bit overflows and handle them here.
4445			 */
4446			if (len > (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN)
4447				bma.length = XFS_MAX_BMBT_EXTLEN;
4448			else
4449				bma.length = len;
4450
4451			ASSERT(len > 0);
4452			ASSERT(bma.length > 0);
4453			error = xfs_bmapi_allocate(&bma);
4454			if (error)
4455				goto error0;
4456			if (bma.blkno == NULLFSBLOCK)
4457				break;
4458
4459			/*
4460			 * If this is a CoW allocation, record the data in
4461			 * the refcount btree for orphan recovery.
4462			 */
4463			if (whichfork == XFS_COW_FORK)
4464				xfs_refcount_alloc_cow_extent(tp, bma.blkno,
4465						bma.length);
4466		}
4467
4468		/* Deal with the allocated space we found.  */
4469		xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
4470							end, n, flags);
4471
4472		/* Execute unwritten extent conversion if necessary */
4473		error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
4474		if (error == -EAGAIN)
4475			continue;
4476		if (error)
4477			goto error0;
4478
4479		/* update the extent map to return */
4480		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4481
4482		/*
4483		 * If we're done, stop now.  Stop when we've allocated
4484		 * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
4485		 * the transaction may get too big.
4486		 */
4487		if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
4488			break;
4489
4490		/* Else go on to the next record. */
4491		bma.prev = bma.got;
4492		if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
4493			eof = true;
4494	}
4495	*nmap = n;
4496
4497	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4498			whichfork);
4499	if (error)
4500		goto error0;
4501
4502	ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE ||
4503	       ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
4504	xfs_bmapi_finish(&bma, whichfork, 0);
4505	xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
4506		orig_nmap, *nmap);
4507	return 0;
4508error0:
4509	xfs_bmapi_finish(&bma, whichfork, error);
4510	return error;
4511}
4512
4513/*
4514 * Convert an existing delalloc extent to real blocks based on file offset. This
4515 * attempts to allocate the entire delalloc extent and may require multiple
4516 * invocations to allocate the target offset if a large enough physical extent
4517 * is not available.
4518 */
4519int
4520xfs_bmapi_convert_delalloc(
4521	struct xfs_inode	*ip,
4522	int			whichfork,
4523	xfs_off_t		offset,
4524	struct iomap		*iomap,
4525	unsigned int		*seq)
4526{
4527	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
4528	struct xfs_mount	*mp = ip->i_mount;
4529	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
4530	struct xfs_bmalloca	bma = { NULL };
4531	uint16_t		flags = 0;
4532	struct xfs_trans	*tp;
4533	int			error;
4534
4535	if (whichfork == XFS_COW_FORK)
4536		flags |= IOMAP_F_SHARED;
4537
4538	/*
4539	 * Space for the extent and indirect blocks was reserved when the
4540	 * delalloc extent was created so there's no need to do so here.
4541	 */
4542	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
4543				XFS_TRANS_RESERVE, &tp);
4544	if (error)
4545		return error;
4546
4547	xfs_ilock(ip, XFS_ILOCK_EXCL);
4548	xfs_trans_ijoin(tp, ip, 0);
4549
4550	error = xfs_iext_count_may_overflow(ip, whichfork,
4551			XFS_IEXT_ADD_NOSPLIT_CNT);
4552	if (error == -EFBIG)
4553		error = xfs_iext_count_upgrade(tp, ip,
4554				XFS_IEXT_ADD_NOSPLIT_CNT);
4555	if (error)
4556		goto out_trans_cancel;
4557
4558	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
4559	    bma.got.br_startoff > offset_fsb) {
4560		/*
4561		 * No extent found in the range we are trying to convert.  This
4562		 * should only happen for the COW fork, where another thread
4563		 * might have moved the extent to the data fork in the meantime.
4564		 */
4565		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
4566		error = -EAGAIN;
4567		goto out_trans_cancel;
4568	}
4569
4570	/*
4571	 * If we find a real extent here we raced with another thread converting
4572	 * the extent.  Just return the real extent at this offset.
4573	 */
4574	if (!isnullstartblock(bma.got.br_startblock)) {
4575		xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
4576				xfs_iomap_inode_sequence(ip, flags));
4577		*seq = READ_ONCE(ifp->if_seq);
4578		goto out_trans_cancel;
4579	}
4580
4581	bma.tp = tp;
4582	bma.ip = ip;
4583	bma.wasdel = true;
4584	bma.offset = bma.got.br_startoff;
4585	bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount,
4586			XFS_MAX_BMBT_EXTLEN);
4587	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4588
4589	/*
4590	 * When we're converting the delalloc reservations backing dirty pages
4591	 * in the page cache, we must be careful about how we create the new
4592	 * extents:
4593	 *
4594	 * New CoW fork extents are created unwritten, turned into real extents
4595	 * when we're about to write the data to disk, and mapped into the data
4596	 * fork after the write finishes.  End of story.
4597	 *
4598	 * New data fork extents must be mapped in as unwritten and converted
4599	 * to real extents after the write succeeds to avoid exposing stale
4600	 * disk contents if we crash.
4601	 */
4602	bma.flags = XFS_BMAPI_PREALLOC;
4603	if (whichfork == XFS_COW_FORK)
4604		bma.flags |= XFS_BMAPI_COWFORK;
4605
4606	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4607		bma.prev.br_startoff = NULLFILEOFF;
4608
4609	error = xfs_bmapi_allocate(&bma);
4610	if (error)
4611		goto out_finish;
4612
4613	error = -ENOSPC;
4614	if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
4615		goto out_finish;
4616	error = -EFSCORRUPTED;
4617	if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock)))
4618		goto out_finish;
4619
4620	XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
4621	XFS_STATS_INC(mp, xs_xstrat_quick);
4622
4623	ASSERT(!isnullstartblock(bma.got.br_startblock));
4624	xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
4625				xfs_iomap_inode_sequence(ip, flags));
4626	*seq = READ_ONCE(ifp->if_seq);
4627
4628	if (whichfork == XFS_COW_FORK)
4629		xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
4630
4631	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4632			whichfork);
4633	if (error)
4634		goto out_finish;
4635
4636	xfs_bmapi_finish(&bma, whichfork, 0);
4637	error = xfs_trans_commit(tp);
4638	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4639	return error;
4640
4641out_finish:
4642	xfs_bmapi_finish(&bma, whichfork, error);
4643out_trans_cancel:
4644	xfs_trans_cancel(tp);
4645	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4646	return error;
4647}
4648
4649int
4650xfs_bmapi_remap(
4651	struct xfs_trans	*tp,
4652	struct xfs_inode	*ip,
4653	xfs_fileoff_t		bno,
4654	xfs_filblks_t		len,
4655	xfs_fsblock_t		startblock,
4656	uint32_t		flags)
4657{
4658	struct xfs_mount	*mp = ip->i_mount;
4659	struct xfs_ifork	*ifp;
4660	struct xfs_btree_cur	*cur = NULL;
4661	struct xfs_bmbt_irec	got;
4662	struct xfs_iext_cursor	icur;
4663	int			whichfork = xfs_bmapi_whichfork(flags);
4664	int			logflags = 0, error;
4665
4666	ifp = xfs_ifork_ptr(ip, whichfork);
4667	ASSERT(len > 0);
4668	ASSERT(len <= (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN);
4669	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4670	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
4671			   XFS_BMAPI_NORMAP)));
4672	ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
4673			(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
4674
4675	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4676	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4677		return -EFSCORRUPTED;
4678	}
4679
4680	if (xfs_is_shutdown(mp))
4681		return -EIO;
4682
4683	error = xfs_iread_extents(tp, ip, whichfork);
4684	if (error)
4685		return error;
 
 
4686
4687	if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
4688		/* make sure we only reflink into a hole. */
4689		ASSERT(got.br_startoff > bno);
4690		ASSERT(got.br_startoff - bno >= len);
4691	}
4692
4693	ip->i_nblocks += len;
4694	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4695
4696	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
4697		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
4698		cur->bc_ino.flags = 0;
4699	}
4700
4701	got.br_startoff = bno;
4702	got.br_startblock = startblock;
4703	got.br_blockcount = len;
4704	if (flags & XFS_BMAPI_PREALLOC)
4705		got.br_state = XFS_EXT_UNWRITTEN;
4706	else
4707		got.br_state = XFS_EXT_NORM;
4708
4709	error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
4710			&cur, &got, &logflags, flags);
4711	if (error)
4712		goto error0;
4713
4714	error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
4715
4716error0:
4717	if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS)
4718		logflags &= ~XFS_ILOG_DEXT;
4719	else if (ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
4720		logflags &= ~XFS_ILOG_DBROOT;
4721
4722	if (logflags)
4723		xfs_trans_log_inode(tp, ip, logflags);
4724	if (cur)
4725		xfs_btree_del_cursor(cur, error);
4726	return error;
4727}
4728
4729/*
4730 * When a delalloc extent is split (e.g., due to a hole punch), the original
4731 * indlen reservation must be shared across the two new extents that are left
4732 * behind.
4733 *
4734 * Given the original reservation and the worst case indlen for the two new
4735 * extents (as calculated by xfs_bmap_worst_indlen()), split the original
4736 * reservation fairly across the two new extents. If necessary, steal available
4737 * blocks from a deleted extent to make up a reservation deficiency (e.g., if
4738 * ores == 1). The number of stolen blocks is returned. The availability and
4739 * subsequent accounting of stolen blocks is the responsibility of the caller.
4740 */
4741static xfs_filblks_t
4742xfs_bmap_split_indlen(
4743	xfs_filblks_t			ores,		/* original res. */
4744	xfs_filblks_t			*indlen1,	/* ext1 worst indlen */
4745	xfs_filblks_t			*indlen2,	/* ext2 worst indlen */
4746	xfs_filblks_t			avail)		/* stealable blocks */
4747{
4748	xfs_filblks_t			len1 = *indlen1;
4749	xfs_filblks_t			len2 = *indlen2;
4750	xfs_filblks_t			nres = len1 + len2; /* new total res. */
4751	xfs_filblks_t			stolen = 0;
4752	xfs_filblks_t			resfactor;
4753
4754	/*
4755	 * Steal as many blocks as we can to try and satisfy the worst case
4756	 * indlen for both new extents.
4757	 */
4758	if (ores < nres && avail)
4759		stolen = XFS_FILBLKS_MIN(nres - ores, avail);
4760	ores += stolen;
4761
4762	 /* nothing else to do if we've satisfied the new reservation */
4763	if (ores >= nres)
4764		return stolen;
4765
4766	/*
4767	 * We can't meet the total required reservation for the two extents.
4768	 * Calculate the percent of the overall shortage between both extents
4769	 * and apply this percentage to each of the requested indlen values.
4770	 * This distributes the shortage fairly and reduces the chances that one
4771	 * of the two extents is left with nothing when extents are repeatedly
4772	 * split.
4773	 */
4774	resfactor = (ores * 100);
4775	do_div(resfactor, nres);
4776	len1 *= resfactor;
4777	do_div(len1, 100);
4778	len2 *= resfactor;
4779	do_div(len2, 100);
4780	ASSERT(len1 + len2 <= ores);
4781	ASSERT(len1 < *indlen1 && len2 < *indlen2);
4782
4783	/*
4784	 * Hand out the remainder to each extent. If one of the two reservations
4785	 * is zero, we want to make sure that one gets a block first. The loop
4786	 * below starts with len1, so hand len2 a block right off the bat if it
4787	 * is zero.
4788	 */
4789	ores -= (len1 + len2);
4790	ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
4791	if (ores && !len2 && *indlen2) {
4792		len2++;
4793		ores--;
4794	}
4795	while (ores) {
4796		if (len1 < *indlen1) {
4797			len1++;
4798			ores--;
4799		}
4800		if (!ores)
4801			break;
4802		if (len2 < *indlen2) {
4803			len2++;
4804			ores--;
4805		}
4806	}
4807
4808	*indlen1 = len1;
4809	*indlen2 = len2;
4810
4811	return stolen;
4812}
4813
4814int
4815xfs_bmap_del_extent_delay(
4816	struct xfs_inode	*ip,
4817	int			whichfork,
4818	struct xfs_iext_cursor	*icur,
4819	struct xfs_bmbt_irec	*got,
4820	struct xfs_bmbt_irec	*del)
4821{
4822	struct xfs_mount	*mp = ip->i_mount;
4823	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
4824	struct xfs_bmbt_irec	new;
4825	int64_t			da_old, da_new, da_diff = 0;
4826	xfs_fileoff_t		del_endoff, got_endoff;
4827	xfs_filblks_t		got_indlen, new_indlen, stolen;
4828	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
4829	int			error = 0;
4830	bool			isrt;
4831
4832	XFS_STATS_INC(mp, xs_del_exlist);
4833
4834	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
4835	del_endoff = del->br_startoff + del->br_blockcount;
4836	got_endoff = got->br_startoff + got->br_blockcount;
4837	da_old = startblockval(got->br_startblock);
4838	da_new = 0;
4839
4840	ASSERT(del->br_blockcount > 0);
4841	ASSERT(got->br_startoff <= del->br_startoff);
4842	ASSERT(got_endoff >= del_endoff);
4843
4844	if (isrt)
4845		xfs_mod_frextents(mp, xfs_rtb_to_rtx(mp, del->br_blockcount));
 
 
 
 
4846
4847	/*
4848	 * Update the inode delalloc counter now and wait to update the
4849	 * sb counters as we might have to borrow some blocks for the
4850	 * indirect block accounting.
4851	 */
4852	ASSERT(!isrt);
4853	error = xfs_quota_unreserve_blkres(ip, del->br_blockcount);
 
4854	if (error)
4855		return error;
4856	ip->i_delayed_blks -= del->br_blockcount;
4857
4858	if (got->br_startoff == del->br_startoff)
4859		state |= BMAP_LEFT_FILLING;
4860	if (got_endoff == del_endoff)
4861		state |= BMAP_RIGHT_FILLING;
4862
4863	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4864	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4865		/*
4866		 * Matches the whole extent.  Delete the entry.
4867		 */
4868		xfs_iext_remove(ip, icur, state);
4869		xfs_iext_prev(ifp, icur);
4870		break;
4871	case BMAP_LEFT_FILLING:
4872		/*
4873		 * Deleting the first part of the extent.
4874		 */
4875		got->br_startoff = del_endoff;
4876		got->br_blockcount -= del->br_blockcount;
4877		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4878				got->br_blockcount), da_old);
4879		got->br_startblock = nullstartblock((int)da_new);
4880		xfs_iext_update_extent(ip, state, icur, got);
4881		break;
4882	case BMAP_RIGHT_FILLING:
4883		/*
4884		 * Deleting the last part of the extent.
4885		 */
4886		got->br_blockcount = got->br_blockcount - del->br_blockcount;
4887		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4888				got->br_blockcount), da_old);
4889		got->br_startblock = nullstartblock((int)da_new);
4890		xfs_iext_update_extent(ip, state, icur, got);
4891		break;
4892	case 0:
4893		/*
4894		 * Deleting the middle of the extent.
4895		 *
4896		 * Distribute the original indlen reservation across the two new
4897		 * extents.  Steal blocks from the deleted extent if necessary.
4898		 * Stealing blocks simply fudges the fdblocks accounting below.
4899		 * Warn if either of the new indlen reservations is zero as this
4900		 * can lead to delalloc problems.
4901		 */
4902		got->br_blockcount = del->br_startoff - got->br_startoff;
4903		got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
4904
4905		new.br_blockcount = got_endoff - del_endoff;
4906		new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
4907
4908		WARN_ON_ONCE(!got_indlen || !new_indlen);
4909		stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
4910						       del->br_blockcount);
4911
4912		got->br_startblock = nullstartblock((int)got_indlen);
4913
4914		new.br_startoff = del_endoff;
4915		new.br_state = got->br_state;
4916		new.br_startblock = nullstartblock((int)new_indlen);
4917
4918		xfs_iext_update_extent(ip, state, icur, got);
4919		xfs_iext_next(ifp, icur);
4920		xfs_iext_insert(ip, icur, &new, state);
4921
4922		da_new = got_indlen + new_indlen - stolen;
4923		del->br_blockcount -= stolen;
4924		break;
4925	}
4926
4927	ASSERT(da_old >= da_new);
4928	da_diff = da_old - da_new;
4929	if (!isrt)
4930		da_diff += del->br_blockcount;
4931	if (da_diff) {
4932		xfs_mod_fdblocks(mp, da_diff, false);
4933		xfs_mod_delalloc(mp, -da_diff);
4934	}
4935	return error;
4936}
4937
4938void
4939xfs_bmap_del_extent_cow(
4940	struct xfs_inode	*ip,
4941	struct xfs_iext_cursor	*icur,
4942	struct xfs_bmbt_irec	*got,
4943	struct xfs_bmbt_irec	*del)
4944{
4945	struct xfs_mount	*mp = ip->i_mount;
4946	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
4947	struct xfs_bmbt_irec	new;
4948	xfs_fileoff_t		del_endoff, got_endoff;
4949	uint32_t		state = BMAP_COWFORK;
4950
4951	XFS_STATS_INC(mp, xs_del_exlist);
4952
4953	del_endoff = del->br_startoff + del->br_blockcount;
4954	got_endoff = got->br_startoff + got->br_blockcount;
4955
4956	ASSERT(del->br_blockcount > 0);
4957	ASSERT(got->br_startoff <= del->br_startoff);
4958	ASSERT(got_endoff >= del_endoff);
4959	ASSERT(!isnullstartblock(got->br_startblock));
4960
4961	if (got->br_startoff == del->br_startoff)
4962		state |= BMAP_LEFT_FILLING;
4963	if (got_endoff == del_endoff)
4964		state |= BMAP_RIGHT_FILLING;
4965
4966	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4967	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4968		/*
4969		 * Matches the whole extent.  Delete the entry.
4970		 */
4971		xfs_iext_remove(ip, icur, state);
4972		xfs_iext_prev(ifp, icur);
4973		break;
4974	case BMAP_LEFT_FILLING:
4975		/*
4976		 * Deleting the first part of the extent.
4977		 */
4978		got->br_startoff = del_endoff;
4979		got->br_blockcount -= del->br_blockcount;
4980		got->br_startblock = del->br_startblock + del->br_blockcount;
4981		xfs_iext_update_extent(ip, state, icur, got);
4982		break;
4983	case BMAP_RIGHT_FILLING:
4984		/*
4985		 * Deleting the last part of the extent.
4986		 */
4987		got->br_blockcount -= del->br_blockcount;
4988		xfs_iext_update_extent(ip, state, icur, got);
4989		break;
4990	case 0:
4991		/*
4992		 * Deleting the middle of the extent.
4993		 */
4994		got->br_blockcount = del->br_startoff - got->br_startoff;
4995
4996		new.br_startoff = del_endoff;
4997		new.br_blockcount = got_endoff - del_endoff;
4998		new.br_state = got->br_state;
4999		new.br_startblock = del->br_startblock + del->br_blockcount;
5000
5001		xfs_iext_update_extent(ip, state, icur, got);
5002		xfs_iext_next(ifp, icur);
5003		xfs_iext_insert(ip, icur, &new, state);
5004		break;
5005	}
5006	ip->i_delayed_blks -= del->br_blockcount;
5007}
5008
5009/*
5010 * Called by xfs_bmapi to update file extent records and the btree
5011 * after removing space.
5012 */
5013STATIC int				/* error */
5014xfs_bmap_del_extent_real(
5015	xfs_inode_t		*ip,	/* incore inode pointer */
5016	xfs_trans_t		*tp,	/* current transaction pointer */
5017	struct xfs_iext_cursor	*icur,
5018	struct xfs_btree_cur	*cur,	/* if null, not a btree */
5019	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
5020	int			*logflagsp, /* inode logging flags */
5021	int			whichfork, /* data or attr fork */
5022	uint32_t		bflags)	/* bmapi flags */
5023{
5024	xfs_fsblock_t		del_endblock=0;	/* first block past del */
5025	xfs_fileoff_t		del_endoff;	/* first offset past del */
5026	int			do_fx;	/* free extent at end of routine */
5027	int			error;	/* error return value */
 
5028	struct xfs_bmbt_irec	got;	/* current extent entry */
5029	xfs_fileoff_t		got_endoff;	/* first offset past got */
5030	int			i;	/* temp state */
5031	struct xfs_ifork	*ifp;	/* inode fork pointer */
5032	xfs_mount_t		*mp;	/* mount structure */
5033	xfs_filblks_t		nblks;	/* quota/sb block count */
5034	xfs_bmbt_irec_t		new;	/* new record to be inserted */
5035	/* REFERENCED */
5036	uint			qfield;	/* quota field to update */
5037	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
5038	struct xfs_bmbt_irec	old;
5039
5040	*logflagsp = 0;
5041
5042	mp = ip->i_mount;
5043	XFS_STATS_INC(mp, xs_del_exlist);
5044
5045	ifp = xfs_ifork_ptr(ip, whichfork);
5046	ASSERT(del->br_blockcount > 0);
5047	xfs_iext_get_extent(ifp, icur, &got);
5048	ASSERT(got.br_startoff <= del->br_startoff);
5049	del_endoff = del->br_startoff + del->br_blockcount;
5050	got_endoff = got.br_startoff + got.br_blockcount;
5051	ASSERT(got_endoff >= del_endoff);
5052	ASSERT(!isnullstartblock(got.br_startblock));
5053	qfield = 0;
 
5054
5055	/*
5056	 * If it's the case where the directory code is running with no block
5057	 * reservation, and the deleted block is in the middle of its extent,
5058	 * and the resulting insert of an extent would cause transformation to
5059	 * btree format, then reject it.  The calling code will then swap blocks
5060	 * around instead.  We have to do this now, rather than waiting for the
5061	 * conversion to btree format, since the transaction will be dirty then.
5062	 */
5063	if (tp->t_blk_res == 0 &&
5064	    ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
5065	    ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
5066	    del->br_startoff > got.br_startoff && del_endoff < got_endoff)
5067		return -ENOSPC;
5068
5069	*logflagsp = XFS_ILOG_CORE;
5070	if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
5071		if (!(bflags & XFS_BMAPI_REMAP)) {
5072			error = xfs_rtfree_blocks(tp, del->br_startblock,
5073					del->br_blockcount);
5074			if (error)
5075				return error;
5076		}
 
 
 
 
5077
 
 
 
5078		do_fx = 0;
 
5079		qfield = XFS_TRANS_DQ_RTBCOUNT;
5080	} else {
5081		do_fx = 1;
 
5082		qfield = XFS_TRANS_DQ_BCOUNT;
5083	}
5084	nblks = del->br_blockcount;
5085
5086	del_endblock = del->br_startblock + del->br_blockcount;
5087	if (cur) {
5088		error = xfs_bmbt_lookup_eq(cur, &got, &i);
5089		if (error)
5090			return error;
5091		if (XFS_IS_CORRUPT(mp, i != 1))
5092			return -EFSCORRUPTED;
 
 
5093	}
5094
5095	if (got.br_startoff == del->br_startoff)
5096		state |= BMAP_LEFT_FILLING;
5097	if (got_endoff == del_endoff)
5098		state |= BMAP_RIGHT_FILLING;
5099
5100	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5101	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5102		/*
5103		 * Matches the whole extent.  Delete the entry.
5104		 */
5105		xfs_iext_remove(ip, icur, state);
5106		xfs_iext_prev(ifp, icur);
5107		ifp->if_nextents--;
5108
5109		*logflagsp |= XFS_ILOG_CORE;
5110		if (!cur) {
5111			*logflagsp |= xfs_ilog_fext(whichfork);
5112			break;
5113		}
5114		if ((error = xfs_btree_delete(cur, &i)))
5115			return error;
5116		if (XFS_IS_CORRUPT(mp, i != 1))
5117			return -EFSCORRUPTED;
 
 
5118		break;
5119	case BMAP_LEFT_FILLING:
5120		/*
5121		 * Deleting the first part of the extent.
5122		 */
5123		got.br_startoff = del_endoff;
5124		got.br_startblock = del_endblock;
5125		got.br_blockcount -= del->br_blockcount;
5126		xfs_iext_update_extent(ip, state, icur, &got);
5127		if (!cur) {
5128			*logflagsp |= xfs_ilog_fext(whichfork);
5129			break;
5130		}
5131		error = xfs_bmbt_update(cur, &got);
5132		if (error)
5133			return error;
5134		break;
5135	case BMAP_RIGHT_FILLING:
5136		/*
5137		 * Deleting the last part of the extent.
5138		 */
5139		got.br_blockcount -= del->br_blockcount;
5140		xfs_iext_update_extent(ip, state, icur, &got);
5141		if (!cur) {
5142			*logflagsp |= xfs_ilog_fext(whichfork);
5143			break;
5144		}
5145		error = xfs_bmbt_update(cur, &got);
5146		if (error)
5147			return error;
5148		break;
5149	case 0:
5150		/*
5151		 * Deleting the middle of the extent.
5152		 */
5153
5154		old = got;
5155
5156		got.br_blockcount = del->br_startoff - got.br_startoff;
5157		xfs_iext_update_extent(ip, state, icur, &got);
5158
5159		new.br_startoff = del_endoff;
5160		new.br_blockcount = got_endoff - del_endoff;
5161		new.br_state = got.br_state;
5162		new.br_startblock = del_endblock;
5163
5164		*logflagsp |= XFS_ILOG_CORE;
5165		if (cur) {
5166			error = xfs_bmbt_update(cur, &got);
5167			if (error)
5168				return error;
5169			error = xfs_btree_increment(cur, 0, &i);
5170			if (error)
5171				return error;
5172			cur->bc_rec.b = new;
5173			error = xfs_btree_insert(cur, &i);
5174			if (error && error != -ENOSPC)
5175				return error;
5176			/*
5177			 * If get no-space back from btree insert, it tried a
5178			 * split, and we have a zero block reservation.  Fix up
5179			 * our state and return the error.
5180			 */
5181			if (error == -ENOSPC) {
5182				/*
5183				 * Reset the cursor, don't trust it after any
5184				 * insert operation.
5185				 */
5186				error = xfs_bmbt_lookup_eq(cur, &got, &i);
5187				if (error)
5188					return error;
5189				if (XFS_IS_CORRUPT(mp, i != 1))
5190					return -EFSCORRUPTED;
 
 
5191				/*
5192				 * Update the btree record back
5193				 * to the original value.
5194				 */
5195				error = xfs_bmbt_update(cur, &old);
5196				if (error)
5197					return error;
5198				/*
5199				 * Reset the extent record back
5200				 * to the original value.
5201				 */
5202				xfs_iext_update_extent(ip, state, icur, &old);
5203				*logflagsp = 0;
5204				return -ENOSPC;
 
 
 
 
 
5205			}
5206			if (XFS_IS_CORRUPT(mp, i != 1))
5207				return -EFSCORRUPTED;
5208		} else
5209			*logflagsp |= xfs_ilog_fext(whichfork);
5210
5211		ifp->if_nextents++;
5212		xfs_iext_next(ifp, icur);
5213		xfs_iext_insert(ip, icur, &new, state);
5214		break;
5215	}
5216
5217	/* remove reverse mapping */
5218	xfs_rmap_unmap_extent(tp, ip, whichfork, del);
5219
5220	/*
5221	 * If we need to, add to list of extents to delete.
5222	 */
5223	if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
5224		if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
5225			xfs_refcount_decrease_extent(tp, del);
5226		} else {
5227			error = xfs_free_extent_later(tp, del->br_startblock,
5228					del->br_blockcount, NULL,
5229					XFS_AG_RESV_NONE,
5230					((bflags & XFS_BMAPI_NODISCARD) ||
5231					del->br_state == XFS_EXT_UNWRITTEN));
5232			if (error)
5233				return error;
5234		}
5235	}
5236
5237	/*
5238	 * Adjust inode # blocks in the file.
5239	 */
5240	if (nblks)
5241		ip->i_nblocks -= nblks;
5242	/*
5243	 * Adjust quota data.
5244	 */
5245	if (qfield && !(bflags & XFS_BMAPI_REMAP))
5246		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5247
5248	return 0;
 
 
5249}
5250
5251/*
5252 * Unmap (remove) blocks from a file.
5253 * If nexts is nonzero then the number of extents to remove is limited to
5254 * that value.  If not all extents in the block range can be removed then
5255 * *done is set.
5256 */
5257static int
5258__xfs_bunmapi(
5259	struct xfs_trans	*tp,		/* transaction pointer */
5260	struct xfs_inode	*ip,		/* incore inode */
5261	xfs_fileoff_t		start,		/* first file offset deleted */
5262	xfs_filblks_t		*rlen,		/* i/o: amount remaining */
5263	uint32_t		flags,		/* misc flags */
5264	xfs_extnum_t		nexts)		/* number of extents max */
5265{
5266	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
5267	struct xfs_bmbt_irec	del;		/* extent being deleted */
5268	int			error;		/* error return value */
5269	xfs_extnum_t		extno;		/* extent number in list */
5270	struct xfs_bmbt_irec	got;		/* current extent record */
5271	struct xfs_ifork	*ifp;		/* inode fork pointer */
5272	int			isrt;		/* freeing in rt area */
5273	int			logflags;	/* transaction logging flags */
5274	xfs_extlen_t		mod;		/* rt extent offset */
5275	struct xfs_mount	*mp = ip->i_mount;
5276	int			tmp_logflags;	/* partial logging flags */
5277	int			wasdel;		/* was a delayed alloc extent */
5278	int			whichfork;	/* data or attribute fork */
 
5279	xfs_filblks_t		len = *rlen;	/* length to unmap in file */
 
 
5280	xfs_fileoff_t		end;
5281	struct xfs_iext_cursor	icur;
5282	bool			done = false;
5283
5284	trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);
5285
5286	whichfork = xfs_bmapi_whichfork(flags);
5287	ASSERT(whichfork != XFS_COW_FORK);
5288	ifp = xfs_ifork_ptr(ip, whichfork);
5289	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)))
5290		return -EFSCORRUPTED;
5291	if (xfs_is_shutdown(mp))
5292		return -EIO;
5293
5294	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5295	ASSERT(len > 0);
5296	ASSERT(nexts >= 0);
5297
5298	error = xfs_iread_extents(tp, ip, whichfork);
5299	if (error)
5300		return error;
 
 
 
 
 
 
5301
 
 
 
5302	if (xfs_iext_count(ifp) == 0) {
5303		*rlen = 0;
5304		return 0;
5305	}
5306	XFS_STATS_INC(mp, xs_blk_unmap);
5307	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5308	end = start + len;
5309
5310	if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
5311		*rlen = 0;
5312		return 0;
5313	}
5314	end--;
5315
5316	logflags = 0;
5317	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
5318		ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
5319		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5320		cur->bc_ino.flags = 0;
5321	} else
5322		cur = NULL;
5323
5324	if (isrt) {
5325		/*
5326		 * Synchronize by locking the bitmap inode.
5327		 */
5328		xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
5329		xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5330		xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
5331		xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
5332	}
5333
5334	extno = 0;
5335	while (end != (xfs_fileoff_t)-1 && end >= start &&
5336	       (nexts == 0 || extno < nexts)) {
5337		/*
5338		 * Is the found extent after a hole in which end lives?
5339		 * Just back up to the previous extent, if so.
5340		 */
5341		if (got.br_startoff > end &&
5342		    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5343			done = true;
5344			break;
5345		}
5346		/*
5347		 * Is the last block of this extent before the range
5348		 * we're supposed to delete?  If so, we're done.
5349		 */
5350		end = XFS_FILEOFF_MIN(end,
5351			got.br_startoff + got.br_blockcount - 1);
5352		if (end < start)
5353			break;
5354		/*
5355		 * Then deal with the (possibly delayed) allocated space
5356		 * we found.
5357		 */
5358		del = got;
5359		wasdel = isnullstartblock(del.br_startblock);
5360
 
 
 
 
 
 
 
 
 
 
5361		if (got.br_startoff < start) {
5362			del.br_startoff = start;
5363			del.br_blockcount -= start - got.br_startoff;
5364			if (!wasdel)
5365				del.br_startblock += start - got.br_startoff;
5366		}
5367		if (del.br_startoff + del.br_blockcount > end + 1)
5368			del.br_blockcount = end + 1 - del.br_startoff;
5369
 
 
 
 
 
 
 
 
5370		if (!isrt)
5371			goto delete;
5372
5373		mod = xfs_rtb_to_rtxoff(mp,
5374				del.br_startblock + del.br_blockcount);
5375		if (mod) {
5376			/*
5377			 * Realtime extent not lined up at the end.
5378			 * The extent could have been split into written
5379			 * and unwritten pieces, or we could just be
5380			 * unmapping part of it.  But we can't really
5381			 * get rid of part of a realtime extent.
5382			 */
5383			if (del.br_state == XFS_EXT_UNWRITTEN) {
5384				/*
5385				 * This piece is unwritten, or we're not
5386				 * using unwritten extents.  Skip over it.
5387				 */
5388				ASSERT(end >= mod);
5389				end -= mod > del.br_blockcount ?
5390					del.br_blockcount : mod;
5391				if (end < got.br_startoff &&
5392				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5393					done = true;
5394					break;
5395				}
5396				continue;
5397			}
5398			/*
5399			 * It's written, turn it unwritten.
5400			 * This is better than zeroing it.
5401			 */
5402			ASSERT(del.br_state == XFS_EXT_NORM);
5403			ASSERT(tp->t_blk_res > 0);
5404			/*
5405			 * If this spans a realtime extent boundary,
5406			 * chop it back to the start of the one we end at.
5407			 */
5408			if (del.br_blockcount > mod) {
5409				del.br_startoff += del.br_blockcount - mod;
5410				del.br_startblock += del.br_blockcount - mod;
5411				del.br_blockcount = mod;
5412			}
5413			del.br_state = XFS_EXT_UNWRITTEN;
5414			error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5415					whichfork, &icur, &cur, &del,
5416					&logflags);
5417			if (error)
5418				goto error0;
5419			goto nodelete;
5420		}
5421
5422		mod = xfs_rtb_to_rtxoff(mp, del.br_startblock);
5423		if (mod) {
5424			xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;
5425
5426			/*
5427			 * Realtime extent is lined up at the end but not
5428			 * at the front.  We'll get rid of full extents if
5429			 * we can.
5430			 */
5431			if (del.br_blockcount > off) {
5432				del.br_blockcount -= off;
5433				del.br_startoff += off;
5434				del.br_startblock += off;
5435			} else if (del.br_startoff == start &&
5436				   (del.br_state == XFS_EXT_UNWRITTEN ||
5437				    tp->t_blk_res == 0)) {
5438				/*
5439				 * Can't make it unwritten.  There isn't
5440				 * a full extent here so just skip it.
5441				 */
5442				ASSERT(end >= del.br_blockcount);
5443				end -= del.br_blockcount;
5444				if (got.br_startoff > end &&
5445				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5446					done = true;
5447					break;
5448				}
5449				continue;
5450			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
5451				struct xfs_bmbt_irec	prev;
5452				xfs_fileoff_t		unwrite_start;
5453
5454				/*
5455				 * This one is already unwritten.
5456				 * It must have a written left neighbor.
5457				 * Unwrite the killed part of that one and
5458				 * try again.
5459				 */
5460				if (!xfs_iext_prev_extent(ifp, &icur, &prev))
5461					ASSERT(0);
5462				ASSERT(prev.br_state == XFS_EXT_NORM);
5463				ASSERT(!isnullstartblock(prev.br_startblock));
5464				ASSERT(del.br_startblock ==
5465				       prev.br_startblock + prev.br_blockcount);
5466				unwrite_start = max3(start,
5467						     del.br_startoff - mod,
5468						     prev.br_startoff);
5469				mod = unwrite_start - prev.br_startoff;
5470				prev.br_startoff = unwrite_start;
5471				prev.br_startblock += mod;
5472				prev.br_blockcount -= mod;
5473				prev.br_state = XFS_EXT_UNWRITTEN;
5474				error = xfs_bmap_add_extent_unwritten_real(tp,
5475						ip, whichfork, &icur, &cur,
5476						&prev, &logflags);
5477				if (error)
5478					goto error0;
5479				goto nodelete;
5480			} else {
5481				ASSERT(del.br_state == XFS_EXT_NORM);
5482				del.br_state = XFS_EXT_UNWRITTEN;
5483				error = xfs_bmap_add_extent_unwritten_real(tp,
5484						ip, whichfork, &icur, &cur,
5485						&del, &logflags);
5486				if (error)
5487					goto error0;
5488				goto nodelete;
5489			}
5490		}
5491
5492delete:
5493		if (wasdel) {
5494			error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
5495					&got, &del);
5496		} else {
5497			error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
5498					&del, &tmp_logflags, whichfork,
5499					flags);
5500			logflags |= tmp_logflags;
5501		}
5502
5503		if (error)
5504			goto error0;
5505
 
5506		end = del.br_startoff - 1;
5507nodelete:
5508		/*
5509		 * If not done go on to the next (previous) record.
5510		 */
5511		if (end != (xfs_fileoff_t)-1 && end >= start) {
5512			if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5513			    (got.br_startoff > end &&
5514			     !xfs_iext_prev_extent(ifp, &icur, &got))) {
5515				done = true;
5516				break;
5517			}
5518			extno++;
5519		}
5520	}
5521	if (done || end == (xfs_fileoff_t)-1 || end < start)
5522		*rlen = 0;
5523	else
5524		*rlen = end - start + 1;
5525
5526	/*
5527	 * Convert to a btree if necessary.
5528	 */
5529	if (xfs_bmap_needs_btree(ip, whichfork)) {
5530		ASSERT(cur == NULL);
5531		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
5532				&tmp_logflags, whichfork);
5533		logflags |= tmp_logflags;
5534	} else {
5535		error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags,
5536			whichfork);
5537	}
5538
5539error0:
5540	/*
5541	 * Log everything.  Do this after conversion, there's no point in
5542	 * logging the extent records if we've converted to btree format.
5543	 */
5544	if ((logflags & xfs_ilog_fext(whichfork)) &&
5545	    ifp->if_format != XFS_DINODE_FMT_EXTENTS)
5546		logflags &= ~xfs_ilog_fext(whichfork);
5547	else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5548		 ifp->if_format != XFS_DINODE_FMT_BTREE)
5549		logflags &= ~xfs_ilog_fbroot(whichfork);
5550	/*
5551	 * Log inode even in the error case, if the transaction
5552	 * is dirty we'll need to shut down the filesystem.
5553	 */
5554	if (logflags)
5555		xfs_trans_log_inode(tp, ip, logflags);
5556	if (cur) {
5557		if (!error)
5558			cur->bc_ino.allocated = 0;
5559		xfs_btree_del_cursor(cur, error);
5560	}
5561	return error;
5562}
5563
5564/* Unmap a range of a file. */
5565int
5566xfs_bunmapi(
5567	xfs_trans_t		*tp,
5568	struct xfs_inode	*ip,
5569	xfs_fileoff_t		bno,
5570	xfs_filblks_t		len,
5571	uint32_t		flags,
5572	xfs_extnum_t		nexts,
5573	int			*done)
5574{
5575	int			error;
5576
5577	error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts);
5578	*done = (len == 0);
5579	return error;
5580}
5581
5582/*
5583 * Determine whether an extent shift can be accomplished by a merge with the
5584 * extent that precedes the target hole of the shift.
5585 */
5586STATIC bool
5587xfs_bmse_can_merge(
5588	struct xfs_bmbt_irec	*left,	/* preceding extent */
5589	struct xfs_bmbt_irec	*got,	/* current extent to shift */
5590	xfs_fileoff_t		shift)	/* shift fsb */
5591{
5592	xfs_fileoff_t		startoff;
5593
5594	startoff = got->br_startoff - shift;
5595
5596	/*
5597	 * The extent, once shifted, must be adjacent in-file and on-disk with
5598	 * the preceding extent.
5599	 */
5600	if ((left->br_startoff + left->br_blockcount != startoff) ||
5601	    (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5602	    (left->br_state != got->br_state) ||
5603	    (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN))
5604		return false;
5605
5606	return true;
5607}
5608
5609/*
5610 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5611 * hole in the file. If an extent shift would result in the extent being fully
5612 * adjacent to the extent that currently precedes the hole, we can merge with
5613 * the preceding extent rather than do the shift.
5614 *
5615 * This function assumes the caller has verified a shift-by-merge is possible
5616 * with the provided extents via xfs_bmse_can_merge().
5617 */
5618STATIC int
5619xfs_bmse_merge(
5620	struct xfs_trans		*tp,
5621	struct xfs_inode		*ip,
5622	int				whichfork,
5623	xfs_fileoff_t			shift,		/* shift fsb */
5624	struct xfs_iext_cursor		*icur,
5625	struct xfs_bmbt_irec		*got,		/* extent to shift */
5626	struct xfs_bmbt_irec		*left,		/* preceding extent */
5627	struct xfs_btree_cur		*cur,
5628	int				*logflags)	/* output */
5629{
5630	struct xfs_ifork		*ifp = xfs_ifork_ptr(ip, whichfork);
5631	struct xfs_bmbt_irec		new;
5632	xfs_filblks_t			blockcount;
5633	int				error, i;
5634	struct xfs_mount		*mp = ip->i_mount;
5635
5636	blockcount = left->br_blockcount + got->br_blockcount;
5637
5638	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5639	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5640	ASSERT(xfs_bmse_can_merge(left, got, shift));
5641
5642	new = *left;
5643	new.br_blockcount = blockcount;
5644
5645	/*
5646	 * Update the on-disk extent count, the btree if necessary and log the
5647	 * inode.
5648	 */
5649	ifp->if_nextents--;
5650	*logflags |= XFS_ILOG_CORE;
5651	if (!cur) {
5652		*logflags |= XFS_ILOG_DEXT;
5653		goto done;
5654	}
5655
5656	/* lookup and remove the extent to merge */
5657	error = xfs_bmbt_lookup_eq(cur, got, &i);
5658	if (error)
5659		return error;
5660	if (XFS_IS_CORRUPT(mp, i != 1))
5661		return -EFSCORRUPTED;
5662
5663	error = xfs_btree_delete(cur, &i);
5664	if (error)
5665		return error;
5666	if (XFS_IS_CORRUPT(mp, i != 1))
5667		return -EFSCORRUPTED;
5668
5669	/* lookup and update size of the previous extent */
5670	error = xfs_bmbt_lookup_eq(cur, left, &i);
5671	if (error)
5672		return error;
5673	if (XFS_IS_CORRUPT(mp, i != 1))
5674		return -EFSCORRUPTED;
5675
5676	error = xfs_bmbt_update(cur, &new);
5677	if (error)
5678		return error;
5679
5680	/* change to extent format if required after extent removal */
5681	error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
5682	if (error)
5683		return error;
5684
5685done:
5686	xfs_iext_remove(ip, icur, 0);
5687	xfs_iext_prev(ifp, icur);
5688	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5689			&new);
5690
5691	/* update reverse mapping. rmap functions merge the rmaps for us */
5692	xfs_rmap_unmap_extent(tp, ip, whichfork, got);
5693	memcpy(&new, got, sizeof(new));
5694	new.br_startoff = left->br_startoff + left->br_blockcount;
5695	xfs_rmap_map_extent(tp, ip, whichfork, &new);
5696	return 0;
5697}
5698
5699static int
5700xfs_bmap_shift_update_extent(
5701	struct xfs_trans	*tp,
5702	struct xfs_inode	*ip,
5703	int			whichfork,
5704	struct xfs_iext_cursor	*icur,
5705	struct xfs_bmbt_irec	*got,
5706	struct xfs_btree_cur	*cur,
5707	int			*logflags,
5708	xfs_fileoff_t		startoff)
5709{
5710	struct xfs_mount	*mp = ip->i_mount;
5711	struct xfs_bmbt_irec	prev = *got;
5712	int			error, i;
5713
5714	*logflags |= XFS_ILOG_CORE;
5715
5716	got->br_startoff = startoff;
5717
5718	if (cur) {
5719		error = xfs_bmbt_lookup_eq(cur, &prev, &i);
5720		if (error)
5721			return error;
5722		if (XFS_IS_CORRUPT(mp, i != 1))
5723			return -EFSCORRUPTED;
5724
5725		error = xfs_bmbt_update(cur, got);
5726		if (error)
5727			return error;
5728	} else {
5729		*logflags |= XFS_ILOG_DEXT;
5730	}
5731
5732	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5733			got);
5734
5735	/* update reverse mapping */
5736	xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
5737	xfs_rmap_map_extent(tp, ip, whichfork, got);
5738	return 0;
5739}
5740
5741int
5742xfs_bmap_collapse_extents(
5743	struct xfs_trans	*tp,
5744	struct xfs_inode	*ip,
5745	xfs_fileoff_t		*next_fsb,
5746	xfs_fileoff_t		offset_shift_fsb,
5747	bool			*done)
5748{
5749	int			whichfork = XFS_DATA_FORK;
5750	struct xfs_mount	*mp = ip->i_mount;
5751	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
5752	struct xfs_btree_cur	*cur = NULL;
5753	struct xfs_bmbt_irec	got, prev;
5754	struct xfs_iext_cursor	icur;
5755	xfs_fileoff_t		new_startoff;
5756	int			error = 0;
5757	int			logflags = 0;
5758
5759	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5760	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5761		return -EFSCORRUPTED;
5762	}
5763
5764	if (xfs_is_shutdown(mp))
5765		return -EIO;
5766
5767	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5768
5769	error = xfs_iread_extents(tp, ip, whichfork);
5770	if (error)
5771		return error;
 
 
5772
5773	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
5774		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5775		cur->bc_ino.flags = 0;
5776	}
5777
5778	if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5779		*done = true;
5780		goto del_cursor;
5781	}
5782	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5783		error = -EFSCORRUPTED;
5784		goto del_cursor;
5785	}
5786
5787	new_startoff = got.br_startoff - offset_shift_fsb;
5788	if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
5789		if (new_startoff < prev.br_startoff + prev.br_blockcount) {
5790			error = -EINVAL;
5791			goto del_cursor;
5792		}
5793
5794		if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
5795			error = xfs_bmse_merge(tp, ip, whichfork,
5796					offset_shift_fsb, &icur, &got, &prev,
5797					cur, &logflags);
5798			if (error)
5799				goto del_cursor;
5800			goto done;
5801		}
5802	} else {
5803		if (got.br_startoff < offset_shift_fsb) {
5804			error = -EINVAL;
5805			goto del_cursor;
5806		}
5807	}
5808
5809	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5810			cur, &logflags, new_startoff);
5811	if (error)
5812		goto del_cursor;
5813
5814done:
5815	if (!xfs_iext_next_extent(ifp, &icur, &got)) {
5816		*done = true;
5817		goto del_cursor;
5818	}
5819
5820	*next_fsb = got.br_startoff;
5821del_cursor:
5822	if (cur)
5823		xfs_btree_del_cursor(cur, error);
5824	if (logflags)
5825		xfs_trans_log_inode(tp, ip, logflags);
5826	return error;
5827}
5828
5829/* Make sure we won't be right-shifting an extent past the maximum bound. */
5830int
5831xfs_bmap_can_insert_extents(
5832	struct xfs_inode	*ip,
5833	xfs_fileoff_t		off,
5834	xfs_fileoff_t		shift)
5835{
5836	struct xfs_bmbt_irec	got;
5837	int			is_empty;
5838	int			error = 0;
5839
5840	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5841
5842	if (xfs_is_shutdown(ip->i_mount))
5843		return -EIO;
5844
5845	xfs_ilock(ip, XFS_ILOCK_EXCL);
5846	error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
5847	if (!error && !is_empty && got.br_startoff >= off &&
5848	    ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
5849		error = -EINVAL;
5850	xfs_iunlock(ip, XFS_ILOCK_EXCL);
5851
5852	return error;
5853}
5854
5855int
5856xfs_bmap_insert_extents(
5857	struct xfs_trans	*tp,
5858	struct xfs_inode	*ip,
5859	xfs_fileoff_t		*next_fsb,
5860	xfs_fileoff_t		offset_shift_fsb,
5861	bool			*done,
5862	xfs_fileoff_t		stop_fsb)
5863{
5864	int			whichfork = XFS_DATA_FORK;
5865	struct xfs_mount	*mp = ip->i_mount;
5866	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
5867	struct xfs_btree_cur	*cur = NULL;
5868	struct xfs_bmbt_irec	got, next;
5869	struct xfs_iext_cursor	icur;
5870	xfs_fileoff_t		new_startoff;
5871	int			error = 0;
5872	int			logflags = 0;
5873
5874	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5875	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5876		return -EFSCORRUPTED;
5877	}
5878
5879	if (xfs_is_shutdown(mp))
5880		return -EIO;
5881
5882	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5883
5884	error = xfs_iread_extents(tp, ip, whichfork);
5885	if (error)
5886		return error;
 
 
5887
5888	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
5889		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5890		cur->bc_ino.flags = 0;
5891	}
5892
5893	if (*next_fsb == NULLFSBLOCK) {
5894		xfs_iext_last(ifp, &icur);
5895		if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5896		    stop_fsb > got.br_startoff) {
5897			*done = true;
5898			goto del_cursor;
5899		}
5900	} else {
5901		if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5902			*done = true;
5903			goto del_cursor;
5904		}
5905	}
5906	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5907		error = -EFSCORRUPTED;
5908		goto del_cursor;
5909	}
5910
5911	if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
5912		error = -EFSCORRUPTED;
5913		goto del_cursor;
5914	}
5915
5916	new_startoff = got.br_startoff + offset_shift_fsb;
5917	if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
5918		if (new_startoff + got.br_blockcount > next.br_startoff) {
5919			error = -EINVAL;
5920			goto del_cursor;
5921		}
5922
5923		/*
5924		 * Unlike a left shift (which involves a hole punch), a right
5925		 * shift does not modify extent neighbors in any way.  We should
5926		 * never find mergeable extents in this scenario.  Check anyways
5927		 * and warn if we encounter two extents that could be one.
5928		 */
5929		if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
5930			WARN_ON_ONCE(1);
5931	}
5932
5933	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5934			cur, &logflags, new_startoff);
5935	if (error)
5936		goto del_cursor;
5937
5938	if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
5939	    stop_fsb >= got.br_startoff + got.br_blockcount) {
5940		*done = true;
5941		goto del_cursor;
5942	}
5943
5944	*next_fsb = got.br_startoff;
5945del_cursor:
5946	if (cur)
5947		xfs_btree_del_cursor(cur, error);
5948	if (logflags)
5949		xfs_trans_log_inode(tp, ip, logflags);
5950	return error;
5951}
5952
5953/*
5954 * Splits an extent into two extents at split_fsb block such that it is the
5955 * first block of the current_ext. @ext is a target extent to be split.
5956 * @split_fsb is a block where the extents is split.  If split_fsb lies in a
5957 * hole or the first block of extents, just return 0.
5958 */
5959int
5960xfs_bmap_split_extent(
5961	struct xfs_trans	*tp,
5962	struct xfs_inode	*ip,
5963	xfs_fileoff_t		split_fsb)
5964{
5965	int				whichfork = XFS_DATA_FORK;
5966	struct xfs_ifork		*ifp = xfs_ifork_ptr(ip, whichfork);
5967	struct xfs_btree_cur		*cur = NULL;
5968	struct xfs_bmbt_irec		got;
5969	struct xfs_bmbt_irec		new; /* split extent */
5970	struct xfs_mount		*mp = ip->i_mount;
5971	xfs_fsblock_t			gotblkcnt; /* new block count for got */
5972	struct xfs_iext_cursor		icur;
5973	int				error = 0;
5974	int				logflags = 0;
5975	int				i = 0;
5976
5977	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5978	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5979		return -EFSCORRUPTED;
5980	}
5981
5982	if (xfs_is_shutdown(mp))
5983		return -EIO;
5984
5985	/* Read in all the extents */
5986	error = xfs_iread_extents(tp, ip, whichfork);
5987	if (error)
5988		return error;
 
 
5989
5990	/*
5991	 * If there are not extents, or split_fsb lies in a hole we are done.
5992	 */
5993	if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
5994	    got.br_startoff >= split_fsb)
5995		return 0;
5996
5997	gotblkcnt = split_fsb - got.br_startoff;
5998	new.br_startoff = split_fsb;
5999	new.br_startblock = got.br_startblock + gotblkcnt;
6000	new.br_blockcount = got.br_blockcount - gotblkcnt;
6001	new.br_state = got.br_state;
6002
6003	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
6004		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
6005		cur->bc_ino.flags = 0;
6006		error = xfs_bmbt_lookup_eq(cur, &got, &i);
6007		if (error)
6008			goto del_cursor;
6009		if (XFS_IS_CORRUPT(mp, i != 1)) {
6010			error = -EFSCORRUPTED;
6011			goto del_cursor;
6012		}
6013	}
6014
6015	got.br_blockcount = gotblkcnt;
6016	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
6017			&got);
6018
6019	logflags = XFS_ILOG_CORE;
6020	if (cur) {
6021		error = xfs_bmbt_update(cur, &got);
6022		if (error)
6023			goto del_cursor;
6024	} else
6025		logflags |= XFS_ILOG_DEXT;
6026
6027	/* Add new extent */
6028	xfs_iext_next(ifp, &icur);
6029	xfs_iext_insert(ip, &icur, &new, 0);
6030	ifp->if_nextents++;
6031
6032	if (cur) {
6033		error = xfs_bmbt_lookup_eq(cur, &new, &i);
6034		if (error)
6035			goto del_cursor;
6036		if (XFS_IS_CORRUPT(mp, i != 0)) {
6037			error = -EFSCORRUPTED;
6038			goto del_cursor;
6039		}
6040		error = xfs_btree_insert(cur, &i);
6041		if (error)
6042			goto del_cursor;
6043		if (XFS_IS_CORRUPT(mp, i != 1)) {
6044			error = -EFSCORRUPTED;
6045			goto del_cursor;
6046		}
6047	}
6048
6049	/*
6050	 * Convert to a btree if necessary.
6051	 */
6052	if (xfs_bmap_needs_btree(ip, whichfork)) {
6053		int tmp_logflags; /* partial log flag return val */
6054
6055		ASSERT(cur == NULL);
6056		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
6057				&tmp_logflags, whichfork);
6058		logflags |= tmp_logflags;
6059	}
6060
6061del_cursor:
6062	if (cur) {
6063		cur->bc_ino.allocated = 0;
6064		xfs_btree_del_cursor(cur, error);
6065	}
6066
6067	if (logflags)
6068		xfs_trans_log_inode(tp, ip, logflags);
6069	return error;
6070}
6071
6072/* Deferred mapping is only for real extents in the data fork. */
6073static bool
6074xfs_bmap_is_update_needed(
6075	struct xfs_bmbt_irec	*bmap)
6076{
6077	return  bmap->br_startblock != HOLESTARTBLOCK &&
6078		bmap->br_startblock != DELAYSTARTBLOCK;
6079}
6080
6081/* Record a bmap intent. */
6082static int
6083__xfs_bmap_add(
6084	struct xfs_trans		*tp,
6085	enum xfs_bmap_intent_type	type,
6086	struct xfs_inode		*ip,
6087	int				whichfork,
6088	struct xfs_bmbt_irec		*bmap)
6089{
6090	struct xfs_bmap_intent		*bi;
6091
6092	trace_xfs_bmap_defer(tp->t_mountp,
6093			XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
6094			type,
6095			XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
6096			ip->i_ino, whichfork,
6097			bmap->br_startoff,
6098			bmap->br_blockcount,
6099			bmap->br_state);
6100
6101	bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_NOFS | __GFP_NOFAIL);
6102	INIT_LIST_HEAD(&bi->bi_list);
6103	bi->bi_type = type;
6104	bi->bi_owner = ip;
6105	bi->bi_whichfork = whichfork;
6106	bi->bi_bmap = *bmap;
6107
6108	xfs_bmap_update_get_group(tp->t_mountp, bi);
6109	xfs_defer_add(tp, &bi->bi_list, &xfs_bmap_update_defer_type);
6110	return 0;
6111}
6112
6113/* Map an extent into a file. */
6114void
6115xfs_bmap_map_extent(
6116	struct xfs_trans	*tp,
6117	struct xfs_inode	*ip,
6118	struct xfs_bmbt_irec	*PREV)
6119{
6120	if (!xfs_bmap_is_update_needed(PREV))
6121		return;
6122
6123	__xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV);
6124}
6125
6126/* Unmap an extent out of a file. */
6127void
6128xfs_bmap_unmap_extent(
6129	struct xfs_trans	*tp,
6130	struct xfs_inode	*ip,
6131	struct xfs_bmbt_irec	*PREV)
6132{
6133	if (!xfs_bmap_is_update_needed(PREV))
6134		return;
6135
6136	__xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV);
6137}
6138
6139/*
6140 * Process one of the deferred bmap operations.  We pass back the
6141 * btree cursor to maintain our lock on the bmapbt between calls.
6142 */
6143int
6144xfs_bmap_finish_one(
6145	struct xfs_trans		*tp,
6146	struct xfs_bmap_intent		*bi)
 
 
 
 
 
 
6147{
6148	struct xfs_bmbt_irec		*bmap = &bi->bi_bmap;
6149	int				error = 0;
6150
6151	ASSERT(tp->t_highest_agno == NULLAGNUMBER);
6152
6153	trace_xfs_bmap_deferred(tp->t_mountp,
6154			XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
6155			bi->bi_type,
6156			XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
6157			bi->bi_owner->i_ino, bi->bi_whichfork,
6158			bmap->br_startoff, bmap->br_blockcount,
6159			bmap->br_state);
6160
6161	if (WARN_ON_ONCE(bi->bi_whichfork != XFS_DATA_FORK))
6162		return -EFSCORRUPTED;
6163
6164	if (XFS_TEST_ERROR(false, tp->t_mountp,
6165			XFS_ERRTAG_BMAP_FINISH_ONE))
6166		return -EIO;
6167
6168	switch (bi->bi_type) {
6169	case XFS_BMAP_MAP:
6170		error = xfs_bmapi_remap(tp, bi->bi_owner, bmap->br_startoff,
6171				bmap->br_blockcount, bmap->br_startblock, 0);
6172		bmap->br_blockcount = 0;
6173		break;
6174	case XFS_BMAP_UNMAP:
6175		error = __xfs_bunmapi(tp, bi->bi_owner, bmap->br_startoff,
6176				&bmap->br_blockcount, XFS_BMAPI_REMAP, 1);
6177		break;
6178	default:
6179		ASSERT(0);
6180		error = -EFSCORRUPTED;
6181	}
6182
6183	return error;
6184}
6185
6186/* Check that an extent does not have invalid flags or bad ranges. */
6187xfs_failaddr_t
6188xfs_bmap_validate_extent_raw(
6189	struct xfs_mount	*mp,
6190	bool			rtfile,
6191	int			whichfork,
6192	struct xfs_bmbt_irec	*irec)
6193{
6194	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
6195		return __this_address;
 
6196
6197	if (rtfile && whichfork == XFS_DATA_FORK) {
6198		if (!xfs_verify_rtbext(mp, irec->br_startblock,
6199					   irec->br_blockcount))
 
 
 
6200			return __this_address;
6201	} else {
6202		if (!xfs_verify_fsbext(mp, irec->br_startblock,
6203					   irec->br_blockcount))
 
 
 
 
6204			return __this_address;
6205	}
6206	if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
6207		return __this_address;
6208	return NULL;
6209}
6210
6211int __init
6212xfs_bmap_intent_init_cache(void)
6213{
6214	xfs_bmap_intent_cache = kmem_cache_create("xfs_bmap_intent",
6215			sizeof(struct xfs_bmap_intent),
6216			0, 0, NULL);
6217
6218	return xfs_bmap_intent_cache != NULL ? 0 : -ENOMEM;
6219}
6220
6221void
6222xfs_bmap_intent_destroy_cache(void)
6223{
6224	kmem_cache_destroy(xfs_bmap_intent_cache);
6225	xfs_bmap_intent_cache = NULL;
6226}
6227
6228/* Check that an inode's extent does not have invalid flags or bad ranges. */
6229xfs_failaddr_t
6230xfs_bmap_validate_extent(
6231	struct xfs_inode	*ip,
6232	int			whichfork,
6233	struct xfs_bmbt_irec	*irec)
6234{
6235	return xfs_bmap_validate_extent_raw(ip->i_mount,
6236			XFS_IS_REALTIME_INODE(ip), whichfork, irec);
6237}
6238
6239/*
6240 * Used in xfs_itruncate_extents().  This is the maximum number of extents
6241 * freed from a file in a single transaction.
6242 */
6243#define	XFS_ITRUNC_MAX_EXTENTS	2
6244
6245/*
6246 * Unmap every extent in part of an inode's fork.  We don't do any higher level
6247 * invalidation work at all.
6248 */
6249int
6250xfs_bunmapi_range(
6251	struct xfs_trans	**tpp,
6252	struct xfs_inode	*ip,
6253	uint32_t		flags,
6254	xfs_fileoff_t		startoff,
6255	xfs_fileoff_t		endoff)
6256{
6257	xfs_filblks_t		unmap_len = endoff - startoff + 1;
6258	int			error = 0;
6259
6260	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
6261
6262	while (unmap_len > 0) {
6263		ASSERT((*tpp)->t_highest_agno == NULLAGNUMBER);
6264		error = __xfs_bunmapi(*tpp, ip, startoff, &unmap_len, flags,
6265				XFS_ITRUNC_MAX_EXTENTS);
6266		if (error)
6267			goto out;
6268
6269		/* free the just unmapped extents */
6270		error = xfs_defer_finish(tpp);
6271		if (error)
6272			goto out;
6273	}
6274out:
6275	return error;
6276}
v5.9
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4 * All Rights Reserved.
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_log_format.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_bit.h"
  13#include "xfs_sb.h"
  14#include "xfs_mount.h"
  15#include "xfs_defer.h"
  16#include "xfs_dir2.h"
  17#include "xfs_inode.h"
  18#include "xfs_btree.h"
  19#include "xfs_trans.h"
  20#include "xfs_alloc.h"
  21#include "xfs_bmap.h"
  22#include "xfs_bmap_util.h"
  23#include "xfs_bmap_btree.h"
  24#include "xfs_rtalloc.h"
  25#include "xfs_errortag.h"
  26#include "xfs_error.h"
  27#include "xfs_quota.h"
  28#include "xfs_trans_space.h"
  29#include "xfs_buf_item.h"
  30#include "xfs_trace.h"
  31#include "xfs_attr_leaf.h"
  32#include "xfs_filestream.h"
  33#include "xfs_rmap.h"
 
  34#include "xfs_ag_resv.h"
  35#include "xfs_refcount.h"
  36#include "xfs_icache.h"
  37#include "xfs_iomap.h"
  38
  39
/* Slab cache that __xfs_bmap_add_free() allocates its extent free items from. */
  40kmem_zone_t		*xfs_bmap_free_item_zone;
  41
  42/*
  43 * Miscellaneous helper functions
  44 */
  45
  46/*
  47 * Compute and fill in the value of the maximum depth of a bmap btree
  48 * in this filesystem.  Done once, during mount.
  49 */
  50void
  51xfs_bmap_compute_maxlevels(
  52	xfs_mount_t	*mp,		/* file system mount structure */
  53	int		whichfork)	/* data or attr fork */
  54{
 
 
  55	int		level;		/* btree level */
  56	uint		maxblocks;	/* max blocks at this level */
  57	uint		maxleafents;	/* max leaf entries possible */
  58	int		maxrootrecs;	/* max records in root block */
  59	int		minleafrecs;	/* min records in leaf block */
  60	int		minnoderecs;	/* min records in node block */
  61	int		sz;		/* root block size */
  62
  63	/*
  64	 * The maximum number of extents in a file, hence the maximum number of
  65	 * leaf entries, is controlled by the size of the on-disk extent count,
  66	 * either a signed 32-bit number for the data fork, or a signed 16-bit
  67	 * number for the attr fork.
  68	 *
  69	 * Note that we can no longer assume that if we are in ATTR1 that
  70	 * the fork offset of all the inodes will be
  71	 * (xfs_default_attroffset(ip) >> 3) because we could have mounted
  72	 * with ATTR2 and then mounted back with ATTR1, keeping the
  73	 * di_forkoff's fixed but probably at various positions. Therefore,
  74	 * for both ATTR1 and ATTR2 we have to assume the worst case scenario
  75	 * of a minimum size available.
  76	 */
  77	if (whichfork == XFS_DATA_FORK) {
  78		maxleafents = MAXEXTNUM;
 
  79		sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
  80	} else {
  81		maxleafents = MAXAEXTNUM;
  82		sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
  83	}
  84	maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
  85	minleafrecs = mp->m_bmap_dmnr[0];
  86	minnoderecs = mp->m_bmap_dmnr[1];
  87	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
  88	for (level = 1; maxblocks > 1; level++) {
  89		if (maxblocks <= maxrootrecs)
  90			maxblocks = 1;
  91		else
  92			maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
  93	}
  94	mp->m_bm_maxlevels[whichfork] = level;
 
 
 
 
 
 
 
 
 
 
  95}
  96
  97STATIC int				/* error */
  98xfs_bmbt_lookup_eq(
  99	struct xfs_btree_cur	*cur,
 100	struct xfs_bmbt_irec	*irec,
 101	int			*stat)	/* success/failure */
 102{
 103	cur->bc_rec.b = *irec;
 104	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
 105}
 106
 107STATIC int				/* error */
 108xfs_bmbt_lookup_first(
 109	struct xfs_btree_cur	*cur,
 110	int			*stat)	/* success/failure */
 111{
 112	cur->bc_rec.b.br_startoff = 0;
 113	cur->bc_rec.b.br_startblock = 0;
 114	cur->bc_rec.b.br_blockcount = 0;
 115	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 116}
 117
 118/*
 119 * Check if the inode needs to be converted to btree format.
 120 */
 121static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
 122{
 123	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
 124
 125	return whichfork != XFS_COW_FORK &&
 126		ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
 127		ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork);
 128}
 129
 130/*
 131 * Check if the inode should be converted to extent format.
 132 */
 133static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
 134{
 135	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
 136
 137	return whichfork != XFS_COW_FORK &&
 138		ifp->if_format == XFS_DINODE_FMT_BTREE &&
 139		ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork);
 140}
 141
 142/*
 143 * Update the record referred to by cur to the value given by irec
 144 * This either works (return 0) or gets an EFSCORRUPTED error.
 145 */
 146STATIC int
 147xfs_bmbt_update(
 148	struct xfs_btree_cur	*cur,
 149	struct xfs_bmbt_irec	*irec)
 150{
 151	union xfs_btree_rec	rec;
 152
 153	xfs_bmbt_disk_set_all(&rec.bmbt, irec);
 154	return xfs_btree_update(cur, &rec);
 155}
 156
 157/*
 158 * Compute the worst-case number of indirect blocks that will be used
 159 * for ip's delayed extent of length "len".
 160 */
 161STATIC xfs_filblks_t
 162xfs_bmap_worst_indlen(
 163	xfs_inode_t	*ip,		/* incore inode pointer */
 164	xfs_filblks_t	len)		/* delayed extent length */
 165{
 166	int		level;		/* btree level number */
 167	int		maxrecs;	/* maximum record count at this level */
 168	xfs_mount_t	*mp;		/* mount structure */
 169	xfs_filblks_t	rval;		/* return value */
 170
 171	mp = ip->i_mount;
 172	maxrecs = mp->m_bmap_dmxr[0];
 173	for (level = 0, rval = 0;
 174	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
 175	     level++) {
 176		len += maxrecs - 1;
 177		do_div(len, maxrecs);
 178		rval += len;
 179		if (len == 1)
 180			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
 181				level - 1;
 182		if (level == 0)
 183			maxrecs = mp->m_bmap_dmxr[1];
 184	}
 185	return rval;
 186}
 187
 188/*
 189 * Calculate the default attribute fork offset for newly created inodes.
 190 */
 191uint
 192xfs_default_attroffset(
 193	struct xfs_inode	*ip)
 194{
 195	struct xfs_mount	*mp = ip->i_mount;
 196	uint			offset;
 197
 198	if (mp->m_sb.sb_inodesize == 256)
 199		offset = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
 200	else
 201		offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
 202
 203	ASSERT(offset < XFS_LITINO(mp));
 204	return offset;
 205}
 206
 207/*
 208 * Helper routine to reset inode di_forkoff field when switching
 209 * attribute fork from local to extent format - we reset it where
 210 * possible to make space available for inline data fork extents.
 211 */
 212STATIC void
 213xfs_bmap_forkoff_reset(
 214	xfs_inode_t	*ip,
 215	int		whichfork)
 216{
 217	if (whichfork == XFS_ATTR_FORK &&
 218	    ip->i_df.if_format != XFS_DINODE_FMT_DEV &&
 219	    ip->i_df.if_format != XFS_DINODE_FMT_BTREE) {
 220		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;
 221
 222		if (dfl_forkoff > ip->i_d.di_forkoff)
 223			ip->i_d.di_forkoff = dfl_forkoff;
 224	}
 225}
 226
 227#ifdef DEBUG
 228STATIC struct xfs_buf *
 229xfs_bmap_get_bp(
 230	struct xfs_btree_cur	*cur,
 231	xfs_fsblock_t		bno)
 232{
 233	struct xfs_log_item	*lip;
 234	int			i;
 235
 236	if (!cur)
 237		return NULL;
 238
 239	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
 240		if (!cur->bc_bufs[i])
 241			break;
 242		if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
 243			return cur->bc_bufs[i];
 244	}
 245
 246	/* Chase down all the log items to see if the bp is there */
 247	list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
 248		struct xfs_buf_log_item	*bip = (struct xfs_buf_log_item *)lip;
 249
 250		if (bip->bli_item.li_type == XFS_LI_BUF &&
 251		    XFS_BUF_ADDR(bip->bli_buf) == bno)
 252			return bip->bli_buf;
 253	}
 254
 255	return NULL;
 256}
 257
 258STATIC void
 259xfs_check_block(
 260	struct xfs_btree_block	*block,
 261	xfs_mount_t		*mp,
 262	int			root,
 263	short			sz)
 264{
 265	int			i, j, dmxr;
 266	__be64			*pp, *thispa;	/* pointer to block address */
 267	xfs_bmbt_key_t		*prevp, *keyp;
 268
 269	ASSERT(be16_to_cpu(block->bb_level) > 0);
 270
 271	prevp = NULL;
 272	for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
 273		dmxr = mp->m_bmap_dmxr[0];
 274		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
 275
 276		if (prevp) {
 277			ASSERT(be64_to_cpu(prevp->br_startoff) <
 278			       be64_to_cpu(keyp->br_startoff));
 279		}
 280		prevp = keyp;
 281
 282		/*
 283		 * Compare the block numbers to see if there are dups.
 284		 */
 285		if (root)
 286			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
 287		else
 288			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
 289
 290		for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
 291			if (root)
 292				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
 293			else
 294				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
 295			if (*thispa == *pp) {
 296				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
 297					__func__, j, i,
 298					(unsigned long long)be64_to_cpu(*thispa));
 299				xfs_err(mp, "%s: ptrs are equal in node\n",
 300					__func__);
 301				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 302			}
 303		}
 304	}
 305}
 306
 307/*
 308 * Check that the extents for the inode ip are in the right order in all
 309 * btree leaves. THis becomes prohibitively expensive for large extent count
 310 * files, so don't bother with inodes that have more than 10,000 extents in
 311 * them. The btree record ordering checks will still be done, so for such large
 312 * bmapbt constructs that is going to catch most corruptions.
 313 */
 314STATIC void
 315xfs_bmap_check_leaf_extents(
 316	xfs_btree_cur_t		*cur,	/* btree cursor or null */
 317	xfs_inode_t		*ip,		/* incore inode pointer */
 318	int			whichfork)	/* data or attr fork */
 319{
 320	struct xfs_mount	*mp = ip->i_mount;
 321	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 322	struct xfs_btree_block	*block;	/* current btree block */
 323	xfs_fsblock_t		bno;	/* block # of "block" */
 324	xfs_buf_t		*bp;	/* buffer for "block" */
 325	int			error;	/* error return value */
 326	xfs_extnum_t		i=0, j;	/* index into the extents list */
 327	int			level;	/* btree level, for checking */
 328	__be64			*pp;	/* pointer to block address */
 329	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
 330	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
 331	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
 332	int			bp_release = 0;
 333
 334	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
 335		return;
 336
 337	/* skip large extent count inodes */
 338	if (ip->i_df.if_nextents > 10000)
 339		return;
 340
 341	bno = NULLFSBLOCK;
 342	block = ifp->if_broot;
 343	/*
 344	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
 345	 */
 346	level = be16_to_cpu(block->bb_level);
 347	ASSERT(level > 0);
 348	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
 349	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
 350	bno = be64_to_cpu(*pp);
 351
 352	ASSERT(bno != NULLFSBLOCK);
 353	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
 354	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
 355
 356	/*
 357	 * Go down the tree until leaf level is reached, following the first
 358	 * pointer (leftmost) at each level.
 359	 */
 360	while (level-- > 0) {
 361		/* See if buf is in cur first */
 362		bp_release = 0;
 363		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
 364		if (!bp) {
 365			bp_release = 1;
 366			error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
 367						XFS_BMAP_BTREE_REF,
 368						&xfs_bmbt_buf_ops);
 369			if (error)
 370				goto error_norelse;
 371		}
 372		block = XFS_BUF_TO_BLOCK(bp);
 373		if (level == 0)
 374			break;
 375
 376		/*
 377		 * Check this block for basic sanity (increasing keys and
 378		 * no duplicate blocks).
 379		 */
 380
 381		xfs_check_block(block, mp, 0, 0);
 382		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 383		bno = be64_to_cpu(*pp);
 384		if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
 385			error = -EFSCORRUPTED;
 386			goto error0;
 387		}
 388		if (bp_release) {
 389			bp_release = 0;
 390			xfs_trans_brelse(NULL, bp);
 391		}
 392	}
 393
 394	/*
 395	 * Here with bp and block set to the leftmost leaf node in the tree.
 396	 */
 397	i = 0;
 398
 399	/*
 400	 * Loop over all leaf nodes checking that all extents are in the right order.
 401	 */
 402	for (;;) {
 403		xfs_fsblock_t	nextbno;
 404		xfs_extnum_t	num_recs;
 405
 406
 407		num_recs = xfs_btree_get_numrecs(block);
 408
 409		/*
 410		 * Read-ahead the next leaf block, if any.
 411		 */
 412
 413		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
 414
 415		/*
 416		 * Check all the extents to make sure they are OK.
 417		 * If we had a previous block, the last entry should
 418		 * conform with the first entry in this one.
 419		 */
 420
 421		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
 422		if (i) {
 423			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
 424			       xfs_bmbt_disk_get_blockcount(&last) <=
 425			       xfs_bmbt_disk_get_startoff(ep));
 426		}
 427		for (j = 1; j < num_recs; j++) {
 428			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
 429			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
 430			       xfs_bmbt_disk_get_blockcount(ep) <=
 431			       xfs_bmbt_disk_get_startoff(nextp));
 432			ep = nextp;
 433		}
 434
 435		last = *ep;
 436		i += num_recs;
 437		if (bp_release) {
 438			bp_release = 0;
 439			xfs_trans_brelse(NULL, bp);
 440		}
 441		bno = nextbno;
 442		/*
 443		 * If we've reached the end, stop.
 444		 */
 445		if (bno == NULLFSBLOCK)
 446			break;
 447
 448		bp_release = 0;
 449		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
 450		if (!bp) {
 451			bp_release = 1;
 452			error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
 453						XFS_BMAP_BTREE_REF,
 454						&xfs_bmbt_buf_ops);
 455			if (error)
 456				goto error_norelse;
 457		}
 458		block = XFS_BUF_TO_BLOCK(bp);
 459	}
 460
 461	return;
 462
 463error0:
 464	xfs_warn(mp, "%s: at error0", __func__);
 465	if (bp_release)
 466		xfs_trans_brelse(NULL, bp);
 467error_norelse:
 468	xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
 469		__func__, i);
 470	xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
 471	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 472	return;
 473}
 474
 475/*
 476 * Validate that the bmbt_irecs being returned from bmapi are valid
 477 * given the caller's original parameters.  Specifically check the
 478 * ranges of the returned irecs to ensure that they only extend beyond
 479 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 480 */
 481STATIC void
 482xfs_bmap_validate_ret(
 483	xfs_fileoff_t		bno,
 484	xfs_filblks_t		len,
 485	int			flags,
 486	xfs_bmbt_irec_t		*mval,
 487	int			nmap,
 488	int			ret_nmap)
 489{
 490	int			i;		/* index to map values */
 491
 492	ASSERT(ret_nmap <= nmap);
 493
 494	for (i = 0; i < ret_nmap; i++) {
 495		ASSERT(mval[i].br_blockcount > 0);
 496		if (!(flags & XFS_BMAPI_ENTIRE)) {
 497			ASSERT(mval[i].br_startoff >= bno);
 498			ASSERT(mval[i].br_blockcount <= len);
 499			ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
 500			       bno + len);
 501		} else {
 502			ASSERT(mval[i].br_startoff < bno + len);
 503			ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
 504			       bno);
 505		}
 506		ASSERT(i == 0 ||
 507		       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
 508		       mval[i].br_startoff);
 509		ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
 510		       mval[i].br_startblock != HOLESTARTBLOCK);
 511		ASSERT(mval[i].br_state == XFS_EXT_NORM ||
 512		       mval[i].br_state == XFS_EXT_UNWRITTEN);
 513	}
 514}
 515
 516#else
/* Without DEBUG both checking helpers expand to an empty statement. */
 517#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)		do { } while (0)
 518#define	xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)	do { } while (0)
 519#endif /* DEBUG */
 520
 521/*
 522 * bmap free list manipulation functions
 523 */
 524
 525/*
 526 * Add the extent to the list of extents to be free at transaction end.
 527 * The list is maintained sorted (by block number).
 528 */
 529void
 530__xfs_bmap_add_free(
 531	struct xfs_trans		*tp,
 532	xfs_fsblock_t			bno,
 533	xfs_filblks_t			len,
 534	const struct xfs_owner_info	*oinfo,
 535	bool				skip_discard)
 536{
 537	struct xfs_extent_free_item	*new;		/* new element */
 538#ifdef DEBUG
 539	struct xfs_mount		*mp = tp->t_mountp;
 540	xfs_agnumber_t			agno;
 541	xfs_agblock_t			agbno;
 542
 543	ASSERT(bno != NULLFSBLOCK);
 544	ASSERT(len > 0);
 545	ASSERT(len <= MAXEXTLEN);
 546	ASSERT(!isnullstartblock(bno));
 547	agno = XFS_FSB_TO_AGNO(mp, bno);
 548	agbno = XFS_FSB_TO_AGBNO(mp, bno);
 549	ASSERT(agno < mp->m_sb.sb_agcount);
 550	ASSERT(agbno < mp->m_sb.sb_agblocks);
 551	ASSERT(len < mp->m_sb.sb_agblocks);
 552	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
 553#endif
 554	ASSERT(xfs_bmap_free_item_zone != NULL);
 555
 556	new = kmem_cache_alloc(xfs_bmap_free_item_zone,
 557			       GFP_KERNEL | __GFP_NOFAIL);
 558	new->xefi_startblock = bno;
 559	new->xefi_blockcount = (xfs_extlen_t)len;
 560	if (oinfo)
 561		new->xefi_oinfo = *oinfo;
 562	else
 563		new->xefi_oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
 564	new->xefi_skip_discard = skip_discard;
 565	trace_xfs_bmap_free_defer(tp->t_mountp,
 566			XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
 567			XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
 568	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
 569}
 570
 571/*
 572 * Inode fork format manipulation functions
 573 */
 574
 575/*
 576 * Convert the inode format to extent format if it currently is in btree format,
 577 * but the extent list is small enough that it fits into the extent format.
 578 *
 579 * Since the extents are already in-core, all we have to do is give up the space
 580 * for the btree root and pitch the leaf block.
 581 */
 582STATIC int				/* error */
 583xfs_bmap_btree_to_extents(
 584	struct xfs_trans	*tp,	/* transaction pointer */
 585	struct xfs_inode	*ip,	/* incore inode pointer */
 586	struct xfs_btree_cur	*cur,	/* btree cursor */
 587	int			*logflagsp, /* inode logging flags */
 588	int			whichfork)  /* data or attr fork */
 589{
 590	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 591	struct xfs_mount	*mp = ip->i_mount;
 592	struct xfs_btree_block	*rblock = ifp->if_broot;
 593	struct xfs_btree_block	*cblock;/* child btree block */
 594	xfs_fsblock_t		cbno;	/* child block number */
 595	xfs_buf_t		*cbp;	/* child block's buffer */
 596	int			error;	/* error return value */
 597	__be64			*pp;	/* ptr to block address */
 598	struct xfs_owner_info	oinfo;
 599
 600	/* check if we actually need the extent format first: */
 601	if (!xfs_bmap_wants_extents(ip, whichfork))
 602		return 0;
 603
 604	ASSERT(cur);
 605	ASSERT(whichfork != XFS_COW_FORK);
 606	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
 607	ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
 608	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
 609	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
 610	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
 611
 612	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
 613	cbno = be64_to_cpu(*pp);
 614#ifdef DEBUG
 615	if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_btree_check_lptr(cur, cbno, 1)))
 616		return -EFSCORRUPTED;
 617#endif
 618	error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
 619				&xfs_bmbt_buf_ops);
 620	if (error)
 621		return error;
 622	cblock = XFS_BUF_TO_BLOCK(cbp);
 623	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
 624		return error;
 
 625	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
 626	xfs_bmap_add_free(cur->bc_tp, cbno, 1, &oinfo);
 627	ip->i_d.di_nblocks--;
 
 
 
 
 628	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 629	xfs_trans_binval(tp, cbp);
 630	if (cur->bc_bufs[0] == cbp)
 631		cur->bc_bufs[0] = NULL;
 632	xfs_iroot_realloc(ip, -1, whichfork);
 633	ASSERT(ifp->if_broot == NULL);
 634	ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
 635	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 636	*logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
 637	return 0;
 638}
 639
 640/*
 641 * Convert an extents-format file into a btree-format file.
 642 * The new file will have a root block (in the inode) and a single child block.
 643 */
 644STATIC int					/* error */
 645xfs_bmap_extents_to_btree(
 646	struct xfs_trans	*tp,		/* transaction pointer */
 647	struct xfs_inode	*ip,		/* incore inode pointer */
 648	struct xfs_btree_cur	**curp,		/* cursor returned to caller */
 649	int			wasdel,		/* converting a delayed alloc */
 650	int			*logflagsp,	/* inode logging flags */
 651	int			whichfork)	/* data or attr fork */
 652{
 653	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
 654	struct xfs_buf		*abp;		/* buffer for ablock */
 655	struct xfs_alloc_arg	args;		/* allocation arguments */
 656	struct xfs_bmbt_rec	*arp;		/* child record pointer */
 657	struct xfs_btree_block	*block;		/* btree root block */
 658	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
 659	int			error;		/* error return value */
 660	struct xfs_ifork	*ifp;		/* inode fork pointer */
 661	struct xfs_bmbt_key	*kp;		/* root block key pointer */
 662	struct xfs_mount	*mp;		/* mount structure */
 663	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
 664	struct xfs_iext_cursor	icur;
 665	struct xfs_bmbt_irec	rec;
 666	xfs_extnum_t		cnt = 0;
 667
 668	mp = ip->i_mount;
 669	ASSERT(whichfork != XFS_COW_FORK);
 670	ifp = XFS_IFORK_PTR(ip, whichfork);
 671	ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS);
 672
 673	/*
 674	 * Make space in the inode incore. This needs to be undone if we fail
 675	 * to expand the root.
 676	 */
 677	xfs_iroot_realloc(ip, 1, whichfork);
 678	ifp->if_flags |= XFS_IFBROOT;
 679
 680	/*
 681	 * Fill in the root.
 682	 */
 683	block = ifp->if_broot;
 684	xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
 685				 XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
 686				 XFS_BTREE_LONG_PTRS);
 687	/*
 688	 * Need a cursor.  Can't allocate until bb_level is filled in.
 689	 */
 690	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 691	cur->bc_ino.flags = wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
 692	/*
 693	 * Convert to a btree with two levels, one record in root.
 694	 */
 695	ifp->if_format = XFS_DINODE_FMT_BTREE;
 696	memset(&args, 0, sizeof(args));
 697	args.tp = tp;
 698	args.mp = mp;
 699	xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
 700	if (tp->t_firstblock == NULLFSBLOCK) {
 701		args.type = XFS_ALLOCTYPE_START_BNO;
 702		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
 703	} else if (tp->t_flags & XFS_TRANS_LOWMODE) {
 704		args.type = XFS_ALLOCTYPE_START_BNO;
 705		args.fsbno = tp->t_firstblock;
 706	} else {
 707		args.type = XFS_ALLOCTYPE_NEAR_BNO;
 708		args.fsbno = tp->t_firstblock;
 709	}
 710	args.minlen = args.maxlen = args.prod = 1;
 711	args.wasdel = wasdel;
 712	*logflagsp = 0;
 713	error = xfs_alloc_vextent(&args);
 
 714	if (error)
 715		goto out_root_realloc;
 716
 
 
 
 717	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
 718		error = -ENOSPC;
 719		goto out_root_realloc;
 720	}
 721
 722	/*
 723	 * Allocation can't fail, the space was reserved.
 724	 */
 725	ASSERT(tp->t_firstblock == NULLFSBLOCK ||
 726	       args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock));
 727	tp->t_firstblock = args.fsbno;
 728	cur->bc_ino.allocated++;
 729	ip->i_d.di_nblocks++;
 730	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
 731	error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
 732			XFS_FSB_TO_DADDR(mp, args.fsbno),
 733			mp->m_bsize, 0, &abp);
 734	if (error)
 735		goto out_unreserve_dquot;
 736
 737	/*
 738	 * Fill in the child block.
 739	 */
 740	abp->b_ops = &xfs_bmbt_buf_ops;
 741	ablock = XFS_BUF_TO_BLOCK(abp);
 742	xfs_btree_init_block_int(mp, ablock, abp->b_bn,
 743				XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
 744				XFS_BTREE_LONG_PTRS);
 745
 746	for_each_xfs_iext(ifp, &icur, &rec) {
 747		if (isnullstartblock(rec.br_startblock))
 748			continue;
 749		arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
 750		xfs_bmbt_disk_set_all(arp, &rec);
 751		cnt++;
 752	}
 753	ASSERT(cnt == ifp->if_nextents);
 754	xfs_btree_set_numrecs(ablock, cnt);
 755
 756	/*
 757	 * Fill in the root key and pointer.
 758	 */
 759	kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
 760	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
 761	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
 762	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
 763						be16_to_cpu(block->bb_level)));
 764	*pp = cpu_to_be64(args.fsbno);
 765
 766	/*
 767	 * Do all this logging at the end so that
 768	 * the root is at the right level.
 769	 */
 770	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
 771	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
 772	ASSERT(*curp == NULL);
 773	*curp = cur;
 774	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
 775	return 0;
 776
 777out_unreserve_dquot:
 778	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 779out_root_realloc:
 780	xfs_iroot_realloc(ip, -1, whichfork);
 781	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 782	ASSERT(ifp->if_broot == NULL);
 783	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 784
 785	return error;
 786}
 787
 788/*
 789 * Convert a local file to an extents file.
 790 * This code is out of bounds for data forks of regular files,
 791 * since the file data needs to get logged so things will stay consistent.
 792 * (The bmap-level manipulations are ok, though).
 793 */
 794void
 795xfs_bmap_local_to_extents_empty(
 796	struct xfs_trans	*tp,
 797	struct xfs_inode	*ip,
 798	int			whichfork)
 799{
 800	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 801
 802	ASSERT(whichfork != XFS_COW_FORK);
 803	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
 804	ASSERT(ifp->if_bytes == 0);
 805	ASSERT(ifp->if_nextents == 0);
 806
 807	xfs_bmap_forkoff_reset(ip, whichfork);
 808	ifp->if_flags &= ~XFS_IFINLINE;
 809	ifp->if_flags |= XFS_IFEXTENTS;
 810	ifp->if_u1.if_root = NULL;
 811	ifp->if_height = 0;
 812	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 813	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 814}
 815
 816
 817STATIC int				/* error */
 818xfs_bmap_local_to_extents(
 819	xfs_trans_t	*tp,		/* transaction pointer */
 820	xfs_inode_t	*ip,		/* incore inode pointer */
 821	xfs_extlen_t	total,		/* total blocks needed by transaction */
 822	int		*logflagsp,	/* inode logging flags */
 823	int		whichfork,
 824	void		(*init_fn)(struct xfs_trans *tp,
 825				   struct xfs_buf *bp,
 826				   struct xfs_inode *ip,
 827				   struct xfs_ifork *ifp))
 828{
 829	int		error = 0;
 830	int		flags;		/* logging flags returned */
 831	struct xfs_ifork *ifp;		/* inode fork pointer */
 832	xfs_alloc_arg_t	args;		/* allocation arguments */
 833	xfs_buf_t	*bp;		/* buffer for extent block */
 834	struct xfs_bmbt_irec rec;
 835	struct xfs_iext_cursor icur;
 836
 837	/*
 838	 * We don't want to deal with the case of keeping inode data inline yet.
 839	 * So sending the data fork of a regular inode is invalid.
 840	 */
 841	ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
 842	ifp = XFS_IFORK_PTR(ip, whichfork);
 843	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
 844
 845	if (!ifp->if_bytes) {
 846		xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
 847		flags = XFS_ILOG_CORE;
 848		goto done;
 849	}
 850
 851	flags = 0;
 852	error = 0;
 853	ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS)) == XFS_IFINLINE);
 854	memset(&args, 0, sizeof(args));
 855	args.tp = tp;
 856	args.mp = ip->i_mount;
 
 
 857	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
 
 858	/*
 859	 * Allocate a block.  We know we need only one, since the
 860	 * file currently fits in an inode.
 861	 */
 862	if (tp->t_firstblock == NULLFSBLOCK) {
 863		args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
 864		args.type = XFS_ALLOCTYPE_START_BNO;
 865	} else {
 866		args.fsbno = tp->t_firstblock;
 867		args.type = XFS_ALLOCTYPE_NEAR_BNO;
 868	}
 869	args.total = total;
 870	args.minlen = args.maxlen = args.prod = 1;
 871	error = xfs_alloc_vextent(&args);
 
 872	if (error)
 873		goto done;
 874
 875	/* Can't fail, the space was reserved. */
 876	ASSERT(args.fsbno != NULLFSBLOCK);
 877	ASSERT(args.len == 1);
 878	tp->t_firstblock = args.fsbno;
 879	error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
 880			XFS_FSB_TO_DADDR(args.mp, args.fsbno),
 881			args.mp->m_bsize, 0, &bp);
 882	if (error)
 883		goto done;
 884
 885	/*
 886	 * Initialize the block, copy the data and log the remote buffer.
 887	 *
 888	 * The callout is responsible for logging because the remote format
 889	 * might differ from the local format and thus we don't know how much to
 890	 * log here. Note that init_fn must also set the buffer log item type
 891	 * correctly.
 892	 */
 893	init_fn(tp, bp, ip, ifp);
 894
 895	/* account for the change in fork size */
 896	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
 897	xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
 898	flags |= XFS_ILOG_CORE;
 899
 900	ifp->if_u1.if_root = NULL;
 901	ifp->if_height = 0;
 902
 903	rec.br_startoff = 0;
 904	rec.br_startblock = args.fsbno;
 905	rec.br_blockcount = 1;
 906	rec.br_state = XFS_EXT_NORM;
 907	xfs_iext_first(ifp, &icur);
 908	xfs_iext_insert(ip, &icur, &rec, 0);
 909
 910	ifp->if_nextents = 1;
 911	ip->i_d.di_nblocks = 1;
 912	xfs_trans_mod_dquot_byino(tp, ip,
 913		XFS_TRANS_DQ_BCOUNT, 1L);
 914	flags |= xfs_ilog_fext(whichfork);
 915
 916done:
 917	*logflagsp = flags;
 918	return error;
 919}
 920
 921/*
 922 * Called from xfs_bmap_add_attrfork to handle btree format files.
 923 */
 924STATIC int					/* error */
 925xfs_bmap_add_attrfork_btree(
 926	xfs_trans_t		*tp,		/* transaction pointer */
 927	xfs_inode_t		*ip,		/* incore inode pointer */
 928	int			*flags)		/* inode logging flags */
 929{
 930	xfs_btree_cur_t		*cur;		/* btree cursor */
 
 931	int			error;		/* error return value */
 932	xfs_mount_t		*mp;		/* file system mount struct */
 933	int			stat;		/* newroot status */
 934
 935	mp = ip->i_mount;
 936	if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
 
 937		*flags |= XFS_ILOG_DBROOT;
 938	else {
 939		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
 940		error = xfs_bmbt_lookup_first(cur, &stat);
 941		if (error)
 942			goto error0;
 943		/* must be at least one entry */
 944		if (XFS_IS_CORRUPT(mp, stat != 1)) {
 945			error = -EFSCORRUPTED;
 946			goto error0;
 947		}
 948		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
 949			goto error0;
 950		if (stat == 0) {
 951			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 952			return -ENOSPC;
 953		}
 954		cur->bc_ino.allocated = 0;
 955		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 956	}
 957	return 0;
 958error0:
 959	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 960	return error;
 961}
 962
 963/*
 964 * Called from xfs_bmap_add_attrfork to handle extents format files.
 965 */
 966STATIC int					/* error */
 967xfs_bmap_add_attrfork_extents(
 968	struct xfs_trans	*tp,		/* transaction pointer */
 969	struct xfs_inode	*ip,		/* incore inode pointer */
 970	int			*flags)		/* inode logging flags */
 971{
 972	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
 973	int			error;		/* error return value */
 974
 975	if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <=
 976	    XFS_IFORK_DSIZE(ip))
 977		return 0;
 978	cur = NULL;
 979	error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
 980					  XFS_DATA_FORK);
 981	if (cur) {
 982		cur->bc_ino.allocated = 0;
 983		xfs_btree_del_cursor(cur, error);
 984	}
 985	return error;
 986}
 987
 988/*
 989 * Called from xfs_bmap_add_attrfork to handle local format files. Each
 990 * different data fork content type needs a different callout to do the
 991 * conversion. Some are basic and only require special block initialisation
 992 * callouts for the data formating, others (directories) are so specialised they
 993 * handle everything themselves.
 994 *
 995 * XXX (dgc): investigate whether directory conversion can use the generic
 996 * formatting callout. It should be possible - it's just a very complex
 997 * formatter.
 998 */
 999STATIC int					/* error */
1000xfs_bmap_add_attrfork_local(
1001	struct xfs_trans	*tp,		/* transaction pointer */
1002	struct xfs_inode	*ip,		/* incore inode pointer */
1003	int			*flags)		/* inode logging flags */
1004{
1005	struct xfs_da_args	dargs;		/* args for dir/attr code */
1006
1007	if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
1008		return 0;
1009
1010	if (S_ISDIR(VFS_I(ip)->i_mode)) {
1011		memset(&dargs, 0, sizeof(dargs));
1012		dargs.geo = ip->i_mount->m_dir_geo;
1013		dargs.dp = ip;
1014		dargs.total = dargs.geo->fsbcount;
1015		dargs.whichfork = XFS_DATA_FORK;
1016		dargs.trans = tp;
1017		return xfs_dir2_sf_to_block(&dargs);
1018	}
1019
1020	if (S_ISLNK(VFS_I(ip)->i_mode))
1021		return xfs_bmap_local_to_extents(tp, ip, 1, flags,
1022						 XFS_DATA_FORK,
1023						 xfs_symlink_local_to_remote);
1024
1025	/* should only be called for types that support local format data */
1026	ASSERT(0);
1027	return -EFSCORRUPTED;
1028}
1029
1030/* Set an inode attr fork off based on the format */
1031int
 
 
1032xfs_bmap_set_attrforkoff(
1033	struct xfs_inode	*ip,
1034	int			size,
1035	int			*version)
1036{
 
 
1037	switch (ip->i_df.if_format) {
1038	case XFS_DINODE_FMT_DEV:
1039		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
1040		break;
1041	case XFS_DINODE_FMT_LOCAL:
1042	case XFS_DINODE_FMT_EXTENTS:
1043	case XFS_DINODE_FMT_BTREE:
1044		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
1045		if (!ip->i_d.di_forkoff)
1046			ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
1047		else if ((ip->i_mount->m_flags & XFS_MOUNT_ATTR2) && version)
1048			*version = 2;
1049		break;
1050	default:
1051		ASSERT(0);
1052		return -EINVAL;
1053	}
1054
1055	return 0;
1056}
1057
1058/*
1059 * Convert inode from non-attributed to attributed.
1060 * Must not be in a transaction, ip must not be locked.
1061 */
1062int						/* error code */
1063xfs_bmap_add_attrfork(
1064	xfs_inode_t		*ip,		/* incore inode pointer */
1065	int			size,		/* space new attribute needs */
1066	int			rsvd)		/* xact may use reserved blks */
1067{
1068	xfs_mount_t		*mp;		/* mount structure */
1069	xfs_trans_t		*tp;		/* transaction pointer */
1070	int			blks;		/* space reservation */
1071	int			version = 1;	/* superblock attr version */
1072	int			logflags;	/* logging flags */
1073	int			error;		/* error return value */
1074
1075	ASSERT(XFS_IFORK_Q(ip) == 0);
1076
1077	mp = ip->i_mount;
1078	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1079
1080	blks = XFS_ADDAFORK_SPACE_RES(mp);
1081
1082	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0,
1083			rsvd ? XFS_TRANS_RESERVE : 0, &tp);
1084	if (error)
1085		return error;
1086
1087	xfs_ilock(ip, XFS_ILOCK_EXCL);
1088	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
1089			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
1090			XFS_QMOPT_RES_REGBLKS);
1091	if (error)
1092		goto trans_cancel;
1093	if (XFS_IFORK_Q(ip))
1094		goto trans_cancel;
1095
1096	xfs_trans_ijoin(tp, ip, 0);
1097	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1098	error = xfs_bmap_set_attrforkoff(ip, size, &version);
1099	if (error)
1100		goto trans_cancel;
1101	ASSERT(ip->i_afp == NULL);
1102
1103	ip->i_afp = kmem_cache_zalloc(xfs_ifork_zone,
1104				      GFP_KERNEL | __GFP_NOFAIL);
1105
1106	ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
1107	ip->i_afp->if_flags = XFS_IFEXTENTS;
1108	logflags = 0;
1109	switch (ip->i_df.if_format) {
1110	case XFS_DINODE_FMT_LOCAL:
1111		error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
1112		break;
1113	case XFS_DINODE_FMT_EXTENTS:
1114		error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
1115		break;
1116	case XFS_DINODE_FMT_BTREE:
1117		error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
1118		break;
1119	default:
1120		error = 0;
1121		break;
1122	}
1123	if (logflags)
1124		xfs_trans_log_inode(tp, ip, logflags);
1125	if (error)
1126		goto trans_cancel;
1127	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
1128	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
1129		bool log_sb = false;
1130
1131		spin_lock(&mp->m_sb_lock);
1132		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
1133			xfs_sb_version_addattr(&mp->m_sb);
1134			log_sb = true;
1135		}
1136		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
1137			xfs_sb_version_addattr2(&mp->m_sb);
1138			log_sb = true;
1139		}
1140		spin_unlock(&mp->m_sb_lock);
1141		if (log_sb)
1142			xfs_log_sb(tp);
1143	}
1144
1145	error = xfs_trans_commit(tp);
1146	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1147	return error;
1148
1149trans_cancel:
1150	xfs_trans_cancel(tp);
1151	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1152	return error;
1153}
1154
1155/*
1156 * Internal and external extent tree search functions.
1157 */
1158
/*
 * State carried through the btree walk that xfs_iread_extents() runs to load
 * a fork's extent records; it is handed to xfs_iread_bmbt_block() as the
 * walk's private pointer.
 */
1159struct xfs_iread_state {
	/* insertion position in the incore extent tree for the next record */
1160	struct xfs_iext_cursor	icur;
	/* records inserted so far; checked against the fork's if_nextents */
1161	xfs_extnum_t		loaded;
1162};
1163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1164/* Stuff every bmbt record from this block into the incore extent map. */
1165static int
1166xfs_iread_bmbt_block(
1167	struct xfs_btree_cur	*cur,
1168	int			level,
1169	void			*priv)
1170{
1171	struct xfs_iread_state	*ir = priv;
1172	struct xfs_mount	*mp = cur->bc_mp;
1173	struct xfs_inode	*ip = cur->bc_ino.ip;
1174	struct xfs_btree_block	*block;
1175	struct xfs_buf		*bp;
1176	struct xfs_bmbt_rec	*frp;
1177	xfs_extnum_t		num_recs;
1178	xfs_extnum_t		j;
1179	int			whichfork = cur->bc_ino.whichfork;
1180	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1181
1182	block = xfs_btree_get_block(cur, level, &bp);
1183
1184	/* Abort if we find more records than nextents. */
1185	num_recs = xfs_btree_get_numrecs(block);
1186	if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) {
1187		xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
1188				(unsigned long long)ip->i_ino);
1189		xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
1190				sizeof(*block), __this_address);
1191		return -EFSCORRUPTED;
1192	}
1193
1194	/* Copy records into the incore cache. */
1195	frp = XFS_BMBT_REC_ADDR(mp, block, 1);
1196	for (j = 0; j < num_recs; j++, frp++, ir->loaded++) {
1197		struct xfs_bmbt_irec	new;
1198		xfs_failaddr_t		fa;
1199
1200		xfs_bmbt_disk_get_all(frp, &new);
1201		fa = xfs_bmap_validate_extent(ip, whichfork, &new);
1202		if (fa) {
1203			xfs_inode_verifier_error(ip, -EFSCORRUPTED,
1204					"xfs_iread_extents(2)", frp,
1205					sizeof(*frp), fa);
1206			return -EFSCORRUPTED;
 
1207		}
1208		xfs_iext_insert(ip, &ir->icur, &new,
1209				xfs_bmap_fork_to_state(whichfork));
1210		trace_xfs_read_extent(ip, &ir->icur,
1211				xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
1212		xfs_iext_next(ifp, &ir->icur);
1213	}
1214
1215	return 0;
1216}
1217
1218/*
1219 * Read in extents from a btree-format inode.
1220 */
1221int
1222xfs_iread_extents(
1223	struct xfs_trans	*tp,
1224	struct xfs_inode	*ip,
1225	int			whichfork)
1226{
1227	struct xfs_iread_state	ir;
1228	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1229	struct xfs_mount	*mp = ip->i_mount;
1230	struct xfs_btree_cur	*cur;
1231	int			error;
1232
 
 
 
1233	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1234
1235	if (XFS_IS_CORRUPT(mp, ifp->if_format != XFS_DINODE_FMT_BTREE)) {
1236		error = -EFSCORRUPTED;
1237		goto out;
1238	}
1239
1240	ir.loaded = 0;
1241	xfs_iext_first(ifp, &ir.icur);
1242	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
1243	error = xfs_btree_visit_blocks(cur, xfs_iread_bmbt_block,
1244			XFS_BTREE_VISIT_RECORDS, &ir);
1245	xfs_btree_del_cursor(cur, error);
1246	if (error)
1247		goto out;
1248
1249	if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
1250		error = -EFSCORRUPTED;
1251		goto out;
1252	}
1253	ASSERT(ir.loaded == xfs_iext_count(ifp));
1254
1255	ifp->if_flags |= XFS_IFEXTENTS;
 
 
 
 
1256	return 0;
1257out:
1258	xfs_iext_destroy(ifp);
1259	return error;
1260}
1261
1262/*
1263 * Returns the relative block number of the first unused block(s) in the given
1264 * fork with at least "len" logically contiguous blocks free.  This is the
1265 * lowest-address hole if the fork has holes, else the first block past the end
1266 * of fork.  Return 0 if the fork is currently local (in-inode).
1267 */
1268int						/* error */
1269xfs_bmap_first_unused(
1270	struct xfs_trans	*tp,		/* transaction pointer */
1271	struct xfs_inode	*ip,		/* incore inode */
1272	xfs_extlen_t		len,		/* size of hole to find */
1273	xfs_fileoff_t		*first_unused,	/* unused block */
1274	int			whichfork)	/* data or attr fork */
1275{
1276	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1277	struct xfs_bmbt_irec	got;
1278	struct xfs_iext_cursor	icur;
1279	xfs_fileoff_t		lastaddr = 0;
1280	xfs_fileoff_t		lowest, max;
1281	int			error;
1282
1283	if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
1284		*first_unused = 0;
1285		return 0;
1286	}
1287
1288	ASSERT(xfs_ifork_has_extents(ifp));
1289
1290	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1291		error = xfs_iread_extents(tp, ip, whichfork);
1292		if (error)
1293			return error;
1294	}
1295
1296	lowest = max = *first_unused;
1297	for_each_xfs_iext(ifp, &icur, &got) {
1298		/*
1299		 * See if the hole before this extent will work.
1300		 */
1301		if (got.br_startoff >= lowest + len &&
1302		    got.br_startoff - max >= len)
1303			break;
1304		lastaddr = got.br_startoff + got.br_blockcount;
1305		max = XFS_FILEOFF_MAX(lastaddr, lowest);
1306	}
1307
1308	*first_unused = max;
1309	return 0;
1310}
1311
1312/*
1313 * Returns the file-relative block number of the last block - 1 before
1314 * last_block (input value) in the file.
1315 * This is not based on i_size, it is based on the extent records.
1316 * Returns 0 for local files, as they do not have extent records.
1317 */
1318int						/* error */
1319xfs_bmap_last_before(
1320	struct xfs_trans	*tp,		/* transaction pointer */
1321	struct xfs_inode	*ip,		/* incore inode */
1322	xfs_fileoff_t		*last_block,	/* last block */
1323	int			whichfork)	/* data or attr fork */
1324{
1325	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1326	struct xfs_bmbt_irec	got;
1327	struct xfs_iext_cursor	icur;
1328	int			error;
1329
1330	switch (ifp->if_format) {
1331	case XFS_DINODE_FMT_LOCAL:
1332		*last_block = 0;
1333		return 0;
1334	case XFS_DINODE_FMT_BTREE:
1335	case XFS_DINODE_FMT_EXTENTS:
1336		break;
1337	default:
1338		ASSERT(0);
1339		return -EFSCORRUPTED;
1340	}
1341
1342	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1343		error = xfs_iread_extents(tp, ip, whichfork);
1344		if (error)
1345			return error;
1346	}
1347
1348	if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
1349		*last_block = 0;
1350	return 0;
1351}
1352
1353int
1354xfs_bmap_last_extent(
1355	struct xfs_trans	*tp,
1356	struct xfs_inode	*ip,
1357	int			whichfork,
1358	struct xfs_bmbt_irec	*rec,
1359	int			*is_empty)
1360{
1361	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1362	struct xfs_iext_cursor	icur;
1363	int			error;
1364
1365	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1366		error = xfs_iread_extents(tp, ip, whichfork);
1367		if (error)
1368			return error;
1369	}
1370
1371	xfs_iext_last(ifp, &icur);
1372	if (!xfs_iext_get_extent(ifp, &icur, rec))
1373		*is_empty = 1;
1374	else
1375		*is_empty = 0;
1376	return 0;
1377}
1378
1379/*
1380 * Check the last inode extent to determine whether this allocation will result
1381 * in blocks being allocated at the end of the file. When we allocate new data
1382 * blocks at the end of the file which do not start at the previous data block,
1383 * we will try to align the new blocks at stripe unit boundaries.
1384 *
1385 * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1386 * at, or past the EOF.
1387 */
1388STATIC int
1389xfs_bmap_isaeof(
1390	struct xfs_bmalloca	*bma,
1391	int			whichfork)
1392{
1393	struct xfs_bmbt_irec	rec;
1394	int			is_empty;
1395	int			error;
1396
1397	bma->aeof = false;
1398	error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1399				     &is_empty);
1400	if (error)
1401		return error;
1402
1403	if (is_empty) {
1404		bma->aeof = true;
1405		return 0;
1406	}
1407
1408	/*
1409	 * Check if we are allocation or past the last extent, or at least into
1410	 * the last delayed allocated extent.
1411	 */
1412	bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1413		(bma->offset >= rec.br_startoff &&
1414		 isnullstartblock(rec.br_startblock));
1415	return 0;
1416}
1417
1418/*
1419 * Returns the file-relative block number of the first block past eof in
1420 * the file.  This is not based on i_size, it is based on the extent records.
1421 * Returns 0 for local files, as they do not have extent records.
1422 */
1423int
1424xfs_bmap_last_offset(
1425	struct xfs_inode	*ip,
1426	xfs_fileoff_t		*last_block,
1427	int			whichfork)
1428{
1429	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1430	struct xfs_bmbt_irec	rec;
1431	int			is_empty;
1432	int			error;
1433
1434	*last_block = 0;
1435
1436	if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
1437		return 0;
1438
1439	if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp)))
1440		return -EFSCORRUPTED;
1441
1442	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1443	if (error || is_empty)
1444		return error;
1445
1446	*last_block = rec.br_startoff + rec.br_blockcount;
1447	return 0;
1448}
1449
1450/*
1451 * Returns whether the selected fork of the inode has exactly one
1452 * block or not.  For the data fork we check this matches di_size,
1453 * implying the file's range is 0..bsize-1.
1454 */
1455int					/* 1=>1 block, 0=>otherwise */
1456xfs_bmap_one_block(
1457	struct xfs_inode	*ip,		/* incore inode */
1458	int			whichfork)	/* data or attr fork */
1459{
1460	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
1461	int			rval;		/* return value */
1462	struct xfs_bmbt_irec	s;		/* internal version of extent */
1463	struct xfs_iext_cursor icur;
1464
1465#ifndef DEBUG
1466	if (whichfork == XFS_DATA_FORK)
1467		return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
1468#endif	/* !DEBUG */
1469	if (ifp->if_nextents != 1)
1470		return 0;
1471	if (ifp->if_format != XFS_DINODE_FMT_EXTENTS)
1472		return 0;
1473	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1474	xfs_iext_first(ifp, &icur);
1475	xfs_iext_get_extent(ifp, &icur, &s);
1476	rval = s.br_startoff == 0 && s.br_blockcount == 1;
1477	if (rval && whichfork == XFS_DATA_FORK)
1478		ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
1479	return rval;
1480}
1481
1482/*
1483 * Extent tree manipulation functions used during allocation.
1484 */
1485
1486/*
1487 * Convert a delayed allocation to a real allocation.
1488 */
1489STATIC int				/* error */
1490xfs_bmap_add_extent_delay_real(
1491	struct xfs_bmalloca	*bma,
1492	int			whichfork)
1493{
1494	struct xfs_mount	*mp = bma->ip->i_mount;
1495	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
1496	struct xfs_bmbt_irec	*new = &bma->got;
1497	int			error;	/* error return value */
1498	int			i;	/* temp state */
1499	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
1500	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
1501					/* left is 0, right is 1, prev is 2 */
1502	int			rval=0;	/* return value (logging flags) */
1503	int			state = xfs_bmap_fork_to_state(whichfork);
1504	xfs_filblks_t		da_new; /* new count del alloc blocks used */
1505	xfs_filblks_t		da_old; /* old count del alloc blocks used */
1506	xfs_filblks_t		temp=0;	/* value for da_new calculations */
1507	int			tmp_rval;	/* partial logging flags */
1508	struct xfs_bmbt_irec	old;
1509
1510	ASSERT(whichfork != XFS_ATTR_FORK);
1511	ASSERT(!isnullstartblock(new->br_startblock));
1512	ASSERT(!bma->cur ||
1513	       (bma->cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
1514
1515	XFS_STATS_INC(mp, xs_add_exlist);
1516
1517#define	LEFT		r[0]
1518#define	RIGHT		r[1]
1519#define	PREV		r[2]
1520
1521	/*
1522	 * Set up a bunch of variables to make the tests simpler.
1523	 */
1524	xfs_iext_get_extent(ifp, &bma->icur, &PREV);
1525	new_endoff = new->br_startoff + new->br_blockcount;
1526	ASSERT(isnullstartblock(PREV.br_startblock));
1527	ASSERT(PREV.br_startoff <= new->br_startoff);
1528	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1529
1530	da_old = startblockval(PREV.br_startblock);
1531	da_new = 0;
1532
1533	/*
1534	 * Set flags determining what part of the previous delayed allocation
1535	 * extent is being replaced by a real allocation.
1536	 */
1537	if (PREV.br_startoff == new->br_startoff)
1538		state |= BMAP_LEFT_FILLING;
1539	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1540		state |= BMAP_RIGHT_FILLING;
1541
1542	/*
1543	 * Check and set flags if this segment has a left neighbor.
1544	 * Don't set contiguous if the combined extent would be too large.
1545	 */
1546	if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
1547		state |= BMAP_LEFT_VALID;
1548		if (isnullstartblock(LEFT.br_startblock))
1549			state |= BMAP_LEFT_DELAY;
1550	}
1551
1552	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1553	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1554	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1555	    LEFT.br_state == new->br_state &&
1556	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1557		state |= BMAP_LEFT_CONTIG;
1558
1559	/*
1560	 * Check and set flags if this segment has a right neighbor.
1561	 * Don't set contiguous if the combined extent would be too large.
1562	 * Also check for all-three-contiguous being too large.
1563	 */
1564	if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
1565		state |= BMAP_RIGHT_VALID;
1566		if (isnullstartblock(RIGHT.br_startblock))
1567			state |= BMAP_RIGHT_DELAY;
1568	}
1569
1570	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1571	    new_endoff == RIGHT.br_startoff &&
1572	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1573	    new->br_state == RIGHT.br_state &&
1574	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1575	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1576		       BMAP_RIGHT_FILLING)) !=
1577		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1578		       BMAP_RIGHT_FILLING) ||
1579	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1580			<= MAXEXTLEN))
1581		state |= BMAP_RIGHT_CONTIG;
1582
1583	error = 0;
1584	/*
1585	 * Switch out based on the FILLING and CONTIG state bits.
1586	 */
1587	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1588			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1589	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1590	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1591		/*
1592		 * Filling in all of a previously delayed allocation extent.
1593		 * The left and right neighbors are both contiguous with new.
1594		 */
1595		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
1596
1597		xfs_iext_remove(bma->ip, &bma->icur, state);
1598		xfs_iext_remove(bma->ip, &bma->icur, state);
1599		xfs_iext_prev(ifp, &bma->icur);
1600		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1601		ifp->if_nextents--;
1602
1603		if (bma->cur == NULL)
1604			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1605		else {
1606			rval = XFS_ILOG_CORE;
1607			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1608			if (error)
1609				goto done;
1610			if (XFS_IS_CORRUPT(mp, i != 1)) {
1611				error = -EFSCORRUPTED;
1612				goto done;
1613			}
1614			error = xfs_btree_delete(bma->cur, &i);
1615			if (error)
1616				goto done;
1617			if (XFS_IS_CORRUPT(mp, i != 1)) {
1618				error = -EFSCORRUPTED;
1619				goto done;
1620			}
1621			error = xfs_btree_decrement(bma->cur, 0, &i);
1622			if (error)
1623				goto done;
1624			if (XFS_IS_CORRUPT(mp, i != 1)) {
1625				error = -EFSCORRUPTED;
1626				goto done;
1627			}
1628			error = xfs_bmbt_update(bma->cur, &LEFT);
1629			if (error)
1630				goto done;
1631		}
1632		break;
1633
1634	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1635		/*
1636		 * Filling in all of a previously delayed allocation extent.
1637		 * The left neighbor is contiguous, the right is not.
1638		 */
1639		old = LEFT;
1640		LEFT.br_blockcount += PREV.br_blockcount;
1641
1642		xfs_iext_remove(bma->ip, &bma->icur, state);
1643		xfs_iext_prev(ifp, &bma->icur);
1644		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1645
1646		if (bma->cur == NULL)
1647			rval = XFS_ILOG_DEXT;
1648		else {
1649			rval = 0;
1650			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1651			if (error)
1652				goto done;
1653			if (XFS_IS_CORRUPT(mp, i != 1)) {
1654				error = -EFSCORRUPTED;
1655				goto done;
1656			}
1657			error = xfs_bmbt_update(bma->cur, &LEFT);
1658			if (error)
1659				goto done;
1660		}
1661		break;
1662
1663	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1664		/*
1665		 * Filling in all of a previously delayed allocation extent.
1666		 * The right neighbor is contiguous, the left is not. Take care
1667		 * with delay -> unwritten extent allocation here because the
1668		 * delalloc record we are overwriting is always written.
1669		 */
1670		PREV.br_startblock = new->br_startblock;
1671		PREV.br_blockcount += RIGHT.br_blockcount;
1672		PREV.br_state = new->br_state;
1673
1674		xfs_iext_next(ifp, &bma->icur);
1675		xfs_iext_remove(bma->ip, &bma->icur, state);
1676		xfs_iext_prev(ifp, &bma->icur);
1677		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1678
1679		if (bma->cur == NULL)
1680			rval = XFS_ILOG_DEXT;
1681		else {
1682			rval = 0;
1683			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1684			if (error)
1685				goto done;
1686			if (XFS_IS_CORRUPT(mp, i != 1)) {
1687				error = -EFSCORRUPTED;
1688				goto done;
1689			}
1690			error = xfs_bmbt_update(bma->cur, &PREV);
1691			if (error)
1692				goto done;
1693		}
1694		break;
1695
1696	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1697		/*
1698		 * Filling in all of a previously delayed allocation extent.
1699		 * Neither the left nor right neighbors are contiguous with
1700		 * the new one.
1701		 */
1702		PREV.br_startblock = new->br_startblock;
1703		PREV.br_state = new->br_state;
1704		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1705		ifp->if_nextents++;
1706
1707		if (bma->cur == NULL)
1708			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1709		else {
1710			rval = XFS_ILOG_CORE;
1711			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1712			if (error)
1713				goto done;
1714			if (XFS_IS_CORRUPT(mp, i != 0)) {
1715				error = -EFSCORRUPTED;
1716				goto done;
1717			}
1718			error = xfs_btree_insert(bma->cur, &i);
1719			if (error)
1720				goto done;
1721			if (XFS_IS_CORRUPT(mp, i != 1)) {
1722				error = -EFSCORRUPTED;
1723				goto done;
1724			}
1725		}
1726		break;
1727
1728	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1729		/*
1730		 * Filling in the first part of a previous delayed allocation.
1731		 * The left neighbor is contiguous.
1732		 */
1733		old = LEFT;
1734		temp = PREV.br_blockcount - new->br_blockcount;
1735		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1736				startblockval(PREV.br_startblock));
1737
1738		LEFT.br_blockcount += new->br_blockcount;
1739
1740		PREV.br_blockcount = temp;
1741		PREV.br_startoff += new->br_blockcount;
1742		PREV.br_startblock = nullstartblock(da_new);
1743
1744		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1745		xfs_iext_prev(ifp, &bma->icur);
1746		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1747
1748		if (bma->cur == NULL)
1749			rval = XFS_ILOG_DEXT;
1750		else {
1751			rval = 0;
1752			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1753			if (error)
1754				goto done;
1755			if (XFS_IS_CORRUPT(mp, i != 1)) {
1756				error = -EFSCORRUPTED;
1757				goto done;
1758			}
1759			error = xfs_bmbt_update(bma->cur, &LEFT);
1760			if (error)
1761				goto done;
1762		}
1763		break;
1764
1765	case BMAP_LEFT_FILLING:
1766		/*
1767		 * Filling in the first part of a previous delayed allocation.
1768		 * The left neighbor is not contiguous.
1769		 */
1770		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1771		ifp->if_nextents++;
1772
1773		if (bma->cur == NULL)
1774			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1775		else {
1776			rval = XFS_ILOG_CORE;
1777			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1778			if (error)
1779				goto done;
1780			if (XFS_IS_CORRUPT(mp, i != 0)) {
1781				error = -EFSCORRUPTED;
1782				goto done;
1783			}
1784			error = xfs_btree_insert(bma->cur, &i);
1785			if (error)
1786				goto done;
1787			if (XFS_IS_CORRUPT(mp, i != 1)) {
1788				error = -EFSCORRUPTED;
1789				goto done;
1790			}
1791		}
1792
1793		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1794			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1795					&bma->cur, 1, &tmp_rval, whichfork);
1796			rval |= tmp_rval;
1797			if (error)
1798				goto done;
1799		}
1800
1801		temp = PREV.br_blockcount - new->br_blockcount;
1802		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1803			startblockval(PREV.br_startblock) -
1804			(bma->cur ? bma->cur->bc_ino.allocated : 0));
1805
1806		PREV.br_startoff = new_endoff;
1807		PREV.br_blockcount = temp;
1808		PREV.br_startblock = nullstartblock(da_new);
1809		xfs_iext_next(ifp, &bma->icur);
1810		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1811		xfs_iext_prev(ifp, &bma->icur);
1812		break;
1813
1814	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1815		/*
1816		 * Filling in the last part of a previous delayed allocation.
1817		 * The right neighbor is contiguous with the new allocation.
1818		 */
1819		old = RIGHT;
1820		RIGHT.br_startoff = new->br_startoff;
1821		RIGHT.br_startblock = new->br_startblock;
1822		RIGHT.br_blockcount += new->br_blockcount;
1823
1824		if (bma->cur == NULL)
1825			rval = XFS_ILOG_DEXT;
1826		else {
1827			rval = 0;
1828			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1829			if (error)
1830				goto done;
1831			if (XFS_IS_CORRUPT(mp, i != 1)) {
1832				error = -EFSCORRUPTED;
1833				goto done;
1834			}
1835			error = xfs_bmbt_update(bma->cur, &RIGHT);
1836			if (error)
1837				goto done;
1838		}
1839
1840		temp = PREV.br_blockcount - new->br_blockcount;
1841		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1842			startblockval(PREV.br_startblock));
1843
1844		PREV.br_blockcount = temp;
1845		PREV.br_startblock = nullstartblock(da_new);
1846
1847		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1848		xfs_iext_next(ifp, &bma->icur);
1849		xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
1850		break;
1851
1852	case BMAP_RIGHT_FILLING:
1853		/*
1854		 * Filling in the last part of a previous delayed allocation.
1855		 * The right neighbor is not contiguous.
1856		 */
1857		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1858		ifp->if_nextents++;
1859
1860		if (bma->cur == NULL)
1861			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1862		else {
1863			rval = XFS_ILOG_CORE;
1864			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1865			if (error)
1866				goto done;
1867			if (XFS_IS_CORRUPT(mp, i != 0)) {
1868				error = -EFSCORRUPTED;
1869				goto done;
1870			}
1871			error = xfs_btree_insert(bma->cur, &i);
1872			if (error)
1873				goto done;
1874			if (XFS_IS_CORRUPT(mp, i != 1)) {
1875				error = -EFSCORRUPTED;
1876				goto done;
1877			}
1878		}
1879
1880		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1881			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1882				&bma->cur, 1, &tmp_rval, whichfork);
1883			rval |= tmp_rval;
1884			if (error)
1885				goto done;
1886		}
1887
1888		temp = PREV.br_blockcount - new->br_blockcount;
1889		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1890			startblockval(PREV.br_startblock) -
1891			(bma->cur ? bma->cur->bc_ino.allocated : 0));
1892
1893		PREV.br_startblock = nullstartblock(da_new);
1894		PREV.br_blockcount = temp;
1895		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1896		xfs_iext_next(ifp, &bma->icur);
1897		break;
1898
1899	case 0:
1900		/*
1901		 * Filling in the middle part of a previous delayed allocation.
1902		 * Contiguity is impossible here.
1903		 * This case is avoided almost all the time.
1904		 *
1905		 * We start with a delayed allocation:
1906		 *
1907		 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
1908		 *  PREV @ idx
1909		 *
1910	         * and we are allocating:
1911		 *                     +rrrrrrrrrrrrrrrrr+
1912		 *			      new
1913		 *
1914		 * and we set it up for insertion as:
1915		 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
1916		 *                            new
1917		 *  PREV @ idx          LEFT              RIGHT
1918		 *                      inserted at idx + 1
1919		 */
1920		old = PREV;
1921
1922		/* LEFT is the new middle */
1923		LEFT = *new;
1924
1925		/* RIGHT is the new right */
1926		RIGHT.br_state = PREV.br_state;
1927		RIGHT.br_startoff = new_endoff;
1928		RIGHT.br_blockcount =
1929			PREV.br_startoff + PREV.br_blockcount - new_endoff;
1930		RIGHT.br_startblock =
1931			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1932					RIGHT.br_blockcount));
1933
1934		/* truncate PREV */
1935		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
1936		PREV.br_startblock =
1937			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1938					PREV.br_blockcount));
1939		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1940
1941		xfs_iext_next(ifp, &bma->icur);
1942		xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
1943		xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
1944		ifp->if_nextents++;
1945
1946		if (bma->cur == NULL)
1947			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1948		else {
1949			rval = XFS_ILOG_CORE;
1950			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1951			if (error)
1952				goto done;
1953			if (XFS_IS_CORRUPT(mp, i != 0)) {
1954				error = -EFSCORRUPTED;
1955				goto done;
1956			}
1957			error = xfs_btree_insert(bma->cur, &i);
1958			if (error)
1959				goto done;
1960			if (XFS_IS_CORRUPT(mp, i != 1)) {
1961				error = -EFSCORRUPTED;
1962				goto done;
1963			}
1964		}
1965
1966		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1967			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1968					&bma->cur, 1, &tmp_rval, whichfork);
1969			rval |= tmp_rval;
1970			if (error)
1971				goto done;
1972		}
1973
1974		da_new = startblockval(PREV.br_startblock) +
1975			 startblockval(RIGHT.br_startblock);
1976		break;
1977
1978	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1979	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1980	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1981	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1982	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1983	case BMAP_LEFT_CONTIG:
1984	case BMAP_RIGHT_CONTIG:
1985		/*
1986		 * These cases are all impossible.
1987		 */
1988		ASSERT(0);
1989	}
1990
1991	/* add reverse mapping unless caller opted out */
1992	if (!(bma->flags & XFS_BMAPI_NORMAP))
1993		xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new);
1994
1995	/* convert to a btree if necessary */
1996	if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1997		int	tmp_logflags;	/* partial log flag return val */
1998
1999		ASSERT(bma->cur == NULL);
2000		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2001				&bma->cur, da_old > 0, &tmp_logflags,
2002				whichfork);
2003		bma->logflags |= tmp_logflags;
2004		if (error)
2005			goto done;
2006	}
2007
2008	if (da_new != da_old)
2009		xfs_mod_delalloc(mp, (int64_t)da_new - da_old);
2010
2011	if (bma->cur) {
2012		da_new += bma->cur->bc_ino.allocated;
2013		bma->cur->bc_ino.allocated = 0;
2014	}
2015
2016	/* adjust for changes in reserved delayed indirect blocks */
2017	if (da_new != da_old) {
2018		ASSERT(state == 0 || da_new < da_old);
2019		error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
2020				false);
2021	}
2022
2023	xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
2024done:
2025	if (whichfork != XFS_COW_FORK)
2026		bma->logflags |= rval;
2027	return error;
2028#undef	LEFT
2029#undef	RIGHT
2030#undef	PREV
2031}
2032
2033/*
2034 * Convert an unwritten allocation to a real allocation or vice versa.
 *
 * @new must lie entirely inside the extent currently under @icur (called
 * PREV below) and must differ from it in br_state; both are asserted.
 * Depending on whether @new covers the start and/or the end of PREV, and on
 * whether it can be merged with the left and/or right neighbor, PREV is
 * rewritten in place, merged into a neighbor, or split into two or three
 * records.
 *
 * The in-core extent list is always updated first.  When *@curp is non-NULL
 * the same change is then mirrored into the bmap btree through that cursor;
 * when it is NULL the returned log flags include XFS_ILOG_DEXT instead.
 * XFS_ILOG_CORE is added in every case that changes ifp->if_nextents.
 *
 * tp:		transaction, passed to the rmap update and to the
 *		extents-to-btree conversion
 * ip:		in-core inode whose fork is modified
 * whichfork:	fork selector, used for the ifork lookup and the state bits
 * icur:	in-core extent cursor positioned on the extent to convert
 * curp:	in/out btree cursor; *curp may be NULL on entry and is stored
 *		back on success if a cursor exists at the end
 * new:		the converted mapping; must have a real (non-null) start block
 * logflagsp:	out: inode log flags, zeroed on entry and then accumulated
 *
 * Returns 0 on success, -EFSCORRUPTED when a btree lookup/insert/delete/
 * decrement reports an unexpected status, or the error from a failing btree
 * or format-conversion call.  Nothing in this function undoes the in-core
 * extent list changes when a later btree step fails.
 *
 * NOTE(review): the no-cursor paths log XFS_ILOG_DEXT regardless of
 * @whichfork - confirm this is what callers operating on other forks expect.
2035 */
2036int					/* error */
2037xfs_bmap_add_extent_unwritten_real(
2038	struct xfs_trans	*tp,
2039	xfs_inode_t		*ip,	/* incore inode pointer */
2040	int			whichfork,
2041	struct xfs_iext_cursor	*icur,
2042	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
2043	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
2044	int			*logflagsp) /* inode logging flags */
2045{
2046	xfs_btree_cur_t		*cur;	/* btree cursor */
2047	int			error;	/* error return value */
2048	int			i;	/* temp state */
2049	struct xfs_ifork	*ifp;	/* inode fork pointer */
2050	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
2051	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
2052					/* left is 0, right is 1, prev is 2 */
2053	int			rval=0;	/* return value (logging flags) */
2054	int			state = xfs_bmap_fork_to_state(whichfork);
2055	struct xfs_mount	*mp = ip->i_mount;
2056	struct xfs_bmbt_irec	old;
2057
2058	*logflagsp = 0;
2059
2060	cur = *curp;
2061	ifp = XFS_IFORK_PTR(ip, whichfork);
2062
2063	ASSERT(!isnullstartblock(new->br_startblock));
2064
2065	XFS_STATS_INC(mp, xs_add_exlist);
2066
	/*
	 * Function-local aliases for the three slots of r[]; they are
	 * #undef'd again at the end of the function.
	 */
2067#define	LEFT		r[0]
2068#define	RIGHT		r[1]
2069#define	PREV		r[2]
2070
2071	/*
2072	 * Set up a bunch of variables to make the tests simpler.
2073	 */
2074	error = 0;
2075	xfs_iext_get_extent(ifp, icur, &PREV);
2076	ASSERT(new->br_state != PREV.br_state);
2077	new_endoff = new->br_startoff + new->br_blockcount;
2078	ASSERT(PREV.br_startoff <= new->br_startoff);
2079	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2080
2081	/*
2082	 * Set flags determining what part of the previous oldext allocation
2083	 * extent is being replaced by a newext allocation.
2084	 */
2085	if (PREV.br_startoff == new->br_startoff)
2086		state |= BMAP_LEFT_FILLING;
2087	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2088		state |= BMAP_RIGHT_FILLING;
2089
2090	/*
2091	 * Check and set flags if this segment has a left neighbor.
2092	 * Don't set contiguous if the combined extent would be too large.
2093	 */
2094	if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) {
2095		state |= BMAP_LEFT_VALID;
2096		if (isnullstartblock(LEFT.br_startblock))
2097			state |= BMAP_LEFT_DELAY;
2098	}
2099
	/*
	 * LEFT merges with @new only if it is a real extent that ends exactly
	 * where @new begins (in file offset and in disk block), already has
	 * @new's state, and the sum of lengths fits in MAXEXTLEN.
	 */
2100	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2101	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2102	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2103	    LEFT.br_state == new->br_state &&
2104	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2105		state |= BMAP_LEFT_CONTIG;
2106
2107	/*
2108	 * Check and set flags if this segment has a right neighbor.
2109	 * Don't set contiguous if the combined extent would be too large.
2110	 * Also check for all-three-contiguous being too large.
2111	 */
2112	if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) {
2113		state |= BMAP_RIGHT_VALID;
2114		if (isnullstartblock(RIGHT.br_startblock))
2115			state |= BMAP_RIGHT_DELAY;
2116	}
2117
	/*
	 * The last clause only bites when LEFT is contiguous and @new replaces
	 * all of PREV: then LEFT, @new and RIGHT would become one record, so
	 * the three-way length must fit in MAXEXTLEN as well.
	 */
2118	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2119	    new_endoff == RIGHT.br_startoff &&
2120	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2121	    new->br_state == RIGHT.br_state &&
2122	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
2123	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2124		       BMAP_RIGHT_FILLING)) !=
2125		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2126		       BMAP_RIGHT_FILLING) ||
2127	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2128			<= MAXEXTLEN))
2129		state |= BMAP_RIGHT_CONTIG;
2130
2131	/*
2132	 * Switch out based on the FILLING and CONTIG state bits.
2133	 */
2134	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2135			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2136	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2137	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2138		/*
2139		 * Setting all of a previous oldext extent to newext.
2140		 * The left and right neighbors are both contiguous with new.
2141		 */
2142		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
2143
		/*
		 * In-core: two records are removed at the cursor, then the
		 * cursor steps back and that record is rewritten as the
		 * enlarged LEFT.  Three records become one.
		 */
2144		xfs_iext_remove(ip, icur, state);
2145		xfs_iext_remove(ip, icur, state);
2146		xfs_iext_prev(ifp, icur);
2147		xfs_iext_update_extent(ip, state, icur, &LEFT);
2148		ifp->if_nextents -= 2;
2149		if (cur == NULL)
2150			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2151		else {
			/*
			 * Btree: find RIGHT, delete it, step back and delete
			 * again, step back once more and overwrite that
			 * record with the enlarged LEFT.  Every step must
			 * report i == 1 or the tree is treated as corrupt.
			 */
2152			rval = XFS_ILOG_CORE;
2153			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2154			if (error)
2155				goto done;
2156			if (XFS_IS_CORRUPT(mp, i != 1)) {
2157				error = -EFSCORRUPTED;
2158				goto done;
2159			}
2160			if ((error = xfs_btree_delete(cur, &i)))
2161				goto done;
2162			if (XFS_IS_CORRUPT(mp, i != 1)) {
2163				error = -EFSCORRUPTED;
2164				goto done;
2165			}
2166			if ((error = xfs_btree_decrement(cur, 0, &i)))
2167				goto done;
2168			if (XFS_IS_CORRUPT(mp, i != 1)) {
2169				error = -EFSCORRUPTED;
2170				goto done;
2171			}
2172			if ((error = xfs_btree_delete(cur, &i)))
2173				goto done;
2174			if (XFS_IS_CORRUPT(mp, i != 1)) {
2175				error = -EFSCORRUPTED;
2176				goto done;
2177			}
2178			if ((error = xfs_btree_decrement(cur, 0, &i)))
2179				goto done;
2180			if (XFS_IS_CORRUPT(mp, i != 1)) {
2181				error = -EFSCORRUPTED;
2182				goto done;
2183			}
2184			error = xfs_bmbt_update(cur, &LEFT);
2185			if (error)
2186				goto done;
2187		}
2188		break;
2189
2190	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2191		/*
2192		 * Setting all of a previous oldext extent to newext.
2193		 * The left neighbor is contiguous, the right is not.
2194		 */
2195		LEFT.br_blockcount += PREV.br_blockcount;
2196
		/* In-core: drop the record at the cursor, step back, grow LEFT. */
2197		xfs_iext_remove(ip, icur, state);
2198		xfs_iext_prev(ifp, icur);
2199		xfs_iext_update_extent(ip, state, icur, &LEFT);
2200		ifp->if_nextents--;
2201		if (cur == NULL)
2202			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2203		else {
			/* Btree: delete PREV, step back, overwrite with LEFT. */
2204			rval = XFS_ILOG_CORE;
2205			error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
2206			if (error)
2207				goto done;
2208			if (XFS_IS_CORRUPT(mp, i != 1)) {
2209				error = -EFSCORRUPTED;
2210				goto done;
2211			}
2212			if ((error = xfs_btree_delete(cur, &i)))
2213				goto done;
2214			if (XFS_IS_CORRUPT(mp, i != 1)) {
2215				error = -EFSCORRUPTED;
2216				goto done;
2217			}
2218			if ((error = xfs_btree_decrement(cur, 0, &i)))
2219				goto done;
2220			if (XFS_IS_CORRUPT(mp, i != 1)) {
2221				error = -EFSCORRUPTED;
2222				goto done;
2223			}
2224			error = xfs_bmbt_update(cur, &LEFT);
2225			if (error)
2226				goto done;
2227		}
2228		break;
2229
2230	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2231		/*
2232		 * Setting all of a previous oldext extent to newext.
2233		 * The right neighbor is contiguous, the left is not.
2234		 */
2235		PREV.br_blockcount += RIGHT.br_blockcount;
2236		PREV.br_state = new->br_state;
2237
		/*
		 * In-core: advance, remove the record there, step back and
		 * rewrite the original position as the enlarged, converted
		 * PREV.
		 */
2238		xfs_iext_next(ifp, icur);
2239		xfs_iext_remove(ip, icur, state);
2240		xfs_iext_prev(ifp, icur);
2241		xfs_iext_update_extent(ip, state, icur, &PREV);
2242		ifp->if_nextents--;
2243
2244		if (cur == NULL)
2245			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2246		else {
			/* Btree: delete RIGHT, step back, overwrite with PREV. */
2247			rval = XFS_ILOG_CORE;
2248			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2249			if (error)
2250				goto done;
2251			if (XFS_IS_CORRUPT(mp, i != 1)) {
2252				error = -EFSCORRUPTED;
2253				goto done;
2254			}
2255			if ((error = xfs_btree_delete(cur, &i)))
2256				goto done;
2257			if (XFS_IS_CORRUPT(mp, i != 1)) {
2258				error = -EFSCORRUPTED;
2259				goto done;
2260			}
2261			if ((error = xfs_btree_decrement(cur, 0, &i)))
2262				goto done;
2263			if (XFS_IS_CORRUPT(mp, i != 1)) {
2264				error = -EFSCORRUPTED;
2265				goto done;
2266			}
2267			error = xfs_bmbt_update(cur, &PREV);
2268			if (error)
2269				goto done;
2270		}
2271		break;
2272
2273	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2274		/*
2275		 * Setting all of a previous oldext extent to newext.
2276		 * Neither the left nor right neighbors are contiguous with
2277		 * the new one.
2278		 */
2279		PREV.br_state = new->br_state;
2280		xfs_iext_update_extent(ip, state, icur, &PREV);
2281
2282		if (cur == NULL)
2283			rval = XFS_ILOG_DEXT;
2284		else {
			/*
			 * Both FILLING bits are set, so @new spans exactly
			 * PREV's range and is used as the lookup key here.
			 * NOTE(review): presumably the lookup does not compare
			 * br_state - confirm against xfs_bmbt_lookup_eq().
			 */
2285			rval = 0;
2286			error = xfs_bmbt_lookup_eq(cur, new, &i);
2287			if (error)
2288				goto done;
2289			if (XFS_IS_CORRUPT(mp, i != 1)) {
2290				error = -EFSCORRUPTED;
2291				goto done;
2292			}
2293			error = xfs_bmbt_update(cur, &PREV);
2294			if (error)
2295				goto done;
2296		}
2297		break;
2298
2299	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2300		/*
2301		 * Setting the first part of a previous oldext extent to newext.
2302		 * The left neighbor is contiguous.
2303		 */
2304		LEFT.br_blockcount += new->br_blockcount;
2305
		/* Keep the unmodified PREV as the btree lookup key. */
2306		old = PREV;
2307		PREV.br_startoff += new->br_blockcount;
2308		PREV.br_startblock += new->br_blockcount;
2309		PREV.br_blockcount -= new->br_blockcount;
2310
		/* Record count is unchanged: PREV shrinks, LEFT grows. */
2311		xfs_iext_update_extent(ip, state, icur, &PREV);
2312		xfs_iext_prev(ifp, icur);
2313		xfs_iext_update_extent(ip, state, icur, &LEFT);
2314
2315		if (cur == NULL)
2316			rval = XFS_ILOG_DEXT;
2317		else {
2318			rval = 0;
2319			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2320			if (error)
2321				goto done;
2322			if (XFS_IS_CORRUPT(mp, i != 1)) {
2323				error = -EFSCORRUPTED;
2324				goto done;
2325			}
2326			error = xfs_bmbt_update(cur, &PREV);
2327			if (error)
2328				goto done;
			/* The status in i is not checked after this decrement. */
2329			error = xfs_btree_decrement(cur, 0, &i);
2330			if (error)
2331				goto done;
2332			error = xfs_bmbt_update(cur, &LEFT);
2333			if (error)
2334				goto done;
2335		}
2336		break;
2337
2338	case BMAP_LEFT_FILLING:
2339		/*
2340		 * Setting the first part of a previous oldext extent to newext.
2341		 * The left neighbor is not contiguous.
2342		 */
2343		old = PREV;
2344		PREV.br_startoff += new->br_blockcount;
2345		PREV.br_startblock += new->br_blockcount;
2346		PREV.br_blockcount -= new->br_blockcount;
2347
		/* In-core: shrink PREV from the front, then insert @new. */
2348		xfs_iext_update_extent(ip, state, icur, &PREV);
2349		xfs_iext_insert(ip, icur, new, state);
2350		ifp->if_nextents++;
2351
2352		if (cur == NULL)
2353			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2354		else {
2355			rval = XFS_ILOG_CORE;
2356			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2357			if (error)
2358				goto done;
2359			if (XFS_IS_CORRUPT(mp, i != 1)) {
2360				error = -EFSCORRUPTED;
2361				goto done;
2362			}
2363			error = xfs_bmbt_update(cur, &PREV);
2364			if (error)
2365				goto done;
			/*
			 * Stage @new in the cursor's record slot for the
			 * insert (presumably xfs_btree_insert() takes the
			 * record from cur->bc_rec - confirm).
			 */
2366			cur->bc_rec.b = *new;
2367			if ((error = xfs_btree_insert(cur, &i)))
2368				goto done;
2369			if (XFS_IS_CORRUPT(mp, i != 1)) {
2370				error = -EFSCORRUPTED;
2371				goto done;
2372			}
2373		}
2374		break;
2375
2376	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2377		/*
2378		 * Setting the last part of a previous oldext extent to newext.
2379		 * The right neighbor is contiguous with the new allocation.
2380		 */
2381		old = PREV;
2382		PREV.br_blockcount -= new->br_blockcount;
2383
		/* RIGHT is extended backwards so that it now starts at @new. */
2384		RIGHT.br_startoff = new->br_startoff;
2385		RIGHT.br_startblock = new->br_startblock;
2386		RIGHT.br_blockcount += new->br_blockcount;
2387
2388		xfs_iext_update_extent(ip, state, icur, &PREV);
2389		xfs_iext_next(ifp, icur);
2390		xfs_iext_update_extent(ip, state, icur, &RIGHT);
2391
2392		if (cur == NULL)
2393			rval = XFS_ILOG_DEXT;
2394		else {
2395			rval = 0;
2396			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2397			if (error)
2398				goto done;
2399			if (XFS_IS_CORRUPT(mp, i != 1)) {
2400				error = -EFSCORRUPTED;
2401				goto done;
2402			}
2403			error = xfs_bmbt_update(cur, &PREV);
2404			if (error)
2405				goto done;
			/* The status in i is not checked after this increment. */
2406			error = xfs_btree_increment(cur, 0, &i);
2407			if (error)
2408				goto done;
2409			error = xfs_bmbt_update(cur, &RIGHT);
2410			if (error)
2411				goto done;
2412		}
2413		break;
2414
2415	case BMAP_RIGHT_FILLING:
2416		/*
2417		 * Setting the last part of a previous oldext extent to newext.
2418		 * The right neighbor is not contiguous.
2419		 */
2420		old = PREV;
2421		PREV.br_blockcount -= new->br_blockcount;
2422
		/* In-core: shrink PREV from the end, advance, insert @new. */
2423		xfs_iext_update_extent(ip, state, icur, &PREV);
2424		xfs_iext_next(ifp, icur);
2425		xfs_iext_insert(ip, icur, new, state);
2426		ifp->if_nextents++;
2427
2428		if (cur == NULL)
2429			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2430		else {
2431			rval = XFS_ILOG_CORE;
2432			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2433			if (error)
2434				goto done;
2435			if (XFS_IS_CORRUPT(mp, i != 1)) {
2436				error = -EFSCORRUPTED;
2437				goto done;
2438			}
2439			error = xfs_bmbt_update(cur, &PREV);
2440			if (error)
2441				goto done;
			/*
			 * @new must not be in the tree yet (i == 0); the
			 * lookup is done for its effect on the cursor before
			 * the insert.
			 */
2442			error = xfs_bmbt_lookup_eq(cur, new, &i);
2443			if (error)
2444				goto done;
2445			if (XFS_IS_CORRUPT(mp, i != 0)) {
2446				error = -EFSCORRUPTED;
2447				goto done;
2448			}
2449			if ((error = xfs_btree_insert(cur, &i)))
2450				goto done;
2451			if (XFS_IS_CORRUPT(mp, i != 1)) {
2452				error = -EFSCORRUPTED;
2453				goto done;
2454			}
2455		}
2456		break;
2457
2458	case 0:
2459		/*
2460		 * Setting the middle part of a previous oldext extent to
2461		 * newext.  Contiguity is impossible here.
2462		 * One extent becomes three extents.
2463		 */
2464		old = PREV;
2465		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
2466
		/*
		 * r[0] and r[1] (the LEFT/RIGHT slots) are reused from here
		 * on: r[0] is the new middle record, r[1] the remainder of
		 * the old extent after @new, keeping the old state.
		 */
2467		r[0] = *new;
2468		r[1].br_startoff = new_endoff;
2469		r[1].br_blockcount =
2470			old.br_startoff + old.br_blockcount - new_endoff;
2471		r[1].br_startblock = new->br_startblock + new->br_blockcount;
2472		r[1].br_state = PREV.br_state;
2473
		/*
		 * In-core: truncate PREV, advance, then insert the tail and
		 * the middle at the same cursor position, in that order.
		 */
2474		xfs_iext_update_extent(ip, state, icur, &PREV);
2475		xfs_iext_next(ifp, icur);
2476		xfs_iext_insert(ip, icur, &r[1], state);
2477		xfs_iext_insert(ip, icur, &r[0], state);
2478		ifp->if_nextents += 2;
2479
2480		if (cur == NULL)
2481			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2482		else {
2483			rval = XFS_ILOG_CORE;
2484			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2485			if (error)
2486				goto done;
2487			if (XFS_IS_CORRUPT(mp, i != 1)) {
2488				error = -EFSCORRUPTED;
2489				goto done;
2490			}
2491			/* new right extent - oldext */
2492			error = xfs_bmbt_update(cur, &r[1]);
2493			if (error)
2494				goto done;
2495			/* new left extent - oldext */
2496			cur->bc_rec.b = PREV;
2497			if ((error = xfs_btree_insert(cur, &i)))
2498				goto done;
2499			if (XFS_IS_CORRUPT(mp, i != 1)) {
2500				error = -EFSCORRUPTED;
2501				goto done;
2502			}
2503			/*
2504			 * Reset the cursor to the position of the new extent
2505			 * we are about to insert as we can't trust it after
2506			 * the previous insert.
2507			 */
2508			error = xfs_bmbt_lookup_eq(cur, new, &i);
2509			if (error)
2510				goto done;
2511			if (XFS_IS_CORRUPT(mp, i != 0)) {
2512				error = -EFSCORRUPTED;
2513				goto done;
2514			}
2515			/* new middle extent - newext */
2516			if ((error = xfs_btree_insert(cur, &i)))
2517				goto done;
2518			if (XFS_IS_CORRUPT(mp, i != 1)) {
2519				error = -EFSCORRUPTED;
2520				goto done;
2521			}
2522		}
2523		break;
2524
2525	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2526	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2527	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2528	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2529	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2530	case BMAP_LEFT_CONTIG:
2531	case BMAP_RIGHT_CONTIG:
2532		/*
2533		 * These cases are all impossible.
		 * The ASSERT is the only handling; control then leaves the
		 * switch and continues with the common tail below.
2534		 */
2535		ASSERT(0);
2536	}
2537
2538	/* update reverse mappings */
2539	xfs_rmap_convert_extent(mp, tp, ip, whichfork, new);
2540
2541	/* convert to a btree if necessary */
2542	if (xfs_bmap_needs_btree(ip, whichfork)) {
2543		int	tmp_logflags;	/* partial log flag return val */
2544
		/* Conversion is only expected when no cursor existed so far. */
2545		ASSERT(cur == NULL);
2546		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
2547				&tmp_logflags, whichfork);
		/* The partial flags are merged even if the conversion failed. */
2548		*logflagsp |= tmp_logflags;
2549		if (error)
2550			goto done;
2551	}
2552
2553	/* clear out the allocated field, done with it now in any case. */
2554	if (cur) {
2555		cur->bc_ino.allocated = 0;
2556		*curp = cur;
2557	}
2558
2559	xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
	/*
	 * Error paths jump here directly: rval is still merged into
	 * *logflagsp, but *curp is not updated.
	 */
2560done:
2561	*logflagsp |= rval;
2562	return error;
2563#undef	LEFT
2564#undef	RIGHT
2565#undef	PREV
2566}
2567
2568/*
2569 * Convert a hole to a delayed allocation.
2570 */
2571STATIC void
2572xfs_bmap_add_extent_hole_delay(
2573	xfs_inode_t		*ip,	/* incore inode pointer */
2574	int			whichfork,
2575	struct xfs_iext_cursor	*icur,
2576	xfs_bmbt_irec_t		*new)	/* new data to add to file extents */
2577{
2578	struct xfs_ifork	*ifp;	/* inode fork pointer */
2579	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
2580	xfs_filblks_t		newlen=0;	/* new indirect size */
2581	xfs_filblks_t		oldlen=0;	/* old indirect size */
2582	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
2583	int			state = xfs_bmap_fork_to_state(whichfork);
2584	xfs_filblks_t		temp;	 /* temp for indirect calculations */
2585
2586	ifp = XFS_IFORK_PTR(ip, whichfork);
2587	ASSERT(isnullstartblock(new->br_startblock));
2588
2589	/*
2590	 * Check and set flags if this segment has a left neighbor
2591	 */
2592	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2593		state |= BMAP_LEFT_VALID;
2594		if (isnullstartblock(left.br_startblock))
2595			state |= BMAP_LEFT_DELAY;
2596	}
2597
2598	/*
2599	 * Check and set flags if the current (right) segment exists.
2600	 * If it doesn't exist, we're converting the hole at end-of-file.
2601	 */
2602	if (xfs_iext_get_extent(ifp, icur, &right)) {
2603		state |= BMAP_RIGHT_VALID;
2604		if (isnullstartblock(right.br_startblock))
2605			state |= BMAP_RIGHT_DELAY;
2606	}
2607
2608	/*
2609	 * Set contiguity flags on the left and right neighbors.
2610	 * Don't let extents get too large, even if the pieces are contiguous.
2611	 */
2612	if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
2613	    left.br_startoff + left.br_blockcount == new->br_startoff &&
2614	    left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2615		state |= BMAP_LEFT_CONTIG;
2616
2617	if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
2618	    new->br_startoff + new->br_blockcount == right.br_startoff &&
2619	    new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2620	    (!(state & BMAP_LEFT_CONTIG) ||
2621	     (left.br_blockcount + new->br_blockcount +
2622	      right.br_blockcount <= MAXEXTLEN)))
2623		state |= BMAP_RIGHT_CONTIG;
2624
2625	/*
2626	 * Switch out based on the contiguity flags.
2627	 */
2628	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2629	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2630		/*
2631		 * New allocation is contiguous with delayed allocations
2632		 * on the left and on the right.
2633		 * Merge all three into a single extent record.
2634		 */
2635		temp = left.br_blockcount + new->br_blockcount +
2636			right.br_blockcount;
2637
2638		oldlen = startblockval(left.br_startblock) +
2639			startblockval(new->br_startblock) +
2640			startblockval(right.br_startblock);
2641		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2642					 oldlen);
2643		left.br_startblock = nullstartblock(newlen);
2644		left.br_blockcount = temp;
2645
2646		xfs_iext_remove(ip, icur, state);
2647		xfs_iext_prev(ifp, icur);
2648		xfs_iext_update_extent(ip, state, icur, &left);
2649		break;
2650
2651	case BMAP_LEFT_CONTIG:
2652		/*
2653		 * New allocation is contiguous with a delayed allocation
2654		 * on the left.
2655		 * Merge the new allocation with the left neighbor.
2656		 */
2657		temp = left.br_blockcount + new->br_blockcount;
2658
2659		oldlen = startblockval(left.br_startblock) +
2660			startblockval(new->br_startblock);
2661		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2662					 oldlen);
2663		left.br_blockcount = temp;
2664		left.br_startblock = nullstartblock(newlen);
2665
2666		xfs_iext_prev(ifp, icur);
2667		xfs_iext_update_extent(ip, state, icur, &left);
2668		break;
2669
2670	case BMAP_RIGHT_CONTIG:
2671		/*
2672		 * New allocation is contiguous with a delayed allocation
2673		 * on the right.
2674		 * Merge the new allocation with the right neighbor.
2675		 */
2676		temp = new->br_blockcount + right.br_blockcount;
2677		oldlen = startblockval(new->br_startblock) +
2678			startblockval(right.br_startblock);
2679		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2680					 oldlen);
2681		right.br_startoff = new->br_startoff;
2682		right.br_startblock = nullstartblock(newlen);
2683		right.br_blockcount = temp;
2684		xfs_iext_update_extent(ip, state, icur, &right);
2685		break;
2686
2687	case 0:
2688		/*
2689		 * New allocation is not contiguous with another
2690		 * delayed allocation.
2691		 * Insert a new entry.
2692		 */
2693		oldlen = newlen = 0;
2694		xfs_iext_insert(ip, icur, new, state);
2695		break;
2696	}
2697	if (oldlen != newlen) {
2698		ASSERT(oldlen > newlen);
2699		xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2700				 false);
2701		/*
2702		 * Nothing to do for disk quota accounting here.
2703		 */
2704		xfs_mod_delalloc(ip->i_mount, (int64_t)newlen - oldlen);
2705	}
2706}
2707
2708/*
2709 * Convert a hole to a real allocation.
2710 */
2711STATIC int				/* error */
2712xfs_bmap_add_extent_hole_real(
2713	struct xfs_trans	*tp,
2714	struct xfs_inode	*ip,
2715	int			whichfork,
2716	struct xfs_iext_cursor	*icur,
2717	struct xfs_btree_cur	**curp,
2718	struct xfs_bmbt_irec	*new,
2719	int			*logflagsp,
2720	int			flags)
2721{
2722	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
2723	struct xfs_mount	*mp = ip->i_mount;
2724	struct xfs_btree_cur	*cur = *curp;
2725	int			error;	/* error return value */
2726	int			i;	/* temp state */
2727	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
2728	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
2729	int			rval=0;	/* return value (logging flags) */
2730	int			state = xfs_bmap_fork_to_state(whichfork);
2731	struct xfs_bmbt_irec	old;
2732
2733	ASSERT(!isnullstartblock(new->br_startblock));
2734	ASSERT(!cur || !(cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
2735
2736	XFS_STATS_INC(mp, xs_add_exlist);
2737
2738	/*
2739	 * Check and set flags if this segment has a left neighbor.
2740	 */
2741	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2742		state |= BMAP_LEFT_VALID;
2743		if (isnullstartblock(left.br_startblock))
2744			state |= BMAP_LEFT_DELAY;
2745	}
2746
2747	/*
2748	 * Check and set flags if this segment has a current value.
2749	 * Not true if we're inserting into the "hole" at eof.
2750	 */
2751	if (xfs_iext_get_extent(ifp, icur, &right)) {
2752		state |= BMAP_RIGHT_VALID;
2753		if (isnullstartblock(right.br_startblock))
2754			state |= BMAP_RIGHT_DELAY;
2755	}
2756
2757	/*
2758	 * We're inserting a real allocation between "left" and "right".
2759	 * Set the contiguity flags.  Don't let extents get too large.
2760	 */
2761	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2762	    left.br_startoff + left.br_blockcount == new->br_startoff &&
2763	    left.br_startblock + left.br_blockcount == new->br_startblock &&
2764	    left.br_state == new->br_state &&
2765	    left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2766		state |= BMAP_LEFT_CONTIG;
2767
2768	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2769	    new->br_startoff + new->br_blockcount == right.br_startoff &&
2770	    new->br_startblock + new->br_blockcount == right.br_startblock &&
2771	    new->br_state == right.br_state &&
2772	    new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2773	    (!(state & BMAP_LEFT_CONTIG) ||
2774	     left.br_blockcount + new->br_blockcount +
2775	     right.br_blockcount <= MAXEXTLEN))
2776		state |= BMAP_RIGHT_CONTIG;
2777
2778	error = 0;
2779	/*
2780	 * Select which case we're in here, and implement it.
2781	 */
2782	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2783	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2784		/*
2785		 * New allocation is contiguous with real allocations on the
2786		 * left and on the right.
2787		 * Merge all three into a single extent record.
2788		 */
2789		left.br_blockcount += new->br_blockcount + right.br_blockcount;
2790
2791		xfs_iext_remove(ip, icur, state);
2792		xfs_iext_prev(ifp, icur);
2793		xfs_iext_update_extent(ip, state, icur, &left);
2794		ifp->if_nextents--;
2795
2796		if (cur == NULL) {
2797			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2798		} else {
2799			rval = XFS_ILOG_CORE;
2800			error = xfs_bmbt_lookup_eq(cur, &right, &i);
2801			if (error)
2802				goto done;
2803			if (XFS_IS_CORRUPT(mp, i != 1)) {
2804				error = -EFSCORRUPTED;
2805				goto done;
2806			}
2807			error = xfs_btree_delete(cur, &i);
2808			if (error)
2809				goto done;
2810			if (XFS_IS_CORRUPT(mp, i != 1)) {
2811				error = -EFSCORRUPTED;
2812				goto done;
2813			}
2814			error = xfs_btree_decrement(cur, 0, &i);
2815			if (error)
2816				goto done;
2817			if (XFS_IS_CORRUPT(mp, i != 1)) {
2818				error = -EFSCORRUPTED;
2819				goto done;
2820			}
2821			error = xfs_bmbt_update(cur, &left);
2822			if (error)
2823				goto done;
2824		}
2825		break;
2826
2827	case BMAP_LEFT_CONTIG:
2828		/*
2829		 * New allocation is contiguous with a real allocation
2830		 * on the left.
2831		 * Merge the new allocation with the left neighbor.
2832		 */
2833		old = left;
2834		left.br_blockcount += new->br_blockcount;
2835
2836		xfs_iext_prev(ifp, icur);
2837		xfs_iext_update_extent(ip, state, icur, &left);
2838
2839		if (cur == NULL) {
2840			rval = xfs_ilog_fext(whichfork);
2841		} else {
2842			rval = 0;
2843			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2844			if (error)
2845				goto done;
2846			if (XFS_IS_CORRUPT(mp, i != 1)) {
2847				error = -EFSCORRUPTED;
2848				goto done;
2849			}
2850			error = xfs_bmbt_update(cur, &left);
2851			if (error)
2852				goto done;
2853		}
2854		break;
2855
2856	case BMAP_RIGHT_CONTIG:
2857		/*
2858		 * New allocation is contiguous with a real allocation
2859		 * on the right.
2860		 * Merge the new allocation with the right neighbor.
2861		 */
2862		old = right;
2863
2864		right.br_startoff = new->br_startoff;
2865		right.br_startblock = new->br_startblock;
2866		right.br_blockcount += new->br_blockcount;
2867		xfs_iext_update_extent(ip, state, icur, &right);
2868
2869		if (cur == NULL) {
2870			rval = xfs_ilog_fext(whichfork);
2871		} else {
2872			rval = 0;
2873			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2874			if (error)
2875				goto done;
2876			if (XFS_IS_CORRUPT(mp, i != 1)) {
2877				error = -EFSCORRUPTED;
2878				goto done;
2879			}
2880			error = xfs_bmbt_update(cur, &right);
2881			if (error)
2882				goto done;
2883		}
2884		break;
2885
2886	case 0:
2887		/*
2888		 * New allocation is not contiguous with another
2889		 * real allocation.
2890		 * Insert a new entry.
2891		 */
2892		xfs_iext_insert(ip, icur, new, state);
2893		ifp->if_nextents++;
2894
2895		if (cur == NULL) {
2896			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2897		} else {
2898			rval = XFS_ILOG_CORE;
2899			error = xfs_bmbt_lookup_eq(cur, new, &i);
2900			if (error)
2901				goto done;
2902			if (XFS_IS_CORRUPT(mp, i != 0)) {
2903				error = -EFSCORRUPTED;
2904				goto done;
2905			}
2906			error = xfs_btree_insert(cur, &i);
2907			if (error)
2908				goto done;
2909			if (XFS_IS_CORRUPT(mp, i != 1)) {
2910				error = -EFSCORRUPTED;
2911				goto done;
2912			}
2913		}
2914		break;
2915	}
2916
2917	/* add reverse mapping unless caller opted out */
2918	if (!(flags & XFS_BMAPI_NORMAP))
2919		xfs_rmap_map_extent(tp, ip, whichfork, new);
2920
2921	/* convert to a btree if necessary */
2922	if (xfs_bmap_needs_btree(ip, whichfork)) {
2923		int	tmp_logflags;	/* partial log flag return val */
2924
2925		ASSERT(cur == NULL);
2926		error = xfs_bmap_extents_to_btree(tp, ip, curp, 0,
2927				&tmp_logflags, whichfork);
2928		*logflagsp |= tmp_logflags;
2929		cur = *curp;
2930		if (error)
2931			goto done;
2932	}
2933
2934	/* clear out the allocated field, done with it now in any case. */
2935	if (cur)
2936		cur->bc_ino.allocated = 0;
2937
2938	xfs_bmap_check_leaf_extents(cur, ip, whichfork);
2939done:
2940	*logflagsp |= rval;
2941	return error;
2942}
2943
2944/*
2945 * Functions used in the extent read, allocate and remove paths
2946 */
2947
2948/*
2949 * Adjust the size of the new extent based on di_extsize and rt extsize.
2950 */
2951int
2952xfs_bmap_extsize_align(
2953	xfs_mount_t	*mp,
2954	xfs_bmbt_irec_t	*gotp,		/* next extent pointer */
2955	xfs_bmbt_irec_t	*prevp,		/* previous extent pointer */
2956	xfs_extlen_t	extsz,		/* align to this extent size */
2957	int		rt,		/* is this a realtime inode? */
2958	int		eof,		/* is extent at end-of-file? */
2959	int		delay,		/* creating delalloc extent? */
2960	int		convert,	/* overwriting unwritten extent? */
2961	xfs_fileoff_t	*offp,		/* in/out: aligned offset */
2962	xfs_extlen_t	*lenp)		/* in/out: aligned length */
2963{
2964	xfs_fileoff_t	orig_off;	/* original offset */
2965	xfs_extlen_t	orig_alen;	/* original length */
2966	xfs_fileoff_t	orig_end;	/* original off+len */
2967	xfs_fileoff_t	nexto;		/* next file offset */
2968	xfs_fileoff_t	prevo;		/* previous file offset */
2969	xfs_fileoff_t	align_off;	/* temp for offset */
2970	xfs_extlen_t	align_alen;	/* temp for length */
2971	xfs_extlen_t	temp;		/* temp for calculations */
2972
2973	if (convert)
2974		return 0;
2975
2976	orig_off = align_off = *offp;
2977	orig_alen = align_alen = *lenp;
2978	orig_end = orig_off + orig_alen;
2979
2980	/*
2981	 * If this request overlaps an existing extent, then don't
2982	 * attempt to perform any additional alignment.
2983	 */
2984	if (!delay && !eof &&
2985	    (orig_off >= gotp->br_startoff) &&
2986	    (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
2987		return 0;
2988	}
2989
2990	/*
2991	 * If the file offset is unaligned vs. the extent size
2992	 * we need to align it.  This will be possible unless
2993	 * the file was previously written with a kernel that didn't
2994	 * perform this alignment, or if a truncate shot us in the
2995	 * foot.
2996	 */
2997	div_u64_rem(orig_off, extsz, &temp);
2998	if (temp) {
2999		align_alen += temp;
3000		align_off -= temp;
3001	}
3002
3003	/* Same adjustment for the end of the requested area. */
3004	temp = (align_alen % extsz);
3005	if (temp)
3006		align_alen += extsz - temp;
3007
3008	/*
3009	 * For large extent hint sizes, the aligned extent might be larger than
3010	 * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
3011	 * the length back under MAXEXTLEN. The outer allocation loops handle
3012	 * short allocation just fine, so it is safe to do this. We only want to
3013	 * do it when we are forced to, though, because it means more allocation
3014	 * operations are required.
3015	 */
3016	while (align_alen > MAXEXTLEN)
3017		align_alen -= extsz;
3018	ASSERT(align_alen <= MAXEXTLEN);
3019
3020	/*
3021	 * If the previous block overlaps with this proposed allocation
3022	 * then move the start forward without adjusting the length.
3023	 */
3024	if (prevp->br_startoff != NULLFILEOFF) {
3025		if (prevp->br_startblock == HOLESTARTBLOCK)
3026			prevo = prevp->br_startoff;
3027		else
3028			prevo = prevp->br_startoff + prevp->br_blockcount;
3029	} else
3030		prevo = 0;
3031	if (align_off != orig_off && align_off < prevo)
3032		align_off = prevo;
3033	/*
3034	 * If the next block overlaps with this proposed allocation
3035	 * then move the start back without adjusting the length,
3036	 * but not before offset 0.
3037	 * This may of course make the start overlap previous block,
3038	 * and if we hit the offset 0 limit then the next block
3039	 * can still overlap too.
3040	 */
3041	if (!eof && gotp->br_startoff != NULLFILEOFF) {
3042		if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
3043		    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
3044			nexto = gotp->br_startoff + gotp->br_blockcount;
3045		else
3046			nexto = gotp->br_startoff;
3047	} else
3048		nexto = NULLFILEOFF;
3049	if (!eof &&
3050	    align_off + align_alen != orig_end &&
3051	    align_off + align_alen > nexto)
3052		align_off = nexto > align_alen ? nexto - align_alen : 0;
3053	/*
3054	 * If we're now overlapping the next or previous extent that
3055	 * means we can't fit an extsz piece in this hole.  Just move
3056	 * the start forward to the first valid spot and set
3057	 * the length so we hit the end.
3058	 */
3059	if (align_off != orig_off && align_off < prevo)
3060		align_off = prevo;
3061	if (align_off + align_alen != orig_end &&
3062	    align_off + align_alen > nexto &&
3063	    nexto != NULLFILEOFF) {
3064		ASSERT(nexto > prevo);
3065		align_alen = nexto - align_off;
3066	}
3067
3068	/*
3069	 * If realtime, and the result isn't a multiple of the realtime
3070	 * extent size we need to remove blocks until it is.
3071	 */
3072	if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
3073		/*
3074		 * We're not covering the original request, or
3075		 * we won't be able to once we fix the length.
3076		 */
3077		if (orig_off < align_off ||
3078		    orig_end > align_off + align_alen ||
3079		    align_alen - temp < orig_alen)
3080			return -EINVAL;
3081		/*
3082		 * Try to fix it by moving the start up.
3083		 */
3084		if (align_off + temp <= orig_off) {
3085			align_alen -= temp;
3086			align_off += temp;
3087		}
3088		/*
3089		 * Try to fix it by moving the end in.
3090		 */
3091		else if (align_off + align_alen - temp >= orig_end)
3092			align_alen -= temp;
3093		/*
3094		 * Set the start to the minimum then trim the length.
3095		 */
3096		else {
3097			align_alen -= orig_off - align_off;
3098			align_off = orig_off;
3099			align_alen -= align_alen % mp->m_sb.sb_rextsize;
3100		}
3101		/*
3102		 * Result doesn't cover the request, fail it.
3103		 */
3104		if (orig_off < align_off || orig_end > align_off + align_alen)
3105			return -EINVAL;
3106	} else {
3107		ASSERT(orig_off >= align_off);
3108		/* see MAXEXTLEN handling above */
3109		ASSERT(orig_end <= align_off + align_alen ||
3110		       align_alen + extsz > MAXEXTLEN);
3111	}
3112
3113#ifdef DEBUG
3114	if (!eof && gotp->br_startoff != NULLFILEOFF)
3115		ASSERT(align_off + align_alen <= gotp->br_startoff);
3116	if (prevp->br_startoff != NULLFILEOFF)
3117		ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3118#endif
3119
3120	*lenp = align_alen;
3121	*offp = align_off;
3122	return 0;
3123}
3124
3125#define XFS_ALLOC_GAP_UNITS	4
3126
3127void
 
3128xfs_bmap_adjacent(
3129	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
3130{
3131	xfs_fsblock_t	adjust;		/* adjustment to block numbers */
3132	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
3133	xfs_mount_t	*mp;		/* mount point structure */
3134	int		nullfb;		/* true if ap->firstblock isn't set */
3135	int		rt;		/* true if inode is realtime */
3136
3137#define	ISVALID(x,y)	\
3138	(rt ? \
3139		(x) < mp->m_sb.sb_rblocks : \
3140		XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
3141		XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
3142		XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
3143
3144	mp = ap->ip->i_mount;
3145	nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
3146	rt = XFS_IS_REALTIME_INODE(ap->ip) &&
3147		(ap->datatype & XFS_ALLOC_USERDATA);
3148	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
3149							ap->tp->t_firstblock);
3150	/*
3151	 * If allocating at eof, and there's a previous real block,
3152	 * try to use its last block as our starting point.
3153	 */
3154	if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3155	    !isnullstartblock(ap->prev.br_startblock) &&
3156	    ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
3157		    ap->prev.br_startblock)) {
3158		ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3159		/*
3160		 * Adjust for the gap between prevp and us.
3161		 */
3162		adjust = ap->offset -
3163			(ap->prev.br_startoff + ap->prev.br_blockcount);
3164		if (adjust &&
3165		    ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
3166			ap->blkno += adjust;
 
3167	}
3168	/*
3169	 * If not at eof, then compare the two neighbor blocks.
3170	 * Figure out whether either one gives us a good starting point,
3171	 * and pick the better one.
3172	 */
3173	else if (!ap->eof) {
3174		xfs_fsblock_t	gotbno;		/* right side block number */
3175		xfs_fsblock_t	gotdiff=0;	/* right side difference */
3176		xfs_fsblock_t	prevbno;	/* left side block number */
3177		xfs_fsblock_t	prevdiff=0;	/* left side difference */
3178
3179		/*
3180		 * If there's a previous (left) block, select a requested
3181		 * start block based on it.
3182		 */
3183		if (ap->prev.br_startoff != NULLFILEOFF &&
3184		    !isnullstartblock(ap->prev.br_startblock) &&
3185		    (prevbno = ap->prev.br_startblock +
3186			       ap->prev.br_blockcount) &&
3187		    ISVALID(prevbno, ap->prev.br_startblock)) {
3188			/*
3189			 * Calculate gap to end of previous block.
3190			 */
3191			adjust = prevdiff = ap->offset -
3192				(ap->prev.br_startoff +
3193				 ap->prev.br_blockcount);
3194			/*
3195			 * Figure the startblock based on the previous block's
3196			 * end and the gap size.
3197			 * Heuristic!
3198			 * If the gap is large relative to the piece we're
3199			 * allocating, or using it gives us an invalid block
3200			 * number, then just use the end of the previous block.
3201			 */
3202			if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3203			    ISVALID(prevbno + prevdiff,
3204				    ap->prev.br_startblock))
3205				prevbno += adjust;
3206			else
3207				prevdiff += adjust;
3208			/*
3209			 * If the firstblock forbids it, can't use it,
3210			 * must use default.
3211			 */
3212			if (!rt && !nullfb &&
3213			    XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
3214				prevbno = NULLFSBLOCK;
3215		}
3216		/*
3217		 * No previous block or can't follow it, just default.
3218		 */
3219		else
3220			prevbno = NULLFSBLOCK;
3221		/*
3222		 * If there's a following (right) block, select a requested
3223		 * start block based on it.
3224		 */
3225		if (!isnullstartblock(ap->got.br_startblock)) {
3226			/*
3227			 * Calculate gap to start of next block.
3228			 */
3229			adjust = gotdiff = ap->got.br_startoff - ap->offset;
3230			/*
3231			 * Figure the startblock based on the next block's
3232			 * start and the gap size.
3233			 */
3234			gotbno = ap->got.br_startblock;
3235			/*
3236			 * Heuristic!
3237			 * If the gap is large relative to the piece we're
3238			 * allocating, or using it gives us an invalid block
3239			 * number, then just use the start of the next block
3240			 * offset by our length.
3241			 */
3242			if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3243			    ISVALID(gotbno - gotdiff, gotbno))
3244				gotbno -= adjust;
3245			else if (ISVALID(gotbno - ap->length, gotbno)) {
3246				gotbno -= ap->length;
3247				gotdiff += adjust - ap->length;
3248			} else
3249				gotdiff += adjust;
3250			/*
3251			 * If the firstblock forbids it, can't use it,
3252			 * must use default.
3253			 */
3254			if (!rt && !nullfb &&
3255			    XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
3256				gotbno = NULLFSBLOCK;
3257		}
3258		/*
3259		 * No next block, just default.
3260		 */
3261		else
3262			gotbno = NULLFSBLOCK;
3263		/*
3264		 * If both valid, pick the better one, else the only good
3265		 * one, else ap->blkno is already set (to 0 or the inode block).
3266		 */
3267		if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
3268			ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
3269		else if (prevbno != NULLFSBLOCK)
 
 
3270			ap->blkno = prevbno;
3271		else if (gotbno != NULLFSBLOCK)
 
 
3272			ap->blkno = gotbno;
 
 
3273	}
3274#undef ISVALID
 
3275}
3276
3277static int
3278xfs_bmap_longest_free_extent(
 
3279	struct xfs_trans	*tp,
3280	xfs_agnumber_t		ag,
3281	xfs_extlen_t		*blen,
3282	int			*notinit)
3283{
3284	struct xfs_mount	*mp = tp->t_mountp;
3285	struct xfs_perag	*pag;
3286	xfs_extlen_t		longest;
3287	int			error = 0;
3288
3289	pag = xfs_perag_get(mp, ag);
3290	if (!pag->pagf_init) {
3291		error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
3292		if (error) {
3293			/* Couldn't lock the AGF, so skip this AG. */
3294			if (error == -EAGAIN) {
3295				*notinit = 1;
3296				error = 0;
3297			}
3298			goto out;
3299		}
3300	}
3301
3302	longest = xfs_alloc_longest_free_extent(pag,
3303				xfs_alloc_min_freelist(mp, pag),
3304				xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3305	if (*blen < longest)
3306		*blen = longest;
3307
3308out:
3309	xfs_perag_put(pag);
3310	return error;
3311}
3312
3313static void
3314xfs_bmap_select_minlen(
3315	struct xfs_bmalloca	*ap,
3316	struct xfs_alloc_arg	*args,
3317	xfs_extlen_t		*blen,
3318	int			notinit)
3319{
3320	if (notinit || *blen < ap->minlen) {
3321		/*
3322		 * Since we did a BUF_TRYLOCK above, it is possible that
3323		 * there is space for this request.
3324		 */
3325		args->minlen = ap->minlen;
3326	} else if (*blen < args->maxlen) {
3327		/*
3328		 * If the best seen length is less than the request length,
3329		 * use the best as the minimum.
3330		 */
3331		args->minlen = *blen;
3332	} else {
3333		/*
3334		 * Otherwise we've seen an extent as big as maxlen, use that
3335		 * as the minimum.
3336		 */
3337		args->minlen = args->maxlen;
3338	}
3339}
3340
3341STATIC int
3342xfs_bmap_btalloc_nullfb(
3343	struct xfs_bmalloca	*ap,
3344	struct xfs_alloc_arg	*args,
3345	xfs_extlen_t		*blen)
3346{
3347	struct xfs_mount	*mp = ap->ip->i_mount;
3348	xfs_agnumber_t		ag, startag;
3349	int			notinit = 0;
3350	int			error;
 
 
 
 
 
 
3351
3352	args->type = XFS_ALLOCTYPE_START_BNO;
3353	args->total = ap->total;
3354
3355	startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3356	if (startag == NULLAGNUMBER)
3357		startag = ag = 0;
3358
3359	while (*blen < args->maxlen) {
3360		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3361						     &notinit);
3362		if (error)
3363			return error;
3364
3365		if (++ag == mp->m_sb.sb_agcount)
3366			ag = 0;
3367		if (ag == startag)
3368			break;
3369	}
 
 
3370
3371	xfs_bmap_select_minlen(ap, args, blen, notinit);
3372	return 0;
3373}
3374
3375STATIC int
3376xfs_bmap_btalloc_filestreams(
3377	struct xfs_bmalloca	*ap,
3378	struct xfs_alloc_arg	*args,
3379	xfs_extlen_t		*blen)
3380{
3381	struct xfs_mount	*mp = ap->ip->i_mount;
3382	xfs_agnumber_t		ag;
3383	int			notinit = 0;
3384	int			error;
3385
3386	args->type = XFS_ALLOCTYPE_NEAR_BNO;
3387	args->total = ap->total;
3388
3389	ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3390	if (ag == NULLAGNUMBER)
3391		ag = 0;
3392
3393	error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
3394	if (error)
3395		return error;
3396
3397	if (*blen < args->maxlen) {
3398		error = xfs_filestream_new_ag(ap, &ag);
3399		if (error)
3400			return error;
3401
3402		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3403						     &notinit);
3404		if (error)
3405			return error;
3406
3407	}
3408
3409	xfs_bmap_select_minlen(ap, args, blen, notinit);
3410
3411	/*
3412	 * Set the failure fallback case to look in the selected AG as stream
3413	 * may have moved.
3414	 */
3415	ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
3416	return 0;
3417}
3418
3419/* Update all inode and quota accounting for the allocation we just did. */
3420static void
3421xfs_bmap_btalloc_accounting(
3422	struct xfs_bmalloca	*ap,
3423	struct xfs_alloc_arg	*args)
3424{
3425	if (ap->flags & XFS_BMAPI_COWFORK) {
3426		/*
3427		 * COW fork blocks are in-core only and thus are treated as
3428		 * in-core quota reservation (like delalloc blocks) even when
3429		 * converted to real blocks. The quota reservation is not
3430		 * accounted to disk until blocks are remapped to the data
3431		 * fork. So if these blocks were previously delalloc, we
3432		 * already have quota reservation and there's nothing to do
3433		 * yet.
3434		 */
3435		if (ap->wasdel) {
3436			xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
3437			return;
3438		}
3439
3440		/*
3441		 * Otherwise, we've allocated blocks in a hole. The transaction
3442		 * has acquired in-core quota reservation for this extent.
3443		 * Rather than account these as real blocks, however, we reduce
3444		 * the transaction quota reservation based on the allocation.
3445		 * This essentially transfers the transaction quota reservation
3446		 * to that of a delalloc extent.
3447		 */
3448		ap->ip->i_delayed_blks += args->len;
3449		xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS,
3450				-(long)args->len);
 
3451		return;
3452	}
3453
3454	/* data/attr fork only */
3455	ap->ip->i_d.di_nblocks += args->len;
3456	xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3457	if (ap->wasdel) {
3458		ap->ip->i_delayed_blks -= args->len;
3459		xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
 
 
 
3460	}
3461	xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
3462		ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT,
3463		args->len);
3464}
3465
3466STATIC int
3467xfs_bmap_btalloc(
3468	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
 
3469{
3470	xfs_mount_t	*mp;		/* mount point structure */
3471	xfs_alloctype_t	atype = 0;	/* type for allocation routines */
3472	xfs_extlen_t	align = 0;	/* minimum allocation alignment */
3473	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
3474	xfs_agnumber_t	ag;
3475	xfs_alloc_arg_t	args;
3476	xfs_fileoff_t	orig_offset;
3477	xfs_extlen_t	orig_length;
3478	xfs_extlen_t	blen;
3479	xfs_extlen_t	nextminlen = 0;
3480	int		nullfb;		/* true if ap->firstblock isn't set */
3481	int		isaligned;
3482	int		tryagain;
3483	int		error;
3484	int		stripe_align;
3485
3486	ASSERT(ap->length);
3487	orig_offset = ap->offset;
3488	orig_length = ap->length;
3489
3490	mp = ap->ip->i_mount;
3491
3492	/* stripe alignment for allocation is determined by mount parameters */
3493	stripe_align = 0;
3494	if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
3495		stripe_align = mp->m_swidth;
3496	else if (mp->m_dalign)
3497		stripe_align = mp->m_dalign;
3498
3499	if (ap->flags & XFS_BMAPI_COWFORK)
3500		align = xfs_get_cowextsz_hint(ap->ip);
3501	else if (ap->datatype & XFS_ALLOC_USERDATA)
3502		align = xfs_get_extsz_hint(ap->ip);
3503	if (align) {
3504		error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
3505						align, 0, ap->eof, 0, ap->conv,
3506						&ap->offset, &ap->length);
3507		ASSERT(!error);
3508		ASSERT(ap->length);
3509	}
3510
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3511
3512	nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
3513	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
3514							ap->tp->t_firstblock);
3515	if (nullfb) {
3516		if ((ap->datatype & XFS_ALLOC_USERDATA) &&
3517		    xfs_inode_is_filestream(ap->ip)) {
3518			ag = xfs_filestream_lookup_ag(ap->ip);
3519			ag = (ag != NULLAGNUMBER) ? ag : 0;
3520			ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
3521		} else {
3522			ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
3523		}
3524	} else
3525		ap->blkno = ap->tp->t_firstblock;
3526
3527	xfs_bmap_adjacent(ap);
3528
3529	/*
3530	 * If allowed, use ap->blkno; otherwise must use firstblock since
3531	 * it's in the right allocation group.
 
 
 
 
3532	 */
3533	if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
3534		;
3535	else
3536		ap->blkno = ap->tp->t_firstblock;
3537	/*
3538	 * Normal allocation, done through xfs_alloc_vextent.
3539	 */
3540	tryagain = isaligned = 0;
3541	memset(&args, 0, sizeof(args));
3542	args.tp = ap->tp;
3543	args.mp = mp;
3544	args.fsbno = ap->blkno;
3545	args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
 
 
 
 
 
3546
3547	/* Trim the allocation back to the maximum an AG can fit. */
3548	args.maxlen = min(ap->length, mp->m_ag_max_usable);
3549	blen = 0;
3550	if (nullfb) {
3551		/*
3552		 * Search for an allocation group with a single extent large
3553		 * enough for the request.  If one isn't found, then adjust
3554		 * the minimum allocation size to the largest space found.
3555		 */
3556		if ((ap->datatype & XFS_ALLOC_USERDATA) &&
3557		    xfs_inode_is_filestream(ap->ip))
3558			error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
3559		else
3560			error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
3561		if (error)
3562			return error;
3563	} else if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
3564		if (xfs_inode_is_filestream(ap->ip))
3565			args.type = XFS_ALLOCTYPE_FIRST_AG;
3566		else
3567			args.type = XFS_ALLOCTYPE_START_BNO;
3568		args.total = args.minlen = ap->minlen;
3569	} else {
3570		args.type = XFS_ALLOCTYPE_NEAR_BNO;
3571		args.total = ap->total;
3572		args.minlen = ap->minlen;
3573	}
3574	/* apply extent size hints if obtained earlier */
3575	if (align) {
3576		args.prod = align;
3577		div_u64_rem(ap->offset, args.prod, &args.mod);
3578		if (args.mod)
3579			args.mod = args.prod - args.mod;
3580	} else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
3581		args.prod = 1;
3582		args.mod = 0;
3583	} else {
3584		args.prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
3585		div_u64_rem(ap->offset, args.prod, &args.mod);
3586		if (args.mod)
3587			args.mod = args.prod - args.mod;
3588	}
3589	/*
3590	 * If we are not low on available data blocks, and the underlying
3591	 * logical volume manager is a stripe, and the file offset is zero then
3592	 * try to allocate data blocks on stripe unit boundary. NOTE: ap->aeof
3593	 * is only set if the allocation length is >= the stripe unit and the
3594	 * allocation offset is at the end of file.
3595	 */
3596	if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) {
3597		if (!ap->offset) {
3598			args.alignment = stripe_align;
3599			atype = args.type;
3600			isaligned = 1;
3601			/*
3602			 * Adjust minlen to try and preserve alignment if we
3603			 * can't guarantee an aligned maxlen extent.
3604			 */
3605			if (blen > args.alignment &&
3606			    blen <= args.maxlen + args.alignment)
3607				args.minlen = blen - args.alignment;
3608			args.minalignslop = 0;
3609		} else {
3610			/*
3611			 * First try an exact bno allocation.
3612			 * If it fails then do a near or start bno
3613			 * allocation with alignment turned on.
3614			 */
3615			atype = args.type;
3616			tryagain = 1;
3617			args.type = XFS_ALLOCTYPE_THIS_BNO;
3618			args.alignment = 1;
3619			/*
3620			 * Compute the minlen+alignment for the
3621			 * next case.  Set slop so that the value
3622			 * of minlen+alignment+slop doesn't go up
3623			 * between the calls.
3624			 */
3625			if (blen > stripe_align && blen <= args.maxlen)
3626				nextminlen = blen - stripe_align;
3627			else
3628				nextminlen = args.minlen;
3629			if (nextminlen + stripe_align > args.minlen + 1)
3630				args.minalignslop =
3631					nextminlen + stripe_align -
3632					args.minlen - 1;
3633			else
3634				args.minalignslop = 0;
3635		}
3636	} else {
3637		args.alignment = 1;
3638		args.minalignslop = 0;
3639	}
3640	args.minleft = ap->minleft;
3641	args.wasdel = ap->wasdel;
3642	args.resv = XFS_AG_RESV_NONE;
3643	args.datatype = ap->datatype;
3644
3645	error = xfs_alloc_vextent(&args);
3646	if (error)
3647		return error;
3648
3649	if (tryagain && args.fsbno == NULLFSBLOCK) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3650		/*
3651		 * Exact allocation failed. Now try with alignment
3652		 * turned on.
3653		 */
3654		args.type = atype;
3655		args.fsbno = ap->blkno;
3656		args.alignment = stripe_align;
3657		args.minlen = nextminlen;
3658		args.minalignslop = 0;
3659		isaligned = 1;
3660		if ((error = xfs_alloc_vextent(&args)))
3661			return error;
3662	}
3663	if (isaligned && args.fsbno == NULLFSBLOCK) {
3664		/*
3665		 * allocation failed, so turn off alignment and
3666		 * try again.
3667		 */
3668		args.type = atype;
3669		args.fsbno = ap->blkno;
3670		args.alignment = 0;
3671		if ((error = xfs_alloc_vextent(&args)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3672			return error;
3673	}
3674	if (args.fsbno == NULLFSBLOCK && nullfb &&
3675	    args.minlen > ap->minlen) {
3676		args.minlen = ap->minlen;
3677		args.type = XFS_ALLOCTYPE_START_BNO;
3678		args.fsbno = ap->blkno;
3679		if ((error = xfs_alloc_vextent(&args)))
3680			return error;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3681	}
3682	if (args.fsbno == NULLFSBLOCK && nullfb) {
3683		args.fsbno = 0;
3684		args.type = XFS_ALLOCTYPE_FIRST_AG;
3685		args.total = ap->minlen;
3686		if ((error = xfs_alloc_vextent(&args)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3687			return error;
3688		ap->tp->t_flags |= XFS_TRANS_LOWMODE;
3689	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3690	if (args.fsbno != NULLFSBLOCK) {
3691		/*
3692		 * check the allocation happened at the same or higher AG than
3693		 * the first block that was allocated.
3694		 */
3695		ASSERT(ap->tp->t_firstblock == NULLFSBLOCK ||
3696		       XFS_FSB_TO_AGNO(mp, ap->tp->t_firstblock) <=
3697		       XFS_FSB_TO_AGNO(mp, args.fsbno));
3698
3699		ap->blkno = args.fsbno;
3700		if (ap->tp->t_firstblock == NULLFSBLOCK)
3701			ap->tp->t_firstblock = args.fsbno;
3702		ASSERT(nullfb || fb_agno <= args.agno);
3703		ap->length = args.len;
3704		/*
3705		 * If the extent size hint is active, we tried to round the
3706		 * caller's allocation request offset down to extsz and the
3707		 * length up to another extsz boundary.  If we found a free
3708		 * extent we mapped it in starting at this new offset.  If the
3709		 * newly mapped space isn't long enough to cover any of the
3710		 * range of offsets that was originally requested, move the
3711		 * mapping up so that we can fill as much of the caller's
3712		 * original request as possible.  Free space is apparently
3713		 * very fragmented so we're unlikely to be able to satisfy the
3714		 * hints anyway.
3715		 */
3716		if (ap->length <= orig_length)
3717			ap->offset = orig_offset;
3718		else if (ap->offset + ap->length < orig_offset + orig_length)
3719			ap->offset = orig_offset + orig_length - ap->length;
3720		xfs_bmap_btalloc_accounting(ap, &args);
3721	} else {
3722		ap->blkno = NULLFSBLOCK;
3723		ap->length = 0;
3724	}
3725	return 0;
3726}
3727
3728/* Trim extent to fit a logical block range. */
3729void
3730xfs_trim_extent(
3731	struct xfs_bmbt_irec	*irec,
3732	xfs_fileoff_t		bno,
3733	xfs_filblks_t		len)
3734{
3735	xfs_fileoff_t		distance;
3736	xfs_fileoff_t		end = bno + len;
3737
3738	if (irec->br_startoff + irec->br_blockcount <= bno ||
3739	    irec->br_startoff >= end) {
3740		irec->br_blockcount = 0;
3741		return;
3742	}
3743
3744	if (irec->br_startoff < bno) {
3745		distance = bno - irec->br_startoff;
3746		if (isnullstartblock(irec->br_startblock))
3747			irec->br_startblock = DELAYSTARTBLOCK;
3748		if (irec->br_startblock != DELAYSTARTBLOCK &&
3749		    irec->br_startblock != HOLESTARTBLOCK)
3750			irec->br_startblock += distance;
3751		irec->br_startoff += distance;
3752		irec->br_blockcount -= distance;
3753	}
3754
3755	if (end < irec->br_startoff + irec->br_blockcount) {
3756		distance = irec->br_startoff + irec->br_blockcount - end;
3757		irec->br_blockcount -= distance;
3758	}
3759}
3760
3761/*
3762 * Trim the returned map to the required bounds
3763 */
3764STATIC void
3765xfs_bmapi_trim_map(
3766	struct xfs_bmbt_irec	*mval,
3767	struct xfs_bmbt_irec	*got,
3768	xfs_fileoff_t		*bno,
3769	xfs_filblks_t		len,
3770	xfs_fileoff_t		obno,
3771	xfs_fileoff_t		end,
3772	int			n,
3773	int			flags)
3774{
3775	if ((flags & XFS_BMAPI_ENTIRE) ||
3776	    got->br_startoff + got->br_blockcount <= obno) {
3777		*mval = *got;
3778		if (isnullstartblock(got->br_startblock))
3779			mval->br_startblock = DELAYSTARTBLOCK;
3780		return;
3781	}
3782
3783	if (obno > *bno)
3784		*bno = obno;
3785	ASSERT((*bno >= obno) || (n == 0));
3786	ASSERT(*bno < end);
3787	mval->br_startoff = *bno;
3788	if (isnullstartblock(got->br_startblock))
3789		mval->br_startblock = DELAYSTARTBLOCK;
3790	else
3791		mval->br_startblock = got->br_startblock +
3792					(*bno - got->br_startoff);
3793	/*
3794	 * Return the minimum of what we got and what we asked for for
3795	 * the length.  We can use the len variable here because it is
3796	 * modified below and we could have been there before coming
3797	 * here if the first part of the allocation didn't overlap what
3798	 * was asked for.
3799	 */
3800	mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3801			got->br_blockcount - (*bno - got->br_startoff));
3802	mval->br_state = got->br_state;
3803	ASSERT(mval->br_blockcount <= len);
3804	return;
3805}
3806
3807/*
3808 * Update and validate the extent map to return
3809 */
3810STATIC void
3811xfs_bmapi_update_map(
3812	struct xfs_bmbt_irec	**map,
3813	xfs_fileoff_t		*bno,
3814	xfs_filblks_t		*len,
3815	xfs_fileoff_t		obno,
3816	xfs_fileoff_t		end,
3817	int			*n,
3818	int			flags)
3819{
3820	xfs_bmbt_irec_t	*mval = *map;
3821
3822	ASSERT((flags & XFS_BMAPI_ENTIRE) ||
3823	       ((mval->br_startoff + mval->br_blockcount) <= end));
3824	ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
3825	       (mval->br_startoff < obno));
3826
3827	*bno = mval->br_startoff + mval->br_blockcount;
3828	*len = end - *bno;
3829	if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
3830		/* update previous map with new information */
3831		ASSERT(mval->br_startblock == mval[-1].br_startblock);
3832		ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
3833		ASSERT(mval->br_state == mval[-1].br_state);
3834		mval[-1].br_blockcount = mval->br_blockcount;
3835		mval[-1].br_state = mval->br_state;
3836	} else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
3837		   mval[-1].br_startblock != DELAYSTARTBLOCK &&
3838		   mval[-1].br_startblock != HOLESTARTBLOCK &&
3839		   mval->br_startblock == mval[-1].br_startblock +
3840					  mval[-1].br_blockcount &&
3841		   mval[-1].br_state == mval->br_state) {
3842		ASSERT(mval->br_startoff ==
3843		       mval[-1].br_startoff + mval[-1].br_blockcount);
3844		mval[-1].br_blockcount += mval->br_blockcount;
3845	} else if (*n > 0 &&
3846		   mval->br_startblock == DELAYSTARTBLOCK &&
3847		   mval[-1].br_startblock == DELAYSTARTBLOCK &&
3848		   mval->br_startoff ==
3849		   mval[-1].br_startoff + mval[-1].br_blockcount) {
3850		mval[-1].br_blockcount += mval->br_blockcount;
3851		mval[-1].br_state = mval->br_state;
3852	} else if (!((*n == 0) &&
3853		     ((mval->br_startoff + mval->br_blockcount) <=
3854		      obno))) {
3855		mval++;
3856		(*n)++;
3857	}
3858	*map = mval;
3859}
3860
3861/*
3862 * Map file blocks to filesystem blocks without allocation.
3863 */
3864int
3865xfs_bmapi_read(
3866	struct xfs_inode	*ip,
3867	xfs_fileoff_t		bno,
3868	xfs_filblks_t		len,
3869	struct xfs_bmbt_irec	*mval,
3870	int			*nmap,
3871	int			flags)
3872{
3873	struct xfs_mount	*mp = ip->i_mount;
3874	int			whichfork = xfs_bmapi_whichfork(flags);
3875	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
3876	struct xfs_bmbt_irec	got;
3877	xfs_fileoff_t		obno;
3878	xfs_fileoff_t		end;
3879	struct xfs_iext_cursor	icur;
3880	int			error;
3881	bool			eof = false;
3882	int			n = 0;
3883
3884	ASSERT(*nmap >= 1);
3885	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE)));
3886	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
3887
3888	if (WARN_ON_ONCE(!ifp))
3889		return -EFSCORRUPTED;
3890
3891	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
3892	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT))
3893		return -EFSCORRUPTED;
3894
3895	if (XFS_FORCED_SHUTDOWN(mp))
3896		return -EIO;
3897
3898	XFS_STATS_INC(mp, xs_blk_mapr);
3899
3900	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
3901		error = xfs_iread_extents(NULL, ip, whichfork);
3902		if (error)
3903			return error;
3904	}
3905
3906	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
3907		eof = true;
3908	end = bno + len;
3909	obno = bno;
3910
3911	while (bno < end && n < *nmap) {
3912		/* Reading past eof, act as though there's a hole up to end. */
3913		if (eof)
3914			got.br_startoff = end;
3915		if (got.br_startoff > bno) {
3916			/* Reading in a hole.  */
3917			mval->br_startoff = bno;
3918			mval->br_startblock = HOLESTARTBLOCK;
3919			mval->br_blockcount =
3920				XFS_FILBLKS_MIN(len, got.br_startoff - bno);
3921			mval->br_state = XFS_EXT_NORM;
3922			bno += mval->br_blockcount;
3923			len -= mval->br_blockcount;
3924			mval++;
3925			n++;
3926			continue;
3927		}
3928
3929		/* set up the extent map to return. */
3930		xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
3931		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
3932
3933		/* If we're done, stop now. */
3934		if (bno >= end || n >= *nmap)
3935			break;
3936
3937		/* Else go on to the next record. */
3938		if (!xfs_iext_next_extent(ifp, &icur, &got))
3939			eof = true;
3940	}
3941	*nmap = n;
3942	return 0;
3943}
3944
3945/*
3946 * Add a delayed allocation extent to an inode. Blocks are reserved from the
3947 * global pool and the extent inserted into the inode in-core extent tree.
3948 *
3949 * On entry, got refers to the first extent beyond the offset of the extent to
3950 * allocate or eof is specified if no such extent exists. On return, got refers
3951 * to the extent record that was inserted to the inode fork.
3952 *
3953 * Note that the allocated extent may have been merged with contiguous extents
3954 * during insertion into the inode fork. Thus, got does not reflect the current
3955 * state of the inode fork on return. If necessary, the caller can use lastx to
3956 * look up the updated record in the inode fork.
3957 */
3958int
3959xfs_bmapi_reserve_delalloc(
3960	struct xfs_inode	*ip,
3961	int			whichfork,
3962	xfs_fileoff_t		off,
3963	xfs_filblks_t		len,
3964	xfs_filblks_t		prealloc,
3965	struct xfs_bmbt_irec	*got,
3966	struct xfs_iext_cursor	*icur,
3967	int			eof)
3968{
3969	struct xfs_mount	*mp = ip->i_mount;
3970	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
3971	xfs_extlen_t		alen;
3972	xfs_extlen_t		indlen;
3973	int			error;
3974	xfs_fileoff_t		aoff = off;
3975
3976	/*
3977	 * Cap the alloc length. Keep track of prealloc so we know whether to
3978	 * tag the inode before we return.
3979	 */
3980	alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN);
3981	if (!eof)
3982		alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
3983	if (prealloc && alen >= len)
3984		prealloc = alen - len;
3985
3986	/* Figure out the extent size, adjust alen */
3987	if (whichfork == XFS_COW_FORK) {
3988		struct xfs_bmbt_irec	prev;
3989		xfs_extlen_t		extsz = xfs_get_cowextsz_hint(ip);
3990
3991		if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
3992			prev.br_startoff = NULLFILEOFF;
3993
3994		error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
3995					       1, 0, &aoff, &alen);
3996		ASSERT(!error);
3997	}
3998
3999	/*
4000	 * Make a transaction-less quota reservation for delayed allocation
4001	 * blocks.  This number gets adjusted later.  We return if we haven't
4002	 * allocated blocks already inside this loop.
4003	 */
4004	error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
4005						XFS_QMOPT_RES_REGBLKS);
4006	if (error)
4007		return error;
4008
4009	/*
4010	 * Split changing sb for alen and indlen since they could be coming
4011	 * from different places.
4012	 */
4013	indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
4014	ASSERT(indlen > 0);
4015
4016	error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
4017	if (error)
4018		goto out_unreserve_quota;
4019
4020	error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
4021	if (error)
4022		goto out_unreserve_blocks;
4023
4024
4025	ip->i_delayed_blks += alen;
4026	xfs_mod_delalloc(ip->i_mount, alen + indlen);
4027
4028	got->br_startoff = aoff;
4029	got->br_startblock = nullstartblock(indlen);
4030	got->br_blockcount = alen;
4031	got->br_state = XFS_EXT_NORM;
4032
4033	xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
4034
4035	/*
4036	 * Tag the inode if blocks were preallocated. Note that COW fork
4037	 * preallocation can occur at the start or end of the extent, even when
4038	 * prealloc == 0, so we must also check the aligned offset and length.
4039	 */
4040	if (whichfork == XFS_DATA_FORK && prealloc)
4041		xfs_inode_set_eofblocks_tag(ip);
4042	if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
4043		xfs_inode_set_cowblocks_tag(ip);
4044
4045	return 0;
4046
4047out_unreserve_blocks:
4048	xfs_mod_fdblocks(mp, alen, false);
4049out_unreserve_quota:
4050	if (XFS_IS_QUOTA_ON(mp))
4051		xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0,
4052						XFS_QMOPT_RES_REGBLKS);
4053	return error;
4054}
4055
4056static int
4057xfs_bmap_alloc_userdata(
4058	struct xfs_bmalloca	*bma)
4059{
4060	struct xfs_mount	*mp = bma->ip->i_mount;
4061	int			whichfork = xfs_bmapi_whichfork(bma->flags);
4062	int			error;
4063
4064	/*
4065	 * Set the data type being allocated. For the data fork, the first data
4066	 * in the file is treated differently to all other allocations. For the
4067	 * attribute fork, we only need to ensure the allocated range is not on
4068	 * the busy list.
4069	 */
4070	bma->datatype = XFS_ALLOC_NOBUSY;
4071	if (whichfork == XFS_DATA_FORK) {
4072		bma->datatype |= XFS_ALLOC_USERDATA;
4073		if (bma->offset == 0)
4074			bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
4075
4076		if (mp->m_dalign && bma->length >= mp->m_dalign) {
4077			error = xfs_bmap_isaeof(bma, whichfork);
4078			if (error)
4079				return error;
4080		}
4081
4082		if (XFS_IS_REALTIME_INODE(bma->ip))
4083			return xfs_bmap_rtalloc(bma);
4084	}
4085
 
 
 
 
4086	return xfs_bmap_btalloc(bma);
4087}
4088
4089static int
4090xfs_bmapi_allocate(
4091	struct xfs_bmalloca	*bma)
4092{
4093	struct xfs_mount	*mp = bma->ip->i_mount;
4094	int			whichfork = xfs_bmapi_whichfork(bma->flags);
4095	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4096	int			tmp_logflags = 0;
4097	int			error;
4098
4099	ASSERT(bma->length > 0);
4100
4101	/*
4102	 * For the wasdelay case, we could also just allocate the stuff asked
4103	 * for in this bmap call but that wouldn't be as good.
4104	 */
4105	if (bma->wasdel) {
4106		bma->length = (xfs_extlen_t)bma->got.br_blockcount;
4107		bma->offset = bma->got.br_startoff;
4108		if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev))
4109			bma->prev.br_startoff = NULLFILEOFF;
4110	} else {
4111		bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
4112		if (!bma->eof)
4113			bma->length = XFS_FILBLKS_MIN(bma->length,
4114					bma->got.br_startoff - bma->offset);
4115	}
4116
4117	if (bma->flags & XFS_BMAPI_CONTIG)
4118		bma->minlen = bma->length;
4119	else
4120		bma->minlen = 1;
4121
4122	if (bma->flags & XFS_BMAPI_METADATA)
4123		error = xfs_bmap_btalloc(bma);
4124	else
 
 
 
 
4125		error = xfs_bmap_alloc_userdata(bma);
 
4126	if (error || bma->blkno == NULLFSBLOCK)
4127		return error;
4128
4129	if (bma->flags & XFS_BMAPI_ZERO) {
4130		error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
4131		if (error)
4132			return error;
4133	}
4134
4135	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur)
4136		bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4137	/*
4138	 * Bump the number of extents we've allocated
4139	 * in this call.
4140	 */
4141	bma->nallocs++;
4142
4143	if (bma->cur)
4144		bma->cur->bc_ino.flags =
4145			bma->wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
4146
4147	bma->got.br_startoff = bma->offset;
4148	bma->got.br_startblock = bma->blkno;
4149	bma->got.br_blockcount = bma->length;
4150	bma->got.br_state = XFS_EXT_NORM;
4151
4152	if (bma->flags & XFS_BMAPI_PREALLOC)
4153		bma->got.br_state = XFS_EXT_UNWRITTEN;
4154
4155	if (bma->wasdel)
4156		error = xfs_bmap_add_extent_delay_real(bma, whichfork);
4157	else
4158		error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
4159				whichfork, &bma->icur, &bma->cur, &bma->got,
4160				&bma->logflags, bma->flags);
4161
4162	bma->logflags |= tmp_logflags;
4163	if (error)
4164		return error;
4165
4166	/*
4167	 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
4168	 * or xfs_bmap_add_extent_hole_real might have merged it into one of
4169	 * the neighbouring ones.
4170	 */
4171	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4172
4173	ASSERT(bma->got.br_startoff <= bma->offset);
4174	ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
4175	       bma->offset + bma->length);
4176	ASSERT(bma->got.br_state == XFS_EXT_NORM ||
4177	       bma->got.br_state == XFS_EXT_UNWRITTEN);
4178	return 0;
4179}
4180
4181STATIC int
4182xfs_bmapi_convert_unwritten(
4183	struct xfs_bmalloca	*bma,
4184	struct xfs_bmbt_irec	*mval,
4185	xfs_filblks_t		len,
4186	int			flags)
4187{
4188	int			whichfork = xfs_bmapi_whichfork(flags);
4189	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4190	int			tmp_logflags = 0;
4191	int			error;
4192
4193	/* check if we need to do unwritten->real conversion */
4194	if (mval->br_state == XFS_EXT_UNWRITTEN &&
4195	    (flags & XFS_BMAPI_PREALLOC))
4196		return 0;
4197
4198	/* check if we need to do real->unwritten conversion */
4199	if (mval->br_state == XFS_EXT_NORM &&
4200	    (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4201			(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4202		return 0;
4203
4204	/*
4205	 * Modify (by adding) the state flag, if writing.
4206	 */
4207	ASSERT(mval->br_blockcount <= len);
4208	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4209		bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4210					bma->ip, whichfork);
4211	}
4212	mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4213				? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4214
4215	/*
4216	 * Before insertion into the bmbt, zero the range being converted
4217	 * if required.
4218	 */
4219	if (flags & XFS_BMAPI_ZERO) {
4220		error = xfs_zero_extent(bma->ip, mval->br_startblock,
4221					mval->br_blockcount);
4222		if (error)
4223			return error;
4224	}
4225
4226	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
4227			&bma->icur, &bma->cur, mval, &tmp_logflags);
4228	/*
4229	 * Log the inode core unconditionally in the unwritten extent conversion
4230	 * path because the conversion might not have done so (e.g., if the
4231	 * extent count hasn't changed). We need to make sure the inode is dirty
4232	 * in the transaction for the sake of fsync(), even if nothing has
4233	 * changed, because fsync() will not force the log for this transaction
4234	 * unless it sees the inode pinned.
4235	 *
4236	 * Note: If we're only converting cow fork extents, there aren't
4237	 * any on-disk updates to make, so we don't need to log anything.
4238	 */
4239	if (whichfork != XFS_COW_FORK)
4240		bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4241	if (error)
4242		return error;
4243
4244	/*
4245	 * Update our extent pointer, given that
4246	 * xfs_bmap_add_extent_unwritten_real might have merged it into one
4247	 * of the neighbouring ones.
4248	 */
4249	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4250
4251	/*
4252	 * We may have combined previously unwritten space with written space,
4253	 * so generate another request.
4254	 */
4255	if (mval->br_blockcount < len)
4256		return -EAGAIN;
4257	return 0;
4258}
4259
4260static inline xfs_extlen_t
4261xfs_bmapi_minleft(
4262	struct xfs_trans	*tp,
4263	struct xfs_inode	*ip,
4264	int			fork)
4265{
4266	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, fork);
4267
4268	if (tp && tp->t_firstblock != NULLFSBLOCK)
4269		return 0;
4270	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
4271		return 1;
4272	return be16_to_cpu(ifp->if_broot->bb_level) + 1;
4273}
4274
4275/*
4276 * Log whatever the flags say, even if error.  Otherwise we might miss detecting
4277 * a case where the data is changed, there's an error, and it's not logged so we
4278 * don't shutdown when we should.  Don't bother logging extents/btree changes if
4279 * we converted to the other format.
4280 */
4281static void
4282xfs_bmapi_finish(
4283	struct xfs_bmalloca	*bma,
4284	int			whichfork,
4285	int			error)
4286{
4287	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4288
4289	if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
4290	    ifp->if_format != XFS_DINODE_FMT_EXTENTS)
4291		bma->logflags &= ~xfs_ilog_fext(whichfork);
4292	else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
4293		 ifp->if_format != XFS_DINODE_FMT_BTREE)
4294		bma->logflags &= ~xfs_ilog_fbroot(whichfork);
4295
4296	if (bma->logflags)
4297		xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
4298	if (bma->cur)
4299		xfs_btree_del_cursor(bma->cur, error);
4300}
4301
4302/*
4303 * Map file blocks to filesystem blocks, and allocate blocks or convert the
4304 * extent state if necessary.  Details behaviour is controlled by the flags
4305 * parameter.  Only allocates blocks from a single allocation group, to avoid
4306 * locking problems.
4307 */
4308int
4309xfs_bmapi_write(
4310	struct xfs_trans	*tp,		/* transaction pointer */
4311	struct xfs_inode	*ip,		/* incore inode */
4312	xfs_fileoff_t		bno,		/* starting file offs. mapped */
4313	xfs_filblks_t		len,		/* length to map in file */
4314	int			flags,		/* XFS_BMAPI_... */
4315	xfs_extlen_t		total,		/* total blocks needed */
4316	struct xfs_bmbt_irec	*mval,		/* output: map values */
4317	int			*nmap)		/* i/o: mval size/count */
4318{
4319	struct xfs_bmalloca	bma = {
4320		.tp		= tp,
4321		.ip		= ip,
4322		.total		= total,
4323	};
4324	struct xfs_mount	*mp = ip->i_mount;
4325	int			whichfork = xfs_bmapi_whichfork(flags);
4326	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
4327	xfs_fileoff_t		end;		/* end of mapped file region */
4328	bool			eof = false;	/* after the end of extents */
4329	int			error;		/* error return */
4330	int			n;		/* current extent index */
4331	xfs_fileoff_t		obno;		/* old block number (offset) */
4332
4333#ifdef DEBUG
4334	xfs_fileoff_t		orig_bno;	/* original block number value */
4335	int			orig_flags;	/* original flags arg value */
4336	xfs_filblks_t		orig_len;	/* original value of len arg */
4337	struct xfs_bmbt_irec	*orig_mval;	/* original value of mval */
4338	int			orig_nmap;	/* original value of *nmap */
4339
4340	orig_bno = bno;
4341	orig_len = len;
4342	orig_flags = flags;
4343	orig_mval = mval;
4344	orig_nmap = *nmap;
4345#endif
4346
4347	ASSERT(*nmap >= 1);
4348	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4349	ASSERT(tp != NULL);
4350	ASSERT(len > 0);
4351	ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL);
4352	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4353	ASSERT(!(flags & XFS_BMAPI_REMAP));
4354
4355	/* zeroing is for currently only for data extents, not metadata */
4356	ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4357			(XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4358	/*
4359	 * we can allocate unwritten extents or pre-zero allocated blocks,
4360	 * but it makes no sense to do both at once. This would result in
4361	 * zeroing the unwritten extent twice, but it still being an
4362	 * unwritten extent....
4363	 */
4364	ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4365			(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4366
4367	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4368	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4369		return -EFSCORRUPTED;
4370	}
4371
4372	if (XFS_FORCED_SHUTDOWN(mp))
4373		return -EIO;
4374
4375	XFS_STATS_INC(mp, xs_blk_mapw);
4376
4377	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4378		error = xfs_iread_extents(tp, ip, whichfork);
4379		if (error)
4380			goto error0;
4381	}
4382
4383	if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
4384		eof = true;
4385	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4386		bma.prev.br_startoff = NULLFILEOFF;
4387	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4388
4389	n = 0;
4390	end = bno + len;
4391	obno = bno;
4392	while (bno < end && n < *nmap) {
4393		bool			need_alloc = false, wasdelay = false;
4394
4395		/* in hole or beyond EOF? */
4396		if (eof || bma.got.br_startoff > bno) {
4397			/*
4398			 * CoW fork conversions should /never/ hit EOF or
4399			 * holes.  There should always be something for us
4400			 * to work on.
4401			 */
4402			ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
4403			         (flags & XFS_BMAPI_COWFORK)));
4404
4405			need_alloc = true;
4406		} else if (isnullstartblock(bma.got.br_startblock)) {
4407			wasdelay = true;
4408		}
4409
4410		/*
4411		 * First, deal with the hole before the allocated space
4412		 * that we found, if any.
4413		 */
4414		if (need_alloc || wasdelay) {
4415			bma.eof = eof;
4416			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4417			bma.wasdel = wasdelay;
4418			bma.offset = bno;
4419			bma.flags = flags;
4420
4421			/*
4422			 * There's a 32/64 bit type mismatch between the
4423			 * allocation length request (which can be 64 bits in
4424			 * length) and the bma length request, which is
4425			 * xfs_extlen_t and therefore 32 bits. Hence we have to
4426			 * check for 32-bit overflows and handle them here.
4427			 */
4428			if (len > (xfs_filblks_t)MAXEXTLEN)
4429				bma.length = MAXEXTLEN;
4430			else
4431				bma.length = len;
4432
4433			ASSERT(len > 0);
4434			ASSERT(bma.length > 0);
4435			error = xfs_bmapi_allocate(&bma);
4436			if (error)
4437				goto error0;
4438			if (bma.blkno == NULLFSBLOCK)
4439				break;
4440
4441			/*
4442			 * If this is a CoW allocation, record the data in
4443			 * the refcount btree for orphan recovery.
4444			 */
4445			if (whichfork == XFS_COW_FORK)
4446				xfs_refcount_alloc_cow_extent(tp, bma.blkno,
4447						bma.length);
4448		}
4449
4450		/* Deal with the allocated space we found.  */
4451		xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
4452							end, n, flags);
4453
4454		/* Execute unwritten extent conversion if necessary */
4455		error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
4456		if (error == -EAGAIN)
4457			continue;
4458		if (error)
4459			goto error0;
4460
4461		/* update the extent map to return */
4462		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4463
4464		/*
4465		 * If we're done, stop now.  Stop when we've allocated
4466		 * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
4467		 * the transaction may get too big.
4468		 */
4469		if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
4470			break;
4471
4472		/* Else go on to the next record. */
4473		bma.prev = bma.got;
4474		if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
4475			eof = true;
4476	}
4477	*nmap = n;
4478
4479	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4480			whichfork);
4481	if (error)
4482		goto error0;
4483
4484	ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE ||
4485	       ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
4486	xfs_bmapi_finish(&bma, whichfork, 0);
4487	xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
4488		orig_nmap, *nmap);
4489	return 0;
4490error0:
4491	xfs_bmapi_finish(&bma, whichfork, error);
4492	return error;
4493}
4494
4495/*
4496 * Convert an existing delalloc extent to real blocks based on file offset. This
4497 * attempts to allocate the entire delalloc extent and may require multiple
4498 * invocations to allocate the target offset if a large enough physical extent
4499 * is not available.
4500 */
4501int
4502xfs_bmapi_convert_delalloc(
4503	struct xfs_inode	*ip,
4504	int			whichfork,
4505	xfs_off_t		offset,
4506	struct iomap		*iomap,
4507	unsigned int		*seq)
4508{
4509	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
4510	struct xfs_mount	*mp = ip->i_mount;
4511	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
4512	struct xfs_bmalloca	bma = { NULL };
4513	uint16_t		flags = 0;
4514	struct xfs_trans	*tp;
4515	int			error;
4516
4517	if (whichfork == XFS_COW_FORK)
4518		flags |= IOMAP_F_SHARED;
4519
4520	/*
4521	 * Space for the extent and indirect blocks was reserved when the
4522	 * delalloc extent was created so there's no need to do so here.
4523	 */
4524	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
4525				XFS_TRANS_RESERVE, &tp);
4526	if (error)
4527		return error;
4528
4529	xfs_ilock(ip, XFS_ILOCK_EXCL);
4530	xfs_trans_ijoin(tp, ip, 0);
4531
 
 
 
 
 
 
 
 
4532	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
4533	    bma.got.br_startoff > offset_fsb) {
4534		/*
4535		 * No extent found in the range we are trying to convert.  This
4536		 * should only happen for the COW fork, where another thread
4537		 * might have moved the extent to the data fork in the meantime.
4538		 */
4539		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
4540		error = -EAGAIN;
4541		goto out_trans_cancel;
4542	}
4543
4544	/*
4545	 * If we find a real extent here we raced with another thread converting
4546	 * the extent.  Just return the real extent at this offset.
4547	 */
4548	if (!isnullstartblock(bma.got.br_startblock)) {
4549		xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
 
4550		*seq = READ_ONCE(ifp->if_seq);
4551		goto out_trans_cancel;
4552	}
4553
4554	bma.tp = tp;
4555	bma.ip = ip;
4556	bma.wasdel = true;
4557	bma.offset = bma.got.br_startoff;
4558	bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN);
 
4559	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4560
4561	/*
4562	 * When we're converting the delalloc reservations backing dirty pages
4563	 * in the page cache, we must be careful about how we create the new
4564	 * extents:
4565	 *
4566	 * New CoW fork extents are created unwritten, turned into real extents
4567	 * when we're about to write the data to disk, and mapped into the data
4568	 * fork after the write finishes.  End of story.
4569	 *
4570	 * New data fork extents must be mapped in as unwritten and converted
4571	 * to real extents after the write succeeds to avoid exposing stale
4572	 * disk contents if we crash.
4573	 */
4574	bma.flags = XFS_BMAPI_PREALLOC;
4575	if (whichfork == XFS_COW_FORK)
4576		bma.flags |= XFS_BMAPI_COWFORK;
4577
4578	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4579		bma.prev.br_startoff = NULLFILEOFF;
4580
4581	error = xfs_bmapi_allocate(&bma);
4582	if (error)
4583		goto out_finish;
4584
4585	error = -ENOSPC;
4586	if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
4587		goto out_finish;
4588	error = -EFSCORRUPTED;
4589	if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock)))
4590		goto out_finish;
4591
4592	XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
4593	XFS_STATS_INC(mp, xs_xstrat_quick);
4594
4595	ASSERT(!isnullstartblock(bma.got.br_startblock));
4596	xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
 
4597	*seq = READ_ONCE(ifp->if_seq);
4598
4599	if (whichfork == XFS_COW_FORK)
4600		xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
4601
4602	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4603			whichfork);
4604	if (error)
4605		goto out_finish;
4606
4607	xfs_bmapi_finish(&bma, whichfork, 0);
4608	error = xfs_trans_commit(tp);
4609	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4610	return error;
4611
4612out_finish:
4613	xfs_bmapi_finish(&bma, whichfork, error);
4614out_trans_cancel:
4615	xfs_trans_cancel(tp);
4616	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4617	return error;
4618}
4619
4620int
4621xfs_bmapi_remap(
4622	struct xfs_trans	*tp,
4623	struct xfs_inode	*ip,
4624	xfs_fileoff_t		bno,
4625	xfs_filblks_t		len,
4626	xfs_fsblock_t		startblock,
4627	int			flags)
4628{
4629	struct xfs_mount	*mp = ip->i_mount;
4630	struct xfs_ifork	*ifp;
4631	struct xfs_btree_cur	*cur = NULL;
4632	struct xfs_bmbt_irec	got;
4633	struct xfs_iext_cursor	icur;
4634	int			whichfork = xfs_bmapi_whichfork(flags);
4635	int			logflags = 0, error;
4636
4637	ifp = XFS_IFORK_PTR(ip, whichfork);
4638	ASSERT(len > 0);
4639	ASSERT(len <= (xfs_filblks_t)MAXEXTLEN);
4640	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4641	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
4642			   XFS_BMAPI_NORMAP)));
4643	ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
4644			(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
4645
4646	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4647	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4648		return -EFSCORRUPTED;
4649	}
4650
4651	if (XFS_FORCED_SHUTDOWN(mp))
4652		return -EIO;
4653
4654	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4655		error = xfs_iread_extents(tp, ip, whichfork);
4656		if (error)
4657			return error;
4658	}
4659
4660	if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
4661		/* make sure we only reflink into a hole. */
4662		ASSERT(got.br_startoff > bno);
4663		ASSERT(got.br_startoff - bno >= len);
4664	}
4665
4666	ip->i_d.di_nblocks += len;
4667	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4668
4669	if (ifp->if_flags & XFS_IFBROOT) {
4670		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
4671		cur->bc_ino.flags = 0;
4672	}
4673
4674	got.br_startoff = bno;
4675	got.br_startblock = startblock;
4676	got.br_blockcount = len;
4677	if (flags & XFS_BMAPI_PREALLOC)
4678		got.br_state = XFS_EXT_UNWRITTEN;
4679	else
4680		got.br_state = XFS_EXT_NORM;
4681
4682	error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
4683			&cur, &got, &logflags, flags);
4684	if (error)
4685		goto error0;
4686
4687	error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
4688
4689error0:
4690	if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS)
4691		logflags &= ~XFS_ILOG_DEXT;
4692	else if (ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
4693		logflags &= ~XFS_ILOG_DBROOT;
4694
4695	if (logflags)
4696		xfs_trans_log_inode(tp, ip, logflags);
4697	if (cur)
4698		xfs_btree_del_cursor(cur, error);
4699	return error;
4700}
4701
4702/*
4703 * When a delalloc extent is split (e.g., due to a hole punch), the original
4704 * indlen reservation must be shared across the two new extents that are left
4705 * behind.
4706 *
4707 * Given the original reservation and the worst case indlen for the two new
4708 * extents (as calculated by xfs_bmap_worst_indlen()), split the original
4709 * reservation fairly across the two new extents. If necessary, steal available
4710 * blocks from a deleted extent to make up a reservation deficiency (e.g., if
4711 * ores == 1). The number of stolen blocks is returned. The availability and
4712 * subsequent accounting of stolen blocks is the responsibility of the caller.
4713 */
4714static xfs_filblks_t
4715xfs_bmap_split_indlen(
4716	xfs_filblks_t			ores,		/* original res. */
4717	xfs_filblks_t			*indlen1,	/* ext1 worst indlen */
4718	xfs_filblks_t			*indlen2,	/* ext2 worst indlen */
4719	xfs_filblks_t			avail)		/* stealable blocks */
4720{
4721	xfs_filblks_t			len1 = *indlen1;
4722	xfs_filblks_t			len2 = *indlen2;
4723	xfs_filblks_t			nres = len1 + len2; /* new total res. */
4724	xfs_filblks_t			stolen = 0;
4725	xfs_filblks_t			resfactor;
4726
4727	/*
4728	 * Steal as many blocks as we can to try and satisfy the worst case
4729	 * indlen for both new extents.
4730	 */
4731	if (ores < nres && avail)
4732		stolen = XFS_FILBLKS_MIN(nres - ores, avail);
4733	ores += stolen;
4734
4735	 /* nothing else to do if we've satisfied the new reservation */
4736	if (ores >= nres)
4737		return stolen;
4738
4739	/*
4740	 * We can't meet the total required reservation for the two extents.
4741	 * Calculate the percent of the overall shortage between both extents
4742	 * and apply this percentage to each of the requested indlen values.
4743	 * This distributes the shortage fairly and reduces the chances that one
4744	 * of the two extents is left with nothing when extents are repeatedly
4745	 * split.
4746	 */
4747	resfactor = (ores * 100);
4748	do_div(resfactor, nres);
4749	len1 *= resfactor;
4750	do_div(len1, 100);
4751	len2 *= resfactor;
4752	do_div(len2, 100);
4753	ASSERT(len1 + len2 <= ores);
4754	ASSERT(len1 < *indlen1 && len2 < *indlen2);
4755
4756	/*
4757	 * Hand out the remainder to each extent. If one of the two reservations
4758	 * is zero, we want to make sure that one gets a block first. The loop
4759	 * below starts with len1, so hand len2 a block right off the bat if it
4760	 * is zero.
4761	 */
4762	ores -= (len1 + len2);
4763	ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
4764	if (ores && !len2 && *indlen2) {
4765		len2++;
4766		ores--;
4767	}
4768	while (ores) {
4769		if (len1 < *indlen1) {
4770			len1++;
4771			ores--;
4772		}
4773		if (!ores)
4774			break;
4775		if (len2 < *indlen2) {
4776			len2++;
4777			ores--;
4778		}
4779	}
4780
4781	*indlen1 = len1;
4782	*indlen2 = len2;
4783
4784	return stolen;
4785}
4786
4787int
4788xfs_bmap_del_extent_delay(
4789	struct xfs_inode	*ip,
4790	int			whichfork,
4791	struct xfs_iext_cursor	*icur,
4792	struct xfs_bmbt_irec	*got,
4793	struct xfs_bmbt_irec	*del)
4794{
4795	struct xfs_mount	*mp = ip->i_mount;
4796	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
4797	struct xfs_bmbt_irec	new;
4798	int64_t			da_old, da_new, da_diff = 0;
4799	xfs_fileoff_t		del_endoff, got_endoff;
4800	xfs_filblks_t		got_indlen, new_indlen, stolen;
4801	int			state = xfs_bmap_fork_to_state(whichfork);
4802	int			error = 0;
4803	bool			isrt;
4804
4805	XFS_STATS_INC(mp, xs_del_exlist);
4806
4807	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
4808	del_endoff = del->br_startoff + del->br_blockcount;
4809	got_endoff = got->br_startoff + got->br_blockcount;
4810	da_old = startblockval(got->br_startblock);
4811	da_new = 0;
4812
4813	ASSERT(del->br_blockcount > 0);
4814	ASSERT(got->br_startoff <= del->br_startoff);
4815	ASSERT(got_endoff >= del_endoff);
4816
4817	if (isrt) {
4818		uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
4819
4820		do_div(rtexts, mp->m_sb.sb_rextsize);
4821		xfs_mod_frextents(mp, rtexts);
4822	}
4823
4824	/*
4825	 * Update the inode delalloc counter now and wait to update the
4826	 * sb counters as we might have to borrow some blocks for the
4827	 * indirect block accounting.
4828	 */
4829	error = xfs_trans_reserve_quota_nblks(NULL, ip,
4830			-((long)del->br_blockcount), 0,
4831			isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4832	if (error)
4833		return error;
4834	ip->i_delayed_blks -= del->br_blockcount;
4835
4836	if (got->br_startoff == del->br_startoff)
4837		state |= BMAP_LEFT_FILLING;
4838	if (got_endoff == del_endoff)
4839		state |= BMAP_RIGHT_FILLING;
4840
4841	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4842	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4843		/*
4844		 * Matches the whole extent.  Delete the entry.
4845		 */
4846		xfs_iext_remove(ip, icur, state);
4847		xfs_iext_prev(ifp, icur);
4848		break;
4849	case BMAP_LEFT_FILLING:
4850		/*
4851		 * Deleting the first part of the extent.
4852		 */
4853		got->br_startoff = del_endoff;
4854		got->br_blockcount -= del->br_blockcount;
4855		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4856				got->br_blockcount), da_old);
4857		got->br_startblock = nullstartblock((int)da_new);
4858		xfs_iext_update_extent(ip, state, icur, got);
4859		break;
4860	case BMAP_RIGHT_FILLING:
4861		/*
4862		 * Deleting the last part of the extent.
4863		 */
4864		got->br_blockcount = got->br_blockcount - del->br_blockcount;
4865		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4866				got->br_blockcount), da_old);
4867		got->br_startblock = nullstartblock((int)da_new);
4868		xfs_iext_update_extent(ip, state, icur, got);
4869		break;
4870	case 0:
4871		/*
4872		 * Deleting the middle of the extent.
4873		 *
4874		 * Distribute the original indlen reservation across the two new
4875		 * extents.  Steal blocks from the deleted extent if necessary.
4876		 * Stealing blocks simply fudges the fdblocks accounting below.
4877		 * Warn if either of the new indlen reservations is zero as this
4878		 * can lead to delalloc problems.
4879		 */
4880		got->br_blockcount = del->br_startoff - got->br_startoff;
4881		got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
4882
4883		new.br_blockcount = got_endoff - del_endoff;
4884		new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
4885
4886		WARN_ON_ONCE(!got_indlen || !new_indlen);
4887		stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
4888						       del->br_blockcount);
4889
4890		got->br_startblock = nullstartblock((int)got_indlen);
4891
4892		new.br_startoff = del_endoff;
4893		new.br_state = got->br_state;
4894		new.br_startblock = nullstartblock((int)new_indlen);
4895
4896		xfs_iext_update_extent(ip, state, icur, got);
4897		xfs_iext_next(ifp, icur);
4898		xfs_iext_insert(ip, icur, &new, state);
4899
4900		da_new = got_indlen + new_indlen - stolen;
4901		del->br_blockcount -= stolen;
4902		break;
4903	}
4904
4905	ASSERT(da_old >= da_new);
4906	da_diff = da_old - da_new;
4907	if (!isrt)
4908		da_diff += del->br_blockcount;
4909	if (da_diff) {
4910		xfs_mod_fdblocks(mp, da_diff, false);
4911		xfs_mod_delalloc(mp, -da_diff);
4912	}
4913	return error;
4914}
4915
4916void
4917xfs_bmap_del_extent_cow(
4918	struct xfs_inode	*ip,
4919	struct xfs_iext_cursor	*icur,
4920	struct xfs_bmbt_irec	*got,
4921	struct xfs_bmbt_irec	*del)
4922{
4923	struct xfs_mount	*mp = ip->i_mount;
4924	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
4925	struct xfs_bmbt_irec	new;
4926	xfs_fileoff_t		del_endoff, got_endoff;
4927	int			state = BMAP_COWFORK;
4928
4929	XFS_STATS_INC(mp, xs_del_exlist);
4930
4931	del_endoff = del->br_startoff + del->br_blockcount;
4932	got_endoff = got->br_startoff + got->br_blockcount;
4933
4934	ASSERT(del->br_blockcount > 0);
4935	ASSERT(got->br_startoff <= del->br_startoff);
4936	ASSERT(got_endoff >= del_endoff);
4937	ASSERT(!isnullstartblock(got->br_startblock));
4938
4939	if (got->br_startoff == del->br_startoff)
4940		state |= BMAP_LEFT_FILLING;
4941	if (got_endoff == del_endoff)
4942		state |= BMAP_RIGHT_FILLING;
4943
4944	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4945	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4946		/*
4947		 * Matches the whole extent.  Delete the entry.
4948		 */
4949		xfs_iext_remove(ip, icur, state);
4950		xfs_iext_prev(ifp, icur);
4951		break;
4952	case BMAP_LEFT_FILLING:
4953		/*
4954		 * Deleting the first part of the extent.
4955		 */
4956		got->br_startoff = del_endoff;
4957		got->br_blockcount -= del->br_blockcount;
4958		got->br_startblock = del->br_startblock + del->br_blockcount;
4959		xfs_iext_update_extent(ip, state, icur, got);
4960		break;
4961	case BMAP_RIGHT_FILLING:
4962		/*
4963		 * Deleting the last part of the extent.
4964		 */
4965		got->br_blockcount -= del->br_blockcount;
4966		xfs_iext_update_extent(ip, state, icur, got);
4967		break;
4968	case 0:
4969		/*
4970		 * Deleting the middle of the extent.
4971		 */
4972		got->br_blockcount = del->br_startoff - got->br_startoff;
4973
4974		new.br_startoff = del_endoff;
4975		new.br_blockcount = got_endoff - del_endoff;
4976		new.br_state = got->br_state;
4977		new.br_startblock = del->br_startblock + del->br_blockcount;
4978
4979		xfs_iext_update_extent(ip, state, icur, got);
4980		xfs_iext_next(ifp, icur);
4981		xfs_iext_insert(ip, icur, &new, state);
4982		break;
4983	}
4984	ip->i_delayed_blks -= del->br_blockcount;
4985}
4986
4987/*
4988 * Called by xfs_bmapi to update file extent records and the btree
4989 * after removing space.
4990 */
4991STATIC int				/* error */
4992xfs_bmap_del_extent_real(
4993	xfs_inode_t		*ip,	/* incore inode pointer */
4994	xfs_trans_t		*tp,	/* current transaction pointer */
4995	struct xfs_iext_cursor	*icur,
4996	xfs_btree_cur_t		*cur,	/* if null, not a btree */
4997	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
4998	int			*logflagsp, /* inode logging flags */
4999	int			whichfork, /* data or attr fork */
5000	int			bflags)	/* bmapi flags */
5001{
5002	xfs_fsblock_t		del_endblock=0;	/* first block past del */
5003	xfs_fileoff_t		del_endoff;	/* first offset past del */
5004	int			do_fx;	/* free extent at end of routine */
5005	int			error;	/* error return value */
5006	int			flags = 0;/* inode logging flags */
5007	struct xfs_bmbt_irec	got;	/* current extent entry */
5008	xfs_fileoff_t		got_endoff;	/* first offset past got */
5009	int			i;	/* temp state */
5010	struct xfs_ifork	*ifp;	/* inode fork pointer */
5011	xfs_mount_t		*mp;	/* mount structure */
5012	xfs_filblks_t		nblks;	/* quota/sb block count */
5013	xfs_bmbt_irec_t		new;	/* new record to be inserted */
5014	/* REFERENCED */
5015	uint			qfield;	/* quota field to update */
5016	int			state = xfs_bmap_fork_to_state(whichfork);
5017	struct xfs_bmbt_irec	old;
5018
 
 
5019	mp = ip->i_mount;
5020	XFS_STATS_INC(mp, xs_del_exlist);
5021
5022	ifp = XFS_IFORK_PTR(ip, whichfork);
5023	ASSERT(del->br_blockcount > 0);
5024	xfs_iext_get_extent(ifp, icur, &got);
5025	ASSERT(got.br_startoff <= del->br_startoff);
5026	del_endoff = del->br_startoff + del->br_blockcount;
5027	got_endoff = got.br_startoff + got.br_blockcount;
5028	ASSERT(got_endoff >= del_endoff);
5029	ASSERT(!isnullstartblock(got.br_startblock));
5030	qfield = 0;
5031	error = 0;
5032
5033	/*
5034	 * If it's the case where the directory code is running with no block
5035	 * reservation, and the deleted block is in the middle of its extent,
5036	 * and the resulting insert of an extent would cause transformation to
5037	 * btree format, then reject it.  The calling code will then swap blocks
5038	 * around instead.  We have to do this now, rather than waiting for the
5039	 * conversion to btree format, since the transaction will be dirty then.
5040	 */
5041	if (tp->t_blk_res == 0 &&
5042	    ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
5043	    ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
5044	    del->br_startoff > got.br_startoff && del_endoff < got_endoff)
5045		return -ENOSPC;
5046
5047	flags = XFS_ILOG_CORE;
5048	if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
5049		xfs_fsblock_t	bno;
5050		xfs_filblks_t	len;
5051		xfs_extlen_t	mod;
5052
5053		bno = div_u64_rem(del->br_startblock, mp->m_sb.sb_rextsize,
5054				  &mod);
5055		ASSERT(mod == 0);
5056		len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize,
5057				  &mod);
5058		ASSERT(mod == 0);
5059
5060		error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
5061		if (error)
5062			goto done;
5063		do_fx = 0;
5064		nblks = len * mp->m_sb.sb_rextsize;
5065		qfield = XFS_TRANS_DQ_RTBCOUNT;
5066	} else {
5067		do_fx = 1;
5068		nblks = del->br_blockcount;
5069		qfield = XFS_TRANS_DQ_BCOUNT;
5070	}
 
5071
5072	del_endblock = del->br_startblock + del->br_blockcount;
5073	if (cur) {
5074		error = xfs_bmbt_lookup_eq(cur, &got, &i);
5075		if (error)
5076			goto done;
5077		if (XFS_IS_CORRUPT(mp, i != 1)) {
5078			error = -EFSCORRUPTED;
5079			goto done;
5080		}
5081	}
5082
5083	if (got.br_startoff == del->br_startoff)
5084		state |= BMAP_LEFT_FILLING;
5085	if (got_endoff == del_endoff)
5086		state |= BMAP_RIGHT_FILLING;
5087
5088	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5089	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5090		/*
5091		 * Matches the whole extent.  Delete the entry.
5092		 */
5093		xfs_iext_remove(ip, icur, state);
5094		xfs_iext_prev(ifp, icur);
5095		ifp->if_nextents--;
5096
5097		flags |= XFS_ILOG_CORE;
5098		if (!cur) {
5099			flags |= xfs_ilog_fext(whichfork);
5100			break;
5101		}
5102		if ((error = xfs_btree_delete(cur, &i)))
5103			goto done;
5104		if (XFS_IS_CORRUPT(mp, i != 1)) {
5105			error = -EFSCORRUPTED;
5106			goto done;
5107		}
5108		break;
5109	case BMAP_LEFT_FILLING:
5110		/*
5111		 * Deleting the first part of the extent.
5112		 */
5113		got.br_startoff = del_endoff;
5114		got.br_startblock = del_endblock;
5115		got.br_blockcount -= del->br_blockcount;
5116		xfs_iext_update_extent(ip, state, icur, &got);
5117		if (!cur) {
5118			flags |= xfs_ilog_fext(whichfork);
5119			break;
5120		}
5121		error = xfs_bmbt_update(cur, &got);
5122		if (error)
5123			goto done;
5124		break;
5125	case BMAP_RIGHT_FILLING:
5126		/*
5127		 * Deleting the last part of the extent.
5128		 */
5129		got.br_blockcount -= del->br_blockcount;
5130		xfs_iext_update_extent(ip, state, icur, &got);
5131		if (!cur) {
5132			flags |= xfs_ilog_fext(whichfork);
5133			break;
5134		}
5135		error = xfs_bmbt_update(cur, &got);
5136		if (error)
5137			goto done;
5138		break;
5139	case 0:
5140		/*
5141		 * Deleting the middle of the extent.
5142		 */
 
5143		old = got;
5144
5145		got.br_blockcount = del->br_startoff - got.br_startoff;
5146		xfs_iext_update_extent(ip, state, icur, &got);
5147
5148		new.br_startoff = del_endoff;
5149		new.br_blockcount = got_endoff - del_endoff;
5150		new.br_state = got.br_state;
5151		new.br_startblock = del_endblock;
5152
5153		flags |= XFS_ILOG_CORE;
5154		if (cur) {
5155			error = xfs_bmbt_update(cur, &got);
5156			if (error)
5157				goto done;
5158			error = xfs_btree_increment(cur, 0, &i);
5159			if (error)
5160				goto done;
5161			cur->bc_rec.b = new;
5162			error = xfs_btree_insert(cur, &i);
5163			if (error && error != -ENOSPC)
5164				goto done;
5165			/*
5166			 * If get no-space back from btree insert, it tried a
5167			 * split, and we have a zero block reservation.  Fix up
5168			 * our state and return the error.
5169			 */
5170			if (error == -ENOSPC) {
5171				/*
5172				 * Reset the cursor, don't trust it after any
5173				 * insert operation.
5174				 */
5175				error = xfs_bmbt_lookup_eq(cur, &got, &i);
5176				if (error)
5177					goto done;
5178				if (XFS_IS_CORRUPT(mp, i != 1)) {
5179					error = -EFSCORRUPTED;
5180					goto done;
5181				}
5182				/*
5183				 * Update the btree record back
5184				 * to the original value.
5185				 */
5186				error = xfs_bmbt_update(cur, &old);
5187				if (error)
5188					goto done;
5189				/*
5190				 * Reset the extent record back
5191				 * to the original value.
5192				 */
5193				xfs_iext_update_extent(ip, state, icur, &old);
5194				flags = 0;
5195				error = -ENOSPC;
5196				goto done;
5197			}
5198			if (XFS_IS_CORRUPT(mp, i != 1)) {
5199				error = -EFSCORRUPTED;
5200				goto done;
5201			}
 
 
5202		} else
5203			flags |= xfs_ilog_fext(whichfork);
5204
5205		ifp->if_nextents++;
5206		xfs_iext_next(ifp, icur);
5207		xfs_iext_insert(ip, icur, &new, state);
5208		break;
5209	}
5210
5211	/* remove reverse mapping */
5212	xfs_rmap_unmap_extent(tp, ip, whichfork, del);
5213
5214	/*
5215	 * If we need to, add to list of extents to delete.
5216	 */
5217	if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
5218		if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
5219			xfs_refcount_decrease_extent(tp, del);
5220		} else {
5221			__xfs_bmap_add_free(tp, del->br_startblock,
5222					del->br_blockcount, NULL,
5223					(bflags & XFS_BMAPI_NODISCARD) ||
5224					del->br_state == XFS_EXT_UNWRITTEN);
 
 
 
5225		}
5226	}
5227
5228	/*
5229	 * Adjust inode # blocks in the file.
5230	 */
5231	if (nblks)
5232		ip->i_d.di_nblocks -= nblks;
5233	/*
5234	 * Adjust quota data.
5235	 */
5236	if (qfield && !(bflags & XFS_BMAPI_REMAP))
5237		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5238
5239done:
5240	*logflagsp = flags;
5241	return error;
5242}
5243
5244/*
5245 * Unmap (remove) blocks from a file.
5246 * If nexts is nonzero then the number of extents to remove is limited to
5247 * that value.  If not all extents in the block range can be removed then
5248 * *done is set.
5249 */
5250int						/* error */
5251__xfs_bunmapi(
5252	struct xfs_trans	*tp,		/* transaction pointer */
5253	struct xfs_inode	*ip,		/* incore inode */
5254	xfs_fileoff_t		start,		/* first file offset deleted */
5255	xfs_filblks_t		*rlen,		/* i/o: amount remaining */
5256	int			flags,		/* misc flags */
5257	xfs_extnum_t		nexts)		/* number of extents max */
5258{
5259	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
5260	struct xfs_bmbt_irec	del;		/* extent being deleted */
5261	int			error;		/* error return value */
5262	xfs_extnum_t		extno;		/* extent number in list */
5263	struct xfs_bmbt_irec	got;		/* current extent record */
5264	struct xfs_ifork	*ifp;		/* inode fork pointer */
5265	int			isrt;		/* freeing in rt area */
5266	int			logflags;	/* transaction logging flags */
5267	xfs_extlen_t		mod;		/* rt extent offset */
5268	struct xfs_mount	*mp = ip->i_mount;
5269	int			tmp_logflags;	/* partial logging flags */
5270	int			wasdel;		/* was a delayed alloc extent */
5271	int			whichfork;	/* data or attribute fork */
5272	xfs_fsblock_t		sum;
5273	xfs_filblks_t		len = *rlen;	/* length to unmap in file */
5274	xfs_fileoff_t		max_len;
5275	xfs_agnumber_t		prev_agno = NULLAGNUMBER, agno;
5276	xfs_fileoff_t		end;
5277	struct xfs_iext_cursor	icur;
5278	bool			done = false;
5279
5280	trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);
5281
5282	whichfork = xfs_bmapi_whichfork(flags);
5283	ASSERT(whichfork != XFS_COW_FORK);
5284	ifp = XFS_IFORK_PTR(ip, whichfork);
5285	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)))
5286		return -EFSCORRUPTED;
5287	if (XFS_FORCED_SHUTDOWN(mp))
5288		return -EIO;
5289
5290	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5291	ASSERT(len > 0);
5292	ASSERT(nexts >= 0);
5293
5294	/*
5295	 * Guesstimate how many blocks we can unmap without running the risk of
5296	 * blowing out the transaction with a mix of EFIs and reflink
5297	 * adjustments.
5298	 */
5299	if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
5300		max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
5301	else
5302		max_len = len;
5303
5304	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5305	    (error = xfs_iread_extents(tp, ip, whichfork)))
5306		return error;
5307	if (xfs_iext_count(ifp) == 0) {
5308		*rlen = 0;
5309		return 0;
5310	}
5311	XFS_STATS_INC(mp, xs_blk_unmap);
5312	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5313	end = start + len;
5314
5315	if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
5316		*rlen = 0;
5317		return 0;
5318	}
5319	end--;
5320
5321	logflags = 0;
5322	if (ifp->if_flags & XFS_IFBROOT) {
5323		ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
5324		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5325		cur->bc_ino.flags = 0;
5326	} else
5327		cur = NULL;
5328
5329	if (isrt) {
5330		/*
5331		 * Synchronize by locking the bitmap inode.
5332		 */
5333		xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
5334		xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5335		xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
5336		xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
5337	}
5338
5339	extno = 0;
5340	while (end != (xfs_fileoff_t)-1 && end >= start &&
5341	       (nexts == 0 || extno < nexts) && max_len > 0) {
5342		/*
5343		 * Is the found extent after a hole in which end lives?
5344		 * Just back up to the previous extent, if so.
5345		 */
5346		if (got.br_startoff > end &&
5347		    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5348			done = true;
5349			break;
5350		}
5351		/*
5352		 * Is the last block of this extent before the range
5353		 * we're supposed to delete?  If so, we're done.
5354		 */
5355		end = XFS_FILEOFF_MIN(end,
5356			got.br_startoff + got.br_blockcount - 1);
5357		if (end < start)
5358			break;
5359		/*
5360		 * Then deal with the (possibly delayed) allocated space
5361		 * we found.
5362		 */
5363		del = got;
5364		wasdel = isnullstartblock(del.br_startblock);
5365
5366		/*
5367		 * Make sure we don't touch multiple AGF headers out of order
5368		 * in a single transaction, as that could cause AB-BA deadlocks.
5369		 */
5370		if (!wasdel && !isrt) {
5371			agno = XFS_FSB_TO_AGNO(mp, del.br_startblock);
5372			if (prev_agno != NULLAGNUMBER && prev_agno > agno)
5373				break;
5374			prev_agno = agno;
5375		}
5376		if (got.br_startoff < start) {
5377			del.br_startoff = start;
5378			del.br_blockcount -= start - got.br_startoff;
5379			if (!wasdel)
5380				del.br_startblock += start - got.br_startoff;
5381		}
5382		if (del.br_startoff + del.br_blockcount > end + 1)
5383			del.br_blockcount = end + 1 - del.br_startoff;
5384
5385		/* How much can we safely unmap? */
5386		if (max_len < del.br_blockcount) {
5387			del.br_startoff += del.br_blockcount - max_len;
5388			if (!wasdel)
5389				del.br_startblock += del.br_blockcount - max_len;
5390			del.br_blockcount = max_len;
5391		}
5392
5393		if (!isrt)
5394			goto delete;
5395
5396		sum = del.br_startblock + del.br_blockcount;
5397		div_u64_rem(sum, mp->m_sb.sb_rextsize, &mod);
5398		if (mod) {
5399			/*
5400			 * Realtime extent not lined up at the end.
5401			 * The extent could have been split into written
5402			 * and unwritten pieces, or we could just be
5403			 * unmapping part of it.  But we can't really
5404			 * get rid of part of a realtime extent.
5405			 */
5406			if (del.br_state == XFS_EXT_UNWRITTEN) {
5407				/*
5408				 * This piece is unwritten, or we're not
5409				 * using unwritten extents.  Skip over it.
5410				 */
5411				ASSERT(end >= mod);
5412				end -= mod > del.br_blockcount ?
5413					del.br_blockcount : mod;
5414				if (end < got.br_startoff &&
5415				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5416					done = true;
5417					break;
5418				}
5419				continue;
5420			}
5421			/*
5422			 * It's written, turn it unwritten.
5423			 * This is better than zeroing it.
5424			 */
5425			ASSERT(del.br_state == XFS_EXT_NORM);
5426			ASSERT(tp->t_blk_res > 0);
5427			/*
5428			 * If this spans a realtime extent boundary,
5429			 * chop it back to the start of the one we end at.
5430			 */
5431			if (del.br_blockcount > mod) {
5432				del.br_startoff += del.br_blockcount - mod;
5433				del.br_startblock += del.br_blockcount - mod;
5434				del.br_blockcount = mod;
5435			}
5436			del.br_state = XFS_EXT_UNWRITTEN;
5437			error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5438					whichfork, &icur, &cur, &del,
5439					&logflags);
5440			if (error)
5441				goto error0;
5442			goto nodelete;
5443		}
5444		div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod);
 
5445		if (mod) {
5446			xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;
5447
5448			/*
5449			 * Realtime extent is lined up at the end but not
5450			 * at the front.  We'll get rid of full extents if
5451			 * we can.
5452			 */
5453			if (del.br_blockcount > off) {
5454				del.br_blockcount -= off;
5455				del.br_startoff += off;
5456				del.br_startblock += off;
5457			} else if (del.br_startoff == start &&
5458				   (del.br_state == XFS_EXT_UNWRITTEN ||
5459				    tp->t_blk_res == 0)) {
5460				/*
5461				 * Can't make it unwritten.  There isn't
5462				 * a full extent here so just skip it.
5463				 */
5464				ASSERT(end >= del.br_blockcount);
5465				end -= del.br_blockcount;
5466				if (got.br_startoff > end &&
5467				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5468					done = true;
5469					break;
5470				}
5471				continue;
5472			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
5473				struct xfs_bmbt_irec	prev;
5474				xfs_fileoff_t		unwrite_start;
5475
5476				/*
5477				 * This one is already unwritten.
5478				 * It must have a written left neighbor.
5479				 * Unwrite the killed part of that one and
5480				 * try again.
5481				 */
5482				if (!xfs_iext_prev_extent(ifp, &icur, &prev))
5483					ASSERT(0);
5484				ASSERT(prev.br_state == XFS_EXT_NORM);
5485				ASSERT(!isnullstartblock(prev.br_startblock));
5486				ASSERT(del.br_startblock ==
5487				       prev.br_startblock + prev.br_blockcount);
5488				unwrite_start = max3(start,
5489						     del.br_startoff - mod,
5490						     prev.br_startoff);
5491				mod = unwrite_start - prev.br_startoff;
5492				prev.br_startoff = unwrite_start;
5493				prev.br_startblock += mod;
5494				prev.br_blockcount -= mod;
5495				prev.br_state = XFS_EXT_UNWRITTEN;
5496				error = xfs_bmap_add_extent_unwritten_real(tp,
5497						ip, whichfork, &icur, &cur,
5498						&prev, &logflags);
5499				if (error)
5500					goto error0;
5501				goto nodelete;
5502			} else {
5503				ASSERT(del.br_state == XFS_EXT_NORM);
5504				del.br_state = XFS_EXT_UNWRITTEN;
5505				error = xfs_bmap_add_extent_unwritten_real(tp,
5506						ip, whichfork, &icur, &cur,
5507						&del, &logflags);
5508				if (error)
5509					goto error0;
5510				goto nodelete;
5511			}
5512		}
5513
5514delete:
5515		if (wasdel) {
5516			error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
5517					&got, &del);
5518		} else {
5519			error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
5520					&del, &tmp_logflags, whichfork,
5521					flags);
5522			logflags |= tmp_logflags;
5523		}
5524
5525		if (error)
5526			goto error0;
5527
5528		max_len -= del.br_blockcount;
5529		end = del.br_startoff - 1;
5530nodelete:
5531		/*
5532		 * If not done go on to the next (previous) record.
5533		 */
5534		if (end != (xfs_fileoff_t)-1 && end >= start) {
5535			if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5536			    (got.br_startoff > end &&
5537			     !xfs_iext_prev_extent(ifp, &icur, &got))) {
5538				done = true;
5539				break;
5540			}
5541			extno++;
5542		}
5543	}
5544	if (done || end == (xfs_fileoff_t)-1 || end < start)
5545		*rlen = 0;
5546	else
5547		*rlen = end - start + 1;
5548
5549	/*
5550	 * Convert to a btree if necessary.
5551	 */
5552	if (xfs_bmap_needs_btree(ip, whichfork)) {
5553		ASSERT(cur == NULL);
5554		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
5555				&tmp_logflags, whichfork);
5556		logflags |= tmp_logflags;
5557	} else {
5558		error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags,
5559			whichfork);
5560	}
5561
5562error0:
5563	/*
5564	 * Log everything.  Do this after conversion, there's no point in
5565	 * logging the extent records if we've converted to btree format.
5566	 */
5567	if ((logflags & xfs_ilog_fext(whichfork)) &&
5568	    ifp->if_format != XFS_DINODE_FMT_EXTENTS)
5569		logflags &= ~xfs_ilog_fext(whichfork);
5570	else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5571		 ifp->if_format != XFS_DINODE_FMT_BTREE)
5572		logflags &= ~xfs_ilog_fbroot(whichfork);
5573	/*
5574	 * Log inode even in the error case, if the transaction
5575	 * is dirty we'll need to shut down the filesystem.
5576	 */
5577	if (logflags)
5578		xfs_trans_log_inode(tp, ip, logflags);
5579	if (cur) {
5580		if (!error)
5581			cur->bc_ino.allocated = 0;
5582		xfs_btree_del_cursor(cur, error);
5583	}
5584	return error;
5585}
5586
5587/* Unmap a range of a file. */
5588int
5589xfs_bunmapi(
5590	xfs_trans_t		*tp,
5591	struct xfs_inode	*ip,
5592	xfs_fileoff_t		bno,
5593	xfs_filblks_t		len,
5594	int			flags,
5595	xfs_extnum_t		nexts,
5596	int			*done)
5597{
5598	int			error;
5599
5600	error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts);
5601	*done = (len == 0);
5602	return error;
5603}
5604
5605/*
5606 * Determine whether an extent shift can be accomplished by a merge with the
5607 * extent that precedes the target hole of the shift.
5608 */
5609STATIC bool
5610xfs_bmse_can_merge(
5611	struct xfs_bmbt_irec	*left,	/* preceding extent */
5612	struct xfs_bmbt_irec	*got,	/* current extent to shift */
5613	xfs_fileoff_t		shift)	/* shift fsb */
5614{
5615	xfs_fileoff_t		startoff;
5616
5617	startoff = got->br_startoff - shift;
5618
5619	/*
5620	 * The extent, once shifted, must be adjacent in-file and on-disk with
5621	 * the preceding extent.
5622	 */
5623	if ((left->br_startoff + left->br_blockcount != startoff) ||
5624	    (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5625	    (left->br_state != got->br_state) ||
5626	    (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
5627		return false;
5628
5629	return true;
5630}
5631
5632/*
5633 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5634 * hole in the file. If an extent shift would result in the extent being fully
5635 * adjacent to the extent that currently precedes the hole, we can merge with
5636 * the preceding extent rather than do the shift.
5637 *
5638 * This function assumes the caller has verified a shift-by-merge is possible
5639 * with the provided extents via xfs_bmse_can_merge().
5640 */
5641STATIC int
5642xfs_bmse_merge(
5643	struct xfs_trans		*tp,
5644	struct xfs_inode		*ip,
5645	int				whichfork,
5646	xfs_fileoff_t			shift,		/* shift fsb */
5647	struct xfs_iext_cursor		*icur,
5648	struct xfs_bmbt_irec		*got,		/* extent to shift */
5649	struct xfs_bmbt_irec		*left,		/* preceding extent */
5650	struct xfs_btree_cur		*cur,
5651	int				*logflags)	/* output */
5652{
5653	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, whichfork);
5654	struct xfs_bmbt_irec		new;
5655	xfs_filblks_t			blockcount;
5656	int				error, i;
5657	struct xfs_mount		*mp = ip->i_mount;
5658
5659	blockcount = left->br_blockcount + got->br_blockcount;
5660
5661	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5662	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5663	ASSERT(xfs_bmse_can_merge(left, got, shift));
5664
5665	new = *left;
5666	new.br_blockcount = blockcount;
5667
5668	/*
5669	 * Update the on-disk extent count, the btree if necessary and log the
5670	 * inode.
5671	 */
5672	ifp->if_nextents--;
5673	*logflags |= XFS_ILOG_CORE;
5674	if (!cur) {
5675		*logflags |= XFS_ILOG_DEXT;
5676		goto done;
5677	}
5678
5679	/* lookup and remove the extent to merge */
5680	error = xfs_bmbt_lookup_eq(cur, got, &i);
5681	if (error)
5682		return error;
5683	if (XFS_IS_CORRUPT(mp, i != 1))
5684		return -EFSCORRUPTED;
5685
5686	error = xfs_btree_delete(cur, &i);
5687	if (error)
5688		return error;
5689	if (XFS_IS_CORRUPT(mp, i != 1))
5690		return -EFSCORRUPTED;
5691
5692	/* lookup and update size of the previous extent */
5693	error = xfs_bmbt_lookup_eq(cur, left, &i);
5694	if (error)
5695		return error;
5696	if (XFS_IS_CORRUPT(mp, i != 1))
5697		return -EFSCORRUPTED;
5698
5699	error = xfs_bmbt_update(cur, &new);
5700	if (error)
5701		return error;
5702
5703	/* change to extent format if required after extent removal */
5704	error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
5705	if (error)
5706		return error;
5707
5708done:
5709	xfs_iext_remove(ip, icur, 0);
5710	xfs_iext_prev(ifp, icur);
5711	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5712			&new);
5713
5714	/* update reverse mapping. rmap functions merge the rmaps for us */
5715	xfs_rmap_unmap_extent(tp, ip, whichfork, got);
5716	memcpy(&new, got, sizeof(new));
5717	new.br_startoff = left->br_startoff + left->br_blockcount;
5718	xfs_rmap_map_extent(tp, ip, whichfork, &new);
5719	return 0;
5720}
5721
5722static int
5723xfs_bmap_shift_update_extent(
5724	struct xfs_trans	*tp,
5725	struct xfs_inode	*ip,
5726	int			whichfork,
5727	struct xfs_iext_cursor	*icur,
5728	struct xfs_bmbt_irec	*got,
5729	struct xfs_btree_cur	*cur,
5730	int			*logflags,
5731	xfs_fileoff_t		startoff)
5732{
5733	struct xfs_mount	*mp = ip->i_mount;
5734	struct xfs_bmbt_irec	prev = *got;
5735	int			error, i;
5736
5737	*logflags |= XFS_ILOG_CORE;
5738
5739	got->br_startoff = startoff;
5740
5741	if (cur) {
5742		error = xfs_bmbt_lookup_eq(cur, &prev, &i);
5743		if (error)
5744			return error;
5745		if (XFS_IS_CORRUPT(mp, i != 1))
5746			return -EFSCORRUPTED;
5747
5748		error = xfs_bmbt_update(cur, got);
5749		if (error)
5750			return error;
5751	} else {
5752		*logflags |= XFS_ILOG_DEXT;
5753	}
5754
5755	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5756			got);
5757
5758	/* update reverse mapping */
5759	xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
5760	xfs_rmap_map_extent(tp, ip, whichfork, got);
5761	return 0;
5762}
5763
5764int
5765xfs_bmap_collapse_extents(
5766	struct xfs_trans	*tp,
5767	struct xfs_inode	*ip,
5768	xfs_fileoff_t		*next_fsb,
5769	xfs_fileoff_t		offset_shift_fsb,
5770	bool			*done)
5771{
5772	int			whichfork = XFS_DATA_FORK;
5773	struct xfs_mount	*mp = ip->i_mount;
5774	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
5775	struct xfs_btree_cur	*cur = NULL;
5776	struct xfs_bmbt_irec	got, prev;
5777	struct xfs_iext_cursor	icur;
5778	xfs_fileoff_t		new_startoff;
5779	int			error = 0;
5780	int			logflags = 0;
5781
5782	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5783	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5784		return -EFSCORRUPTED;
5785	}
5786
5787	if (XFS_FORCED_SHUTDOWN(mp))
5788		return -EIO;
5789
5790	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5791
5792	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5793		error = xfs_iread_extents(tp, ip, whichfork);
5794		if (error)
5795			return error;
5796	}
5797
5798	if (ifp->if_flags & XFS_IFBROOT) {
5799		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5800		cur->bc_ino.flags = 0;
5801	}
5802
5803	if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5804		*done = true;
5805		goto del_cursor;
5806	}
5807	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5808		error = -EFSCORRUPTED;
5809		goto del_cursor;
5810	}
5811
5812	new_startoff = got.br_startoff - offset_shift_fsb;
5813	if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
5814		if (new_startoff < prev.br_startoff + prev.br_blockcount) {
5815			error = -EINVAL;
5816			goto del_cursor;
5817		}
5818
5819		if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
5820			error = xfs_bmse_merge(tp, ip, whichfork,
5821					offset_shift_fsb, &icur, &got, &prev,
5822					cur, &logflags);
5823			if (error)
5824				goto del_cursor;
5825			goto done;
5826		}
5827	} else {
5828		if (got.br_startoff < offset_shift_fsb) {
5829			error = -EINVAL;
5830			goto del_cursor;
5831		}
5832	}
5833
5834	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5835			cur, &logflags, new_startoff);
5836	if (error)
5837		goto del_cursor;
5838
5839done:
5840	if (!xfs_iext_next_extent(ifp, &icur, &got)) {
5841		*done = true;
5842		goto del_cursor;
5843	}
5844
5845	*next_fsb = got.br_startoff;
5846del_cursor:
5847	if (cur)
5848		xfs_btree_del_cursor(cur, error);
5849	if (logflags)
5850		xfs_trans_log_inode(tp, ip, logflags);
5851	return error;
5852}
5853
5854/* Make sure we won't be right-shifting an extent past the maximum bound. */
5855int
5856xfs_bmap_can_insert_extents(
5857	struct xfs_inode	*ip,
5858	xfs_fileoff_t		off,
5859	xfs_fileoff_t		shift)
5860{
5861	struct xfs_bmbt_irec	got;
5862	int			is_empty;
5863	int			error = 0;
5864
5865	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5866
5867	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
5868		return -EIO;
5869
5870	xfs_ilock(ip, XFS_ILOCK_EXCL);
5871	error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
5872	if (!error && !is_empty && got.br_startoff >= off &&
5873	    ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
5874		error = -EINVAL;
5875	xfs_iunlock(ip, XFS_ILOCK_EXCL);
5876
5877	return error;
5878}
5879
5880int
5881xfs_bmap_insert_extents(
5882	struct xfs_trans	*tp,
5883	struct xfs_inode	*ip,
5884	xfs_fileoff_t		*next_fsb,
5885	xfs_fileoff_t		offset_shift_fsb,
5886	bool			*done,
5887	xfs_fileoff_t		stop_fsb)
5888{
5889	int			whichfork = XFS_DATA_FORK;
5890	struct xfs_mount	*mp = ip->i_mount;
5891	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
5892	struct xfs_btree_cur	*cur = NULL;
5893	struct xfs_bmbt_irec	got, next;
5894	struct xfs_iext_cursor	icur;
5895	xfs_fileoff_t		new_startoff;
5896	int			error = 0;
5897	int			logflags = 0;
5898
5899	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5900	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5901		return -EFSCORRUPTED;
5902	}
5903
5904	if (XFS_FORCED_SHUTDOWN(mp))
5905		return -EIO;
5906
5907	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5908
5909	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5910		error = xfs_iread_extents(tp, ip, whichfork);
5911		if (error)
5912			return error;
5913	}
5914
5915	if (ifp->if_flags & XFS_IFBROOT) {
5916		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5917		cur->bc_ino.flags = 0;
5918	}
5919
5920	if (*next_fsb == NULLFSBLOCK) {
5921		xfs_iext_last(ifp, &icur);
5922		if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5923		    stop_fsb > got.br_startoff) {
5924			*done = true;
5925			goto del_cursor;
5926		}
5927	} else {
5928		if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5929			*done = true;
5930			goto del_cursor;
5931		}
5932	}
5933	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5934		error = -EFSCORRUPTED;
5935		goto del_cursor;
5936	}
5937
5938	if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
5939		error = -EFSCORRUPTED;
5940		goto del_cursor;
5941	}
5942
5943	new_startoff = got.br_startoff + offset_shift_fsb;
5944	if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
5945		if (new_startoff + got.br_blockcount > next.br_startoff) {
5946			error = -EINVAL;
5947			goto del_cursor;
5948		}
5949
5950		/*
5951		 * Unlike a left shift (which involves a hole punch), a right
5952		 * shift does not modify extent neighbors in any way.  We should
5953		 * never find mergeable extents in this scenario.  Check anyways
5954		 * and warn if we encounter two extents that could be one.
5955		 */
5956		if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
5957			WARN_ON_ONCE(1);
5958	}
5959
5960	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5961			cur, &logflags, new_startoff);
5962	if (error)
5963		goto del_cursor;
5964
5965	if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
5966	    stop_fsb >= got.br_startoff + got.br_blockcount) {
5967		*done = true;
5968		goto del_cursor;
5969	}
5970
5971	*next_fsb = got.br_startoff;
5972del_cursor:
5973	if (cur)
5974		xfs_btree_del_cursor(cur, error);
5975	if (logflags)
5976		xfs_trans_log_inode(tp, ip, logflags);
5977	return error;
5978}
5979
5980/*
5981 * Splits an extent into two extents at split_fsb block such that it is the
5982 * first block of the current_ext. @ext is a target extent to be split.
5983 * @split_fsb is a block where the extents is split.  If split_fsb lies in a
5984 * hole or the first block of extents, just return 0.
5985 */
5986int
5987xfs_bmap_split_extent(
5988	struct xfs_trans	*tp,
5989	struct xfs_inode	*ip,
5990	xfs_fileoff_t		split_fsb)
5991{
5992	int				whichfork = XFS_DATA_FORK;
5993	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, whichfork);
5994	struct xfs_btree_cur		*cur = NULL;
5995	struct xfs_bmbt_irec		got;
5996	struct xfs_bmbt_irec		new; /* split extent */
5997	struct xfs_mount		*mp = ip->i_mount;
5998	xfs_fsblock_t			gotblkcnt; /* new block count for got */
5999	struct xfs_iext_cursor		icur;
6000	int				error = 0;
6001	int				logflags = 0;
6002	int				i = 0;
6003
6004	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
6005	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
6006		return -EFSCORRUPTED;
6007	}
6008
6009	if (XFS_FORCED_SHUTDOWN(mp))
6010		return -EIO;
6011
6012	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
6013		/* Read in all the extents */
6014		error = xfs_iread_extents(tp, ip, whichfork);
6015		if (error)
6016			return error;
6017	}
6018
6019	/*
6020	 * If there are not extents, or split_fsb lies in a hole we are done.
6021	 */
6022	if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
6023	    got.br_startoff >= split_fsb)
6024		return 0;
6025
6026	gotblkcnt = split_fsb - got.br_startoff;
6027	new.br_startoff = split_fsb;
6028	new.br_startblock = got.br_startblock + gotblkcnt;
6029	new.br_blockcount = got.br_blockcount - gotblkcnt;
6030	new.br_state = got.br_state;
6031
6032	if (ifp->if_flags & XFS_IFBROOT) {
6033		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
6034		cur->bc_ino.flags = 0;
6035		error = xfs_bmbt_lookup_eq(cur, &got, &i);
6036		if (error)
6037			goto del_cursor;
6038		if (XFS_IS_CORRUPT(mp, i != 1)) {
6039			error = -EFSCORRUPTED;
6040			goto del_cursor;
6041		}
6042	}
6043
6044	got.br_blockcount = gotblkcnt;
6045	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
6046			&got);
6047
6048	logflags = XFS_ILOG_CORE;
6049	if (cur) {
6050		error = xfs_bmbt_update(cur, &got);
6051		if (error)
6052			goto del_cursor;
6053	} else
6054		logflags |= XFS_ILOG_DEXT;
6055
6056	/* Add new extent */
6057	xfs_iext_next(ifp, &icur);
6058	xfs_iext_insert(ip, &icur, &new, 0);
6059	ifp->if_nextents++;
6060
6061	if (cur) {
6062		error = xfs_bmbt_lookup_eq(cur, &new, &i);
6063		if (error)
6064			goto del_cursor;
6065		if (XFS_IS_CORRUPT(mp, i != 0)) {
6066			error = -EFSCORRUPTED;
6067			goto del_cursor;
6068		}
6069		error = xfs_btree_insert(cur, &i);
6070		if (error)
6071			goto del_cursor;
6072		if (XFS_IS_CORRUPT(mp, i != 1)) {
6073			error = -EFSCORRUPTED;
6074			goto del_cursor;
6075		}
6076	}
6077
6078	/*
6079	 * Convert to a btree if necessary.
6080	 */
6081	if (xfs_bmap_needs_btree(ip, whichfork)) {
6082		int tmp_logflags; /* partial log flag return val */
6083
6084		ASSERT(cur == NULL);
6085		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
6086				&tmp_logflags, whichfork);
6087		logflags |= tmp_logflags;
6088	}
6089
6090del_cursor:
6091	if (cur) {
6092		cur->bc_ino.allocated = 0;
6093		xfs_btree_del_cursor(cur, error);
6094	}
6095
6096	if (logflags)
6097		xfs_trans_log_inode(tp, ip, logflags);
6098	return error;
6099}
6100
6101/* Deferred mapping is only for real extents in the data fork. */
6102static bool
6103xfs_bmap_is_update_needed(
6104	struct xfs_bmbt_irec	*bmap)
6105{
6106	return  bmap->br_startblock != HOLESTARTBLOCK &&
6107		bmap->br_startblock != DELAYSTARTBLOCK;
6108}
6109
6110/* Record a bmap intent. */
6111static int
6112__xfs_bmap_add(
6113	struct xfs_trans		*tp,
6114	enum xfs_bmap_intent_type	type,
6115	struct xfs_inode		*ip,
6116	int				whichfork,
6117	struct xfs_bmbt_irec		*bmap)
6118{
6119	struct xfs_bmap_intent		*bi;
6120
6121	trace_xfs_bmap_defer(tp->t_mountp,
6122			XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
6123			type,
6124			XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
6125			ip->i_ino, whichfork,
6126			bmap->br_startoff,
6127			bmap->br_blockcount,
6128			bmap->br_state);
6129
6130	bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_NOFS);
6131	INIT_LIST_HEAD(&bi->bi_list);
6132	bi->bi_type = type;
6133	bi->bi_owner = ip;
6134	bi->bi_whichfork = whichfork;
6135	bi->bi_bmap = *bmap;
6136
6137	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
 
6138	return 0;
6139}
6140
6141/* Map an extent into a file. */
6142void
6143xfs_bmap_map_extent(
6144	struct xfs_trans	*tp,
6145	struct xfs_inode	*ip,
6146	struct xfs_bmbt_irec	*PREV)
6147{
6148	if (!xfs_bmap_is_update_needed(PREV))
6149		return;
6150
6151	__xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV);
6152}
6153
6154/* Unmap an extent out of a file. */
6155void
6156xfs_bmap_unmap_extent(
6157	struct xfs_trans	*tp,
6158	struct xfs_inode	*ip,
6159	struct xfs_bmbt_irec	*PREV)
6160{
6161	if (!xfs_bmap_is_update_needed(PREV))
6162		return;
6163
6164	__xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV);
6165}
6166
6167/*
6168 * Process one of the deferred bmap operations.  We pass back the
6169 * btree cursor to maintain our lock on the bmapbt between calls.
6170 */
6171int
6172xfs_bmap_finish_one(
6173	struct xfs_trans		*tp,
6174	struct xfs_inode		*ip,
6175	enum xfs_bmap_intent_type	type,
6176	int				whichfork,
6177	xfs_fileoff_t			startoff,
6178	xfs_fsblock_t			startblock,
6179	xfs_filblks_t			*blockcount,
6180	xfs_exntst_t			state)
6181{
 
6182	int				error = 0;
6183
6184	ASSERT(tp->t_firstblock == NULLFSBLOCK);
6185
6186	trace_xfs_bmap_deferred(tp->t_mountp,
6187			XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
6188			XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
6189			ip->i_ino, whichfork, startoff, *blockcount, state);
 
 
 
6190
6191	if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
6192		return -EFSCORRUPTED;
6193
6194	if (XFS_TEST_ERROR(false, tp->t_mountp,
6195			XFS_ERRTAG_BMAP_FINISH_ONE))
6196		return -EIO;
6197
6198	switch (type) {
6199	case XFS_BMAP_MAP:
6200		error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
6201				startblock, 0);
6202		*blockcount = 0;
6203		break;
6204	case XFS_BMAP_UNMAP:
6205		error = __xfs_bunmapi(tp, ip, startoff, blockcount,
6206				XFS_BMAPI_REMAP, 1);
6207		break;
6208	default:
6209		ASSERT(0);
6210		error = -EFSCORRUPTED;
6211	}
6212
6213	return error;
6214}
6215
6216/* Check that an inode's extent does not have invalid flags or bad ranges. */
6217xfs_failaddr_t
6218xfs_bmap_validate_extent(
6219	struct xfs_inode	*ip,
 
6220	int			whichfork,
6221	struct xfs_bmbt_irec	*irec)
6222{
6223	struct xfs_mount	*mp = ip->i_mount;
6224	xfs_fsblock_t		endfsb;
6225	bool			isrt;
6226
6227	isrt = XFS_IS_REALTIME_INODE(ip);
6228	endfsb = irec->br_startblock + irec->br_blockcount - 1;
6229	if (isrt && whichfork == XFS_DATA_FORK) {
6230		if (!xfs_verify_rtbno(mp, irec->br_startblock))
6231			return __this_address;
6232		if (!xfs_verify_rtbno(mp, endfsb))
6233			return __this_address;
6234	} else {
6235		if (!xfs_verify_fsbno(mp, irec->br_startblock))
6236			return __this_address;
6237		if (!xfs_verify_fsbno(mp, endfsb))
6238			return __this_address;
6239		if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
6240		    XFS_FSB_TO_AGNO(mp, endfsb))
6241			return __this_address;
6242	}
6243	if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
6244		return __this_address;
6245	return NULL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6246}