/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_error.h"
#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"

struct workqueue_struct *xfs_alloc_wq;

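/*
 * Absolute difference of two block numbers.  Written as a conditional
 * rather than a plain subtraction so that unsigned operands never
 * wrap: XFS_ABSDIFF(3, 10) == XFS_ABSDIFF(10, 3) == 7.
 */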
#define	XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))

#define	XFSA_FIXUP_BNO_OK	1
#define	XFSA_FIXUP_CNT_OK	2

STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
		xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);

/*
 * Lookup the record equal to [bno, len] in the btree given by cur.
 */
STATIC int				/* error */
xfs_alloc_lookup_eq(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

/*
 * Lookup the first record greater than or equal to [bno, len]
 * in the btree given by cur.
 */
int				/* error */
xfs_alloc_lookup_ge(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Lookup the first record less than or equal to [bno, len]
 * in the btree given by cur.
 */
int					/* error */
xfs_alloc_lookup_le(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int				/* error */
xfs_alloc_update(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len)	/* length of extent */
{
	union xfs_btree_rec	rec;

	rec.alloc.ar_startblock = cpu_to_be32(bno);
	rec.alloc.ar_blockcount = cpu_to_be32(len);
	return xfs_btree_update(cur, &rec);
}

/*
 * Get the data from the pointed-to record.
 */
int					/* error */
xfs_alloc_get_rec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		*bno,	/* output: starting block of extent */
	xfs_extlen_t		*len,	/* output: length of extent */
	int			*stat)	/* output: success/failure */
{
	union xfs_btree_rec	*rec;
	int			error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (!error && *stat == 1) {
		*bno = be32_to_cpu(rec->alloc.ar_startblock);
		*len = be32_to_cpu(rec->alloc.ar_blockcount);
	}
	return error;
}

/*
 * Compute aligned version of the found extent.
 * Takes alignment and min length into account.
 */
STATIC void
xfs_alloc_compute_aligned(
	xfs_alloc_arg_t	*args,		/* allocation argument structure */
	xfs_agblock_t	foundbno,	/* starting block in found extent */
	xfs_extlen_t	foundlen,	/* length in found extent */
	xfs_agblock_t	*resbno,	/* result block number */
	xfs_extlen_t	*reslen)	/* result length */
{
	xfs_agblock_t	bno;
	xfs_extlen_t	len;
	xfs_extlen_t	diff;

	/* Trim busy sections out of found extent */
	xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);

	/*
	 * If we have a largish extent that happens to start before min_agbno,
	 * see if we can shift it into range...
	 */
	if (bno < args->min_agbno && bno + len > args->min_agbno) {
		diff = args->min_agbno - bno;
		if (len > diff) {
			bno += diff;
			len -= diff;
		}
	}

	if (args->alignment > 1 && len >= args->minlen) {
		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);

		diff = aligned_bno - bno;

		*resbno = aligned_bno;
		*reslen = diff >= len ? 0 : len - diff;
	} else {
		*resbno = bno;
		*reslen = len;
	}
}
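/*
 * Illustrative example (not from the original source): with
 * alignment = 8 and a busy-trimmed extent [bno = 5, len = 20],
 * aligned_bno = roundup(5, 8) = 8 and diff = 3, giving
 * [resbno = 8, reslen = 17].  If diff >= len the aligned start falls
 * past the end of the extent and reslen becomes 0.
 */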

/*
 * Compute best start block and diff for "near" allocations.
 * freelen >= wantlen already checked by caller.
 */
STATIC xfs_extlen_t			/* difference value (absolute) */
xfs_alloc_compute_diff(
	xfs_agblock_t	wantbno,	/* target starting block */
	xfs_extlen_t	wantlen,	/* target length */
	xfs_extlen_t	alignment,	/* target alignment */
	char		userdata,	/* are we allocating data? */
	xfs_agblock_t	freebno,	/* freespace's starting block */
	xfs_extlen_t	freelen,	/* freespace's length */
	xfs_agblock_t	*newbnop)	/* result: best start block from free */
{
	xfs_agblock_t	freeend;	/* end of freespace extent */
	xfs_agblock_t	newbno1;	/* return block number */
	xfs_agblock_t	newbno2;	/* other new block number */
	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
	xfs_agblock_t	wantend;	/* end of target extent */

	ASSERT(freelen >= wantlen);
	freeend = freebno + freelen;
	wantend = wantbno + wantlen;
	/*
	 * We want to allocate from the start of a free extent if it is past
	 * the desired block or if we are allocating user data and the free
	 * extent is before desired block. The second case is there to allow
	 * for contiguous allocation from the remaining free space if the file
	 * grows in the short term.
	 */
	if (freebno >= wantbno || (userdata && freeend < wantend)) {
		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
			newbno1 = NULLAGBLOCK;
	} else if (freeend >= wantend && alignment > 1) {
		newbno1 = roundup(wantbno, alignment);
		newbno2 = newbno1 - alignment;
		if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
		else
			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
		if (newbno2 < freebno)
			newbno2 = NULLAGBLOCK;
		else
			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
			if (newlen1 < newlen2 ||
			    (newlen1 == newlen2 &&
			     XFS_ABSDIFF(newbno1, wantbno) >
			     XFS_ABSDIFF(newbno2, wantbno)))
				newbno1 = newbno2;
		} else if (newbno2 != NULLAGBLOCK)
			newbno1 = newbno2;
	} else if (freeend >= wantend) {
		newbno1 = wantbno;
	} else if (alignment > 1) {
		newbno1 = roundup(freeend - wantlen, alignment);
		if (newbno1 > freeend - wantlen &&
		    newbno1 - alignment >= freebno)
			newbno1 -= alignment;
		else if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
	} else
		newbno1 = freeend - wantlen;
	*newbnop = newbno1;
	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
}
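/*
 * Illustrative example (not from the original source): wantbno = 100,
 * wantlen = 10, alignment = 4, freebno = 90, freelen = 30.  Then
 * freeend = 120 >= wantend = 110, so newbno1 = roundup(100, 4) = 100;
 * the candidate newbno2 = 96 is no longer and no closer, so newbno1
 * wins and the returned diff is XFS_ABSDIFF(100, 100) = 0: a perfect
 * fit.
 */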

/*
 * Fix up the length, based on mod and prod.
 * len should be k * prod + mod for some k.
 * If len is too small it is returned unchanged.
 * If len hits maxlen it is left alone.
 */
STATIC void
xfs_alloc_fix_len(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_extlen_t	k;
	xfs_extlen_t	rlen;

	ASSERT(args->mod < args->prod);
	rlen = args->len;
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
	    (args->mod == 0 && rlen < args->prod))
		return;
	k = rlen % args->prod;
	if (k == args->mod)
		return;
	if (k > args->mod)
		rlen = rlen - (k - args->mod);
	else
		rlen = rlen - args->prod + (args->mod - k);
	/* casts to (int) catch length underflows */
	if ((int)rlen < (int)args->minlen)
		return;
	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
	ASSERT(rlen % args->prod == args->mod);
	args->len = rlen;
}
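/*
 * Illustrative example (not from the original source): with prod = 4,
 * mod = 1, minlen = 2, maxlen = 12 and len = 10, k = 10 % 4 = 2 > mod,
 * so len is trimmed to 10 - (2 - 1) = 9, and 9 % 4 == 1 == mod as
 * required.
 */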

/*
 * Fix up length if there is too little space left in the a.g.
 * Return 1 if ok, 0 if too little, should give up.
 */
STATIC int
xfs_alloc_fix_minleft(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_agf_t	*agf;		/* a.g. freelist header */
	int		diff;		/* free space difference */

	if (args->minleft == 0)
		return 1;
	agf = XFS_BUF_TO_AGF(args->agbp);
	diff = be32_to_cpu(agf->agf_freeblks)
		- args->len - args->minleft;
	if (diff >= 0)
		return 1;
	args->len += diff;		/* shrink the allocated space */
	/* casts to (int) catch length underflows */
	if ((int)args->len >= (int)args->minlen)
		return 1;
	args->agbno = NULLAGBLOCK;
	return 0;
}

/*
 * Update the two btrees, logically removing from freespace the extent
 * starting at rbno, rlen blocks.  The extent is contained within the
 * actual (current) free extent fbno for flen blocks.
 * Flags are passed in indicating whether the cursors are set to the
 * relevant records.
 */
STATIC int				/* error code */
xfs_alloc_fixup_trees(
	xfs_btree_cur_t	*cnt_cur,	/* cursor for by-size btree */
	xfs_btree_cur_t	*bno_cur,	/* cursor for by-block btree */
	xfs_agblock_t	fbno,		/* starting block of free extent */
	xfs_extlen_t	flen,		/* length of free extent */
	xfs_agblock_t	rbno,		/* starting block of returned extent */
	xfs_extlen_t	rlen,		/* length of returned extent */
	int		flags)		/* flags, XFSA_FIXUP_... */
{
	int		error;		/* error code */
	int		i;		/* operation results */
	xfs_agblock_t	nfbno1;		/* first new free startblock */
	xfs_agblock_t	nfbno2;		/* second new free startblock */
	xfs_extlen_t	nflen1=0;	/* first new free length */
	xfs_extlen_t	nflen2=0;	/* second new free length */
	struct xfs_mount *mp;

	mp = cnt_cur->bc_mp;

	/*
	 * Look up the record in the by-size tree if necessary.
	 */
	if (flags & XFSA_FIXUP_CNT_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp,
			i == 1 && nfbno1 == fbno && nflen1 == flen);
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	/*
	 * Look up the record in the by-block tree if necessary.
	 */
	if (flags & XFSA_FIXUP_BNO_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp,
			i == 1 && nfbno1 == fbno && nflen1 == flen);
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}

#ifdef DEBUG
	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
		struct xfs_btree_block	*bnoblock;
		struct xfs_btree_block	*cntblock;

		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);

		XFS_WANT_CORRUPTED_RETURN(mp,
			bnoblock->bb_numrecs == cntblock->bb_numrecs);
	}
#endif

	/*
	 * Deal with all four cases: the allocated record is contained
	 * within the freespace record, so we can have new freespace
	 * at either (or both) end, or no freespace remaining.
	 */
	if (rbno == fbno && rlen == flen)
		nfbno1 = nfbno2 = NULLAGBLOCK;
	else if (rbno == fbno) {
		nfbno1 = rbno + rlen;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else if (rbno + rlen == fbno + flen) {
		nfbno1 = fbno;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else {
		nfbno1 = fbno;
		nflen1 = rbno - fbno;
		nfbno2 = rbno + rlen;
		nflen2 = (fbno + flen) - nfbno2;
	}
	/*
	 * Delete the entry from the by-size btree.
	 */
	if ((error = xfs_btree_delete(cnt_cur, &i)))
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	/*
	 * Add new by-size btree entry(s).
	 */
	if (nfbno1 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	if (nfbno2 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	/*
	 * Fix up the by-block btree entry(s).
	 */
	if (nfbno1 == NULLAGBLOCK) {
		/*
		 * No remaining freespace, just delete the by-block tree entry.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	} else {
		/*
		 * Update the by-block entry to start later|be shorter.
		 */
		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
			return error;
	}
	if (nfbno2 != NULLAGBLOCK) {
		/*
		 * 2 resulting free entries, need to add one.
		 */
		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(bno_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	return 0;
}
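/*
 * Illustrative example (not from the original source): carving
 * [rbno = 110, rlen = 20] out of the middle of free record
 * [fbno = 100, flen = 50] takes the final case above and leaves two
 * new free extents, [nfbno1 = 100, nflen1 = 10] in front of the
 * allocation and [nfbno2 = 130, nflen2 = 20] behind it.
 */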

static bool
xfs_agfl_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	struct xfs_agfl	*agfl = XFS_BUF_TO_AGFL(bp);
	int		i;

	if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
		return false;
	if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
		return false;
	/*
	 * during growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
	if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
		return false;

	for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
		if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
		    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
			return false;
	}

	return xfs_log_check_lsn(mp,
				 be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn));
}

static void
xfs_agfl_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;

	/*
	 * There is no verification of non-crc AGFLs because mkfs does not
	 * initialise the AGFL to zero or NULL. Hence the only valid part of the
	 * AGFL is what the AGF says is active. We can't get to the AGF, so we
	 * can't verify just those entries are valid.
	 */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
		xfs_buf_ioerror(bp, -EFSBADCRC);
	else if (!xfs_agfl_verify(bp))
		xfs_buf_ioerror(bp, -EFSCORRUPTED);

	if (bp->b_error)
		xfs_verifier_error(bp);
}

static void
xfs_agfl_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	struct xfs_buf_log_item	*bip = bp->b_fspriv;

	/* no verification of non-crc AGFLs */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (!xfs_agfl_verify(bp)) {
		xfs_buf_ioerror(bp, -EFSCORRUPTED);
		xfs_verifier_error(bp);
		return;
	}

	if (bip)
		XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
}

const struct xfs_buf_ops xfs_agfl_buf_ops = {
	.name = "xfs_agfl",
	.verify_read = xfs_agfl_read_verify,
	.verify_write = xfs_agfl_write_verify,
};
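/*
 * These ops wire the two verifiers above into the buffer cache:
 * verify_read runs when a read completes (checksum first, then
 * structure), and verify_write runs before the buffer goes to disk
 * (structure check, stamp the LSN, recompute the checksum).
 */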

/*
 * Read in the allocation group free block array.
 */
STATIC int				/* error */
xfs_alloc_read_agfl(
	xfs_mount_t	*mp,		/* mount point structure */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_agnumber_t	agno,		/* allocation group number */
	xfs_buf_t	**bpp)		/* buffer for the ag free block array */
{
	xfs_buf_t	*bp;		/* return value */
	int		error;

	ASSERT(agno != NULLAGNUMBER);
	error = xfs_trans_read_buf(
			mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
	if (error)
		return error;
	xfs_buf_set_ref(bp, XFS_AGFL_REF);
	*bpp = bp;
	return 0;
}

STATIC int
xfs_alloc_update_counters(
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	struct xfs_buf		*agbp,
	long			len)
{
	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);

	pag->pagf_freeblks += len;
	be32_add_cpu(&agf->agf_freeblks, len);

	xfs_trans_agblocks_delta(tp, len);
	if (unlikely(be32_to_cpu(agf->agf_freeblks) >
		     be32_to_cpu(agf->agf_length)))
		return -EFSCORRUPTED;

	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
	return 0;
}

/*
 * Allocation group level functions.
 */

/*
 * Allocate a variable extent in the allocation group agno.
 * Type and bno are used to determine where in the allocation group the
 * extent will start.
 * Extent's length (returned in *len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent(
	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
{
	int		error=0;

	ASSERT(args->minlen > 0);
	ASSERT(args->maxlen > 0);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->mod < args->prod);
	ASSERT(args->alignment > 0);
	/*
	 * Branch to correct routine based on the type.
	 */
	args->wasfromfl = 0;
	switch (args->type) {
	case XFS_ALLOCTYPE_THIS_AG:
		error = xfs_alloc_ag_vextent_size(args);
		break;
	case XFS_ALLOCTYPE_NEAR_BNO:
		error = xfs_alloc_ag_vextent_near(args);
		break;
	case XFS_ALLOCTYPE_THIS_BNO:
		error = xfs_alloc_ag_vextent_exact(args);
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}

	if (error || args->agbno == NULLAGBLOCK)
		return error;

	ASSERT(args->len >= args->minlen);
	ASSERT(args->len <= args->maxlen);
	ASSERT(!args->wasfromfl || !args->isfl);
	ASSERT(args->agbno % args->alignment == 0);

	if (!args->wasfromfl) {
		error = xfs_alloc_update_counters(args->tp, args->pag,
						  args->agbp,
						  -((long)(args->len)));
		if (error)
			return error;

		ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
					      args->agbno, args->len));
	}

	if (!args->isfl) {
		xfs_trans_mod_sb(args->tp, args->wasdel ?
				 XFS_TRANS_SB_RES_FDBLOCKS :
				 XFS_TRANS_SB_FDBLOCKS,
				 -((long)(args->len)));
	}

	XFS_STATS_INC(args->mp, xs_allocx);
	XFS_STATS_ADD(args->mp, xs_allocb, args->len);
	return error;
}
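/*
 * Summary of the dispatch above: XFS_ALLOCTYPE_THIS_AG takes the best
 * extent anywhere in the AG by size, XFS_ALLOCTYPE_NEAR_BNO takes the
 * suitable extent closest to args->agbno, and XFS_ALLOCTYPE_THIS_BNO
 * succeeds only if the blocks at exactly args->agbno are free.
 */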

/*
 * Allocate a variable extent at exactly agno/bno.
 * Extent's length (returned in *len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_exact(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	xfs_btree_cur_t	*bno_cur;/* by block-number btree cursor */
	xfs_btree_cur_t	*cnt_cur;/* by count btree cursor */
	int		error;
	xfs_agblock_t	fbno;	/* start block of found extent */
	xfs_extlen_t	flen;	/* length of found extent */
	xfs_agblock_t	tbno;	/* start block of trimmed extent */
	xfs_extlen_t	tlen;	/* length of trimmed extent */
	xfs_agblock_t	tend;	/* end block of trimmed extent */
	int		i;	/* success/failure of operation */

	ASSERT(args->alignment == 1);

	/*
	 * Allocate/initialize a cursor for the by-number freespace btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->agno, XFS_BTNUM_BNO);

	/*
	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
	 * Look for the closest free block <= bno, it must contain bno
	 * if any free block does.
	 */
	error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
	if (error)
		goto error0;
	if (!i)
		goto not_found;

	/*
	 * Grab the freespace record.
	 */
	error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
	if (error)
		goto error0;
	XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
	ASSERT(fbno <= args->agbno);

	/*
	 * Check for overlapping busy extents.
	 */
	xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);

	/*
	 * Give up if the start of the extent is busy, or the freespace isn't
	 * long enough for the minimum request.
	 */
	if (tbno > args->agbno)
		goto not_found;
	if (tlen < args->minlen)
		goto not_found;
	tend = tbno + tlen;
	if (tend < args->agbno + args->minlen)
		goto not_found;

	/*
	 * End of extent will be smaller of the freespace end and the
	 * maximal requested end.
	 *
	 * Fix the length according to mod and prod if given.
	 */
	args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
						- args->agbno;
	xfs_alloc_fix_len(args);
	if (!xfs_alloc_fix_minleft(args))
		goto not_found;

	ASSERT(args->agbno + args->len <= tend);

	/*
	 * We are allocating agbno for args->len
	 * Allocate/initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_CNT);
	ASSERT(args->agbno + args->len <=
		be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
				      args->len, XFSA_FIXUP_BNO_OK);
	if (error) {
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
		goto error0;
	}

	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);

	args->wasfromfl = 0;
	trace_xfs_alloc_exact_done(args);
	return 0;

not_found:
	/* Didn't find it, return null. */
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	args->agbno = NULLAGBLOCK;
	trace_xfs_alloc_exact_notfound(args);
	return 0;

error0:
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	trace_xfs_alloc_exact_error(args);
	return error;
}
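/*
 * Illustrative example (not from the original source): a request for
 * agbno = 100 with minlen = 4 and maxlen = 16 against free record
 * [fbno = 96, flen = 12] gives tend = 108, so at most 108 - 100 = 8
 * blocks are available at exactly bno 100; assuming no busy trimming,
 * prod <= 1 and minleft = 0, the function returns args->len = 8.
 */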

/*
 * Search the btree in a given direction via the search cursor and compare
 * the records found against the good extent we've already found.
 */
STATIC int
xfs_alloc_find_best_extent(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	struct xfs_btree_cur	**gcur,	/* good cursor */
	struct xfs_btree_cur	**scur,	/* searching cursor */
	xfs_agblock_t		gdiff,	/* difference for search comparison */
	xfs_agblock_t		*sbno,	/* extent found by search */
	xfs_extlen_t		*slen,	/* extent length */
	xfs_agblock_t		*sbnoa,	/* aligned extent found by search */
	xfs_extlen_t		*slena,	/* aligned extent length */
	int			dir)	/* 0 = search right, 1 = search left */
{
	xfs_agblock_t		new;
	xfs_agblock_t		sdiff;
	int			error;
	int			i;

	/* The good extent is perfect, no need to search. */
	if (!gdiff)
		goto out_use_good;

	/*
	 * Look until we find a better one, run out of space or run off the end.
	 */
	do {
		error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
		if (error)
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
		xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);

		/*
		 * The good extent is closer than this one.
		 */
		if (!dir) {
			if (*sbnoa > args->max_agbno)
				goto out_use_good;
			if (*sbnoa >= args->agbno + gdiff)
				goto out_use_good;
		} else {
			if (*sbnoa < args->min_agbno)
				goto out_use_good;
			if (*sbnoa <= args->agbno - gdiff)
				goto out_use_good;
		}

		/*
		 * Same distance, compare length and pick the best.
		 */
		if (*slena >= args->minlen) {
			args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
			xfs_alloc_fix_len(args);

			sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
						       args->alignment,
						       args->userdata, *sbnoa,
						       *slena, &new);

			/*
			 * Choose closer size and invalidate other cursor.
			 */
			if (sdiff < gdiff)
				goto out_use_search;
			goto out_use_good;
		}

		if (!dir)
			error = xfs_btree_increment(*scur, 0, &i);
		else
			error = xfs_btree_decrement(*scur, 0, &i);
		if (error)
			goto error0;
	} while (i);

out_use_good:
	xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
	*scur = NULL;
	return 0;

out_use_search:
	xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
	*gcur = NULL;
	return 0;

error0:
	/* caller invalidates cursors */
	return error;
}

/*
 * Allocate a variable extent near bno in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int				/* error */
xfs_alloc_ag_vextent_near(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_btree_cur_t	*bno_cur_gt;	/* cursor for bno btree, right side */
	xfs_btree_cur_t	*bno_cur_lt;	/* cursor for bno btree, left side */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for count btree */
	xfs_agblock_t	gtbno;		/* start bno of right side entry */
	xfs_agblock_t	gtbnoa;		/* aligned ... */
	xfs_extlen_t	gtdiff;		/* difference to right side entry */
	xfs_extlen_t	gtlen;		/* length of right side entry */
	xfs_extlen_t	gtlena;		/* aligned ... */
	xfs_agblock_t	gtnew;		/* useful start bno of right side */
	int		error;		/* error code */
	int		i;		/* result code, temporary */
	int		j;		/* result code, temporary */
	xfs_agblock_t	ltbno;		/* start bno of left side entry */
	xfs_agblock_t	ltbnoa;		/* aligned ... */
	xfs_extlen_t	ltdiff;		/* difference to left side entry */
	xfs_extlen_t	ltlen;		/* length of left side entry */
	xfs_extlen_t	ltlena;		/* aligned ... */
	xfs_agblock_t	ltnew;		/* useful start bno of left side */
	xfs_extlen_t	rlen;		/* length of returned extent */
	int		forced = 0;
#ifdef DEBUG
	/*
	 * Randomly don't execute the first algorithm.
	 */
	int		dofirst;	/* set to do first algorithm */

	dofirst = prandom_u32() & 1;
#endif

	/* handle uninitialized agbno range so caller doesn't have to */
	if (!args->min_agbno && !args->max_agbno)
		args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
	ASSERT(args->min_agbno <= args->max_agbno);

	/* clamp agbno to the range if it's outside */
	if (args->agbno < args->min_agbno)
		args->agbno = args->min_agbno;
	if (args->agbno > args->max_agbno)
		args->agbno = args->max_agbno;

restart:
	bno_cur_lt = NULL;
	bno_cur_gt = NULL;
	ltlen = 0;
	gtlena = 0;
	ltlena = 0;

	/*
	 * Get a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_CNT);

	/*
	 * See if there are any free extents as big as maxlen.
	 */
	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
		goto error0;
	/*
	 * If none, then pick up the last entry in the tree unless the
	 * tree is empty.
	 */
	if (!i) {
		if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
				&ltlen, &i)))
			goto error0;
		if (i == 0 || ltlen == 0) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_near_noentry(args);
			return 0;
		}
		ASSERT(i == 1);
	}
	args->wasfromfl = 0;

	/*
	 * First algorithm.
	 * If the requested extent is large wrt the freespaces available
	 * in this a.g., then the cursor will be pointing to a btree entry
	 * near the right edge of the tree.  If it's in the last btree leaf
	 * block, then we just examine all the entries in that block
	 * that are big enough, and pick the best one.
	 * This is written as a while loop so we can break out of it,
	 * but we never loop back to the top.
	 */
	while (xfs_btree_islastblock(cnt_cur, 0)) {
		xfs_extlen_t	bdiff;
		int		besti=0;
		xfs_extlen_t	blen=0;
		xfs_agblock_t	bnew=0;

#ifdef DEBUG
		if (dofirst)
			break;
#endif
		/*
		 * Start from the entry that lookup found, sequence through
		 * all larger free blocks.  If we're actually pointing at a
		 * record smaller than maxlen, go to the start of this block,
		 * and skip all those smaller than minlen.
		 */
		if (ltlen || args->alignment > 1) {
			cnt_cur->bc_ptrs[0] = 1;
			do {
				if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
						&ltlen, &i)))
					goto error0;
				XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
				if (ltlen >= args->minlen)
					break;
				if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
					goto error0;
			} while (i);
			ASSERT(ltlen >= args->minlen);
			if (!i)
				break;
		}
		i = cnt_cur->bc_ptrs[0];
		for (j = 1, blen = 0, bdiff = 0;
		     !error && j && (blen < args->maxlen || bdiff > 0);
		     error = xfs_btree_increment(cnt_cur, 0, &j)) {
			/*
			 * For each entry, decide if it's better than
			 * the previous best entry.
			 */
			if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
			xfs_alloc_compute_aligned(args, ltbno, ltlen,
						  &ltbnoa, &ltlena);
			if (ltlena < args->minlen)
				continue;
			if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
				continue;
			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
			xfs_alloc_fix_len(args);
			ASSERT(args->len >= args->minlen);
			if (args->len < blen)
				continue;
			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
				args->alignment, args->userdata, ltbnoa,
				ltlena, &ltnew);
			if (ltnew != NULLAGBLOCK &&
			    (args->len > blen || ltdiff < bdiff)) {
				bdiff = ltdiff;
				bnew = ltnew;
				blen = args->len;
				besti = cnt_cur->bc_ptrs[0];
			}
		}
		/*
		 * It didn't work.  We COULD be in a case where
		 * there's a good record somewhere, so try again.
		 */
		if (blen == 0)
			break;
		/*
		 * Point at the best entry, and retrieve it again.
		 */
		cnt_cur->bc_ptrs[0] = besti;
		if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
		ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
		args->len = blen;
		if (!xfs_alloc_fix_minleft(args)) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_near_nominleft(args);
			return 0;
		}
		blen = args->len;
		/*
		 * We are allocating starting at bnew for blen blocks.
		 */
		args->agbno = bnew;
		ASSERT(bnew >= ltbno);
		ASSERT(bnew + blen <= ltbno + ltlen);
		/*
		 * Set up a cursor for the by-bno tree.
		 */
		bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
			args->agbp, args->agno, XFS_BTNUM_BNO);
		/*
		 * Fix up the btree entries.
		 */
		if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
				ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
			goto error0;
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);

		trace_xfs_alloc_near_first(args);
		return 0;
	}
	/*
	 * Second algorithm.
	 * Search in the by-bno tree to the left and to the right
	 * simultaneously, until in each case we find a space big enough,
	 * or run into the edge of the tree.  When we run into the edge,
	 * we deallocate that cursor.
	 * If both searches succeed, we compare the two spaces and pick
	 * the better one.
	 * With alignment, it's possible for both to fail; the upper
	 * level algorithm that picks allocation groups for allocations
	 * is not supposed to do this.
	 */
	/*
	 * Allocate and initialize the cursor for the leftward search.
	 */
	bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_BNO);
	/*
	 * Lookup <= bno to find the leftward search's starting point.
	 */
	if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
		goto error0;
	if (!i) {
		/*
		 * Didn't find anything; use this cursor for the rightward
		 * search.
		 */
		bno_cur_gt = bno_cur_lt;
		bno_cur_lt = NULL;
	}
	/*
	 * Found something.  Duplicate the cursor for the rightward search.
	 */
	else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
		goto error0;
	/*
	 * Increment the cursor, so we will point at the entry just right
	 * of the leftward entry if any, or to the leftmost entry.
	 */
	if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
		goto error0;
	if (!i) {
		/*
		 * It failed, there are no rightward entries.
		 */
		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
		bno_cur_gt = NULL;
	}
	/*
	 * Loop going left with the leftward cursor, right with the
	 * rightward cursor, until either both directions give up or
	 * we find an entry at least as big as minlen.
	 */
	do {
		if (bno_cur_lt) {
			if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
			xfs_alloc_compute_aligned(args, ltbno, ltlen,
						  &ltbnoa, &ltlena);
			if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
				break;
			if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
				goto error0;
			if (!i || ltbnoa < args->min_agbno) {
				xfs_btree_del_cursor(bno_cur_lt,
						     XFS_BTREE_NOERROR);
				bno_cur_lt = NULL;
			}
		}
		if (bno_cur_gt) {
			if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
			xfs_alloc_compute_aligned(args, gtbno, gtlen,
						  &gtbnoa, &gtlena);
			if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
				break;
			if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
				goto error0;
			if (!i || gtbnoa > args->max_agbno) {
				xfs_btree_del_cursor(bno_cur_gt,
						     XFS_BTREE_NOERROR);
				bno_cur_gt = NULL;
			}
		}
	} while (bno_cur_lt || bno_cur_gt);

	/*
	 * Got both cursors still active, need to find better entry.
	 */
	if (bno_cur_lt && bno_cur_gt) {
		if (ltlena >= args->minlen) {
			/*
			 * Left side is good, look for a right side entry.
			 */
			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
			xfs_alloc_fix_len(args);
			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
				args->alignment, args->userdata, ltbnoa,
				ltlena, &ltnew);

			error = xfs_alloc_find_best_extent(args,
						&bno_cur_lt, &bno_cur_gt,
						ltdiff, &gtbno, &gtlen,
						&gtbnoa, &gtlena,
						0 /* search right */);
		} else {
			ASSERT(gtlena >= args->minlen);

			/*
			 * Right side is good, look for a left side entry.
			 */
			args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
			xfs_alloc_fix_len(args);
			gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
				args->alignment, args->userdata, gtbnoa,
				gtlena, &gtnew);

			error = xfs_alloc_find_best_extent(args,
						&bno_cur_gt, &bno_cur_lt,
						gtdiff, &ltbno, &ltlen,
						&ltbnoa, &ltlena,
						1 /* search left */);
		}

		if (error)
			goto error0;
	}

	/*
	 * If we couldn't get anything, give up.
	 */
	if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);

		if (!forced++) {
			trace_xfs_alloc_near_busy(args);
			xfs_log_force(args->mp, XFS_LOG_SYNC);
			goto restart;
		}
		trace_xfs_alloc_size_neither(args);
		args->agbno = NULLAGBLOCK;
		return 0;
	}

	/*
	 * At this point we have selected a freespace entry, either to the
	 * left or to the right.  If it's on the right, copy all the
	 * useful variables to the "left" set so we only have one
	 * copy of this code.
	 */
	if (bno_cur_gt) {
		bno_cur_lt = bno_cur_gt;
		bno_cur_gt = NULL;
		ltbno = gtbno;
		ltbnoa = gtbnoa;
		ltlen = gtlen;
		ltlena = gtlena;
		j = 1;
	} else
		j = 0;

	/*
	 * Fix up the length and compute the useful address.
	 */
	args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
	xfs_alloc_fix_len(args);
	if (!xfs_alloc_fix_minleft(args)) {
		trace_xfs_alloc_near_nominleft(args);
		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
		return 0;
	}
	rlen = args->len;
	(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
				     args->userdata, ltbnoa, ltlena, &ltnew);
	ASSERT(ltnew >= ltbno);
	ASSERT(ltnew + rlen <= ltbnoa + ltlena);
	ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
	ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
	args->agbno = ltnew;

	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
			ltnew, rlen, XFSA_FIXUP_BNO_OK)))
		goto error0;

	if (j)
		trace_xfs_alloc_near_greater(args);
	else
		trace_xfs_alloc_near_lesser(args);

	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
	return 0;

 error0:
	trace_xfs_alloc_near_error(args);
	if (cnt_cur != NULL)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	if (bno_cur_lt != NULL)
		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
	if (bno_cur_gt != NULL)
		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
	return error;
}
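/*
 * Note on the two algorithms above: the first one is only attempted
 * while the by-size lookup lands in the last leaf block, i.e. when the
 * request is large relative to the free extents in this AG; typical
 * near-bno requests are serviced by the bidirectional by-bno walk of
 * the second algorithm.
 */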

/*
 * Allocate a variable extent anywhere in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int				/* error */
xfs_alloc_ag_vextent_size(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_btree_cur_t	*bno_cur;	/* cursor for bno btree */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for cnt btree */
	int		error;		/* error result */
	xfs_agblock_t	fbno;		/* start of found freespace */
	xfs_extlen_t	flen;		/* length of found freespace */
	int		i;		/* temp status variable */
	xfs_agblock_t	rbno;		/* returned block number */
	xfs_extlen_t	rlen;		/* length of returned extent */
	int		forced = 0;

restart:
	/*
	 * Allocate and initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_CNT);
	bno_cur = NULL;

	/*
	 * Look for an entry >= maxlen+alignment-1 blocks.
	 */
	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
			args->maxlen + args->alignment - 1, &i)))
		goto error0;

	/*
	 * If none or we have busy extents that we cannot allocate from, then
	 * we have to settle for a smaller extent. In the case that there are
	 * no large extents, this will return the last entry in the tree unless
	 * the tree is empty. In the case that there are only busy large
	 * extents, this will return the largest small extent unless there
	 * are no smaller extents available.
	 */
	if (!i || forced > 1) {
		error = xfs_alloc_ag_vextent_small(args, cnt_cur,
						   &fbno, &flen, &i);
		if (error)
			goto error0;
		if (i == 0 || flen == 0) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_noentry(args);
			return 0;
		}
		ASSERT(i == 1);
		xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
	} else {
		/*
		 * Search for a non-busy extent that is large enough.
		 * If we are at low space, don't check, or if we fall off
		 * the end of the btree, turn off the busy check and
		 * restart.
		 */
		for (;;) {
			error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
			if (error)
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);

			xfs_alloc_compute_aligned(args, fbno, flen,
						  &rbno, &rlen);

			if (rlen >= args->maxlen)
				break;

			error = xfs_btree_increment(cnt_cur, 0, &i);
			if (error)
				goto error0;
			if (i == 0) {
				/*
				 * Our only valid extents must have been busy.
				 * Make it unbusy by forcing the log out and
				 * retrying. If we've been here before, forcing
				 * the log isn't making the extents available,
				 * which means they have probably been freed in
				 * this transaction.  In that case, we have to
				 * give up on them and we'll attempt a minlen
				 * allocation the next time around.
				 */
				xfs_btree_del_cursor(cnt_cur,
						     XFS_BTREE_NOERROR);
				trace_xfs_alloc_size_busy(args);
				if (!forced++)
					xfs_log_force(args->mp, XFS_LOG_SYNC);
				goto restart;
			}
		}
	}

	/*
	 * In the first case above, we got the last entry in the
	 * by-size btree.  Now we check to see if the space hits maxlen
	 * once aligned; if not, we search left for something better.
	 * This can't happen in the second case above.
	 */
	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
			(rlen <= flen && rbno + rlen <= fbno + flen), error0);
	if (rlen < args->maxlen) {
		xfs_agblock_t	bestfbno;
		xfs_extlen_t	bestflen;
		xfs_agblock_t	bestrbno;
		xfs_extlen_t	bestrlen;

		bestrlen = rlen;
		bestrbno = rbno;
		bestflen = flen;
		bestfbno = fbno;
		for (;;) {
			if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
				goto error0;
			if (i == 0)
				break;
			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
					&i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
			if (flen < bestrlen)
				break;
			xfs_alloc_compute_aligned(args, fbno, flen,
						  &rbno, &rlen);
			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
			XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
				(rlen <= flen && rbno + rlen <= fbno + flen),
				error0);
			if (rlen > bestrlen) {
				bestrlen = rlen;
				bestrbno = rbno;
				bestflen = flen;
				bestfbno = fbno;
				if (rlen == args->maxlen)
					break;
			}
		}
		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
				&i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
		rlen = bestrlen;
		rbno = bestrbno;
		flen = bestflen;
		fbno = bestfbno;
	}
	args->wasfromfl = 0;
	/*
	 * Fix up the length.
	 */
	args->len = rlen;
	if (rlen < args->minlen) {
		if (!forced++) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_busy(args);
			xfs_log_force(args->mp, XFS_LOG_SYNC);
			goto restart;
		}
		goto out_nominleft;
	}
	xfs_alloc_fix_len(args);

	if (!xfs_alloc_fix_minleft(args))
		goto out_nominleft;
	rlen = args->len;
	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
	/*
	 * Allocate and initialize a cursor for the by-block tree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_BNO);
	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
			rbno, rlen, XFSA_FIXUP_CNT_OK)))
		goto error0;
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	cnt_cur = bno_cur = NULL;
	args->len = rlen;
	args->agbno = rbno;
	XFS_WANT_CORRUPTED_GOTO(args->mp,
		args->agbno + args->len <=
			be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
		error0);
	trace_xfs_alloc_size_done(args);
	return 0;

error0:
	trace_xfs_alloc_size_error(args);
	if (cnt_cur)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	if (bno_cur)
		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	return error;

out_nominleft:
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	trace_xfs_alloc_size_nominleft(args);
	args->agbno = NULLAGBLOCK;
	return 0;
}
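/*
 * Note on the "forced" retries above: the first failed pass forces the
 * log with XFS_LOG_SYNC so that busy extents can clear, and restarts;
 * once two passes have failed, the forced > 1 test routes the next
 * pass straight to xfs_alloc_ag_vextent_small().
 */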

/*
 * Deal with the case where only small freespaces remain.
 * Either return the contents of the last freespace record,
 * or allocate space from the freelist if there is nothing in the tree.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_small(
	xfs_alloc_arg_t	*args,	/* allocation argument structure */
	xfs_btree_cur_t	*ccur,	/* by-size cursor */
	xfs_agblock_t	*fbnop,	/* result block number */
	xfs_extlen_t	*flenp,	/* result length */
	int		*stat)	/* status: 0-freelist, 1-normal/none */
{
	int		error;
	xfs_agblock_t	fbno;
	xfs_extlen_t	flen;
	int		i;

	if ((error = xfs_btree_decrement(ccur, 0, &i)))
		goto error0;
	if (i) {
		if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
	}
	/*
	 * Nothing in the btree, try the freelist.  Make sure
	 * to respect minleft even when pulling from the
	 * freelist.
	 */
	else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
		 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
		  > args->minleft)) {
		error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
		if (error)
			goto error0;
		if (fbno != NULLAGBLOCK) {
			xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
					     args->userdata);

			if (args->userdata) {
				xfs_buf_t	*bp;

				bp = xfs_btree_get_bufs(args->mp, args->tp,
					args->agno, fbno, 0);
				xfs_trans_binval(args->tp, bp);
			}
			args->len = 1;
			args->agbno = fbno;
			XFS_WANT_CORRUPTED_GOTO(args->mp,
				args->agbno + args->len <=
				be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
				error0);
			args->wasfromfl = 1;
			trace_xfs_alloc_small_freelist(args);
			*stat = 0;
			return 0;
		}
		/*
		 * Nothing in the freelist.
		 */
		else
			flen = 0;
	}
	/*
	 * Can't allocate from the freelist for some reason.
	 */
	else {
		fbno = NULLAGBLOCK;
		flen = 0;
	}
	/*
	 * Can't do the allocation, give up.
	 */
	if (flen < args->minlen) {
		args->agbno = NULLAGBLOCK;
		trace_xfs_alloc_small_notenough(args);
		flen = 0;
	}
	*fbnop = fbno;
	*flenp = flen;
	*stat = 1;
	trace_xfs_alloc_small_done(args);
	return 0;

error0:
	trace_xfs_alloc_small_error(args);
	return error;
}
1576
1577/*
1578 * Free the extent starting at agno/bno for length.
1579 */
1580STATIC int			/* error */
1581xfs_free_ag_extent(
1582	xfs_trans_t	*tp,	/* transaction pointer */
1583	xfs_buf_t	*agbp,	/* buffer for a.g. freelist header */
1584	xfs_agnumber_t	agno,	/* allocation group number */
1585	xfs_agblock_t	bno,	/* starting block number */
1586	xfs_extlen_t	len,	/* length of extent */
1587	int		isfl)	/* set if is freelist blocks - no sb acctg */
 
1588{
1589	xfs_btree_cur_t	*bno_cur;	/* cursor for by-block btree */
1590	xfs_btree_cur_t	*cnt_cur;	/* cursor for by-size btree */
1591	int		error;		/* error return value */
1592	xfs_agblock_t	gtbno;		/* start of right neighbor block */
1593	xfs_extlen_t	gtlen;		/* length of right neighbor block */
1594	int		haveleft;	/* have a left neighbor block */
1595	int		haveright;	/* have a right neighbor block */
1596	int		i;		/* temp, result code */
1597	xfs_agblock_t	ltbno;		/* start of left neighbor block */
1598	xfs_extlen_t	ltlen;		/* length of left neighbor block */
1599	xfs_mount_t	*mp;		/* mount point struct for filesystem */
1600	xfs_agblock_t	nbno;		/* new starting block of freespace */
1601	xfs_extlen_t	nlen;		/* new length of freespace */
1602	xfs_perag_t	*pag;		/* per allocation group data */
1603
 
1604	mp = tp->t_mountp;
 
 
 
 
 
 
 
1605	/*
1606	 * Allocate and initialize a cursor for the by-block btree.
1607	 */
1608	bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1609	cnt_cur = NULL;
1610	/*
1611	 * Look for a neighboring block on the left (lower block numbers)
1612	 * that is contiguous with this space.
1613	 */
1614	if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1615		goto error0;
1616	if (haveleft) {
1617		/*
1618		 * There is a block to our left.
1619		 */
1620		if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1621			goto error0;
1622		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1623		/*
1624		 * It's not contiguous, though.
1625		 */
1626		if (ltbno + ltlen < bno)
1627			haveleft = 0;
1628		else {
1629			/*
1630			 * If this failure happens the request to free this
1631			 * space was invalid, it's (partly) already free.
1632			 * Very bad.
1633			 */
1634			XFS_WANT_CORRUPTED_GOTO(mp,
1635						ltbno + ltlen <= bno, error0);
1636		}
1637	}
1638	/*
1639	 * Look for a neighboring block on the right (higher block numbers)
1640	 * that is contiguous with this space.
1641	 */
1642	if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1643		goto error0;
1644	if (haveright) {
1645		/*
1646		 * There is a block to our right.
1647		 */
1648		if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1649			goto error0;
1650		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1651		/*
1652		 * It's not contiguous, though.
1653		 */
1654		if (bno + len < gtbno)
1655			haveright = 0;
1656		else {
1657			/*
1658			 * If this failure happens the request to free this
1659			 * space was invalid, it's (partly) already free.
1660			 * Very bad.
1661			 */
1662			XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
1663		}
1664	}
1665	/*
1666	 * Now allocate and initialize a cursor for the by-size tree.
1667	 */
1668	cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1669	/*
1670	 * Have both left and right contiguous neighbors.
1671	 * Merge all three into a single free block.
1672	 */
1673	if (haveleft && haveright) {
1674		/*
1675		 * Delete the old by-size entry on the left.
1676		 */
1677		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1678			goto error0;
1679		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1680		if ((error = xfs_btree_delete(cnt_cur, &i)))
1681			goto error0;
1682		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1683		/*
1684		 * Delete the old by-size entry on the right.
1685		 */
1686		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1687			goto error0;
1688		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1689		if ((error = xfs_btree_delete(cnt_cur, &i)))
1690			goto error0;
1691		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1692		/*
1693		 * Delete the old by-block entry for the right block.
1694		 */
1695		if ((error = xfs_btree_delete(bno_cur, &i)))
1696			goto error0;
1697		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1698		/*
1699		 * Move the by-block cursor back to the left neighbor.
1700		 */
1701		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1702			goto error0;
1703		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1704#ifdef DEBUG
1705		/*
1706		 * Check that this is the right record: delete didn't
1707		 * mangle the cursor.
1708		 */
1709		{
1710			xfs_agblock_t	xxbno;
1711			xfs_extlen_t	xxlen;
1712
1713			if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1714					&i)))
1715				goto error0;
1716			XFS_WANT_CORRUPTED_GOTO(mp,
1717				i == 1 && xxbno == ltbno && xxlen == ltlen,
1718				error0);
1719		}
1720#endif
1721		/*
1722		 * Update remaining by-block entry to the new, joined block.
1723		 */
1724		nbno = ltbno;
1725		nlen = len + ltlen + gtlen;
1726		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1727			goto error0;
1728	}
1729	/*
1730	 * Have only a left contiguous neighbor.
1731	 * Merge it together with the new freespace.
1732	 */
1733	else if (haveleft) {
1734		/*
1735		 * Delete the old by-size entry on the left.
1736		 */
1737		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1738			goto error0;
1739		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1740		if ((error = xfs_btree_delete(cnt_cur, &i)))
1741			goto error0;
1742		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1743		/*
1744		 * Back up the by-block cursor to the left neighbor, and
1745		 * update its length.
1746		 */
1747		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1748			goto error0;
1749		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1750		nbno = ltbno;
1751		nlen = len + ltlen;
1752		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1753			goto error0;
1754	}
1755	/*
1756	 * Have only a right contiguous neighbor.
1757	 * Merge it together with the new freespace.
1758	 */
1759	else if (haveright) {
1760		/*
1761		 * Delete the old by-size entry on the right.
1762		 */
1763		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1764			goto error0;
1765		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1766		if ((error = xfs_btree_delete(cnt_cur, &i)))
1767			goto error0;
1768		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1769		/*
1770		 * Update the starting block and length of the right
1771		 * neighbor in the by-block tree.
1772		 */
1773		nbno = bno;
1774		nlen = len + gtlen;
1775		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1776			goto error0;
1777	}
1778	/*
1779	 * No contiguous neighbors.
1780	 * Insert the new freespace into the by-block tree.
1781	 */
1782	else {
1783		nbno = bno;
1784		nlen = len;
1785		if ((error = xfs_btree_insert(bno_cur, &i)))
1786			goto error0;
1787		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1788	}
1789	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1790	bno_cur = NULL;
1791	/*
1792	 * In all cases we need to insert the new freespace in the by-size tree.
1793	 */
1794	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1795		goto error0;
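	/*
	 * The lookup must fail (i == 0): the new, merged extent cannot
	 * already have a record in the by-size tree.
	 */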
1796	XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
1797	if ((error = xfs_btree_insert(cnt_cur, &i)))
1798		goto error0;
1799	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1800	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1801	cnt_cur = NULL;
1802
1803	/*
1804	 * Update the freespace totals in the ag and superblock.
1805	 */
1806	pag = xfs_perag_get(mp, agno);
1807	error = xfs_alloc_update_counters(tp, pag, agbp, len);
1808	xfs_perag_put(pag);
1809	if (error)
1810		goto error0;
1811
1812	if (!isfl)
1813		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1814	XFS_STATS_INC(mp, xs_freex);
1815	XFS_STATS_ADD(mp, xs_freeb, len);
1816
1817	trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1818
1819	return 0;
1820
1821 error0:
1822	trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
1823	if (bno_cur)
1824		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1825	if (cnt_cur)
1826		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1827	return error;
1828}
1829
1830/*
1831 * Visible (exported) allocation/free functions.
1832 * Some of these are used just by xfs_alloc_btree.c and this file.
1833 */
1834
1835/*
1836 * Compute and fill in value of m_ag_maxlevels.
1837 */
1838void
1839xfs_alloc_compute_maxlevels(
1840	xfs_mount_t	*mp)	/* file system mount structure */
1841{
1842	int		level;
1843	uint		maxblocks;
1844	uint		maxleafents;
1845	int		minleafrecs;
1846	int		minnoderecs;
1847
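	/*
	 * The worst case is an AG in which free and allocated blocks
	 * alternate, giving one freespace record for every other block:
	 * (agblocks + 1) / 2 leaf entries at most.
	 */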
1848	maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
1849	minleafrecs = mp->m_alloc_mnr[0];
1850	minnoderecs = mp->m_alloc_mnr[1];
1851	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1852	for (level = 1; maxblocks > 1; level++)
1853		maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1854	mp->m_ag_maxlevels = level;
1855}
1856
1857/*
1858 * Find the length of the longest extent in an AG.
1859 */
1860xfs_extlen_t
1861xfs_alloc_longest_free_extent(
1862	struct xfs_mount	*mp,
1863	struct xfs_perag	*pag,
1864	xfs_extlen_t		need)
1865{
1866	xfs_extlen_t		delta = 0;
1867
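	/*
	 * Blocks needed to refill the freelist are taken out of the
	 * longest free extent, so discount that shortfall up front.
	 */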
1868	if (need > pag->pagf_flcount)
1869		delta = need - pag->pagf_flcount;
1870
1871	if (pag->pagf_longest > delta)
1872		return pag->pagf_longest - delta;
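	/*
	 * Otherwise the freelist shortfall consumes the longest extent.
	 * At best a single block remains available, so the expression
	 * below yields 1 or 0.
	 */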
1873	return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1874}
1875
1876unsigned int
1877xfs_alloc_min_freelist(
1878	struct xfs_mount	*mp,
1879	struct xfs_perag	*pag)
1880{
1881	unsigned int		min_free;
1882
1883	/* space needed by-bno freespace btree */
1884	min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
1885				       mp->m_ag_maxlevels);
1886	/* space needed by-size freespace btree */
1887	min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
1888				       mp->m_ag_maxlevels);
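	/*
	 * This reserves enough blocks for a full split of each freespace
	 * btree: every existing level plus a new root, capped at the
	 * maximum possible tree height.
	 */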
1889
1890	return min_free;
1891}
1892
1893/*
1894 * Check if the operation we are fixing up the freelist for should go ahead or
1895 * not. If we are freeing blocks, we always allow it, otherwise the allocation
1896 * is dependent on whether the size and shape of free space available will
1897 * permit the requested allocation to take place.
1898 */
1899static bool
1900xfs_alloc_space_available(
1901	struct xfs_alloc_arg	*args,
1902	xfs_extlen_t		min_free,
1903	int			flags)
1904{
1905	struct xfs_perag	*pag = args->pag;
1906	xfs_extlen_t		longest;
1907	int			available;
1908
1909	if (flags & XFS_ALLOC_FLAG_FREEING)
1910		return true;
1911
1912	/* do we have enough contiguous free space for the allocation? */
1913	longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free);
1914	if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
1915		return false;
1916
1917	/* do we have enough free space remaining for the allocation? */
1918	available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
1919			  min_free - args->total);
1920	if (available < (int)args->minleft)
1921		return false;
1922
1923	return true;
1924}
1925
1926/*
1927 * Decide whether to use this allocation group for this allocation.
1928 * If so, fix up the btree freelist's size.
1929 */
1930int			/* error */
1931xfs_alloc_fix_freelist(
1932	struct xfs_alloc_arg	*args,	/* allocation argument structure */
1933	int			flags)	/* XFS_ALLOC_FLAG_... */
1934{
1935	struct xfs_mount	*mp = args->mp;
1936	struct xfs_perag	*pag = args->pag;
1937	struct xfs_trans	*tp = args->tp;
1938	struct xfs_buf		*agbp = NULL;
1939	struct xfs_buf		*agflbp = NULL;
1940	struct xfs_alloc_arg	targs;	/* local allocation arguments */
1941	xfs_agblock_t		bno;	/* freelist block */
1942	xfs_extlen_t		need;	/* total blocks needed in freelist */
1943	int			error = 0;
1944
1945	if (!pag->pagf_init) {
1946		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
1947		if (error)
1948			goto out_no_agbp;
1949		if (!pag->pagf_init) {
1950			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1951			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1952			goto out_agbp_relse;
1953		}
1954	}
1955
1956	/*
1957	 * If this is a metadata preferred pag and we are allocating user
1958	 * data, then try somewhere else, unless we are being asked to try
1959	 * harder at this point.
1960	 */
1961	if (pag->pagf_metadata && args->userdata &&
1962	    (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
1963		ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1964		goto out_agbp_relse;
1965	}
1966
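	/*
	 * Do a quick check against the cached perag geometry first; if it
	 * fails there is no point taking the AGF lock. The check is
	 * repeated below once the AGF buffer is held.
	 */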
1967	need = xfs_alloc_min_freelist(mp, pag);
1968	if (!xfs_alloc_space_available(args, need, flags))
1969		goto out_agbp_relse;
1970
1971	/*
1972	 * Get the a.g. freespace buffer.
1973	 * Can fail if we're not blocking on locks, and it's held.
1974	 */
1975	if (!agbp) {
1976		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
1977		if (error)
1978			goto out_no_agbp;
1979		if (!agbp) {
1980			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1981			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1982			goto out_no_agbp;
1983		}
1984	}
1985
1986	/* If there isn't enough total space or a large enough single extent, reject it. */
1987	need = xfs_alloc_min_freelist(mp, pag);
1988	if (!xfs_alloc_space_available(args, need, flags))
1989		goto out_agbp_relse;
1990
1991	/*
1992	 * Make the freelist shorter if it's too long.
1993	 *
1994	 * Note that from this point onwards, we will always release the agf and
1995	 * agfl buffers on error. This handles the case where we error out and
1996	 * the buffers are clean or may not have been joined to the transaction
1997	 * and hence need to be released manually. If they have been joined to
1998	 * the transaction, then xfs_trans_brelse() will handle them
1999	 * appropriately based on the recursion count and dirty state of the
2000	 * buffer.
2001	 *
2002	 * XXX (dgc): When we have lots of free space, does this buy us
2003	 * anything other than extra overhead when we need to put more blocks
2004	 * back on the free list? Maybe we should only do this when space is
2005	 * getting low or the AGFL is more than half full?
2006	 */
2007	while (pag->pagf_flcount > need) {
2008		struct xfs_buf	*bp;
2009
2010		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
2011		if (error)
2012			goto out_agbp_relse;
2013		error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1);
2014		if (error)
2015			goto out_agbp_relse;
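		/*
		 * The block is free space again; invalidate any buffer
		 * caching its old contents so they are never written back.
		 */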
2016		bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
2017		xfs_trans_binval(tp, bp);
2018	}
2019
2020	memset(&targs, 0, sizeof(targs));
2021	targs.tp = tp;
2022	targs.mp = mp;
2023	targs.agbp = agbp;
2024	targs.agno = args->agno;
2025	targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
2026	targs.type = XFS_ALLOCTYPE_THIS_AG;
2027	targs.pag = pag;
2028	error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
2029	if (error)
2030		goto out_agbp_relse;
2031
2032	/* Make the freelist longer if it's too short. */
2033	while (pag->pagf_flcount < need) {
2034		targs.agbno = 0;
2035		targs.maxlen = need - pag->pagf_flcount;
2036
2037		/* Allocate as many blocks as possible at once. */
2038		error = xfs_alloc_ag_vextent(&targs);
2039		if (error)
2040			goto out_agflbp_relse;
2041
2042		/*
2043		 * Stop if we run out.  Won't happen if callers are obeying
2044		 * the restrictions correctly.  Can happen for free calls
2045		 * on a completely full ag.
2046		 */
2047		if (targs.agbno == NULLAGBLOCK) {
2048			if (flags & XFS_ALLOC_FLAG_FREEING)
2049				break;
2050			goto out_agflbp_relse;
2051		}
2052		/*
2053		 * Put each allocated block on the list.
2054		 */
2055		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
2056			error = xfs_alloc_put_freelist(tp, agbp,
2057							agflbp, bno, 0);
2058			if (error)
2059				goto out_agflbp_relse;
2060		}
2061	}
2062	xfs_trans_brelse(tp, agflbp);
2063	args->agbp = agbp;
2064	return 0;
2065
2066out_agflbp_relse:
2067	xfs_trans_brelse(tp, agflbp);
2068out_agbp_relse:
2069	if (agbp)
2070		xfs_trans_brelse(tp, agbp);
2071out_no_agbp:
2072	args->agbp = NULL;
2073	return error;
2074}
2075
2076/*
2077 * Get a block from the freelist.
2078 * Returns with the buffer for the block gotten.
2079 */
2080int				/* error */
2081xfs_alloc_get_freelist(
2082	xfs_trans_t	*tp,	/* transaction pointer */
2083	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
2084	xfs_agblock_t	*bnop,	/* block address retrieved from freelist */
2085	int		btreeblk) /* destination is a AGF btree */
2086{
2087	xfs_agf_t	*agf;	/* a.g. freespace structure */
2088	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
2089	xfs_agblock_t	bno;	/* block number returned */
2090	__be32		*agfl_bno;
2091	int		error;
2092	int		logflags;
2093	xfs_mount_t	*mp = tp->t_mountp;
2094	xfs_perag_t	*pag;	/* per allocation group data */
2095
2096	/*
2097	 * Freelist is empty, give up.
2098	 */
2099	agf = XFS_BUF_TO_AGF(agbp);
2100	if (!agf->agf_flcount) {
2101		*bnop = NULLAGBLOCK;
2102		return 0;
2103	}
2104	/*
2105	 * Read the array of free blocks.
2106	 */
2107	error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2108				    &agflbp);
2109	if (error)
2110		return error;
2111
2112
2113	/*
2114	 * Get the block number and update the data structures.
2115	 */
2116	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2117	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2118	be32_add_cpu(&agf->agf_flfirst, 1);
2119	xfs_trans_brelse(tp, agflbp);
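	/*
	 * The AGFL is a circular array: wrap the head index back to
	 * slot zero once it runs off the end.
	 */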
2120	if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
2121		agf->agf_flfirst = 0;
2122
2123	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2124	be32_add_cpu(&agf->agf_flcount, -1);
2125	xfs_trans_agflist_delta(tp, -1);
2126	pag->pagf_flcount--;
2128
2129	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2130	if (btreeblk) {
2131		be32_add_cpu(&agf->agf_btreeblks, 1);
2132		pag->pagf_btreeblks++;
2133		logflags |= XFS_AGF_BTREEBLKS;
2134	}
	/* Hold the perag reference until pagf_btreeblks has been updated. */
	xfs_perag_put(pag);
2135
2136	xfs_alloc_log_agf(tp, agbp, logflags);
2137	*bnop = bno;
2138
2139	return 0;
2140}
2141
2142/*
2143 * Log the given fields from the agf structure.
2144 */
2145void
2146xfs_alloc_log_agf(
2147	xfs_trans_t	*tp,	/* transaction pointer */
2148	xfs_buf_t	*bp,	/* buffer for a.g. freelist header */
2149	int		fields)	/* mask of fields to be logged (XFS_AGF_...) */
2150{
2151	int	first;		/* first byte offset */
2152	int	last;		/* last byte offset */
2153	static const short	offsets[] = {
2154		offsetof(xfs_agf_t, agf_magicnum),
2155		offsetof(xfs_agf_t, agf_versionnum),
2156		offsetof(xfs_agf_t, agf_seqno),
2157		offsetof(xfs_agf_t, agf_length),
2158		offsetof(xfs_agf_t, agf_roots[0]),
2159		offsetof(xfs_agf_t, agf_levels[0]),
2160		offsetof(xfs_agf_t, agf_flfirst),
2161		offsetof(xfs_agf_t, agf_fllast),
2162		offsetof(xfs_agf_t, agf_flcount),
2163		offsetof(xfs_agf_t, agf_freeblks),
2164		offsetof(xfs_agf_t, agf_longest),
2165		offsetof(xfs_agf_t, agf_btreeblks),
2166		offsetof(xfs_agf_t, agf_uuid),
2167		sizeof(xfs_agf_t)
2168	};
2169
2170	trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2171
2172	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
2173
2174	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2175	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2176}
2177
2178/*
2179 * Interface for inode allocation to force the pag data to be initialized.
2180 */
2181int					/* error */
2182xfs_alloc_pagf_init(
2183	xfs_mount_t		*mp,	/* file system mount structure */
2184	xfs_trans_t		*tp,	/* transaction pointer */
2185	xfs_agnumber_t		agno,	/* allocation group number */
2186	int			flags)	/* XFS_ALLOC_FLAGS_... */
2187{
2188	xfs_buf_t		*bp;
2189	int			error;
2190
2191	if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2192		return error;
2193	if (bp)
2194		xfs_trans_brelse(tp, bp);
2195	return 0;
2196}
2197
2198/*
2199 * Put the block on the freelist for the allocation group.
2200 */
2201int					/* error */
2202xfs_alloc_put_freelist(
2203	xfs_trans_t		*tp,	/* transaction pointer */
2204	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
2205	xfs_buf_t		*agflbp,/* buffer for a.g. free block array */
2206	xfs_agblock_t		bno,	/* block being freed */
2207	int			btreeblk) /* block came from a AGF btree */
2208{
2209	xfs_agf_t		*agf;	/* a.g. freespace structure */
2210	__be32			*blockp;/* pointer to array entry */
2211	int			error;
2212	int			logflags;
2213	xfs_mount_t		*mp;	/* mount structure */
2214	xfs_perag_t		*pag;	/* per allocation group data */
2215	__be32			*agfl_bno;
2216	int			startoff;
2217
2218	agf = XFS_BUF_TO_AGF(agbp);
2219	mp = tp->t_mountp;
2220
2221	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
2222			be32_to_cpu(agf->agf_seqno), &agflbp)))
2223		return error;
2224	be32_add_cpu(&agf->agf_fllast, 1);
2225	if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
2226		agf->agf_fllast = 0;
2227
2228	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2229	be32_add_cpu(&agf->agf_flcount, 1);
2230	xfs_trans_agflist_delta(tp, 1);
2231	pag->pagf_flcount++;
2232
2233	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2234	if (btreeblk) {
2235		be32_add_cpu(&agf->agf_btreeblks, -1);
2236		pag->pagf_btreeblks--;
2237		logflags |= XFS_AGF_BTREEBLKS;
2238	}
2239	xfs_perag_put(pag);
2240
2241	xfs_alloc_log_agf(tp, agbp, logflags);
2242
2243	ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2244
2245	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2246	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
2247	*blockp = cpu_to_be32(bno);
2248	startoff = (char *)blockp - (char *)agflbp->b_addr;
2249
2250	xfs_alloc_log_agf(tp, agbp, logflags);
2251
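	/* Log only the single AGFL slot modified above, not the whole block. */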
2252	xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
2253	xfs_trans_log_buf(tp, agflbp, startoff,
2254			  startoff + sizeof(xfs_agblock_t) - 1);
2255	return 0;
2256}
2257
2258static bool
2259xfs_agf_verify(
2260	struct xfs_mount *mp,
2261	struct xfs_buf	*bp)
2262{
2263	struct xfs_agf	*agf = XFS_BUF_TO_AGF(bp);
2264
2265	if (xfs_sb_version_hascrc(&mp->m_sb)) {
2266		if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
2267			return false;
2268		if (!xfs_log_check_lsn(mp,
2269				be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
2270			return false;
2271	}
2272
2273	if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2274	      XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2275	      be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2276	      be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2277	      be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2278	      be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
2279		return false;
2280
2281	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
2282	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
2283		return false;
2284
2285	/*
2286	 * during growfs operations, the perag is not fully initialised,
2287	 * so we can't use it for any useful checking. growfs ensures we can't
2288	 * use it by using uncached buffers that don't have the perag attached
2289	 * so we can detect and avoid this problem.
2290	 */
2291	if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2292		return false;
2293
2294	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2295	    be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2296		return false;
2297
2298	return true;
2300}
2301
2302static void
2303xfs_agf_read_verify(
2304	struct xfs_buf	*bp)
2305{
2306	struct xfs_mount *mp = bp->b_target->bt_mount;
2307
2308	if (xfs_sb_version_hascrc(&mp->m_sb) &&
2309	    !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2310		xfs_buf_ioerror(bp, -EFSBADCRC);
2311	else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
2312				XFS_ERRTAG_ALLOC_READ_AGF,
2313				XFS_RANDOM_ALLOC_READ_AGF))
2314		xfs_buf_ioerror(bp, -EFSCORRUPTED);
2315
2316	if (bp->b_error)
2317		xfs_verifier_error(bp);
2318}
2319
2320static void
2321xfs_agf_write_verify(
2322	struct xfs_buf	*bp)
2323{
2324	struct xfs_mount *mp = bp->b_target->bt_mount;
2325	struct xfs_buf_log_item	*bip = bp->b_fspriv;
2326
2327	if (!xfs_agf_verify(mp, bp)) {
2328		xfs_buf_ioerror(bp, -EFSCORRUPTED);
2329		xfs_verifier_error(bp);
2330		return;
2331	}
2332
2333	if (!xfs_sb_version_hascrc(&mp->m_sb))
2334		return;
2335
2336	if (bip)
2337		XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2338
2339	xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
2340}
2341
2342const struct xfs_buf_ops xfs_agf_buf_ops = {
2343	.name = "xfs_agf",
2344	.verify_read = xfs_agf_read_verify,
2345	.verify_write = xfs_agf_write_verify,
2346};
2347
2348/*
2349 * Read in the allocation group header (free/alloc section).
2350 */
2351int					/* error */
2352xfs_read_agf(
2353	struct xfs_mount	*mp,	/* mount point structure */
2354	struct xfs_trans	*tp,	/* transaction pointer */
2355	xfs_agnumber_t		agno,	/* allocation group number */
2356	int			flags,	/* XFS_BUF_ */
2357	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
2358{
2359	int		error;
2360
2361	trace_xfs_read_agf(mp, agno);
2362
2363	ASSERT(agno != NULLAGNUMBER);
2364	error = xfs_trans_read_buf(
2365			mp, tp, mp->m_ddev_targp,
2366			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2367			XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
2368	if (error)
2369		return error;
2370	if (!*bpp)
2371		return 0;
2372
2373	ASSERT(!(*bpp)->b_error);
2374	xfs_buf_set_ref(*bpp, XFS_AGF_REF);
2375	return 0;
2376}
2377
2378/*
2379 * Read in the allocation group header (free/alloc section).
2380 */
2381int					/* error */
2382xfs_alloc_read_agf(
2383	struct xfs_mount	*mp,	/* mount point structure */
2384	struct xfs_trans	*tp,	/* transaction pointer */
2385	xfs_agnumber_t		agno,	/* allocation group number */
2386	int			flags,	/* XFS_ALLOC_FLAG_... */
2387	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
2388{
2389	struct xfs_agf		*agf;		/* ag freelist header */
2390	struct xfs_perag	*pag;		/* per allocation group data */
2391	int			error;
2392
2393	trace_xfs_alloc_read_agf(mp, agno);
2394
2395	ASSERT(agno != NULLAGNUMBER);
2396	error = xfs_read_agf(mp, tp, agno,
2397			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2398			bpp);
2399	if (error)
2400		return error;
2401	if (!*bpp)
2402		return 0;
2403	ASSERT(!(*bpp)->b_error);
2404
2405	agf = XFS_BUF_TO_AGF(*bpp);
2406	pag = xfs_perag_get(mp, agno);
2407	if (!pag->pagf_init) {
2408		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
2409		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
2410		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2411		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2412		pag->pagf_levels[XFS_BTNUM_BNOi] =
2413			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2414		pag->pagf_levels[XFS_BTNUM_CNTi] =
2415			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
2416		spin_lock_init(&pag->pagb_lock);
2417		pag->pagb_count = 0;
2418		pag->pagb_tree = RB_ROOT;
2419		pag->pagf_init = 1;
2420	}
2421#ifdef DEBUG
2422	else if (!XFS_FORCED_SHUTDOWN(mp)) {
2423		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
2424		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
2425		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2426		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2427		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
2428		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2429		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
2430		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2431	}
2432#endif
2433	xfs_perag_put(pag);
2434	return 0;
2435}
2436
2437/*
2438 * Allocate an extent (variable-size).
2439 * Depending on the allocation type, we either look in a single allocation
2440 * group or loop over the allocation groups to find the result.
2441 */
2442int				/* error */
2443xfs_alloc_vextent(
2444	xfs_alloc_arg_t	*args)	/* allocation argument structure */
2445{
2446	xfs_agblock_t	agsize;	/* allocation group size */
2447	int		error;
2448	int		flags;	/* XFS_ALLOC_FLAG_... locking flags */
2449	xfs_extlen_t	minleft;/* minimum left value, temp copy */
2450	xfs_mount_t	*mp;	/* mount structure pointer */
2451	xfs_agnumber_t	sagno;	/* starting allocation group number */
2452	xfs_alloctype_t	type;	/* input allocation type */
2453	int		bump_rotor = 0;
2454	int		no_min = 0;
2455	xfs_agnumber_t	rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2456
2457	mp = args->mp;
2458	type = args->otype = args->type;
2459	args->agbno = NULLAGBLOCK;
2460	/*
2461	 * Just fix this up, for the case where the last a.g. is shorter
2462	 * (or there's only one a.g.) and the caller couldn't easily figure
2463	 * that out (xfs_bmap_alloc).
2464	 */
2465	agsize = mp->m_sb.sb_agblocks;
2466	if (args->maxlen > agsize)
2467		args->maxlen = agsize;
2468	if (args->alignment == 0)
2469		args->alignment = 1;
2470	ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2471	ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2472	ASSERT(args->minlen <= args->maxlen);
2473	ASSERT(args->minlen <= agsize);
2474	ASSERT(args->mod < args->prod);
2475	if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2476	    XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2477	    args->minlen > args->maxlen || args->minlen > agsize ||
2478	    args->mod >= args->prod) {
2479		args->fsbno = NULLFSBLOCK;
2480		trace_xfs_alloc_vextent_badargs(args);
2481		return 0;
2482	}
2483	minleft = args->minleft;
2484
2485	switch (type) {
2486	case XFS_ALLOCTYPE_THIS_AG:
2487	case XFS_ALLOCTYPE_NEAR_BNO:
2488	case XFS_ALLOCTYPE_THIS_BNO:
2489		/*
2490		 * These three force us into a single a.g.
2491		 */
2492		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2493		args->pag = xfs_perag_get(mp, args->agno);
2494		args->minleft = 0;
2495		error = xfs_alloc_fix_freelist(args, 0);
2496		args->minleft = minleft;
2497		if (error) {
2498			trace_xfs_alloc_vextent_nofix(args);
2499			goto error0;
2500		}
2501		if (!args->agbp) {
2502			trace_xfs_alloc_vextent_noagbp(args);
2503			break;
2504		}
2505		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2506		if ((error = xfs_alloc_ag_vextent(args)))
2507			goto error0;
2508		break;
2509	case XFS_ALLOCTYPE_START_BNO:
2510		/*
2511		 * Try near allocation first, then anywhere-in-ag after
2512		 * the first a.g. fails.
2513		 */
2514		if ((args->userdata & XFS_ALLOC_INITIAL_USER_DATA) &&
2515		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2516			args->fsbno = XFS_AGB_TO_FSB(mp,
2517					((mp->m_agfrotor / rotorstep) %
2518					mp->m_sb.sb_agcount), 0);
2519			bump_rotor = 1;
2520		}
2521		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2522		args->type = XFS_ALLOCTYPE_NEAR_BNO;
2523		/* FALLTHROUGH */
2524	case XFS_ALLOCTYPE_ANY_AG:
2525	case XFS_ALLOCTYPE_START_AG:
2526	case XFS_ALLOCTYPE_FIRST_AG:
2527		/*
2528		 * Rotate through the allocation groups looking for a winner.
2529		 */
2530		if (type == XFS_ALLOCTYPE_ANY_AG) {
2531			/*
2532			 * Start with the last place we left off.
2533			 */
2534			args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2535					mp->m_sb.sb_agcount;
2536			args->type = XFS_ALLOCTYPE_THIS_AG;
2537			flags = XFS_ALLOC_FLAG_TRYLOCK;
2538		} else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2539			/*
2540			 * Start with allocation group given by bno.
2541			 */
2542			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2543			args->type = XFS_ALLOCTYPE_THIS_AG;
2544			sagno = 0;
2545			flags = 0;
2546		} else {
2547			if (type == XFS_ALLOCTYPE_START_AG)
2548				args->type = XFS_ALLOCTYPE_THIS_AG;
2549			/*
2550			 * Start with the given allocation group.
2551			 */
2552			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2553			flags = XFS_ALLOC_FLAG_TRYLOCK;
2554		}
2555		/*
2556		 * Loop over allocation groups twice; first time with
2557		 * trylock set, second time without.
2558		 */
2559		for (;;) {
2560			args->pag = xfs_perag_get(mp, args->agno);
2561			if (no_min)
				args->minleft = 0;
2562			error = xfs_alloc_fix_freelist(args, flags);
2563			args->minleft = minleft;
2564			if (error) {
2565				trace_xfs_alloc_vextent_nofix(args);
2566				goto error0;
2567			}
2568			/*
2569			 * If we get a buffer back then the allocation will fly.
2570			 */
2571			if (args->agbp) {
2572				if ((error = xfs_alloc_ag_vextent(args)))
2573					goto error0;
2574				break;
2575			}
2576
2577			trace_xfs_alloc_vextent_loopfailed(args);
2578
2579			/*
2580			 * Didn't work, figure out the next iteration.
2581			 */
2582			if (args->agno == sagno &&
2583			    type == XFS_ALLOCTYPE_START_BNO)
2584				args->type = XFS_ALLOCTYPE_THIS_AG;
2585			/*
2586			 * For the first allocation, we can try any AG to get
2587			 * space.  However, if we already have allocated a
2588			 * block, we don't want to try AGs whose number is below
2589			 * sagno. Otherwise, we may end up with out-of-order
2590			 * locking of AGF, which might cause deadlock.
2591			 */
2592			if (++(args->agno) == mp->m_sb.sb_agcount) {
2593				if (args->firstblock != NULLFSBLOCK)
2594					args->agno = sagno;
2595				else
2596					args->agno = 0;
2597			}
2598			/*
2599			 * Reached the starting a.g., must either be done
2600			 * or switch to non-trylock mode.
2601			 */
2602			if (args->agno == sagno) {
2603				if (no_min == 1) {
2604					args->agbno = NULLAGBLOCK;
2605					trace_xfs_alloc_vextent_allfailed(args);
2606					break;
2607				}
2608				if (flags == 0) {
2609					no_min = 1;
2610				} else {
2611					flags = 0;
2612					if (type == XFS_ALLOCTYPE_START_BNO) {
2613						args->agbno = XFS_FSB_TO_AGBNO(mp,
2614							args->fsbno);
2615						args->type = XFS_ALLOCTYPE_NEAR_BNO;
2616					}
2617				}
2618			}
2619			xfs_perag_put(args->pag);
2620		}
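		/*
		 * Advance the AG rotor so the next ANY_AG allocation starts
		 * where this search left off; xfs_rotorstep controls how
		 * many allocations land in an AG before moving on.
		 */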
2621		if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2622			if (args->agno == sagno)
2623				mp->m_agfrotor = (mp->m_agfrotor + 1) %
2624					(mp->m_sb.sb_agcount * rotorstep);
2625			else
2626				mp->m_agfrotor = (args->agno * rotorstep + 1) %
2627					(mp->m_sb.sb_agcount * rotorstep);
2628		}
2629		break;
2630	default:
2631		ASSERT(0);
2632		/* NOTREACHED */
2633	}
2634	if (args->agbno == NULLAGBLOCK)
2635		args->fsbno = NULLFSBLOCK;
2636	else {
2637		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2638#ifdef DEBUG
2639		ASSERT(args->len >= args->minlen);
2640		ASSERT(args->len <= args->maxlen);
2641		ASSERT(args->agbno % args->alignment == 0);
2642		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2643			args->len);
2644#endif
2645
2646		/* Zero the extent if we were asked to do so */
2647		if (args->userdata & XFS_ALLOC_USERDATA_ZERO) {
2648			error = xfs_zero_extent(args->ip, args->fsbno, args->len);
2649			if (error)
2650				goto error0;
2651		}
2652
2653	}
2654	xfs_perag_put(args->pag);
2655	return 0;
2656error0:
2657	xfs_perag_put(args->pag);
2658	return error;
2659}
2660
2661/*
2662 * Free an extent.
2663 * Just break up the extent address and hand off to xfs_free_ag_extent
2664 * after fixing up the freelist.
2665 */
2666int				/* error */
2667xfs_free_extent(
2668	xfs_trans_t	*tp,	/* transaction pointer */
2669	xfs_fsblock_t	bno,	/* starting block number of extent */
2670	xfs_extlen_t	len)	/* length of extent */
2671{
2672	xfs_alloc_arg_t	args;
2673	int		error;
2674
2675	ASSERT(len != 0);
2676	memset(&args, 0, sizeof(xfs_alloc_arg_t));
2677	args.tp = tp;
2678	args.mp = tp->t_mountp;
2679
2680	/*
2681	 * validate that the block number is legal - this enables us to detect
2682	 * and handle a silent filesystem corruption rather than crashing.
2683	 */
2684	args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2685	if (args.agno >= args.mp->m_sb.sb_agcount)
2686		return -EFSCORRUPTED;
2687
2688	args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2689	if (args.agbno >= args.mp->m_sb.sb_agblocks)
2690		return -EFSCORRUPTED;
2691
2692	args.pag = xfs_perag_get(args.mp, args.agno);
2693	ASSERT(args.pag);
2694
2695	error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2696	if (error)
2697		goto error0;
2698
2699	/* validate the extent size is legal now that we have the agf locked */
2700	if (args.agbno + len >
2701			be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
2702		error = -EFSCORRUPTED;
2703		goto error0;
2704	}
2705
2706	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2707	if (!error)
2708		xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
2709error0:
2710	xfs_perag_put(args.pag);
2711	return error;
2712}
v4.10.11
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_format.h"
  21#include "xfs_log_format.h"
  22#include "xfs_shared.h"
  23#include "xfs_trans_resv.h"
  24#include "xfs_bit.h"
  25#include "xfs_sb.h"
  26#include "xfs_mount.h"
  27#include "xfs_defer.h"
  28#include "xfs_inode.h"
  29#include "xfs_btree.h"
  30#include "xfs_rmap.h"
  31#include "xfs_alloc_btree.h"
  32#include "xfs_alloc.h"
  33#include "xfs_extent_busy.h"
  34#include "xfs_error.h"
  35#include "xfs_cksum.h"
  36#include "xfs_trace.h"
  37#include "xfs_trans.h"
  38#include "xfs_buf_item.h"
  39#include "xfs_log.h"
  40#include "xfs_ag_resv.h"
  41
  42struct workqueue_struct *xfs_alloc_wq;
  43
  44#define XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
  45
  46#define	XFSA_FIXUP_BNO_OK	1
  47#define	XFSA_FIXUP_CNT_OK	2
  48
  49STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
  50STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
  51STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
  52STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
  53		xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
  54
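/*
 * The start of each AG holds its headers and btree root blocks at fixed
 * locations. xfs_refc_block() returns where the refcount btree root sits
 * (just past the preceding root block); xfs_prealloc_blocks() returns the
 * first block past all of the fixed-location metadata.
 */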
  55unsigned int
  56xfs_refc_block(
  57	struct xfs_mount	*mp)
  58{
  59	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
  60		return XFS_RMAP_BLOCK(mp) + 1;
  61	if (xfs_sb_version_hasfinobt(&mp->m_sb))
  62		return XFS_FIBT_BLOCK(mp) + 1;
  63	return XFS_IBT_BLOCK(mp) + 1;
  64}
  65
  66xfs_extlen_t
  67xfs_prealloc_blocks(
  68	struct xfs_mount	*mp)
  69{
  70	if (xfs_sb_version_hasreflink(&mp->m_sb))
  71		return xfs_refc_block(mp) + 1;
  72	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
  73		return XFS_RMAP_BLOCK(mp) + 1;
  74	if (xfs_sb_version_hasfinobt(&mp->m_sb))
  75		return XFS_FIBT_BLOCK(mp) + 1;
  76	return XFS_IBT_BLOCK(mp) + 1;
  77}
  78
  79/*
  80 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
  81 * AGF buffer (PV 947395), we place constraints on the relationship among
  82 * actual allocations for data blocks, freelist blocks, and potential file data
  83 * bmap btree blocks. However, these restrictions may result in no actual space
  84 * allocated for a delayed extent, for example, a data block in a certain AG is
  85 * allocated but there is no additional block for the additional bmap btree
  86 * block due to a split of the bmap btree of the file. The result of this may
  87 * lead to an infinite loop when the file gets flushed to disk and all delayed
  88 * extents need to be actually allocated. To get around this, we explicitly set
  89 * aside a few blocks which will not be reserved in delayed allocation.
  90 *
  91 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
  92 * potential split of the file's bmap btree.
  93 */
  94unsigned int
  95xfs_alloc_set_aside(
  96	struct xfs_mount	*mp)
  97{
  98	return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
  99}
 100
 101/*
 102 * When deciding how much space to allocate out of an AG, we limit the
 103 * allocation maximum size to the size the AG. However, we cannot use all the
 104 * blocks in the AG - some are permanently used by metadata. These
 105 * blocks are generally:
 106 *	- the AG superblock, AGF, AGI and AGFL
 107 *	- the AGF (bno and cnt) and AGI btree root blocks, and optionally
 108 *	  the AGI free inode and rmap btree root blocks.
 109 *	- blocks on the AGFL according to xfs_alloc_set_aside() limits
 110 *	- the rmapbt root block
 111 *
 112 * The AG headers are sector sized, so the amount of space they take up is
 113 * dependent on filesystem geometry. The others are all single blocks.
 114 */
 115unsigned int
 116xfs_alloc_ag_max_usable(
 117	struct xfs_mount	*mp)
 118{
 119	unsigned int		blocks;
 120
 121	blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
 122	blocks += XFS_ALLOC_AGFL_RESERVE;
 123	blocks += 3;			/* AGF, AGI btree root blocks */
 124	if (xfs_sb_version_hasfinobt(&mp->m_sb))
 125		blocks++;		/* finobt root block */
 126	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
 127		blocks++; 		/* rmap root block */
 128	if (xfs_sb_version_hasreflink(&mp->m_sb))
 129		blocks++;		/* refcount root block */
 130
 131	return mp->m_sb.sb_agblocks - blocks;
 132}
 133
 134/*
 135 * Lookup the record equal to [bno, len] in the btree given by cur.
 136 */
 137STATIC int				/* error */
 138xfs_alloc_lookup_eq(
 139	struct xfs_btree_cur	*cur,	/* btree cursor */
 140	xfs_agblock_t		bno,	/* starting block of extent */
 141	xfs_extlen_t		len,	/* length of extent */
 142	int			*stat)	/* success/failure */
 143{
 144	cur->bc_rec.a.ar_startblock = bno;
 145	cur->bc_rec.a.ar_blockcount = len;
 146	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
 147}
 148
 149/*
 150 * Lookup the first record greater than or equal to [bno, len]
 151 * in the btree given by cur.
 152 */
 153int				/* error */
 154xfs_alloc_lookup_ge(
 155	struct xfs_btree_cur	*cur,	/* btree cursor */
 156	xfs_agblock_t		bno,	/* starting block of extent */
 157	xfs_extlen_t		len,	/* length of extent */
 158	int			*stat)	/* success/failure */
 159{
 160	cur->bc_rec.a.ar_startblock = bno;
 161	cur->bc_rec.a.ar_blockcount = len;
 162	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 163}
 164
 165/*
 166 * Lookup the first record less than or equal to [bno, len]
 167 * in the btree given by cur.
 168 */
 169static int				/* error */
 170xfs_alloc_lookup_le(
 171	struct xfs_btree_cur	*cur,	/* btree cursor */
 172	xfs_agblock_t		bno,	/* starting block of extent */
 173	xfs_extlen_t		len,	/* length of extent */
 174	int			*stat)	/* success/failure */
 175{
 176	cur->bc_rec.a.ar_startblock = bno;
 177	cur->bc_rec.a.ar_blockcount = len;
 178	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
 179}
 180
 181/*
 182 * Update the record referred to by cur to the value given
 183 * by [bno, len].
 184 * This either works (return 0) or gets an EFSCORRUPTED error.
 185 */
 186STATIC int				/* error */
 187xfs_alloc_update(
 188	struct xfs_btree_cur	*cur,	/* btree cursor */
 189	xfs_agblock_t		bno,	/* starting block of extent */
 190	xfs_extlen_t		len)	/* length of extent */
 191{
 192	union xfs_btree_rec	rec;
 193
 194	rec.alloc.ar_startblock = cpu_to_be32(bno);
 195	rec.alloc.ar_blockcount = cpu_to_be32(len);
 196	return xfs_btree_update(cur, &rec);
 197}
 198
 199/*
 200 * Get the data from the pointed-to record.
 201 */
 202int					/* error */
 203xfs_alloc_get_rec(
 204	struct xfs_btree_cur	*cur,	/* btree cursor */
 205	xfs_agblock_t		*bno,	/* output: starting block of extent */
 206	xfs_extlen_t		*len,	/* output: length of extent */
 207	int			*stat)	/* output: success/failure */
 208{
 209	union xfs_btree_rec	*rec;
 210	int			error;
 211
 212	error = xfs_btree_get_rec(cur, &rec, stat);
 213	if (!error && *stat == 1) {
 214		*bno = be32_to_cpu(rec->alloc.ar_startblock);
 215		*len = be32_to_cpu(rec->alloc.ar_blockcount);
 216	}
 217	return error;
 218}
 219
 220/*
 221 * Compute aligned version of the found extent.
 222 * Takes alignment and min length into account.
 223 */
 224STATIC void
 225xfs_alloc_compute_aligned(
 226	xfs_alloc_arg_t	*args,		/* allocation argument structure */
 227	xfs_agblock_t	foundbno,	/* starting block in found extent */
 228	xfs_extlen_t	foundlen,	/* length in found extent */
 229	xfs_agblock_t	*resbno,	/* result block number */
 230	xfs_extlen_t	*reslen)	/* result length */
 231{
 232	xfs_agblock_t	bno;
 233	xfs_extlen_t	len;
 234	xfs_extlen_t	diff;
 235
 236	/* Trim busy sections out of found extent */
 237	xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
 238
 239	/*
 240	 * If we have a largish extent that happens to start before min_agbno,
 241	 * see if we can shift it into range...
 242	 */
 243	if (bno < args->min_agbno && bno + len > args->min_agbno) {
 244		diff = args->min_agbno - bno;
 245		if (len > diff) {
 246			bno += diff;
 247			len -= diff;
 248		}
 249	}
 250
 251	if (args->alignment > 1 && len >= args->minlen) {
 252		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);
 253
 254		diff = aligned_bno - bno;
 255
 256		*resbno = aligned_bno;
 257		*reslen = diff >= len ? 0 : len - diff;
 258	} else {
 259		*resbno = bno;
 260		*reslen = len;
 261	}
 262}
 263
 264/*
 265 * Compute best start block and diff for "near" allocations.
 266 * freelen >= wantlen already checked by caller.
 267 */
 268STATIC xfs_extlen_t			/* difference value (absolute) */
 269xfs_alloc_compute_diff(
 270	xfs_agblock_t	wantbno,	/* target starting block */
 271	xfs_extlen_t	wantlen,	/* target length */
 272	xfs_extlen_t	alignment,	/* target alignment */
 273	int		datatype,	/* are we allocating data? */
 274	xfs_agblock_t	freebno,	/* freespace's starting block */
 275	xfs_extlen_t	freelen,	/* freespace's length */
 276	xfs_agblock_t	*newbnop)	/* result: best start block from free */
 277{
 278	xfs_agblock_t	freeend;	/* end of freespace extent */
 279	xfs_agblock_t	newbno1;	/* return block number */
 280	xfs_agblock_t	newbno2;	/* other new block number */
 281	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
 282	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
 283	xfs_agblock_t	wantend;	/* end of target extent */
 284	bool		userdata = xfs_alloc_is_userdata(datatype);
 285
 286	ASSERT(freelen >= wantlen);
 287	freeend = freebno + freelen;
 288	wantend = wantbno + wantlen;
 289	/*
 290	 * We want to allocate from the start of a free extent if it is past
 291	 * the desired block or if we are allocating user data and the free
 292	 * extent is before desired block. The second case is there to allow
 293	 * for contiguous allocation from the remaining free space if the file
 294	 * grows in the short term.
 295	 */
 296	if (freebno >= wantbno || (userdata && freeend < wantend)) {
 297		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
 298			newbno1 = NULLAGBLOCK;
 299	} else if (freeend >= wantend && alignment > 1) {
 300		newbno1 = roundup(wantbno, alignment);
 301		newbno2 = newbno1 - alignment;
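		/*
		 * Two aligned candidates straddle wantbno: newbno1 rounded
		 * up to alignment, newbno2 one alignment unit lower. Prefer
		 * the one that yields the longer allocation; break ties by
		 * distance from wantbno.
		 */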
 302		if (newbno1 >= freeend)
 303			newbno1 = NULLAGBLOCK;
 304		else
 305			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
 306		if (newbno2 < freebno)
 307			newbno2 = NULLAGBLOCK;
 308		else
 309			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
 310		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
 311			if (newlen1 < newlen2 ||
 312			    (newlen1 == newlen2 &&
 313			     XFS_ABSDIFF(newbno1, wantbno) >
 314			     XFS_ABSDIFF(newbno2, wantbno)))
 315				newbno1 = newbno2;
 316		} else if (newbno2 != NULLAGBLOCK)
 317			newbno1 = newbno2;
 318	} else if (freeend >= wantend) {
 319		newbno1 = wantbno;
 320	} else if (alignment > 1) {
 321		newbno1 = roundup(freeend - wantlen, alignment);
 322		if (newbno1 > freeend - wantlen &&
 323		    newbno1 - alignment >= freebno)
 324			newbno1 -= alignment;
 325		else if (newbno1 >= freeend)
 326			newbno1 = NULLAGBLOCK;
 327	} else
 328		newbno1 = freeend - wantlen;
 329	*newbnop = newbno1;
 330	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
 331}
 332
 333/*
 334 * Fix up the length, based on mod and prod.
 335 * len should be k * prod + mod for some k.
 336 * If len is too small it is returned unchanged.
 337 * If len hits maxlen it is left alone.
 338 */
 339STATIC void
 340xfs_alloc_fix_len(
 341	xfs_alloc_arg_t	*args)		/* allocation argument structure */
 342{
 343	xfs_extlen_t	k;
 344	xfs_extlen_t	rlen;
 345
 346	ASSERT(args->mod < args->prod);
 347	rlen = args->len;
 348	ASSERT(rlen >= args->minlen);
 349	ASSERT(rlen <= args->maxlen);
 350	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
 351	    (args->mod == 0 && rlen < args->prod))
 352		return;
 353	k = rlen % args->prod;
 354	if (k == args->mod)
 355		return;
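	/*
	 * Trim rlen down to the largest value <= rlen that satisfies
	 * rlen % prod == mod.
	 */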
 356	if (k > args->mod)
 357		rlen = rlen - (k - args->mod);
 358	else
 359		rlen = rlen - args->prod + (args->mod - k);
 360	/* casts to (int) catch length underflows */
 361	if ((int)rlen < (int)args->minlen)
 362		return;
 363	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
 364	ASSERT(rlen % args->prod == args->mod);
 365	ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
 366		rlen + args->minleft);
 367	args->len = rlen;
 368}
 369
 370/*
 371 * Update the two btrees, logically removing from freespace the extent
 372 * starting at rbno, rlen blocks.  The extent is contained within the
 373 * actual (current) free extent fbno for flen blocks.
 374 * Flags are passed in indicating whether the cursors are set to the
 375 * relevant records.
 376 */
 377STATIC int				/* error code */
 378xfs_alloc_fixup_trees(
 379	xfs_btree_cur_t	*cnt_cur,	/* cursor for by-size btree */
 380	xfs_btree_cur_t	*bno_cur,	/* cursor for by-block btree */
 381	xfs_agblock_t	fbno,		/* starting block of free extent */
 382	xfs_extlen_t	flen,		/* length of free extent */
 383	xfs_agblock_t	rbno,		/* starting block of returned extent */
 384	xfs_extlen_t	rlen,		/* length of returned extent */
 385	int		flags)		/* flags, XFSA_FIXUP_... */
 386{
 387	int		error;		/* error code */
 388	int		i;		/* operation results */
 389	xfs_agblock_t	nfbno1;		/* first new free startblock */
 390	xfs_agblock_t	nfbno2;		/* second new free startblock */
 391	xfs_extlen_t	nflen1=0;	/* first new free length */
 392	xfs_extlen_t	nflen2=0;	/* second new free length */
 393	struct xfs_mount *mp;
 394
 395	mp = cnt_cur->bc_mp;
 396
 397	/*
 398	 * Look up the record in the by-size tree if necessary.
 399	 */
 400	if (flags & XFSA_FIXUP_CNT_OK) {
 401#ifdef DEBUG
 402		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
 403			return error;
 404		XFS_WANT_CORRUPTED_RETURN(mp,
 405			i == 1 && nfbno1 == fbno && nflen1 == flen);
 406#endif
 407	} else {
 408		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
 409			return error;
 410		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 411	}
 412	/*
 413	 * Look up the record in the by-block tree if necessary.
 414	 */
 415	if (flags & XFSA_FIXUP_BNO_OK) {
 416#ifdef DEBUG
 417		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
 418			return error;
 419		XFS_WANT_CORRUPTED_RETURN(mp,
 420			i == 1 && nfbno1 == fbno && nflen1 == flen);
 421#endif
 422	} else {
 423		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
 424			return error;
 425		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 426	}
 427
 428#ifdef DEBUG
 429	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
 430		struct xfs_btree_block	*bnoblock;
 431		struct xfs_btree_block	*cntblock;
 432
 433		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
 434		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
 435
 436		XFS_WANT_CORRUPTED_RETURN(mp,
 437			bnoblock->bb_numrecs == cntblock->bb_numrecs);
 438	}
 439#endif
 440
 441	/*
 442	 * Deal with all four cases: the allocated record is contained
 443	 * within the freespace record, so we can have new freespace
 444	 * at either (or both) end, or no freespace remaining.
 445	 */
 446	if (rbno == fbno && rlen == flen)
 447		nfbno1 = nfbno2 = NULLAGBLOCK;
 448	else if (rbno == fbno) {
 449		nfbno1 = rbno + rlen;
 450		nflen1 = flen - rlen;
 451		nfbno2 = NULLAGBLOCK;
 452	} else if (rbno + rlen == fbno + flen) {
 453		nfbno1 = fbno;
 454		nflen1 = flen - rlen;
 455		nfbno2 = NULLAGBLOCK;
 456	} else {
 457		nfbno1 = fbno;
 458		nflen1 = rbno - fbno;
 459		nfbno2 = rbno + rlen;
 460		nflen2 = (fbno + flen) - nfbno2;
 461	}
 462	/*
 463	 * Delete the entry from the by-size btree.
 464	 */
 465	if ((error = xfs_btree_delete(cnt_cur, &i)))
 466		return error;
 467	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 468	/*
 469	 * Add new by-size btree entry(s).
 470	 */
 471	if (nfbno1 != NULLAGBLOCK) {
 472		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
 473			return error;
 474		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 475		if ((error = xfs_btree_insert(cnt_cur, &i)))
 476			return error;
 477		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 478	}
 479	if (nfbno2 != NULLAGBLOCK) {
 480		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
 481			return error;
 482		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 483		if ((error = xfs_btree_insert(cnt_cur, &i)))
 484			return error;
 485		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 486	}
 487	/*
 488	 * Fix up the by-block btree entry(s).
 489	 */
 490	if (nfbno1 == NULLAGBLOCK) {
 491		/*
 492		 * No remaining freespace, just delete the by-block tree entry.
 493		 */
 494		if ((error = xfs_btree_delete(bno_cur, &i)))
 495			return error;
 496		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 497	} else {
 498		/*
 499		 * Update the by-block entry to start later|be shorter.
 500		 */
 501		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
 502			return error;
 503	}
 504	if (nfbno2 != NULLAGBLOCK) {
 505		/*
 506		 * 2 resulting free entries, need to add one.
 507		 */
 508		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
 509			return error;
 510		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 511		if ((error = xfs_btree_insert(bno_cur, &i)))
 512			return error;
 513		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 514	}
 515	return 0;
 516}
 517
 518static bool
 519xfs_agfl_verify(
 520	struct xfs_buf	*bp)
 521{
 522	struct xfs_mount *mp = bp->b_target->bt_mount;
 523	struct xfs_agfl	*agfl = XFS_BUF_TO_AGFL(bp);
 524	int		i;
 525
 526	if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
 527		return false;
 528	if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
 529		return false;
 530	/*
 531	 * during growfs operations, the perag is not fully initialised,
 532	 * so we can't use it for any useful checking. growfs ensures we can't
 533	 * use it by using uncached buffers that don't have the perag attached
 534	 * so we can detect and avoid this problem.
 535	 */
 536	if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
 537		return false;
 538
 539	for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
 540		if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
 541		    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
 542			return false;
 543	}
 544
 545	return xfs_log_check_lsn(mp,
 546				 be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn));
 547}
 548
 549static void
 550xfs_agfl_read_verify(
 551	struct xfs_buf	*bp)
 552{
 553	struct xfs_mount *mp = bp->b_target->bt_mount;
 554
 555	/*
 556	 * There is no verification of non-crc AGFLs because mkfs does not
 557	 * initialise the AGFL to zero or NULL. Hence the only valid part of the
 558	 * AGFL is what the AGF says is active. We can't get to the AGF, so we
 559	 * can't verify just those entries are valid.
 560	 */
 561	if (!xfs_sb_version_hascrc(&mp->m_sb))
 562		return;
 563
 564	if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
 565		xfs_buf_ioerror(bp, -EFSBADCRC);
 566	else if (!xfs_agfl_verify(bp))
 567		xfs_buf_ioerror(bp, -EFSCORRUPTED);
 568
 569	if (bp->b_error)
 570		xfs_verifier_error(bp);
 571}
 572
 573static void
 574xfs_agfl_write_verify(
 575	struct xfs_buf	*bp)
 576{
 577	struct xfs_mount *mp = bp->b_target->bt_mount;
 578	struct xfs_buf_log_item	*bip = bp->b_fspriv;
 579
 580	/* no verification of non-crc AGFLs */
 581	if (!xfs_sb_version_hascrc(&mp->m_sb))
 582		return;
 583
 584	if (!xfs_agfl_verify(bp)) {
 585		xfs_buf_ioerror(bp, -EFSCORRUPTED);
 586		xfs_verifier_error(bp);
 587		return;
 588	}
 589
 590	if (bip)
 591		XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 592
 593	xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
 594}
 595
 596const struct xfs_buf_ops xfs_agfl_buf_ops = {
 597	.name = "xfs_agfl",
 598	.verify_read = xfs_agfl_read_verify,
 599	.verify_write = xfs_agfl_write_verify,
 600};
 601
 602/*
 603 * Read in the allocation group free block array.
 604 */
 605STATIC int				/* error */
 606xfs_alloc_read_agfl(
 607	xfs_mount_t	*mp,		/* mount point structure */
 608	xfs_trans_t	*tp,		/* transaction pointer */
 609	xfs_agnumber_t	agno,		/* allocation group number */
 610	xfs_buf_t	**bpp)		/* buffer for the ag free block array */
 611{
 612	xfs_buf_t	*bp;		/* return value */
 613	int		error;
 614
 615	ASSERT(agno != NULLAGNUMBER);
 616	error = xfs_trans_read_buf(
 617			mp, tp, mp->m_ddev_targp,
 618			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
 619			XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
 620	if (error)
 621		return error;
 622	xfs_buf_set_ref(bp, XFS_AGFL_REF);
 623	*bpp = bp;
 624	return 0;
 625}
 626
 627STATIC int
 628xfs_alloc_update_counters(
 629	struct xfs_trans	*tp,
 630	struct xfs_perag	*pag,
 631	struct xfs_buf		*agbp,
 632	long			len)
 633{
 634	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
 635
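	/* Keep the in-core perag counter in sync with the on-disk AGF. */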
 636	pag->pagf_freeblks += len;
 637	be32_add_cpu(&agf->agf_freeblks, len);
 638
 639	xfs_trans_agblocks_delta(tp, len);
 640	if (unlikely(be32_to_cpu(agf->agf_freeblks) >
 641		     be32_to_cpu(agf->agf_length)))
 642		return -EFSCORRUPTED;
 643
 644	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
 645	return 0;
 646}
 647
 648/*
 649 * Allocation group level functions.
 650 */
 651
 652/*
 653 * Allocate a variable extent in the allocation group agno.
 654 * Type and bno are used to determine where in the allocation group the
 655 * extent will start.
 656 * Extent's length (returned in *len) will be between minlen and maxlen,
 657 * and of the form k * prod + mod unless there's nothing that large.
 658 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 659 */
 660STATIC int			/* error */
 661xfs_alloc_ag_vextent(
 662	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
 663{
 664	int		error=0;
 665
 666	ASSERT(args->minlen > 0);
 667	ASSERT(args->maxlen > 0);
 668	ASSERT(args->minlen <= args->maxlen);
 669	ASSERT(args->mod < args->prod);
 670	ASSERT(args->alignment > 0);
 671
 672	/*
 673	 * Branch to correct routine based on the type.
 674	 */
 675	args->wasfromfl = 0;
 676	switch (args->type) {
 677	case XFS_ALLOCTYPE_THIS_AG:
 678		error = xfs_alloc_ag_vextent_size(args);
 679		break;
 680	case XFS_ALLOCTYPE_NEAR_BNO:
 681		error = xfs_alloc_ag_vextent_near(args);
 682		break;
 683	case XFS_ALLOCTYPE_THIS_BNO:
 684		error = xfs_alloc_ag_vextent_exact(args);
 685		break;
 686	default:
 687		ASSERT(0);
 688		/* NOTREACHED */
 689	}
 690
 691	if (error || args->agbno == NULLAGBLOCK)
 692		return error;
 693
 694	ASSERT(args->len >= args->minlen);
 695	ASSERT(args->len <= args->maxlen);
 696	ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
 697	ASSERT(args->agbno % args->alignment == 0);
 698
 699	/* if not file data, insert new block into the reverse map btree */
 700	if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
 701		error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
 702				       args->agbno, args->len, &args->oinfo);
 703		if (error)
 704			return error;
 705	}
 706
 707	if (!args->wasfromfl) {
 708		error = xfs_alloc_update_counters(args->tp, args->pag,
 709						  args->agbp,
 710						  -((long)(args->len)));
 711		if (error)
 712			return error;
 713
 714		ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
 715					      args->agbno, args->len));
 716	}
 717
 718	xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
 719
 720	XFS_STATS_INC(args->mp, xs_allocx);
 721	XFS_STATS_ADD(args->mp, xs_allocb, args->len);
 722	return error;
 723}
 724
 725/*
 726 * Allocate a variable extent at exactly agno/bno.
 727 * Extent's length (returned in *len) will be between minlen and maxlen,
 728 * and of the form k * prod + mod unless there's nothing that large.
 729 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 730 */
 731STATIC int			/* error */
 732xfs_alloc_ag_vextent_exact(
 733	xfs_alloc_arg_t	*args)	/* allocation argument structure */
 734{
 735	xfs_btree_cur_t	*bno_cur;/* by block-number btree cursor */
 736	xfs_btree_cur_t	*cnt_cur;/* by count btree cursor */
 737	int		error;
 738	xfs_agblock_t	fbno;	/* start block of found extent */
 739	xfs_extlen_t	flen;	/* length of found extent */
 740	xfs_agblock_t	tbno;	/* start block of trimmed extent */
 741	xfs_extlen_t	tlen;	/* length of trimmed extent */
 742	xfs_agblock_t	tend;	/* end block of trimmed extent */
 743	int		i;	/* success/failure of operation */
 744
 745	ASSERT(args->alignment == 1);
 746
 747	/*
 748	 * Allocate/initialize a cursor for the by-number freespace btree.
 749	 */
 750	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 751					  args->agno, XFS_BTNUM_BNO);
 752
 753	/*
 754	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
 755	 * Look for the closest free block <= bno, it must contain bno
 756	 * if any free block does.
 757	 */
 758	error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
 759	if (error)
 760		goto error0;
 761	if (!i)
 762		goto not_found;
 763
 764	/*
 765	 * Grab the freespace record.
 766	 */
 767	error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
 768	if (error)
 769		goto error0;
 770	XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 771	ASSERT(fbno <= args->agbno);
 772
 773	/*
 774	 * Check for overlapping busy extents.
 775	 */
 776	xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
 777
 778	/*
 779	 * Give up if the start of the extent is busy, or the freespace isn't
 780	 * long enough for the minimum request.
 781	 */
 782	if (tbno > args->agbno)
 783		goto not_found;
 784	if (tlen < args->minlen)
 785		goto not_found;
 786	tend = tbno + tlen;
 787	if (tend < args->agbno + args->minlen)
 788		goto not_found;
 789
 790	/*
 791	 * End of extent will be smaller of the freespace end and the
 792	 * maximal requested end.
 793	 *
 794	 * Fix the length according to mod and prod if given.
 795	 */
 796	args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
 797						- args->agbno;
 798	xfs_alloc_fix_len(args);
 799	ASSERT(args->agbno + args->len <= tend);
 800
 801	/*
 802	 * We are allocating args->len blocks starting at agbno.
 803	 * Allocate/initialize a cursor for the by-size btree.
 804	 */
 805	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 806		args->agno, XFS_BTNUM_CNT);
 807	ASSERT(args->agbno + args->len <=
 808		be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 809	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
 810				      args->len, XFSA_FIXUP_BNO_OK);
 811	if (error) {
 812		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
 813		goto error0;
 814	}
 815
 816	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 817	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 818
 819	args->wasfromfl = 0;
 820	trace_xfs_alloc_exact_done(args);
 821	return 0;
 822
 823not_found:
 824	/* Didn't find it, return null. */
 825	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 826	args->agbno = NULLAGBLOCK;
 827	trace_xfs_alloc_exact_notfound(args);
 828	return 0;
 829
 830error0:
 831	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
 832	trace_xfs_alloc_exact_error(args);
 833	return error;
 834}
 835
 836/*
 837 * Search the btree in a given direction via the search cursor and compare
 838 * the records found against the good extent we've already found.
 839 */
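/*
 * Because the by-bno records are ordered, the search can stop as soon
 * as a record's aligned start is at least gdiff away from args->agbno:
 * every later record in that direction can only be farther away.
 */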
 840STATIC int
 841xfs_alloc_find_best_extent(
 842	struct xfs_alloc_arg	*args,	/* allocation argument structure */
 843	struct xfs_btree_cur	**gcur,	/* good cursor */
 844	struct xfs_btree_cur	**scur,	/* searching cursor */
 845	xfs_agblock_t		gdiff,	/* difference for search comparison */
 846	xfs_agblock_t		*sbno,	/* extent found by search */
 847	xfs_extlen_t		*slen,	/* extent length */
 848	xfs_agblock_t		*sbnoa,	/* aligned extent found by search */
 849	xfs_extlen_t		*slena,	/* aligned extent length */
 850	int			dir)	/* 0 = search right, 1 = search left */
 851{
 852	xfs_agblock_t		new;
 853	xfs_agblock_t		sdiff;
 854	int			error;
 855	int			i;
 856
 857	/* The good extent is perfect, no need to search. */
 858	if (!gdiff)
 859		goto out_use_good;
 860
 861	/*
 862	 * Look until we find a better one, run out of space or run off the end.
 863	 */
 864	do {
 865		error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
 866		if (error)
 867			goto error0;
 868		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 869		xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
 870
 871		/*
 872		 * The good extent is closer than this one.
 873		 */
 874		if (!dir) {
 875			if (*sbnoa > args->max_agbno)
 876				goto out_use_good;
 877			if (*sbnoa >= args->agbno + gdiff)
 878				goto out_use_good;
 879		} else {
 880			if (*sbnoa < args->min_agbno)
 881				goto out_use_good;
 882			if (*sbnoa <= args->agbno - gdiff)
 883				goto out_use_good;
 884		}
 885
 886		/*
 887		 * Same distance, compare length and pick the best.
 888		 */
 889		if (*slena >= args->minlen) {
 890			args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
 891			xfs_alloc_fix_len(args);
 892
 893			sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
 894						       args->alignment,
 895						       args->datatype, *sbnoa,
 896						       *slena, &new);
 897
 898			/*
 899			 * Choose closer size and invalidate other cursor.
 900			 */
 901			if (sdiff < gdiff)
 902				goto out_use_search;
 903			goto out_use_good;
 904		}
 905
 906		if (!dir)
 907			error = xfs_btree_increment(*scur, 0, &i);
 908		else
 909			error = xfs_btree_decrement(*scur, 0, &i);
 910		if (error)
 911			goto error0;
 912	} while (i);
 913
 914out_use_good:
 915	xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
 916	*scur = NULL;
 917	return 0;
 918
 919out_use_search:
 920	xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
 921	*gcur = NULL;
 922	return 0;
 923
 924error0:
 925	/* caller invalidates cursors */
 926	return error;
 927}
 928
 929/*
 930 * Allocate a variable extent near bno in the allocation group agno.
 931 * Extent's length (returned in len) will be between minlen and maxlen,
 932 * and of the form k * prod + mod unless there's nothing that large.
 933 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 934 */
 935STATIC int				/* error */
 936xfs_alloc_ag_vextent_near(
 937	xfs_alloc_arg_t	*args)		/* allocation argument structure */
 938{
 939	xfs_btree_cur_t	*bno_cur_gt;	/* cursor for bno btree, right side */
 940	xfs_btree_cur_t	*bno_cur_lt;	/* cursor for bno btree, left side */
 941	xfs_btree_cur_t	*cnt_cur;	/* cursor for count btree */
 942	xfs_agblock_t	gtbno;		/* start bno of right side entry */
 943	xfs_agblock_t	gtbnoa;		/* aligned ... */
 944	xfs_extlen_t	gtdiff;		/* difference to right side entry */
 945	xfs_extlen_t	gtlen;		/* length of right side entry */
 946	xfs_extlen_t	gtlena;		/* aligned ... */
 947	xfs_agblock_t	gtnew;		/* useful start bno of right side */
 948	int		error;		/* error code */
 949	int		i;		/* result code, temporary */
 950	int		j;		/* result code, temporary */
 951	xfs_agblock_t	ltbno;		/* start bno of left side entry */
 952	xfs_agblock_t	ltbnoa;		/* aligned ... */
 953	xfs_extlen_t	ltdiff;		/* difference to left side entry */
 954	xfs_extlen_t	ltlen;		/* length of left side entry */
 955	xfs_extlen_t	ltlena;		/* aligned ... */
 956	xfs_agblock_t	ltnew;		/* useful start bno of left side */
 957	xfs_extlen_t	rlen;		/* length of returned extent */
 958	int		forced = 0;
 959#ifdef DEBUG
 960	/*
 961	 * Randomly don't execute the first algorithm.
 962	 */
 963	int		dofirst;	/* set to do first algorithm */
 964
 965	dofirst = prandom_u32() & 1;
 966#endif
 967
 968	/* handle uninitialized agbno range so caller doesn't have to */
 969	if (!args->min_agbno && !args->max_agbno)
 970		args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
 971	ASSERT(args->min_agbno <= args->max_agbno);
 972
 973	/* clamp agbno to the range if it's outside */
 974	if (args->agbno < args->min_agbno)
 975		args->agbno = args->min_agbno;
 976	if (args->agbno > args->max_agbno)
 977		args->agbno = args->max_agbno;
 978
 979restart:
 980	bno_cur_lt = NULL;
 981	bno_cur_gt = NULL;
 982	ltlen = 0;
 983	gtlena = 0;
 984	ltlena = 0;
 985
 986	/*
 987	 * Get a cursor for the by-size btree.
 988	 */
 989	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 990		args->agno, XFS_BTNUM_CNT);
 991
 992	/*
 993	 * See if there are any free extents as big as maxlen.
 994	 */
 995	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
 996		goto error0;
 997	/*
 998	 * If none, then pick up the last entry in the tree unless the
 999	 * tree is empty.
1000	 */
1001	if (!i) {
1002		if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
1003				&ltlen, &i)))
1004			goto error0;
1005		if (i == 0 || ltlen == 0) {
1006			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1007			trace_xfs_alloc_near_noentry(args);
1008			return 0;
1009		}
1010		ASSERT(i == 1);
1011	}
1012	args->wasfromfl = 0;
1013
1014	/*
1015	 * First algorithm.
1016	 * If the requested extent is large wrt the freespaces available
1017	 * in this a.g., then the cursor will be pointing to a btree entry
1018	 * near the right edge of the tree.  If it's in the last btree leaf
1019	 * block, then we just examine all the entries in that block
1020	 * that are big enough, and pick the best one.
1021	 * This is written as a while loop so we can break out of it,
1022	 * but we never loop back to the top.
1023	 */
1024	while (xfs_btree_islastblock(cnt_cur, 0)) {
1025		xfs_extlen_t	bdiff;
1026		int		besti=0;
1027		xfs_extlen_t	blen=0;
1028		xfs_agblock_t	bnew=0;
1029
1030#ifdef DEBUG
1031		if (dofirst)
1032			break;
1033#endif
1034		/*
1035		 * Start from the entry that lookup found, sequence through
1036		 * all larger free blocks.  If we're actually pointing at a
1037		 * record smaller than maxlen, go to the start of this block,
1038		 * and skip all those smaller than minlen.
1039		 */
1040		if (ltlen || args->alignment > 1) {
1041			cnt_cur->bc_ptrs[0] = 1;
1042			do {
1043				if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
1044						&ltlen, &i)))
1045					goto error0;
1046				XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1047				if (ltlen >= args->minlen)
1048					break;
1049				if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
1050					goto error0;
1051			} while (i);
1052			ASSERT(ltlen >= args->minlen);
1053			if (!i)
1054				break;
1055		}
1056		i = cnt_cur->bc_ptrs[0];
1057		for (j = 1, blen = 0, bdiff = 0;
1058		     !error && j && (blen < args->maxlen || bdiff > 0);
1059		     error = xfs_btree_increment(cnt_cur, 0, &j)) {
1060			/*
1061			 * For each entry, decide if it's better than
1062			 * the previous best entry.
1063			 */
1064			if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1065				goto error0;
1066			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1067			xfs_alloc_compute_aligned(args, ltbno, ltlen,
1068						  &ltbnoa, &ltlena);
1069			if (ltlena < args->minlen)
1070				continue;
1071			if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
1072				continue;
1073			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1074			xfs_alloc_fix_len(args);
1075			ASSERT(args->len >= args->minlen);
1076			if (args->len < blen)
1077				continue;
1078			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1079				args->alignment, args->datatype, ltbnoa,
1080				ltlena, &ltnew);
1081			if (ltnew != NULLAGBLOCK &&
1082			    (args->len > blen || ltdiff < bdiff)) {
1083				bdiff = ltdiff;
1084				bnew = ltnew;
1085				blen = args->len;
1086				besti = cnt_cur->bc_ptrs[0];
1087			}
1088		}
1089		/*
1090		 * It didn't work.  We COULD be in a case where
1091		 * there's a good record somewhere, so try again.
1092		 */
1093		if (blen == 0)
1094			break;
1095		/*
1096		 * Point at the best entry, and retrieve it again.
1097		 */
1098		cnt_cur->bc_ptrs[0] = besti;
1099		if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1100			goto error0;
1101		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1102		ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1103		args->len = blen;
1104
1105		/*
1106		 * We are allocating starting at bnew for blen blocks.
1107		 */
1108		args->agbno = bnew;
1109		ASSERT(bnew >= ltbno);
1110		ASSERT(bnew + blen <= ltbno + ltlen);
1111		/*
1112		 * Set up a cursor for the by-bno tree.
1113		 */
1114		bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
1115			args->agbp, args->agno, XFS_BTNUM_BNO);
1116		/*
1117		 * Fix up the btree entries.
1118		 */
1119		if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
1120				ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
1121			goto error0;
1122		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1123		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1124
1125		trace_xfs_alloc_near_first(args);
1126		return 0;
1127	}
1128	/*
1129	 * Second algorithm.
1130	 * Search in the by-bno tree to the left and to the right
1131	 * simultaneously, until in each case we find a space big enough,
1132	 * or run into the edge of the tree.  When we run into the edge,
1133	 * we deallocate that cursor.
1134	 * If both searches succeed, we compare the two spaces and pick
1135	 * the better one.
1136	 * With alignment, it's possible for both to fail; the upper
1137	 * level algorithm that picks allocation groups for allocations
1138	 * is not supposed to do this.
1139	 */
1140	/*
1141	 * Allocate and initialize the cursor for the leftward search.
1142	 */
1143	bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1144		args->agno, XFS_BTNUM_BNO);
1145	/*
1146	 * Lookup <= bno to find the leftward search's starting point.
1147	 */
1148	if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
1149		goto error0;
1150	if (!i) {
1151		/*
1152		 * Didn't find anything; use this cursor for the rightward
1153		 * search.
1154		 */
1155		bno_cur_gt = bno_cur_lt;
1156		bno_cur_lt = NULL;
1157	}
1158	/*
1159	 * Found something.  Duplicate the cursor for the rightward search.
1160	 */
1161	else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
1162		goto error0;
1163	/*
1164	 * Increment the cursor, so we will point at the entry just right
1165	 * of the leftward entry if any, or to the leftmost entry.
1166	 */
1167	if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1168		goto error0;
1169	if (!i) {
1170		/*
1171		 * It failed, there are no rightward entries.
1172		 */
1173		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1174		bno_cur_gt = NULL;
1175	}
1176	/*
1177	 * Loop going left with the leftward cursor, right with the
1178	 * rightward cursor, until either both directions give up or
1179	 * we find an entry at least as big as minlen.
1180	 */
1181	do {
1182		if (bno_cur_lt) {
1183			if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
1184				goto error0;
1185			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1186			xfs_alloc_compute_aligned(args, ltbno, ltlen,
1187						  &ltbnoa, &ltlena);
1188			if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
1189				break;
1190			if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
1191				goto error0;
1192			if (!i || ltbnoa < args->min_agbno) {
1193				xfs_btree_del_cursor(bno_cur_lt,
1194						     XFS_BTREE_NOERROR);
1195				bno_cur_lt = NULL;
1196			}
1197		}
1198		if (bno_cur_gt) {
1199			if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1200				goto error0;
1201			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1202			xfs_alloc_compute_aligned(args, gtbno, gtlen,
1203						  &gtbnoa, &gtlena);
1204			if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
1205				break;
1206			if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1207				goto error0;
1208			if (!i || gtbnoa > args->max_agbno) {
1209				xfs_btree_del_cursor(bno_cur_gt,
1210						     XFS_BTREE_NOERROR);
1211				bno_cur_gt = NULL;
1212			}
1213		}
1214	} while (bno_cur_lt || bno_cur_gt);
1215
1216	/*
1217	 * Got both cursors still active, need to find better entry.
1218	 */
1219	if (bno_cur_lt && bno_cur_gt) {
1220		if (ltlena >= args->minlen) {
1221			/*
1222			 * Left side is good, look for a right side entry.
1223			 */
1224			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1225			xfs_alloc_fix_len(args);
1226			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1227				args->alignment, args->datatype, ltbnoa,
1228				ltlena, &ltnew);
1229
1230			error = xfs_alloc_find_best_extent(args,
1231						&bno_cur_lt, &bno_cur_gt,
1232						ltdiff, &gtbno, &gtlen,
1233						&gtbnoa, &gtlena,
1234						0 /* search right */);
1235		} else {
1236			ASSERT(gtlena >= args->minlen);
1237
1238			/*
1239			 * Right side is good, look for a left side entry.
1240			 */
1241			args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1242			xfs_alloc_fix_len(args);
1243			gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1244				args->alignment, args->datatype, gtbnoa,
1245				gtlena, &gtnew);
1246
1247			error = xfs_alloc_find_best_extent(args,
1248						&bno_cur_gt, &bno_cur_lt,
1249						gtdiff, &ltbno, &ltlen,
1250						&ltbnoa, &ltlena,
1251						1 /* search left */);
1252		}
1253
1254		if (error)
1255			goto error0;
1256	}
1257
1258	/*
1259	 * If we couldn't get anything, give up.
1260	 */
1261	if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1262		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1263
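		/*
		 * Everything we found may have been busy; force the log once
		 * so that busy extents freed by committed transactions become
		 * allocatable, then retry the whole search.  A second failure
		 * means there really is nothing usable here.
		 */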
1264		if (!forced++) {
1265			trace_xfs_alloc_near_busy(args);
1266			xfs_log_force(args->mp, XFS_LOG_SYNC);
1267			goto restart;
1268		}
1269		trace_xfs_alloc_size_neither(args);
1270		args->agbno = NULLAGBLOCK;
1271		return 0;
1272	}
1273
1274	/*
1275	 * At this point we have selected a freespace entry, either to the
1276	 * left or to the right.  If it's on the right, copy all the
1277	 * useful variables to the "left" set so we only have one
1278	 * copy of this code.
1279	 */
1280	if (bno_cur_gt) {
1281		bno_cur_lt = bno_cur_gt;
1282		bno_cur_gt = NULL;
1283		ltbno = gtbno;
1284		ltbnoa = gtbnoa;
1285		ltlen = gtlen;
1286		ltlena = gtlena;
1287		j = 1;
1288	} else
1289		j = 0;
1290
1291	/*
1292	 * Fix up the length and compute the useful address.
1293	 */
1294	args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1295	xfs_alloc_fix_len(args);
1296	rlen = args->len;
1297	(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1298				     args->datatype, ltbnoa, ltlena, &ltnew);
1299	ASSERT(ltnew >= ltbno);
1300	ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1301	ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1302	ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
1303	args->agbno = ltnew;
1304
1305	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1306			ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1307		goto error0;
1308
1309	if (j)
1310		trace_xfs_alloc_near_greater(args);
1311	else
1312		trace_xfs_alloc_near_lesser(args);
1313
1314	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1315	xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1316	return 0;
1317
1318 error0:
1319	trace_xfs_alloc_near_error(args);
1320	if (cnt_cur != NULL)
1321		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1322	if (bno_cur_lt != NULL)
1323		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1324	if (bno_cur_gt != NULL)
1325		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1326	return error;
1327}
1328
1329/*
1330 * Allocate a variable extent anywhere in the allocation group agno.
1331 * Extent's length (returned in len) will be between minlen and maxlen,
1332 * and of the form k * prod + mod unless there's nothing that large.
1333 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1334 */
1335STATIC int				/* error */
1336xfs_alloc_ag_vextent_size(
1337	xfs_alloc_arg_t	*args)		/* allocation argument structure */
1338{
1339	xfs_btree_cur_t	*bno_cur;	/* cursor for bno btree */
1340	xfs_btree_cur_t	*cnt_cur;	/* cursor for cnt btree */
1341	int		error;		/* error result */
1342	xfs_agblock_t	fbno;		/* start of found freespace */
1343	xfs_extlen_t	flen;		/* length of found freespace */
1344	int		i;		/* temp status variable */
1345	xfs_agblock_t	rbno;		/* returned block number */
1346	xfs_extlen_t	rlen;		/* length of returned extent */
1347	int		forced = 0;
1348
1349restart:
1350	/*
1351	 * Allocate and initialize a cursor for the by-size btree.
1352	 */
1353	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1354		args->agno, XFS_BTNUM_CNT);
1355	bno_cur = NULL;
1356
1357	/*
1358	 * Look for an entry >= maxlen+alignment-1 blocks.
1359	 */
1360	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1361			args->maxlen + args->alignment - 1, &i)))
1362		goto error0;
1363
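	/*
	 * Any free extent of maxlen + alignment - 1 blocks must contain an
	 * aligned run of maxlen blocks, e.g. for maxlen 16 and alignment 8
	 * a 23-block extent always covers an aligned 16-block chunk.
	 */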
1364	/*
1365	 * If none or we have busy extents that we cannot allocate from, then
1366	 * we have to settle for a smaller extent. In the case that there are
1367	 * no large extents, this will return the last entry in the tree unless
1368	 * the tree is empty. In the case that there are only busy large
1369	 * extents, this will return the largest small extent unless there
1370	 * are no smaller extents available.
1371	 */
1372	if (!i || forced > 1) {
1373		error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1374						   &fbno, &flen, &i);
1375		if (error)
1376			goto error0;
1377		if (i == 0 || flen == 0) {
1378			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1379			trace_xfs_alloc_size_noentry(args);
1380			return 0;
1381		}
1382		ASSERT(i == 1);
1383		xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1384	} else {
1385		/*
1386		 * Search for a non-busy extent that is large enough.
1387		 * If we are at low space, don't check, or if we fall off
1388		 * the end of the btree, turn off the busy check and
1389		 * restart.
1390		 */
1391		for (;;) {
1392			error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1393			if (error)
1394				goto error0;
1395			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1396
1397			xfs_alloc_compute_aligned(args, fbno, flen,
1398						  &rbno, &rlen);
1399
1400			if (rlen >= args->maxlen)
1401				break;
1402
1403			error = xfs_btree_increment(cnt_cur, 0, &i);
1404			if (error)
1405				goto error0;
1406			if (i == 0) {
1407				/*
1408				 * Our only valid extents must have been busy.
1409				 * Make it unbusy by forcing the log out and
1410				 * retrying. If we've been here before, forcing
1411				 * the log isn't making the extents available,
1412				 * which means they have probably been freed in
1413				 * this transaction.  In that case, we have to
1414				 * give up on them and we'll attempt a minlen
1415				 * allocation the next time around.
1416				 */
1417				xfs_btree_del_cursor(cnt_cur,
1418						     XFS_BTREE_NOERROR);
1419				trace_xfs_alloc_size_busy(args);
1420				if (!forced++)
1421					xfs_log_force(args->mp, XFS_LOG_SYNC);
1422				goto restart;
1423			}
1424		}
1425	}
1426
1427	/*
1428	 * In the first case above, we got the last entry in the
1429	 * by-size btree.  Now we check to see if the space hits maxlen
1430	 * once aligned; if not, we search left for something better.
1431	 * This can't happen in the second case above.
1432	 */
1433	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1434	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
1435			(rlen <= flen && rbno + rlen <= fbno + flen), error0);
1436	if (rlen < args->maxlen) {
1437		xfs_agblock_t	bestfbno;
1438		xfs_extlen_t	bestflen;
1439		xfs_agblock_t	bestrbno;
1440		xfs_extlen_t	bestrlen;
1441
1442		bestrlen = rlen;
1443		bestrbno = rbno;
1444		bestflen = flen;
1445		bestfbno = fbno;
1446		for (;;) {
1447			if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1448				goto error0;
1449			if (i == 0)
1450				break;
1451			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1452					&i)))
1453				goto error0;
1454			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1455			if (flen < bestrlen)
1456				break;
1457			xfs_alloc_compute_aligned(args, fbno, flen,
1458						  &rbno, &rlen);
1459			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1460			XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
1461				(rlen <= flen && rbno + rlen <= fbno + flen),
1462				error0);
1463			if (rlen > bestrlen) {
1464				bestrlen = rlen;
1465				bestrbno = rbno;
1466				bestflen = flen;
1467				bestfbno = fbno;
1468				if (rlen == args->maxlen)
1469					break;
1470			}
1471		}
1472		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1473				&i)))
1474			goto error0;
1475		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1476		rlen = bestrlen;
1477		rbno = bestrbno;
1478		flen = bestflen;
1479		fbno = bestfbno;
1480	}
1481	args->wasfromfl = 0;
1482	/*
1483	 * Fix up the length.
1484	 */
1485	args->len = rlen;
1486	if (rlen < args->minlen) {
1487		if (!forced++) {
1488			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1489			trace_xfs_alloc_size_busy(args);
1490			xfs_log_force(args->mp, XFS_LOG_SYNC);
1491			goto restart;
1492		}
1493		goto out_nominleft;
1494	}
1495	xfs_alloc_fix_len(args);
1496
1497	rlen = args->len;
1498	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
1499	/*
1500	 * Allocate and initialize a cursor for the by-block tree.
1501	 */
1502	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1503		args->agno, XFS_BTNUM_BNO);
1504	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1505			rbno, rlen, XFSA_FIXUP_CNT_OK)))
1506		goto error0;
1507	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1508	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1509	cnt_cur = bno_cur = NULL;
1510	args->len = rlen;
1511	args->agbno = rbno;
1512	XFS_WANT_CORRUPTED_GOTO(args->mp,
1513		args->agbno + args->len <=
1514			be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1515		error0);
1516	trace_xfs_alloc_size_done(args);
1517	return 0;
1518
1519error0:
1520	trace_xfs_alloc_size_error(args);
1521	if (cnt_cur)
1522		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1523	if (bno_cur)
1524		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1525	return error;
1526
1527out_nominleft:
1528	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1529	trace_xfs_alloc_size_nominleft(args);
1530	args->agbno = NULLAGBLOCK;
1531	return 0;
1532}
1533
1534/*
1535 * Deal with the case where only small freespaces remain.
1536 * Either return the contents of the last freespace record,
1537 * or allocate space from the freelist if there is nothing in the tree.
1538 */
1539STATIC int			/* error */
1540xfs_alloc_ag_vextent_small(
1541	xfs_alloc_arg_t	*args,	/* allocation argument structure */
1542	xfs_btree_cur_t	*ccur,	/* by-size cursor */
1543	xfs_agblock_t	*fbnop,	/* result block number */
1544	xfs_extlen_t	*flenp,	/* result length */
1545	int		*stat)	/* status: 0-freelist, 1-normal/none */
1546{
1547	struct xfs_owner_info	oinfo;
1548	struct xfs_perag	*pag;
1549	int		error;
1550	xfs_agblock_t	fbno;
1551	xfs_extlen_t	flen;
1552	int		i;
1553
1554	if ((error = xfs_btree_decrement(ccur, 0, &i)))
1555		goto error0;
1556	if (i) {
1557		if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
1558			goto error0;
1559		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1560	}
1561	/*
1562	 * Nothing in the btree, try the freelist.  Make sure
1563	 * to respect minleft even when pulling from the
1564	 * freelist.
1565	 */
1566	else if (args->minlen == 1 && args->alignment == 1 &&
1567		 args->resv != XFS_AG_RESV_AGFL &&
1568		 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1569		  > args->minleft)) {
1570		error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1571		if (error)
1572			goto error0;
1573		if (fbno != NULLAGBLOCK) {
1574			xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
1575			      xfs_alloc_allow_busy_reuse(args->datatype));
1576
1577			if (xfs_alloc_is_userdata(args->datatype)) {
1578				xfs_buf_t	*bp;
1579
1580				bp = xfs_btree_get_bufs(args->mp, args->tp,
1581					args->agno, fbno, 0);
1582				xfs_trans_binval(args->tp, bp);
1583			}
1584			args->len = 1;
1585			args->agbno = fbno;
1586			XFS_WANT_CORRUPTED_GOTO(args->mp,
1587				args->agbno + args->len <=
1588				be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1589				error0);
1590			args->wasfromfl = 1;
1591			trace_xfs_alloc_small_freelist(args);
1592
1593			/*
1594			 * If we're feeding an AGFL block to something that
1595			 * doesn't live in the free space, we need to clear
1596			 * out the OWN_AG rmap and add the block back to
1597			 * the AGFL per-AG reservation.
1598			 */
1599			xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
1600			error = xfs_rmap_free(args->tp, args->agbp, args->agno,
1601					fbno, 1, &oinfo);
1602			if (error)
1603				goto error0;
1604			pag = xfs_perag_get(args->mp, args->agno);
1605			xfs_ag_resv_free_extent(pag, XFS_AG_RESV_AGFL,
1606					args->tp, 1);
1607			xfs_perag_put(pag);
1608
1609			*stat = 0;
1610			return 0;
1611		}
1612		/*
1613		 * Nothing in the freelist.
1614		 */
1615		else
1616			flen = 0;
1617	}
1618	/*
1619	 * Can't allocate from the freelist for some reason.
1620	 */
1621	else {
1622		fbno = NULLAGBLOCK;
1623		flen = 0;
1624	}
1625	/*
1626	 * Can't do the allocation, give up.
1627	 */
1628	if (flen < args->minlen) {
1629		args->agbno = NULLAGBLOCK;
1630		trace_xfs_alloc_small_notenough(args);
1631		flen = 0;
1632	}
1633	*fbnop = fbno;
1634	*flenp = flen;
1635	*stat = 1;
1636	trace_xfs_alloc_small_done(args);
1637	return 0;
1638
1639error0:
1640	trace_xfs_alloc_small_error(args);
1641	return error;
1642}
1643
1644/*
1645 * Free the extent starting at agno/bno for length.
1646 */
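/*
 * The freed extent coalesces with adjacent free space on either side:
 * e.g. freeing [10, 14) between existing free records [6, 10) and
 * [14, 20) collapses all three into the single record [6, 20).
 */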
1647STATIC int
1648xfs_free_ag_extent(
1649	xfs_trans_t		*tp,
1650	xfs_buf_t		*agbp,
1651	xfs_agnumber_t		agno,
1652	xfs_agblock_t		bno,
1653	xfs_extlen_t		len,
1654	struct xfs_owner_info	*oinfo,
1655	enum xfs_ag_resv_type	type)
1656{
1657	xfs_btree_cur_t	*bno_cur;	/* cursor for by-block btree */
1658	xfs_btree_cur_t	*cnt_cur;	/* cursor for by-size btree */
1659	int		error;		/* error return value */
1660	xfs_agblock_t	gtbno;		/* start of right neighbor block */
1661	xfs_extlen_t	gtlen;		/* length of right neighbor block */
1662	int		haveleft;	/* have a left neighbor block */
1663	int		haveright;	/* have a right neighbor block */
1664	int		i;		/* temp, result code */
1665	xfs_agblock_t	ltbno;		/* start of left neighbor block */
1666	xfs_extlen_t	ltlen;		/* length of left neighbor block */
1667	xfs_mount_t	*mp;		/* mount point struct for filesystem */
1668	xfs_agblock_t	nbno;		/* new starting block of freespace */
1669	xfs_extlen_t	nlen;		/* new length of freespace */
1670	xfs_perag_t	*pag;		/* per allocation group data */
1671
1672	bno_cur = cnt_cur = NULL;
1673	mp = tp->t_mountp;
1674
1675	if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
1676		error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1677		if (error)
1678			goto error0;
1679	}
1680
1681	/*
1682	 * Allocate and initialize a cursor for the by-block btree.
1683	 */
1684	bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1685	/*
1686	 * Look for a neighboring block on the left (lower block numbers)
1687	 * that is contiguous with this space.
1688	 */
1689	if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1690		goto error0;
1691	if (haveleft) {
1692		/*
1693		 * There is a block to our left.
1694		 */
1695		if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1696			goto error0;
1697		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1698		/*
1699		 * It's not contiguous, though.
1700		 */
1701		if (ltbno + ltlen < bno)
1702			haveleft = 0;
1703		else {
1704			/*
1705			 * If this failure happens the request to free this
1706			 * space was invalid, it's (partly) already free.
1707			 * Very bad.
1708			 */
1709			XFS_WANT_CORRUPTED_GOTO(mp,
1710						ltbno + ltlen <= bno, error0);
1711		}
1712	}
1713	/*
1714	 * Look for a neighboring block on the right (higher block numbers)
1715	 * that is contiguous with this space.
1716	 */
1717	if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1718		goto error0;
1719	if (haveright) {
1720		/*
1721		 * There is a block to our right.
1722		 */
1723		if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1724			goto error0;
1725		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1726		/*
1727		 * It's not contiguous, though.
1728		 */
1729		if (bno + len < gtbno)
1730			haveright = 0;
1731		else {
1732			/*
1733			 * If this failure happens the request to free this
1734			 * space was invalid, it's (partly) already free.
1735			 * Very bad.
1736			 */
1737			XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
1738		}
1739	}
1740	/*
1741	 * Now allocate and initialize a cursor for the by-size tree.
1742	 */
1743	cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1744	/*
1745	 * Have both left and right contiguous neighbors.
1746	 * Merge all three into a single free block.
1747	 */
1748	if (haveleft && haveright) {
1749		/*
1750		 * Delete the old by-size entry on the left.
1751		 */
1752		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1753			goto error0;
1754		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1755		if ((error = xfs_btree_delete(cnt_cur, &i)))
1756			goto error0;
1757		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1758		/*
1759		 * Delete the old by-size entry on the right.
1760		 */
1761		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1762			goto error0;
1763		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1764		if ((error = xfs_btree_delete(cnt_cur, &i)))
1765			goto error0;
1766		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1767		/*
1768		 * Delete the old by-block entry for the right block.
1769		 */
1770		if ((error = xfs_btree_delete(bno_cur, &i)))
1771			goto error0;
1772		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1773		/*
1774		 * Move the by-block cursor back to the left neighbor.
1775		 */
1776		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1777			goto error0;
1778		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1779#ifdef DEBUG
1780		/*
1781		 * Check that this is the right record: delete didn't
1782		 * mangle the cursor.
1783		 */
1784		{
1785			xfs_agblock_t	xxbno;
1786			xfs_extlen_t	xxlen;
1787
1788			if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1789					&i)))
1790				goto error0;
1791			XFS_WANT_CORRUPTED_GOTO(mp,
1792				i == 1 && xxbno == ltbno && xxlen == ltlen,
1793				error0);
1794		}
1795#endif
1796		/*
1797		 * Update remaining by-block entry to the new, joined block.
1798		 */
1799		nbno = ltbno;
1800		nlen = len + ltlen + gtlen;
1801		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1802			goto error0;
1803	}
1804	/*
1805	 * Have only a left contiguous neighbor.
1806	 * Merge it together with the new freespace.
1807	 */
1808	else if (haveleft) {
1809		/*
1810		 * Delete the old by-size entry on the left.
1811		 */
1812		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1813			goto error0;
1814		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1815		if ((error = xfs_btree_delete(cnt_cur, &i)))
1816			goto error0;
1817		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1818		/*
1819		 * Back up the by-block cursor to the left neighbor, and
1820		 * update its length.
1821		 */
1822		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1823			goto error0;
1824		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1825		nbno = ltbno;
1826		nlen = len + ltlen;
1827		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1828			goto error0;
1829	}
1830	/*
1831	 * Have only a right contiguous neighbor.
1832	 * Merge it together with the new freespace.
1833	 */
1834	else if (haveright) {
1835		/*
1836		 * Delete the old by-size entry on the right.
1837		 */
1838		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1839			goto error0;
1840		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1841		if ((error = xfs_btree_delete(cnt_cur, &i)))
1842			goto error0;
1843		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1844		/*
1845		 * Update the starting block and length of the right
1846		 * neighbor in the by-block tree.
1847		 */
1848		nbno = bno;
1849		nlen = len + gtlen;
1850		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1851			goto error0;
1852	}
1853	/*
1854	 * No contiguous neighbors.
1855	 * Insert the new freespace into the by-block tree.
1856	 */
1857	else {
1858		nbno = bno;
1859		nlen = len;
1860		if ((error = xfs_btree_insert(bno_cur, &i)))
1861			goto error0;
1862		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1863	}
1864	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1865	bno_cur = NULL;
1866	/*
1867	 * In all cases we need to insert the new freespace in the by-size tree.
1868	 */
1869	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1870		goto error0;
1871	XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
1872	if ((error = xfs_btree_insert(cnt_cur, &i)))
1873		goto error0;
1874	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1875	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1876	cnt_cur = NULL;
1877
1878	/*
1879	 * Update the freespace totals in the ag and superblock.
1880	 */
1881	pag = xfs_perag_get(mp, agno);
1882	error = xfs_alloc_update_counters(tp, pag, agbp, len);
1883	xfs_ag_resv_free_extent(pag, type, tp, len);
1884	xfs_perag_put(pag);
1885	if (error)
1886		goto error0;
1887
1888	XFS_STATS_INC(mp, xs_freex);
1889	XFS_STATS_ADD(mp, xs_freeb, len);
1890
1891	trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
1892			haveleft, haveright);
1893
1894	return 0;
1895
1896 error0:
1897	trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
1898			-1, -1);
1899	if (bno_cur)
1900		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1901	if (cnt_cur)
1902		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1903	return error;
1904}
1905
1906/*
1907 * Visible (exported) allocation/free functions.
1908 * Some of these are used just by xfs_alloc_btree.c and this file.
1909 */
1910
1911/*
1912 * Compute and fill in value of m_ag_maxlevels.
1913 */
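/*
 * The worst case is one freespace record for every other block in the
 * AG (free and allocated blocks strictly alternating), hence the
 * (agblocks + 1) / 2 record count used below.
 */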
1914void
1915xfs_alloc_compute_maxlevels(
1916	xfs_mount_t	*mp)	/* file system mount structure */
1917{
1918	mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp, mp->m_alloc_mnr,
1919			(mp->m_sb.sb_agblocks + 1) / 2);
1920}
1921
1922/*
1923 * Find the length of the longest extent in an AG.  The 'need' parameter
1924 * specifies how much space we're going to need for the AGFL and the
1925 * 'reserved' parameter tells us how many blocks in this AG are reserved for
1926 * other callers.
1927 */
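/*
 * For example, with pagf_longest = 100, need = 8, pagf_flcount = 3,
 * reserved = 20 and pagf_freeblks - pagf_longest = 15, delta becomes
 * (8 - 3) + (20 - 15) = 10 and the usable longest extent is 90 blocks.
 */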
1928xfs_extlen_t
1929xfs_alloc_longest_free_extent(
1930	struct xfs_mount	*mp,
1931	struct xfs_perag	*pag,
1932	xfs_extlen_t		need,
1933	xfs_extlen_t		reserved)
1934{
1935	xfs_extlen_t		delta = 0;
1936
1937	/*
1938	 * If the AGFL needs a recharge, we'll have to subtract that from the
1939	 * longest extent.
1940	 */
1941	if (need > pag->pagf_flcount)
1942		delta = need - pag->pagf_flcount;
1943
1944	/*
1945	 * If we cannot maintain others' reservations with space from the
1946	 * not-longest freesp extents, we'll have to subtract /that/ from
1947	 * the longest extent too.
1948	 */
1949	if (pag->pagf_freeblks - pag->pagf_longest < reserved)
1950		delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
1951
1952	/*
1953	 * If the longest extent is long enough to satisfy all the
1954	 * reservations and AGFL rules in place, we can return this extent.
1955	 */
1956	if (pag->pagf_longest > delta)
1957		return pag->pagf_longest - delta;
1958
1959	/* Otherwise, let the caller try for 1 block if there's space. */
1960	return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1961}
1962
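/*
 * The minimum freelist length covers the worst-case block consumption
 * of a single allocation: a full split of each free space btree.  For
 * example, with 2-level bnobt and cntbt and no rmapbt this is
 * (2 + 1) + (2 + 1) = 6 blocks, each term capped at m_ag_maxlevels.
 */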
1963unsigned int
1964xfs_alloc_min_freelist(
1965	struct xfs_mount	*mp,
1966	struct xfs_perag	*pag)
1967{
1968	unsigned int		min_free;
1969
1970	/* space needed by-bno freespace btree */
1971	min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
1972				       mp->m_ag_maxlevels);
1973	/* space needed by-size freespace btree */
1974	min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
1975				       mp->m_ag_maxlevels);
1976	/* space needed reverse mapping used space btree */
1977	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
1978		min_free += min_t(unsigned int,
1979				  pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
1980				  mp->m_rmap_maxlevels);
1981
1982	return min_free;
1983}
1984
1985/*
1986 * Check if the operation we are fixing up the freelist for should go ahead or
1987 * not. If we are freeing blocks, we always allow it, otherwise the allocation
1988 * is dependent on whether the size and shape of free space available will
1989 * permit the requested allocation to take place.
1990 */
1991static bool
1992xfs_alloc_space_available(
1993	struct xfs_alloc_arg	*args,
1994	xfs_extlen_t		min_free,
1995	int			flags)
1996{
1997	struct xfs_perag	*pag = args->pag;
1998	xfs_extlen_t		alloc_len, longest;
1999	xfs_extlen_t		reservation; /* blocks that are still reserved */
2000	int			available;
2001
2002	if (flags & XFS_ALLOC_FLAG_FREEING)
2003		return true;
2004
2005	reservation = xfs_ag_resv_needed(pag, args->resv);
2006
2007	/* do we have enough contiguous free space for the allocation? */
2008	alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
2009	longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free,
2010			reservation);
2011	if (longest < alloc_len)
2012		return false;
2013
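	/*
	 * E.g. minlen = 8, alignment = 4 and minalignslop = 3 require a
	 * contiguous run of 8 + (4 - 1) + 3 = 14 free blocks.
	 */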
2014	/* do we have enough free space remaining for the allocation? */
2015	available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
2016			  reservation - min_free - args->minleft);
2017	if (available < (int)max(args->total, alloc_len))
2018		return false;
2019
2020	/*
2021	 * Clamp maxlen to the amount of free space available for the actual
2022	 * extent allocation.
2023	 */
2024	if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
2025		args->maxlen = available;
2026		ASSERT(args->maxlen > 0);
2027		ASSERT(args->maxlen >= args->minlen);
2028	}
2029
2030	return true;
2031}
2032
2033/*
2034 * Decide whether to use this allocation group for this allocation.
2035 * If so, fix up the btree freelist's size.
2036 */
2037int			/* error */
2038xfs_alloc_fix_freelist(
2039	struct xfs_alloc_arg	*args,	/* allocation argument structure */
2040	int			flags)	/* XFS_ALLOC_FLAG_... */
2041{
2042	struct xfs_mount	*mp = args->mp;
2043	struct xfs_perag	*pag = args->pag;
2044	struct xfs_trans	*tp = args->tp;
2045	struct xfs_buf		*agbp = NULL;
2046	struct xfs_buf		*agflbp = NULL;
2047	struct xfs_alloc_arg	targs;	/* local allocation arguments */
2048	xfs_agblock_t		bno;	/* freelist block */
2049	xfs_extlen_t		need;	/* total blocks needed in freelist */
2050	int			error = 0;
2051
2052	if (!pag->pagf_init) {
2053		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2054		if (error)
2055			goto out_no_agbp;
2056		if (!pag->pagf_init) {
2057			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2058			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2059			goto out_agbp_relse;
2060		}
2061	}
2062
2063	/*
2064	 * If this is a metadata preferred pag and we are user data then try
2065	 * somewhere else if we are not being asked to try harder at this
 2066	 * point.
2067	 */
2068	if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) &&
2069	    (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
2070		ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2071		goto out_agbp_relse;
2072	}
2073
2074	need = xfs_alloc_min_freelist(mp, pag);
2075	if (!xfs_alloc_space_available(args, need, flags |
2076			XFS_ALLOC_FLAG_CHECK))
2077		goto out_agbp_relse;
2078
2079	/*
2080	 * Get the a.g. freespace buffer.
2081	 * Can fail if we're not blocking on locks, and it's held.
2082	 */
2083	if (!agbp) {
2084		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2085		if (error)
2086			goto out_no_agbp;
2087		if (!agbp) {
2088			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2089			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2090			goto out_no_agbp;
2091		}
2092	}
2093
2094	/* If there isn't enough total space or a large-enough single extent, reject it. */
2095	need = xfs_alloc_min_freelist(mp, pag);
2096	if (!xfs_alloc_space_available(args, need, flags))
2097		goto out_agbp_relse;
2098
2099	/*
2100	 * Make the freelist shorter if it's too long.
2101	 *
2102	 * Note that from this point onwards, we will always release the agf and
2103	 * agfl buffers on error. This handles the case where we error out and
2104	 * the buffers are clean or may not have been joined to the transaction
2105	 * and hence need to be released manually. If they have been joined to
2106	 * the transaction, then xfs_trans_brelse() will handle them
2107	 * appropriately based on the recursion count and dirty state of the
2108	 * buffer.
2109	 *
2110	 * XXX (dgc): When we have lots of free space, does this buy us
2111	 * anything other than extra overhead when we need to put more blocks
2112	 * back on the free list? Maybe we should only do this when space is
2113	 * getting low or the AGFL is more than half full?
2114	 *
2115	 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
2116	 * big; the NORMAP flag prevents AGFL expand/shrink operations from
2117	 * updating the rmapbt.  Both flags are used in xfs_repair while we're
2118	 * rebuilding the rmapbt, and neither are used by the kernel.  They're
2119	 * both required to ensure that rmaps are correctly recorded for the
2120	 * regenerated AGFL, bnobt, and cntbt.  See repair/phase5.c and
2121	 * repair/rmap.c in xfsprogs for details.
2122	 */
2123	memset(&targs, 0, sizeof(targs));
2124	if (flags & XFS_ALLOC_FLAG_NORMAP)
2125		xfs_rmap_skip_owner_update(&targs.oinfo);
2126	else
2127		xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
2128	while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
2129		struct xfs_buf	*bp;
2130
2131		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
2132		if (error)
2133			goto out_agbp_relse;
2134		error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
2135					   &targs.oinfo, XFS_AG_RESV_AGFL);
2136		if (error)
2137			goto out_agbp_relse;
2138		bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
2139		xfs_trans_binval(tp, bp);
2140	}
2141
2142	targs.tp = tp;
2143	targs.mp = mp;
2144	targs.agbp = agbp;
2145	targs.agno = args->agno;
2146	targs.alignment = targs.minlen = targs.prod = 1;
2147	targs.type = XFS_ALLOCTYPE_THIS_AG;
2148	targs.pag = pag;
2149	error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
2150	if (error)
2151		goto out_agbp_relse;
2152
2153	/* Make the freelist longer if it's too short. */
2154	while (pag->pagf_flcount < need) {
2155		targs.agbno = 0;
2156		targs.maxlen = need - pag->pagf_flcount;
2157		targs.resv = XFS_AG_RESV_AGFL;
2158
2159		/* Allocate as many blocks as possible at once. */
2160		error = xfs_alloc_ag_vextent(&targs);
2161		if (error)
2162			goto out_agflbp_relse;
2163
2164		/*
2165		 * Stop if we run out.  Won't happen if callers are obeying
2166		 * the restrictions correctly.  Can happen for free calls
2167		 * on a completely full ag.
2168		 */
2169		if (targs.agbno == NULLAGBLOCK) {
2170			if (flags & XFS_ALLOC_FLAG_FREEING)
2171				break;
2172			goto out_agflbp_relse;
2173		}
2174		/*
2175		 * Put each allocated block on the list.
2176		 */
2177		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
2178			error = xfs_alloc_put_freelist(tp, agbp,
2179							agflbp, bno, 0);
2180			if (error)
2181				goto out_agflbp_relse;
2182		}
2183	}
2184	xfs_trans_brelse(tp, agflbp);
2185	args->agbp = agbp;
2186	return 0;
2187
2188out_agflbp_relse:
2189	xfs_trans_brelse(tp, agflbp);
2190out_agbp_relse:
2191	if (agbp)
2192		xfs_trans_brelse(tp, agbp);
2193out_no_agbp:
2194	args->agbp = NULL;
2195	return error;
2196}
2197
2198/*
2199 * Get a block from the freelist.
2200 * Returns with the buffer for the block gotten.
2201 */
2202int				/* error */
2203xfs_alloc_get_freelist(
2204	xfs_trans_t	*tp,	/* transaction pointer */
2205	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
2206	xfs_agblock_t	*bnop,	/* block address retrieved from freelist */
2207	int		btreeblk) /* destination is an AGF btree */
2208{
2209	xfs_agf_t	*agf;	/* a.g. freespace structure */
2210	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
2211	xfs_agblock_t	bno;	/* block number returned */
2212	__be32		*agfl_bno;
2213	int		error;
2214	int		logflags;
2215	xfs_mount_t	*mp = tp->t_mountp;
2216	xfs_perag_t	*pag;	/* per allocation group data */
2217
2218	/*
2219	 * Freelist is empty, give up.
2220	 */
2221	agf = XFS_BUF_TO_AGF(agbp);
2222	if (!agf->agf_flcount) {
2223		*bnop = NULLAGBLOCK;
2224		return 0;
2225	}
2226	/*
2227	 * Read the array of free blocks.
2228	 */
2229	error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2230				    &agflbp);
2231	if (error)
2232		return error;
2233
2234
2235	/*
2236	 * Get the block number and update the data structures.
2237	 */
2238	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2239	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2240	be32_add_cpu(&agf->agf_flfirst, 1);
2241	xfs_trans_brelse(tp, agflbp);
2242	if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
2243		agf->agf_flfirst = 0;
2244
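	/*
	 * The AGFL is a circular buffer: agf_flfirst chases agf_fllast
	 * around the array, and each index wraps back to slot 0 once it
	 * steps past XFS_AGFL_SIZE(mp) - 1.
	 */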
2245	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2246	be32_add_cpu(&agf->agf_flcount, -1);
2247	xfs_trans_agflist_delta(tp, -1);
2248	pag->pagf_flcount--;
2249
2250	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2251	if (btreeblk) {
2252		be32_add_cpu(&agf->agf_btreeblks, 1);
2253		pag->pagf_btreeblks++;
2254		logflags |= XFS_AGF_BTREEBLKS;
2255	}
2256	xfs_perag_put(pag);
2257
2258	xfs_alloc_log_agf(tp, agbp, logflags);
2259	*bnop = bno;
2260
2261	return 0;
2262}
2263
2264/*
2265 * Log the given fields from the agf structure.
2266 */
2267void
2268xfs_alloc_log_agf(
2269	xfs_trans_t	*tp,	/* transaction pointer */
2270	xfs_buf_t	*bp,	/* buffer for a.g. freelist header */
2271	int		fields)	/* mask of fields to be logged (XFS_AGF_...) */
2272{
2273	int	first;		/* first byte offset */
2274	int	last;		/* last byte offset */
2275	static const short	offsets[] = {
2276		offsetof(xfs_agf_t, agf_magicnum),
2277		offsetof(xfs_agf_t, agf_versionnum),
2278		offsetof(xfs_agf_t, agf_seqno),
2279		offsetof(xfs_agf_t, agf_length),
2280		offsetof(xfs_agf_t, agf_roots[0]),
2281		offsetof(xfs_agf_t, agf_levels[0]),
2282		offsetof(xfs_agf_t, agf_flfirst),
2283		offsetof(xfs_agf_t, agf_fllast),
2284		offsetof(xfs_agf_t, agf_flcount),
2285		offsetof(xfs_agf_t, agf_freeblks),
2286		offsetof(xfs_agf_t, agf_longest),
2287		offsetof(xfs_agf_t, agf_btreeblks),
2288		offsetof(xfs_agf_t, agf_uuid),
2289		offsetof(xfs_agf_t, agf_rmap_blocks),
2290		offsetof(xfs_agf_t, agf_refcount_blocks),
2291		offsetof(xfs_agf_t, agf_refcount_root),
2292		offsetof(xfs_agf_t, agf_refcount_level),
2293		/* needed so that we don't log the whole rest of the structure: */
2294		offsetof(xfs_agf_t, agf_spare64),
2295		sizeof(xfs_agf_t)
2296	};
2297
2298	trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2299
2300	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
2301
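	/*
	 * xfs_btree_offsets() converts the bitmask in 'fields' into the
	 * first/last byte range spanned by those entries of the offsets
	 * table, so only the modified part of the AGF is logged.
	 */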
2302	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2303	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2304}
2305
2306/*
2307 * Interface for inode allocation to force the pag data to be initialized.
2308 */
2309int					/* error */
2310xfs_alloc_pagf_init(
2311	xfs_mount_t		*mp,	/* file system mount structure */
2312	xfs_trans_t		*tp,	/* transaction pointer */
2313	xfs_agnumber_t		agno,	/* allocation group number */
2314	int			flags)	/* XFS_ALLOC_FLAGS_... */
2315{
2316	xfs_buf_t		*bp;
2317	int			error;
2318
2319	if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2320		return error;
2321	if (bp)
2322		xfs_trans_brelse(tp, bp);
2323	return 0;
2324}
2325
2326/*
2327 * Put the block on the freelist for the allocation group.
2328 */
2329int					/* error */
2330xfs_alloc_put_freelist(
2331	xfs_trans_t		*tp,	/* transaction pointer */
2332	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
2333	xfs_buf_t		*agflbp,/* buffer for a.g. free block array */
2334	xfs_agblock_t		bno,	/* block being freed */
2335	int			btreeblk) /* block came from an AGF btree */
2336{
2337	xfs_agf_t		*agf;	/* a.g. freespace structure */
2338	__be32			*blockp;/* pointer to array entry */
2339	int			error;
2340	int			logflags;
2341	xfs_mount_t		*mp;	/* mount structure */
2342	xfs_perag_t		*pag;	/* per allocation group data */
2343	__be32			*agfl_bno;
2344	int			startoff;
2345
2346	agf = XFS_BUF_TO_AGF(agbp);
2347	mp = tp->t_mountp;
2348
2349	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
2350			be32_to_cpu(agf->agf_seqno), &agflbp)))
2351		return error;
2352	be32_add_cpu(&agf->agf_fllast, 1);
2353	if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
2354		agf->agf_fllast = 0;
2355
2356	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2357	be32_add_cpu(&agf->agf_flcount, 1);
2358	xfs_trans_agflist_delta(tp, 1);
2359	pag->pagf_flcount++;
2360
2361	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2362	if (btreeblk) {
2363		be32_add_cpu(&agf->agf_btreeblks, -1);
2364		pag->pagf_btreeblks--;
2365		logflags |= XFS_AGF_BTREEBLKS;
2366	}
2367	xfs_perag_put(pag);
2368
2369	xfs_alloc_log_agf(tp, agbp, logflags);
2370
2371	ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2372
2373	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2374	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
2375	*blockp = cpu_to_be32(bno);
2376	startoff = (char *)blockp - (char *)agflbp->b_addr;
2377
2378	xfs_alloc_log_agf(tp, agbp, logflags);
2379
2380	xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
2381	xfs_trans_log_buf(tp, agflbp, startoff,
2382			  startoff + sizeof(xfs_agblock_t) - 1);
2383	return 0;
2384}
2385
2386static bool
2387xfs_agf_verify(
2388	struct xfs_mount *mp,
2389	struct xfs_buf	*bp)
2390{
2391	struct xfs_agf	*agf = XFS_BUF_TO_AGF(bp);
2392
2393	if (xfs_sb_version_hascrc(&mp->m_sb)) {
2394		if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
2395			return false;
2396		if (!xfs_log_check_lsn(mp,
2397				be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
2398			return false;
2399	}
2400
2401	if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2402	      XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2403	      be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2404	      be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2405	      be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2406	      be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
2407		return false;
2408
2409	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
2410	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
2411	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
2412	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
2413		return false;
2414
2415	if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
2416	    (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
2417	     be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
2418		return false;
2419
2420	/*
2421	 * during growfs operations, the perag is not fully initialised,
2422	 * so we can't use it for any useful checking. growfs ensures we can't
2423	 * use it by using uncached buffers that don't have the perag attached
2424	 * so we can detect and avoid this problem.
2425	 */
2426	if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2427		return false;
2428
2429	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2430	    be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2431		return false;
2432
2433	if (xfs_sb_version_hasreflink(&mp->m_sb) &&
2434	    (be32_to_cpu(agf->agf_refcount_level) < 1 ||
2435	     be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
2436		return false;
2437
2438	return true;
2439
2440}
2441
2442static void
2443xfs_agf_read_verify(
2444	struct xfs_buf	*bp)
2445{
2446	struct xfs_mount *mp = bp->b_target->bt_mount;
2447
2448	if (xfs_sb_version_hascrc(&mp->m_sb) &&
2449	    !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2450		xfs_buf_ioerror(bp, -EFSBADCRC);
2451	else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
2452				XFS_ERRTAG_ALLOC_READ_AGF,
2453				XFS_RANDOM_ALLOC_READ_AGF))
2454		xfs_buf_ioerror(bp, -EFSCORRUPTED);
2455
2456	if (bp->b_error)
2457		xfs_verifier_error(bp);
2458}
2459
2460static void
2461xfs_agf_write_verify(
2462	struct xfs_buf	*bp)
2463{
2464	struct xfs_mount *mp = bp->b_target->bt_mount;
2465	struct xfs_buf_log_item	*bip = bp->b_fspriv;
2466
2467	if (!xfs_agf_verify(mp, bp)) {
2468		xfs_buf_ioerror(bp, -EFSCORRUPTED);
2469		xfs_verifier_error(bp);
2470		return;
2471	}
2472
2473	if (!xfs_sb_version_hascrc(&mp->m_sb))
2474		return;
2475
2476	if (bip)
2477		XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2478
2479	xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
2480}
2481
2482const struct xfs_buf_ops xfs_agf_buf_ops = {
2483	.name = "xfs_agf",
2484	.verify_read = xfs_agf_read_verify,
2485	.verify_write = xfs_agf_write_verify,
2486};
2487
2488/*
2489 * Read in the allocation group header (free/alloc section).
2490 */
2491int					/* error */
2492xfs_read_agf(
2493	struct xfs_mount	*mp,	/* mount point structure */
2494	struct xfs_trans	*tp,	/* transaction pointer */
2495	xfs_agnumber_t		agno,	/* allocation group number */
2496	int			flags,	/* XFS_BUF_ */
2497	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
2498{
2499	int		error;
2500
2501	trace_xfs_read_agf(mp, agno);
2502
2503	ASSERT(agno != NULLAGNUMBER);
2504	error = xfs_trans_read_buf(
2505			mp, tp, mp->m_ddev_targp,
2506			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2507			XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
2508	if (error)
2509		return error;
2510	if (!*bpp)
2511		return 0;
2512
2513	ASSERT(!(*bpp)->b_error);
2514	xfs_buf_set_ref(*bpp, XFS_AGF_REF);
2515	return 0;
2516}
2517
2518/*
2519 * Read in the allocation group header (free/alloc section).
2520 */
2521int					/* error */
2522xfs_alloc_read_agf(
2523	struct xfs_mount	*mp,	/* mount point structure */
2524	struct xfs_trans	*tp,	/* transaction pointer */
2525	xfs_agnumber_t		agno,	/* allocation group number */
2526	int			flags,	/* XFS_ALLOC_FLAG_... */
2527	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
2528{
2529	struct xfs_agf		*agf;		/* ag freelist header */
2530	struct xfs_perag	*pag;		/* per allocation group data */
2531	int			error;
2532
2533	trace_xfs_alloc_read_agf(mp, agno);
2534
2535	ASSERT(agno != NULLAGNUMBER);
2536	error = xfs_read_agf(mp, tp, agno,
2537			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2538			bpp);
2539	if (error)
2540		return error;
2541	if (!*bpp)
2542		return 0;
2543	ASSERT(!(*bpp)->b_error);
2544
2545	agf = XFS_BUF_TO_AGF(*bpp);
2546	pag = xfs_perag_get(mp, agno);
2547	if (!pag->pagf_init) {
2548		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
2549		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
2550		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2551		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2552		pag->pagf_levels[XFS_BTNUM_BNOi] =
2553			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2554		pag->pagf_levels[XFS_BTNUM_CNTi] =
2555			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
2556		pag->pagf_levels[XFS_BTNUM_RMAPi] =
2557			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
2558		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
2559		spin_lock_init(&pag->pagb_lock);
2560		pag->pagb_count = 0;
2561		pag->pagb_tree = RB_ROOT;
2562		pag->pagf_init = 1;
2563	}
2564#ifdef DEBUG
2565	else if (!XFS_FORCED_SHUTDOWN(mp)) {
2566		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
2567		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
2568		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2569		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2570		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
2571		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2572		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
2573		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2574	}
2575#endif
2576	xfs_perag_put(pag);
2577	return 0;
2578}
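
/*
 * The pagf_* fields cached above let callers consult AG free space state
 * without rereading the AGF. A minimal sketch of such a check (hypothetical
 * caller, not from this file):
 *
 *	pag = xfs_perag_get(mp, agno);
 *	if (pag->pagf_init && pag->pagf_freeblks >= need)
 *		...this AG is worth trying...
 *	xfs_perag_put(pag);
 */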
2579
2580/*
2581 * Allocate an extent (variable-size).
2582 * Depending on the allocation type, we either look in a single allocation
2583 * group or loop over the allocation groups to find the result.
2584 */
2585int				/* error */
2586xfs_alloc_vextent(
2587	xfs_alloc_arg_t	*args)	/* allocation argument structure */
2588{
2589	xfs_agblock_t	agsize;	/* allocation group size */
2590	int		error;
2591	int		flags;	/* XFS_ALLOC_FLAG_... locking flags */
2592	xfs_mount_t	*mp;	/* mount structure pointer */
2593	xfs_agnumber_t	sagno;	/* starting allocation group number */
2594	xfs_alloctype_t	type;	/* input allocation type */
2595	int		bump_rotor = 0;
2596	xfs_agnumber_t	rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2597
2598	mp = args->mp;
2599	type = args->otype = args->type;
2600	args->agbno = NULLAGBLOCK;
2601	/*
2602	 * Just fix this up, for the case where the last a.g. is shorter
2603	 * (or there's only one a.g.) and the caller couldn't easily figure
2604	 * that out (xfs_bmap_alloc).
2605	 */
2606	agsize = mp->m_sb.sb_agblocks;
2607	if (args->maxlen > agsize)
2608		args->maxlen = agsize;
2609	if (args->alignment == 0)
2610		args->alignment = 1;
2611	ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2612	ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2613	ASSERT(args->minlen <= args->maxlen);
2614	ASSERT(args->minlen <= agsize);
2615	ASSERT(args->mod < args->prod);
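	/*
	 * Recheck the same conditions at runtime so that bad arguments fail
	 * the allocation gracefully on non-DEBUG kernels rather than only
	 * tripping the ASSERTs above.
	 */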
2616	if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2617	    XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2618	    args->minlen > args->maxlen || args->minlen > agsize ||
2619	    args->mod >= args->prod) {
2620		args->fsbno = NULLFSBLOCK;
2621		trace_xfs_alloc_vextent_badargs(args);
2622		return 0;
2623	}
2624
2625	switch (type) {
2626	case XFS_ALLOCTYPE_THIS_AG:
2627	case XFS_ALLOCTYPE_NEAR_BNO:
2628	case XFS_ALLOCTYPE_THIS_BNO:
2629		/*
2630		 * These three force us into a single a.g.
2631		 */
2632		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2633		args->pag = xfs_perag_get(mp, args->agno);
2634		error = xfs_alloc_fix_freelist(args, 0);
2635		if (error) {
2636			trace_xfs_alloc_vextent_nofix(args);
2637			goto error0;
2638		}
2639		if (!args->agbp) {
2640			trace_xfs_alloc_vextent_noagbp(args);
2641			break;
2642		}
2643		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2644		if ((error = xfs_alloc_ag_vextent(args)))
2645			goto error0;
2646		break;
2647	case XFS_ALLOCTYPE_START_BNO:
2648		/*
2649		 * Try near allocation first, then anywhere-in-ag after
2650		 * the first a.g. fails.
2651		 */
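		/*
		 * With 32 bit inodes, inode allocation is confined to the
		 * low AGs, so the first extent of user data is instead
		 * spread across the filesystem via the AG rotor.
		 */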
2652		if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
2653		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2654			args->fsbno = XFS_AGB_TO_FSB(mp,
2655					((mp->m_agfrotor / rotorstep) %
2656					mp->m_sb.sb_agcount), 0);
2657			bump_rotor = 1;
2658		}
2659		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2660		args->type = XFS_ALLOCTYPE_NEAR_BNO;
2661		/* FALLTHROUGH */
2662	case XFS_ALLOCTYPE_ANY_AG:
2663	case XFS_ALLOCTYPE_START_AG:
2664	case XFS_ALLOCTYPE_FIRST_AG:
2665		/*
2666		 * Rotate through the allocation groups looking for a winner.
2667		 */
2668		if (type == XFS_ALLOCTYPE_ANY_AG) {
2669			/*
2670			 * Start with the last place we left off.
2671			 */
2672			args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2673					mp->m_sb.sb_agcount;
2674			args->type = XFS_ALLOCTYPE_THIS_AG;
2675			flags = XFS_ALLOC_FLAG_TRYLOCK;
2676		} else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2677			/*
2678			 * Start with allocation group given by bno.
2679			 */
2680			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2681			args->type = XFS_ALLOCTYPE_THIS_AG;
2682			sagno = 0;
2683			flags = 0;
2684		} else {
2685			if (type == XFS_ALLOCTYPE_START_AG)
2686				args->type = XFS_ALLOCTYPE_THIS_AG;
2687			/*
2688			 * Start with the given allocation group.
2689			 */
2690			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2691			flags = XFS_ALLOC_FLAG_TRYLOCK;
2692		}
2693		/*
2694		 * Loop over allocation groups twice; first time with
2695		 * trylock set, second time without.
2696		 */
2697		for (;;) {
2698			args->pag = xfs_perag_get(mp, args->agno);
2699			error = xfs_alloc_fix_freelist(args, flags);
2700			if (error) {
2701				trace_xfs_alloc_vextent_nofix(args);
2702				goto error0;
2703			}
2704			/*
2705			 * If we get a buffer back then the allocation will fly.
2706			 */
2707			if (args->agbp) {
2708				if ((error = xfs_alloc_ag_vextent(args)))
2709					goto error0;
2710				break;
2711			}
2712
2713			trace_xfs_alloc_vextent_loopfailed(args);
2714
2715			/*
2716			 * Didn't work, figure out the next iteration.
2717			 */
2718			if (args->agno == sagno &&
2719			    type == XFS_ALLOCTYPE_START_BNO)
2720				args->type = XFS_ALLOCTYPE_THIS_AG;
2721			/*
2722			 * For the first allocation, we can try any AG to get
2723			 * space.  However, if we already have allocated a
2724			 * block, we don't want to try AGs whose number is below
2725			 * sagno.  Otherwise, we may end up with out-of-order
2726			 * locking of AGF, which might cause deadlock.
2727			 */
2728			if (++(args->agno) == mp->m_sb.sb_agcount) {
2729				if (args->firstblock != NULLFSBLOCK)
2730					args->agno = sagno;
2731				else
2732					args->agno = 0;
2733			}
2734			/*
2735			 * Reached the starting a.g., must either be done
2736			 * or switch to non-trylock mode.
2737			 */
2738			if (args->agno == sagno) {
2739				if (flags == 0) {
2740					args->agbno = NULLAGBLOCK;
2741					trace_xfs_alloc_vextent_allfailed(args);
2742					break;
2743				}
2744
2745				flags = 0;
2746				if (type == XFS_ALLOCTYPE_START_BNO) {
2747					args->agbno = XFS_FSB_TO_AGBNO(mp,
2748						args->fsbno);
2749					args->type = XFS_ALLOCTYPE_NEAR_BNO;
2750				}
2751			}
2752			xfs_perag_put(args->pag);
2753		}
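		/*
		 * Advance the AG rotor. The rotor counts in units of
		 * 1/rotorstep of an AG (the AG used is m_agfrotor /
		 * rotorstep), so the starting AG only moves forward once
		 * every rotorstep allocations.
		 */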
2754		if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2755			if (args->agno == sagno)
2756				mp->m_agfrotor = (mp->m_agfrotor + 1) %
2757					(mp->m_sb.sb_agcount * rotorstep);
2758			else
2759				mp->m_agfrotor = (args->agno * rotorstep + 1) %
2760					(mp->m_sb.sb_agcount * rotorstep);
2761		}
2762		break;
2763	default:
2764		ASSERT(0);
2765		/* NOTREACHED */
2766	}
2767	if (args->agbno == NULLAGBLOCK)
2768		args->fsbno = NULLFSBLOCK;
2769	else {
2770		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2771#ifdef DEBUG
2772		ASSERT(args->len >= args->minlen);
2773		ASSERT(args->len <= args->maxlen);
2774		ASSERT(args->agbno % args->alignment == 0);
2775		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2776			args->len);
2777#endif
2778
2779		/* Zero the extent if we were asked to do so */
2780		if (args->datatype & XFS_ALLOC_USERDATA_ZERO) {
2781			error = xfs_zero_extent(args->ip, args->fsbno, args->len);
2782			if (error)
2783				goto error0;
2784		}
2785
2786	}
2787	xfs_perag_put(args->pag);
2788	return 0;
2789error0:
2790	xfs_perag_put(args->pag);
2791	return error;
2792}
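
/*
 * A minimal sketch of driving xfs_alloc_vextent() (hypothetical caller and
 * values; real callers such as the bmap code set many more fields):
 *
 *	struct xfs_alloc_arg args;
 *
 *	memset(&args, 0, sizeof(args));
 *	args.tp = tp;
 *	args.mp = mp;
 *	args.type = XFS_ALLOCTYPE_START_BNO;
 *	args.fsbno = target;
 *	args.minlen = 1;
 *	args.maxlen = want;
 *	args.prod = 1;
 *	error = xfs_alloc_vextent(&args);
 *	if (!error && args.fsbno != NULLFSBLOCK)
 *		...the extent is [args.fsbno, args.fsbno + args.len)...
 */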
2793
2794/* Ensure that the freelist is at full capacity. */
2795int
2796xfs_free_extent_fix_freelist(
2797	struct xfs_trans	*tp,
2798	xfs_agnumber_t		agno,
2799	struct xfs_buf		**agbp)
2800{
2801	struct xfs_alloc_arg	args;
2802	int			error;
2803
2804	memset(&args, 0, sizeof(struct xfs_alloc_arg));
2805	args.tp = tp;
2806	args.mp = tp->t_mountp;
2807	args.agno = agno;
2808
2809	/*
2810	 * Validate that the AG number is legal - this enables us to detect
2811	 * and handle silent filesystem corruption rather than crashing.
2812	 */
2813	if (args.agno >= args.mp->m_sb.sb_agcount)
2814		return -EFSCORRUPTED;
2815
2816	args.pag = xfs_perag_get(args.mp, args.agno);
2817	ASSERT(args.pag);
2818
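	/*
	 * XFS_ALLOC_FLAG_FREEING tells the freelist fixup that the caller is
	 * freeing extents, so the operation must be allowed to proceed even
	 * when the AG is otherwise too low on space for new allocations.
	 */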
2819	error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2820	if (error)
2821		goto out;
2822
2823	*agbp = args.agbp;
2824out:
2825	xfs_perag_put(args.pag);
2826	return error;
2827}
2828
2829/*
2830 * Free an extent.
2831 * Just break up the extent address and hand off to xfs_free_ag_extent
2832 * after fixing up the freelist.
2833 */
2834int				/* error */
2835xfs_free_extent(
2836	struct xfs_trans	*tp,	/* transaction pointer */
2837	xfs_fsblock_t		bno,	/* starting block number of extent */
2838	xfs_extlen_t		len,	/* length of extent */
2839	struct xfs_owner_info	*oinfo,	/* extent owner */
2840	enum xfs_ag_resv_type	type)	/* block reservation type */
2841{
2842	struct xfs_mount	*mp = tp->t_mountp;
2843	struct xfs_buf		*agbp;
2844	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(mp, bno);
2845	xfs_agblock_t		agbno = XFS_FSB_TO_AGBNO(mp, bno);
2846	int			error;
2847
2848	ASSERT(len != 0);
2849	ASSERT(type != XFS_AG_RESV_AGFL);
2850
2851	if (XFS_TEST_ERROR(false, mp,
2852			XFS_ERRTAG_FREE_EXTENT,
2853			XFS_RANDOM_FREE_EXTENT))
2854		return -EIO;
2855
2856	error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
2857	if (error)
2858		return error;
2859
2860	XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err);
2861
2862	/* validate the extent size is legal now we have the agf locked */
2863	XFS_WANT_CORRUPTED_GOTO(mp,
2864		agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
2865				err);
2866
2867	error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
2868	if (error)
2869		goto err;
2870
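	/*
	 * Mark the freed range busy so it cannot be reallocated and reused
	 * before the free is committed to the log.
	 */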
2871	xfs_extent_busy_insert(tp, agno, agbno, len, 0);
2872	return 0;
2873
2874err:
2875	xfs_trans_brelse(tp, agbp);
2876	return error;
2877}