Linux Audio

Check our new training course

Loading...
Note: File does not exist in v6.13.7.
   1/*
   2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include <linux/log2.h>
  19
  20#include "xfs.h"
  21#include "xfs_fs.h"
  22#include "xfs_format.h"
  23#include "xfs_log_format.h"
  24#include "xfs_trans_resv.h"
  25#include "xfs_inum.h"
  26#include "xfs_sb.h"
  27#include "xfs_ag.h"
  28#include "xfs_mount.h"
  29#include "xfs_inode.h"
  30#include "xfs_trans.h"
  31#include "xfs_inode_item.h"
  32#include "xfs_bmap_btree.h"
  33#include "xfs_bmap.h"
  34#include "xfs_error.h"
  35#include "xfs_trace.h"
  36#include "xfs_attr_sf.h"
  37#include "xfs_dinode.h"
  38
  39kmem_zone_t *xfs_ifork_zone;
  40
  41STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
  42STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
  43STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
  44
  45#ifdef DEBUG
  46/*
  47 * Make sure that the extents in the given memory buffer
  48 * are valid.
  49 */
  50void
  51xfs_validate_extents(
  52	xfs_ifork_t		*ifp,
  53	int			nrecs,
  54	xfs_exntfmt_t		fmt)
  55{
  56	xfs_bmbt_irec_t		irec;
  57	xfs_bmbt_rec_host_t	rec;
  58	int			i;
  59
  60	for (i = 0; i < nrecs; i++) {
  61		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
  62		rec.l0 = get_unaligned(&ep->l0);
  63		rec.l1 = get_unaligned(&ep->l1);
  64		xfs_bmbt_get_all(&rec, &irec);
  65		if (fmt == XFS_EXTFMT_NOSTATE)
  66			ASSERT(irec.br_state == XFS_EXT_NORM);
  67	}
  68}
  69#else /* DEBUG */
  70#define xfs_validate_extents(ifp, nrecs, fmt)
  71#endif /* DEBUG */
  72
  73
  74/*
  75 * Move inode type and inode format specific information from the
  76 * on-disk inode to the in-core inode.  For fifos, devs, and sockets
  77 * this means set if_rdev to the proper value.  For files, directories,
  78 * and symlinks this means to bring in the in-line data or extent
  79 * pointers.  For a file in B-tree format, only the root is immediately
  80 * brought in-core.  The rest will be in-lined in if_extents when it
  81 * is first referenced (see xfs_iread_extents()).
  82 */
  83int
  84xfs_iformat_fork(
  85	xfs_inode_t		*ip,
  86	xfs_dinode_t		*dip)
  87{
  88	xfs_attr_shortform_t	*atp;
  89	int			size;
  90	int			error = 0;
  91	xfs_fsize_t             di_size;
  92
  93	if (unlikely(be32_to_cpu(dip->di_nextents) +
  94		     be16_to_cpu(dip->di_anextents) >
  95		     be64_to_cpu(dip->di_nblocks))) {
  96		xfs_warn(ip->i_mount,
  97			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
  98			(unsigned long long)ip->i_ino,
  99			(int)(be32_to_cpu(dip->di_nextents) +
 100			      be16_to_cpu(dip->di_anextents)),
 101			(unsigned long long)
 102				be64_to_cpu(dip->di_nblocks));
 103		XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
 104				     ip->i_mount, dip);
 105		return XFS_ERROR(EFSCORRUPTED);
 106	}
 107
 108	if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
 109		xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
 110			(unsigned long long)ip->i_ino,
 111			dip->di_forkoff);
 112		XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
 113				     ip->i_mount, dip);
 114		return XFS_ERROR(EFSCORRUPTED);
 115	}
 116
 117	if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
 118		     !ip->i_mount->m_rtdev_targp)) {
 119		xfs_warn(ip->i_mount,
 120			"corrupt dinode %Lu, has realtime flag set.",
 121			ip->i_ino);
 122		XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
 123				     XFS_ERRLEVEL_LOW, ip->i_mount, dip);
 124		return XFS_ERROR(EFSCORRUPTED);
 125	}
 126
 127	switch (ip->i_d.di_mode & S_IFMT) {
 128	case S_IFIFO:
 129	case S_IFCHR:
 130	case S_IFBLK:
 131	case S_IFSOCK:
 132		if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
 133			XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
 134					      ip->i_mount, dip);
 135			return XFS_ERROR(EFSCORRUPTED);
 136		}
 137		ip->i_d.di_size = 0;
 138		ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
 139		break;
 140
 141	case S_IFREG:
 142	case S_IFLNK:
 143	case S_IFDIR:
 144		switch (dip->di_format) {
 145		case XFS_DINODE_FMT_LOCAL:
 146			/*
 147			 * no local regular files yet
 148			 */
 149			if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
 150				xfs_warn(ip->i_mount,
 151			"corrupt inode %Lu (local format for regular file).",
 152					(unsigned long long) ip->i_ino);
 153				XFS_CORRUPTION_ERROR("xfs_iformat(4)",
 154						     XFS_ERRLEVEL_LOW,
 155						     ip->i_mount, dip);
 156				return XFS_ERROR(EFSCORRUPTED);
 157			}
 158
 159			di_size = be64_to_cpu(dip->di_size);
 160			if (unlikely(di_size < 0 ||
 161				     di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
 162				xfs_warn(ip->i_mount,
 163			"corrupt inode %Lu (bad size %Ld for local inode).",
 164					(unsigned long long) ip->i_ino,
 165					(long long) di_size);
 166				XFS_CORRUPTION_ERROR("xfs_iformat(5)",
 167						     XFS_ERRLEVEL_LOW,
 168						     ip->i_mount, dip);
 169				return XFS_ERROR(EFSCORRUPTED);
 170			}
 171
 172			size = (int)di_size;
 173			error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
 174			break;
 175		case XFS_DINODE_FMT_EXTENTS:
 176			error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
 177			break;
 178		case XFS_DINODE_FMT_BTREE:
 179			error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
 180			break;
 181		default:
 182			XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
 183					 ip->i_mount);
 184			return XFS_ERROR(EFSCORRUPTED);
 185		}
 186		break;
 187
 188	default:
 189		XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
 190		return XFS_ERROR(EFSCORRUPTED);
 191	}
 192	if (error) {
 193		return error;
 194	}
 195	if (!XFS_DFORK_Q(dip))
 196		return 0;
 197
 198	ASSERT(ip->i_afp == NULL);
 199	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
 200
 201	switch (dip->di_aformat) {
 202	case XFS_DINODE_FMT_LOCAL:
 203		atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
 204		size = be16_to_cpu(atp->hdr.totsize);
 205
 206		if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
 207			xfs_warn(ip->i_mount,
 208				"corrupt inode %Lu (bad attr fork size %Ld).",
 209				(unsigned long long) ip->i_ino,
 210				(long long) size);
 211			XFS_CORRUPTION_ERROR("xfs_iformat(8)",
 212					     XFS_ERRLEVEL_LOW,
 213					     ip->i_mount, dip);
 214			return XFS_ERROR(EFSCORRUPTED);
 215		}
 216
 217		error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
 218		break;
 219	case XFS_DINODE_FMT_EXTENTS:
 220		error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
 221		break;
 222	case XFS_DINODE_FMT_BTREE:
 223		error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
 224		break;
 225	default:
 226		error = XFS_ERROR(EFSCORRUPTED);
 227		break;
 228	}
 229	if (error) {
 230		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
 231		ip->i_afp = NULL;
 232		xfs_idestroy_fork(ip, XFS_DATA_FORK);
 233	}
 234	return error;
 235}
 236
 237/*
 238 * The file is in-lined in the on-disk inode.
 239 * If it fits into if_inline_data, then copy
 240 * it there, otherwise allocate a buffer for it
 241 * and copy the data there.  Either way, set
 242 * if_data to point at the data.
 243 * If we allocate a buffer for the data, make
 244 * sure that its size is a multiple of 4 and
 245 * record the real size in i_real_bytes.
 246 */
 247STATIC int
 248xfs_iformat_local(
 249	xfs_inode_t	*ip,
 250	xfs_dinode_t	*dip,
 251	int		whichfork,
 252	int		size)
 253{
 254	xfs_ifork_t	*ifp;
 255	int		real_size;
 256
 257	/*
 258	 * If the size is unreasonable, then something
 259	 * is wrong and we just bail out rather than crash in
 260	 * kmem_alloc() or memcpy() below.
 261	 */
 262	if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
 263		xfs_warn(ip->i_mount,
 264	"corrupt inode %Lu (bad size %d for local fork, size = %d).",
 265			(unsigned long long) ip->i_ino, size,
 266			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
 267		XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
 268				     ip->i_mount, dip);
 269		return XFS_ERROR(EFSCORRUPTED);
 270	}
 271	ifp = XFS_IFORK_PTR(ip, whichfork);
 272	real_size = 0;
 273	if (size == 0)
 274		ifp->if_u1.if_data = NULL;
 275	else if (size <= sizeof(ifp->if_u2.if_inline_data))
 276		ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
 277	else {
 278		real_size = roundup(size, 4);
 279		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
 280	}
 281	ifp->if_bytes = size;
 282	ifp->if_real_bytes = real_size;
 283	if (size)
 284		memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
 285	ifp->if_flags &= ~XFS_IFEXTENTS;
 286	ifp->if_flags |= XFS_IFINLINE;
 287	return 0;
 288}
 289
 290/*
 291 * The file consists of a set of extents all
 292 * of which fit into the on-disk inode.
 293 * If there are few enough extents to fit into
 294 * the if_inline_ext, then copy them there.
 295 * Otherwise allocate a buffer for them and copy
 296 * them into it.  Either way, set if_extents
 297 * to point at the extents.
 298 */
 299STATIC int
 300xfs_iformat_extents(
 301	xfs_inode_t	*ip,
 302	xfs_dinode_t	*dip,
 303	int		whichfork)
 304{
 305	xfs_bmbt_rec_t	*dp;
 306	xfs_ifork_t	*ifp;
 307	int		nex;
 308	int		size;
 309	int		i;
 310
 311	ifp = XFS_IFORK_PTR(ip, whichfork);
 312	nex = XFS_DFORK_NEXTENTS(dip, whichfork);
 313	size = nex * (uint)sizeof(xfs_bmbt_rec_t);
 314
 315	/*
 316	 * If the number of extents is unreasonable, then something
 317	 * is wrong and we just bail out rather than crash in
 318	 * kmem_alloc() or memcpy() below.
 319	 */
 320	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
 321		xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
 322			(unsigned long long) ip->i_ino, nex);
 323		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
 324				     ip->i_mount, dip);
 325		return XFS_ERROR(EFSCORRUPTED);
 326	}
 327
 328	ifp->if_real_bytes = 0;
 329	if (nex == 0)
 330		ifp->if_u1.if_extents = NULL;
 331	else if (nex <= XFS_INLINE_EXTS)
 332		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
 333	else
 334		xfs_iext_add(ifp, 0, nex);
 335
 336	ifp->if_bytes = size;
 337	if (size) {
 338		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
 339		xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
 340		for (i = 0; i < nex; i++, dp++) {
 341			xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
 342			ep->l0 = get_unaligned_be64(&dp->l0);
 343			ep->l1 = get_unaligned_be64(&dp->l1);
 344		}
 345		XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
 346		if (whichfork != XFS_DATA_FORK ||
 347			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
 348				if (unlikely(xfs_check_nostate_extents(
 349				    ifp, 0, nex))) {
 350					XFS_ERROR_REPORT("xfs_iformat_extents(2)",
 351							 XFS_ERRLEVEL_LOW,
 352							 ip->i_mount);
 353					return XFS_ERROR(EFSCORRUPTED);
 354				}
 355	}
 356	ifp->if_flags |= XFS_IFEXTENTS;
 357	return 0;
 358}
 359
 360/*
 361 * The file has too many extents to fit into
 362 * the inode, so they are in B-tree format.
 363 * Allocate a buffer for the root of the B-tree
 364 * and copy the root into it.  The i_extents
 365 * field will remain NULL until all of the
 366 * extents are read in (when they are needed).
 367 */
 368STATIC int
 369xfs_iformat_btree(
 370	xfs_inode_t		*ip,
 371	xfs_dinode_t		*dip,
 372	int			whichfork)
 373{
 374	struct xfs_mount	*mp = ip->i_mount;
 375	xfs_bmdr_block_t	*dfp;
 376	xfs_ifork_t		*ifp;
 377	/* REFERENCED */
 378	int			nrecs;
 379	int			size;
 380
 381	ifp = XFS_IFORK_PTR(ip, whichfork);
 382	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
 383	size = XFS_BMAP_BROOT_SPACE(mp, dfp);
 384	nrecs = be16_to_cpu(dfp->bb_numrecs);
 385
 386	/*
 387	 * blow out if -- fork has less extents than can fit in
 388	 * fork (fork shouldn't be a btree format), root btree
 389	 * block has more records than can fit into the fork,
 390	 * or the number of extents is greater than the number of
 391	 * blocks.
 392	 */
 393	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
 394					XFS_IFORK_MAXEXT(ip, whichfork) ||
 395		     XFS_BMDR_SPACE_CALC(nrecs) >
 396					XFS_DFORK_SIZE(dip, mp, whichfork) ||
 397		     XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
 398		xfs_warn(mp, "corrupt inode %Lu (btree).",
 399					(unsigned long long) ip->i_ino);
 400		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
 401					 mp, dip);
 402		return XFS_ERROR(EFSCORRUPTED);
 403	}
 404
 405	ifp->if_broot_bytes = size;
 406	ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
 407	ASSERT(ifp->if_broot != NULL);
 408	/*
 409	 * Copy and convert from the on-disk structure
 410	 * to the in-memory structure.
 411	 */
 412	xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
 413			 ifp->if_broot, size);
 414	ifp->if_flags &= ~XFS_IFEXTENTS;
 415	ifp->if_flags |= XFS_IFBROOT;
 416
 417	return 0;
 418}
 419
 420/*
 421 * Read in extents from a btree-format inode.
 422 * Allocate and fill in if_extents.  Real work is done in xfs_bmap.c.
 423 */
 424int
 425xfs_iread_extents(
 426	xfs_trans_t	*tp,
 427	xfs_inode_t	*ip,
 428	int		whichfork)
 429{
 430	int		error;
 431	xfs_ifork_t	*ifp;
 432	xfs_extnum_t	nextents;
 433
 434	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 435
 436	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
 437		XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
 438				 ip->i_mount);
 439		return XFS_ERROR(EFSCORRUPTED);
 440	}
 441	nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
 442	ifp = XFS_IFORK_PTR(ip, whichfork);
 443
 444	/*
 445	 * We know that the size is valid (it's checked in iformat_btree)
 446	 */
 447	ifp->if_bytes = ifp->if_real_bytes = 0;
 448	ifp->if_flags |= XFS_IFEXTENTS;
 449	xfs_iext_add(ifp, 0, nextents);
 450	error = xfs_bmap_read_extents(tp, ip, whichfork);
 451	if (error) {
 452		xfs_iext_destroy(ifp);
 453		ifp->if_flags &= ~XFS_IFEXTENTS;
 454		return error;
 455	}
 456	xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
 457	return 0;
 458}
 459/*
 460 * Reallocate the space for if_broot based on the number of records
 461 * being added or deleted as indicated in rec_diff.  Move the records
 462 * and pointers in if_broot to fit the new size.  When shrinking this
 463 * will eliminate holes between the records and pointers created by
 464 * the caller.  When growing this will create holes to be filled in
 465 * by the caller.
 466 *
 467 * The caller must not request to add more records than would fit in
 468 * the on-disk inode root.  If the if_broot is currently NULL, then
 469 * if we are adding records, one will be allocated.  The caller must also
 470 * not request that the number of records go below zero, although
 471 * it can go to zero.
 472 *
 473 * ip -- the inode whose if_broot area is changing
 474 * ext_diff -- the change in the number of records, positive or negative,
 475 *	 requested for the if_broot array.
 476 */
 477void
 478xfs_iroot_realloc(
 479	xfs_inode_t		*ip,
 480	int			rec_diff,
 481	int			whichfork)
 482{
 483	struct xfs_mount	*mp = ip->i_mount;
 484	int			cur_max;
 485	xfs_ifork_t		*ifp;
 486	struct xfs_btree_block	*new_broot;
 487	int			new_max;
 488	size_t			new_size;
 489	char			*np;
 490	char			*op;
 491
 492	/*
 493	 * Handle the degenerate case quietly.
 494	 */
 495	if (rec_diff == 0) {
 496		return;
 497	}
 498
 499	ifp = XFS_IFORK_PTR(ip, whichfork);
 500	if (rec_diff > 0) {
 501		/*
 502		 * If there wasn't any memory allocated before, just
 503		 * allocate it now and get out.
 504		 */
 505		if (ifp->if_broot_bytes == 0) {
 506			new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
 507			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
 508			ifp->if_broot_bytes = (int)new_size;
 509			return;
 510		}
 511
 512		/*
 513		 * If there is already an existing if_broot, then we need
 514		 * to realloc() it and shift the pointers to their new
 515		 * location.  The records don't change location because
 516		 * they are kept butted up against the btree block header.
 517		 */
 518		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
 519		new_max = cur_max + rec_diff;
 520		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
 521		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
 522				XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
 523				KM_SLEEP | KM_NOFS);
 524		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
 525						     ifp->if_broot_bytes);
 526		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
 527						     (int)new_size);
 528		ifp->if_broot_bytes = (int)new_size;
 529		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
 530			XFS_IFORK_SIZE(ip, whichfork));
 531		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
 532		return;
 533	}
 534
 535	/*
 536	 * rec_diff is less than 0.  In this case, we are shrinking the
 537	 * if_broot buffer.  It must already exist.  If we go to zero
 538	 * records, just get rid of the root and clear the status bit.
 539	 */
 540	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
 541	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
 542	new_max = cur_max + rec_diff;
 543	ASSERT(new_max >= 0);
 544	if (new_max > 0)
 545		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
 546	else
 547		new_size = 0;
 548	if (new_size > 0) {
 549		new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
 550		/*
 551		 * First copy over the btree block header.
 552		 */
 553		memcpy(new_broot, ifp->if_broot,
 554			XFS_BMBT_BLOCK_LEN(ip->i_mount));
 555	} else {
 556		new_broot = NULL;
 557		ifp->if_flags &= ~XFS_IFBROOT;
 558	}
 559
 560	/*
 561	 * Only copy the records and pointers if there are any.
 562	 */
 563	if (new_max > 0) {
 564		/*
 565		 * First copy the records.
 566		 */
 567		op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
 568		np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
 569		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
 570
 571		/*
 572		 * Then copy the pointers.
 573		 */
 574		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
 575						     ifp->if_broot_bytes);
 576		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
 577						     (int)new_size);
 578		memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
 579	}
 580	kmem_free(ifp->if_broot);
 581	ifp->if_broot = new_broot;
 582	ifp->if_broot_bytes = (int)new_size;
 583	if (ifp->if_broot)
 584		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
 585			XFS_IFORK_SIZE(ip, whichfork));
 586	return;
 587}
 588
 589
 590/*
 591 * This is called when the amount of space needed for if_data
 592 * is increased or decreased.  The change in size is indicated by
 593 * the number of bytes that need to be added or deleted in the
 594 * byte_diff parameter.
 595 *
 596 * If the amount of space needed has decreased below the size of the
 597 * inline buffer, then switch to using the inline buffer.  Otherwise,
 598 * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
 599 * to what is needed.
 600 *
 601 * ip -- the inode whose if_data area is changing
 602 * byte_diff -- the change in the number of bytes, positive or negative,
 603 *	 requested for the if_data array.
 604 */
 605void
 606xfs_idata_realloc(
 607	xfs_inode_t	*ip,
 608	int		byte_diff,
 609	int		whichfork)
 610{
 611	xfs_ifork_t	*ifp;
 612	int		new_size;
 613	int		real_size;
 614
 615	if (byte_diff == 0) {
 616		return;
 617	}
 618
 619	ifp = XFS_IFORK_PTR(ip, whichfork);
 620	new_size = (int)ifp->if_bytes + byte_diff;
 621	ASSERT(new_size >= 0);
 622
 623	if (new_size == 0) {
 624		if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
 625			kmem_free(ifp->if_u1.if_data);
 626		}
 627		ifp->if_u1.if_data = NULL;
 628		real_size = 0;
 629	} else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
 630		/*
 631		 * If the valid extents/data can fit in if_inline_ext/data,
 632		 * copy them from the malloc'd vector and free it.
 633		 */
 634		if (ifp->if_u1.if_data == NULL) {
 635			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
 636		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
 637			ASSERT(ifp->if_real_bytes != 0);
 638			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
 639			      new_size);
 640			kmem_free(ifp->if_u1.if_data);
 641			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
 642		}
 643		real_size = 0;
 644	} else {
 645		/*
 646		 * Stuck with malloc/realloc.
 647		 * For inline data, the underlying buffer must be
 648		 * a multiple of 4 bytes in size so that it can be
 649		 * logged and stay on word boundaries.  We enforce
 650		 * that here.
 651		 */
 652		real_size = roundup(new_size, 4);
 653		if (ifp->if_u1.if_data == NULL) {
 654			ASSERT(ifp->if_real_bytes == 0);
 655			ifp->if_u1.if_data = kmem_alloc(real_size,
 656							KM_SLEEP | KM_NOFS);
 657		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
 658			/*
 659			 * Only do the realloc if the underlying size
 660			 * is really changing.
 661			 */
 662			if (ifp->if_real_bytes != real_size) {
 663				ifp->if_u1.if_data =
 664					kmem_realloc(ifp->if_u1.if_data,
 665							real_size,
 666							ifp->if_real_bytes,
 667							KM_SLEEP | KM_NOFS);
 668			}
 669		} else {
 670			ASSERT(ifp->if_real_bytes == 0);
 671			ifp->if_u1.if_data = kmem_alloc(real_size,
 672							KM_SLEEP | KM_NOFS);
 673			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
 674				ifp->if_bytes);
 675		}
 676	}
 677	ifp->if_real_bytes = real_size;
 678	ifp->if_bytes = new_size;
 679	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
 680}
 681
 682void
 683xfs_idestroy_fork(
 684	xfs_inode_t	*ip,
 685	int		whichfork)
 686{
 687	xfs_ifork_t	*ifp;
 688
 689	ifp = XFS_IFORK_PTR(ip, whichfork);
 690	if (ifp->if_broot != NULL) {
 691		kmem_free(ifp->if_broot);
 692		ifp->if_broot = NULL;
 693	}
 694
 695	/*
 696	 * If the format is local, then we can't have an extents
 697	 * array so just look for an inline data array.  If we're
 698	 * not local then we may or may not have an extents list,
 699	 * so check and free it up if we do.
 700	 */
 701	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
 702		if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
 703		    (ifp->if_u1.if_data != NULL)) {
 704			ASSERT(ifp->if_real_bytes != 0);
 705			kmem_free(ifp->if_u1.if_data);
 706			ifp->if_u1.if_data = NULL;
 707			ifp->if_real_bytes = 0;
 708		}
 709	} else if ((ifp->if_flags & XFS_IFEXTENTS) &&
 710		   ((ifp->if_flags & XFS_IFEXTIREC) ||
 711		    ((ifp->if_u1.if_extents != NULL) &&
 712		     (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
 713		ASSERT(ifp->if_real_bytes != 0);
 714		xfs_iext_destroy(ifp);
 715	}
 716	ASSERT(ifp->if_u1.if_extents == NULL ||
 717	       ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
 718	ASSERT(ifp->if_real_bytes == 0);
 719	if (whichfork == XFS_ATTR_FORK) {
 720		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
 721		ip->i_afp = NULL;
 722	}
 723}
 724
 725/*
 726 * Convert in-core extents to on-disk form
 727 *
 728 * For either the data or attr fork in extent format, we need to endian convert
 729 * the in-core extent as we place them into the on-disk inode.
 730 *
 731 * In the case of the data fork, the in-core and on-disk fork sizes can be
 732 * different due to delayed allocation extents. We only copy on-disk extents
 733 * here, so callers must always use the physical fork size to determine the
 734 * size of the buffer passed to this routine.  We will return the size actually
 735 * used.
 736 */
 737int
 738xfs_iextents_copy(
 739	xfs_inode_t		*ip,
 740	xfs_bmbt_rec_t		*dp,
 741	int			whichfork)
 742{
 743	int			copied;
 744	int			i;
 745	xfs_ifork_t		*ifp;
 746	int			nrecs;
 747	xfs_fsblock_t		start_block;
 748
 749	ifp = XFS_IFORK_PTR(ip, whichfork);
 750	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
 751	ASSERT(ifp->if_bytes > 0);
 752
 753	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
 754	XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
 755	ASSERT(nrecs > 0);
 756
 757	/*
 758	 * There are some delayed allocation extents in the
 759	 * inode, so copy the extents one at a time and skip
 760	 * the delayed ones.  There must be at least one
 761	 * non-delayed extent.
 762	 */
 763	copied = 0;
 764	for (i = 0; i < nrecs; i++) {
 765		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
 766		start_block = xfs_bmbt_get_startblock(ep);
 767		if (isnullstartblock(start_block)) {
 768			/*
 769			 * It's a delayed allocation extent, so skip it.
 770			 */
 771			continue;
 772		}
 773
 774		/* Translate to on disk format */
 775		put_unaligned_be64(ep->l0, &dp->l0);
 776		put_unaligned_be64(ep->l1, &dp->l1);
 777		dp++;
 778		copied++;
 779	}
 780	ASSERT(copied != 0);
 781	xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
 782
 783	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
 784}
 785
 786/*
 787 * Each of the following cases stores data into the same region
 788 * of the on-disk inode, so only one of them can be valid at
 789 * any given time. While it is possible to have conflicting formats
 790 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
 791 * in EXTENTS format, this can only happen when the fork has
 792 * changed formats after being modified but before being flushed.
 793 * In these cases, the format always takes precedence, because the
 794 * format indicates the current state of the fork.
 795 */
 796void
 797xfs_iflush_fork(
 798	xfs_inode_t		*ip,
 799	xfs_dinode_t		*dip,
 800	xfs_inode_log_item_t	*iip,
 801	int			whichfork,
 802	xfs_buf_t		*bp)
 803{
 804	char			*cp;
 805	xfs_ifork_t		*ifp;
 806	xfs_mount_t		*mp;
 807	static const short	brootflag[2] =
 808		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
 809	static const short	dataflag[2] =
 810		{ XFS_ILOG_DDATA, XFS_ILOG_ADATA };
 811	static const short	extflag[2] =
 812		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
 813
 814	if (!iip)
 815		return;
 816	ifp = XFS_IFORK_PTR(ip, whichfork);
 817	/*
 818	 * This can happen if we gave up in iformat in an error path,
 819	 * for the attribute fork.
 820	 */
 821	if (!ifp) {
 822		ASSERT(whichfork == XFS_ATTR_FORK);
 823		return;
 824	}
 825	cp = XFS_DFORK_PTR(dip, whichfork);
 826	mp = ip->i_mount;
 827	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
 828	case XFS_DINODE_FMT_LOCAL:
 829		if ((iip->ili_fields & dataflag[whichfork]) &&
 830		    (ifp->if_bytes > 0)) {
 831			ASSERT(ifp->if_u1.if_data != NULL);
 832			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
 833			memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
 834		}
 835		break;
 836
 837	case XFS_DINODE_FMT_EXTENTS:
 838		ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
 839		       !(iip->ili_fields & extflag[whichfork]));
 840		if ((iip->ili_fields & extflag[whichfork]) &&
 841		    (ifp->if_bytes > 0)) {
 842			ASSERT(xfs_iext_get_ext(ifp, 0));
 843			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
 844			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
 845				whichfork);
 846		}
 847		break;
 848
 849	case XFS_DINODE_FMT_BTREE:
 850		if ((iip->ili_fields & brootflag[whichfork]) &&
 851		    (ifp->if_broot_bytes > 0)) {
 852			ASSERT(ifp->if_broot != NULL);
 853			ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
 854			        XFS_IFORK_SIZE(ip, whichfork));
 855			xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
 856				(xfs_bmdr_block_t *)cp,
 857				XFS_DFORK_SIZE(dip, mp, whichfork));
 858		}
 859		break;
 860
 861	case XFS_DINODE_FMT_DEV:
 862		if (iip->ili_fields & XFS_ILOG_DEV) {
 863			ASSERT(whichfork == XFS_DATA_FORK);
 864			xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
 865		}
 866		break;
 867
 868	case XFS_DINODE_FMT_UUID:
 869		if (iip->ili_fields & XFS_ILOG_UUID) {
 870			ASSERT(whichfork == XFS_DATA_FORK);
 871			memcpy(XFS_DFORK_DPTR(dip),
 872			       &ip->i_df.if_u2.if_uuid,
 873			       sizeof(uuid_t));
 874		}
 875		break;
 876
 877	default:
 878		ASSERT(0);
 879		break;
 880	}
 881}
 882
 883/*
 884 * Return a pointer to the extent record at file index idx.
 885 */
 886xfs_bmbt_rec_host_t *
 887xfs_iext_get_ext(
 888	xfs_ifork_t	*ifp,		/* inode fork pointer */
 889	xfs_extnum_t	idx)		/* index of target extent */
 890{
 891	ASSERT(idx >= 0);
 892	ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
 893
 894	if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
 895		return ifp->if_u1.if_ext_irec->er_extbuf;
 896	} else if (ifp->if_flags & XFS_IFEXTIREC) {
 897		xfs_ext_irec_t	*erp;		/* irec pointer */
 898		int		erp_idx = 0;	/* irec index */
 899		xfs_extnum_t	page_idx = idx;	/* ext index in target list */
 900
 901		erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
 902		return &erp->er_extbuf[page_idx];
 903	} else if (ifp->if_bytes) {
 904		return &ifp->if_u1.if_extents[idx];
 905	} else {
 906		return NULL;
 907	}
 908}
 909
 910/*
 911 * Insert new item(s) into the extent records for incore inode
 912 * fork 'ifp'.  'count' new items are inserted at index 'idx'.
 913 */
 914void
 915xfs_iext_insert(
 916	xfs_inode_t	*ip,		/* incore inode pointer */
 917	xfs_extnum_t	idx,		/* starting index of new items */
 918	xfs_extnum_t	count,		/* number of inserted items */
 919	xfs_bmbt_irec_t	*new,		/* items to insert */
 920	int		state)		/* type of extent conversion */
 921{
 922	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
 923	xfs_extnum_t	i;		/* extent record index */
 924
 925	trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
 926
 927	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
 928	xfs_iext_add(ifp, idx, count);
 929	for (i = idx; i < idx + count; i++, new++)
 930		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
 931}
 932
 933/*
 934 * This is called when the amount of space required for incore file
 935 * extents needs to be increased. The ext_diff parameter stores the
 936 * number of new extents being added and the idx parameter contains
 937 * the extent index where the new extents will be added. If the new
 938 * extents are being appended, then we just need to (re)allocate and
 939 * initialize the space. Otherwise, if the new extents are being
 940 * inserted into the middle of the existing entries, a bit more work
 941 * is required to make room for the new extents to be inserted. The
 942 * caller is responsible for filling in the new extent entries upon
 943 * return.
 944 */
 945void
 946xfs_iext_add(
 947	xfs_ifork_t	*ifp,		/* inode fork pointer */
 948	xfs_extnum_t	idx,		/* index to begin adding exts */
 949	int		ext_diff)	/* number of extents to add */
 950{
 951	int		byte_diff;	/* new bytes being added */
 952	int		new_size;	/* size of extents after adding */
 953	xfs_extnum_t	nextents;	/* number of extents in file */
 954
 955	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
 956	ASSERT((idx >= 0) && (idx <= nextents));
 957	byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
 958	new_size = ifp->if_bytes + byte_diff;
 959	/*
 960	 * If the new number of extents (nextents + ext_diff)
 961	 * fits inside the inode, then continue to use the inline
 962	 * extent buffer.
 963	 */
 964	if (nextents + ext_diff <= XFS_INLINE_EXTS) {
 965		if (idx < nextents) {
 966			memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
 967				&ifp->if_u2.if_inline_ext[idx],
 968				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
 969			memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
 970		}
 971		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
 972		ifp->if_real_bytes = 0;
 973	}
 974	/*
 975	 * Otherwise use a linear (direct) extent list.
 976	 * If the extents are currently inside the inode,
 977	 * xfs_iext_realloc_direct will switch us from
 978	 * inline to direct extent allocation mode.
 979	 */
 980	else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
 981		xfs_iext_realloc_direct(ifp, new_size);
 982		if (idx < nextents) {
 983			memmove(&ifp->if_u1.if_extents[idx + ext_diff],
 984				&ifp->if_u1.if_extents[idx],
 985				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
 986			memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
 987		}
 988	}
 989	/* Indirection array */
 990	else {
 991		xfs_ext_irec_t	*erp;
 992		int		erp_idx = 0;
 993		int		page_idx = idx;
 994
 995		ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
 996		if (ifp->if_flags & XFS_IFEXTIREC) {
 997			erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
 998		} else {
 999			xfs_iext_irec_init(ifp);
1000			ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1001			erp = ifp->if_u1.if_ext_irec;
1002		}
1003		/* Extents fit in target extent page */
1004		if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
1005			if (page_idx < erp->er_extcount) {
1006				memmove(&erp->er_extbuf[page_idx + ext_diff],
1007					&erp->er_extbuf[page_idx],
1008					(erp->er_extcount - page_idx) *
1009					sizeof(xfs_bmbt_rec_t));
1010				memset(&erp->er_extbuf[page_idx], 0, byte_diff);
1011			}
1012			erp->er_extcount += ext_diff;
1013			xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1014		}
1015		/* Insert a new extent page */
1016		else if (erp) {
1017			xfs_iext_add_indirect_multi(ifp,
1018				erp_idx, page_idx, ext_diff);
1019		}
1020		/*
1021		 * If extent(s) are being appended to the last page in
1022		 * the indirection array and the new extent(s) don't fit
1023		 * in the page, then erp is NULL and erp_idx is set to
1024		 * the next index needed in the indirection array.
1025		 */
1026		else {
1027			uint	count = ext_diff;
1028
1029			while (count) {
1030				erp = xfs_iext_irec_new(ifp, erp_idx);
1031				erp->er_extcount = min(count, XFS_LINEAR_EXTS);
1032				count -= erp->er_extcount;
1033				if (count)
1034					erp_idx++;
1035			}
1036		}
1037	}
1038	ifp->if_bytes = new_size;
1039}
1040
1041/*
1042 * This is called when incore extents are being added to the indirection
1043 * array and the new extents do not fit in the target extent list. The
1044 * erp_idx parameter contains the irec index for the target extent list
1045 * in the indirection array, and the idx parameter contains the extent
1046 * index within the list. The number of extents being added is stored
1047 * in the count parameter.
1048 *
1049 *    |-------|   |-------|
1050 *    |       |   |       |    idx - number of extents before idx
1051 *    |  idx  |   | count |
1052 *    |       |   |       |    count - number of extents being inserted at idx
1053 *    |-------|   |-------|
1054 *    | count |   | nex2  |    nex2 - number of extents after idx + count
1055 *    |-------|   |-------|
1056 */
1057void
1058xfs_iext_add_indirect_multi(
1059	xfs_ifork_t	*ifp,			/* inode fork pointer */
1060	int		erp_idx,		/* target extent irec index */
1061	xfs_extnum_t	idx,			/* index within target list */
1062	int		count)			/* new extents being added */
1063{
1064	int		byte_diff;		/* new bytes being added */
1065	xfs_ext_irec_t	*erp;			/* pointer to irec entry */
1066	xfs_extnum_t	ext_diff;		/* number of extents to add */
1067	xfs_extnum_t	ext_cnt;		/* new extents still needed */
1068	xfs_extnum_t	nex2;			/* extents after idx + count */
1069	xfs_bmbt_rec_t	*nex2_ep = NULL;	/* temp list for nex2 extents */
1070	int		nlists;			/* number of irec's (lists) */
1071
1072	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1073	erp = &ifp->if_u1.if_ext_irec[erp_idx];
1074	nex2 = erp->er_extcount - idx;
1075	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1076
1077	/*
1078	 * Save second part of target extent list
1079	 * (all extents past */
1080	if (nex2) {
1081		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
1082		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
1083		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
1084		erp->er_extcount -= nex2;
1085		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
1086		memset(&erp->er_extbuf[idx], 0, byte_diff);
1087	}
1088
1089	/*
1090	 * Add the new extents to the end of the target
1091	 * list, then allocate new irec record(s) and
1092	 * extent buffer(s) as needed to store the rest
1093	 * of the new extents.
1094	 */
1095	ext_cnt = count;
1096	ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
1097	if (ext_diff) {
1098		erp->er_extcount += ext_diff;
1099		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1100		ext_cnt -= ext_diff;
1101	}
1102	while (ext_cnt) {
1103		erp_idx++;
1104		erp = xfs_iext_irec_new(ifp, erp_idx);
1105		ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
1106		erp->er_extcount = ext_diff;
1107		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1108		ext_cnt -= ext_diff;
1109	}
1110
1111	/* Add nex2 extents back to indirection array */
1112	if (nex2) {
1113		xfs_extnum_t	ext_avail;
1114		int		i;
1115
1116		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
1117		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
1118		i = 0;
1119		/*
1120		 * If nex2 extents fit in the current page, append
1121		 * nex2_ep after the new extents.
1122		 */
1123		if (nex2 <= ext_avail) {
1124			i = erp->er_extcount;
1125		}
1126		/*
1127		 * Otherwise, check if space is available in the
1128		 * next page.
1129		 */
1130		else if ((erp_idx < nlists - 1) &&
1131			 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
1132			  ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
1133			erp_idx++;
1134			erp++;
1135			/* Create a hole for nex2 extents */
1136			memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
1137				erp->er_extcount * sizeof(xfs_bmbt_rec_t));
1138		}
1139		/*
1140		 * Final choice, create a new extent page for
1141		 * nex2 extents.
1142		 */
1143		else {
1144			erp_idx++;
1145			erp = xfs_iext_irec_new(ifp, erp_idx);
1146		}
1147		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
1148		kmem_free(nex2_ep);
1149		erp->er_extcount += nex2;
1150		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
1151	}
1152}
1153
1154/*
1155 * This is called when the amount of space required for incore file
1156 * extents needs to be decreased. The ext_diff parameter stores the
1157 * number of extents to be removed and the idx parameter contains
1158 * the extent index where the extents will be removed from.
1159 *
1160 * If the amount of space needed has decreased below the linear
1161 * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
1162 * extent array.  Otherwise, use kmem_realloc() to adjust the
1163 * size to what is needed.
1164 */
1165void
1166xfs_iext_remove(
1167	xfs_inode_t	*ip,		/* incore inode pointer */
1168	xfs_extnum_t	idx,		/* index to begin removing exts */
1169	int		ext_diff,	/* number of extents to remove */
1170	int		state)		/* type of extent conversion */
1171{
1172	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
1173	xfs_extnum_t	nextents;	/* number of extents in file */
1174	int		new_size;	/* size of extents after removal */
1175
1176	trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
1177
1178	ASSERT(ext_diff > 0);
1179	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1180	new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
1181
1182	if (new_size == 0) {
1183		xfs_iext_destroy(ifp);
1184	} else if (ifp->if_flags & XFS_IFEXTIREC) {
1185		xfs_iext_remove_indirect(ifp, idx, ext_diff);
1186	} else if (ifp->if_real_bytes) {
1187		xfs_iext_remove_direct(ifp, idx, ext_diff);
1188	} else {
1189		xfs_iext_remove_inline(ifp, idx, ext_diff);
1190	}
1191	ifp->if_bytes = new_size;
1192}
1193
1194/*
1195 * This removes ext_diff extents from the inline buffer, beginning
1196 * at extent index idx.
1197 */
1198void
1199xfs_iext_remove_inline(
1200	xfs_ifork_t	*ifp,		/* inode fork pointer */
1201	xfs_extnum_t	idx,		/* index to begin removing exts */
1202	int		ext_diff)	/* number of extents to remove */
1203{
1204	int		nextents;	/* number of extents in file */
1205
1206	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1207	ASSERT(idx < XFS_INLINE_EXTS);
1208	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1209	ASSERT(((nextents - ext_diff) > 0) &&
1210		(nextents - ext_diff) < XFS_INLINE_EXTS);
1211
1212	if (idx + ext_diff < nextents) {
1213		memmove(&ifp->if_u2.if_inline_ext[idx],
1214			&ifp->if_u2.if_inline_ext[idx + ext_diff],
1215			(nextents - (idx + ext_diff)) *
1216			 sizeof(xfs_bmbt_rec_t));
1217		memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
1218			0, ext_diff * sizeof(xfs_bmbt_rec_t));
1219	} else {
1220		memset(&ifp->if_u2.if_inline_ext[idx], 0,
1221			ext_diff * sizeof(xfs_bmbt_rec_t));
1222	}
1223}
1224
1225/*
1226 * This removes ext_diff extents from a linear (direct) extent list,
1227 * beginning at extent index idx. If the extents are being removed
1228 * from the end of the list (ie. truncate) then we just need to re-
1229 * allocate the list to remove the extra space. Otherwise, if the
1230 * extents are being removed from the middle of the existing extent
1231 * entries, then we first need to move the extent records beginning
1232 * at idx + ext_diff up in the list to overwrite the records being
1233 * removed, then remove the extra space via kmem_realloc.
1234 */
1235void
1236xfs_iext_remove_direct(
1237	xfs_ifork_t	*ifp,		/* inode fork pointer */
1238	xfs_extnum_t	idx,		/* index to begin removing exts */
1239	int		ext_diff)	/* number of extents to remove */
1240{
1241	xfs_extnum_t	nextents;	/* number of extents in file */
1242	int		new_size;	/* size of extents after removal */
1243
1244	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1245	new_size = ifp->if_bytes -
1246		(ext_diff * sizeof(xfs_bmbt_rec_t));
1247	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1248
1249	if (new_size == 0) {
1250		xfs_iext_destroy(ifp);
1251		return;
1252	}
1253	/* Move extents up in the list (if needed) */
1254	if (idx + ext_diff < nextents) {
1255		memmove(&ifp->if_u1.if_extents[idx],
1256			&ifp->if_u1.if_extents[idx + ext_diff],
1257			(nextents - (idx + ext_diff)) *
1258			 sizeof(xfs_bmbt_rec_t));
1259	}
1260	memset(&ifp->if_u1.if_extents[nextents - ext_diff],
1261		0, ext_diff * sizeof(xfs_bmbt_rec_t));
1262	/*
1263	 * Reallocate the direct extent list. If the extents
1264	 * will fit inside the inode then xfs_iext_realloc_direct
1265	 * will switch from direct to inline extent allocation
1266	 * mode for us.
1267	 */
1268	xfs_iext_realloc_direct(ifp, new_size);
1269	ifp->if_bytes = new_size;
1270}
1271
1272/*
1273 * This is called when incore extents are being removed from the
1274 * indirection array and the extents being removed span multiple extent
1275 * buffers. The idx parameter contains the file extent index where we
1276 * want to begin removing extents, and the count parameter contains
1277 * how many extents need to be removed.
1278 *
1279 *    |-------|   |-------|
1280 *    | nex1  |   |       |    nex1 - number of extents before idx
1281 *    |-------|   | count |
1282 *    |       |   |       |    count - number of extents being removed at idx
1283 *    | count |   |-------|
1284 *    |       |   | nex2  |    nex2 - number of extents after idx + count
1285 *    |-------|   |-------|
1286 */
1287void
1288xfs_iext_remove_indirect(
1289	xfs_ifork_t	*ifp,		/* inode fork pointer */
1290	xfs_extnum_t	idx,		/* index to begin removing extents */
1291	int		count)		/* number of extents to remove */
1292{
1293	xfs_ext_irec_t	*erp;		/* indirection array pointer */
1294	int		erp_idx = 0;	/* indirection array index */
1295	xfs_extnum_t	ext_cnt;	/* extents left to remove */
1296	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
1297	xfs_extnum_t	nex1;		/* number of extents before idx */
1298	xfs_extnum_t	nex2;		/* extents after idx + count */
1299	int		page_idx = idx;	/* index in target extent list */
1300
1301	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1302	erp = xfs_iext_idx_to_irec(ifp,  &page_idx, &erp_idx, 0);
1303	ASSERT(erp != NULL);
1304	nex1 = page_idx;
1305	ext_cnt = count;
1306	while (ext_cnt) {
1307		nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
1308		ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
1309		/*
1310		 * Check for deletion of entire list;
1311		 * xfs_iext_irec_remove() updates extent offsets.
1312		 */
1313		if (ext_diff == erp->er_extcount) {
1314			xfs_iext_irec_remove(ifp, erp_idx);
1315			ext_cnt -= ext_diff;
1316			nex1 = 0;
1317			if (ext_cnt) {
1318				ASSERT(erp_idx < ifp->if_real_bytes /
1319					XFS_IEXT_BUFSZ);
1320				erp = &ifp->if_u1.if_ext_irec[erp_idx];
1321				nex1 = 0;
1322				continue;
1323			} else {
1324				break;
1325			}
1326		}
1327		/* Move extents up (if needed) */
1328		if (nex2) {
1329			memmove(&erp->er_extbuf[nex1],
1330				&erp->er_extbuf[nex1 + ext_diff],
1331				nex2 * sizeof(xfs_bmbt_rec_t));
1332		}
1333		/* Zero out rest of page */
1334		memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
1335			((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
1336		/* Update remaining counters */
1337		erp->er_extcount -= ext_diff;
1338		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
1339		ext_cnt -= ext_diff;
1340		nex1 = 0;
1341		erp_idx++;
1342		erp++;
1343	}
1344	ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
1345	xfs_iext_irec_compact(ifp);
1346}
1347
1348/*
1349 * Create, destroy, or resize a linear (direct) block of extents.
1350 */
1351void
1352xfs_iext_realloc_direct(
1353	xfs_ifork_t	*ifp,		/* inode fork pointer */
1354	int		new_size)	/* new size of extents after adding */
1355{
1356	int		rnew_size;	/* real new size of extents */
1357
1358	rnew_size = new_size;
1359
1360	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
1361		((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
1362		 (new_size != ifp->if_real_bytes)));
1363
1364	/* Free extent records */
1365	if (new_size == 0) {
1366		xfs_iext_destroy(ifp);
1367	}
1368	/* Resize direct extent list and zero any new bytes */
1369	else if (ifp->if_real_bytes) {
1370		/* Check if extents will fit inside the inode */
1371		if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
1372			xfs_iext_direct_to_inline(ifp, new_size /
1373				(uint)sizeof(xfs_bmbt_rec_t));
1374			ifp->if_bytes = new_size;
1375			return;
1376		}
1377		if (!is_power_of_2(new_size)){
1378			rnew_size = roundup_pow_of_two(new_size);
1379		}
1380		if (rnew_size != ifp->if_real_bytes) {
1381			ifp->if_u1.if_extents =
1382				kmem_realloc(ifp->if_u1.if_extents,
1383						rnew_size,
1384						ifp->if_real_bytes, KM_NOFS);
1385		}
1386		if (rnew_size > ifp->if_real_bytes) {
1387			memset(&ifp->if_u1.if_extents[ifp->if_bytes /
1388				(uint)sizeof(xfs_bmbt_rec_t)], 0,
1389				rnew_size - ifp->if_real_bytes);
1390		}
1391	}
1392	/* Switch from the inline extent buffer to a direct extent list */
1393	else {
1394		if (!is_power_of_2(new_size)) {
1395			rnew_size = roundup_pow_of_two(new_size);
1396		}
1397		xfs_iext_inline_to_direct(ifp, rnew_size);
1398	}
1399	ifp->if_real_bytes = rnew_size;
1400	ifp->if_bytes = new_size;
1401}
1402
1403/*
1404 * Switch from linear (direct) extent records to inline buffer.
1405 */
1406void
1407xfs_iext_direct_to_inline(
1408	xfs_ifork_t	*ifp,		/* inode fork pointer */
1409	xfs_extnum_t	nextents)	/* number of extents in file */
1410{
1411	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1412	ASSERT(nextents <= XFS_INLINE_EXTS);
1413	/*
1414	 * The inline buffer was zeroed when we switched
1415	 * from inline to direct extent allocation mode,
1416	 * so we don't need to clear it here.
1417	 */
1418	memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
1419		nextents * sizeof(xfs_bmbt_rec_t));
1420	kmem_free(ifp->if_u1.if_extents);
1421	ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
1422	ifp->if_real_bytes = 0;
1423}
1424
1425/*
1426 * Switch from inline buffer to linear (direct) extent records.
1427 * new_size should already be rounded up to the next power of 2
1428 * by the caller (when appropriate), so use new_size as it is.
1429 * However, since new_size may be rounded up, we can't update
1430 * if_bytes here. It is the caller's responsibility to update
1431 * if_bytes upon return.
1432 */
1433void
1434xfs_iext_inline_to_direct(
1435	xfs_ifork_t	*ifp,		/* inode fork pointer */
1436	int		new_size)	/* number of extents in file */
1437{
1438	ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
1439	memset(ifp->if_u1.if_extents, 0, new_size);
1440	if (ifp->if_bytes) {
1441		memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
1442			ifp->if_bytes);
1443		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1444			sizeof(xfs_bmbt_rec_t));
1445	}
1446	ifp->if_real_bytes = new_size;
1447}
1448
1449/*
1450 * Resize an extent indirection array to new_size bytes.
1451 */
1452STATIC void
1453xfs_iext_realloc_indirect(
1454	xfs_ifork_t	*ifp,		/* inode fork pointer */
1455	int		new_size)	/* new indirection array size */
1456{
1457	int		nlists;		/* number of irec's (ex lists) */
1458	int		size;		/* current indirection array size */
1459
1460	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1461	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1462	size = nlists * sizeof(xfs_ext_irec_t);
1463	ASSERT(ifp->if_real_bytes);
1464	ASSERT((new_size >= 0) && (new_size != size));
1465	if (new_size == 0) {
1466		xfs_iext_destroy(ifp);
1467	} else {
1468		ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
1469			kmem_realloc(ifp->if_u1.if_ext_irec,
1470				new_size, size, KM_NOFS);
1471	}
1472}
1473
1474/*
1475 * Switch from indirection array to linear (direct) extent allocations.
1476 */
1477STATIC void
1478xfs_iext_indirect_to_direct(
1479	 xfs_ifork_t	*ifp)		/* inode fork pointer */
1480{
1481	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
1482	xfs_extnum_t	nextents;	/* number of extents in file */
1483	int		size;		/* size of file extents */
1484
1485	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1486	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1487	ASSERT(nextents <= XFS_LINEAR_EXTS);
1488	size = nextents * sizeof(xfs_bmbt_rec_t);
1489
1490	xfs_iext_irec_compact_pages(ifp);
1491	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
1492
1493	ep = ifp->if_u1.if_ext_irec->er_extbuf;
1494	kmem_free(ifp->if_u1.if_ext_irec);
1495	ifp->if_flags &= ~XFS_IFEXTIREC;
1496	ifp->if_u1.if_extents = ep;
1497	ifp->if_bytes = size;
1498	if (nextents < XFS_LINEAR_EXTS) {
1499		xfs_iext_realloc_direct(ifp, size);
1500	}
1501}
1502
1503/*
1504 * Free incore file extents.
1505 */
1506void
1507xfs_iext_destroy(
1508	xfs_ifork_t	*ifp)		/* inode fork pointer */
1509{
1510	if (ifp->if_flags & XFS_IFEXTIREC) {
1511		int	erp_idx;
1512		int	nlists;
1513
1514		nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1515		for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
1516			xfs_iext_irec_remove(ifp, erp_idx);
1517		}
1518		ifp->if_flags &= ~XFS_IFEXTIREC;
1519	} else if (ifp->if_real_bytes) {
1520		kmem_free(ifp->if_u1.if_extents);
1521	} else if (ifp->if_bytes) {
1522		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1523			sizeof(xfs_bmbt_rec_t));
1524	}
1525	ifp->if_u1.if_extents = NULL;
1526	ifp->if_real_bytes = 0;
1527	ifp->if_bytes = 0;
1528}
1529
1530/*
1531 * Return a pointer to the extent record for file system block bno.
1532 */
1533xfs_bmbt_rec_host_t *			/* pointer to found extent record */
1534xfs_iext_bno_to_ext(
1535	xfs_ifork_t	*ifp,		/* inode fork pointer */
1536	xfs_fileoff_t	bno,		/* block number to search for */
1537	xfs_extnum_t	*idxp)		/* index of target extent */
1538{
1539	xfs_bmbt_rec_host_t *base;	/* pointer to first extent */
1540	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
1541	xfs_bmbt_rec_host_t *ep = NULL;	/* pointer to target extent */
1542	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
1543	int		high;		/* upper boundary in search */
1544	xfs_extnum_t	idx = 0;	/* index of target extent */
1545	int		low;		/* lower boundary in search */
1546	xfs_extnum_t	nextents;	/* number of file extents */
1547	xfs_fileoff_t	startoff = 0;	/* start offset of extent */
1548
1549	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1550	if (nextents == 0) {
1551		*idxp = 0;
1552		return NULL;
1553	}
1554	low = 0;
1555	if (ifp->if_flags & XFS_IFEXTIREC) {
1556		/* Find target extent list */
1557		int	erp_idx = 0;
1558		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
1559		base = erp->er_extbuf;
1560		high = erp->er_extcount - 1;
1561	} else {
1562		base = ifp->if_u1.if_extents;
1563		high = nextents - 1;
1564	}
1565	/* Binary search extent records */
1566	while (low <= high) {
1567		idx = (low + high) >> 1;
1568		ep = base + idx;
1569		startoff = xfs_bmbt_get_startoff(ep);
1570		blockcount = xfs_bmbt_get_blockcount(ep);
1571		if (bno < startoff) {
1572			high = idx - 1;
1573		} else if (bno >= startoff + blockcount) {
1574			low = idx + 1;
1575		} else {
1576			/* Convert back to file-based extent index */
1577			if (ifp->if_flags & XFS_IFEXTIREC) {
1578				idx += erp->er_extoff;
1579			}
1580			*idxp = idx;
1581			return ep;
1582		}
1583	}
1584	/* Convert back to file-based extent index */
1585	if (ifp->if_flags & XFS_IFEXTIREC) {
1586		idx += erp->er_extoff;
1587	}
1588	if (bno >= startoff + blockcount) {
1589		if (++idx == nextents) {
1590			ep = NULL;
1591		} else {
1592			ep = xfs_iext_get_ext(ifp, idx);
1593		}
1594	}
1595	*idxp = idx;
1596	return ep;
1597}
1598
1599/*
1600 * Return a pointer to the indirection array entry containing the
1601 * extent record for filesystem block bno. Store the index of the
1602 * target irec in *erp_idxp.
1603 */
1604xfs_ext_irec_t *			/* pointer to found extent record */
1605xfs_iext_bno_to_irec(
1606	xfs_ifork_t	*ifp,		/* inode fork pointer */
1607	xfs_fileoff_t	bno,		/* block number to search for */
1608	int		*erp_idxp)	/* irec index of target ext list */
1609{
1610	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
1611	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
1612	int		erp_idx;	/* indirection array index */
1613	int		nlists;		/* number of extent irec's (lists) */
1614	int		high;		/* binary search upper limit */
1615	int		low;		/* binary search lower limit */
1616
1617	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1618	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1619	erp_idx = 0;
1620	low = 0;
1621	high = nlists - 1;
1622	while (low <= high) {
1623		erp_idx = (low + high) >> 1;
1624		erp = &ifp->if_u1.if_ext_irec[erp_idx];
1625		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
1626		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
1627			high = erp_idx - 1;
1628		} else if (erp_next && bno >=
1629			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
1630			low = erp_idx + 1;
1631		} else {
1632			break;
1633		}
1634	}
1635	*erp_idxp = erp_idx;
1636	return erp;
1637}
1638
1639/*
1640 * Return a pointer to the indirection array entry containing the
1641 * extent record at file extent index *idxp. Store the index of the
1642 * target irec in *erp_idxp and store the page index of the target
1643 * extent record in *idxp.
1644 */
1645xfs_ext_irec_t *
1646xfs_iext_idx_to_irec(
1647	xfs_ifork_t	*ifp,		/* inode fork pointer */
1648	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
1649	int		*erp_idxp,	/* pointer to target irec */
1650	int		realloc)	/* new bytes were just added */
1651{
1652	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
1653	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
1654	int		erp_idx;	/* indirection array index */
1655	int		nlists;		/* number of irec's (ex lists) */
1656	int		high;		/* binary search upper limit */
1657	int		low;		/* binary search lower limit */
1658	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */
1659
1660	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1661	ASSERT(page_idx >= 0);
1662	ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
1663	ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
1664
1665	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1666	erp_idx = 0;
1667	low = 0;
1668	high = nlists - 1;
1669
1670	/* Binary search extent irec's */
1671	while (low <= high) {
1672		erp_idx = (low + high) >> 1;
1673		erp = &ifp->if_u1.if_ext_irec[erp_idx];
1674		prev = erp_idx > 0 ? erp - 1 : NULL;
1675		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
1676		     realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
1677			high = erp_idx - 1;
1678		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
1679			   (page_idx == erp->er_extoff + erp->er_extcount &&
1680			    !realloc)) {
1681			low = erp_idx + 1;
1682		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
1683			   erp->er_extcount == XFS_LINEAR_EXTS) {
1684			ASSERT(realloc);
1685			page_idx = 0;
1686			erp_idx++;
1687			erp = erp_idx < nlists ? erp + 1 : NULL;
1688			break;
1689		} else {
1690			page_idx -= erp->er_extoff;
1691			break;
1692		}
1693	}
1694	*idxp = page_idx;
1695	*erp_idxp = erp_idx;
1696	return(erp);
1697}
1698
1699/*
1700 * Allocate and initialize an indirection array once the space needed
1701 * for incore extents increases above XFS_IEXT_BUFSZ.
1702 */
1703void
1704xfs_iext_irec_init(
1705	xfs_ifork_t	*ifp)		/* inode fork pointer */
1706{
1707	xfs_ext_irec_t	*erp;		/* indirection array pointer */
1708	xfs_extnum_t	nextents;	/* number of extents in file */
1709
1710	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1711	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1712	ASSERT(nextents <= XFS_LINEAR_EXTS);
1713
1714	erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
1715
1716	if (nextents == 0) {
1717		ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
1718	} else if (!ifp->if_real_bytes) {
1719		xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
1720	} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
1721		xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
1722	}
1723	erp->er_extbuf = ifp->if_u1.if_extents;
1724	erp->er_extcount = nextents;
1725	erp->er_extoff = 0;
1726
1727	ifp->if_flags |= XFS_IFEXTIREC;
1728	ifp->if_real_bytes = XFS_IEXT_BUFSZ;
1729	ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
1730	ifp->if_u1.if_ext_irec = erp;
1731
1732	return;
1733}
1734
1735/*
1736 * Allocate and initialize a new entry in the indirection array.
1737 */
1738xfs_ext_irec_t *
1739xfs_iext_irec_new(
1740	xfs_ifork_t	*ifp,		/* inode fork pointer */
1741	int		erp_idx)	/* index for new irec */
1742{
1743	xfs_ext_irec_t	*erp;		/* indirection array pointer */
1744	int		i;		/* loop counter */
1745	int		nlists;		/* number of irec's (ex lists) */
1746
1747	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1748	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1749
1750	/* Resize indirection array */
1751	xfs_iext_realloc_indirect(ifp, ++nlists *
1752				  sizeof(xfs_ext_irec_t));
1753	/*
1754	 * Move records down in the array so the
1755	 * new page can use erp_idx.
1756	 */
1757	erp = ifp->if_u1.if_ext_irec;
1758	for (i = nlists - 1; i > erp_idx; i--) {
1759		memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
1760	}
1761	ASSERT(i == erp_idx);
1762
1763	/* Initialize new extent record */
1764	erp = ifp->if_u1.if_ext_irec;
1765	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
1766	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
1767	memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
1768	erp[erp_idx].er_extcount = 0;
1769	erp[erp_idx].er_extoff = erp_idx > 0 ?
1770		erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
1771	return (&erp[erp_idx]);
1772}
1773
1774/*
1775 * Remove a record from the indirection array.
1776 */
1777void
1778xfs_iext_irec_remove(
1779	xfs_ifork_t	*ifp,		/* inode fork pointer */
1780	int		erp_idx)	/* irec index to remove */
1781{
1782	xfs_ext_irec_t	*erp;		/* indirection array pointer */
1783	int		i;		/* loop counter */
1784	int		nlists;		/* number of irec's (ex lists) */
1785
1786	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1787	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1788	erp = &ifp->if_u1.if_ext_irec[erp_idx];
1789	if (erp->er_extbuf) {
1790		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
1791			-erp->er_extcount);
1792		kmem_free(erp->er_extbuf);
1793	}
1794	/* Compact extent records */
1795	erp = ifp->if_u1.if_ext_irec;
1796	for (i = erp_idx; i < nlists - 1; i++) {
1797		memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
1798	}
1799	/*
1800	 * Manually free the last extent record from the indirection
1801	 * array.  A call to xfs_iext_realloc_indirect() with a size
1802	 * of zero would result in a call to xfs_iext_destroy() which
1803	 * would in turn call this function again, creating a nasty
1804	 * infinite loop.
1805	 */
1806	if (--nlists) {
1807		xfs_iext_realloc_indirect(ifp,
1808			nlists * sizeof(xfs_ext_irec_t));
1809	} else {
1810		kmem_free(ifp->if_u1.if_ext_irec);
1811	}
1812	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
1813}
1814
1815/*
1816 * This is called to clean up large amounts of unused memory allocated
1817 * by the indirection array.  Before compacting anything though, verify
1818 * that the indirection array is still needed and switch back to the
1819 * linear extent list (or even the inline buffer) if possible.  The
1820 * compaction policy is as follows:
1821 *
1822 *    Full Compaction: Extents fit into a single page (or inline buffer)
1823 * Partial Compaction: Extents occupy less than 50% of allocated space
1824 *      No Compaction: Extents occupy at least 50% of allocated space
1825 */
1826void
1827xfs_iext_irec_compact(
1828	xfs_ifork_t	*ifp)		/* inode fork pointer */
1829{
1830	xfs_extnum_t	nextents;	/* number of extents in file */
1831	int		nlists;		/* number of irec's (ex lists) */
1832
1833	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1834	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1835	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1836
1837	if (nextents == 0) {
1838		xfs_iext_destroy(ifp);
1839	} else if (nextents <= XFS_INLINE_EXTS) {
1840		xfs_iext_indirect_to_direct(ifp);
1841		xfs_iext_direct_to_inline(ifp, nextents);
1842	} else if (nextents <= XFS_LINEAR_EXTS) {
1843		xfs_iext_indirect_to_direct(ifp);
1844	} else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
1845		xfs_iext_irec_compact_pages(ifp);
1846	}
1847}
1848
1849/*
1850 * Combine extents from neighboring extent pages.
1851 */
1852void
1853xfs_iext_irec_compact_pages(
1854	xfs_ifork_t	*ifp)		/* inode fork pointer */
1855{
1856	xfs_ext_irec_t	*erp, *erp_next;/* pointers to irec entries */
1857	int		erp_idx = 0;	/* indirection array index */
1858	int		nlists;		/* number of irec's (ex lists) */
1859
1860	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1861	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1862	while (erp_idx < nlists - 1) {
1863		erp = &ifp->if_u1.if_ext_irec[erp_idx];
1864		erp_next = erp + 1;
1865		if (erp_next->er_extcount <=
1866		    (XFS_LINEAR_EXTS - erp->er_extcount)) {
1867			memcpy(&erp->er_extbuf[erp->er_extcount],
1868				erp_next->er_extbuf, erp_next->er_extcount *
1869				sizeof(xfs_bmbt_rec_t));
1870			erp->er_extcount += erp_next->er_extcount;
1871			/*
1872			 * Free page before removing extent record
1873			 * so er_extoffs don't get modified in
1874			 * xfs_iext_irec_remove.
1875			 */
1876			kmem_free(erp_next->er_extbuf);
1877			erp_next->er_extbuf = NULL;
1878			xfs_iext_irec_remove(ifp, erp_idx + 1);
1879			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1880		} else {
1881			erp_idx++;
1882		}
1883	}
1884}
1885
1886/*
1887 * This is called to update the er_extoff field in the indirection
1888 * array when extents have been added or removed from one of the
1889 * extent lists. erp_idx contains the irec index to begin updating
1890 * at and ext_diff contains the number of extents that were added
1891 * or removed.
1892 */
1893void
1894xfs_iext_irec_update_extoffs(
1895	xfs_ifork_t	*ifp,		/* inode fork pointer */
1896	int		erp_idx,	/* irec index to update */
1897	int		ext_diff)	/* number of new extents */
1898{
1899	int		i;		/* loop counter */
1900	int		nlists;		/* number of irec's (ex lists */
1901
1902	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1903	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1904	for (i = erp_idx; i < nlists; i++) {
1905		ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
1906	}
1907}