Linux Audio

Check our new training course

Loading...
Note: File does not exist in v6.9.4.
   1/*
   2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_types.h"
  21#include "xfs_bit.h"
  22#include "xfs_log.h"
  23#include "xfs_trans.h"
  24#include "xfs_sb.h"
  25#include "xfs_ag.h"
  26#include "xfs_mount.h"
  27#include "xfs_da_btree.h"
  28#include "xfs_bmap_btree.h"
  29#include "xfs_dinode.h"
  30#include "xfs_inode.h"
  31#include "xfs_bmap.h"
  32#include "xfs_dir2_format.h"
  33#include "xfs_dir2_priv.h"
  34#include "xfs_error.h"
  35#include "xfs_trace.h"
  36
  37/*
  38 * Local function declarations.
  39 */
  40#ifdef DEBUG
  41static void xfs_dir2_leaf_check(xfs_inode_t *dp, xfs_dabuf_t *bp);
  42#else
  43#define	xfs_dir2_leaf_check(dp, bp)
  44#endif
  45static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **lbpp,
  46				    int *indexp, xfs_dabuf_t **dbpp);
  47static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
  48				    int first, int last);
  49static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp);
  50
  51
  52/*
  53 * Convert a block form directory to a leaf form directory.
  54 */
  55int						/* error */
  56xfs_dir2_block_to_leaf(
  57	xfs_da_args_t		*args,		/* operation arguments */
  58	xfs_dabuf_t		*dbp)		/* input block's buffer */
  59{
  60	__be16			*bestsp;	/* leaf's bestsp entries */
  61	xfs_dablk_t		blkno;		/* leaf block's bno */
  62	xfs_dir2_data_hdr_t	*hdr;		/* block header */
  63	xfs_dir2_leaf_entry_t	*blp;		/* block's leaf entries */
  64	xfs_dir2_block_tail_t	*btp;		/* block's tail */
  65	xfs_inode_t		*dp;		/* incore directory inode */
  66	int			error;		/* error return code */
  67	xfs_dabuf_t		*lbp;		/* leaf block's buffer */
  68	xfs_dir2_db_t		ldb;		/* leaf block's bno */
  69	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
  70	xfs_dir2_leaf_tail_t	*ltp;		/* leaf's tail */
  71	xfs_mount_t		*mp;		/* filesystem mount point */
  72	int			needlog;	/* need to log block header */
  73	int			needscan;	/* need to rescan bestfree */
  74	xfs_trans_t		*tp;		/* transaction pointer */
  75
  76	trace_xfs_dir2_block_to_leaf(args);
  77
  78	dp = args->dp;
  79	mp = dp->i_mount;
  80	tp = args->trans;
  81	/*
  82	 * Add the leaf block to the inode.
  83	 * This interface will only put blocks in the leaf/node range.
  84	 * Since that's empty now, we'll get the root (block 0 in range).
  85	 */
  86	if ((error = xfs_da_grow_inode(args, &blkno))) {
  87		return error;
  88	}
  89	ldb = xfs_dir2_da_to_db(mp, blkno);
  90	ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp));
  91	/*
  92	 * Initialize the leaf block, get a buffer for it.
  93	 */
  94	if ((error = xfs_dir2_leaf_init(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC))) {
  95		return error;
  96	}
  97	ASSERT(lbp != NULL);
  98	leaf = lbp->data;
  99	hdr = dbp->data;
 100	xfs_dir2_data_check(dp, dbp);
 101	btp = xfs_dir2_block_tail_p(mp, hdr);
 102	blp = xfs_dir2_block_leaf_p(btp);
 103	/*
 104	 * Set the counts in the leaf header.
 105	 */
 106	leaf->hdr.count = cpu_to_be16(be32_to_cpu(btp->count));
 107	leaf->hdr.stale = cpu_to_be16(be32_to_cpu(btp->stale));
 108	/*
 109	 * Could compact these but I think we always do the conversion
 110	 * after squeezing out stale entries.
 111	 */
 112	memcpy(leaf->ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t));
 113	xfs_dir2_leaf_log_ents(tp, lbp, 0, be16_to_cpu(leaf->hdr.count) - 1);
 114	needscan = 0;
 115	needlog = 1;
 116	/*
 117	 * Make the space formerly occupied by the leaf entries and block
 118	 * tail be free.
 119	 */
 120	xfs_dir2_data_make_free(tp, dbp,
 121		(xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
 122		(xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize -
 123				       (char *)blp),
 124		&needlog, &needscan);
 125	/*
 126	 * Fix up the block header, make it a data block.
 127	 */
 128	hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
 129	if (needscan)
 130		xfs_dir2_data_freescan(mp, hdr, &needlog);
 131	/*
 132	 * Set up leaf tail and bests table.
 133	 */
 134	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 135	ltp->bestcount = cpu_to_be32(1);
 136	bestsp = xfs_dir2_leaf_bests_p(ltp);
 137	bestsp[0] =  hdr->bestfree[0].length;
 138	/*
 139	 * Log the data header and leaf bests table.
 140	 */
 141	if (needlog)
 142		xfs_dir2_data_log_header(tp, dbp);
 143	xfs_dir2_leaf_check(dp, lbp);
 144	xfs_dir2_data_check(dp, dbp);
 145	xfs_dir2_leaf_log_bests(tp, lbp, 0, 0);
 146	xfs_da_buf_done(lbp);
 147	return 0;
 148}
 149
 150STATIC void
 151xfs_dir2_leaf_find_stale(
 152	struct xfs_dir2_leaf	*leaf,
 153	int			index,
 154	int			*lowstale,
 155	int			*highstale)
 156{
 157	/*
 158	 * Find the first stale entry before our index, if any.
 159	 */
 160	for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) {
 161		if (leaf->ents[*lowstale].address ==
 162		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
 163			break;
 164	}
 165
 166	/*
 167	 * Find the first stale entry at or after our index, if any.
 168	 * Stop if the result would require moving more entries than using
 169	 * lowstale.
 170	 */
 171	for (*highstale = index;
 172	     *highstale < be16_to_cpu(leaf->hdr.count);
 173	     ++*highstale) {
 174		if (leaf->ents[*highstale].address ==
 175		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
 176			break;
 177		if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
 178			break;
 179	}
 180}
 181
 182struct xfs_dir2_leaf_entry *
 183xfs_dir2_leaf_find_entry(
 184	xfs_dir2_leaf_t		*leaf,		/* leaf structure */
 185	int			index,		/* leaf table position */
 186	int			compact,	/* need to compact leaves */
 187	int			lowstale,	/* index of prev stale leaf */
 188	int			highstale,	/* index of next stale leaf */
 189	int			*lfloglow,	/* low leaf logging index */
 190	int			*lfloghigh)	/* high leaf logging index */
 191{
 192	if (!leaf->hdr.stale) {
 193		xfs_dir2_leaf_entry_t	*lep;	/* leaf entry table pointer */
 194
 195		/*
 196		 * Now we need to make room to insert the leaf entry.
 197		 *
 198		 * If there are no stale entries, just insert a hole at index.
 199		 */
 200		lep = &leaf->ents[index];
 201		if (index < be16_to_cpu(leaf->hdr.count))
 202			memmove(lep + 1, lep,
 203				(be16_to_cpu(leaf->hdr.count) - index) *
 204				 sizeof(*lep));
 205
 206		/*
 207		 * Record low and high logging indices for the leaf.
 208		 */
 209		*lfloglow = index;
 210		*lfloghigh = be16_to_cpu(leaf->hdr.count);
 211		be16_add_cpu(&leaf->hdr.count, 1);
 212		return lep;
 213	}
 214
 215	/*
 216	 * There are stale entries.
 217	 *
 218	 * We will use one of them for the new entry.  It's probably not at
 219	 * the right location, so we'll have to shift some up or down first.
 220	 *
 221	 * If we didn't compact before, we need to find the nearest stale
 222	 * entries before and after our insertion point.
 223	 */
 224	if (compact == 0)
 225		xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
 226
 227	/*
 228	 * If the low one is better, use it.
 229	 */
 230	if (lowstale >= 0 &&
 231	    (highstale == be16_to_cpu(leaf->hdr.count) ||
 232	     index - lowstale - 1 < highstale - index)) {
 233		ASSERT(index - lowstale - 1 >= 0);
 234		ASSERT(leaf->ents[lowstale].address ==
 235		       cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
 236
 237		/*
 238		 * Copy entries up to cover the stale entry and make room
 239		 * for the new entry.
 240		 */
 241		if (index - lowstale - 1 > 0) {
 242			memmove(&leaf->ents[lowstale],
 243				&leaf->ents[lowstale + 1],
 244				(index - lowstale - 1) *
 245				sizeof(xfs_dir2_leaf_entry_t));
 246		}
 247		*lfloglow = MIN(lowstale, *lfloglow);
 248		*lfloghigh = MAX(index - 1, *lfloghigh);
 249		be16_add_cpu(&leaf->hdr.stale, -1);
 250		return &leaf->ents[index - 1];
 251	}
 252
 253	/*
 254	 * The high one is better, so use that one.
 255	 */
 256	ASSERT(highstale - index >= 0);
 257	ASSERT(leaf->ents[highstale].address ==
 258	       cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
 259
 260	/*
 261	 * Copy entries down to cover the stale entry and make room for the
 262	 * new entry.
 263	 */
 264	if (highstale - index > 0) {
 265		memmove(&leaf->ents[index + 1],
 266			&leaf->ents[index],
 267			(highstale - index) * sizeof(xfs_dir2_leaf_entry_t));
 268	}
 269	*lfloglow = MIN(index, *lfloglow);
 270	*lfloghigh = MAX(highstale, *lfloghigh);
 271	be16_add_cpu(&leaf->hdr.stale, -1);
 272	return &leaf->ents[index];
 273}
 274
 275/*
 276 * Add an entry to a leaf form directory.
 277 */
 278int						/* error */
 279xfs_dir2_leaf_addname(
 280	xfs_da_args_t		*args)		/* operation arguments */
 281{
 282	__be16			*bestsp;	/* freespace table in leaf */
 283	int			compact;	/* need to compact leaves */
 284	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
 285	xfs_dabuf_t		*dbp;		/* data block buffer */
 286	xfs_dir2_data_entry_t	*dep;		/* data block entry */
 287	xfs_inode_t		*dp;		/* incore directory inode */
 288	xfs_dir2_data_unused_t	*dup;		/* data unused entry */
 289	int			error;		/* error return value */
 290	int			grown;		/* allocated new data block */
 291	int			highstale;	/* index of next stale leaf */
 292	int			i;		/* temporary, index */
 293	int			index;		/* leaf table position */
 294	xfs_dabuf_t		*lbp;		/* leaf's buffer */
 295	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 296	int			length;		/* length of new entry */
 297	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry table pointer */
 298	int			lfloglow;	/* low leaf logging index */
 299	int			lfloghigh;	/* high leaf logging index */
 300	int			lowstale;	/* index of prev stale leaf */
 301	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail pointer */
 302	xfs_mount_t		*mp;		/* filesystem mount point */
 303	int			needbytes;	/* leaf block bytes needed */
 304	int			needlog;	/* need to log data header */
 305	int			needscan;	/* need to rescan data free */
 306	__be16			*tagp;		/* end of data entry */
 307	xfs_trans_t		*tp;		/* transaction pointer */
 308	xfs_dir2_db_t		use_block;	/* data block number */
 309
 310	trace_xfs_dir2_leaf_addname(args);
 311
 312	dp = args->dp;
 313	tp = args->trans;
 314	mp = dp->i_mount;
 315	/*
 316	 * Read the leaf block.
 317	 */
 318	error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
 319		XFS_DATA_FORK);
 320	if (error) {
 321		return error;
 322	}
 323	ASSERT(lbp != NULL);
 324	/*
 325	 * Look up the entry by hash value and name.
 326	 * We know it's not there, our caller has already done a lookup.
 327	 * So the index is of the entry to insert in front of.
 328	 * But if there are dup hash values the index is of the first of those.
 329	 */
 330	index = xfs_dir2_leaf_search_hash(args, lbp);
 331	leaf = lbp->data;
 332	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 333	bestsp = xfs_dir2_leaf_bests_p(ltp);
 334	length = xfs_dir2_data_entsize(args->namelen);
 335	/*
 336	 * See if there are any entries with the same hash value
 337	 * and space in their block for the new entry.
 338	 * This is good because it puts multiple same-hash value entries
 339	 * in a data block, improving the lookup of those entries.
 340	 */
 341	for (use_block = -1, lep = &leaf->ents[index];
 342	     index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
 343	     index++, lep++) {
 344		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
 345			continue;
 346		i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
 347		ASSERT(i < be32_to_cpu(ltp->bestcount));
 348		ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));
 349		if (be16_to_cpu(bestsp[i]) >= length) {
 350			use_block = i;
 351			break;
 352		}
 353	}
 354	/*
 355	 * Didn't find a block yet, linear search all the data blocks.
 356	 */
 357	if (use_block == -1) {
 358		for (i = 0; i < be32_to_cpu(ltp->bestcount); i++) {
 359			/*
 360			 * Remember a block we see that's missing.
 361			 */
 362			if (bestsp[i] == cpu_to_be16(NULLDATAOFF) &&
 363			    use_block == -1)
 364				use_block = i;
 365			else if (be16_to_cpu(bestsp[i]) >= length) {
 366				use_block = i;
 367				break;
 368			}
 369		}
 370	}
 371	/*
 372	 * How many bytes do we need in the leaf block?
 373	 */
 374	needbytes = 0;
 375	if (!leaf->hdr.stale)
 376		needbytes += sizeof(xfs_dir2_leaf_entry_t);
 377	if (use_block == -1)
 378		needbytes += sizeof(xfs_dir2_data_off_t);
 379
 380	/*
 381	 * Now kill use_block if it refers to a missing block, so we
 382	 * can use it as an indication of allocation needed.
 383	 */
 384	if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF))
 385		use_block = -1;
 386	/*
 387	 * If we don't have enough free bytes but we can make enough
 388	 * by compacting out stale entries, we'll do that.
 389	 */
 390	if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
 391				needbytes && be16_to_cpu(leaf->hdr.stale) > 1) {
 392		compact = 1;
 393	}
 394	/*
 395	 * Otherwise if we don't have enough free bytes we need to
 396	 * convert to node form.
 397	 */
 398	else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(
 399						leaf->hdr.count)] < needbytes) {
 400		/*
 401		 * Just checking or no space reservation, give up.
 402		 */
 403		if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
 404							args->total == 0) {
 405			xfs_da_brelse(tp, lbp);
 406			return XFS_ERROR(ENOSPC);
 407		}
 408		/*
 409		 * Convert to node form.
 410		 */
 411		error = xfs_dir2_leaf_to_node(args, lbp);
 412		xfs_da_buf_done(lbp);
 413		if (error)
 414			return error;
 415		/*
 416		 * Then add the new entry.
 417		 */
 418		return xfs_dir2_node_addname(args);
 419	}
 420	/*
 421	 * Otherwise it will fit without compaction.
 422	 */
 423	else
 424		compact = 0;
 425	/*
 426	 * If just checking, then it will fit unless we needed to allocate
 427	 * a new data block.
 428	 */
 429	if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
 430		xfs_da_brelse(tp, lbp);
 431		return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
 432	}
 433	/*
 434	 * If no allocations are allowed, return now before we've
 435	 * changed anything.
 436	 */
 437	if (args->total == 0 && use_block == -1) {
 438		xfs_da_brelse(tp, lbp);
 439		return XFS_ERROR(ENOSPC);
 440	}
 441	/*
 442	 * Need to compact the leaf entries, removing stale ones.
 443	 * Leave one stale entry behind - the one closest to our
 444	 * insertion index - and we'll shift that one to our insertion
 445	 * point later.
 446	 */
 447	if (compact) {
 448		xfs_dir2_leaf_compact_x1(lbp, &index, &lowstale, &highstale,
 449			&lfloglow, &lfloghigh);
 450	}
 451	/*
 452	 * There are stale entries, so we'll need log-low and log-high
 453	 * impossibly bad values later.
 454	 */
 455	else if (be16_to_cpu(leaf->hdr.stale)) {
 456		lfloglow = be16_to_cpu(leaf->hdr.count);
 457		lfloghigh = -1;
 458	}
 459	/*
 460	 * If there was no data block space found, we need to allocate
 461	 * a new one.
 462	 */
 463	if (use_block == -1) {
 464		/*
 465		 * Add the new data block.
 466		 */
 467		if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
 468				&use_block))) {
 469			xfs_da_brelse(tp, lbp);
 470			return error;
 471		}
 472		/*
 473		 * Initialize the block.
 474		 */
 475		if ((error = xfs_dir2_data_init(args, use_block, &dbp))) {
 476			xfs_da_brelse(tp, lbp);
 477			return error;
 478		}
 479		/*
 480		 * If we're adding a new data block on the end we need to
 481		 * extend the bests table.  Copy it up one entry.
 482		 */
 483		if (use_block >= be32_to_cpu(ltp->bestcount)) {
 484			bestsp--;
 485			memmove(&bestsp[0], &bestsp[1],
 486				be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0]));
 487			be32_add_cpu(&ltp->bestcount, 1);
 488			xfs_dir2_leaf_log_tail(tp, lbp);
 489			xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
 490		}
 491		/*
 492		 * If we're filling in a previously empty block just log it.
 493		 */
 494		else
 495			xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
 496		hdr = dbp->data;
 497		bestsp[use_block] = hdr->bestfree[0].length;
 498		grown = 1;
 499	}
 500	/*
 501	 * Already had space in some data block.
 502	 * Just read that one in.
 503	 */
 504	else {
 505		if ((error =
 506		    xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, use_block),
 507			    -1, &dbp, XFS_DATA_FORK))) {
 508			xfs_da_brelse(tp, lbp);
 509			return error;
 510		}
 511		hdr = dbp->data;
 512		grown = 0;
 513	}
 514	xfs_dir2_data_check(dp, dbp);
 515	/*
 516	 * Point to the biggest freespace in our data block.
 517	 */
 518	dup = (xfs_dir2_data_unused_t *)
 519	      ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset));
 520	ASSERT(be16_to_cpu(dup->length) >= length);
 521	needscan = needlog = 0;
 522	/*
 523	 * Mark the initial part of our freespace in use for the new entry.
 524	 */
 525	xfs_dir2_data_use_free(tp, dbp, dup,
 526		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
 527		&needlog, &needscan);
 528	/*
 529	 * Initialize our new entry (at last).
 530	 */
 531	dep = (xfs_dir2_data_entry_t *)dup;
 532	dep->inumber = cpu_to_be64(args->inumber);
 533	dep->namelen = args->namelen;
 534	memcpy(dep->name, args->name, dep->namelen);
 535	tagp = xfs_dir2_data_entry_tag_p(dep);
 536	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
 537	/*
 538	 * Need to scan fix up the bestfree table.
 539	 */
 540	if (needscan)
 541		xfs_dir2_data_freescan(mp, hdr, &needlog);
 542	/*
 543	 * Need to log the data block's header.
 544	 */
 545	if (needlog)
 546		xfs_dir2_data_log_header(tp, dbp);
 547	xfs_dir2_data_log_entry(tp, dbp, dep);
 548	/*
 549	 * If the bests table needs to be changed, do it.
 550	 * Log the change unless we've already done that.
 551	 */
 552	if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(hdr->bestfree[0].length)) {
 553		bestsp[use_block] = hdr->bestfree[0].length;
 554		if (!grown)
 555			xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
 556	}
 557
 558	lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale,
 559				       highstale, &lfloglow, &lfloghigh);
 560
 561	/*
 562	 * Fill in the new leaf entry.
 563	 */
 564	lep->hashval = cpu_to_be32(args->hashval);
 565	lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, use_block,
 566				be16_to_cpu(*tagp)));
 567	/*
 568	 * Log the leaf fields and give up the buffers.
 569	 */
 570	xfs_dir2_leaf_log_header(tp, lbp);
 571	xfs_dir2_leaf_log_ents(tp, lbp, lfloglow, lfloghigh);
 572	xfs_dir2_leaf_check(dp, lbp);
 573	xfs_da_buf_done(lbp);
 574	xfs_dir2_data_check(dp, dbp);
 575	xfs_da_buf_done(dbp);
 576	return 0;
 577}
 578
 579#ifdef DEBUG
 580/*
 581 * Check the internal consistency of a leaf1 block.
 582 * Pop an assert if something is wrong.
 583 */
 584STATIC void
 585xfs_dir2_leaf_check(
 586	xfs_inode_t		*dp,		/* incore directory inode */
 587	xfs_dabuf_t		*bp)		/* leaf's buffer */
 588{
 589	int			i;		/* leaf index */
 590	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
 591	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail pointer */
 592	xfs_mount_t		*mp;		/* filesystem mount point */
 593	int			stale;		/* count of stale leaves */
 594
 595	leaf = bp->data;
 596	mp = dp->i_mount;
 597	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
 598	/*
 599	 * This value is not restrictive enough.
 600	 * Should factor in the size of the bests table as well.
 601	 * We can deduce a value for that from di_size.
 602	 */
 603	ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
 604	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 605	/*
 606	 * Leaves and bests don't overlap.
 607	 */
 608	ASSERT((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <=
 609	       (char *)xfs_dir2_leaf_bests_p(ltp));
 610	/*
 611	 * Check hash value order, count stale entries.
 612	 */
 613	for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
 614		if (i + 1 < be16_to_cpu(leaf->hdr.count))
 615			ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
 616			       be32_to_cpu(leaf->ents[i + 1].hashval));
 617		if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
 618			stale++;
 619	}
 620	ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
 621}
 622#endif	/* DEBUG */
 623
 624/*
 625 * Compact out any stale entries in the leaf.
 626 * Log the header and changed leaf entries, if any.
 627 */
 628void
 629xfs_dir2_leaf_compact(
 630	xfs_da_args_t	*args,		/* operation arguments */
 631	xfs_dabuf_t	*bp)		/* leaf buffer */
 632{
 633	int		from;		/* source leaf index */
 634	xfs_dir2_leaf_t	*leaf;		/* leaf structure */
 635	int		loglow;		/* first leaf entry to log */
 636	int		to;		/* target leaf index */
 637
 638	leaf = bp->data;
 639	if (!leaf->hdr.stale) {
 640		return;
 641	}
 642	/*
 643	 * Compress out the stale entries in place.
 644	 */
 645	for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) {
 646		if (leaf->ents[from].address ==
 647		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
 648			continue;
 649		/*
 650		 * Only actually copy the entries that are different.
 651		 */
 652		if (from > to) {
 653			if (loglow == -1)
 654				loglow = to;
 655			leaf->ents[to] = leaf->ents[from];
 656		}
 657		to++;
 658	}
 659	/*
 660	 * Update and log the header, log the leaf entries.
 661	 */
 662	ASSERT(be16_to_cpu(leaf->hdr.stale) == from - to);
 663	be16_add_cpu(&leaf->hdr.count, -(be16_to_cpu(leaf->hdr.stale)));
 664	leaf->hdr.stale = 0;
 665	xfs_dir2_leaf_log_header(args->trans, bp);
 666	if (loglow != -1)
 667		xfs_dir2_leaf_log_ents(args->trans, bp, loglow, to - 1);
 668}
 669
 670/*
 671 * Compact the leaf entries, removing stale ones.
 672 * Leave one stale entry behind - the one closest to our
 673 * insertion index - and the caller will shift that one to our insertion
 674 * point later.
 675 * Return new insertion index, where the remaining stale entry is,
 676 * and leaf logging indices.
 677 */
 678void
 679xfs_dir2_leaf_compact_x1(
 680	xfs_dabuf_t	*bp,		/* leaf buffer */
 681	int		*indexp,	/* insertion index */
 682	int		*lowstalep,	/* out: stale entry before us */
 683	int		*highstalep,	/* out: stale entry after us */
 684	int		*lowlogp,	/* out: low log index */
 685	int		*highlogp)	/* out: high log index */
 686{
 687	int		from;		/* source copy index */
 688	int		highstale;	/* stale entry at/after index */
 689	int		index;		/* insertion index */
 690	int		keepstale;	/* source index of kept stale */
 691	xfs_dir2_leaf_t	*leaf;		/* leaf structure */
 692	int		lowstale;	/* stale entry before index */
 693	int		newindex=0;	/* new insertion index */
 694	int		to;		/* destination copy index */
 695
 696	leaf = bp->data;
 697	ASSERT(be16_to_cpu(leaf->hdr.stale) > 1);
 698	index = *indexp;
 699
 700	xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
 701
 702	/*
 703	 * Pick the better of lowstale and highstale.
 704	 */
 705	if (lowstale >= 0 &&
 706	    (highstale == be16_to_cpu(leaf->hdr.count) ||
 707	     index - lowstale <= highstale - index))
 708		keepstale = lowstale;
 709	else
 710		keepstale = highstale;
 711	/*
 712	 * Copy the entries in place, removing all the stale entries
 713	 * except keepstale.
 714	 */
 715	for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
 716		/*
 717		 * Notice the new value of index.
 718		 */
 719		if (index == from)
 720			newindex = to;
 721		if (from != keepstale &&
 722		    leaf->ents[from].address ==
 723		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
 724			if (from == to)
 725				*lowlogp = to;
 726			continue;
 727		}
 728		/*
 729		 * Record the new keepstale value for the insertion.
 730		 */
 731		if (from == keepstale)
 732			lowstale = highstale = to;
 733		/*
 734		 * Copy only the entries that have moved.
 735		 */
 736		if (from > to)
 737			leaf->ents[to] = leaf->ents[from];
 738		to++;
 739	}
 740	ASSERT(from > to);
 741	/*
 742	 * If the insertion point was past the last entry,
 743	 * set the new insertion point accordingly.
 744	 */
 745	if (index == from)
 746		newindex = to;
 747	*indexp = newindex;
 748	/*
 749	 * Adjust the leaf header values.
 750	 */
 751	be16_add_cpu(&leaf->hdr.count, -(from - to));
 752	leaf->hdr.stale = cpu_to_be16(1);
 753	/*
 754	 * Remember the low/high stale value only in the "right"
 755	 * direction.
 756	 */
 757	if (lowstale >= newindex)
 758		lowstale = -1;
 759	else
 760		highstale = be16_to_cpu(leaf->hdr.count);
 761	*highlogp = be16_to_cpu(leaf->hdr.count) - 1;
 762	*lowstalep = lowstale;
 763	*highstalep = highstale;
 764}
 765
 766/*
 767 * Getdents (readdir) for leaf and node directories.
 768 * This reads the data blocks only, so is the same for both forms.
 769 */
 770int						/* error */
 771xfs_dir2_leaf_getdents(
 772	xfs_inode_t		*dp,		/* incore directory inode */
 773	void			*dirent,
 774	size_t			bufsize,
 775	xfs_off_t		*offset,
 776	filldir_t		filldir)
 777{
 778	xfs_dabuf_t		*bp;		/* data block buffer */
 779	int			byteoff;	/* offset in current block */
 780	xfs_dir2_db_t		curdb;		/* db for current block */
 781	xfs_dir2_off_t		curoff;		/* current overall offset */
 782	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
 783	xfs_dir2_data_entry_t	*dep;		/* data entry */
 784	xfs_dir2_data_unused_t	*dup;		/* unused entry */
 785	int			error = 0;	/* error return value */
 786	int			i;		/* temporary loop index */
 787	int			j;		/* temporary loop index */
 788	int			length;		/* temporary length value */
 789	xfs_bmbt_irec_t		*map;		/* map vector for blocks */
 790	xfs_extlen_t		map_blocks;	/* number of fsbs in map */
 791	xfs_dablk_t		map_off;	/* last mapped file offset */
 792	int			map_size;	/* total entries in *map */
 793	int			map_valid;	/* valid entries in *map */
 794	xfs_mount_t		*mp;		/* filesystem mount point */
 795	xfs_dir2_off_t		newoff;		/* new curoff after new blk */
 796	int			nmap;		/* mappings to ask xfs_bmapi */
 797	char			*ptr = NULL;	/* pointer to current data */
 798	int			ra_current;	/* number of read-ahead blks */
 799	int			ra_index;	/* *map index for read-ahead */
 800	int			ra_offset;	/* map entry offset for ra */
 801	int			ra_want;	/* readahead count wanted */
 802
 803	/*
 804	 * If the offset is at or past the largest allowed value,
 805	 * give up right away.
 806	 */
 807	if (*offset >= XFS_DIR2_MAX_DATAPTR)
 808		return 0;
 809
 810	mp = dp->i_mount;
 811
 812	/*
 813	 * Set up to bmap a number of blocks based on the caller's
 814	 * buffer size, the directory block size, and the filesystem
 815	 * block size.
 816	 */
 817	map_size = howmany(bufsize + mp->m_dirblksize, mp->m_sb.sb_blocksize);
 818	map = kmem_alloc(map_size * sizeof(*map), KM_SLEEP);
 819	map_valid = ra_index = ra_offset = ra_current = map_blocks = 0;
 820	bp = NULL;
 821
 822	/*
 823	 * Inside the loop we keep the main offset value as a byte offset
 824	 * in the directory file.
 825	 */
 826	curoff = xfs_dir2_dataptr_to_byte(mp, *offset);
 827
 828	/*
 829	 * Force this conversion through db so we truncate the offset
 830	 * down to get the start of the data block.
 831	 */
 832	map_off = xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, curoff));
 833	/*
 834	 * Loop over directory entries until we reach the end offset.
 835	 * Get more blocks and readahead as necessary.
 836	 */
 837	while (curoff < XFS_DIR2_LEAF_OFFSET) {
 838		/*
 839		 * If we have no buffer, or we're off the end of the
 840		 * current buffer, need to get another one.
 841		 */
 842		if (!bp || ptr >= (char *)bp->data + mp->m_dirblksize) {
 843			/*
 844			 * If we have a buffer, we need to release it and
 845			 * take it out of the mapping.
 846			 */
 847			if (bp) {
 848				xfs_da_brelse(NULL, bp);
 849				bp = NULL;
 850				map_blocks -= mp->m_dirblkfsbs;
 851				/*
 852				 * Loop to get rid of the extents for the
 853				 * directory block.
 854				 */
 855				for (i = mp->m_dirblkfsbs; i > 0; ) {
 856					j = MIN((int)map->br_blockcount, i);
 857					map->br_blockcount -= j;
 858					map->br_startblock += j;
 859					map->br_startoff += j;
 860					/*
 861					 * If mapping is done, pitch it from
 862					 * the table.
 863					 */
 864					if (!map->br_blockcount && --map_valid)
 865						memmove(&map[0], &map[1],
 866							sizeof(map[0]) *
 867							map_valid);
 868					i -= j;
 869				}
 870			}
 871			/*
 872			 * Recalculate the readahead blocks wanted.
 873			 */
 874			ra_want = howmany(bufsize + mp->m_dirblksize,
 875					  mp->m_sb.sb_blocksize) - 1;
 876			ASSERT(ra_want >= 0);
 877
 878			/*
 879			 * If we don't have as many as we want, and we haven't
 880			 * run out of data blocks, get some more mappings.
 881			 */
 882			if (1 + ra_want > map_blocks &&
 883			    map_off <
 884			    xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
 885				/*
 886				 * Get more bmaps, fill in after the ones
 887				 * we already have in the table.
 888				 */
 889				nmap = map_size - map_valid;
 890				error = xfs_bmapi_read(dp, map_off,
 891					xfs_dir2_byte_to_da(mp,
 892						XFS_DIR2_LEAF_OFFSET) - map_off,
 893					&map[map_valid], &nmap, 0);
 894				/*
 895				 * Don't know if we should ignore this or
 896				 * try to return an error.
 897				 * The trouble with returning errors
 898				 * is that readdir will just stop without
 899				 * actually passing the error through.
 900				 */
 901				if (error)
 902					break;	/* XXX */
 903				/*
 904				 * If we got all the mappings we asked for,
 905				 * set the final map offset based on the
 906				 * last bmap value received.
 907				 * Otherwise, we've reached the end.
 908				 */
 909				if (nmap == map_size - map_valid)
 910					map_off =
 911					map[map_valid + nmap - 1].br_startoff +
 912					map[map_valid + nmap - 1].br_blockcount;
 913				else
 914					map_off =
 915						xfs_dir2_byte_to_da(mp,
 916							XFS_DIR2_LEAF_OFFSET);
 917				/*
 918				 * Look for holes in the mapping, and
 919				 * eliminate them.  Count up the valid blocks.
 920				 */
 921				for (i = map_valid; i < map_valid + nmap; ) {
 922					if (map[i].br_startblock ==
 923					    HOLESTARTBLOCK) {
 924						nmap--;
 925						length = map_valid + nmap - i;
 926						if (length)
 927							memmove(&map[i],
 928								&map[i + 1],
 929								sizeof(map[i]) *
 930								length);
 931					} else {
 932						map_blocks +=
 933							map[i].br_blockcount;
 934						i++;
 935					}
 936				}
 937				map_valid += nmap;
 938			}
 939			/*
 940			 * No valid mappings, so no more data blocks.
 941			 */
 942			if (!map_valid) {
 943				curoff = xfs_dir2_da_to_byte(mp, map_off);
 944				break;
 945			}
 946			/*
 947			 * Read the directory block starting at the first
 948			 * mapping.
 949			 */
 950			curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
 951			error = xfs_da_read_buf(NULL, dp, map->br_startoff,
 952				map->br_blockcount >= mp->m_dirblkfsbs ?
 953				    XFS_FSB_TO_DADDR(mp, map->br_startblock) :
 954				    -1,
 955				&bp, XFS_DATA_FORK);
 956			/*
 957			 * Should just skip over the data block instead
 958			 * of giving up.
 959			 */
 960			if (error)
 961				break;	/* XXX */
 962			/*
 963			 * Adjust the current amount of read-ahead: we just
 964			 * read a block that was previously ra.
 965			 */
 966			if (ra_current)
 967				ra_current -= mp->m_dirblkfsbs;
 968			/*
 969			 * Do we need more readahead?
 970			 */
 971			for (ra_index = ra_offset = i = 0;
 972			     ra_want > ra_current && i < map_blocks;
 973			     i += mp->m_dirblkfsbs) {
 974				ASSERT(ra_index < map_valid);
 975				/*
 976				 * Read-ahead a contiguous directory block.
 977				 */
 978				if (i > ra_current &&
 979				    map[ra_index].br_blockcount >=
 980				    mp->m_dirblkfsbs) {
 981					xfs_buf_readahead(mp->m_ddev_targp,
 982						XFS_FSB_TO_DADDR(mp,
 983						   map[ra_index].br_startblock +
 984						   ra_offset),
 985						(int)BTOBB(mp->m_dirblksize));
 986					ra_current = i;
 987				}
 988				/*
 989				 * Read-ahead a non-contiguous directory block.
 990				 * This doesn't use our mapping, but this
 991				 * is a very rare case.
 992				 */
 993				else if (i > ra_current) {
 994					(void)xfs_da_reada_buf(NULL, dp,
 995						map[ra_index].br_startoff +
 996						ra_offset, XFS_DATA_FORK);
 997					ra_current = i;
 998				}
 999				/*
1000				 * Advance offset through the mapping table.
1001				 */
1002				for (j = 0; j < mp->m_dirblkfsbs; j++) {
1003					/*
1004					 * The rest of this extent but not
1005					 * more than a dir block.
1006					 */
1007					length = MIN(mp->m_dirblkfsbs,
1008						(int)(map[ra_index].br_blockcount -
1009						ra_offset));
1010					j += length;
1011					ra_offset += length;
1012					/*
1013					 * Advance to the next mapping if
1014					 * this one is used up.
1015					 */
1016					if (ra_offset ==
1017					    map[ra_index].br_blockcount) {
1018						ra_offset = 0;
1019						ra_index++;
1020					}
1021				}
1022			}
1023			/*
1024			 * Having done a read, we need to set a new offset.
1025			 */
1026			newoff = xfs_dir2_db_off_to_byte(mp, curdb, 0);
1027			/*
1028			 * Start of the current block.
1029			 */
1030			if (curoff < newoff)
1031				curoff = newoff;
1032			/*
1033			 * Make sure we're in the right block.
1034			 */
1035			else if (curoff > newoff)
1036				ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
1037				       curdb);
1038			hdr = bp->data;
1039			xfs_dir2_data_check(dp, bp);
1040			/*
1041			 * Find our position in the block.
1042			 */
1043			ptr = (char *)(hdr + 1);
1044			byteoff = xfs_dir2_byte_to_off(mp, curoff);
1045			/*
1046			 * Skip past the header.
1047			 */
1048			if (byteoff == 0)
1049				curoff += (uint)sizeof(*hdr);
1050			/*
1051			 * Skip past entries until we reach our offset.
1052			 */
1053			else {
1054				while ((char *)ptr - (char *)hdr < byteoff) {
1055					dup = (xfs_dir2_data_unused_t *)ptr;
1056
1057					if (be16_to_cpu(dup->freetag)
1058						  == XFS_DIR2_DATA_FREE_TAG) {
1059
1060						length = be16_to_cpu(dup->length);
1061						ptr += length;
1062						continue;
1063					}
1064					dep = (xfs_dir2_data_entry_t *)ptr;
1065					length =
1066					   xfs_dir2_data_entsize(dep->namelen);
1067					ptr += length;
1068				}
1069				/*
1070				 * Now set our real offset.
1071				 */
1072				curoff =
1073					xfs_dir2_db_off_to_byte(mp,
1074					    xfs_dir2_byte_to_db(mp, curoff),
1075					    (char *)ptr - (char *)hdr);
1076				if (ptr >= (char *)hdr + mp->m_dirblksize) {
1077					continue;
1078				}
1079			}
1080		}
1081		/*
1082		 * We have a pointer to an entry.
1083		 * Is it a live one?
1084		 */
1085		dup = (xfs_dir2_data_unused_t *)ptr;
1086		/*
1087		 * No, it's unused, skip over it.
1088		 */
1089		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
1090			length = be16_to_cpu(dup->length);
1091			ptr += length;
1092			curoff += length;
1093			continue;
1094		}
1095
1096		dep = (xfs_dir2_data_entry_t *)ptr;
1097		length = xfs_dir2_data_entsize(dep->namelen);
1098
1099		if (filldir(dirent, (char *)dep->name, dep->namelen,
1100			    xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff,
1101			    be64_to_cpu(dep->inumber), DT_UNKNOWN))
1102			break;
1103
1104		/*
1105		 * Advance to next entry in the block.
1106		 */
1107		ptr += length;
1108		curoff += length;
1109		/* bufsize may have just been a guess; don't go negative */
1110		bufsize = bufsize > length ? bufsize - length : 0;
1111	}
1112
1113	/*
1114	 * All done.  Set output offset value to current offset.
1115	 */
1116	if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
1117		*offset = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
1118	else
1119		*offset = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
1120	kmem_free(map);
1121	if (bp)
1122		xfs_da_brelse(NULL, bp);
1123	return error;
1124}
1125
1126/*
1127 * Initialize a new leaf block, leaf1 or leafn magic accepted.
1128 */
1129int
1130xfs_dir2_leaf_init(
1131	xfs_da_args_t		*args,		/* operation arguments */
1132	xfs_dir2_db_t		bno,		/* directory block number */
1133	xfs_dabuf_t		**bpp,		/* out: leaf buffer */
1134	int			magic)		/* magic number for block */
1135{
1136	xfs_dabuf_t		*bp;		/* leaf buffer */
1137	xfs_inode_t		*dp;		/* incore directory inode */
1138	int			error;		/* error return code */
1139	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1140	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1141	xfs_mount_t		*mp;		/* filesystem mount point */
1142	xfs_trans_t		*tp;		/* transaction pointer */
1143
1144	dp = args->dp;
1145	ASSERT(dp != NULL);
1146	tp = args->trans;
1147	mp = dp->i_mount;
1148	ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) &&
1149	       bno < XFS_DIR2_FREE_FIRSTDB(mp));
1150	/*
1151	 * Get the buffer for the block.
1152	 */
1153	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
1154		XFS_DATA_FORK);
1155	if (error) {
1156		return error;
1157	}
1158	ASSERT(bp != NULL);
1159	leaf = bp->data;
1160	/*
1161	 * Initialize the header.
1162	 */
1163	leaf->hdr.info.magic = cpu_to_be16(magic);
1164	leaf->hdr.info.forw = 0;
1165	leaf->hdr.info.back = 0;
1166	leaf->hdr.count = 0;
1167	leaf->hdr.stale = 0;
1168	xfs_dir2_leaf_log_header(tp, bp);
1169	/*
1170	 * If it's a leaf-format directory initialize the tail.
1171	 * In this case our caller has the real bests table to copy into
1172	 * the block.
1173	 */
1174	if (magic == XFS_DIR2_LEAF1_MAGIC) {
1175		ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1176		ltp->bestcount = 0;
1177		xfs_dir2_leaf_log_tail(tp, bp);
1178	}
1179	*bpp = bp;
1180	return 0;
1181}
1182
1183/*
1184 * Log the bests entries indicated from a leaf1 block.
1185 */
1186static void
1187xfs_dir2_leaf_log_bests(
1188	xfs_trans_t		*tp,		/* transaction pointer */
1189	xfs_dabuf_t		*bp,		/* leaf buffer */
1190	int			first,		/* first entry to log */
1191	int			last)		/* last entry to log */
1192{
1193	__be16			*firstb;	/* pointer to first entry */
1194	__be16			*lastb;		/* pointer to last entry */
1195	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1196	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1197
1198	leaf = bp->data;
1199	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
1200	ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
1201	firstb = xfs_dir2_leaf_bests_p(ltp) + first;
1202	lastb = xfs_dir2_leaf_bests_p(ltp) + last;
1203	xfs_da_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
1204		(uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
1205}
1206
1207/*
1208 * Log the leaf entries indicated from a leaf1 or leafn block.
1209 */
1210void
1211xfs_dir2_leaf_log_ents(
1212	xfs_trans_t		*tp,		/* transaction pointer */
1213	xfs_dabuf_t		*bp,		/* leaf buffer */
1214	int			first,		/* first entry to log */
1215	int			last)		/* last entry to log */
1216{
1217	xfs_dir2_leaf_entry_t	*firstlep;	/* pointer to first entry */
1218	xfs_dir2_leaf_entry_t	*lastlep;	/* pointer to last entry */
1219	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1220
1221	leaf = bp->data;
1222	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1223	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1224	firstlep = &leaf->ents[first];
1225	lastlep = &leaf->ents[last];
1226	xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
1227		(uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
1228}
1229
1230/*
1231 * Log the header of the leaf1 or leafn block.
1232 */
1233void
1234xfs_dir2_leaf_log_header(
1235	xfs_trans_t		*tp,		/* transaction pointer */
1236	xfs_dabuf_t		*bp)		/* leaf buffer */
1237{
1238	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1239
1240	leaf = bp->data;
1241	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1242	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1243	xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
1244		(uint)(sizeof(leaf->hdr) - 1));
1245}
1246
1247/*
1248 * Log the tail of the leaf1 block.
1249 */
1250STATIC void
1251xfs_dir2_leaf_log_tail(
1252	xfs_trans_t		*tp,		/* transaction pointer */
1253	xfs_dabuf_t		*bp)		/* leaf buffer */
1254{
1255	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1256	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1257	xfs_mount_t		*mp;		/* filesystem mount point */
1258
1259	mp = tp->t_mountp;
1260	leaf = bp->data;
1261	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
1262	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1263	xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
1264		(uint)(mp->m_dirblksize - 1));
1265}
1266
1267/*
1268 * Look up the entry referred to by args in the leaf format directory.
1269 * Most of the work is done by the xfs_dir2_leaf_lookup_int routine which
1270 * is also used by the node-format code.
1271 */
1272int
1273xfs_dir2_leaf_lookup(
1274	xfs_da_args_t		*args)		/* operation arguments */
1275{
1276	xfs_dabuf_t		*dbp;		/* data block buffer */
1277	xfs_dir2_data_entry_t	*dep;		/* data block entry */
1278	xfs_inode_t		*dp;		/* incore directory inode */
1279	int			error;		/* error return code */
1280	int			index;		/* found entry index */
1281	xfs_dabuf_t		*lbp;		/* leaf buffer */
1282	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1283	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1284	xfs_trans_t		*tp;		/* transaction pointer */
1285
1286	trace_xfs_dir2_leaf_lookup(args);
1287
1288	/*
1289	 * Look up name in the leaf block, returning both buffers and index.
1290	 */
1291	if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1292		return error;
1293	}
1294	tp = args->trans;
1295	dp = args->dp;
1296	xfs_dir2_leaf_check(dp, lbp);
1297	leaf = lbp->data;
1298	/*
1299	 * Get to the leaf entry and contained data entry address.
1300	 */
1301	lep = &leaf->ents[index];
1302	/*
1303	 * Point to the data entry.
1304	 */
1305	dep = (xfs_dir2_data_entry_t *)
1306	      ((char *)dbp->data +
1307	       xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1308	/*
1309	 * Return the found inode number & CI name if appropriate
1310	 */
1311	args->inumber = be64_to_cpu(dep->inumber);
1312	error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
1313	xfs_da_brelse(tp, dbp);
1314	xfs_da_brelse(tp, lbp);
1315	return XFS_ERROR(error);
1316}
1317
1318/*
1319 * Look up name/hash in the leaf block.
1320 * Fill in indexp with the found index, and dbpp with the data buffer.
1321 * If not found dbpp will be NULL, and ENOENT comes back.
1322 * lbpp will always be filled in with the leaf buffer unless there's an error.
1323 */
1324static int					/* error */
1325xfs_dir2_leaf_lookup_int(
1326	xfs_da_args_t		*args,		/* operation arguments */
1327	xfs_dabuf_t		**lbpp,		/* out: leaf buffer */
1328	int			*indexp,	/* out: index in leaf block */
1329	xfs_dabuf_t		**dbpp)		/* out: data buffer */
1330{
1331	xfs_dir2_db_t		curdb = -1;	/* current data block number */
1332	xfs_dabuf_t		*dbp = NULL;	/* data buffer */
1333	xfs_dir2_data_entry_t	*dep;		/* data entry */
1334	xfs_inode_t		*dp;		/* incore directory inode */
1335	int			error;		/* error return code */
1336	int			index;		/* index in leaf block */
1337	xfs_dabuf_t		*lbp;		/* leaf buffer */
1338	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1339	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1340	xfs_mount_t		*mp;		/* filesystem mount point */
1341	xfs_dir2_db_t		newdb;		/* new data block number */
1342	xfs_trans_t		*tp;		/* transaction pointer */
1343	xfs_dir2_db_t		cidb = -1;	/* case match data block no. */
1344	enum xfs_dacmp		cmp;		/* name compare result */
1345
1346	dp = args->dp;
1347	tp = args->trans;
1348	mp = dp->i_mount;
1349	/*
1350	 * Read the leaf block into the buffer.
1351	 */
1352	error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
1353							XFS_DATA_FORK);
1354	if (error)
1355		return error;
1356	*lbpp = lbp;
1357	leaf = lbp->data;
1358	xfs_dir2_leaf_check(dp, lbp);
1359	/*
1360	 * Look for the first leaf entry with our hash value.
1361	 */
1362	index = xfs_dir2_leaf_search_hash(args, lbp);
1363	/*
1364	 * Loop over all the entries with the right hash value
1365	 * looking to match the name.
1366	 */
1367	for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
1368				be32_to_cpu(lep->hashval) == args->hashval;
1369				lep++, index++) {
1370		/*
1371		 * Skip over stale leaf entries.
1372		 */
1373		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
1374			continue;
1375		/*
1376		 * Get the new data block number.
1377		 */
1378		newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1379		/*
1380		 * If it's not the same as the old data block number,
1381		 * need to pitch the old one and read the new one.
1382		 */
1383		if (newdb != curdb) {
1384			if (dbp)
1385				xfs_da_brelse(tp, dbp);
1386			error = xfs_da_read_buf(tp, dp,
1387						xfs_dir2_db_to_da(mp, newdb),
1388						-1, &dbp, XFS_DATA_FORK);
1389			if (error) {
1390				xfs_da_brelse(tp, lbp);
1391				return error;
1392			}
1393			xfs_dir2_data_check(dp, dbp);
1394			curdb = newdb;
1395		}
1396		/*
1397		 * Point to the data entry.
1398		 */
1399		dep = (xfs_dir2_data_entry_t *)((char *)dbp->data +
1400			xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1401		/*
1402		 * Compare name and if it's an exact match, return the index
1403		 * and buffer. If it's the first case-insensitive match, store
1404		 * the index and buffer and continue looking for an exact match.
1405		 */
1406		cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
1407		if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
1408			args->cmpresult = cmp;
1409			*indexp = index;
1410			/* case exact match: return the current buffer. */
1411			if (cmp == XFS_CMP_EXACT) {
1412				*dbpp = dbp;
1413				return 0;
1414			}
1415			cidb = curdb;
1416		}
1417	}
1418	ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1419	/*
1420	 * Here, we can only be doing a lookup (not a rename or remove).
1421	 * If a case-insensitive match was found earlier, re-read the
1422	 * appropriate data block if required and return it.
1423	 */
1424	if (args->cmpresult == XFS_CMP_CASE) {
1425		ASSERT(cidb != -1);
1426		if (cidb != curdb) {
1427			xfs_da_brelse(tp, dbp);
1428			error = xfs_da_read_buf(tp, dp,
1429						xfs_dir2_db_to_da(mp, cidb),
1430						-1, &dbp, XFS_DATA_FORK);
1431			if (error) {
1432				xfs_da_brelse(tp, lbp);
1433				return error;
1434			}
1435		}
1436		*dbpp = dbp;
1437		return 0;
1438	}
1439	/*
1440	 * No match found, return ENOENT.
1441	 */
1442	ASSERT(cidb == -1);
1443	if (dbp)
1444		xfs_da_brelse(tp, dbp);
1445	xfs_da_brelse(tp, lbp);
1446	return XFS_ERROR(ENOENT);
1447}
1448
1449/*
1450 * Remove an entry from a leaf format directory.
1451 */
1452int						/* error */
1453xfs_dir2_leaf_removename(
1454	xfs_da_args_t		*args)		/* operation arguments */
1455{
1456	__be16			*bestsp;	/* leaf block best freespace */
1457	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
1458	xfs_dir2_db_t		db;		/* data block number */
1459	xfs_dabuf_t		*dbp;		/* data block buffer */
1460	xfs_dir2_data_entry_t	*dep;		/* data entry structure */
1461	xfs_inode_t		*dp;		/* incore directory inode */
1462	int			error;		/* error return code */
1463	xfs_dir2_db_t		i;		/* temporary data block # */
1464	int			index;		/* index into leaf entries */
1465	xfs_dabuf_t		*lbp;		/* leaf buffer */
1466	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1467	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1468	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1469	xfs_mount_t		*mp;		/* filesystem mount point */
1470	int			needlog;	/* need to log data header */
1471	int			needscan;	/* need to rescan data frees */
1472	xfs_dir2_data_off_t	oldbest;	/* old value of best free */
1473	xfs_trans_t		*tp;		/* transaction pointer */
1474
1475	trace_xfs_dir2_leaf_removename(args);
1476
1477	/*
1478	 * Lookup the leaf entry, get the leaf and data blocks read in.
1479	 */
1480	if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1481		return error;
1482	}
1483	dp = args->dp;
1484	tp = args->trans;
1485	mp = dp->i_mount;
1486	leaf = lbp->data;
1487	hdr = dbp->data;
1488	xfs_dir2_data_check(dp, dbp);
1489	/*
1490	 * Point to the leaf entry, use that to point to the data entry.
1491	 */
1492	lep = &leaf->ents[index];
1493	db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1494	dep = (xfs_dir2_data_entry_t *)
1495	      ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1496	needscan = needlog = 0;
1497	oldbest = be16_to_cpu(hdr->bestfree[0].length);
1498	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1499	bestsp = xfs_dir2_leaf_bests_p(ltp);
1500	ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
1501	/*
1502	 * Mark the former data entry unused.
1503	 */
1504	xfs_dir2_data_make_free(tp, dbp,
1505		(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
1506		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
1507	/*
1508	 * We just mark the leaf entry stale by putting a null in it.
1509	 */
1510	be16_add_cpu(&leaf->hdr.stale, 1);
1511	xfs_dir2_leaf_log_header(tp, lbp);
1512	lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
1513	xfs_dir2_leaf_log_ents(tp, lbp, index, index);
1514	/*
1515	 * Scan the freespace in the data block again if necessary,
1516	 * log the data block header if necessary.
1517	 */
1518	if (needscan)
1519		xfs_dir2_data_freescan(mp, hdr, &needlog);
1520	if (needlog)
1521		xfs_dir2_data_log_header(tp, dbp);
1522	/*
1523	 * If the longest freespace in the data block has changed,
1524	 * put the new value in the bests table and log that.
1525	 */
1526	if (be16_to_cpu(hdr->bestfree[0].length) != oldbest) {
1527		bestsp[db] = hdr->bestfree[0].length;
1528		xfs_dir2_leaf_log_bests(tp, lbp, db, db);
1529	}
1530	xfs_dir2_data_check(dp, dbp);
1531	/*
1532	 * If the data block is now empty then get rid of the data block.
1533	 */
1534	if (be16_to_cpu(hdr->bestfree[0].length) ==
1535	    mp->m_dirblksize - (uint)sizeof(*hdr)) {
1536		ASSERT(db != mp->m_dirdatablk);
1537		if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1538			/*
1539			 * Nope, can't get rid of it because it caused
1540			 * allocation of a bmap btree block to do so.
1541			 * Just go on, returning success, leaving the
1542			 * empty block in place.
1543			 */
1544			if (error == ENOSPC && args->total == 0) {
1545				xfs_da_buf_done(dbp);
1546				error = 0;
1547			}
1548			xfs_dir2_leaf_check(dp, lbp);
1549			xfs_da_buf_done(lbp);
1550			return error;
1551		}
1552		dbp = NULL;
1553		/*
1554		 * If this is the last data block then compact the
1555		 * bests table by getting rid of entries.
1556		 */
1557		if (db == be32_to_cpu(ltp->bestcount) - 1) {
1558			/*
1559			 * Look for the last active entry (i).
1560			 */
1561			for (i = db - 1; i > 0; i--) {
1562				if (bestsp[i] != cpu_to_be16(NULLDATAOFF))
1563					break;
1564			}
1565			/*
1566			 * Copy the table down so inactive entries at the
1567			 * end are removed.
1568			 */
1569			memmove(&bestsp[db - i], bestsp,
1570				(be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp));
1571			be32_add_cpu(&ltp->bestcount, -(db - i));
1572			xfs_dir2_leaf_log_tail(tp, lbp);
1573			xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1574		} else
1575			bestsp[db] = cpu_to_be16(NULLDATAOFF);
1576	}
1577	/*
1578	 * If the data block was not the first one, drop it.
1579	 */
1580	else if (db != mp->m_dirdatablk && dbp != NULL) {
1581		xfs_da_buf_done(dbp);
1582		dbp = NULL;
1583	}
1584	xfs_dir2_leaf_check(dp, lbp);
1585	/*
1586	 * See if we can convert to block form.
1587	 */
1588	return xfs_dir2_leaf_to_block(args, lbp, dbp);
1589}
1590
1591/*
1592 * Replace the inode number in a leaf format directory entry.
1593 */
1594int						/* error */
1595xfs_dir2_leaf_replace(
1596	xfs_da_args_t		*args)		/* operation arguments */
1597{
1598	xfs_dabuf_t		*dbp;		/* data block buffer */
1599	xfs_dir2_data_entry_t	*dep;		/* data block entry */
1600	xfs_inode_t		*dp;		/* incore directory inode */
1601	int			error;		/* error return code */
1602	int			index;		/* index of leaf entry */
1603	xfs_dabuf_t		*lbp;		/* leaf buffer */
1604	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1605	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1606	xfs_trans_t		*tp;		/* transaction pointer */
1607
1608	trace_xfs_dir2_leaf_replace(args);
1609
1610	/*
1611	 * Look up the entry.
1612	 */
1613	if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1614		return error;
1615	}
1616	dp = args->dp;
1617	leaf = lbp->data;
1618	/*
1619	 * Point to the leaf entry, get data address from it.
1620	 */
1621	lep = &leaf->ents[index];
1622	/*
1623	 * Point to the data entry.
1624	 */
1625	dep = (xfs_dir2_data_entry_t *)
1626	      ((char *)dbp->data +
1627	       xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1628	ASSERT(args->inumber != be64_to_cpu(dep->inumber));
1629	/*
1630	 * Put the new inode number in, log it.
1631	 */
1632	dep->inumber = cpu_to_be64(args->inumber);
1633	tp = args->trans;
1634	xfs_dir2_data_log_entry(tp, dbp, dep);
1635	xfs_da_buf_done(dbp);
1636	xfs_dir2_leaf_check(dp, lbp);
1637	xfs_da_brelse(tp, lbp);
1638	return 0;
1639}
1640
1641/*
1642 * Return index in the leaf block (lbp) which is either the first
1643 * one with this hash value, or if there are none, the insert point
1644 * for that hash value.
1645 */
1646int						/* index value */
1647xfs_dir2_leaf_search_hash(
1648	xfs_da_args_t		*args,		/* operation arguments */
1649	xfs_dabuf_t		*lbp)		/* leaf buffer */
1650{
1651	xfs_dahash_t		hash=0;		/* hash from this entry */
1652	xfs_dahash_t		hashwant;	/* hash value looking for */
1653	int			high;		/* high leaf index */
1654	int			low;		/* low leaf index */
1655	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1656	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1657	int			mid=0;		/* current leaf index */
1658
1659	leaf = lbp->data;
1660#ifndef __KERNEL__
1661	if (!leaf->hdr.count)
1662		return 0;
1663#endif
1664	/*
1665	 * Note, the table cannot be empty, so we have to go through the loop.
1666	 * Binary search the leaf entries looking for our hash value.
1667	 */
1668	for (lep = leaf->ents, low = 0, high = be16_to_cpu(leaf->hdr.count) - 1,
1669		hashwant = args->hashval;
1670	     low <= high; ) {
1671		mid = (low + high) >> 1;
1672		if ((hash = be32_to_cpu(lep[mid].hashval)) == hashwant)
1673			break;
1674		if (hash < hashwant)
1675			low = mid + 1;
1676		else
1677			high = mid - 1;
1678	}
1679	/*
1680	 * Found one, back up through all the equal hash values.
1681	 */
1682	if (hash == hashwant) {
1683		while (mid > 0 && be32_to_cpu(lep[mid - 1].hashval) == hashwant) {
1684			mid--;
1685		}
1686	}
1687	/*
1688	 * Need to point to an entry higher than ours.
1689	 */
1690	else if (hash < hashwant)
1691		mid++;
1692	return mid;
1693}
1694
1695/*
1696 * Trim off a trailing data block.  We know it's empty since the leaf
1697 * freespace table says so.
1698 */
1699int						/* error */
1700xfs_dir2_leaf_trim_data(
1701	xfs_da_args_t		*args,		/* operation arguments */
1702	xfs_dabuf_t		*lbp,		/* leaf buffer */
1703	xfs_dir2_db_t		db)		/* data block number */
1704{
1705	__be16			*bestsp;	/* leaf bests table */
1706	xfs_dabuf_t		*dbp;		/* data block buffer */
1707	xfs_inode_t		*dp;		/* incore directory inode */
1708	int			error;		/* error return value */
1709	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1710	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1711	xfs_mount_t		*mp;		/* filesystem mount point */
1712	xfs_trans_t		*tp;		/* transaction pointer */
1713
1714	dp = args->dp;
1715	mp = dp->i_mount;
1716	tp = args->trans;
1717	/*
1718	 * Read the offending data block.  We need its buffer.
1719	 */
1720	if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp,
1721			XFS_DATA_FORK))) {
1722		return error;
1723	}
1724
1725	leaf = lbp->data;
1726	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1727
1728#ifdef DEBUG
1729{
1730	struct xfs_dir2_data_hdr *hdr = dbp->data;
1731
1732	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
1733	ASSERT(be16_to_cpu(hdr->bestfree[0].length) ==
1734	       mp->m_dirblksize - (uint)sizeof(*hdr));
1735	ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
1736}
1737#endif
1738
1739	/*
1740	 * Get rid of the data block.
1741	 */
1742	if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1743		ASSERT(error != ENOSPC);
1744		xfs_da_brelse(tp, dbp);
1745		return error;
1746	}
1747	/*
1748	 * Eliminate the last bests entry from the table.
1749	 */
1750	bestsp = xfs_dir2_leaf_bests_p(ltp);
1751	be32_add_cpu(&ltp->bestcount, -1);
1752	memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
1753	xfs_dir2_leaf_log_tail(tp, lbp);
1754	xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1755	return 0;
1756}
1757
1758static inline size_t
1759xfs_dir2_leaf_size(
1760	struct xfs_dir2_leaf_hdr	*hdr,
1761	int				counts)
1762{
1763	int			entries;
1764
1765	entries = be16_to_cpu(hdr->count) - be16_to_cpu(hdr->stale);
1766	return sizeof(xfs_dir2_leaf_hdr_t) +
1767	    entries * sizeof(xfs_dir2_leaf_entry_t) +
1768	    counts * sizeof(xfs_dir2_data_off_t) +
1769	    sizeof(xfs_dir2_leaf_tail_t);
1770}
1771
1772/*
1773 * Convert node form directory to leaf form directory.
1774 * The root of the node form dir needs to already be a LEAFN block.
1775 * Just return if we can't do anything.
1776 */
1777int						/* error */
1778xfs_dir2_node_to_leaf(
1779	xfs_da_state_t		*state)		/* directory operation state */
1780{
1781	xfs_da_args_t		*args;		/* operation arguments */
1782	xfs_inode_t		*dp;		/* incore directory inode */
1783	int			error;		/* error return code */
1784	xfs_dabuf_t		*fbp;		/* buffer for freespace block */
1785	xfs_fileoff_t		fo;		/* freespace file offset */
1786	xfs_dir2_free_t		*free;		/* freespace structure */
1787	xfs_dabuf_t		*lbp;		/* buffer for leaf block */
1788	xfs_dir2_leaf_tail_t	*ltp;		/* tail of leaf structure */
1789	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1790	xfs_mount_t		*mp;		/* filesystem mount point */
1791	int			rval;		/* successful free trim? */
1792	xfs_trans_t		*tp;		/* transaction pointer */
1793
1794	/*
1795	 * There's more than a leaf level in the btree, so there must
1796	 * be multiple leafn blocks.  Give up.
1797	 */
1798	if (state->path.active > 1)
1799		return 0;
1800	args = state->args;
1801
1802	trace_xfs_dir2_node_to_leaf(args);
1803
1804	mp = state->mp;
1805	dp = args->dp;
1806	tp = args->trans;
1807	/*
1808	 * Get the last offset in the file.
1809	 */
1810	if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) {
1811		return error;
1812	}
1813	fo -= mp->m_dirblkfsbs;
1814	/*
1815	 * If there are freespace blocks other than the first one,
1816	 * take this opportunity to remove trailing empty freespace blocks
1817	 * that may have been left behind during no-space-reservation
1818	 * operations.
1819	 */
1820	while (fo > mp->m_dirfreeblk) {
1821		if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) {
1822			return error;
1823		}
1824		if (rval)
1825			fo -= mp->m_dirblkfsbs;
1826		else
1827			return 0;
1828	}
1829	/*
1830	 * Now find the block just before the freespace block.
1831	 */
1832	if ((error = xfs_bmap_last_before(tp, dp, &fo, XFS_DATA_FORK))) {
1833		return error;
1834	}
1835	/*
1836	 * If it's not the single leaf block, give up.
1837	 */
1838	if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize)
1839		return 0;
1840	lbp = state->path.blk[0].bp;
1841	leaf = lbp->data;
1842	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1843	/*
1844	 * Read the freespace block.
1845	 */
1846	if ((error = xfs_da_read_buf(tp, dp, mp->m_dirfreeblk, -1, &fbp,
1847			XFS_DATA_FORK))) {
1848		return error;
1849	}
1850	free = fbp->data;
1851	ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
1852	ASSERT(!free->hdr.firstdb);
1853
1854	/*
1855	 * Now see if the leafn and free data will fit in a leaf1.
1856	 * If not, release the buffer and give up.
1857	 */
1858	if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) >
1859			mp->m_dirblksize) {
1860		xfs_da_brelse(tp, fbp);
1861		return 0;
1862	}
1863
1864	/*
1865	 * If the leaf has any stale entries in it, compress them out.
1866	 * The compact routine will log the header.
1867	 */
1868	if (be16_to_cpu(leaf->hdr.stale))
1869		xfs_dir2_leaf_compact(args, lbp);
1870	else
1871		xfs_dir2_leaf_log_header(tp, lbp);
1872	leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAF1_MAGIC);
1873	/*
1874	 * Set up the leaf tail from the freespace block.
1875	 */
1876	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1877	ltp->bestcount = free->hdr.nvalid;
1878	/*
1879	 * Set up the leaf bests table.
1880	 */
1881	memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests,
1882		be32_to_cpu(ltp->bestcount) * sizeof(xfs_dir2_data_off_t));
1883	xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1884	xfs_dir2_leaf_log_tail(tp, lbp);
1885	xfs_dir2_leaf_check(dp, lbp);
1886	/*
1887	 * Get rid of the freespace block.
1888	 */
1889	error = xfs_dir2_shrink_inode(args, XFS_DIR2_FREE_FIRSTDB(mp), fbp);
1890	if (error) {
1891		/*
1892		 * This can't fail here because it can only happen when
1893		 * punching out the middle of an extent, and this is an
1894		 * isolated block.
1895		 */
1896		ASSERT(error != ENOSPC);
1897		return error;
1898	}
1899	fbp = NULL;
1900	/*
1901	 * Now see if we can convert the single-leaf directory
1902	 * down to a block form directory.
1903	 * This routine always kills the dabuf for the leaf, so
1904	 * eliminate it from the path.
1905	 */
1906	error = xfs_dir2_leaf_to_block(args, lbp, NULL);
1907	state->path.blk[0].bp = NULL;
1908	return error;
1909}