Linux Audio

Check our new training course

Loading...
v6.2
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * NILFS B-tree.
   4 *
   5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
   6 *
   7 * Written by Koji Sato.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
   8 */
   9
  10#include <linux/slab.h>
  11#include <linux/string.h>
  12#include <linux/errno.h>
  13#include <linux/pagevec.h>
  14#include "nilfs.h"
  15#include "page.h"
  16#include "btnode.h"
  17#include "btree.h"
  18#include "alloc.h"
  19#include "dat.h"
  20
  21static void __nilfs_btree_init(struct nilfs_bmap *bmap);
  22
  23static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
  24{
  25	struct nilfs_btree_path *path;
  26	int level = NILFS_BTREE_LEVEL_DATA;
  27
  28	path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
  29	if (path == NULL)
  30		goto out;
  31
  32	for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
  33		path[level].bp_bh = NULL;
  34		path[level].bp_sib_bh = NULL;
  35		path[level].bp_index = 0;
  36		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  37		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  38		path[level].bp_op = NULL;
  39	}
  40
  41out:
  42	return path;
  43}
  44
  45static void nilfs_btree_free_path(struct nilfs_btree_path *path)
  46{
  47	int level = NILFS_BTREE_LEVEL_DATA;
  48
  49	for (; level < NILFS_BTREE_LEVEL_MAX; level++)
  50		brelse(path[level].bp_bh);
  51
  52	kmem_cache_free(nilfs_btree_path_cache, path);
  53}
  54
  55/*
  56 * B-tree node operations
  57 */
  58static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
  59				     __u64 ptr, struct buffer_head **bhp)
  60{
  61	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
  62	struct address_space *btnc = btnc_inode->i_mapping;
  63	struct buffer_head *bh;
  64
  65	bh = nilfs_btnode_create_block(btnc, ptr);
  66	if (!bh)
  67		return -ENOMEM;
  68
  69	set_buffer_nilfs_volatile(bh);
  70	*bhp = bh;
  71	return 0;
  72}
  73
  74static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
  75{
  76	return node->bn_flags;
  77}
  78
  79static void
  80nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
  81{
  82	node->bn_flags = flags;
  83}
  84
  85static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
  86{
  87	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
  88}
  89
  90static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
  91{
  92	return node->bn_level;
  93}
  94
  95static void
  96nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
  97{
  98	node->bn_level = level;
  99}
 100
 101static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
 102{
 103	return le16_to_cpu(node->bn_nchildren);
 104}
 105
 106static void
 107nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
 108{
 109	node->bn_nchildren = cpu_to_le16(nchildren);
 110}
 111
 112static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
 113{
 114	return i_blocksize(btree->b_inode);
 115}
 116
 117static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
 118{
 119	return btree->b_nchildren_per_block;
 120}
 121
 122static __le64 *
 123nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
 124{
 125	return (__le64 *)((char *)(node + 1) +
 126			  (nilfs_btree_node_root(node) ?
 127			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
 128}
 129
 130static __le64 *
 131nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
 132{
 133	return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
 134}
 135
 136static __u64
 137nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
 138{
 139	return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
 140}
 141
 142static void
 143nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
 144{
 145	*(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
 146}
 147
 148static __u64
 149nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
 150			 int ncmax)
 151{
 152	return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
 153}
 154
 155static void
 156nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
 157			 int ncmax)
 158{
 159	*(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
 160}
 161
 162static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
 163				  int level, int nchildren, int ncmax,
 164				  const __u64 *keys, const __u64 *ptrs)
 165{
 166	__le64 *dkeys;
 167	__le64 *dptrs;
 168	int i;
 169
 170	nilfs_btree_node_set_flags(node, flags);
 171	nilfs_btree_node_set_level(node, level);
 172	nilfs_btree_node_set_nchildren(node, nchildren);
 173
 174	dkeys = nilfs_btree_node_dkeys(node);
 175	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 176	for (i = 0; i < nchildren; i++) {
 177		dkeys[i] = cpu_to_le64(keys[i]);
 178		dptrs[i] = cpu_to_le64(ptrs[i]);
 179	}
 180}
 181
 182/* Assume the buffer heads corresponding to left and right are locked. */
 183static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
 184				       struct nilfs_btree_node *right,
 185				       int n, int lncmax, int rncmax)
 186{
 187	__le64 *ldkeys, *rdkeys;
 188	__le64 *ldptrs, *rdptrs;
 189	int lnchildren, rnchildren;
 190
 191	ldkeys = nilfs_btree_node_dkeys(left);
 192	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 193	lnchildren = nilfs_btree_node_get_nchildren(left);
 194
 195	rdkeys = nilfs_btree_node_dkeys(right);
 196	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 197	rnchildren = nilfs_btree_node_get_nchildren(right);
 198
 199	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
 200	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
 201	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
 202	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
 203
 204	lnchildren += n;
 205	rnchildren -= n;
 206	nilfs_btree_node_set_nchildren(left, lnchildren);
 207	nilfs_btree_node_set_nchildren(right, rnchildren);
 208}
 209
 210/* Assume that the buffer heads corresponding to left and right are locked. */
 211static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
 212					struct nilfs_btree_node *right,
 213					int n, int lncmax, int rncmax)
 214{
 215	__le64 *ldkeys, *rdkeys;
 216	__le64 *ldptrs, *rdptrs;
 217	int lnchildren, rnchildren;
 218
 219	ldkeys = nilfs_btree_node_dkeys(left);
 220	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 221	lnchildren = nilfs_btree_node_get_nchildren(left);
 222
 223	rdkeys = nilfs_btree_node_dkeys(right);
 224	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 225	rnchildren = nilfs_btree_node_get_nchildren(right);
 226
 227	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
 228	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
 229	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
 230	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
 231
 232	lnchildren -= n;
 233	rnchildren += n;
 234	nilfs_btree_node_set_nchildren(left, lnchildren);
 235	nilfs_btree_node_set_nchildren(right, rnchildren);
 236}
 237
 238/* Assume that the buffer head corresponding to node is locked. */
 239static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
 240				    __u64 key, __u64 ptr, int ncmax)
 241{
 242	__le64 *dkeys;
 243	__le64 *dptrs;
 244	int nchildren;
 245
 246	dkeys = nilfs_btree_node_dkeys(node);
 247	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 248	nchildren = nilfs_btree_node_get_nchildren(node);
 249	if (index < nchildren) {
 250		memmove(dkeys + index + 1, dkeys + index,
 251			(nchildren - index) * sizeof(*dkeys));
 252		memmove(dptrs + index + 1, dptrs + index,
 253			(nchildren - index) * sizeof(*dptrs));
 254	}
 255	dkeys[index] = cpu_to_le64(key);
 256	dptrs[index] = cpu_to_le64(ptr);
 257	nchildren++;
 258	nilfs_btree_node_set_nchildren(node, nchildren);
 259}
 260
 261/* Assume that the buffer head corresponding to node is locked. */
 262static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
 263				    __u64 *keyp, __u64 *ptrp, int ncmax)
 264{
 265	__u64 key;
 266	__u64 ptr;
 267	__le64 *dkeys;
 268	__le64 *dptrs;
 269	int nchildren;
 270
 271	dkeys = nilfs_btree_node_dkeys(node);
 272	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 273	key = le64_to_cpu(dkeys[index]);
 274	ptr = le64_to_cpu(dptrs[index]);
 275	nchildren = nilfs_btree_node_get_nchildren(node);
 276	if (keyp != NULL)
 277		*keyp = key;
 278	if (ptrp != NULL)
 279		*ptrp = ptr;
 280
 281	if (index < nchildren - 1) {
 282		memmove(dkeys + index, dkeys + index + 1,
 283			(nchildren - index - 1) * sizeof(*dkeys));
 284		memmove(dptrs + index, dptrs + index + 1,
 285			(nchildren - index - 1) * sizeof(*dptrs));
 286	}
 287	nchildren--;
 288	nilfs_btree_node_set_nchildren(node, nchildren);
 289}
 290
 291static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
 292				   __u64 key, int *indexp)
 293{
 294	__u64 nkey;
 295	int index, low, high, s;
 296
 297	/* binary search */
 298	low = 0;
 299	high = nilfs_btree_node_get_nchildren(node) - 1;
 300	index = 0;
 301	s = 0;
 302	while (low <= high) {
 303		index = (low + high) / 2;
 304		nkey = nilfs_btree_node_get_key(node, index);
 305		if (nkey == key) {
 306			s = 0;
 307			goto out;
 308		} else if (nkey < key) {
 309			low = index + 1;
 310			s = -1;
 311		} else {
 312			high = index - 1;
 313			s = 1;
 314		}
 315	}
 316
 317	/* adjust index */
 318	if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
 319		if (s > 0 && index > 0)
 320			index--;
 321	} else if (s < 0)
 322		index++;
 323
 324 out:
 325	*indexp = index;
 326
 327	return s == 0;
 328}
 329
 330/**
 331 * nilfs_btree_node_broken - verify consistency of btree node
 332 * @node: btree node block to be examined
 333 * @size: node size (in bytes)
 334 * @inode: host inode of btree
 335 * @blocknr: block number
 336 *
 337 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 338 */
 339static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
 340				   size_t size, struct inode *inode,
 341				   sector_t blocknr)
 342{
 343	int level, flags, nchildren;
 344	int ret = 0;
 345
 346	level = nilfs_btree_node_get_level(node);
 347	flags = nilfs_btree_node_get_flags(node);
 348	nchildren = nilfs_btree_node_get_nchildren(node);
 349
 350	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
 351		     level >= NILFS_BTREE_LEVEL_MAX ||
 352		     (flags & NILFS_BTREE_NODE_ROOT) ||
 353		     nchildren < 0 ||
 354		     nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
 355		nilfs_crit(inode->i_sb,
 356			   "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
 357			   inode->i_ino, (unsigned long long)blocknr, level,
 358			   flags, nchildren);
 359		ret = 1;
 360	}
 361	return ret;
 362}
 363
 364/**
 365 * nilfs_btree_root_broken - verify consistency of btree root node
 366 * @node: btree root node to be examined
 367 * @inode: host inode of btree
 368 *
 369 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 370 */
 371static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
 372				   struct inode *inode)
 373{
 374	int level, flags, nchildren;
 375	int ret = 0;
 376
 377	level = nilfs_btree_node_get_level(node);
 378	flags = nilfs_btree_node_get_flags(node);
 379	nchildren = nilfs_btree_node_get_nchildren(node);
 380
 381	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
 382		     level >= NILFS_BTREE_LEVEL_MAX ||
 383		     nchildren < 0 ||
 384		     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
 385		nilfs_crit(inode->i_sb,
 386			   "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
 387			   inode->i_ino, level, flags, nchildren);
 388		ret = 1;
 389	}
 390	return ret;
 391}
 392
 393int nilfs_btree_broken_node_block(struct buffer_head *bh)
 394{
 395	struct inode *inode;
 396	int ret;
 397
 398	if (buffer_nilfs_checked(bh))
 399		return 0;
 400
 401	inode = bh->b_page->mapping->host;
 402	ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
 403				      bh->b_size, inode, bh->b_blocknr);
 404	if (likely(!ret))
 405		set_buffer_nilfs_checked(bh);
 406	return ret;
 407}
 408
 409static struct nilfs_btree_node *
 410nilfs_btree_get_root(const struct nilfs_bmap *btree)
 411{
 412	return (struct nilfs_btree_node *)btree->b_u.u_data;
 413}
 414
 415static struct nilfs_btree_node *
 416nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
 417{
 418	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
 419}
 420
 421static struct nilfs_btree_node *
 422nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
 423{
 424	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
 425}
 426
 427static int nilfs_btree_height(const struct nilfs_bmap *btree)
 428{
 429	return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
 430}
 431
 432static struct nilfs_btree_node *
 433nilfs_btree_get_node(const struct nilfs_bmap *btree,
 434		     const struct nilfs_btree_path *path,
 435		     int level, int *ncmaxp)
 436{
 437	struct nilfs_btree_node *node;
 438
 439	if (level == nilfs_btree_height(btree) - 1) {
 440		node = nilfs_btree_get_root(btree);
 441		*ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
 442	} else {
 443		node = nilfs_btree_get_nonroot_node(path, level);
 444		*ncmaxp = nilfs_btree_nchildren_per_block(btree);
 445	}
 446	return node;
 447}
 448
 449static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
 450				struct nilfs_btree_node *node, int level)
 451{
 452	if (unlikely(nilfs_btree_node_get_level(node) != level)) {
 453		dump_stack();
 454		nilfs_crit(btree->b_inode->i_sb,
 455			   "btree level mismatch (ino=%lu): %d != %d",
 456			   btree->b_inode->i_ino,
 457			   nilfs_btree_node_get_level(node), level);
 458		return 1;
 459	}
 460	return 0;
 461}
 462
 463struct nilfs_btree_readahead_info {
 464	struct nilfs_btree_node *node;	/* parent node */
 465	int max_ra_blocks;		/* max nof blocks to read ahead */
 466	int index;			/* current index on the parent node */
 467	int ncmax;			/* nof children in the parent node */
 468};
 469
 470static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 471				   struct buffer_head **bhp,
 472				   const struct nilfs_btree_readahead_info *ra)
 473{
 474	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
 475	struct address_space *btnc = btnc_inode->i_mapping;
 476	struct buffer_head *bh, *ra_bh;
 477	sector_t submit_ptr = 0;
 478	int ret;
 479
 480	ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh,
 481					&submit_ptr);
 482	if (ret) {
 483		if (likely(ret == -EEXIST))
 484			goto out_check;
 485		if (ret == -ENOENT) {
 486			/*
 487			 * Block address translation failed due to invalid
 488			 * value of 'ptr'.  In this case, return internal code
 489			 * -EINVAL (broken bmap) to notify bmap layer of fatal
 490			 * metadata corruption.
 491			 */
 492			ret = -EINVAL;
 493		}
 494		return ret;
 495	}
 496
 497	if (ra) {
 498		int i, n;
 499		__u64 ptr2;
 500
 501		/* read ahead sibling nodes */
 502		for (n = ra->max_ra_blocks, i = ra->index + 1;
 503		     n > 0 && i < ra->ncmax; n--, i++) {
 504			ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
 505
 506			ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
 507						REQ_OP_READ | REQ_RAHEAD,
 508						&ra_bh, &submit_ptr);
 509			if (likely(!ret || ret == -EEXIST))
 510				brelse(ra_bh);
 511			else if (ret != -EBUSY)
 512				break;
 513			if (!buffer_locked(bh))
 514				goto out_no_wait;
 515		}
 516	}
 517
 518	wait_on_buffer(bh);
 519
 520 out_no_wait:
 521	if (!buffer_uptodate(bh)) {
 522		nilfs_err(btree->b_inode->i_sb,
 523			  "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
 524			  btree->b_inode->i_ino, (unsigned long long)ptr);
 525		brelse(bh);
 526		return -EIO;
 527	}
 528
 529 out_check:
 530	if (nilfs_btree_broken_node_block(bh)) {
 531		clear_buffer_uptodate(bh);
 532		brelse(bh);
 533		return -EINVAL;
 534	}
 535
 536	*bhp = bh;
 537	return 0;
 538}
 539
 540static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 541				   struct buffer_head **bhp)
 542{
 543	return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
 544}
 545
 546static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
 547				 struct nilfs_btree_path *path,
 548				 __u64 key, __u64 *ptrp, int minlevel,
 549				 int readahead)
 550{
 551	struct nilfs_btree_node *node;
 552	struct nilfs_btree_readahead_info p, *ra;
 553	__u64 ptr;
 554	int level, index, found, ncmax, ret;
 555
 556	node = nilfs_btree_get_root(btree);
 557	level = nilfs_btree_node_get_level(node);
 558	if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
 559		return -ENOENT;
 560
 561	found = nilfs_btree_node_lookup(node, key, &index);
 562	ptr = nilfs_btree_node_get_ptr(node, index,
 563				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 564	path[level].bp_bh = NULL;
 565	path[level].bp_index = index;
 566
 567	ncmax = nilfs_btree_nchildren_per_block(btree);
 568
 569	while (--level >= minlevel) {
 570		ra = NULL;
 571		if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
 572			p.node = nilfs_btree_get_node(btree, path, level + 1,
 573						      &p.ncmax);
 574			p.index = index;
 575			p.max_ra_blocks = 7;
 576			ra = &p;
 577		}
 578		ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
 579					      ra);
 580		if (ret < 0)
 581			return ret;
 582
 583		node = nilfs_btree_get_nonroot_node(path, level);
 584		if (nilfs_btree_bad_node(btree, node, level))
 585			return -EINVAL;
 586		if (!found)
 587			found = nilfs_btree_node_lookup(node, key, &index);
 588		else
 589			index = 0;
 590		if (index < ncmax) {
 591			ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 592		} else {
 593			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
 594			/* insert */
 595			ptr = NILFS_BMAP_INVALID_PTR;
 596		}
 597		path[level].bp_index = index;
 598	}
 599	if (!found)
 600		return -ENOENT;
 601
 602	if (ptrp != NULL)
 603		*ptrp = ptr;
 604
 605	return 0;
 606}
 607
 608static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
 609				      struct nilfs_btree_path *path,
 610				      __u64 *keyp, __u64 *ptrp)
 611{
 612	struct nilfs_btree_node *node;
 613	__u64 ptr;
 614	int index, level, ncmax, ret;
 615
 616	node = nilfs_btree_get_root(btree);
 617	index = nilfs_btree_node_get_nchildren(node) - 1;
 618	if (index < 0)
 619		return -ENOENT;
 620	level = nilfs_btree_node_get_level(node);
 621	ptr = nilfs_btree_node_get_ptr(node, index,
 622				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 623	path[level].bp_bh = NULL;
 624	path[level].bp_index = index;
 625	ncmax = nilfs_btree_nchildren_per_block(btree);
 626
 627	for (level--; level > 0; level--) {
 628		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
 629		if (ret < 0)
 630			return ret;
 631		node = nilfs_btree_get_nonroot_node(path, level);
 632		if (nilfs_btree_bad_node(btree, node, level))
 633			return -EINVAL;
 634		index = nilfs_btree_node_get_nchildren(node) - 1;
 635		ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 636		path[level].bp_index = index;
 637	}
 638
 639	if (keyp != NULL)
 640		*keyp = nilfs_btree_node_get_key(node, index);
 641	if (ptrp != NULL)
 642		*ptrp = ptr;
 643
 644	return 0;
 645}
 646
 647/**
 648 * nilfs_btree_get_next_key - get next valid key from btree path array
 649 * @btree: bmap struct of btree
 650 * @path: array of nilfs_btree_path struct
 651 * @minlevel: start level
 652 * @nextkey: place to store the next valid key
 653 *
 654 * Return Value: If a next key was found, 0 is returned. Otherwise,
 655 * -ENOENT is returned.
 656 */
 657static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
 658				    const struct nilfs_btree_path *path,
 659				    int minlevel, __u64 *nextkey)
 660{
 661	struct nilfs_btree_node *node;
 662	int maxlevel = nilfs_btree_height(btree) - 1;
 663	int index, next_adj, level;
 664
 665	/* Next index is already set to bp_index for leaf nodes. */
 666	next_adj = 0;
 667	for (level = minlevel; level <= maxlevel; level++) {
 668		if (level == maxlevel)
 669			node = nilfs_btree_get_root(btree);
 670		else
 671			node = nilfs_btree_get_nonroot_node(path, level);
 672
 673		index = path[level].bp_index + next_adj;
 674		if (index < nilfs_btree_node_get_nchildren(node)) {
 675			/* Next key is in this node */
 676			*nextkey = nilfs_btree_node_get_key(node, index);
 677			return 0;
 678		}
 679		/* For non-leaf nodes, next index is stored at bp_index + 1. */
 680		next_adj = 1;
 681	}
 682	return -ENOENT;
 683}
 684
 685static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
 686			      __u64 key, int level, __u64 *ptrp)
 687{
 688	struct nilfs_btree_path *path;
 689	int ret;
 690
 691	path = nilfs_btree_alloc_path();
 692	if (path == NULL)
 693		return -ENOMEM;
 694
 695	ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
 696
 697	nilfs_btree_free_path(path);
 698
 699	return ret;
 700}
 701
 702static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
 703				     __u64 key, __u64 *ptrp,
 704				     unsigned int maxblocks)
 705{
 706	struct nilfs_btree_path *path;
 707	struct nilfs_btree_node *node;
 708	struct inode *dat = NULL;
 709	__u64 ptr, ptr2;
 710	sector_t blocknr;
 711	int level = NILFS_BTREE_LEVEL_NODE_MIN;
 712	int ret, cnt, index, maxlevel, ncmax;
 713	struct nilfs_btree_readahead_info p;
 714
 715	path = nilfs_btree_alloc_path();
 716	if (path == NULL)
 717		return -ENOMEM;
 718
 719	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
 720	if (ret < 0)
 721		goto out;
 722
 723	if (NILFS_BMAP_USE_VBN(btree)) {
 724		dat = nilfs_bmap_get_dat(btree);
 725		ret = nilfs_dat_translate(dat, ptr, &blocknr);
 726		if (ret < 0)
 727			goto out;
 728		ptr = blocknr;
 729	}
 730	cnt = 1;
 731	if (cnt == maxblocks)
 732		goto end;
 733
 734	maxlevel = nilfs_btree_height(btree) - 1;
 735	node = nilfs_btree_get_node(btree, path, level, &ncmax);
 736	index = path[level].bp_index + 1;
 737	for (;;) {
 738		while (index < nilfs_btree_node_get_nchildren(node)) {
 739			if (nilfs_btree_node_get_key(node, index) !=
 740			    key + cnt)
 741				goto end;
 742			ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
 743			if (dat) {
 744				ret = nilfs_dat_translate(dat, ptr2, &blocknr);
 745				if (ret < 0)
 746					goto out;
 747				ptr2 = blocknr;
 748			}
 749			if (ptr2 != ptr + cnt || ++cnt == maxblocks)
 750				goto end;
 751			index++;
 
 752		}
 753		if (level == maxlevel)
 754			break;
 755
 756		/* look-up right sibling node */
 757		p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
 758		p.index = path[level + 1].bp_index + 1;
 759		p.max_ra_blocks = 7;
 760		if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
 761		    nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
 762			break;
 763		ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
 764		path[level + 1].bp_index = p.index;
 765
 766		brelse(path[level].bp_bh);
 767		path[level].bp_bh = NULL;
 768
 769		ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
 770					      &p);
 771		if (ret < 0)
 772			goto out;
 773		node = nilfs_btree_get_nonroot_node(path, level);
 774		ncmax = nilfs_btree_nchildren_per_block(btree);
 775		index = 0;
 776		path[level].bp_index = index;
 777	}
 778 end:
 779	*ptrp = ptr;
 780	ret = cnt;
 781 out:
 782	nilfs_btree_free_path(path);
 783	return ret;
 784}
 785
 786static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
 787				    struct nilfs_btree_path *path,
 788				    int level, __u64 key)
 789{
 790	if (level < nilfs_btree_height(btree) - 1) {
 791		do {
 792			nilfs_btree_node_set_key(
 793				nilfs_btree_get_nonroot_node(path, level),
 794				path[level].bp_index, key);
 795			if (!buffer_dirty(path[level].bp_bh))
 796				mark_buffer_dirty(path[level].bp_bh);
 797		} while ((path[level].bp_index == 0) &&
 798			 (++level < nilfs_btree_height(btree) - 1));
 799	}
 800
 801	/* root */
 802	if (level == nilfs_btree_height(btree) - 1) {
 803		nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
 804					 path[level].bp_index, key);
 805	}
 806}
 807
 808static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
 809				  struct nilfs_btree_path *path,
 810				  int level, __u64 *keyp, __u64 *ptrp)
 811{
 812	struct nilfs_btree_node *node;
 813	int ncblk;
 814
 815	if (level < nilfs_btree_height(btree) - 1) {
 816		node = nilfs_btree_get_nonroot_node(path, level);
 817		ncblk = nilfs_btree_nchildren_per_block(btree);
 818		nilfs_btree_node_insert(node, path[level].bp_index,
 819					*keyp, *ptrp, ncblk);
 820		if (!buffer_dirty(path[level].bp_bh))
 821			mark_buffer_dirty(path[level].bp_bh);
 822
 823		if (path[level].bp_index == 0)
 824			nilfs_btree_promote_key(btree, path, level + 1,
 825						nilfs_btree_node_get_key(node,
 826									 0));
 827	} else {
 828		node = nilfs_btree_get_root(btree);
 829		nilfs_btree_node_insert(node, path[level].bp_index,
 830					*keyp, *ptrp,
 831					NILFS_BTREE_ROOT_NCHILDREN_MAX);
 832	}
 833}
 834
 835static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
 836				   struct nilfs_btree_path *path,
 837				   int level, __u64 *keyp, __u64 *ptrp)
 838{
 839	struct nilfs_btree_node *node, *left;
 840	int nchildren, lnchildren, n, move, ncblk;
 841
 842	node = nilfs_btree_get_nonroot_node(path, level);
 843	left = nilfs_btree_get_sib_node(path, level);
 844	nchildren = nilfs_btree_node_get_nchildren(node);
 845	lnchildren = nilfs_btree_node_get_nchildren(left);
 846	ncblk = nilfs_btree_nchildren_per_block(btree);
 847	move = 0;
 848
 849	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
 850	if (n > path[level].bp_index) {
 851		/* move insert point */
 852		n--;
 853		move = 1;
 854	}
 855
 856	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
 857
 858	if (!buffer_dirty(path[level].bp_bh))
 859		mark_buffer_dirty(path[level].bp_bh);
 860	if (!buffer_dirty(path[level].bp_sib_bh))
 861		mark_buffer_dirty(path[level].bp_sib_bh);
 862
 863	nilfs_btree_promote_key(btree, path, level + 1,
 864				nilfs_btree_node_get_key(node, 0));
 865
 866	if (move) {
 867		brelse(path[level].bp_bh);
 868		path[level].bp_bh = path[level].bp_sib_bh;
 869		path[level].bp_sib_bh = NULL;
 870		path[level].bp_index += lnchildren;
 871		path[level + 1].bp_index--;
 872	} else {
 873		brelse(path[level].bp_sib_bh);
 874		path[level].bp_sib_bh = NULL;
 875		path[level].bp_index -= n;
 876	}
 877
 878	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 879}
 880
 881static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
 882				    struct nilfs_btree_path *path,
 883				    int level, __u64 *keyp, __u64 *ptrp)
 884{
 885	struct nilfs_btree_node *node, *right;
 886	int nchildren, rnchildren, n, move, ncblk;
 887
 888	node = nilfs_btree_get_nonroot_node(path, level);
 889	right = nilfs_btree_get_sib_node(path, level);
 890	nchildren = nilfs_btree_node_get_nchildren(node);
 891	rnchildren = nilfs_btree_node_get_nchildren(right);
 892	ncblk = nilfs_btree_nchildren_per_block(btree);
 893	move = 0;
 894
 895	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
 896	if (n > nchildren - path[level].bp_index) {
 897		/* move insert point */
 898		n--;
 899		move = 1;
 900	}
 901
 902	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 903
 904	if (!buffer_dirty(path[level].bp_bh))
 905		mark_buffer_dirty(path[level].bp_bh);
 906	if (!buffer_dirty(path[level].bp_sib_bh))
 907		mark_buffer_dirty(path[level].bp_sib_bh);
 908
 909	path[level + 1].bp_index++;
 910	nilfs_btree_promote_key(btree, path, level + 1,
 911				nilfs_btree_node_get_key(right, 0));
 912	path[level + 1].bp_index--;
 913
 914	if (move) {
 915		brelse(path[level].bp_bh);
 916		path[level].bp_bh = path[level].bp_sib_bh;
 917		path[level].bp_sib_bh = NULL;
 918		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 919		path[level + 1].bp_index++;
 920	} else {
 921		brelse(path[level].bp_sib_bh);
 922		path[level].bp_sib_bh = NULL;
 923	}
 924
 925	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 926}
 927
 928static void nilfs_btree_split(struct nilfs_bmap *btree,
 929			      struct nilfs_btree_path *path,
 930			      int level, __u64 *keyp, __u64 *ptrp)
 931{
 932	struct nilfs_btree_node *node, *right;
 933	int nchildren, n, move, ncblk;
 934
 935	node = nilfs_btree_get_nonroot_node(path, level);
 936	right = nilfs_btree_get_sib_node(path, level);
 937	nchildren = nilfs_btree_node_get_nchildren(node);
 938	ncblk = nilfs_btree_nchildren_per_block(btree);
 939	move = 0;
 940
 941	n = (nchildren + 1) / 2;
 942	if (n > nchildren - path[level].bp_index) {
 943		n--;
 944		move = 1;
 945	}
 946
 947	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 948
 949	if (!buffer_dirty(path[level].bp_bh))
 950		mark_buffer_dirty(path[level].bp_bh);
 951	if (!buffer_dirty(path[level].bp_sib_bh))
 952		mark_buffer_dirty(path[level].bp_sib_bh);
 953
 954	if (move) {
 955		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 956		nilfs_btree_node_insert(right, path[level].bp_index,
 957					*keyp, *ptrp, ncblk);
 958
 959		*keyp = nilfs_btree_node_get_key(right, 0);
 960		*ptrp = path[level].bp_newreq.bpr_ptr;
 961
 962		brelse(path[level].bp_bh);
 963		path[level].bp_bh = path[level].bp_sib_bh;
 964		path[level].bp_sib_bh = NULL;
 965	} else {
 966		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 967
 968		*keyp = nilfs_btree_node_get_key(right, 0);
 969		*ptrp = path[level].bp_newreq.bpr_ptr;
 970
 971		brelse(path[level].bp_sib_bh);
 972		path[level].bp_sib_bh = NULL;
 973	}
 974
 975	path[level + 1].bp_index++;
 976}
 977
 978static void nilfs_btree_grow(struct nilfs_bmap *btree,
 979			     struct nilfs_btree_path *path,
 980			     int level, __u64 *keyp, __u64 *ptrp)
 981{
 982	struct nilfs_btree_node *root, *child;
 983	int n, ncblk;
 984
 985	root = nilfs_btree_get_root(btree);
 986	child = nilfs_btree_get_sib_node(path, level);
 987	ncblk = nilfs_btree_nchildren_per_block(btree);
 988
 989	n = nilfs_btree_node_get_nchildren(root);
 990
 991	nilfs_btree_node_move_right(root, child, n,
 992				    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
 993	nilfs_btree_node_set_level(root, level + 1);
 994
 995	if (!buffer_dirty(path[level].bp_sib_bh))
 996		mark_buffer_dirty(path[level].bp_sib_bh);
 997
 998	path[level].bp_bh = path[level].bp_sib_bh;
 999	path[level].bp_sib_bh = NULL;
1000
1001	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
1002
1003	*keyp = nilfs_btree_node_get_key(child, 0);
1004	*ptrp = path[level].bp_newreq.bpr_ptr;
1005}
1006
1007static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
1008				   const struct nilfs_btree_path *path)
1009{
1010	struct nilfs_btree_node *node;
1011	int level, ncmax;
1012
1013	if (path == NULL)
1014		return NILFS_BMAP_INVALID_PTR;
1015
1016	/* left sibling */
1017	level = NILFS_BTREE_LEVEL_NODE_MIN;
1018	if (path[level].bp_index > 0) {
1019		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1020		return nilfs_btree_node_get_ptr(node,
1021						path[level].bp_index - 1,
1022						ncmax);
1023	}
1024
1025	/* parent */
1026	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1027	if (level <= nilfs_btree_height(btree) - 1) {
1028		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1029		return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1030						ncmax);
1031	}
1032
1033	return NILFS_BMAP_INVALID_PTR;
1034}
1035
1036static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1037				       const struct nilfs_btree_path *path,
1038				       __u64 key)
1039{
1040	__u64 ptr;
1041
1042	ptr = nilfs_bmap_find_target_seq(btree, key);
1043	if (ptr != NILFS_BMAP_INVALID_PTR)
1044		/* sequential access */
1045		return ptr;
1046
1047	ptr = nilfs_btree_find_near(btree, path);
1048	if (ptr != NILFS_BMAP_INVALID_PTR)
1049		/* near */
1050		return ptr;
1051
1052	/* block group */
1053	return nilfs_bmap_find_target_in_group(btree);
1054}
1055
1056static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1057				      struct nilfs_btree_path *path,
1058				      int *levelp, __u64 key, __u64 ptr,
1059				      struct nilfs_bmap_stats *stats)
1060{
1061	struct buffer_head *bh;
1062	struct nilfs_btree_node *node, *parent, *sib;
1063	__u64 sibptr;
1064	int pindex, level, ncmax, ncblk, ret;
1065	struct inode *dat = NULL;
1066
1067	stats->bs_nblocks = 0;
1068	level = NILFS_BTREE_LEVEL_DATA;
1069
1070	/* allocate a new ptr for data block */
1071	if (NILFS_BMAP_USE_VBN(btree)) {
1072		path[level].bp_newreq.bpr_ptr =
1073			nilfs_btree_find_target_v(btree, path, key);
1074		dat = nilfs_bmap_get_dat(btree);
1075	}
1076
1077	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1078	if (ret < 0)
1079		goto err_out_data;
1080
1081	ncblk = nilfs_btree_nchildren_per_block(btree);
1082
1083	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1084	     level < nilfs_btree_height(btree) - 1;
1085	     level++) {
1086		node = nilfs_btree_get_nonroot_node(path, level);
1087		if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1088			path[level].bp_op = nilfs_btree_do_insert;
1089			stats->bs_nblocks++;
1090			goto out;
1091		}
1092
1093		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1094		pindex = path[level + 1].bp_index;
1095
1096		/* left sibling */
1097		if (pindex > 0) {
1098			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1099							  ncmax);
1100			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1101			if (ret < 0)
1102				goto err_out_child_node;
1103			sib = (struct nilfs_btree_node *)bh->b_data;
1104			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1105				path[level].bp_sib_bh = bh;
1106				path[level].bp_op = nilfs_btree_carry_left;
1107				stats->bs_nblocks++;
1108				goto out;
1109			} else {
1110				brelse(bh);
1111			}
1112		}
1113
1114		/* right sibling */
1115		if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1116			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1117							  ncmax);
1118			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1119			if (ret < 0)
1120				goto err_out_child_node;
1121			sib = (struct nilfs_btree_node *)bh->b_data;
1122			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1123				path[level].bp_sib_bh = bh;
1124				path[level].bp_op = nilfs_btree_carry_right;
1125				stats->bs_nblocks++;
1126				goto out;
1127			} else {
1128				brelse(bh);
1129			}
1130		}
1131
1132		/* split */
1133		path[level].bp_newreq.bpr_ptr =
1134			path[level - 1].bp_newreq.bpr_ptr + 1;
1135		ret = nilfs_bmap_prepare_alloc_ptr(btree,
1136						   &path[level].bp_newreq, dat);
1137		if (ret < 0)
1138			goto err_out_child_node;
1139		ret = nilfs_btree_get_new_block(btree,
1140						path[level].bp_newreq.bpr_ptr,
1141						&bh);
1142		if (ret < 0)
1143			goto err_out_curr_node;
1144
1145		stats->bs_nblocks++;
1146
1147		sib = (struct nilfs_btree_node *)bh->b_data;
1148		nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1149		path[level].bp_sib_bh = bh;
1150		path[level].bp_op = nilfs_btree_split;
1151	}
1152
1153	/* root */
1154	node = nilfs_btree_get_root(btree);
1155	if (nilfs_btree_node_get_nchildren(node) <
1156	    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1157		path[level].bp_op = nilfs_btree_do_insert;
1158		stats->bs_nblocks++;
1159		goto out;
1160	}
1161
1162	/* grow */
1163	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1164	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1165	if (ret < 0)
1166		goto err_out_child_node;
1167	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1168					&bh);
1169	if (ret < 0)
1170		goto err_out_curr_node;
1171
1172	nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1173			      0, level, 0, ncblk, NULL, NULL);
1174	path[level].bp_sib_bh = bh;
1175	path[level].bp_op = nilfs_btree_grow;
1176
1177	level++;
1178	path[level].bp_op = nilfs_btree_do_insert;
1179
1180	/* a newly-created node block and a data block are added */
1181	stats->bs_nblocks += 2;
1182
1183	/* success */
1184 out:
1185	*levelp = level;
1186	return ret;
1187
1188	/* error */
1189 err_out_curr_node:
1190	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1191 err_out_child_node:
1192	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1193		nilfs_btnode_delete(path[level].bp_sib_bh);
1194		nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1195
1196	}
1197
1198	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1199 err_out_data:
1200	*levelp = level;
1201	stats->bs_nblocks = 0;
1202	return ret;
1203}
1204
1205static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1206				      struct nilfs_btree_path *path,
1207				      int maxlevel, __u64 key, __u64 ptr)
1208{
1209	struct inode *dat = NULL;
1210	int level;
1211
1212	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1213	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1214	if (NILFS_BMAP_USE_VBN(btree)) {
1215		nilfs_bmap_set_target_v(btree, key, ptr);
1216		dat = nilfs_bmap_get_dat(btree);
1217	}
1218
1219	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1220		nilfs_bmap_commit_alloc_ptr(btree,
1221					    &path[level - 1].bp_newreq, dat);
1222		path[level].bp_op(btree, path, level, &key, &ptr);
1223	}
1224
1225	if (!nilfs_bmap_dirty(btree))
1226		nilfs_bmap_set_dirty(btree);
1227}
1228
1229static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1230{
1231	struct nilfs_btree_path *path;
1232	struct nilfs_bmap_stats stats;
1233	int level, ret;
1234
1235	path = nilfs_btree_alloc_path();
1236	if (path == NULL)
1237		return -ENOMEM;
1238
1239	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1240				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1241	if (ret != -ENOENT) {
1242		if (ret == 0)
1243			ret = -EEXIST;
1244		goto out;
1245	}
1246
1247	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1248	if (ret < 0)
1249		goto out;
1250	nilfs_btree_commit_insert(btree, path, level, key, ptr);
1251	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1252
1253 out:
1254	nilfs_btree_free_path(path);
1255	return ret;
1256}
1257
1258static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1259				  struct nilfs_btree_path *path,
1260				  int level, __u64 *keyp, __u64 *ptrp)
1261{
1262	struct nilfs_btree_node *node;
1263	int ncblk;
1264
1265	if (level < nilfs_btree_height(btree) - 1) {
1266		node = nilfs_btree_get_nonroot_node(path, level);
1267		ncblk = nilfs_btree_nchildren_per_block(btree);
1268		nilfs_btree_node_delete(node, path[level].bp_index,
1269					keyp, ptrp, ncblk);
1270		if (!buffer_dirty(path[level].bp_bh))
1271			mark_buffer_dirty(path[level].bp_bh);
1272		if (path[level].bp_index == 0)
1273			nilfs_btree_promote_key(btree, path, level + 1,
1274				nilfs_btree_node_get_key(node, 0));
1275	} else {
1276		node = nilfs_btree_get_root(btree);
1277		nilfs_btree_node_delete(node, path[level].bp_index,
1278					keyp, ptrp,
1279					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1280	}
1281}
1282
1283static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1284				    struct nilfs_btree_path *path,
1285				    int level, __u64 *keyp, __u64 *ptrp)
1286{
1287	struct nilfs_btree_node *node, *left;
1288	int nchildren, lnchildren, n, ncblk;
1289
1290	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1291
1292	node = nilfs_btree_get_nonroot_node(path, level);
1293	left = nilfs_btree_get_sib_node(path, level);
1294	nchildren = nilfs_btree_node_get_nchildren(node);
1295	lnchildren = nilfs_btree_node_get_nchildren(left);
1296	ncblk = nilfs_btree_nchildren_per_block(btree);
1297
1298	n = (nchildren + lnchildren) / 2 - nchildren;
1299
1300	nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1301
1302	if (!buffer_dirty(path[level].bp_bh))
1303		mark_buffer_dirty(path[level].bp_bh);
1304	if (!buffer_dirty(path[level].bp_sib_bh))
1305		mark_buffer_dirty(path[level].bp_sib_bh);
1306
1307	nilfs_btree_promote_key(btree, path, level + 1,
1308				nilfs_btree_node_get_key(node, 0));
1309
1310	brelse(path[level].bp_sib_bh);
1311	path[level].bp_sib_bh = NULL;
1312	path[level].bp_index += n;
1313}
1314
1315static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1316				     struct nilfs_btree_path *path,
1317				     int level, __u64 *keyp, __u64 *ptrp)
1318{
1319	struct nilfs_btree_node *node, *right;
1320	int nchildren, rnchildren, n, ncblk;
1321
1322	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1323
1324	node = nilfs_btree_get_nonroot_node(path, level);
1325	right = nilfs_btree_get_sib_node(path, level);
1326	nchildren = nilfs_btree_node_get_nchildren(node);
1327	rnchildren = nilfs_btree_node_get_nchildren(right);
1328	ncblk = nilfs_btree_nchildren_per_block(btree);
1329
1330	n = (nchildren + rnchildren) / 2 - nchildren;
1331
1332	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1333
1334	if (!buffer_dirty(path[level].bp_bh))
1335		mark_buffer_dirty(path[level].bp_bh);
1336	if (!buffer_dirty(path[level].bp_sib_bh))
1337		mark_buffer_dirty(path[level].bp_sib_bh);
1338
1339	path[level + 1].bp_index++;
1340	nilfs_btree_promote_key(btree, path, level + 1,
1341				nilfs_btree_node_get_key(right, 0));
1342	path[level + 1].bp_index--;
1343
1344	brelse(path[level].bp_sib_bh);
1345	path[level].bp_sib_bh = NULL;
1346}
1347
1348static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1349				    struct nilfs_btree_path *path,
1350				    int level, __u64 *keyp, __u64 *ptrp)
1351{
1352	struct nilfs_btree_node *node, *left;
1353	int n, ncblk;
1354
1355	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1356
1357	node = nilfs_btree_get_nonroot_node(path, level);
1358	left = nilfs_btree_get_sib_node(path, level);
1359	ncblk = nilfs_btree_nchildren_per_block(btree);
1360
1361	n = nilfs_btree_node_get_nchildren(node);
1362
1363	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1364
1365	if (!buffer_dirty(path[level].bp_sib_bh))
1366		mark_buffer_dirty(path[level].bp_sib_bh);
1367
1368	nilfs_btnode_delete(path[level].bp_bh);
1369	path[level].bp_bh = path[level].bp_sib_bh;
1370	path[level].bp_sib_bh = NULL;
1371	path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1372}
1373
1374static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1375				     struct nilfs_btree_path *path,
1376				     int level, __u64 *keyp, __u64 *ptrp)
1377{
1378	struct nilfs_btree_node *node, *right;
1379	int n, ncblk;
1380
1381	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1382
1383	node = nilfs_btree_get_nonroot_node(path, level);
1384	right = nilfs_btree_get_sib_node(path, level);
1385	ncblk = nilfs_btree_nchildren_per_block(btree);
1386
1387	n = nilfs_btree_node_get_nchildren(right);
1388
1389	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1390
1391	if (!buffer_dirty(path[level].bp_bh))
1392		mark_buffer_dirty(path[level].bp_bh);
1393
1394	nilfs_btnode_delete(path[level].bp_sib_bh);
1395	path[level].bp_sib_bh = NULL;
1396	path[level + 1].bp_index++;
1397}
1398
1399static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1400			       struct nilfs_btree_path *path,
1401			       int level, __u64 *keyp, __u64 *ptrp)
1402{
1403	struct nilfs_btree_node *root, *child;
1404	int n, ncblk;
1405
1406	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1407
1408	root = nilfs_btree_get_root(btree);
1409	child = nilfs_btree_get_nonroot_node(path, level);
1410	ncblk = nilfs_btree_nchildren_per_block(btree);
1411
1412	nilfs_btree_node_delete(root, 0, NULL, NULL,
1413				NILFS_BTREE_ROOT_NCHILDREN_MAX);
1414	nilfs_btree_node_set_level(root, level);
1415	n = nilfs_btree_node_get_nchildren(child);
1416	nilfs_btree_node_move_left(root, child, n,
1417				   NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1418
1419	nilfs_btnode_delete(path[level].bp_bh);
1420	path[level].bp_bh = NULL;
1421}
1422
1423static void nilfs_btree_nop(struct nilfs_bmap *btree,
1424			    struct nilfs_btree_path *path,
1425			    int level, __u64 *keyp, __u64 *ptrp)
1426{
1427}
1428
1429static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1430				      struct nilfs_btree_path *path,
1431				      int *levelp,
1432				      struct nilfs_bmap_stats *stats,
1433				      struct inode *dat)
1434{
1435	struct buffer_head *bh;
1436	struct nilfs_btree_node *node, *parent, *sib;
1437	__u64 sibptr;
1438	int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1439
1440	ret = 0;
1441	stats->bs_nblocks = 0;
1442	ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1443	ncblk = nilfs_btree_nchildren_per_block(btree);
1444
1445	for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1446	     level < nilfs_btree_height(btree) - 1;
1447	     level++) {
1448		node = nilfs_btree_get_nonroot_node(path, level);
1449		path[level].bp_oldreq.bpr_ptr =
1450			nilfs_btree_node_get_ptr(node, dindex, ncblk);
1451		ret = nilfs_bmap_prepare_end_ptr(btree,
1452						 &path[level].bp_oldreq, dat);
1453		if (ret < 0)
1454			goto err_out_child_node;
1455
1456		if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1457			path[level].bp_op = nilfs_btree_do_delete;
1458			stats->bs_nblocks++;
1459			goto out;
1460		}
1461
1462		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1463		pindex = path[level + 1].bp_index;
1464		dindex = pindex;
1465
1466		if (pindex > 0) {
1467			/* left sibling */
1468			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1469							  ncmax);
1470			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1471			if (ret < 0)
1472				goto err_out_curr_node;
1473			sib = (struct nilfs_btree_node *)bh->b_data;
1474			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1475				path[level].bp_sib_bh = bh;
1476				path[level].bp_op = nilfs_btree_borrow_left;
1477				stats->bs_nblocks++;
1478				goto out;
1479			} else {
1480				path[level].bp_sib_bh = bh;
1481				path[level].bp_op = nilfs_btree_concat_left;
1482				stats->bs_nblocks++;
1483				/* continue; */
1484			}
1485		} else if (pindex <
1486			   nilfs_btree_node_get_nchildren(parent) - 1) {
1487			/* right sibling */
1488			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1489							  ncmax);
1490			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1491			if (ret < 0)
1492				goto err_out_curr_node;
1493			sib = (struct nilfs_btree_node *)bh->b_data;
1494			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1495				path[level].bp_sib_bh = bh;
1496				path[level].bp_op = nilfs_btree_borrow_right;
1497				stats->bs_nblocks++;
1498				goto out;
1499			} else {
1500				path[level].bp_sib_bh = bh;
1501				path[level].bp_op = nilfs_btree_concat_right;
1502				stats->bs_nblocks++;
1503				/*
1504				 * When merging right sibling node
1505				 * into the current node, pointer to
1506				 * the right sibling node must be
1507				 * terminated instead.  The adjustment
1508				 * below is required for that.
1509				 */
1510				dindex = pindex + 1;
1511				/* continue; */
1512			}
1513		} else {
1514			/* no siblings */
1515			/* the only child of the root node */
1516			WARN_ON(level != nilfs_btree_height(btree) - 2);
1517			if (nilfs_btree_node_get_nchildren(node) - 1 <=
1518			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1519				path[level].bp_op = nilfs_btree_shrink;
1520				stats->bs_nblocks += 2;
1521				level++;
1522				path[level].bp_op = nilfs_btree_nop;
1523				goto shrink_root_child;
1524			} else {
1525				path[level].bp_op = nilfs_btree_do_delete;
1526				stats->bs_nblocks++;
1527				goto out;
1528			}
1529		}
1530	}
1531
1532	/* child of the root node is deleted */
1533	path[level].bp_op = nilfs_btree_do_delete;
1534	stats->bs_nblocks++;
1535
1536shrink_root_child:
1537	node = nilfs_btree_get_root(btree);
1538	path[level].bp_oldreq.bpr_ptr =
1539		nilfs_btree_node_get_ptr(node, dindex,
1540					 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1541
1542	ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1543	if (ret < 0)
1544		goto err_out_child_node;
1545
1546	/* success */
1547 out:
1548	*levelp = level;
1549	return ret;
1550
1551	/* error */
1552 err_out_curr_node:
1553	nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1554 err_out_child_node:
1555	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1556		brelse(path[level].bp_sib_bh);
1557		nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1558	}
1559	*levelp = level;
1560	stats->bs_nblocks = 0;
1561	return ret;
1562}
1563
1564static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1565				      struct nilfs_btree_path *path,
1566				      int maxlevel, struct inode *dat)
1567{
1568	int level;
1569
1570	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1571		nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1572		path[level].bp_op(btree, path, level, NULL, NULL);
1573	}
1574
1575	if (!nilfs_bmap_dirty(btree))
1576		nilfs_bmap_set_dirty(btree);
1577}
1578
1579static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1580
1581{
1582	struct nilfs_btree_path *path;
1583	struct nilfs_bmap_stats stats;
1584	struct inode *dat;
1585	int level, ret;
1586
1587	path = nilfs_btree_alloc_path();
1588	if (path == NULL)
1589		return -ENOMEM;
1590
1591	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1592				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1593	if (ret < 0)
1594		goto out;
1595
1596
1597	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1598
1599	ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1600	if (ret < 0)
1601		goto out;
1602	nilfs_btree_commit_delete(btree, path, level, dat);
1603	nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1604
1605out:
1606	nilfs_btree_free_path(path);
1607	return ret;
1608}
1609
1610static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1611				__u64 *keyp)
1612{
1613	struct nilfs_btree_path *path;
1614	const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1615	int ret;
1616
1617	path = nilfs_btree_alloc_path();
1618	if (!path)
1619		return -ENOMEM;
1620
1621	ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1622	if (!ret)
1623		*keyp = start;
1624	else if (ret == -ENOENT)
1625		ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1626
1627	nilfs_btree_free_path(path);
1628	return ret;
1629}
1630
1631static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1632{
1633	struct nilfs_btree_path *path;
1634	int ret;
1635
1636	path = nilfs_btree_alloc_path();
1637	if (path == NULL)
1638		return -ENOMEM;
1639
1640	ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1641
1642	nilfs_btree_free_path(path);
1643
1644	return ret;
1645}
1646
1647static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1648{
1649	struct buffer_head *bh;
1650	struct nilfs_btree_node *root, *node;
1651	__u64 maxkey, nextmaxkey;
1652	__u64 ptr;
1653	int nchildren, ret;
1654
1655	root = nilfs_btree_get_root(btree);
1656	switch (nilfs_btree_height(btree)) {
1657	case 2:
1658		bh = NULL;
1659		node = root;
1660		break;
1661	case 3:
1662		nchildren = nilfs_btree_node_get_nchildren(root);
1663		if (nchildren > 1)
1664			return 0;
1665		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1666					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1667		ret = nilfs_btree_get_block(btree, ptr, &bh);
1668		if (ret < 0)
1669			return ret;
1670		node = (struct nilfs_btree_node *)bh->b_data;
1671		break;
1672	default:
1673		return 0;
1674	}
1675
1676	nchildren = nilfs_btree_node_get_nchildren(node);
1677	maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1678	nextmaxkey = (nchildren > 1) ?
1679		nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1680	brelse(bh);
 
1681
1682	return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1683}
1684
1685static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1686				   __u64 *keys, __u64 *ptrs, int nitems)
1687{
1688	struct buffer_head *bh;
1689	struct nilfs_btree_node *node, *root;
1690	__le64 *dkeys;
1691	__le64 *dptrs;
1692	__u64 ptr;
1693	int nchildren, ncmax, i, ret;
1694
1695	root = nilfs_btree_get_root(btree);
1696	switch (nilfs_btree_height(btree)) {
1697	case 2:
1698		bh = NULL;
1699		node = root;
1700		ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1701		break;
1702	case 3:
1703		nchildren = nilfs_btree_node_get_nchildren(root);
1704		WARN_ON(nchildren > 1);
1705		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1706					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1707		ret = nilfs_btree_get_block(btree, ptr, &bh);
1708		if (ret < 0)
1709			return ret;
1710		node = (struct nilfs_btree_node *)bh->b_data;
1711		ncmax = nilfs_btree_nchildren_per_block(btree);
1712		break;
1713	default:
1714		node = NULL;
1715		return -EINVAL;
1716	}
1717
1718	nchildren = nilfs_btree_node_get_nchildren(node);
1719	if (nchildren < nitems)
1720		nitems = nchildren;
1721	dkeys = nilfs_btree_node_dkeys(node);
1722	dptrs = nilfs_btree_node_dptrs(node, ncmax);
1723	for (i = 0; i < nitems; i++) {
1724		keys[i] = le64_to_cpu(dkeys[i]);
1725		ptrs[i] = le64_to_cpu(dptrs[i]);
1726	}
1727
1728	brelse(bh);
 
1729
1730	return nitems;
1731}
1732
1733static int
1734nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1735				       union nilfs_bmap_ptr_req *dreq,
1736				       union nilfs_bmap_ptr_req *nreq,
1737				       struct buffer_head **bhp,
1738				       struct nilfs_bmap_stats *stats)
1739{
1740	struct buffer_head *bh;
1741	struct inode *dat = NULL;
1742	int ret;
1743
1744	stats->bs_nblocks = 0;
1745
1746	/* for data */
1747	/* cannot find near ptr */
1748	if (NILFS_BMAP_USE_VBN(btree)) {
1749		dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1750		dat = nilfs_bmap_get_dat(btree);
1751	}
1752
1753	ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode);
1754	if (ret < 0)
1755		return ret;
1756
1757	ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1758	if (ret < 0)
1759		return ret;
1760
1761	*bhp = NULL;
1762	stats->bs_nblocks++;
1763	if (nreq != NULL) {
1764		nreq->bpr_ptr = dreq->bpr_ptr + 1;
1765		ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1766		if (ret < 0)
1767			goto err_out_dreq;
1768
1769		ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1770		if (ret < 0)
1771			goto err_out_nreq;
1772
1773		*bhp = bh;
1774		stats->bs_nblocks++;
1775	}
1776
1777	/* success */
1778	return 0;
1779
1780	/* error */
1781 err_out_nreq:
1782	nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1783 err_out_dreq:
1784	nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1785	stats->bs_nblocks = 0;
1786	return ret;
1787
1788}
1789
1790static void
1791nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1792				      __u64 key, __u64 ptr,
1793				      const __u64 *keys, const __u64 *ptrs,
1794				      int n,
1795				      union nilfs_bmap_ptr_req *dreq,
1796				      union nilfs_bmap_ptr_req *nreq,
1797				      struct buffer_head *bh)
1798{
1799	struct nilfs_btree_node *node;
1800	struct inode *dat;
1801	__u64 tmpptr;
1802	int ncblk;
1803
1804	/* free resources */
1805	if (btree->b_ops->bop_clear != NULL)
1806		btree->b_ops->bop_clear(btree);
1807
1808	/* ptr must be a pointer to a buffer head. */
1809	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1810
1811	/* convert and insert */
1812	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1813	__nilfs_btree_init(btree);
1814	if (nreq != NULL) {
1815		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1816		nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1817
1818		/* create child node at level 1 */
1819		node = (struct nilfs_btree_node *)bh->b_data;
1820		ncblk = nilfs_btree_nchildren_per_block(btree);
1821		nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1822		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1823		if (!buffer_dirty(bh))
1824			mark_buffer_dirty(bh);
1825		if (!nilfs_bmap_dirty(btree))
1826			nilfs_bmap_set_dirty(btree);
1827
1828		brelse(bh);
1829
1830		/* create root node at level 2 */
1831		node = nilfs_btree_get_root(btree);
1832		tmpptr = nreq->bpr_ptr;
1833		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1834				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1835				      &keys[0], &tmpptr);
1836	} else {
1837		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1838
1839		/* create root node at level 1 */
1840		node = nilfs_btree_get_root(btree);
1841		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1842				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1843				      keys, ptrs);
1844		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1845					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1846		if (!nilfs_bmap_dirty(btree))
1847			nilfs_bmap_set_dirty(btree);
1848	}
1849
1850	if (NILFS_BMAP_USE_VBN(btree))
1851		nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1852}
1853
1854/**
1855 * nilfs_btree_convert_and_insert -
1856 * @bmap:
1857 * @key:
1858 * @ptr:
1859 * @keys:
1860 * @ptrs:
1861 * @n:
1862 */
1863int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1864				   __u64 key, __u64 ptr,
1865				   const __u64 *keys, const __u64 *ptrs, int n)
1866{
1867	struct buffer_head *bh = NULL;
1868	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1869	struct nilfs_bmap_stats stats;
1870	int ret;
1871
1872	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1873		di = &dreq;
1874		ni = NULL;
1875	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1876			   nilfs_btree_node_size(btree))) {
1877		di = &dreq;
1878		ni = &nreq;
1879	} else {
1880		di = NULL;
1881		ni = NULL;
1882		BUG();
1883	}
1884
1885	ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1886						     &stats);
1887	if (ret < 0)
1888		return ret;
1889	nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1890					      di, ni, bh);
1891	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1892	return 0;
1893}
1894
1895static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1896				   struct nilfs_btree_path *path,
1897				   int level,
1898				   struct buffer_head *bh)
1899{
1900	while ((++level < nilfs_btree_height(btree) - 1) &&
1901	       !buffer_dirty(path[level].bp_bh))
1902		mark_buffer_dirty(path[level].bp_bh);
1903
1904	return 0;
1905}
1906
1907static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1908					struct nilfs_btree_path *path,
1909					int level, struct inode *dat)
1910{
1911	struct nilfs_btree_node *parent;
1912	int ncmax, ret;
1913
1914	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1915	path[level].bp_oldreq.bpr_ptr =
1916		nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1917					 ncmax);
1918	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1919	ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1920				       &path[level].bp_newreq.bpr_req);
1921	if (ret < 0)
1922		return ret;
1923
1924	if (buffer_nilfs_node(path[level].bp_bh)) {
1925		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1926		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1927		path[level].bp_ctxt.bh = path[level].bp_bh;
1928		ret = nilfs_btnode_prepare_change_key(
1929			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1930			&path[level].bp_ctxt);
1931		if (ret < 0) {
1932			nilfs_dat_abort_update(dat,
1933					       &path[level].bp_oldreq.bpr_req,
1934					       &path[level].bp_newreq.bpr_req);
1935			return ret;
1936		}
1937	}
1938
1939	return 0;
1940}
1941
1942static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1943					struct nilfs_btree_path *path,
1944					int level, struct inode *dat)
1945{
1946	struct nilfs_btree_node *parent;
1947	int ncmax;
1948
1949	nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1950				&path[level].bp_newreq.bpr_req,
1951				btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1952
1953	if (buffer_nilfs_node(path[level].bp_bh)) {
1954		nilfs_btnode_commit_change_key(
1955			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1956			&path[level].bp_ctxt);
1957		path[level].bp_bh = path[level].bp_ctxt.bh;
1958	}
1959	set_buffer_nilfs_volatile(path[level].bp_bh);
1960
1961	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1962	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1963				 path[level].bp_newreq.bpr_ptr, ncmax);
1964}
1965
1966static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1967				       struct nilfs_btree_path *path,
1968				       int level, struct inode *dat)
1969{
1970	nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1971			       &path[level].bp_newreq.bpr_req);
1972	if (buffer_nilfs_node(path[level].bp_bh))
1973		nilfs_btnode_abort_change_key(
1974			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1975			&path[level].bp_ctxt);
1976}
1977
1978static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1979					   struct nilfs_btree_path *path,
1980					   int minlevel, int *maxlevelp,
1981					   struct inode *dat)
1982{
1983	int level, ret;
1984
1985	level = minlevel;
1986	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1987		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1988		if (ret < 0)
1989			return ret;
1990	}
1991	while ((++level < nilfs_btree_height(btree) - 1) &&
1992	       !buffer_dirty(path[level].bp_bh)) {
1993
1994		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1995		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1996		if (ret < 0)
1997			goto out;
1998	}
1999
2000	/* success */
2001	*maxlevelp = level - 1;
2002	return 0;
2003
2004	/* error */
2005 out:
2006	while (--level > minlevel)
2007		nilfs_btree_abort_update_v(btree, path, level, dat);
2008	if (!buffer_nilfs_volatile(path[level].bp_bh))
2009		nilfs_btree_abort_update_v(btree, path, level, dat);
2010	return ret;
2011}
2012
2013static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2014					   struct nilfs_btree_path *path,
2015					   int minlevel, int maxlevel,
2016					   struct buffer_head *bh,
2017					   struct inode *dat)
2018{
2019	int level;
2020
2021	if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2022		nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2023
2024	for (level = minlevel + 1; level <= maxlevel; level++)
2025		nilfs_btree_commit_update_v(btree, path, level, dat);
2026}
2027
2028static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2029				   struct nilfs_btree_path *path,
2030				   int level, struct buffer_head *bh)
2031{
2032	int maxlevel = 0, ret;
2033	struct nilfs_btree_node *parent;
2034	struct inode *dat = nilfs_bmap_get_dat(btree);
2035	__u64 ptr;
2036	int ncmax;
2037
2038	get_bh(bh);
2039	path[level].bp_bh = bh;
2040	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2041					      dat);
2042	if (ret < 0)
2043		goto out;
2044
2045	if (buffer_nilfs_volatile(path[level].bp_bh)) {
2046		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2047		ptr = nilfs_btree_node_get_ptr(parent,
2048					       path[level + 1].bp_index,
2049					       ncmax);
2050		ret = nilfs_dat_mark_dirty(dat, ptr);
2051		if (ret < 0)
2052			goto out;
2053	}
2054
2055	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2056
2057 out:
2058	brelse(path[level].bp_bh);
2059	path[level].bp_bh = NULL;
2060	return ret;
2061}
2062
2063static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2064				 struct buffer_head *bh)
2065{
2066	struct nilfs_btree_path *path;
2067	struct nilfs_btree_node *node;
2068	__u64 key;
2069	int level, ret;
2070
2071	WARN_ON(!buffer_dirty(bh));
2072
2073	path = nilfs_btree_alloc_path();
2074	if (path == NULL)
2075		return -ENOMEM;
2076
2077	if (buffer_nilfs_node(bh)) {
2078		node = (struct nilfs_btree_node *)bh->b_data;
2079		key = nilfs_btree_node_get_key(node, 0);
2080		level = nilfs_btree_node_get_level(node);
2081	} else {
2082		key = nilfs_bmap_data_get_key(btree, bh);
2083		level = NILFS_BTREE_LEVEL_DATA;
2084	}
2085
2086	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2087	if (ret < 0) {
2088		if (unlikely(ret == -ENOENT))
2089			nilfs_crit(btree->b_inode->i_sb,
2090				   "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
2091				   btree->b_inode->i_ino,
2092				   (unsigned long long)key, level);
2093		goto out;
2094	}
2095
2096	ret = NILFS_BMAP_USE_VBN(btree) ?
2097		nilfs_btree_propagate_v(btree, path, level, bh) :
2098		nilfs_btree_propagate_p(btree, path, level, bh);
2099
2100 out:
2101	nilfs_btree_free_path(path);
2102
2103	return ret;
2104}
2105
2106static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2107				    struct buffer_head *bh)
2108{
2109	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2110}
2111
2112static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2113					 struct list_head *lists,
2114					 struct buffer_head *bh)
2115{
2116	struct list_head *head;
2117	struct buffer_head *cbh;
2118	struct nilfs_btree_node *node, *cnode;
2119	__u64 key, ckey;
2120	int level;
2121
2122	get_bh(bh);
2123	node = (struct nilfs_btree_node *)bh->b_data;
2124	key = nilfs_btree_node_get_key(node, 0);
2125	level = nilfs_btree_node_get_level(node);
2126	if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2127	    level >= NILFS_BTREE_LEVEL_MAX) {
2128		dump_stack();
2129		nilfs_warn(btree->b_inode->i_sb,
2130			   "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
2131			   level, (unsigned long long)key,
2132			   btree->b_inode->i_ino,
2133			   (unsigned long long)bh->b_blocknr);
 
2134		return;
2135	}
2136
2137	list_for_each(head, &lists[level]) {
2138		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2139		cnode = (struct nilfs_btree_node *)cbh->b_data;
2140		ckey = nilfs_btree_node_get_key(cnode, 0);
2141		if (key < ckey)
2142			break;
2143	}
2144	list_add_tail(&bh->b_assoc_buffers, head);
2145}
2146
2147static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2148					     struct list_head *listp)
2149{
2150	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
2151	struct address_space *btcache = btnc_inode->i_mapping;
2152	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2153	struct pagevec pvec;
2154	struct buffer_head *bh, *head;
2155	pgoff_t index = 0;
2156	int level, i;
2157
2158	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2159	     level < NILFS_BTREE_LEVEL_MAX;
2160	     level++)
2161		INIT_LIST_HEAD(&lists[level]);
2162
2163	pagevec_init(&pvec);
2164
2165	while (pagevec_lookup_tag(&pvec, btcache, &index,
2166					PAGECACHE_TAG_DIRTY)) {
2167		for (i = 0; i < pagevec_count(&pvec); i++) {
2168			bh = head = page_buffers(pvec.pages[i]);
2169			do {
2170				if (buffer_dirty(bh))
2171					nilfs_btree_add_dirty_buffer(btree,
2172								     lists, bh);
2173			} while ((bh = bh->b_this_page) != head);
2174		}
2175		pagevec_release(&pvec);
2176		cond_resched();
2177	}
2178
2179	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2180	     level < NILFS_BTREE_LEVEL_MAX;
2181	     level++)
2182		list_splice_tail(&lists[level], listp);
2183}
2184
2185static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2186				struct nilfs_btree_path *path,
2187				int level,
2188				struct buffer_head **bh,
2189				sector_t blocknr,
2190				union nilfs_binfo *binfo)
2191{
2192	struct nilfs_btree_node *parent;
2193	__u64 key;
2194	__u64 ptr;
2195	int ncmax, ret;
2196
2197	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2198	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2199				       ncmax);
2200	if (buffer_nilfs_node(*bh)) {
2201		path[level].bp_ctxt.oldkey = ptr;
2202		path[level].bp_ctxt.newkey = blocknr;
2203		path[level].bp_ctxt.bh = *bh;
2204		ret = nilfs_btnode_prepare_change_key(
2205			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2206			&path[level].bp_ctxt);
2207		if (ret < 0)
2208			return ret;
2209		nilfs_btnode_commit_change_key(
2210			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2211			&path[level].bp_ctxt);
2212		*bh = path[level].bp_ctxt.bh;
2213	}
2214
2215	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2216				 ncmax);
2217
2218	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2219	/* on-disk format */
2220	binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2221	binfo->bi_dat.bi_level = level;
2222
2223	return 0;
2224}
2225
2226static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2227				struct nilfs_btree_path *path,
2228				int level,
2229				struct buffer_head **bh,
2230				sector_t blocknr,
2231				union nilfs_binfo *binfo)
2232{
2233	struct nilfs_btree_node *parent;
2234	struct inode *dat = nilfs_bmap_get_dat(btree);
2235	__u64 key;
2236	__u64 ptr;
2237	union nilfs_bmap_ptr_req req;
2238	int ncmax, ret;
2239
2240	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2241	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2242				       ncmax);
2243	req.bpr_ptr = ptr;
2244	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2245	if (ret < 0)
2246		return ret;
2247	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2248
2249	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2250	/* on-disk format */
2251	binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2252	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2253
2254	return 0;
2255}
2256
2257static int nilfs_btree_assign(struct nilfs_bmap *btree,
2258			      struct buffer_head **bh,
2259			      sector_t blocknr,
2260			      union nilfs_binfo *binfo)
2261{
2262	struct nilfs_btree_path *path;
2263	struct nilfs_btree_node *node;
2264	__u64 key;
2265	int level, ret;
2266
2267	path = nilfs_btree_alloc_path();
2268	if (path == NULL)
2269		return -ENOMEM;
2270
2271	if (buffer_nilfs_node(*bh)) {
2272		node = (struct nilfs_btree_node *)(*bh)->b_data;
2273		key = nilfs_btree_node_get_key(node, 0);
2274		level = nilfs_btree_node_get_level(node);
2275	} else {
2276		key = nilfs_bmap_data_get_key(btree, *bh);
2277		level = NILFS_BTREE_LEVEL_DATA;
2278	}
2279
2280	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2281	if (ret < 0) {
2282		WARN_ON(ret == -ENOENT);
2283		goto out;
2284	}
2285
2286	ret = NILFS_BMAP_USE_VBN(btree) ?
2287		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2288		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2289
2290 out:
2291	nilfs_btree_free_path(path);
2292
2293	return ret;
2294}
2295
2296static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2297				 struct buffer_head **bh,
2298				 sector_t blocknr,
2299				 union nilfs_binfo *binfo)
2300{
2301	struct nilfs_btree_node *node;
2302	__u64 key;
2303	int ret;
2304
2305	ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2306			     blocknr);
2307	if (ret < 0)
2308		return ret;
2309
2310	if (buffer_nilfs_node(*bh)) {
2311		node = (struct nilfs_btree_node *)(*bh)->b_data;
2312		key = nilfs_btree_node_get_key(node, 0);
2313	} else
2314		key = nilfs_bmap_data_get_key(btree, *bh);
2315
2316	/* on-disk format */
2317	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2318	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2319
2320	return 0;
2321}
2322
2323static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2324{
2325	struct buffer_head *bh;
2326	struct nilfs_btree_path *path;
2327	__u64 ptr;
2328	int ret;
2329
2330	path = nilfs_btree_alloc_path();
2331	if (path == NULL)
2332		return -ENOMEM;
2333
2334	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2335	if (ret < 0) {
2336		WARN_ON(ret == -ENOENT);
2337		goto out;
2338	}
2339	ret = nilfs_btree_get_block(btree, ptr, &bh);
2340	if (ret < 0) {
2341		WARN_ON(ret == -ENOENT);
2342		goto out;
2343	}
2344
2345	if (!buffer_dirty(bh))
2346		mark_buffer_dirty(bh);
2347	brelse(bh);
2348	if (!nilfs_bmap_dirty(btree))
2349		nilfs_bmap_set_dirty(btree);
2350
2351 out:
2352	nilfs_btree_free_path(path);
2353	return ret;
2354}
2355
2356static const struct nilfs_bmap_operations nilfs_btree_ops = {
2357	.bop_lookup		=	nilfs_btree_lookup,
2358	.bop_lookup_contig	=	nilfs_btree_lookup_contig,
2359	.bop_insert		=	nilfs_btree_insert,
2360	.bop_delete		=	nilfs_btree_delete,
2361	.bop_clear		=	NULL,
2362
2363	.bop_propagate		=	nilfs_btree_propagate,
2364
2365	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2366
2367	.bop_assign		=	nilfs_btree_assign,
2368	.bop_mark		=	nilfs_btree_mark,
2369
2370	.bop_seek_key		=	nilfs_btree_seek_key,
2371	.bop_last_key		=	nilfs_btree_last_key,
2372
2373	.bop_check_insert	=	NULL,
2374	.bop_check_delete	=	nilfs_btree_check_delete,
2375	.bop_gather_data	=	nilfs_btree_gather_data,
2376};
2377
2378static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2379	.bop_lookup		=	NULL,
2380	.bop_lookup_contig	=	NULL,
2381	.bop_insert		=	NULL,
2382	.bop_delete		=	NULL,
2383	.bop_clear		=	NULL,
2384
2385	.bop_propagate		=	nilfs_btree_propagate_gc,
2386
2387	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2388
2389	.bop_assign		=	nilfs_btree_assign_gc,
2390	.bop_mark		=	NULL,
2391
2392	.bop_seek_key		=	NULL,
2393	.bop_last_key		=	NULL,
2394
2395	.bop_check_insert	=	NULL,
2396	.bop_check_delete	=	NULL,
2397	.bop_gather_data	=	NULL,
2398};
2399
2400static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2401{
2402	bmap->b_ops = &nilfs_btree_ops;
2403	bmap->b_nchildren_per_block =
2404		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2405}
2406
2407int nilfs_btree_init(struct nilfs_bmap *bmap)
2408{
2409	int ret = 0;
2410
2411	__nilfs_btree_init(bmap);
2412
2413	if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
 
2414		ret = -EIO;
2415	else
2416		ret = nilfs_attach_btree_node_cache(
2417			&NILFS_BMAP_I(bmap)->vfs_inode);
2418
2419	return ret;
2420}
2421
2422void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2423{
2424	bmap->b_ops = &nilfs_btree_ops_gc;
2425	bmap->b_nchildren_per_block =
2426		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2427}
v4.6
 
   1/*
   2 * btree.c - NILFS B-tree.
   3 *
   4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; either version 2 of the License, or
   9 * (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write to the Free Software
  18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  19 *
  20 * Written by Koji Sato <koji@osrg.net>.
  21 */
  22
  23#include <linux/slab.h>
  24#include <linux/string.h>
  25#include <linux/errno.h>
  26#include <linux/pagevec.h>
  27#include "nilfs.h"
  28#include "page.h"
  29#include "btnode.h"
  30#include "btree.h"
  31#include "alloc.h"
  32#include "dat.h"
  33
  34static void __nilfs_btree_init(struct nilfs_bmap *bmap);
  35
  36static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
  37{
  38	struct nilfs_btree_path *path;
  39	int level = NILFS_BTREE_LEVEL_DATA;
  40
  41	path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
  42	if (path == NULL)
  43		goto out;
  44
  45	for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
  46		path[level].bp_bh = NULL;
  47		path[level].bp_sib_bh = NULL;
  48		path[level].bp_index = 0;
  49		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  50		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  51		path[level].bp_op = NULL;
  52	}
  53
  54out:
  55	return path;
  56}
  57
  58static void nilfs_btree_free_path(struct nilfs_btree_path *path)
  59{
  60	int level = NILFS_BTREE_LEVEL_DATA;
  61
  62	for (; level < NILFS_BTREE_LEVEL_MAX; level++)
  63		brelse(path[level].bp_bh);
  64
  65	kmem_cache_free(nilfs_btree_path_cache, path);
  66}
  67
  68/*
  69 * B-tree node operations
  70 */
  71static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
  72				     __u64 ptr, struct buffer_head **bhp)
  73{
  74	struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
 
  75	struct buffer_head *bh;
  76
  77	bh = nilfs_btnode_create_block(btnc, ptr);
  78	if (!bh)
  79		return -ENOMEM;
  80
  81	set_buffer_nilfs_volatile(bh);
  82	*bhp = bh;
  83	return 0;
  84}
  85
  86static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
  87{
  88	return node->bn_flags;
  89}
  90
  91static void
  92nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
  93{
  94	node->bn_flags = flags;
  95}
  96
  97static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
  98{
  99	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
 100}
 101
 102static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
 103{
 104	return node->bn_level;
 105}
 106
 107static void
 108nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
 109{
 110	node->bn_level = level;
 111}
 112
 113static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
 114{
 115	return le16_to_cpu(node->bn_nchildren);
 116}
 117
 118static void
 119nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
 120{
 121	node->bn_nchildren = cpu_to_le16(nchildren);
 122}
 123
 124static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
 125{
 126	return 1 << btree->b_inode->i_blkbits;
 127}
 128
 129static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
 130{
 131	return btree->b_nchildren_per_block;
 132}
 133
 134static __le64 *
 135nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
 136{
 137	return (__le64 *)((char *)(node + 1) +
 138			  (nilfs_btree_node_root(node) ?
 139			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
 140}
 141
 142static __le64 *
 143nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
 144{
 145	return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
 146}
 147
 148static __u64
 149nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
 150{
 151	return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
 152}
 153
 154static void
 155nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
 156{
 157	*(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
 158}
 159
 160static __u64
 161nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
 162			 int ncmax)
 163{
 164	return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
 165}
 166
 167static void
 168nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
 169			 int ncmax)
 170{
 171	*(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
 172}
 173
 174static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
 175				  int level, int nchildren, int ncmax,
 176				  const __u64 *keys, const __u64 *ptrs)
 177{
 178	__le64 *dkeys;
 179	__le64 *dptrs;
 180	int i;
 181
 182	nilfs_btree_node_set_flags(node, flags);
 183	nilfs_btree_node_set_level(node, level);
 184	nilfs_btree_node_set_nchildren(node, nchildren);
 185
 186	dkeys = nilfs_btree_node_dkeys(node);
 187	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 188	for (i = 0; i < nchildren; i++) {
 189		dkeys[i] = cpu_to_le64(keys[i]);
 190		dptrs[i] = cpu_to_le64(ptrs[i]);
 191	}
 192}
 193
 194/* Assume the buffer heads corresponding to left and right are locked. */
 195static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
 196				       struct nilfs_btree_node *right,
 197				       int n, int lncmax, int rncmax)
 198{
 199	__le64 *ldkeys, *rdkeys;
 200	__le64 *ldptrs, *rdptrs;
 201	int lnchildren, rnchildren;
 202
 203	ldkeys = nilfs_btree_node_dkeys(left);
 204	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 205	lnchildren = nilfs_btree_node_get_nchildren(left);
 206
 207	rdkeys = nilfs_btree_node_dkeys(right);
 208	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 209	rnchildren = nilfs_btree_node_get_nchildren(right);
 210
 211	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
 212	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
 213	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
 214	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
 215
 216	lnchildren += n;
 217	rnchildren -= n;
 218	nilfs_btree_node_set_nchildren(left, lnchildren);
 219	nilfs_btree_node_set_nchildren(right, rnchildren);
 220}
 221
 222/* Assume that the buffer heads corresponding to left and right are locked. */
 223static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
 224					struct nilfs_btree_node *right,
 225					int n, int lncmax, int rncmax)
 226{
 227	__le64 *ldkeys, *rdkeys;
 228	__le64 *ldptrs, *rdptrs;
 229	int lnchildren, rnchildren;
 230
 231	ldkeys = nilfs_btree_node_dkeys(left);
 232	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 233	lnchildren = nilfs_btree_node_get_nchildren(left);
 234
 235	rdkeys = nilfs_btree_node_dkeys(right);
 236	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 237	rnchildren = nilfs_btree_node_get_nchildren(right);
 238
 239	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
 240	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
 241	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
 242	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
 243
 244	lnchildren -= n;
 245	rnchildren += n;
 246	nilfs_btree_node_set_nchildren(left, lnchildren);
 247	nilfs_btree_node_set_nchildren(right, rnchildren);
 248}
 249
 250/* Assume that the buffer head corresponding to node is locked. */
 251static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
 252				    __u64 key, __u64 ptr, int ncmax)
 253{
 254	__le64 *dkeys;
 255	__le64 *dptrs;
 256	int nchildren;
 257
 258	dkeys = nilfs_btree_node_dkeys(node);
 259	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 260	nchildren = nilfs_btree_node_get_nchildren(node);
 261	if (index < nchildren) {
 262		memmove(dkeys + index + 1, dkeys + index,
 263			(nchildren - index) * sizeof(*dkeys));
 264		memmove(dptrs + index + 1, dptrs + index,
 265			(nchildren - index) * sizeof(*dptrs));
 266	}
 267	dkeys[index] = cpu_to_le64(key);
 268	dptrs[index] = cpu_to_le64(ptr);
 269	nchildren++;
 270	nilfs_btree_node_set_nchildren(node, nchildren);
 271}
 272
 273/* Assume that the buffer head corresponding to node is locked. */
 274static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
 275				    __u64 *keyp, __u64 *ptrp, int ncmax)
 276{
 277	__u64 key;
 278	__u64 ptr;
 279	__le64 *dkeys;
 280	__le64 *dptrs;
 281	int nchildren;
 282
 283	dkeys = nilfs_btree_node_dkeys(node);
 284	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 285	key = le64_to_cpu(dkeys[index]);
 286	ptr = le64_to_cpu(dptrs[index]);
 287	nchildren = nilfs_btree_node_get_nchildren(node);
 288	if (keyp != NULL)
 289		*keyp = key;
 290	if (ptrp != NULL)
 291		*ptrp = ptr;
 292
 293	if (index < nchildren - 1) {
 294		memmove(dkeys + index, dkeys + index + 1,
 295			(nchildren - index - 1) * sizeof(*dkeys));
 296		memmove(dptrs + index, dptrs + index + 1,
 297			(nchildren - index - 1) * sizeof(*dptrs));
 298	}
 299	nchildren--;
 300	nilfs_btree_node_set_nchildren(node, nchildren);
 301}
 302
 303static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
 304				   __u64 key, int *indexp)
 305{
 306	__u64 nkey;
 307	int index, low, high, s;
 308
 309	/* binary search */
 310	low = 0;
 311	high = nilfs_btree_node_get_nchildren(node) - 1;
 312	index = 0;
 313	s = 0;
 314	while (low <= high) {
 315		index = (low + high) / 2;
 316		nkey = nilfs_btree_node_get_key(node, index);
 317		if (nkey == key) {
 318			s = 0;
 319			goto out;
 320		} else if (nkey < key) {
 321			low = index + 1;
 322			s = -1;
 323		} else {
 324			high = index - 1;
 325			s = 1;
 326		}
 327	}
 328
 329	/* adjust index */
 330	if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
 331		if (s > 0 && index > 0)
 332			index--;
 333	} else if (s < 0)
 334		index++;
 335
 336 out:
 337	*indexp = index;
 338
 339	return s == 0;
 340}
 341
 342/**
 343 * nilfs_btree_node_broken - verify consistency of btree node
 344 * @node: btree node block to be examined
 345 * @size: node size (in bytes)
 
 346 * @blocknr: block number
 347 *
 348 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 349 */
 350static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
 351				   size_t size, sector_t blocknr)
 
 352{
 353	int level, flags, nchildren;
 354	int ret = 0;
 355
 356	level = nilfs_btree_node_get_level(node);
 357	flags = nilfs_btree_node_get_flags(node);
 358	nchildren = nilfs_btree_node_get_nchildren(node);
 359
 360	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
 361		     level >= NILFS_BTREE_LEVEL_MAX ||
 362		     (flags & NILFS_BTREE_NODE_ROOT) ||
 363		     nchildren < 0 ||
 364		     nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
 365		printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): "
 366		       "level = %d, flags = 0x%x, nchildren = %d\n",
 367		       (unsigned long long)blocknr, level, flags, nchildren);
 
 368		ret = 1;
 369	}
 370	return ret;
 371}
 372
 373/**
 374 * nilfs_btree_root_broken - verify consistency of btree root node
 375 * @node: btree root node to be examined
 376 * @ino: inode number
 377 *
 378 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 379 */
 380static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
 381				   unsigned long ino)
 382{
 383	int level, flags, nchildren;
 384	int ret = 0;
 385
 386	level = nilfs_btree_node_get_level(node);
 387	flags = nilfs_btree_node_get_flags(node);
 388	nchildren = nilfs_btree_node_get_nchildren(node);
 389
 390	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
 391		     level >= NILFS_BTREE_LEVEL_MAX ||
 392		     nchildren < 0 ||
 393		     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
 394		pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n",
 395			ino, level, flags, nchildren);
 
 396		ret = 1;
 397	}
 398	return ret;
 399}
 400
 401int nilfs_btree_broken_node_block(struct buffer_head *bh)
 402{
 
 403	int ret;
 404
 405	if (buffer_nilfs_checked(bh))
 406		return 0;
 407
 
 408	ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
 409				       bh->b_size, bh->b_blocknr);
 410	if (likely(!ret))
 411		set_buffer_nilfs_checked(bh);
 412	return ret;
 413}
 414
 415static struct nilfs_btree_node *
 416nilfs_btree_get_root(const struct nilfs_bmap *btree)
 417{
 418	return (struct nilfs_btree_node *)btree->b_u.u_data;
 419}
 420
 421static struct nilfs_btree_node *
 422nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
 423{
 424	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
 425}
 426
 427static struct nilfs_btree_node *
 428nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
 429{
 430	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
 431}
 432
 433static int nilfs_btree_height(const struct nilfs_bmap *btree)
 434{
 435	return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
 436}
 437
 438static struct nilfs_btree_node *
 439nilfs_btree_get_node(const struct nilfs_bmap *btree,
 440		     const struct nilfs_btree_path *path,
 441		     int level, int *ncmaxp)
 442{
 443	struct nilfs_btree_node *node;
 444
 445	if (level == nilfs_btree_height(btree) - 1) {
 446		node = nilfs_btree_get_root(btree);
 447		*ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
 448	} else {
 449		node = nilfs_btree_get_nonroot_node(path, level);
 450		*ncmaxp = nilfs_btree_nchildren_per_block(btree);
 451	}
 452	return node;
 453}
 454
 455static int
 456nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
 457{
 458	if (unlikely(nilfs_btree_node_get_level(node) != level)) {
 459		dump_stack();
 460		printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
 461		       nilfs_btree_node_get_level(node), level);
 
 
 462		return 1;
 463	}
 464	return 0;
 465}
 466
 467struct nilfs_btree_readahead_info {
 468	struct nilfs_btree_node *node;	/* parent node */
 469	int max_ra_blocks;		/* max nof blocks to read ahead */
 470	int index;			/* current index on the parent node */
 471	int ncmax;			/* nof children in the parent node */
 472};
 473
 474static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 475				   struct buffer_head **bhp,
 476				   const struct nilfs_btree_readahead_info *ra)
 477{
 478	struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
 
 479	struct buffer_head *bh, *ra_bh;
 480	sector_t submit_ptr = 0;
 481	int ret;
 482
 483	ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr);
 
 484	if (ret) {
 485		if (ret != -EEXIST)
 486			return ret;
 487		goto out_check;
 
 
 
 
 
 
 
 
 
 488	}
 489
 490	if (ra) {
 491		int i, n;
 492		__u64 ptr2;
 493
 494		/* read ahead sibling nodes */
 495		for (n = ra->max_ra_blocks, i = ra->index + 1;
 496		     n > 0 && i < ra->ncmax; n--, i++) {
 497			ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
 498
 499			ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA,
 500							&ra_bh, &submit_ptr);
 
 501			if (likely(!ret || ret == -EEXIST))
 502				brelse(ra_bh);
 503			else if (ret != -EBUSY)
 504				break;
 505			if (!buffer_locked(bh))
 506				goto out_no_wait;
 507		}
 508	}
 509
 510	wait_on_buffer(bh);
 511
 512 out_no_wait:
 513	if (!buffer_uptodate(bh)) {
 
 
 
 514		brelse(bh);
 515		return -EIO;
 516	}
 517
 518 out_check:
 519	if (nilfs_btree_broken_node_block(bh)) {
 520		clear_buffer_uptodate(bh);
 521		brelse(bh);
 522		return -EINVAL;
 523	}
 524
 525	*bhp = bh;
 526	return 0;
 527}
 528
 529static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 530				   struct buffer_head **bhp)
 531{
 532	return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
 533}
 534
 535static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
 536				 struct nilfs_btree_path *path,
 537				 __u64 key, __u64 *ptrp, int minlevel,
 538				 int readahead)
 539{
 540	struct nilfs_btree_node *node;
 541	struct nilfs_btree_readahead_info p, *ra;
 542	__u64 ptr;
 543	int level, index, found, ncmax, ret;
 544
 545	node = nilfs_btree_get_root(btree);
 546	level = nilfs_btree_node_get_level(node);
 547	if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
 548		return -ENOENT;
 549
 550	found = nilfs_btree_node_lookup(node, key, &index);
 551	ptr = nilfs_btree_node_get_ptr(node, index,
 552				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 553	path[level].bp_bh = NULL;
 554	path[level].bp_index = index;
 555
 556	ncmax = nilfs_btree_nchildren_per_block(btree);
 557
 558	while (--level >= minlevel) {
 559		ra = NULL;
 560		if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
 561			p.node = nilfs_btree_get_node(btree, path, level + 1,
 562						      &p.ncmax);
 563			p.index = index;
 564			p.max_ra_blocks = 7;
 565			ra = &p;
 566		}
 567		ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
 568					      ra);
 569		if (ret < 0)
 570			return ret;
 571
 572		node = nilfs_btree_get_nonroot_node(path, level);
 573		if (nilfs_btree_bad_node(node, level))
 574			return -EINVAL;
 575		if (!found)
 576			found = nilfs_btree_node_lookup(node, key, &index);
 577		else
 578			index = 0;
 579		if (index < ncmax) {
 580			ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 581		} else {
 582			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
 583			/* insert */
 584			ptr = NILFS_BMAP_INVALID_PTR;
 585		}
 586		path[level].bp_index = index;
 587	}
 588	if (!found)
 589		return -ENOENT;
 590
 591	if (ptrp != NULL)
 592		*ptrp = ptr;
 593
 594	return 0;
 595}
 596
 597static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
 598				      struct nilfs_btree_path *path,
 599				      __u64 *keyp, __u64 *ptrp)
 600{
 601	struct nilfs_btree_node *node;
 602	__u64 ptr;
 603	int index, level, ncmax, ret;
 604
 605	node = nilfs_btree_get_root(btree);
 606	index = nilfs_btree_node_get_nchildren(node) - 1;
 607	if (index < 0)
 608		return -ENOENT;
 609	level = nilfs_btree_node_get_level(node);
 610	ptr = nilfs_btree_node_get_ptr(node, index,
 611				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 612	path[level].bp_bh = NULL;
 613	path[level].bp_index = index;
 614	ncmax = nilfs_btree_nchildren_per_block(btree);
 615
 616	for (level--; level > 0; level--) {
 617		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
 618		if (ret < 0)
 619			return ret;
 620		node = nilfs_btree_get_nonroot_node(path, level);
 621		if (nilfs_btree_bad_node(node, level))
 622			return -EINVAL;
 623		index = nilfs_btree_node_get_nchildren(node) - 1;
 624		ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 625		path[level].bp_index = index;
 626	}
 627
 628	if (keyp != NULL)
 629		*keyp = nilfs_btree_node_get_key(node, index);
 630	if (ptrp != NULL)
 631		*ptrp = ptr;
 632
 633	return 0;
 634}
 635
 636/**
 637 * nilfs_btree_get_next_key - get next valid key from btree path array
 638 * @btree: bmap struct of btree
 639 * @path: array of nilfs_btree_path struct
 640 * @minlevel: start level
 641 * @nextkey: place to store the next valid key
 642 *
 643 * Return Value: If a next key was found, 0 is returned. Otherwise,
 644 * -ENOENT is returned.
 645 */
 646static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
 647				    const struct nilfs_btree_path *path,
 648				    int minlevel, __u64 *nextkey)
 649{
 650	struct nilfs_btree_node *node;
 651	int maxlevel = nilfs_btree_height(btree) - 1;
 652	int index, next_adj, level;
 653
 654	/* Next index is already set to bp_index for leaf nodes. */
 655	next_adj = 0;
 656	for (level = minlevel; level <= maxlevel; level++) {
 657		if (level == maxlevel)
 658			node = nilfs_btree_get_root(btree);
 659		else
 660			node = nilfs_btree_get_nonroot_node(path, level);
 661
 662		index = path[level].bp_index + next_adj;
 663		if (index < nilfs_btree_node_get_nchildren(node)) {
 664			/* Next key is in this node */
 665			*nextkey = nilfs_btree_node_get_key(node, index);
 666			return 0;
 667		}
 668		/* For non-leaf nodes, next index is stored at bp_index + 1. */
 669		next_adj = 1;
 670	}
 671	return -ENOENT;
 672}
 673
 674static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
 675			      __u64 key, int level, __u64 *ptrp)
 676{
 677	struct nilfs_btree_path *path;
 678	int ret;
 679
 680	path = nilfs_btree_alloc_path();
 681	if (path == NULL)
 682		return -ENOMEM;
 683
 684	ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
 685
 686	nilfs_btree_free_path(path);
 687
 688	return ret;
 689}
 690
 691static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
 692				     __u64 key, __u64 *ptrp, unsigned maxblocks)
 
 693{
 694	struct nilfs_btree_path *path;
 695	struct nilfs_btree_node *node;
 696	struct inode *dat = NULL;
 697	__u64 ptr, ptr2;
 698	sector_t blocknr;
 699	int level = NILFS_BTREE_LEVEL_NODE_MIN;
 700	int ret, cnt, index, maxlevel, ncmax;
 701	struct nilfs_btree_readahead_info p;
 702
 703	path = nilfs_btree_alloc_path();
 704	if (path == NULL)
 705		return -ENOMEM;
 706
 707	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
 708	if (ret < 0)
 709		goto out;
 710
 711	if (NILFS_BMAP_USE_VBN(btree)) {
 712		dat = nilfs_bmap_get_dat(btree);
 713		ret = nilfs_dat_translate(dat, ptr, &blocknr);
 714		if (ret < 0)
 715			goto out;
 716		ptr = blocknr;
 717	}
 718	cnt = 1;
 719	if (cnt == maxblocks)
 720		goto end;
 721
 722	maxlevel = nilfs_btree_height(btree) - 1;
 723	node = nilfs_btree_get_node(btree, path, level, &ncmax);
 724	index = path[level].bp_index + 1;
 725	for (;;) {
 726		while (index < nilfs_btree_node_get_nchildren(node)) {
 727			if (nilfs_btree_node_get_key(node, index) !=
 728			    key + cnt)
 729				goto end;
 730			ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
 731			if (dat) {
 732				ret = nilfs_dat_translate(dat, ptr2, &blocknr);
 733				if (ret < 0)
 734					goto out;
 735				ptr2 = blocknr;
 736			}
 737			if (ptr2 != ptr + cnt || ++cnt == maxblocks)
 738				goto end;
 739			index++;
 740			continue;
 741		}
 742		if (level == maxlevel)
 743			break;
 744
 745		/* look-up right sibling node */
 746		p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
 747		p.index = path[level + 1].bp_index + 1;
 748		p.max_ra_blocks = 7;
 749		if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
 750		    nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
 751			break;
 752		ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
 753		path[level + 1].bp_index = p.index;
 754
 755		brelse(path[level].bp_bh);
 756		path[level].bp_bh = NULL;
 757
 758		ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
 759					      &p);
 760		if (ret < 0)
 761			goto out;
 762		node = nilfs_btree_get_nonroot_node(path, level);
 763		ncmax = nilfs_btree_nchildren_per_block(btree);
 764		index = 0;
 765		path[level].bp_index = index;
 766	}
 767 end:
 768	*ptrp = ptr;
 769	ret = cnt;
 770 out:
 771	nilfs_btree_free_path(path);
 772	return ret;
 773}
 774
 775static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
 776				    struct nilfs_btree_path *path,
 777				    int level, __u64 key)
 778{
 779	if (level < nilfs_btree_height(btree) - 1) {
 780		do {
 781			nilfs_btree_node_set_key(
 782				nilfs_btree_get_nonroot_node(path, level),
 783				path[level].bp_index, key);
 784			if (!buffer_dirty(path[level].bp_bh))
 785				mark_buffer_dirty(path[level].bp_bh);
 786		} while ((path[level].bp_index == 0) &&
 787			 (++level < nilfs_btree_height(btree) - 1));
 788	}
 789
 790	/* root */
 791	if (level == nilfs_btree_height(btree) - 1) {
 792		nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
 793					 path[level].bp_index, key);
 794	}
 795}
 796
 797static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
 798				  struct nilfs_btree_path *path,
 799				  int level, __u64 *keyp, __u64 *ptrp)
 800{
 801	struct nilfs_btree_node *node;
 802	int ncblk;
 803
 804	if (level < nilfs_btree_height(btree) - 1) {
 805		node = nilfs_btree_get_nonroot_node(path, level);
 806		ncblk = nilfs_btree_nchildren_per_block(btree);
 807		nilfs_btree_node_insert(node, path[level].bp_index,
 808					*keyp, *ptrp, ncblk);
 809		if (!buffer_dirty(path[level].bp_bh))
 810			mark_buffer_dirty(path[level].bp_bh);
 811
 812		if (path[level].bp_index == 0)
 813			nilfs_btree_promote_key(btree, path, level + 1,
 814						nilfs_btree_node_get_key(node,
 815									 0));
 816	} else {
 817		node = nilfs_btree_get_root(btree);
 818		nilfs_btree_node_insert(node, path[level].bp_index,
 819					*keyp, *ptrp,
 820					NILFS_BTREE_ROOT_NCHILDREN_MAX);
 821	}
 822}
 823
 824static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
 825				   struct nilfs_btree_path *path,
 826				   int level, __u64 *keyp, __u64 *ptrp)
 827{
 828	struct nilfs_btree_node *node, *left;
 829	int nchildren, lnchildren, n, move, ncblk;
 830
 831	node = nilfs_btree_get_nonroot_node(path, level);
 832	left = nilfs_btree_get_sib_node(path, level);
 833	nchildren = nilfs_btree_node_get_nchildren(node);
 834	lnchildren = nilfs_btree_node_get_nchildren(left);
 835	ncblk = nilfs_btree_nchildren_per_block(btree);
 836	move = 0;
 837
 838	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
 839	if (n > path[level].bp_index) {
 840		/* move insert point */
 841		n--;
 842		move = 1;
 843	}
 844
 845	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
 846
 847	if (!buffer_dirty(path[level].bp_bh))
 848		mark_buffer_dirty(path[level].bp_bh);
 849	if (!buffer_dirty(path[level].bp_sib_bh))
 850		mark_buffer_dirty(path[level].bp_sib_bh);
 851
 852	nilfs_btree_promote_key(btree, path, level + 1,
 853				nilfs_btree_node_get_key(node, 0));
 854
 855	if (move) {
 856		brelse(path[level].bp_bh);
 857		path[level].bp_bh = path[level].bp_sib_bh;
 858		path[level].bp_sib_bh = NULL;
 859		path[level].bp_index += lnchildren;
 860		path[level + 1].bp_index--;
 861	} else {
 862		brelse(path[level].bp_sib_bh);
 863		path[level].bp_sib_bh = NULL;
 864		path[level].bp_index -= n;
 865	}
 866
 867	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 868}
 869
 870static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
 871				    struct nilfs_btree_path *path,
 872				    int level, __u64 *keyp, __u64 *ptrp)
 873{
 874	struct nilfs_btree_node *node, *right;
 875	int nchildren, rnchildren, n, move, ncblk;
 876
 877	node = nilfs_btree_get_nonroot_node(path, level);
 878	right = nilfs_btree_get_sib_node(path, level);
 879	nchildren = nilfs_btree_node_get_nchildren(node);
 880	rnchildren = nilfs_btree_node_get_nchildren(right);
 881	ncblk = nilfs_btree_nchildren_per_block(btree);
 882	move = 0;
 883
 884	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
 885	if (n > nchildren - path[level].bp_index) {
 886		/* move insert point */
 887		n--;
 888		move = 1;
 889	}
 890
 891	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 892
 893	if (!buffer_dirty(path[level].bp_bh))
 894		mark_buffer_dirty(path[level].bp_bh);
 895	if (!buffer_dirty(path[level].bp_sib_bh))
 896		mark_buffer_dirty(path[level].bp_sib_bh);
 897
 898	path[level + 1].bp_index++;
 899	nilfs_btree_promote_key(btree, path, level + 1,
 900				nilfs_btree_node_get_key(right, 0));
 901	path[level + 1].bp_index--;
 902
 903	if (move) {
 904		brelse(path[level].bp_bh);
 905		path[level].bp_bh = path[level].bp_sib_bh;
 906		path[level].bp_sib_bh = NULL;
 907		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 908		path[level + 1].bp_index++;
 909	} else {
 910		brelse(path[level].bp_sib_bh);
 911		path[level].bp_sib_bh = NULL;
 912	}
 913
 914	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 915}
 916
 917static void nilfs_btree_split(struct nilfs_bmap *btree,
 918			      struct nilfs_btree_path *path,
 919			      int level, __u64 *keyp, __u64 *ptrp)
 920{
 921	struct nilfs_btree_node *node, *right;
 922	int nchildren, n, move, ncblk;
 923
 924	node = nilfs_btree_get_nonroot_node(path, level);
 925	right = nilfs_btree_get_sib_node(path, level);
 926	nchildren = nilfs_btree_node_get_nchildren(node);
 927	ncblk = nilfs_btree_nchildren_per_block(btree);
 928	move = 0;
 929
 930	n = (nchildren + 1) / 2;
 931	if (n > nchildren - path[level].bp_index) {
 932		n--;
 933		move = 1;
 934	}
 935
 936	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 937
 938	if (!buffer_dirty(path[level].bp_bh))
 939		mark_buffer_dirty(path[level].bp_bh);
 940	if (!buffer_dirty(path[level].bp_sib_bh))
 941		mark_buffer_dirty(path[level].bp_sib_bh);
 942
 943	if (move) {
 944		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 945		nilfs_btree_node_insert(right, path[level].bp_index,
 946					*keyp, *ptrp, ncblk);
 947
 948		*keyp = nilfs_btree_node_get_key(right, 0);
 949		*ptrp = path[level].bp_newreq.bpr_ptr;
 950
 951		brelse(path[level].bp_bh);
 952		path[level].bp_bh = path[level].bp_sib_bh;
 953		path[level].bp_sib_bh = NULL;
 954	} else {
 955		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 956
 957		*keyp = nilfs_btree_node_get_key(right, 0);
 958		*ptrp = path[level].bp_newreq.bpr_ptr;
 959
 960		brelse(path[level].bp_sib_bh);
 961		path[level].bp_sib_bh = NULL;
 962	}
 963
 964	path[level + 1].bp_index++;
 965}
 966
 967static void nilfs_btree_grow(struct nilfs_bmap *btree,
 968			     struct nilfs_btree_path *path,
 969			     int level, __u64 *keyp, __u64 *ptrp)
 970{
 971	struct nilfs_btree_node *root, *child;
 972	int n, ncblk;
 973
 974	root = nilfs_btree_get_root(btree);
 975	child = nilfs_btree_get_sib_node(path, level);
 976	ncblk = nilfs_btree_nchildren_per_block(btree);
 977
 978	n = nilfs_btree_node_get_nchildren(root);
 979
 980	nilfs_btree_node_move_right(root, child, n,
 981				    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
 982	nilfs_btree_node_set_level(root, level + 1);
 983
 984	if (!buffer_dirty(path[level].bp_sib_bh))
 985		mark_buffer_dirty(path[level].bp_sib_bh);
 986
 987	path[level].bp_bh = path[level].bp_sib_bh;
 988	path[level].bp_sib_bh = NULL;
 989
 990	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 991
 992	*keyp = nilfs_btree_node_get_key(child, 0);
 993	*ptrp = path[level].bp_newreq.bpr_ptr;
 994}
 995
 996static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
 997				   const struct nilfs_btree_path *path)
 998{
 999	struct nilfs_btree_node *node;
1000	int level, ncmax;
1001
1002	if (path == NULL)
1003		return NILFS_BMAP_INVALID_PTR;
1004
1005	/* left sibling */
1006	level = NILFS_BTREE_LEVEL_NODE_MIN;
1007	if (path[level].bp_index > 0) {
1008		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1009		return nilfs_btree_node_get_ptr(node,
1010						path[level].bp_index - 1,
1011						ncmax);
1012	}
1013
1014	/* parent */
1015	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1016	if (level <= nilfs_btree_height(btree) - 1) {
1017		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1018		return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1019						ncmax);
1020	}
1021
1022	return NILFS_BMAP_INVALID_PTR;
1023}
1024
1025static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1026				       const struct nilfs_btree_path *path,
1027				       __u64 key)
1028{
1029	__u64 ptr;
1030
1031	ptr = nilfs_bmap_find_target_seq(btree, key);
1032	if (ptr != NILFS_BMAP_INVALID_PTR)
1033		/* sequential access */
1034		return ptr;
1035	else {
1036		ptr = nilfs_btree_find_near(btree, path);
1037		if (ptr != NILFS_BMAP_INVALID_PTR)
1038			/* near */
1039			return ptr;
1040	}
1041	/* block group */
1042	return nilfs_bmap_find_target_in_group(btree);
1043}
1044
1045static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1046				      struct nilfs_btree_path *path,
1047				      int *levelp, __u64 key, __u64 ptr,
1048				      struct nilfs_bmap_stats *stats)
1049{
1050	struct buffer_head *bh;
1051	struct nilfs_btree_node *node, *parent, *sib;
1052	__u64 sibptr;
1053	int pindex, level, ncmax, ncblk, ret;
1054	struct inode *dat = NULL;
1055
1056	stats->bs_nblocks = 0;
1057	level = NILFS_BTREE_LEVEL_DATA;
1058
1059	/* allocate a new ptr for data block */
1060	if (NILFS_BMAP_USE_VBN(btree)) {
1061		path[level].bp_newreq.bpr_ptr =
1062			nilfs_btree_find_target_v(btree, path, key);
1063		dat = nilfs_bmap_get_dat(btree);
1064	}
1065
1066	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1067	if (ret < 0)
1068		goto err_out_data;
1069
1070	ncblk = nilfs_btree_nchildren_per_block(btree);
1071
1072	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1073	     level < nilfs_btree_height(btree) - 1;
1074	     level++) {
1075		node = nilfs_btree_get_nonroot_node(path, level);
1076		if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1077			path[level].bp_op = nilfs_btree_do_insert;
1078			stats->bs_nblocks++;
1079			goto out;
1080		}
1081
1082		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1083		pindex = path[level + 1].bp_index;
1084
1085		/* left sibling */
1086		if (pindex > 0) {
1087			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1088							  ncmax);
1089			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1090			if (ret < 0)
1091				goto err_out_child_node;
1092			sib = (struct nilfs_btree_node *)bh->b_data;
1093			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1094				path[level].bp_sib_bh = bh;
1095				path[level].bp_op = nilfs_btree_carry_left;
1096				stats->bs_nblocks++;
1097				goto out;
1098			} else {
1099				brelse(bh);
1100			}
1101		}
1102
1103		/* right sibling */
1104		if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1105			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1106							  ncmax);
1107			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1108			if (ret < 0)
1109				goto err_out_child_node;
1110			sib = (struct nilfs_btree_node *)bh->b_data;
1111			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1112				path[level].bp_sib_bh = bh;
1113				path[level].bp_op = nilfs_btree_carry_right;
1114				stats->bs_nblocks++;
1115				goto out;
1116			} else {
1117				brelse(bh);
1118			}
1119		}
1120
1121		/* split */
1122		path[level].bp_newreq.bpr_ptr =
1123			path[level - 1].bp_newreq.bpr_ptr + 1;
1124		ret = nilfs_bmap_prepare_alloc_ptr(btree,
1125						   &path[level].bp_newreq, dat);
1126		if (ret < 0)
1127			goto err_out_child_node;
1128		ret = nilfs_btree_get_new_block(btree,
1129						path[level].bp_newreq.bpr_ptr,
1130						&bh);
1131		if (ret < 0)
1132			goto err_out_curr_node;
1133
1134		stats->bs_nblocks++;
1135
1136		sib = (struct nilfs_btree_node *)bh->b_data;
1137		nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1138		path[level].bp_sib_bh = bh;
1139		path[level].bp_op = nilfs_btree_split;
1140	}
1141
1142	/* root */
1143	node = nilfs_btree_get_root(btree);
1144	if (nilfs_btree_node_get_nchildren(node) <
1145	    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1146		path[level].bp_op = nilfs_btree_do_insert;
1147		stats->bs_nblocks++;
1148		goto out;
1149	}
1150
1151	/* grow */
1152	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1153	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1154	if (ret < 0)
1155		goto err_out_child_node;
1156	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1157					&bh);
1158	if (ret < 0)
1159		goto err_out_curr_node;
1160
1161	nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1162			      0, level, 0, ncblk, NULL, NULL);
1163	path[level].bp_sib_bh = bh;
1164	path[level].bp_op = nilfs_btree_grow;
1165
1166	level++;
1167	path[level].bp_op = nilfs_btree_do_insert;
1168
1169	/* a newly-created node block and a data block are added */
1170	stats->bs_nblocks += 2;
1171
1172	/* success */
1173 out:
1174	*levelp = level;
1175	return ret;
1176
1177	/* error */
1178 err_out_curr_node:
1179	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1180 err_out_child_node:
1181	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1182		nilfs_btnode_delete(path[level].bp_sib_bh);
1183		nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1184
1185	}
1186
1187	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1188 err_out_data:
1189	*levelp = level;
1190	stats->bs_nblocks = 0;
1191	return ret;
1192}
1193
1194static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1195				      struct nilfs_btree_path *path,
1196				      int maxlevel, __u64 key, __u64 ptr)
1197{
1198	struct inode *dat = NULL;
1199	int level;
1200
1201	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1202	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1203	if (NILFS_BMAP_USE_VBN(btree)) {
1204		nilfs_bmap_set_target_v(btree, key, ptr);
1205		dat = nilfs_bmap_get_dat(btree);
1206	}
1207
1208	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1209		nilfs_bmap_commit_alloc_ptr(btree,
1210					    &path[level - 1].bp_newreq, dat);
1211		path[level].bp_op(btree, path, level, &key, &ptr);
1212	}
1213
1214	if (!nilfs_bmap_dirty(btree))
1215		nilfs_bmap_set_dirty(btree);
1216}
1217
1218static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1219{
1220	struct nilfs_btree_path *path;
1221	struct nilfs_bmap_stats stats;
1222	int level, ret;
1223
1224	path = nilfs_btree_alloc_path();
1225	if (path == NULL)
1226		return -ENOMEM;
1227
1228	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1229				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1230	if (ret != -ENOENT) {
1231		if (ret == 0)
1232			ret = -EEXIST;
1233		goto out;
1234	}
1235
1236	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1237	if (ret < 0)
1238		goto out;
1239	nilfs_btree_commit_insert(btree, path, level, key, ptr);
1240	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1241
1242 out:
1243	nilfs_btree_free_path(path);
1244	return ret;
1245}
1246
1247static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1248				  struct nilfs_btree_path *path,
1249				  int level, __u64 *keyp, __u64 *ptrp)
1250{
1251	struct nilfs_btree_node *node;
1252	int ncblk;
1253
1254	if (level < nilfs_btree_height(btree) - 1) {
1255		node = nilfs_btree_get_nonroot_node(path, level);
1256		ncblk = nilfs_btree_nchildren_per_block(btree);
1257		nilfs_btree_node_delete(node, path[level].bp_index,
1258					keyp, ptrp, ncblk);
1259		if (!buffer_dirty(path[level].bp_bh))
1260			mark_buffer_dirty(path[level].bp_bh);
1261		if (path[level].bp_index == 0)
1262			nilfs_btree_promote_key(btree, path, level + 1,
1263				nilfs_btree_node_get_key(node, 0));
1264	} else {
1265		node = nilfs_btree_get_root(btree);
1266		nilfs_btree_node_delete(node, path[level].bp_index,
1267					keyp, ptrp,
1268					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1269	}
1270}
1271
1272static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1273				    struct nilfs_btree_path *path,
1274				    int level, __u64 *keyp, __u64 *ptrp)
1275{
1276	struct nilfs_btree_node *node, *left;
1277	int nchildren, lnchildren, n, ncblk;
1278
1279	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1280
1281	node = nilfs_btree_get_nonroot_node(path, level);
1282	left = nilfs_btree_get_sib_node(path, level);
1283	nchildren = nilfs_btree_node_get_nchildren(node);
1284	lnchildren = nilfs_btree_node_get_nchildren(left);
1285	ncblk = nilfs_btree_nchildren_per_block(btree);
1286
1287	n = (nchildren + lnchildren) / 2 - nchildren;
1288
1289	nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1290
1291	if (!buffer_dirty(path[level].bp_bh))
1292		mark_buffer_dirty(path[level].bp_bh);
1293	if (!buffer_dirty(path[level].bp_sib_bh))
1294		mark_buffer_dirty(path[level].bp_sib_bh);
1295
1296	nilfs_btree_promote_key(btree, path, level + 1,
1297				nilfs_btree_node_get_key(node, 0));
1298
1299	brelse(path[level].bp_sib_bh);
1300	path[level].bp_sib_bh = NULL;
1301	path[level].bp_index += n;
1302}
1303
1304static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1305				     struct nilfs_btree_path *path,
1306				     int level, __u64 *keyp, __u64 *ptrp)
1307{
1308	struct nilfs_btree_node *node, *right;
1309	int nchildren, rnchildren, n, ncblk;
1310
1311	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1312
1313	node = nilfs_btree_get_nonroot_node(path, level);
1314	right = nilfs_btree_get_sib_node(path, level);
1315	nchildren = nilfs_btree_node_get_nchildren(node);
1316	rnchildren = nilfs_btree_node_get_nchildren(right);
1317	ncblk = nilfs_btree_nchildren_per_block(btree);
1318
1319	n = (nchildren + rnchildren) / 2 - nchildren;
1320
1321	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1322
1323	if (!buffer_dirty(path[level].bp_bh))
1324		mark_buffer_dirty(path[level].bp_bh);
1325	if (!buffer_dirty(path[level].bp_sib_bh))
1326		mark_buffer_dirty(path[level].bp_sib_bh);
1327
1328	path[level + 1].bp_index++;
1329	nilfs_btree_promote_key(btree, path, level + 1,
1330				nilfs_btree_node_get_key(right, 0));
1331	path[level + 1].bp_index--;
1332
1333	brelse(path[level].bp_sib_bh);
1334	path[level].bp_sib_bh = NULL;
1335}
1336
1337static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1338				    struct nilfs_btree_path *path,
1339				    int level, __u64 *keyp, __u64 *ptrp)
1340{
1341	struct nilfs_btree_node *node, *left;
1342	int n, ncblk;
1343
1344	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1345
1346	node = nilfs_btree_get_nonroot_node(path, level);
1347	left = nilfs_btree_get_sib_node(path, level);
1348	ncblk = nilfs_btree_nchildren_per_block(btree);
1349
1350	n = nilfs_btree_node_get_nchildren(node);
1351
1352	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1353
1354	if (!buffer_dirty(path[level].bp_sib_bh))
1355		mark_buffer_dirty(path[level].bp_sib_bh);
1356
1357	nilfs_btnode_delete(path[level].bp_bh);
1358	path[level].bp_bh = path[level].bp_sib_bh;
1359	path[level].bp_sib_bh = NULL;
1360	path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1361}
1362
1363static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1364				     struct nilfs_btree_path *path,
1365				     int level, __u64 *keyp, __u64 *ptrp)
1366{
1367	struct nilfs_btree_node *node, *right;
1368	int n, ncblk;
1369
1370	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1371
1372	node = nilfs_btree_get_nonroot_node(path, level);
1373	right = nilfs_btree_get_sib_node(path, level);
1374	ncblk = nilfs_btree_nchildren_per_block(btree);
1375
1376	n = nilfs_btree_node_get_nchildren(right);
1377
1378	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1379
1380	if (!buffer_dirty(path[level].bp_bh))
1381		mark_buffer_dirty(path[level].bp_bh);
1382
1383	nilfs_btnode_delete(path[level].bp_sib_bh);
1384	path[level].bp_sib_bh = NULL;
1385	path[level + 1].bp_index++;
1386}
1387
1388static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1389			       struct nilfs_btree_path *path,
1390			       int level, __u64 *keyp, __u64 *ptrp)
1391{
1392	struct nilfs_btree_node *root, *child;
1393	int n, ncblk;
1394
1395	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1396
1397	root = nilfs_btree_get_root(btree);
1398	child = nilfs_btree_get_nonroot_node(path, level);
1399	ncblk = nilfs_btree_nchildren_per_block(btree);
1400
1401	nilfs_btree_node_delete(root, 0, NULL, NULL,
1402				NILFS_BTREE_ROOT_NCHILDREN_MAX);
1403	nilfs_btree_node_set_level(root, level);
1404	n = nilfs_btree_node_get_nchildren(child);
1405	nilfs_btree_node_move_left(root, child, n,
1406				   NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1407
1408	nilfs_btnode_delete(path[level].bp_bh);
1409	path[level].bp_bh = NULL;
1410}
1411
1412static void nilfs_btree_nop(struct nilfs_bmap *btree,
1413			    struct nilfs_btree_path *path,
1414			    int level, __u64 *keyp, __u64 *ptrp)
1415{
1416}
1417
1418static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1419				      struct nilfs_btree_path *path,
1420				      int *levelp,
1421				      struct nilfs_bmap_stats *stats,
1422				      struct inode *dat)
1423{
1424	struct buffer_head *bh;
1425	struct nilfs_btree_node *node, *parent, *sib;
1426	__u64 sibptr;
1427	int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1428
1429	ret = 0;
1430	stats->bs_nblocks = 0;
1431	ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1432	ncblk = nilfs_btree_nchildren_per_block(btree);
1433
1434	for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1435	     level < nilfs_btree_height(btree) - 1;
1436	     level++) {
1437		node = nilfs_btree_get_nonroot_node(path, level);
1438		path[level].bp_oldreq.bpr_ptr =
1439			nilfs_btree_node_get_ptr(node, dindex, ncblk);
1440		ret = nilfs_bmap_prepare_end_ptr(btree,
1441						 &path[level].bp_oldreq, dat);
1442		if (ret < 0)
1443			goto err_out_child_node;
1444
1445		if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1446			path[level].bp_op = nilfs_btree_do_delete;
1447			stats->bs_nblocks++;
1448			goto out;
1449		}
1450
1451		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1452		pindex = path[level + 1].bp_index;
1453		dindex = pindex;
1454
1455		if (pindex > 0) {
1456			/* left sibling */
1457			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1458							  ncmax);
1459			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1460			if (ret < 0)
1461				goto err_out_curr_node;
1462			sib = (struct nilfs_btree_node *)bh->b_data;
1463			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1464				path[level].bp_sib_bh = bh;
1465				path[level].bp_op = nilfs_btree_borrow_left;
1466				stats->bs_nblocks++;
1467				goto out;
1468			} else {
1469				path[level].bp_sib_bh = bh;
1470				path[level].bp_op = nilfs_btree_concat_left;
1471				stats->bs_nblocks++;
1472				/* continue; */
1473			}
1474		} else if (pindex <
1475			   nilfs_btree_node_get_nchildren(parent) - 1) {
1476			/* right sibling */
1477			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1478							  ncmax);
1479			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1480			if (ret < 0)
1481				goto err_out_curr_node;
1482			sib = (struct nilfs_btree_node *)bh->b_data;
1483			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1484				path[level].bp_sib_bh = bh;
1485				path[level].bp_op = nilfs_btree_borrow_right;
1486				stats->bs_nblocks++;
1487				goto out;
1488			} else {
1489				path[level].bp_sib_bh = bh;
1490				path[level].bp_op = nilfs_btree_concat_right;
1491				stats->bs_nblocks++;
1492				/*
1493				 * When merging right sibling node
1494				 * into the current node, pointer to
1495				 * the right sibling node must be
1496				 * terminated instead.  The adjustment
1497				 * below is required for that.
1498				 */
1499				dindex = pindex + 1;
1500				/* continue; */
1501			}
1502		} else {
1503			/* no siblings */
1504			/* the only child of the root node */
1505			WARN_ON(level != nilfs_btree_height(btree) - 2);
1506			if (nilfs_btree_node_get_nchildren(node) - 1 <=
1507			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1508				path[level].bp_op = nilfs_btree_shrink;
1509				stats->bs_nblocks += 2;
1510				level++;
1511				path[level].bp_op = nilfs_btree_nop;
1512				goto shrink_root_child;
1513			} else {
1514				path[level].bp_op = nilfs_btree_do_delete;
1515				stats->bs_nblocks++;
1516				goto out;
1517			}
1518		}
1519	}
1520
1521	/* child of the root node is deleted */
1522	path[level].bp_op = nilfs_btree_do_delete;
1523	stats->bs_nblocks++;
1524
1525shrink_root_child:
1526	node = nilfs_btree_get_root(btree);
1527	path[level].bp_oldreq.bpr_ptr =
1528		nilfs_btree_node_get_ptr(node, dindex,
1529					 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1530
1531	ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1532	if (ret < 0)
1533		goto err_out_child_node;
1534
1535	/* success */
1536 out:
1537	*levelp = level;
1538	return ret;
1539
1540	/* error */
1541 err_out_curr_node:
1542	nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1543 err_out_child_node:
1544	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1545		brelse(path[level].bp_sib_bh);
1546		nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1547	}
1548	*levelp = level;
1549	stats->bs_nblocks = 0;
1550	return ret;
1551}
1552
1553static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1554				      struct nilfs_btree_path *path,
1555				      int maxlevel, struct inode *dat)
1556{
1557	int level;
1558
1559	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1560		nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1561		path[level].bp_op(btree, path, level, NULL, NULL);
1562	}
1563
1564	if (!nilfs_bmap_dirty(btree))
1565		nilfs_bmap_set_dirty(btree);
1566}
1567
1568static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1569
1570{
1571	struct nilfs_btree_path *path;
1572	struct nilfs_bmap_stats stats;
1573	struct inode *dat;
1574	int level, ret;
1575
1576	path = nilfs_btree_alloc_path();
1577	if (path == NULL)
1578		return -ENOMEM;
1579
1580	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1581				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1582	if (ret < 0)
1583		goto out;
1584
1585
1586	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1587
1588	ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1589	if (ret < 0)
1590		goto out;
1591	nilfs_btree_commit_delete(btree, path, level, dat);
1592	nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1593
1594out:
1595	nilfs_btree_free_path(path);
1596	return ret;
1597}
1598
1599static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1600				__u64 *keyp)
1601{
1602	struct nilfs_btree_path *path;
1603	const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1604	int ret;
1605
1606	path = nilfs_btree_alloc_path();
1607	if (!path)
1608		return -ENOMEM;
1609
1610	ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1611	if (!ret)
1612		*keyp = start;
1613	else if (ret == -ENOENT)
1614		ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1615
1616	nilfs_btree_free_path(path);
1617	return ret;
1618}
1619
1620static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1621{
1622	struct nilfs_btree_path *path;
1623	int ret;
1624
1625	path = nilfs_btree_alloc_path();
1626	if (path == NULL)
1627		return -ENOMEM;
1628
1629	ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1630
1631	nilfs_btree_free_path(path);
1632
1633	return ret;
1634}
1635
1636static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1637{
1638	struct buffer_head *bh;
1639	struct nilfs_btree_node *root, *node;
1640	__u64 maxkey, nextmaxkey;
1641	__u64 ptr;
1642	int nchildren, ret;
1643
1644	root = nilfs_btree_get_root(btree);
1645	switch (nilfs_btree_height(btree)) {
1646	case 2:
1647		bh = NULL;
1648		node = root;
1649		break;
1650	case 3:
1651		nchildren = nilfs_btree_node_get_nchildren(root);
1652		if (nchildren > 1)
1653			return 0;
1654		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1655					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1656		ret = nilfs_btree_get_block(btree, ptr, &bh);
1657		if (ret < 0)
1658			return ret;
1659		node = (struct nilfs_btree_node *)bh->b_data;
1660		break;
1661	default:
1662		return 0;
1663	}
1664
1665	nchildren = nilfs_btree_node_get_nchildren(node);
1666	maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1667	nextmaxkey = (nchildren > 1) ?
1668		nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1669	if (bh != NULL)
1670		brelse(bh);
1671
1672	return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1673}
1674
1675static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1676				   __u64 *keys, __u64 *ptrs, int nitems)
1677{
1678	struct buffer_head *bh;
1679	struct nilfs_btree_node *node, *root;
1680	__le64 *dkeys;
1681	__le64 *dptrs;
1682	__u64 ptr;
1683	int nchildren, ncmax, i, ret;
1684
1685	root = nilfs_btree_get_root(btree);
1686	switch (nilfs_btree_height(btree)) {
1687	case 2:
1688		bh = NULL;
1689		node = root;
1690		ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1691		break;
1692	case 3:
1693		nchildren = nilfs_btree_node_get_nchildren(root);
1694		WARN_ON(nchildren > 1);
1695		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1696					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1697		ret = nilfs_btree_get_block(btree, ptr, &bh);
1698		if (ret < 0)
1699			return ret;
1700		node = (struct nilfs_btree_node *)bh->b_data;
1701		ncmax = nilfs_btree_nchildren_per_block(btree);
1702		break;
1703	default:
1704		node = NULL;
1705		return -EINVAL;
1706	}
1707
1708	nchildren = nilfs_btree_node_get_nchildren(node);
1709	if (nchildren < nitems)
1710		nitems = nchildren;
1711	dkeys = nilfs_btree_node_dkeys(node);
1712	dptrs = nilfs_btree_node_dptrs(node, ncmax);
1713	for (i = 0; i < nitems; i++) {
1714		keys[i] = le64_to_cpu(dkeys[i]);
1715		ptrs[i] = le64_to_cpu(dptrs[i]);
1716	}
1717
1718	if (bh != NULL)
1719		brelse(bh);
1720
1721	return nitems;
1722}
1723
1724static int
1725nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1726				       union nilfs_bmap_ptr_req *dreq,
1727				       union nilfs_bmap_ptr_req *nreq,
1728				       struct buffer_head **bhp,
1729				       struct nilfs_bmap_stats *stats)
1730{
1731	struct buffer_head *bh;
1732	struct inode *dat = NULL;
1733	int ret;
1734
1735	stats->bs_nblocks = 0;
1736
1737	/* for data */
1738	/* cannot find near ptr */
1739	if (NILFS_BMAP_USE_VBN(btree)) {
1740		dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1741		dat = nilfs_bmap_get_dat(btree);
1742	}
1743
 
 
 
 
1744	ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1745	if (ret < 0)
1746		return ret;
1747
1748	*bhp = NULL;
1749	stats->bs_nblocks++;
1750	if (nreq != NULL) {
1751		nreq->bpr_ptr = dreq->bpr_ptr + 1;
1752		ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1753		if (ret < 0)
1754			goto err_out_dreq;
1755
1756		ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1757		if (ret < 0)
1758			goto err_out_nreq;
1759
1760		*bhp = bh;
1761		stats->bs_nblocks++;
1762	}
1763
1764	/* success */
1765	return 0;
1766
1767	/* error */
1768 err_out_nreq:
1769	nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1770 err_out_dreq:
1771	nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1772	stats->bs_nblocks = 0;
1773	return ret;
1774
1775}
1776
1777static void
1778nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1779				      __u64 key, __u64 ptr,
1780				      const __u64 *keys, const __u64 *ptrs,
1781				      int n,
1782				      union nilfs_bmap_ptr_req *dreq,
1783				      union nilfs_bmap_ptr_req *nreq,
1784				      struct buffer_head *bh)
1785{
1786	struct nilfs_btree_node *node;
1787	struct inode *dat;
1788	__u64 tmpptr;
1789	int ncblk;
1790
1791	/* free resources */
1792	if (btree->b_ops->bop_clear != NULL)
1793		btree->b_ops->bop_clear(btree);
1794
1795	/* ptr must be a pointer to a buffer head. */
1796	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1797
1798	/* convert and insert */
1799	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1800	__nilfs_btree_init(btree);
1801	if (nreq != NULL) {
1802		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1803		nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1804
1805		/* create child node at level 1 */
1806		node = (struct nilfs_btree_node *)bh->b_data;
1807		ncblk = nilfs_btree_nchildren_per_block(btree);
1808		nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1809		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1810		if (!buffer_dirty(bh))
1811			mark_buffer_dirty(bh);
1812		if (!nilfs_bmap_dirty(btree))
1813			nilfs_bmap_set_dirty(btree);
1814
1815		brelse(bh);
1816
1817		/* create root node at level 2 */
1818		node = nilfs_btree_get_root(btree);
1819		tmpptr = nreq->bpr_ptr;
1820		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1821				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1822				      &keys[0], &tmpptr);
1823	} else {
1824		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1825
1826		/* create root node at level 1 */
1827		node = nilfs_btree_get_root(btree);
1828		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1829				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1830				      keys, ptrs);
1831		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1832					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1833		if (!nilfs_bmap_dirty(btree))
1834			nilfs_bmap_set_dirty(btree);
1835	}
1836
1837	if (NILFS_BMAP_USE_VBN(btree))
1838		nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1839}
1840
1841/**
1842 * nilfs_btree_convert_and_insert -
1843 * @bmap:
1844 * @key:
1845 * @ptr:
1846 * @keys:
1847 * @ptrs:
1848 * @n:
1849 */
1850int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1851				   __u64 key, __u64 ptr,
1852				   const __u64 *keys, const __u64 *ptrs, int n)
1853{
1854	struct buffer_head *bh = NULL;
1855	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1856	struct nilfs_bmap_stats stats;
1857	int ret;
1858
1859	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1860		di = &dreq;
1861		ni = NULL;
1862	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1863			   1 << btree->b_inode->i_blkbits)) {
1864		di = &dreq;
1865		ni = &nreq;
1866	} else {
1867		di = NULL;
1868		ni = NULL;
1869		BUG();
1870	}
1871
1872	ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1873						     &stats);
1874	if (ret < 0)
1875		return ret;
1876	nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1877					      di, ni, bh);
1878	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1879	return 0;
1880}
1881
1882static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1883				   struct nilfs_btree_path *path,
1884				   int level,
1885				   struct buffer_head *bh)
1886{
1887	while ((++level < nilfs_btree_height(btree) - 1) &&
1888	       !buffer_dirty(path[level].bp_bh))
1889		mark_buffer_dirty(path[level].bp_bh);
1890
1891	return 0;
1892}
1893
1894static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1895					struct nilfs_btree_path *path,
1896					int level, struct inode *dat)
1897{
1898	struct nilfs_btree_node *parent;
1899	int ncmax, ret;
1900
1901	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1902	path[level].bp_oldreq.bpr_ptr =
1903		nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1904					 ncmax);
1905	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1906	ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1907				       &path[level].bp_newreq.bpr_req);
1908	if (ret < 0)
1909		return ret;
1910
1911	if (buffer_nilfs_node(path[level].bp_bh)) {
1912		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1913		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1914		path[level].bp_ctxt.bh = path[level].bp_bh;
1915		ret = nilfs_btnode_prepare_change_key(
1916			&NILFS_BMAP_I(btree)->i_btnode_cache,
1917			&path[level].bp_ctxt);
1918		if (ret < 0) {
1919			nilfs_dat_abort_update(dat,
1920					       &path[level].bp_oldreq.bpr_req,
1921					       &path[level].bp_newreq.bpr_req);
1922			return ret;
1923		}
1924	}
1925
1926	return 0;
1927}
1928
1929static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1930					struct nilfs_btree_path *path,
1931					int level, struct inode *dat)
1932{
1933	struct nilfs_btree_node *parent;
1934	int ncmax;
1935
1936	nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1937				&path[level].bp_newreq.bpr_req,
1938				btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1939
1940	if (buffer_nilfs_node(path[level].bp_bh)) {
1941		nilfs_btnode_commit_change_key(
1942			&NILFS_BMAP_I(btree)->i_btnode_cache,
1943			&path[level].bp_ctxt);
1944		path[level].bp_bh = path[level].bp_ctxt.bh;
1945	}
1946	set_buffer_nilfs_volatile(path[level].bp_bh);
1947
1948	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1949	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1950				 path[level].bp_newreq.bpr_ptr, ncmax);
1951}
1952
1953static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1954				       struct nilfs_btree_path *path,
1955				       int level, struct inode *dat)
1956{
1957	nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1958			       &path[level].bp_newreq.bpr_req);
1959	if (buffer_nilfs_node(path[level].bp_bh))
1960		nilfs_btnode_abort_change_key(
1961			&NILFS_BMAP_I(btree)->i_btnode_cache,
1962			&path[level].bp_ctxt);
1963}
1964
1965static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1966					   struct nilfs_btree_path *path,
1967					   int minlevel, int *maxlevelp,
1968					   struct inode *dat)
1969{
1970	int level, ret;
1971
1972	level = minlevel;
1973	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1974		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1975		if (ret < 0)
1976			return ret;
1977	}
1978	while ((++level < nilfs_btree_height(btree) - 1) &&
1979	       !buffer_dirty(path[level].bp_bh)) {
1980
1981		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1982		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1983		if (ret < 0)
1984			goto out;
1985	}
1986
1987	/* success */
1988	*maxlevelp = level - 1;
1989	return 0;
1990
1991	/* error */
1992 out:
1993	while (--level > minlevel)
1994		nilfs_btree_abort_update_v(btree, path, level, dat);
1995	if (!buffer_nilfs_volatile(path[level].bp_bh))
1996		nilfs_btree_abort_update_v(btree, path, level, dat);
1997	return ret;
1998}
1999
2000static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2001					   struct nilfs_btree_path *path,
2002					   int minlevel, int maxlevel,
2003					   struct buffer_head *bh,
2004					   struct inode *dat)
2005{
2006	int level;
2007
2008	if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2009		nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2010
2011	for (level = minlevel + 1; level <= maxlevel; level++)
2012		nilfs_btree_commit_update_v(btree, path, level, dat);
2013}
2014
2015static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2016				   struct nilfs_btree_path *path,
2017				   int level, struct buffer_head *bh)
2018{
2019	int maxlevel = 0, ret;
2020	struct nilfs_btree_node *parent;
2021	struct inode *dat = nilfs_bmap_get_dat(btree);
2022	__u64 ptr;
2023	int ncmax;
2024
2025	get_bh(bh);
2026	path[level].bp_bh = bh;
2027	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2028					      dat);
2029	if (ret < 0)
2030		goto out;
2031
2032	if (buffer_nilfs_volatile(path[level].bp_bh)) {
2033		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2034		ptr = nilfs_btree_node_get_ptr(parent,
2035					       path[level + 1].bp_index,
2036					       ncmax);
2037		ret = nilfs_dat_mark_dirty(dat, ptr);
2038		if (ret < 0)
2039			goto out;
2040	}
2041
2042	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2043
2044 out:
2045	brelse(path[level].bp_bh);
2046	path[level].bp_bh = NULL;
2047	return ret;
2048}
2049
2050static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2051				 struct buffer_head *bh)
2052{
2053	struct nilfs_btree_path *path;
2054	struct nilfs_btree_node *node;
2055	__u64 key;
2056	int level, ret;
2057
2058	WARN_ON(!buffer_dirty(bh));
2059
2060	path = nilfs_btree_alloc_path();
2061	if (path == NULL)
2062		return -ENOMEM;
2063
2064	if (buffer_nilfs_node(bh)) {
2065		node = (struct nilfs_btree_node *)bh->b_data;
2066		key = nilfs_btree_node_get_key(node, 0);
2067		level = nilfs_btree_node_get_level(node);
2068	} else {
2069		key = nilfs_bmap_data_get_key(btree, bh);
2070		level = NILFS_BTREE_LEVEL_DATA;
2071	}
2072
2073	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2074	if (ret < 0) {
2075		if (unlikely(ret == -ENOENT))
2076			printk(KERN_CRIT "%s: key = %llu, level == %d\n",
2077			       __func__, (unsigned long long)key, level);
 
 
2078		goto out;
2079	}
2080
2081	ret = NILFS_BMAP_USE_VBN(btree) ?
2082		nilfs_btree_propagate_v(btree, path, level, bh) :
2083		nilfs_btree_propagate_p(btree, path, level, bh);
2084
2085 out:
2086	nilfs_btree_free_path(path);
2087
2088	return ret;
2089}
2090
2091static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2092				    struct buffer_head *bh)
2093{
2094	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2095}
2096
2097static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2098					 struct list_head *lists,
2099					 struct buffer_head *bh)
2100{
2101	struct list_head *head;
2102	struct buffer_head *cbh;
2103	struct nilfs_btree_node *node, *cnode;
2104	__u64 key, ckey;
2105	int level;
2106
2107	get_bh(bh);
2108	node = (struct nilfs_btree_node *)bh->b_data;
2109	key = nilfs_btree_node_get_key(node, 0);
2110	level = nilfs_btree_node_get_level(node);
2111	if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2112	    level >= NILFS_BTREE_LEVEL_MAX) {
2113		dump_stack();
2114		printk(KERN_WARNING
2115		       "%s: invalid btree level: %d (key=%llu, ino=%lu, "
2116		       "blocknr=%llu)\n",
2117		       __func__, level, (unsigned long long)key,
2118		       NILFS_BMAP_I(btree)->vfs_inode.i_ino,
2119		       (unsigned long long)bh->b_blocknr);
2120		return;
2121	}
2122
2123	list_for_each(head, &lists[level]) {
2124		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2125		cnode = (struct nilfs_btree_node *)cbh->b_data;
2126		ckey = nilfs_btree_node_get_key(cnode, 0);
2127		if (key < ckey)
2128			break;
2129	}
2130	list_add_tail(&bh->b_assoc_buffers, head);
2131}
2132
2133static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2134					     struct list_head *listp)
2135{
2136	struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
 
2137	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2138	struct pagevec pvec;
2139	struct buffer_head *bh, *head;
2140	pgoff_t index = 0;
2141	int level, i;
2142
2143	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2144	     level < NILFS_BTREE_LEVEL_MAX;
2145	     level++)
2146		INIT_LIST_HEAD(&lists[level]);
2147
2148	pagevec_init(&pvec, 0);
2149
2150	while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2151				  PAGEVEC_SIZE)) {
2152		for (i = 0; i < pagevec_count(&pvec); i++) {
2153			bh = head = page_buffers(pvec.pages[i]);
2154			do {
2155				if (buffer_dirty(bh))
2156					nilfs_btree_add_dirty_buffer(btree,
2157								     lists, bh);
2158			} while ((bh = bh->b_this_page) != head);
2159		}
2160		pagevec_release(&pvec);
2161		cond_resched();
2162	}
2163
2164	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2165	     level < NILFS_BTREE_LEVEL_MAX;
2166	     level++)
2167		list_splice_tail(&lists[level], listp);
2168}
2169
2170static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2171				struct nilfs_btree_path *path,
2172				int level,
2173				struct buffer_head **bh,
2174				sector_t blocknr,
2175				union nilfs_binfo *binfo)
2176{
2177	struct nilfs_btree_node *parent;
2178	__u64 key;
2179	__u64 ptr;
2180	int ncmax, ret;
2181
2182	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2183	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2184				       ncmax);
2185	if (buffer_nilfs_node(*bh)) {
2186		path[level].bp_ctxt.oldkey = ptr;
2187		path[level].bp_ctxt.newkey = blocknr;
2188		path[level].bp_ctxt.bh = *bh;
2189		ret = nilfs_btnode_prepare_change_key(
2190			&NILFS_BMAP_I(btree)->i_btnode_cache,
2191			&path[level].bp_ctxt);
2192		if (ret < 0)
2193			return ret;
2194		nilfs_btnode_commit_change_key(
2195			&NILFS_BMAP_I(btree)->i_btnode_cache,
2196			&path[level].bp_ctxt);
2197		*bh = path[level].bp_ctxt.bh;
2198	}
2199
2200	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2201				 ncmax);
2202
2203	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2204	/* on-disk format */
2205	binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2206	binfo->bi_dat.bi_level = level;
2207
2208	return 0;
2209}
2210
2211static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2212				struct nilfs_btree_path *path,
2213				int level,
2214				struct buffer_head **bh,
2215				sector_t blocknr,
2216				union nilfs_binfo *binfo)
2217{
2218	struct nilfs_btree_node *parent;
2219	struct inode *dat = nilfs_bmap_get_dat(btree);
2220	__u64 key;
2221	__u64 ptr;
2222	union nilfs_bmap_ptr_req req;
2223	int ncmax, ret;
2224
2225	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2226	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2227				       ncmax);
2228	req.bpr_ptr = ptr;
2229	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2230	if (ret < 0)
2231		return ret;
2232	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2233
2234	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2235	/* on-disk format */
2236	binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2237	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2238
2239	return 0;
2240}
2241
2242static int nilfs_btree_assign(struct nilfs_bmap *btree,
2243			      struct buffer_head **bh,
2244			      sector_t blocknr,
2245			      union nilfs_binfo *binfo)
2246{
2247	struct nilfs_btree_path *path;
2248	struct nilfs_btree_node *node;
2249	__u64 key;
2250	int level, ret;
2251
2252	path = nilfs_btree_alloc_path();
2253	if (path == NULL)
2254		return -ENOMEM;
2255
2256	if (buffer_nilfs_node(*bh)) {
2257		node = (struct nilfs_btree_node *)(*bh)->b_data;
2258		key = nilfs_btree_node_get_key(node, 0);
2259		level = nilfs_btree_node_get_level(node);
2260	} else {
2261		key = nilfs_bmap_data_get_key(btree, *bh);
2262		level = NILFS_BTREE_LEVEL_DATA;
2263	}
2264
2265	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2266	if (ret < 0) {
2267		WARN_ON(ret == -ENOENT);
2268		goto out;
2269	}
2270
2271	ret = NILFS_BMAP_USE_VBN(btree) ?
2272		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2273		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2274
2275 out:
2276	nilfs_btree_free_path(path);
2277
2278	return ret;
2279}
2280
2281static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2282				 struct buffer_head **bh,
2283				 sector_t blocknr,
2284				 union nilfs_binfo *binfo)
2285{
2286	struct nilfs_btree_node *node;
2287	__u64 key;
2288	int ret;
2289
2290	ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2291			     blocknr);
2292	if (ret < 0)
2293		return ret;
2294
2295	if (buffer_nilfs_node(*bh)) {
2296		node = (struct nilfs_btree_node *)(*bh)->b_data;
2297		key = nilfs_btree_node_get_key(node, 0);
2298	} else
2299		key = nilfs_bmap_data_get_key(btree, *bh);
2300
2301	/* on-disk format */
2302	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2303	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2304
2305	return 0;
2306}
2307
2308static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2309{
2310	struct buffer_head *bh;
2311	struct nilfs_btree_path *path;
2312	__u64 ptr;
2313	int ret;
2314
2315	path = nilfs_btree_alloc_path();
2316	if (path == NULL)
2317		return -ENOMEM;
2318
2319	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2320	if (ret < 0) {
2321		WARN_ON(ret == -ENOENT);
2322		goto out;
2323	}
2324	ret = nilfs_btree_get_block(btree, ptr, &bh);
2325	if (ret < 0) {
2326		WARN_ON(ret == -ENOENT);
2327		goto out;
2328	}
2329
2330	if (!buffer_dirty(bh))
2331		mark_buffer_dirty(bh);
2332	brelse(bh);
2333	if (!nilfs_bmap_dirty(btree))
2334		nilfs_bmap_set_dirty(btree);
2335
2336 out:
2337	nilfs_btree_free_path(path);
2338	return ret;
2339}
2340
2341static const struct nilfs_bmap_operations nilfs_btree_ops = {
2342	.bop_lookup		=	nilfs_btree_lookup,
2343	.bop_lookup_contig	=	nilfs_btree_lookup_contig,
2344	.bop_insert		=	nilfs_btree_insert,
2345	.bop_delete		=	nilfs_btree_delete,
2346	.bop_clear		=	NULL,
2347
2348	.bop_propagate		=	nilfs_btree_propagate,
2349
2350	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2351
2352	.bop_assign		=	nilfs_btree_assign,
2353	.bop_mark		=	nilfs_btree_mark,
2354
2355	.bop_seek_key		=	nilfs_btree_seek_key,
2356	.bop_last_key		=	nilfs_btree_last_key,
2357
2358	.bop_check_insert	=	NULL,
2359	.bop_check_delete	=	nilfs_btree_check_delete,
2360	.bop_gather_data	=	nilfs_btree_gather_data,
2361};
2362
2363static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2364	.bop_lookup		=	NULL,
2365	.bop_lookup_contig	=	NULL,
2366	.bop_insert		=	NULL,
2367	.bop_delete		=	NULL,
2368	.bop_clear		=	NULL,
2369
2370	.bop_propagate		=	nilfs_btree_propagate_gc,
2371
2372	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2373
2374	.bop_assign		=	nilfs_btree_assign_gc,
2375	.bop_mark		=	NULL,
2376
2377	.bop_seek_key		=	NULL,
2378	.bop_last_key		=	NULL,
2379
2380	.bop_check_insert	=	NULL,
2381	.bop_check_delete	=	NULL,
2382	.bop_gather_data	=	NULL,
2383};
2384
2385static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2386{
2387	bmap->b_ops = &nilfs_btree_ops;
2388	bmap->b_nchildren_per_block =
2389		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2390}
2391
2392int nilfs_btree_init(struct nilfs_bmap *bmap)
2393{
2394	int ret = 0;
2395
2396	__nilfs_btree_init(bmap);
2397
2398	if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap),
2399				    bmap->b_inode->i_ino))
2400		ret = -EIO;
 
 
 
 
2401	return ret;
2402}
2403
2404void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2405{
2406	bmap->b_ops = &nilfs_btree_ops_gc;
2407	bmap->b_nchildren_per_block =
2408		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2409}