   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * btree.c - NILFS B-tree.
   4 *
   5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
   6 *
   7 * Written by Koji Sato.
   8 */
   9
  10#include <linux/slab.h>
  11#include <linux/string.h>
  12#include <linux/errno.h>
  13#include <linux/pagevec.h>
  14#include "nilfs.h"
  15#include "page.h"
  16#include "btnode.h"
  17#include "btree.h"
  18#include "alloc.h"
  19#include "dat.h"
  20
  21static void __nilfs_btree_init(struct nilfs_bmap *bmap);
  22
  23static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
  24{
  25	struct nilfs_btree_path *path;
  26	int level = NILFS_BTREE_LEVEL_DATA;
  27
  28	path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
  29	if (path == NULL)
  30		goto out;
  31
  32	for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
  33		path[level].bp_bh = NULL;
  34		path[level].bp_sib_bh = NULL;
  35		path[level].bp_index = 0;
  36		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  37		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  38		path[level].bp_op = NULL;
  39	}
  40
  41out:
  42	return path;
  43}
  44
  45static void nilfs_btree_free_path(struct nilfs_btree_path *path)
  46{
  47	int level = NILFS_BTREE_LEVEL_DATA;
  48
  49	for (; level < NILFS_BTREE_LEVEL_MAX; level++)
  50		brelse(path[level].bp_bh);
  51
  52	kmem_cache_free(nilfs_btree_path_cache, path);
  53}
  54
  55/*
  56 * B-tree node operations
  57 */
  58static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
  59				     __u64 ptr, struct buffer_head **bhp)
  60{
  61	struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
  62	struct buffer_head *bh;
  63
  64	bh = nilfs_btnode_create_block(btnc, ptr);
  65	if (!bh)
  66		return -ENOMEM;
  67
  68	set_buffer_nilfs_volatile(bh);
  69	*bhp = bh;
  70	return 0;
  71}
  72
  73static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
  74{
  75	return node->bn_flags;
  76}
  77
  78static void
  79nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
  80{
  81	node->bn_flags = flags;
  82}
  83
  84static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
  85{
  86	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
  87}
  88
  89static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
  90{
  91	return node->bn_level;
  92}
  93
  94static void
  95nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
  96{
  97	node->bn_level = level;
  98}
  99
 100static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
 101{
 102	return le16_to_cpu(node->bn_nchildren);
 103}
 104
 105static void
 106nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
 107{
 108	node->bn_nchildren = cpu_to_le16(nchildren);
 109}
 110
 111static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
 112{
 113	return i_blocksize(btree->b_inode);
 114}
 115
 116static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
 117{
 118	return btree->b_nchildren_per_block;
 119}
 120
 121static __le64 *
 122nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
 123{
 124	return (__le64 *)((char *)(node + 1) +
 125			  (nilfs_btree_node_root(node) ?
 126			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
 127}
 128
 129static __le64 *
 130nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
 131{
 132	return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
 133}
 134
 135static __u64
 136nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
 137{
 138	return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
 139}
 140
 141static void
 142nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
 143{
 144	*(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
 145}
 146
 147static __u64
 148nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
 149			 int ncmax)
 150{
 151	return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
 152}
 153
 154static void
 155nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
 156			 int ncmax)
 157{
 158	*(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
 159}
 160
 161static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
 162				  int level, int nchildren, int ncmax,
 163				  const __u64 *keys, const __u64 *ptrs)
 164{
 165	__le64 *dkeys;
 166	__le64 *dptrs;
 167	int i;
 168
 169	nilfs_btree_node_set_flags(node, flags);
 170	nilfs_btree_node_set_level(node, level);
 171	nilfs_btree_node_set_nchildren(node, nchildren);
 172
 173	dkeys = nilfs_btree_node_dkeys(node);
 174	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 175	for (i = 0; i < nchildren; i++) {
 176		dkeys[i] = cpu_to_le64(keys[i]);
 177		dptrs[i] = cpu_to_le64(ptrs[i]);
 178	}
 179}
 180
 181/* Assume the buffer heads corresponding to left and right are locked. */
 182static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
 183				       struct nilfs_btree_node *right,
 184				       int n, int lncmax, int rncmax)
 185{
 186	__le64 *ldkeys, *rdkeys;
 187	__le64 *ldptrs, *rdptrs;
 188	int lnchildren, rnchildren;
 189
 190	ldkeys = nilfs_btree_node_dkeys(left);
 191	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 192	lnchildren = nilfs_btree_node_get_nchildren(left);
 193
 194	rdkeys = nilfs_btree_node_dkeys(right);
 195	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 196	rnchildren = nilfs_btree_node_get_nchildren(right);
 197
 198	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
 199	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
 200	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
 201	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
 202
 203	lnchildren += n;
 204	rnchildren -= n;
 205	nilfs_btree_node_set_nchildren(left, lnchildren);
 206	nilfs_btree_node_set_nchildren(right, rnchildren);
 207}
 208
 209/* Assume that the buffer heads corresponding to left and right are locked. */
 210static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
 211					struct nilfs_btree_node *right,
 212					int n, int lncmax, int rncmax)
 213{
 214	__le64 *ldkeys, *rdkeys;
 215	__le64 *ldptrs, *rdptrs;
 216	int lnchildren, rnchildren;
 217
 218	ldkeys = nilfs_btree_node_dkeys(left);
 219	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 220	lnchildren = nilfs_btree_node_get_nchildren(left);
 221
 222	rdkeys = nilfs_btree_node_dkeys(right);
 223	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 224	rnchildren = nilfs_btree_node_get_nchildren(right);
 225
 226	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
 227	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
 228	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
 229	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
 230
 231	lnchildren -= n;
 232	rnchildren += n;
 233	nilfs_btree_node_set_nchildren(left, lnchildren);
 234	nilfs_btree_node_set_nchildren(right, rnchildren);
 235}
 236
 237/* Assume that the buffer head corresponding to node is locked. */
 238static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
 239				    __u64 key, __u64 ptr, int ncmax)
 240{
 241	__le64 *dkeys;
 242	__le64 *dptrs;
 243	int nchildren;
 244
 245	dkeys = nilfs_btree_node_dkeys(node);
 246	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 247	nchildren = nilfs_btree_node_get_nchildren(node);
 248	if (index < nchildren) {
 249		memmove(dkeys + index + 1, dkeys + index,
 250			(nchildren - index) * sizeof(*dkeys));
 251		memmove(dptrs + index + 1, dptrs + index,
 252			(nchildren - index) * sizeof(*dptrs));
 253	}
 254	dkeys[index] = cpu_to_le64(key);
 255	dptrs[index] = cpu_to_le64(ptr);
 256	nchildren++;
 257	nilfs_btree_node_set_nchildren(node, nchildren);
 258}
 259
 260/* Assume that the buffer head corresponding to node is locked. */
 261static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
 262				    __u64 *keyp, __u64 *ptrp, int ncmax)
 263{
 264	__u64 key;
 265	__u64 ptr;
 266	__le64 *dkeys;
 267	__le64 *dptrs;
 268	int nchildren;
 269
 270	dkeys = nilfs_btree_node_dkeys(node);
 271	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 272	key = le64_to_cpu(dkeys[index]);
 273	ptr = le64_to_cpu(dptrs[index]);
 274	nchildren = nilfs_btree_node_get_nchildren(node);
 275	if (keyp != NULL)
 276		*keyp = key;
 277	if (ptrp != NULL)
 278		*ptrp = ptr;
 279
 280	if (index < nchildren - 1) {
 281		memmove(dkeys + index, dkeys + index + 1,
 282			(nchildren - index - 1) * sizeof(*dkeys));
 283		memmove(dptrs + index, dptrs + index + 1,
 284			(nchildren - index - 1) * sizeof(*dptrs));
 285	}
 286	nchildren--;
 287	nilfs_btree_node_set_nchildren(node, nchildren);
 288}
 289
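/*
 * Binary-search @node for @key.  The resulting index is stored in
 * *@indexp: for intermediate nodes it is adjusted to the child slot to
 * descend into; for leaf nodes it is the position of the key, or the
 * insertion point if the key is absent.  Returns nonzero iff the key
 * was found.
 */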
 290static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
 291				   __u64 key, int *indexp)
 292{
 293	__u64 nkey;
 294	int index, low, high, s;
 295
 296	/* binary search */
 297	low = 0;
 298	high = nilfs_btree_node_get_nchildren(node) - 1;
 299	index = 0;
 300	s = 0;
 301	while (low <= high) {
 302		index = (low + high) / 2;
 303		nkey = nilfs_btree_node_get_key(node, index);
 304		if (nkey == key) {
 305			s = 0;
 306			goto out;
 307		} else if (nkey < key) {
 308			low = index + 1;
 309			s = -1;
 310		} else {
 311			high = index - 1;
 312			s = 1;
 313		}
 314	}
 315
 316	/* adjust index */
 317	if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
 318		if (s > 0 && index > 0)
 319			index--;
 320	} else if (s < 0)
 321		index++;
 322
 323 out:
 324	*indexp = index;
 325
 326	return s == 0;
 327}
 328
 329/**
 330 * nilfs_btree_node_broken - verify consistency of btree node
 331 * @node: btree node block to be examined
 332 * @size: node size (in bytes)
 333 * @inode: host inode of btree
 334 * @blocknr: block number
 335 *
 336 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 337 */
 338static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
 339				   size_t size, struct inode *inode,
 340				   sector_t blocknr)
 341{
 342	int level, flags, nchildren;
 343	int ret = 0;
 344
 345	level = nilfs_btree_node_get_level(node);
 346	flags = nilfs_btree_node_get_flags(node);
 347	nchildren = nilfs_btree_node_get_nchildren(node);
 348
 349	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
 350		     level >= NILFS_BTREE_LEVEL_MAX ||
 351		     (flags & NILFS_BTREE_NODE_ROOT) ||
 352		     nchildren < 0 ||
 353		     nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
 354		nilfs_msg(inode->i_sb, KERN_CRIT,
 355			  "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
 356			  inode->i_ino, (unsigned long long)blocknr, level,
 357			  flags, nchildren);
 358		ret = 1;
 359	}
 360	return ret;
 361}
 362
 363/**
 364 * nilfs_btree_root_broken - verify consistency of btree root node
 365 * @node: btree root node to be examined
 366 * @inode: host inode of btree
 367 *
 368 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 369 */
 370static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
 371				   struct inode *inode)
 372{
 373	int level, flags, nchildren;
 374	int ret = 0;
 375
 376	level = nilfs_btree_node_get_level(node);
 377	flags = nilfs_btree_node_get_flags(node);
 378	nchildren = nilfs_btree_node_get_nchildren(node);
 379
 380	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
 381		     level >= NILFS_BTREE_LEVEL_MAX ||
 382		     nchildren < 0 ||
 383		     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
 384		nilfs_msg(inode->i_sb, KERN_CRIT,
 385			  "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
 386			  inode->i_ino, level, flags, nchildren);
 387		ret = 1;
 388	}
 389	return ret;
 390}
 391
 392int nilfs_btree_broken_node_block(struct buffer_head *bh)
 393{
 394	struct inode *inode;
 395	int ret;
 396
 397	if (buffer_nilfs_checked(bh))
 398		return 0;
 399
 400	inode = bh->b_page->mapping->host;
 401	ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
 402				      bh->b_size, inode, bh->b_blocknr);
 403	if (likely(!ret))
 404		set_buffer_nilfs_checked(bh);
 405	return ret;
 406}
 407
 408static struct nilfs_btree_node *
 409nilfs_btree_get_root(const struct nilfs_bmap *btree)
 410{
 411	return (struct nilfs_btree_node *)btree->b_u.u_data;
 412}
 413
 414static struct nilfs_btree_node *
 415nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
 416{
 417	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
 418}
 419
 420static struct nilfs_btree_node *
 421nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
 422{
 423	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
 424}
 425
 426static int nilfs_btree_height(const struct nilfs_bmap *btree)
 427{
 428	return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
 429}
 430
 431static struct nilfs_btree_node *
 432nilfs_btree_get_node(const struct nilfs_bmap *btree,
 433		     const struct nilfs_btree_path *path,
 434		     int level, int *ncmaxp)
 435{
 436	struct nilfs_btree_node *node;
 437
 438	if (level == nilfs_btree_height(btree) - 1) {
 439		node = nilfs_btree_get_root(btree);
 440		*ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
 441	} else {
 442		node = nilfs_btree_get_nonroot_node(path, level);
 443		*ncmaxp = nilfs_btree_nchildren_per_block(btree);
 444	}
 445	return node;
 446}
 447
 448static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
 449				struct nilfs_btree_node *node, int level)
 450{
 451	if (unlikely(nilfs_btree_node_get_level(node) != level)) {
 452		dump_stack();
 453		nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
 454			  "btree level mismatch (ino=%lu): %d != %d",
 455			  btree->b_inode->i_ino,
 456			  nilfs_btree_node_get_level(node), level);
 457		return 1;
 458	}
 459	return 0;
 460}
 461
 462struct nilfs_btree_readahead_info {
 463	struct nilfs_btree_node *node;	/* parent node */
  464	int max_ra_blocks;		/* max number of blocks to read ahead */
  465	int index;			/* current index on the parent node */
  466	int ncmax;			/* number of children in the parent node */
 467};
 468
 469static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 470				   struct buffer_head **bhp,
 471				   const struct nilfs_btree_readahead_info *ra)
 472{
 473	struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
 474	struct buffer_head *bh, *ra_bh;
 475	sector_t submit_ptr = 0;
 476	int ret;
 477
 478	ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh,
 479					&submit_ptr);
 480	if (ret) {
 481		if (ret != -EEXIST)
 482			return ret;
 483		goto out_check;
 484	}
 485
 486	if (ra) {
 487		int i, n;
 488		__u64 ptr2;
 489
 490		/* read ahead sibling nodes */
 491		for (n = ra->max_ra_blocks, i = ra->index + 1;
 492		     n > 0 && i < ra->ncmax; n--, i++) {
 493			ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
 494
 495			ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
 496							REQ_OP_READ, REQ_RAHEAD,
 497							&ra_bh, &submit_ptr);
 498			if (likely(!ret || ret == -EEXIST))
 499				brelse(ra_bh);
 500			else if (ret != -EBUSY)
 501				break;
 502			if (!buffer_locked(bh))
 503				goto out_no_wait;
 504		}
 505	}
 506
 507	wait_on_buffer(bh);
 508
 509 out_no_wait:
 510	if (!buffer_uptodate(bh)) {
 511		nilfs_msg(btree->b_inode->i_sb, KERN_ERR,
 512			  "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
 513			  btree->b_inode->i_ino, (unsigned long long)ptr);
 514		brelse(bh);
 515		return -EIO;
 516	}
 517
 518 out_check:
 519	if (nilfs_btree_broken_node_block(bh)) {
 520		clear_buffer_uptodate(bh);
 521		brelse(bh);
 522		return -EINVAL;
 523	}
 524
 525	*bhp = bh;
 526	return 0;
 527}
 528
 529static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 530				   struct buffer_head **bhp)
 531{
 532	return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
 533}
 534
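/*
 * Descend from the root to @minlevel looking up @key, recording the
 * buffer heads and indices of the visited nodes in @path.  The pointer
 * found at @minlevel is stored in *@ptrp.  If @readahead is set,
 * sibling node blocks are read ahead while descending to the lowest
 * level.  Returns 0 on success, -ENOENT if the key does not exist, or
 * another negative error code on failure.
 */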
 535static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
 536				 struct nilfs_btree_path *path,
 537				 __u64 key, __u64 *ptrp, int minlevel,
 538				 int readahead)
 539{
 540	struct nilfs_btree_node *node;
 541	struct nilfs_btree_readahead_info p, *ra;
 542	__u64 ptr;
 543	int level, index, found, ncmax, ret;
 544
 545	node = nilfs_btree_get_root(btree);
 546	level = nilfs_btree_node_get_level(node);
 547	if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
 548		return -ENOENT;
 549
 550	found = nilfs_btree_node_lookup(node, key, &index);
 551	ptr = nilfs_btree_node_get_ptr(node, index,
 552				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 553	path[level].bp_bh = NULL;
 554	path[level].bp_index = index;
 555
 556	ncmax = nilfs_btree_nchildren_per_block(btree);
 557
 558	while (--level >= minlevel) {
 559		ra = NULL;
 560		if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
 561			p.node = nilfs_btree_get_node(btree, path, level + 1,
 562						      &p.ncmax);
 563			p.index = index;
 564			p.max_ra_blocks = 7;
 565			ra = &p;
 566		}
 567		ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
 568					      ra);
 569		if (ret < 0)
 570			return ret;
 571
 572		node = nilfs_btree_get_nonroot_node(path, level);
 573		if (nilfs_btree_bad_node(btree, node, level))
 574			return -EINVAL;
 575		if (!found)
 576			found = nilfs_btree_node_lookup(node, key, &index);
 577		else
 578			index = 0;
 579		if (index < ncmax) {
 580			ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 581		} else {
 582			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
 583			/* insert */
 584			ptr = NILFS_BMAP_INVALID_PTR;
 585		}
 586		path[level].bp_index = index;
 587	}
 588	if (!found)
 589		return -ENOENT;
 590
 591	if (ptrp != NULL)
 592		*ptrp = ptr;
 593
 594	return 0;
 595}
 596
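/*
 * Follow the rightmost pointers from the root down to level 1 and
 * return the key and pointer of the last (largest-key) entry of the
 * tree in *@keyp and *@ptrp.
 */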
 597static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
 598				      struct nilfs_btree_path *path,
 599				      __u64 *keyp, __u64 *ptrp)
 600{
 601	struct nilfs_btree_node *node;
 602	__u64 ptr;
 603	int index, level, ncmax, ret;
 604
 605	node = nilfs_btree_get_root(btree);
 606	index = nilfs_btree_node_get_nchildren(node) - 1;
 607	if (index < 0)
 608		return -ENOENT;
 609	level = nilfs_btree_node_get_level(node);
 610	ptr = nilfs_btree_node_get_ptr(node, index,
 611				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 612	path[level].bp_bh = NULL;
 613	path[level].bp_index = index;
 614	ncmax = nilfs_btree_nchildren_per_block(btree);
 615
 616	for (level--; level > 0; level--) {
 617		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
 618		if (ret < 0)
 619			return ret;
 620		node = nilfs_btree_get_nonroot_node(path, level);
 621		if (nilfs_btree_bad_node(btree, node, level))
 622			return -EINVAL;
 623		index = nilfs_btree_node_get_nchildren(node) - 1;
 624		ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 625		path[level].bp_index = index;
 626	}
 627
 628	if (keyp != NULL)
 629		*keyp = nilfs_btree_node_get_key(node, index);
 630	if (ptrp != NULL)
 631		*ptrp = ptr;
 632
 633	return 0;
 634}
 635
 636/**
 637 * nilfs_btree_get_next_key - get next valid key from btree path array
 638 * @btree: bmap struct of btree
 639 * @path: array of nilfs_btree_path struct
 640 * @minlevel: start level
 641 * @nextkey: place to store the next valid key
 642 *
 643 * Return Value: If a next key was found, 0 is returned. Otherwise,
 644 * -ENOENT is returned.
 645 */
 646static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
 647				    const struct nilfs_btree_path *path,
 648				    int minlevel, __u64 *nextkey)
 649{
 650	struct nilfs_btree_node *node;
 651	int maxlevel = nilfs_btree_height(btree) - 1;
 652	int index, next_adj, level;
 653
 654	/* Next index is already set to bp_index for leaf nodes. */
 655	next_adj = 0;
 656	for (level = minlevel; level <= maxlevel; level++) {
 657		if (level == maxlevel)
 658			node = nilfs_btree_get_root(btree);
 659		else
 660			node = nilfs_btree_get_nonroot_node(path, level);
 661
 662		index = path[level].bp_index + next_adj;
 663		if (index < nilfs_btree_node_get_nchildren(node)) {
 664			/* Next key is in this node */
 665			*nextkey = nilfs_btree_node_get_key(node, index);
 666			return 0;
 667		}
 668		/* For non-leaf nodes, next index is stored at bp_index + 1. */
 669		next_adj = 1;
 670	}
 671	return -ENOENT;
 672}
 673
 674static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
 675			      __u64 key, int level, __u64 *ptrp)
 676{
 677	struct nilfs_btree_path *path;
 678	int ret;
 679
 680	path = nilfs_btree_alloc_path();
 681	if (path == NULL)
 682		return -ENOMEM;
 683
 684	ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
 685
 686	nilfs_btree_free_path(path);
 687
 688	return ret;
 689}
 690
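/*
 * Look up @key and scan forward for blocks that are contiguous both in
 * key space and on disk, up to @maxblocks.  On success the starting
 * block address is stored in *@ptrp and the number of contiguous
 * blocks found is returned; a negative error code is returned on
 * failure.
 */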
 691static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
 692				     __u64 key, __u64 *ptrp,
 693				     unsigned int maxblocks)
 694{
 695	struct nilfs_btree_path *path;
 696	struct nilfs_btree_node *node;
 697	struct inode *dat = NULL;
 698	__u64 ptr, ptr2;
 699	sector_t blocknr;
 700	int level = NILFS_BTREE_LEVEL_NODE_MIN;
 701	int ret, cnt, index, maxlevel, ncmax;
 702	struct nilfs_btree_readahead_info p;
 703
 704	path = nilfs_btree_alloc_path();
 705	if (path == NULL)
 706		return -ENOMEM;
 707
 708	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
 709	if (ret < 0)
 710		goto out;
 711
 712	if (NILFS_BMAP_USE_VBN(btree)) {
 713		dat = nilfs_bmap_get_dat(btree);
 714		ret = nilfs_dat_translate(dat, ptr, &blocknr);
 715		if (ret < 0)
 716			goto out;
 717		ptr = blocknr;
 718	}
 719	cnt = 1;
 720	if (cnt == maxblocks)
 721		goto end;
 722
 723	maxlevel = nilfs_btree_height(btree) - 1;
 724	node = nilfs_btree_get_node(btree, path, level, &ncmax);
 725	index = path[level].bp_index + 1;
 726	for (;;) {
 727		while (index < nilfs_btree_node_get_nchildren(node)) {
 728			if (nilfs_btree_node_get_key(node, index) !=
 729			    key + cnt)
 730				goto end;
 731			ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
 732			if (dat) {
 733				ret = nilfs_dat_translate(dat, ptr2, &blocknr);
 734				if (ret < 0)
 735					goto out;
 736				ptr2 = blocknr;
 737			}
 738			if (ptr2 != ptr + cnt || ++cnt == maxblocks)
 739				goto end;
 740			index++;
 741			continue;
 742		}
 743		if (level == maxlevel)
 744			break;
 745
 746		/* look-up right sibling node */
 747		p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
 748		p.index = path[level + 1].bp_index + 1;
 749		p.max_ra_blocks = 7;
 750		if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
 751		    nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
 752			break;
 753		ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
 754		path[level + 1].bp_index = p.index;
 755
 756		brelse(path[level].bp_bh);
 757		path[level].bp_bh = NULL;
 758
 759		ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
 760					      &p);
 761		if (ret < 0)
 762			goto out;
 763		node = nilfs_btree_get_nonroot_node(path, level);
 764		ncmax = nilfs_btree_nchildren_per_block(btree);
 765		index = 0;
 766		path[level].bp_index = index;
 767	}
 768 end:
 769	*ptrp = ptr;
 770	ret = cnt;
 771 out:
 772	nilfs_btree_free_path(path);
 773	return ret;
 774}
 775
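/*
 * Replace the key at the current path index of @level with @key and
 * keep propagating it upward as long as the updated entry is the
 * leftmost one of its node, updating the root if the propagation
 * reaches it.
 */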
 776static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
 777				    struct nilfs_btree_path *path,
 778				    int level, __u64 key)
 779{
 780	if (level < nilfs_btree_height(btree) - 1) {
 781		do {
 782			nilfs_btree_node_set_key(
 783				nilfs_btree_get_nonroot_node(path, level),
 784				path[level].bp_index, key);
 785			if (!buffer_dirty(path[level].bp_bh))
 786				mark_buffer_dirty(path[level].bp_bh);
 787		} while ((path[level].bp_index == 0) &&
 788			 (++level < nilfs_btree_height(btree) - 1));
 789	}
 790
 791	/* root */
 792	if (level == nilfs_btree_height(btree) - 1) {
 793		nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
 794					 path[level].bp_index, key);
 795	}
 796}
 797
 798static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
 799				  struct nilfs_btree_path *path,
 800				  int level, __u64 *keyp, __u64 *ptrp)
 801{
 802	struct nilfs_btree_node *node;
 803	int ncblk;
 804
 805	if (level < nilfs_btree_height(btree) - 1) {
 806		node = nilfs_btree_get_nonroot_node(path, level);
 807		ncblk = nilfs_btree_nchildren_per_block(btree);
 808		nilfs_btree_node_insert(node, path[level].bp_index,
 809					*keyp, *ptrp, ncblk);
 810		if (!buffer_dirty(path[level].bp_bh))
 811			mark_buffer_dirty(path[level].bp_bh);
 812
 813		if (path[level].bp_index == 0)
 814			nilfs_btree_promote_key(btree, path, level + 1,
 815						nilfs_btree_node_get_key(node,
 816									 0));
 817	} else {
 818		node = nilfs_btree_get_root(btree);
 819		nilfs_btree_node_insert(node, path[level].bp_index,
 820					*keyp, *ptrp,
 821					NILFS_BTREE_ROOT_NCHILDREN_MAX);
 822	}
 823}
 824
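/*
 * Insert helper: even out the current node and its left sibling by
 * shifting entries to the left, adjust the insert position (which may
 * move into the sibling), then perform the insertion.
 */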
 825static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
 826				   struct nilfs_btree_path *path,
 827				   int level, __u64 *keyp, __u64 *ptrp)
 828{
 829	struct nilfs_btree_node *node, *left;
 830	int nchildren, lnchildren, n, move, ncblk;
 831
 832	node = nilfs_btree_get_nonroot_node(path, level);
 833	left = nilfs_btree_get_sib_node(path, level);
 834	nchildren = nilfs_btree_node_get_nchildren(node);
 835	lnchildren = nilfs_btree_node_get_nchildren(left);
 836	ncblk = nilfs_btree_nchildren_per_block(btree);
 837	move = 0;
 838
 839	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
 840	if (n > path[level].bp_index) {
 841		/* move insert point */
 842		n--;
 843		move = 1;
 844	}
 845
 846	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
 847
 848	if (!buffer_dirty(path[level].bp_bh))
 849		mark_buffer_dirty(path[level].bp_bh);
 850	if (!buffer_dirty(path[level].bp_sib_bh))
 851		mark_buffer_dirty(path[level].bp_sib_bh);
 852
 853	nilfs_btree_promote_key(btree, path, level + 1,
 854				nilfs_btree_node_get_key(node, 0));
 855
 856	if (move) {
 857		brelse(path[level].bp_bh);
 858		path[level].bp_bh = path[level].bp_sib_bh;
 859		path[level].bp_sib_bh = NULL;
 860		path[level].bp_index += lnchildren;
 861		path[level + 1].bp_index--;
 862	} else {
 863		brelse(path[level].bp_sib_bh);
 864		path[level].bp_sib_bh = NULL;
 865		path[level].bp_index -= n;
 866	}
 867
 868	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 869}
 870
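/*
 * Insert helper: even out the current node and its right sibling by
 * shifting entries to the right, adjust the insert position (which may
 * move into the sibling), then perform the insertion.
 */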
 871static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
 872				    struct nilfs_btree_path *path,
 873				    int level, __u64 *keyp, __u64 *ptrp)
 874{
 875	struct nilfs_btree_node *node, *right;
 876	int nchildren, rnchildren, n, move, ncblk;
 877
 878	node = nilfs_btree_get_nonroot_node(path, level);
 879	right = nilfs_btree_get_sib_node(path, level);
 880	nchildren = nilfs_btree_node_get_nchildren(node);
 881	rnchildren = nilfs_btree_node_get_nchildren(right);
 882	ncblk = nilfs_btree_nchildren_per_block(btree);
 883	move = 0;
 884
 885	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
 886	if (n > nchildren - path[level].bp_index) {
 887		/* move insert point */
 888		n--;
 889		move = 1;
 890	}
 891
 892	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 893
 894	if (!buffer_dirty(path[level].bp_bh))
 895		mark_buffer_dirty(path[level].bp_bh);
 896	if (!buffer_dirty(path[level].bp_sib_bh))
 897		mark_buffer_dirty(path[level].bp_sib_bh);
 898
 899	path[level + 1].bp_index++;
 900	nilfs_btree_promote_key(btree, path, level + 1,
 901				nilfs_btree_node_get_key(right, 0));
 902	path[level + 1].bp_index--;
 903
 904	if (move) {
 905		brelse(path[level].bp_bh);
 906		path[level].bp_bh = path[level].bp_sib_bh;
 907		path[level].bp_sib_bh = NULL;
 908		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 909		path[level + 1].bp_index++;
 910	} else {
 911		brelse(path[level].bp_sib_bh);
 912		path[level].bp_sib_bh = NULL;
 913	}
 914
 915	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 916}
 917
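/*
 * Insert helper: split the current node by moving its upper entries
 * into the newly allocated right sibling, insert the new entry on the
 * appropriate side, and return the sibling's first key and block
 * pointer through *@keyp and *@ptrp so that the caller inserts them
 * into the parent at the next level.
 */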
 918static void nilfs_btree_split(struct nilfs_bmap *btree,
 919			      struct nilfs_btree_path *path,
 920			      int level, __u64 *keyp, __u64 *ptrp)
 921{
 922	struct nilfs_btree_node *node, *right;
 923	int nchildren, n, move, ncblk;
 924
 925	node = nilfs_btree_get_nonroot_node(path, level);
 926	right = nilfs_btree_get_sib_node(path, level);
 927	nchildren = nilfs_btree_node_get_nchildren(node);
 928	ncblk = nilfs_btree_nchildren_per_block(btree);
 929	move = 0;
 930
 931	n = (nchildren + 1) / 2;
 932	if (n > nchildren - path[level].bp_index) {
 933		n--;
 934		move = 1;
 935	}
 936
 937	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 938
 939	if (!buffer_dirty(path[level].bp_bh))
 940		mark_buffer_dirty(path[level].bp_bh);
 941	if (!buffer_dirty(path[level].bp_sib_bh))
 942		mark_buffer_dirty(path[level].bp_sib_bh);
 943
 944	if (move) {
 945		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 946		nilfs_btree_node_insert(right, path[level].bp_index,
 947					*keyp, *ptrp, ncblk);
 948
 949		*keyp = nilfs_btree_node_get_key(right, 0);
 950		*ptrp = path[level].bp_newreq.bpr_ptr;
 951
 952		brelse(path[level].bp_bh);
 953		path[level].bp_bh = path[level].bp_sib_bh;
 954		path[level].bp_sib_bh = NULL;
 955	} else {
 956		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 957
 958		*keyp = nilfs_btree_node_get_key(right, 0);
 959		*ptrp = path[level].bp_newreq.bpr_ptr;
 960
 961		brelse(path[level].bp_sib_bh);
 962		path[level].bp_sib_bh = NULL;
 963	}
 964
 965	path[level + 1].bp_index++;
 966}
 967
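/*
 * Insert helper: increase the tree height by one.  All entries of the
 * root are moved into the newly allocated child node, the new entry is
 * inserted there, and the child's first key and block pointer are
 * returned through *@keyp and *@ptrp for insertion into the now-empty
 * root at the next level.
 */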
 968static void nilfs_btree_grow(struct nilfs_bmap *btree,
 969			     struct nilfs_btree_path *path,
 970			     int level, __u64 *keyp, __u64 *ptrp)
 971{
 972	struct nilfs_btree_node *root, *child;
 973	int n, ncblk;
 974
 975	root = nilfs_btree_get_root(btree);
 976	child = nilfs_btree_get_sib_node(path, level);
 977	ncblk = nilfs_btree_nchildren_per_block(btree);
 978
 979	n = nilfs_btree_node_get_nchildren(root);
 980
 981	nilfs_btree_node_move_right(root, child, n,
 982				    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
 983	nilfs_btree_node_set_level(root, level + 1);
 984
 985	if (!buffer_dirty(path[level].bp_sib_bh))
 986		mark_buffer_dirty(path[level].bp_sib_bh);
 987
 988	path[level].bp_bh = path[level].bp_sib_bh;
 989	path[level].bp_sib_bh = NULL;
 990
 991	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 992
 993	*keyp = nilfs_btree_node_get_key(child, 0);
 994	*ptrp = path[level].bp_newreq.bpr_ptr;
 995}
 996
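/*
 * Return a block address used as an allocation hint for a new data
 * block: the pointer of the left neighbor at the leaf level if there
 * is one, otherwise the block address of the leaf node that will hold
 * the new entry, or NILFS_BMAP_INVALID_PTR if neither is available.
 */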
 997static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
 998				   const struct nilfs_btree_path *path)
 999{
1000	struct nilfs_btree_node *node;
1001	int level, ncmax;
1002
1003	if (path == NULL)
1004		return NILFS_BMAP_INVALID_PTR;
1005
1006	/* left sibling */
1007	level = NILFS_BTREE_LEVEL_NODE_MIN;
1008	if (path[level].bp_index > 0) {
1009		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1010		return nilfs_btree_node_get_ptr(node,
1011						path[level].bp_index - 1,
1012						ncmax);
1013	}
1014
1015	/* parent */
1016	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1017	if (level <= nilfs_btree_height(btree) - 1) {
1018		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1019		return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1020						ncmax);
1021	}
1022
1023	return NILFS_BMAP_INVALID_PTR;
1024}
1025
1026static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1027				       const struct nilfs_btree_path *path,
1028				       __u64 key)
1029{
1030	__u64 ptr;
1031
1032	ptr = nilfs_bmap_find_target_seq(btree, key);
1033	if (ptr != NILFS_BMAP_INVALID_PTR)
1034		/* sequential access */
1035		return ptr;
1036
1037	ptr = nilfs_btree_find_near(btree, path);
1038	if (ptr != NILFS_BMAP_INVALID_PTR)
1039		/* near */
1040		return ptr;
1041
1042	/* block group */
1043	return nilfs_bmap_find_target_in_group(btree);
1044}
1045
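/*
 * First phase of insertion: walk up from the leaf level and decide the
 * operation to apply at each level (plain insert, carry into a
 * sibling, split, or grow the tree), allocating new block pointers and
 * sibling node blocks as needed.  The chosen operations are recorded
 * in path[level].bp_op and the number of blocks to be added is
 * accumulated in @stats; nilfs_btree_commit_insert() applies them.
 */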
1046static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1047				      struct nilfs_btree_path *path,
1048				      int *levelp, __u64 key, __u64 ptr,
1049				      struct nilfs_bmap_stats *stats)
1050{
1051	struct buffer_head *bh;
1052	struct nilfs_btree_node *node, *parent, *sib;
1053	__u64 sibptr;
1054	int pindex, level, ncmax, ncblk, ret;
1055	struct inode *dat = NULL;
1056
1057	stats->bs_nblocks = 0;
1058	level = NILFS_BTREE_LEVEL_DATA;
1059
1060	/* allocate a new ptr for data block */
1061	if (NILFS_BMAP_USE_VBN(btree)) {
1062		path[level].bp_newreq.bpr_ptr =
1063			nilfs_btree_find_target_v(btree, path, key);
1064		dat = nilfs_bmap_get_dat(btree);
1065	}
1066
1067	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1068	if (ret < 0)
1069		goto err_out_data;
1070
1071	ncblk = nilfs_btree_nchildren_per_block(btree);
1072
1073	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1074	     level < nilfs_btree_height(btree) - 1;
1075	     level++) {
1076		node = nilfs_btree_get_nonroot_node(path, level);
1077		if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1078			path[level].bp_op = nilfs_btree_do_insert;
1079			stats->bs_nblocks++;
1080			goto out;
1081		}
1082
1083		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1084		pindex = path[level + 1].bp_index;
1085
1086		/* left sibling */
1087		if (pindex > 0) {
1088			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1089							  ncmax);
1090			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1091			if (ret < 0)
1092				goto err_out_child_node;
1093			sib = (struct nilfs_btree_node *)bh->b_data;
1094			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1095				path[level].bp_sib_bh = bh;
1096				path[level].bp_op = nilfs_btree_carry_left;
1097				stats->bs_nblocks++;
1098				goto out;
1099			} else {
1100				brelse(bh);
1101			}
1102		}
1103
1104		/* right sibling */
1105		if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1106			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1107							  ncmax);
1108			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1109			if (ret < 0)
1110				goto err_out_child_node;
1111			sib = (struct nilfs_btree_node *)bh->b_data;
1112			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1113				path[level].bp_sib_bh = bh;
1114				path[level].bp_op = nilfs_btree_carry_right;
1115				stats->bs_nblocks++;
1116				goto out;
1117			} else {
1118				brelse(bh);
1119			}
1120		}
1121
1122		/* split */
1123		path[level].bp_newreq.bpr_ptr =
1124			path[level - 1].bp_newreq.bpr_ptr + 1;
1125		ret = nilfs_bmap_prepare_alloc_ptr(btree,
1126						   &path[level].bp_newreq, dat);
1127		if (ret < 0)
1128			goto err_out_child_node;
1129		ret = nilfs_btree_get_new_block(btree,
1130						path[level].bp_newreq.bpr_ptr,
1131						&bh);
1132		if (ret < 0)
1133			goto err_out_curr_node;
1134
1135		stats->bs_nblocks++;
1136
1137		sib = (struct nilfs_btree_node *)bh->b_data;
1138		nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1139		path[level].bp_sib_bh = bh;
1140		path[level].bp_op = nilfs_btree_split;
1141	}
1142
1143	/* root */
1144	node = nilfs_btree_get_root(btree);
1145	if (nilfs_btree_node_get_nchildren(node) <
1146	    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1147		path[level].bp_op = nilfs_btree_do_insert;
1148		stats->bs_nblocks++;
1149		goto out;
1150	}
1151
1152	/* grow */
1153	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1154	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1155	if (ret < 0)
1156		goto err_out_child_node;
1157	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1158					&bh);
1159	if (ret < 0)
1160		goto err_out_curr_node;
1161
1162	nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1163			      0, level, 0, ncblk, NULL, NULL);
1164	path[level].bp_sib_bh = bh;
1165	path[level].bp_op = nilfs_btree_grow;
1166
1167	level++;
1168	path[level].bp_op = nilfs_btree_do_insert;
1169
1170	/* a newly-created node block and a data block are added */
1171	stats->bs_nblocks += 2;
1172
1173	/* success */
1174 out:
1175	*levelp = level;
1176	return ret;
1177
1178	/* error */
1179 err_out_curr_node:
1180	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1181 err_out_child_node:
1182	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1183		nilfs_btnode_delete(path[level].bp_sib_bh);
1184		nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1185
1186	}
1187
1188	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1189 err_out_data:
1190	*levelp = level;
1191	stats->bs_nblocks = 0;
1192	return ret;
1193}
1194
1195static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1196				      struct nilfs_btree_path *path,
1197				      int maxlevel, __u64 key, __u64 ptr)
1198{
1199	struct inode *dat = NULL;
1200	int level;
1201
1202	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1203	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1204	if (NILFS_BMAP_USE_VBN(btree)) {
1205		nilfs_bmap_set_target_v(btree, key, ptr);
1206		dat = nilfs_bmap_get_dat(btree);
1207	}
1208
1209	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1210		nilfs_bmap_commit_alloc_ptr(btree,
1211					    &path[level - 1].bp_newreq, dat);
1212		path[level].bp_op(btree, path, level, &key, &ptr);
1213	}
1214
1215	if (!nilfs_bmap_dirty(btree))
1216		nilfs_bmap_set_dirty(btree);
1217}
1218
1219static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1220{
1221	struct nilfs_btree_path *path;
1222	struct nilfs_bmap_stats stats;
1223	int level, ret;
1224
1225	path = nilfs_btree_alloc_path();
1226	if (path == NULL)
1227		return -ENOMEM;
1228
1229	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1230				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1231	if (ret != -ENOENT) {
1232		if (ret == 0)
1233			ret = -EEXIST;
1234		goto out;
1235	}
1236
1237	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1238	if (ret < 0)
1239		goto out;
1240	nilfs_btree_commit_insert(btree, path, level, key, ptr);
1241	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1242
1243 out:
1244	nilfs_btree_free_path(path);
1245	return ret;
1246}
1247
1248static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1249				  struct nilfs_btree_path *path,
1250				  int level, __u64 *keyp, __u64 *ptrp)
1251{
1252	struct nilfs_btree_node *node;
1253	int ncblk;
1254
1255	if (level < nilfs_btree_height(btree) - 1) {
1256		node = nilfs_btree_get_nonroot_node(path, level);
1257		ncblk = nilfs_btree_nchildren_per_block(btree);
1258		nilfs_btree_node_delete(node, path[level].bp_index,
1259					keyp, ptrp, ncblk);
1260		if (!buffer_dirty(path[level].bp_bh))
1261			mark_buffer_dirty(path[level].bp_bh);
1262		if (path[level].bp_index == 0)
1263			nilfs_btree_promote_key(btree, path, level + 1,
1264				nilfs_btree_node_get_key(node, 0));
1265	} else {
1266		node = nilfs_btree_get_root(btree);
1267		nilfs_btree_node_delete(node, path[level].bp_index,
1268					keyp, ptrp,
1269					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1270	}
1271}
1272
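/*
 * Delete helper: remove the entry, then rebalance by moving entries
 * from the left sibling into the current node and promoting the new
 * first key.
 */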
1273static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1274				    struct nilfs_btree_path *path,
1275				    int level, __u64 *keyp, __u64 *ptrp)
1276{
1277	struct nilfs_btree_node *node, *left;
1278	int nchildren, lnchildren, n, ncblk;
1279
1280	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1281
1282	node = nilfs_btree_get_nonroot_node(path, level);
1283	left = nilfs_btree_get_sib_node(path, level);
1284	nchildren = nilfs_btree_node_get_nchildren(node);
1285	lnchildren = nilfs_btree_node_get_nchildren(left);
1286	ncblk = nilfs_btree_nchildren_per_block(btree);
1287
1288	n = (nchildren + lnchildren) / 2 - nchildren;
1289
1290	nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1291
1292	if (!buffer_dirty(path[level].bp_bh))
1293		mark_buffer_dirty(path[level].bp_bh);
1294	if (!buffer_dirty(path[level].bp_sib_bh))
1295		mark_buffer_dirty(path[level].bp_sib_bh);
1296
1297	nilfs_btree_promote_key(btree, path, level + 1,
1298				nilfs_btree_node_get_key(node, 0));
1299
1300	brelse(path[level].bp_sib_bh);
1301	path[level].bp_sib_bh = NULL;
1302	path[level].bp_index += n;
1303}
1304
1305static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1306				     struct nilfs_btree_path *path,
1307				     int level, __u64 *keyp, __u64 *ptrp)
1308{
1309	struct nilfs_btree_node *node, *right;
1310	int nchildren, rnchildren, n, ncblk;
1311
1312	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1313
1314	node = nilfs_btree_get_nonroot_node(path, level);
1315	right = nilfs_btree_get_sib_node(path, level);
1316	nchildren = nilfs_btree_node_get_nchildren(node);
1317	rnchildren = nilfs_btree_node_get_nchildren(right);
1318	ncblk = nilfs_btree_nchildren_per_block(btree);
1319
1320	n = (nchildren + rnchildren) / 2 - nchildren;
1321
1322	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1323
1324	if (!buffer_dirty(path[level].bp_bh))
1325		mark_buffer_dirty(path[level].bp_bh);
1326	if (!buffer_dirty(path[level].bp_sib_bh))
1327		mark_buffer_dirty(path[level].bp_sib_bh);
1328
1329	path[level + 1].bp_index++;
1330	nilfs_btree_promote_key(btree, path, level + 1,
1331				nilfs_btree_node_get_key(right, 0));
1332	path[level + 1].bp_index--;
1333
1334	brelse(path[level].bp_sib_bh);
1335	path[level].bp_sib_bh = NULL;
1336}
1337
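/*
 * Delete helper: remove the entry, then merge the current node into
 * its left sibling and delete the emptied node block.
 */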
1338static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1339				    struct nilfs_btree_path *path,
1340				    int level, __u64 *keyp, __u64 *ptrp)
1341{
1342	struct nilfs_btree_node *node, *left;
1343	int n, ncblk;
1344
1345	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1346
1347	node = nilfs_btree_get_nonroot_node(path, level);
1348	left = nilfs_btree_get_sib_node(path, level);
1349	ncblk = nilfs_btree_nchildren_per_block(btree);
1350
1351	n = nilfs_btree_node_get_nchildren(node);
1352
1353	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1354
1355	if (!buffer_dirty(path[level].bp_sib_bh))
1356		mark_buffer_dirty(path[level].bp_sib_bh);
1357
1358	nilfs_btnode_delete(path[level].bp_bh);
1359	path[level].bp_bh = path[level].bp_sib_bh;
1360	path[level].bp_sib_bh = NULL;
1361	path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1362}
1363
1364static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1365				     struct nilfs_btree_path *path,
1366				     int level, __u64 *keyp, __u64 *ptrp)
1367{
1368	struct nilfs_btree_node *node, *right;
1369	int n, ncblk;
1370
1371	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1372
1373	node = nilfs_btree_get_nonroot_node(path, level);
1374	right = nilfs_btree_get_sib_node(path, level);
1375	ncblk = nilfs_btree_nchildren_per_block(btree);
1376
1377	n = nilfs_btree_node_get_nchildren(right);
1378
1379	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1380
1381	if (!buffer_dirty(path[level].bp_bh))
1382		mark_buffer_dirty(path[level].bp_bh);
1383
1384	nilfs_btnode_delete(path[level].bp_sib_bh);
1385	path[level].bp_sib_bh = NULL;
1386	path[level + 1].bp_index++;
1387}
1388
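/*
 * Delete helper: remove the entry, then reduce the tree height by one
 * by moving the entries of the root's only remaining child back into
 * the root and deleting the child's node block.
 */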
1389static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1390			       struct nilfs_btree_path *path,
1391			       int level, __u64 *keyp, __u64 *ptrp)
1392{
1393	struct nilfs_btree_node *root, *child;
1394	int n, ncblk;
1395
1396	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1397
1398	root = nilfs_btree_get_root(btree);
1399	child = nilfs_btree_get_nonroot_node(path, level);
1400	ncblk = nilfs_btree_nchildren_per_block(btree);
1401
1402	nilfs_btree_node_delete(root, 0, NULL, NULL,
1403				NILFS_BTREE_ROOT_NCHILDREN_MAX);
1404	nilfs_btree_node_set_level(root, level);
1405	n = nilfs_btree_node_get_nchildren(child);
1406	nilfs_btree_node_move_left(root, child, n,
1407				   NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1408
1409	nilfs_btnode_delete(path[level].bp_bh);
1410	path[level].bp_bh = NULL;
1411}
1412
1413static void nilfs_btree_nop(struct nilfs_bmap *btree,
1414			    struct nilfs_btree_path *path,
1415			    int level, __u64 *keyp, __u64 *ptrp)
1416{
1417}
1418
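/*
 * First phase of deletion: walk up from the leaf level and decide the
 * operation to apply at each level (plain delete, borrow from a
 * sibling, concatenate with a sibling, or shrink the tree), preparing
 * the release of the removed block pointers and accumulating the
 * number of blocks to be freed in @stats.  The chosen operations are
 * applied by nilfs_btree_commit_delete().
 */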
1419static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1420				      struct nilfs_btree_path *path,
1421				      int *levelp,
1422				      struct nilfs_bmap_stats *stats,
1423				      struct inode *dat)
1424{
1425	struct buffer_head *bh;
1426	struct nilfs_btree_node *node, *parent, *sib;
1427	__u64 sibptr;
1428	int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1429
1430	ret = 0;
1431	stats->bs_nblocks = 0;
1432	ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1433	ncblk = nilfs_btree_nchildren_per_block(btree);
1434
1435	for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1436	     level < nilfs_btree_height(btree) - 1;
1437	     level++) {
1438		node = nilfs_btree_get_nonroot_node(path, level);
1439		path[level].bp_oldreq.bpr_ptr =
1440			nilfs_btree_node_get_ptr(node, dindex, ncblk);
1441		ret = nilfs_bmap_prepare_end_ptr(btree,
1442						 &path[level].bp_oldreq, dat);
1443		if (ret < 0)
1444			goto err_out_child_node;
1445
1446		if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1447			path[level].bp_op = nilfs_btree_do_delete;
1448			stats->bs_nblocks++;
1449			goto out;
1450		}
1451
1452		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1453		pindex = path[level + 1].bp_index;
1454		dindex = pindex;
1455
1456		if (pindex > 0) {
1457			/* left sibling */
1458			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1459							  ncmax);
1460			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1461			if (ret < 0)
1462				goto err_out_curr_node;
1463			sib = (struct nilfs_btree_node *)bh->b_data;
1464			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1465				path[level].bp_sib_bh = bh;
1466				path[level].bp_op = nilfs_btree_borrow_left;
1467				stats->bs_nblocks++;
1468				goto out;
1469			} else {
1470				path[level].bp_sib_bh = bh;
1471				path[level].bp_op = nilfs_btree_concat_left;
1472				stats->bs_nblocks++;
1473				/* continue; */
1474			}
1475		} else if (pindex <
1476			   nilfs_btree_node_get_nchildren(parent) - 1) {
1477			/* right sibling */
1478			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1479							  ncmax);
1480			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1481			if (ret < 0)
1482				goto err_out_curr_node;
1483			sib = (struct nilfs_btree_node *)bh->b_data;
1484			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1485				path[level].bp_sib_bh = bh;
1486				path[level].bp_op = nilfs_btree_borrow_right;
1487				stats->bs_nblocks++;
1488				goto out;
1489			} else {
1490				path[level].bp_sib_bh = bh;
1491				path[level].bp_op = nilfs_btree_concat_right;
1492				stats->bs_nblocks++;
1493				/*
1494				 * When merging right sibling node
1495				 * into the current node, pointer to
1496				 * the right sibling node must be
1497				 * terminated instead.  The adjustment
1498				 * below is required for that.
1499				 */
1500				dindex = pindex + 1;
1501				/* continue; */
1502			}
1503		} else {
1504			/* no siblings */
1505			/* the only child of the root node */
1506			WARN_ON(level != nilfs_btree_height(btree) - 2);
1507			if (nilfs_btree_node_get_nchildren(node) - 1 <=
1508			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1509				path[level].bp_op = nilfs_btree_shrink;
1510				stats->bs_nblocks += 2;
1511				level++;
1512				path[level].bp_op = nilfs_btree_nop;
1513				goto shrink_root_child;
1514			} else {
1515				path[level].bp_op = nilfs_btree_do_delete;
1516				stats->bs_nblocks++;
1517				goto out;
1518			}
1519		}
1520	}
1521
1522	/* child of the root node is deleted */
1523	path[level].bp_op = nilfs_btree_do_delete;
1524	stats->bs_nblocks++;
1525
1526shrink_root_child:
1527	node = nilfs_btree_get_root(btree);
1528	path[level].bp_oldreq.bpr_ptr =
1529		nilfs_btree_node_get_ptr(node, dindex,
1530					 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1531
1532	ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1533	if (ret < 0)
1534		goto err_out_child_node;
1535
1536	/* success */
1537 out:
1538	*levelp = level;
1539	return ret;
1540
1541	/* error */
1542 err_out_curr_node:
1543	nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1544 err_out_child_node:
1545	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1546		brelse(path[level].bp_sib_bh);
1547		nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1548	}
1549	*levelp = level;
1550	stats->bs_nblocks = 0;
1551	return ret;
1552}
1553
1554static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1555				      struct nilfs_btree_path *path,
1556				      int maxlevel, struct inode *dat)
1557{
1558	int level;
1559
1560	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1561		nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1562		path[level].bp_op(btree, path, level, NULL, NULL);
1563	}
1564
1565	if (!nilfs_bmap_dirty(btree))
1566		nilfs_bmap_set_dirty(btree);
1567}
1568
1569static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1570
1571{
1572	struct nilfs_btree_path *path;
1573	struct nilfs_bmap_stats stats;
1574	struct inode *dat;
1575	int level, ret;
1576
1577	path = nilfs_btree_alloc_path();
1578	if (path == NULL)
1579		return -ENOMEM;
1580
1581	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1582				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1583	if (ret < 0)
1584		goto out;
1585
1586
1587	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1588
1589	ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1590	if (ret < 0)
1591		goto out;
1592	nilfs_btree_commit_delete(btree, path, level, dat);
1593	nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1594
1595out:
1596	nilfs_btree_free_path(path);
1597	return ret;
1598}
1599
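/*
 * Find the smallest key greater than or equal to @start and store it
 * in *@keyp.  Returns 0 on success, -ENOENT if no such key exists, or
 * another negative error code on failure.
 */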
1600static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1601				__u64 *keyp)
1602{
1603	struct nilfs_btree_path *path;
1604	const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1605	int ret;
1606
1607	path = nilfs_btree_alloc_path();
1608	if (!path)
1609		return -ENOMEM;
1610
1611	ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1612	if (!ret)
1613		*keyp = start;
1614	else if (ret == -ENOENT)
1615		ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1616
1617	nilfs_btree_free_path(path);
1618	return ret;
1619}
1620
1621static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1622{
1623	struct nilfs_btree_path *path;
1624	int ret;
1625
1626	path = nilfs_btree_alloc_path();
1627	if (path == NULL)
1628		return -ENOMEM;
1629
1630	ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1631
1632	nilfs_btree_free_path(path);
1633
1634	return ret;
1635}
1636
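/*
 * Check whether the btree will be small enough to be converted back
 * into a direct mapping after @key is deleted: returns nonzero if @key
 * is the largest key in the tree and the next largest key is below
 * NILFS_BMAP_LARGE_LOW.
 */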
1637static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1638{
1639	struct buffer_head *bh;
1640	struct nilfs_btree_node *root, *node;
1641	__u64 maxkey, nextmaxkey;
1642	__u64 ptr;
1643	int nchildren, ret;
1644
1645	root = nilfs_btree_get_root(btree);
1646	switch (nilfs_btree_height(btree)) {
1647	case 2:
1648		bh = NULL;
1649		node = root;
1650		break;
1651	case 3:
1652		nchildren = nilfs_btree_node_get_nchildren(root);
1653		if (nchildren > 1)
1654			return 0;
1655		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1656					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1657		ret = nilfs_btree_get_block(btree, ptr, &bh);
1658		if (ret < 0)
1659			return ret;
1660		node = (struct nilfs_btree_node *)bh->b_data;
1661		break;
1662	default:
1663		return 0;
1664	}
1665
1666	nchildren = nilfs_btree_node_get_nchildren(node);
1667	maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1668	nextmaxkey = (nchildren > 1) ?
1669		nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1670	if (bh != NULL)
1671		brelse(bh);
1672
1673	return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1674}
1675
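/*
 * Copy up to @nitems key/pointer pairs of the remaining single leaf
 * node into @keys and @ptrs, and return the number of pairs copied or
 * a negative error code on failure.
 */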
1676static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1677				   __u64 *keys, __u64 *ptrs, int nitems)
1678{
1679	struct buffer_head *bh;
1680	struct nilfs_btree_node *node, *root;
1681	__le64 *dkeys;
1682	__le64 *dptrs;
1683	__u64 ptr;
1684	int nchildren, ncmax, i, ret;
1685
1686	root = nilfs_btree_get_root(btree);
1687	switch (nilfs_btree_height(btree)) {
1688	case 2:
1689		bh = NULL;
1690		node = root;
1691		ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1692		break;
1693	case 3:
1694		nchildren = nilfs_btree_node_get_nchildren(root);
1695		WARN_ON(nchildren > 1);
1696		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1697					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1698		ret = nilfs_btree_get_block(btree, ptr, &bh);
1699		if (ret < 0)
1700			return ret;
1701		node = (struct nilfs_btree_node *)bh->b_data;
1702		ncmax = nilfs_btree_nchildren_per_block(btree);
1703		break;
1704	default:
1705		node = NULL;
1706		return -EINVAL;
1707	}
1708
1709	nchildren = nilfs_btree_node_get_nchildren(node);
1710	if (nchildren < nitems)
1711		nitems = nchildren;
1712	dkeys = nilfs_btree_node_dkeys(node);
1713	dptrs = nilfs_btree_node_dptrs(node, ncmax);
1714	for (i = 0; i < nitems; i++) {
1715		keys[i] = le64_to_cpu(dkeys[i]);
1716		ptrs[i] = le64_to_cpu(dptrs[i]);
1717	}
1718
1719	if (bh != NULL)
1720		brelse(bh);
1721
1722	return nitems;
1723}
1724
1725static int
1726nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1727				       union nilfs_bmap_ptr_req *dreq,
1728				       union nilfs_bmap_ptr_req *nreq,
1729				       struct buffer_head **bhp,
1730				       struct nilfs_bmap_stats *stats)
1731{
1732	struct buffer_head *bh;
1733	struct inode *dat = NULL;
1734	int ret;
1735
1736	stats->bs_nblocks = 0;
1737
1738	/* for data */
1739	/* cannot find near ptr */
1740	if (NILFS_BMAP_USE_VBN(btree)) {
1741		dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1742		dat = nilfs_bmap_get_dat(btree);
1743	}
1744
1745	ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1746	if (ret < 0)
1747		return ret;
1748
1749	*bhp = NULL;
1750	stats->bs_nblocks++;
1751	if (nreq != NULL) {
1752		nreq->bpr_ptr = dreq->bpr_ptr + 1;
1753		ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1754		if (ret < 0)
1755			goto err_out_dreq;
1756
1757		ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1758		if (ret < 0)
1759			goto err_out_nreq;
1760
1761		*bhp = bh;
1762		stats->bs_nblocks++;
1763	}
1764
1765	/* success */
1766	return 0;
1767
1768	/* error */
1769 err_out_nreq:
1770	nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1771 err_out_dreq:
1772	nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1773	stats->bs_nblocks = 0;
1774	return ret;
1775
1776}
1777
1778static void
1779nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1780				      __u64 key, __u64 ptr,
1781				      const __u64 *keys, const __u64 *ptrs,
1782				      int n,
1783				      union nilfs_bmap_ptr_req *dreq,
1784				      union nilfs_bmap_ptr_req *nreq,
1785				      struct buffer_head *bh)
1786{
1787	struct nilfs_btree_node *node;
1788	struct inode *dat;
1789	__u64 tmpptr;
1790	int ncblk;
1791
1792	/* free resources */
1793	if (btree->b_ops->bop_clear != NULL)
1794		btree->b_ops->bop_clear(btree);
1795
1796	/* ptr must be a pointer to a buffer head. */
1797	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1798
1799	/* convert and insert */
1800	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1801	__nilfs_btree_init(btree);
1802	if (nreq != NULL) {
1803		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1804		nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1805
1806		/* create child node at level 1 */
1807		node = (struct nilfs_btree_node *)bh->b_data;
1808		ncblk = nilfs_btree_nchildren_per_block(btree);
1809		nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1810		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1811		if (!buffer_dirty(bh))
1812			mark_buffer_dirty(bh);
1813		if (!nilfs_bmap_dirty(btree))
1814			nilfs_bmap_set_dirty(btree);
1815
1816		brelse(bh);
1817
1818		/* create root node at level 2 */
1819		node = nilfs_btree_get_root(btree);
1820		tmpptr = nreq->bpr_ptr;
1821		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1822				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1823				      &keys[0], &tmpptr);
1824	} else {
1825		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1826
1827		/* create root node at level 1 */
1828		node = nilfs_btree_get_root(btree);
1829		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1830				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1831				      keys, ptrs);
1832		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1833					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1834		if (!nilfs_bmap_dirty(btree))
1835			nilfs_bmap_set_dirty(btree);
1836	}
1837
1838	if (NILFS_BMAP_USE_VBN(btree))
1839		nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1840}
1841
 1842/**
 1843 * nilfs_btree_convert_and_insert - convert the bmap into a B-tree and insert a new entry
 1844 * @btree: bmap structure to be converted into a B-tree
 1845 * @key: key of the new entry
 1846 * @ptr: pointer of the new entry
 1847 * @keys: array of keys of the existing entries
 1848 * @ptrs: array of pointers of the existing entries
 1849 * @n: number of existing entries
 1850 */
1851int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1852				   __u64 key, __u64 ptr,
1853				   const __u64 *keys, const __u64 *ptrs, int n)
1854{
1855	struct buffer_head *bh = NULL;
1856	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1857	struct nilfs_bmap_stats stats;
1858	int ret;
1859
1860	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1861		di = &dreq;
1862		ni = NULL;
1863	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1864			   nilfs_btree_node_size(btree))) {
1865		di = &dreq;
1866		ni = &nreq;
1867	} else {
1868		di = NULL;
1869		ni = NULL;
1870		BUG();
1871	}
1872
1873	ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1874						     &stats);
1875	if (ret < 0)
1876		return ret;
1877	nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1878					      di, ni, bh);
1879	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1880	return 0;
1881}
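/*
 * The n + 1 entries inserted above are the n (key, ptr) pairs gathered
 * from the mapping being converted plus the new entry itself, typically
 * handed over by the bmap layer when a direct mapping has grown too
 * large.  If they all fit into the root node, only the data pointer is
 * allocated; if they fit into a single node block, one extra node block
 * is allocated and the root is rebuilt at level 2; a larger request
 * indicates a caller bug and triggers BUG().
 */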
1882
1883static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1884				   struct nilfs_btree_path *path,
1885				   int level,
1886				   struct buffer_head *bh)
1887{
1888	while ((++level < nilfs_btree_height(btree) - 1) &&
1889	       !buffer_dirty(path[level].bp_bh))
1890		mark_buffer_dirty(path[level].bp_bh);
1891
1892	return 0;
1893}
1894
1895static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1896					struct nilfs_btree_path *path,
1897					int level, struct inode *dat)
1898{
1899	struct nilfs_btree_node *parent;
1900	int ncmax, ret;
1901
1902	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1903	path[level].bp_oldreq.bpr_ptr =
1904		nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1905					 ncmax);
1906	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1907	ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1908				       &path[level].bp_newreq.bpr_req);
1909	if (ret < 0)
1910		return ret;
1911
1912	if (buffer_nilfs_node(path[level].bp_bh)) {
1913		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1914		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1915		path[level].bp_ctxt.bh = path[level].bp_bh;
1916		ret = nilfs_btnode_prepare_change_key(
1917			&NILFS_BMAP_I(btree)->i_btnode_cache,
1918			&path[level].bp_ctxt);
1919		if (ret < 0) {
1920			nilfs_dat_abort_update(dat,
1921					       &path[level].bp_oldreq.bpr_req,
1922					       &path[level].bp_newreq.bpr_req);
1923			return ret;
1924		}
1925	}
1926
1927	return 0;
1928}
1929
1930static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1931					struct nilfs_btree_path *path,
1932					int level, struct inode *dat)
1933{
1934	struct nilfs_btree_node *parent;
1935	int ncmax;
1936
1937	nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1938				&path[level].bp_newreq.bpr_req,
1939				btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1940
1941	if (buffer_nilfs_node(path[level].bp_bh)) {
1942		nilfs_btnode_commit_change_key(
1943			&NILFS_BMAP_I(btree)->i_btnode_cache,
1944			&path[level].bp_ctxt);
1945		path[level].bp_bh = path[level].bp_ctxt.bh;
1946	}
1947	set_buffer_nilfs_volatile(path[level].bp_bh);
1948
1949	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1950	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1951				 path[level].bp_newreq.bpr_ptr, ncmax);
1952}
1953
1954static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1955				       struct nilfs_btree_path *path,
1956				       int level, struct inode *dat)
1957{
1958	nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1959			       &path[level].bp_newreq.bpr_req);
1960	if (buffer_nilfs_node(path[level].bp_bh))
1961		nilfs_btnode_abort_change_key(
1962			&NILFS_BMAP_I(btree)->i_btnode_cache,
1963			&path[level].bp_ctxt);
1964}
1965
1966static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1967					   struct nilfs_btree_path *path,
1968					   int minlevel, int *maxlevelp,
1969					   struct inode *dat)
1970{
1971	int level, ret;
1972
1973	level = minlevel;
1974	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1975		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1976		if (ret < 0)
1977			return ret;
1978	}
1979	while ((++level < nilfs_btree_height(btree) - 1) &&
1980	       !buffer_dirty(path[level].bp_bh)) {
1981
1982		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1983		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1984		if (ret < 0)
1985			goto out;
1986	}
1987
1988	/* success */
1989	*maxlevelp = level - 1;
1990	return 0;
1991
1992	/* error */
1993 out:
1994	while (--level > minlevel)
1995		nilfs_btree_abort_update_v(btree, path, level, dat);
1996	if (!buffer_nilfs_volatile(path[level].bp_bh))
1997		nilfs_btree_abort_update_v(btree, path, level, dat);
1998	return ret;
1999}
2000
2001static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2002					   struct nilfs_btree_path *path,
2003					   int minlevel, int maxlevel,
2004					   struct buffer_head *bh,
2005					   struct inode *dat)
2006{
2007	int level;
2008
2009	if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2010		nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2011
2012	for (level = minlevel + 1; level <= maxlevel; level++)
2013		nilfs_btree_commit_update_v(btree, path, level, dat);
2014}
2015
2016static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2017				   struct nilfs_btree_path *path,
2018				   int level, struct buffer_head *bh)
2019{
2020	int maxlevel = 0, ret;
2021	struct nilfs_btree_node *parent;
2022	struct inode *dat = nilfs_bmap_get_dat(btree);
2023	__u64 ptr;
2024	int ncmax;
2025
2026	get_bh(bh);
2027	path[level].bp_bh = bh;
2028	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2029					      dat);
2030	if (ret < 0)
2031		goto out;
2032
2033	if (buffer_nilfs_volatile(path[level].bp_bh)) {
2034		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2035		ptr = nilfs_btree_node_get_ptr(parent,
2036					       path[level + 1].bp_index,
2037					       ncmax);
2038		ret = nilfs_dat_mark_dirty(dat, ptr);
2039		if (ret < 0)
2040			goto out;
2041	}
2042
2043	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2044
2045 out:
2046	brelse(path[level].bp_bh);
2047	path[level].bp_bh = NULL;
2048	return ret;
2049}
2050
2051static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2052				 struct buffer_head *bh)
2053{
2054	struct nilfs_btree_path *path;
2055	struct nilfs_btree_node *node;
2056	__u64 key;
2057	int level, ret;
2058
2059	WARN_ON(!buffer_dirty(bh));
2060
2061	path = nilfs_btree_alloc_path();
2062	if (path == NULL)
2063		return -ENOMEM;
2064
2065	if (buffer_nilfs_node(bh)) {
2066		node = (struct nilfs_btree_node *)bh->b_data;
2067		key = nilfs_btree_node_get_key(node, 0);
2068		level = nilfs_btree_node_get_level(node);
2069	} else {
2070		key = nilfs_bmap_data_get_key(btree, bh);
2071		level = NILFS_BTREE_LEVEL_DATA;
2072	}
2073
2074	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2075	if (ret < 0) {
2076		if (unlikely(ret == -ENOENT))
2077			nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
2078				  "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
2079				  btree->b_inode->i_ino,
2080				  (unsigned long long)key, level);
2081		goto out;
2082	}
2083
2084	ret = NILFS_BMAP_USE_VBN(btree) ?
2085		nilfs_btree_propagate_v(btree, path, level, bh) :
2086		nilfs_btree_propagate_p(btree, path, level, bh);
2087
2088 out:
2089	nilfs_btree_free_path(path);
2090
2091	return ret;
2092}
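/*
 * Propagating a dirty buffer walks up the tree along the path found for
 * the buffer's first key.  For trees using physical block numbers
 * (nilfs_btree_propagate_p) it is enough to mark the ancestor node
 * buffers dirty; for trees using virtual block numbers
 * (nilfs_btree_propagate_v) each non-volatile ancestor also gets its
 * virtual block number renewed through the DAT and the corresponding
 * pointer in its parent updated.
 */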
2093
2094static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2095				    struct buffer_head *bh)
2096{
2097	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2098}
2099
2100static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2101					 struct list_head *lists,
2102					 struct buffer_head *bh)
2103{
2104	struct list_head *head;
2105	struct buffer_head *cbh;
2106	struct nilfs_btree_node *node, *cnode;
2107	__u64 key, ckey;
2108	int level;
2109
2110	get_bh(bh);
2111	node = (struct nilfs_btree_node *)bh->b_data;
2112	key = nilfs_btree_node_get_key(node, 0);
2113	level = nilfs_btree_node_get_level(node);
2114	if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2115	    level >= NILFS_BTREE_LEVEL_MAX) {
2116		dump_stack();
2117		nilfs_msg(btree->b_inode->i_sb, KERN_WARNING,
2118			  "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
2119			  level, (unsigned long long)key,
2120			  btree->b_inode->i_ino,
2121			  (unsigned long long)bh->b_blocknr);
2122		return;
2123	}
2124
2125	list_for_each(head, &lists[level]) {
2126		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2127		cnode = (struct nilfs_btree_node *)cbh->b_data;
2128		ckey = nilfs_btree_node_get_key(cnode, 0);
2129		if (key < ckey)
2130			break;
2131	}
2132	list_add_tail(&bh->b_assoc_buffers, head);
2133}
2134
2135static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2136					     struct list_head *listp)
2137{
2138	struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
2139	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2140	struct pagevec pvec;
2141	struct buffer_head *bh, *head;
2142	pgoff_t index = 0;
2143	int level, i;
2144
2145	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2146	     level < NILFS_BTREE_LEVEL_MAX;
2147	     level++)
2148		INIT_LIST_HEAD(&lists[level]);
2149
2150	pagevec_init(&pvec);
2151
2152	while (pagevec_lookup_tag(&pvec, btcache, &index,
2153					PAGECACHE_TAG_DIRTY)) {
2154		for (i = 0; i < pagevec_count(&pvec); i++) {
2155			bh = head = page_buffers(pvec.pages[i]);
2156			do {
2157				if (buffer_dirty(bh))
2158					nilfs_btree_add_dirty_buffer(btree,
2159								     lists, bh);
2160			} while ((bh = bh->b_this_page) != head);
2161		}
2162		pagevec_release(&pvec);
2163		cond_resched();
2164	}
2165
2166	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2167	     level < NILFS_BTREE_LEVEL_MAX;
2168	     level++)
2169		list_splice_tail(&lists[level], listp);
2170}
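/*
 * The list produced above is ordered by B-tree level, lowest level
 * first, and within each level by the first key of each node, which is
 * the order nilfs_btree_add_dirty_buffer() maintains while inserting.
 */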
2171
2172static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2173				struct nilfs_btree_path *path,
2174				int level,
2175				struct buffer_head **bh,
2176				sector_t blocknr,
2177				union nilfs_binfo *binfo)
2178{
2179	struct nilfs_btree_node *parent;
2180	__u64 key;
2181	__u64 ptr;
2182	int ncmax, ret;
2183
2184	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2185	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2186				       ncmax);
2187	if (buffer_nilfs_node(*bh)) {
2188		path[level].bp_ctxt.oldkey = ptr;
2189		path[level].bp_ctxt.newkey = blocknr;
2190		path[level].bp_ctxt.bh = *bh;
2191		ret = nilfs_btnode_prepare_change_key(
2192			&NILFS_BMAP_I(btree)->i_btnode_cache,
2193			&path[level].bp_ctxt);
2194		if (ret < 0)
2195			return ret;
2196		nilfs_btnode_commit_change_key(
2197			&NILFS_BMAP_I(btree)->i_btnode_cache,
2198			&path[level].bp_ctxt);
2199		*bh = path[level].bp_ctxt.bh;
2200	}
2201
2202	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2203				 ncmax);
2204
2205	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2206	/* on-disk format */
2207	binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2208	binfo->bi_dat.bi_level = level;
2209
2210	return 0;
2211}
2212
2213static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2214				struct nilfs_btree_path *path,
2215				int level,
2216				struct buffer_head **bh,
2217				sector_t blocknr,
2218				union nilfs_binfo *binfo)
2219{
2220	struct nilfs_btree_node *parent;
2221	struct inode *dat = nilfs_bmap_get_dat(btree);
2222	__u64 key;
2223	__u64 ptr;
2224	union nilfs_bmap_ptr_req req;
2225	int ncmax, ret;
2226
2227	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2228	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2229				       ncmax);
2230	req.bpr_ptr = ptr;
2231	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2232	if (ret < 0)
2233		return ret;
2234	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2235
2236	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2237	/* on-disk format */
2238	binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2239	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2240
2241	return 0;
2242}
2243
2244static int nilfs_btree_assign(struct nilfs_bmap *btree,
2245			      struct buffer_head **bh,
2246			      sector_t blocknr,
2247			      union nilfs_binfo *binfo)
2248{
2249	struct nilfs_btree_path *path;
2250	struct nilfs_btree_node *node;
2251	__u64 key;
2252	int level, ret;
2253
2254	path = nilfs_btree_alloc_path();
2255	if (path == NULL)
2256		return -ENOMEM;
2257
2258	if (buffer_nilfs_node(*bh)) {
2259		node = (struct nilfs_btree_node *)(*bh)->b_data;
2260		key = nilfs_btree_node_get_key(node, 0);
2261		level = nilfs_btree_node_get_level(node);
2262	} else {
2263		key = nilfs_bmap_data_get_key(btree, *bh);
2264		level = NILFS_BTREE_LEVEL_DATA;
2265	}
2266
2267	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2268	if (ret < 0) {
2269		WARN_ON(ret == -ENOENT);
2270		goto out;
2271	}
2272
2273	ret = NILFS_BMAP_USE_VBN(btree) ?
2274		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2275		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2276
2277 out:
2278	nilfs_btree_free_path(path);
2279
2280	return ret;
2281}
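/*
 * nilfs_btree_assign() is invoked when a dirty block is about to be
 * written out to a new disk location @blocknr: the p-variant stores the
 * new address directly in the parent node, while the v-variant registers
 * it in the DAT, and both fill @binfo with the on-disk block information
 * recorded by the log writer.
 */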
2282
2283static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2284				 struct buffer_head **bh,
2285				 sector_t blocknr,
2286				 union nilfs_binfo *binfo)
2287{
2288	struct nilfs_btree_node *node;
2289	__u64 key;
2290	int ret;
2291
2292	ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2293			     blocknr);
2294	if (ret < 0)
2295		return ret;
2296
2297	if (buffer_nilfs_node(*bh)) {
2298		node = (struct nilfs_btree_node *)(*bh)->b_data;
2299		key = nilfs_btree_node_get_key(node, 0);
2300	} else
2301		key = nilfs_bmap_data_get_key(btree, *bh);
2302
2303	/* on-disk format */
2304	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2305	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2306
2307	return 0;
2308}
2309
2310static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2311{
2312	struct buffer_head *bh;
2313	struct nilfs_btree_path *path;
2314	__u64 ptr;
2315	int ret;
2316
2317	path = nilfs_btree_alloc_path();
2318	if (path == NULL)
2319		return -ENOMEM;
2320
2321	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2322	if (ret < 0) {
2323		WARN_ON(ret == -ENOENT);
2324		goto out;
2325	}
2326	ret = nilfs_btree_get_block(btree, ptr, &bh);
2327	if (ret < 0) {
2328		WARN_ON(ret == -ENOENT);
2329		goto out;
2330	}
2331
2332	if (!buffer_dirty(bh))
2333		mark_buffer_dirty(bh);
2334	brelse(bh);
2335	if (!nilfs_bmap_dirty(btree))
2336		nilfs_bmap_set_dirty(btree);
2337
2338 out:
2339	nilfs_btree_free_path(path);
2340	return ret;
2341}
2342
2343static const struct nilfs_bmap_operations nilfs_btree_ops = {
2344	.bop_lookup		=	nilfs_btree_lookup,
2345	.bop_lookup_contig	=	nilfs_btree_lookup_contig,
2346	.bop_insert		=	nilfs_btree_insert,
2347	.bop_delete		=	nilfs_btree_delete,
2348	.bop_clear		=	NULL,
2349
2350	.bop_propagate		=	nilfs_btree_propagate,
2351
2352	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2353
2354	.bop_assign		=	nilfs_btree_assign,
2355	.bop_mark		=	nilfs_btree_mark,
2356
2357	.bop_seek_key		=	nilfs_btree_seek_key,
2358	.bop_last_key		=	nilfs_btree_last_key,
2359
2360	.bop_check_insert	=	NULL,
2361	.bop_check_delete	=	nilfs_btree_check_delete,
2362	.bop_gather_data	=	nilfs_btree_gather_data,
2363};
2364
2365static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2366	.bop_lookup		=	NULL,
2367	.bop_lookup_contig	=	NULL,
2368	.bop_insert		=	NULL,
2369	.bop_delete		=	NULL,
2370	.bop_clear		=	NULL,
2371
2372	.bop_propagate		=	nilfs_btree_propagate_gc,
2373
2374	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2375
2376	.bop_assign		=	nilfs_btree_assign_gc,
2377	.bop_mark		=	NULL,
2378
2379	.bop_seek_key		=	NULL,
2380	.bop_last_key		=	NULL,
2381
2382	.bop_check_insert	=	NULL,
2383	.bop_check_delete	=	NULL,
2384	.bop_gather_data	=	NULL,
2385};
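/*
 * These functions are reached through the operation table rather than
 * called directly; a minimal sketch of the dispatch done by the bmap
 * layer (assuming the usual nilfs_bmap wrapper) would be:
 *
 *	err = bmap->b_ops->bop_propagate(bmap, bh);
 *
 * The GC variant only keeps the operations needed while blocks are being
 * moved by the garbage collector (propagation, dirty buffer lookup, and
 * block assignment via DAT moves); the rest are intentionally NULL.
 */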
2386
2387static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2388{
2389	bmap->b_ops = &nilfs_btree_ops;
2390	bmap->b_nchildren_per_block =
2391		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2392}
2393
2394int nilfs_btree_init(struct nilfs_bmap *bmap)
2395{
2396	int ret = 0;
2397
2398	__nilfs_btree_init(bmap);
2399
2400	if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
2401		ret = -EIO;
2402	return ret;
2403}
2404
2405void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2406{
2407	bmap->b_ops = &nilfs_btree_ops_gc;
2408	bmap->b_nchildren_per_block =
2409		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2410}
v3.15
 
   1/*
   2 * btree.c - NILFS B-tree.
   3 *
   4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; either version 2 of the License, or
   9 * (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write to the Free Software
  18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  19 *
  20 * Written by Koji Sato <koji@osrg.net>.
  21 */
  22
  23#include <linux/slab.h>
  24#include <linux/string.h>
  25#include <linux/errno.h>
  26#include <linux/pagevec.h>
  27#include "nilfs.h"
  28#include "page.h"
  29#include "btnode.h"
  30#include "btree.h"
  31#include "alloc.h"
  32#include "dat.h"
  33
 
 
  34static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
  35{
  36	struct nilfs_btree_path *path;
  37	int level = NILFS_BTREE_LEVEL_DATA;
  38
  39	path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
  40	if (path == NULL)
  41		goto out;
  42
  43	for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
  44		path[level].bp_bh = NULL;
  45		path[level].bp_sib_bh = NULL;
  46		path[level].bp_index = 0;
  47		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  48		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  49		path[level].bp_op = NULL;
  50	}
  51
  52out:
  53	return path;
  54}
  55
  56static void nilfs_btree_free_path(struct nilfs_btree_path *path)
  57{
  58	int level = NILFS_BTREE_LEVEL_DATA;
  59
  60	for (; level < NILFS_BTREE_LEVEL_MAX; level++)
  61		brelse(path[level].bp_bh);
  62
  63	kmem_cache_free(nilfs_btree_path_cache, path);
  64}
  65
  66/*
  67 * B-tree node operations
  68 */
  69static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
  70				     __u64 ptr, struct buffer_head **bhp)
  71{
  72	struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
  73	struct buffer_head *bh;
  74
  75	bh = nilfs_btnode_create_block(btnc, ptr);
  76	if (!bh)
  77		return -ENOMEM;
  78
  79	set_buffer_nilfs_volatile(bh);
  80	*bhp = bh;
  81	return 0;
  82}
  83
  84static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
  85{
  86	return node->bn_flags;
  87}
  88
  89static void
  90nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
  91{
  92	node->bn_flags = flags;
  93}
  94
  95static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
  96{
  97	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
  98}
  99
 100static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
 101{
 102	return node->bn_level;
 103}
 104
 105static void
 106nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
 107{
 108	node->bn_level = level;
 109}
 110
 111static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
 112{
 113	return le16_to_cpu(node->bn_nchildren);
 114}
 115
 116static void
 117nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
 118{
 119	node->bn_nchildren = cpu_to_le16(nchildren);
 120}
 121
 122static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
 123{
 124	return 1 << btree->b_inode->i_blkbits;
 125}
 126
 127static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
 128{
 129	return btree->b_nchildren_per_block;
 130}
 131
 132static __le64 *
 133nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
 134{
 135	return (__le64 *)((char *)(node + 1) +
 136			  (nilfs_btree_node_root(node) ?
 137			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
 138}
 139
 140static __le64 *
 141nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
 142{
 143	return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
 144}
 145
 146static __u64
 147nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
 148{
 149	return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
 150}
 151
 152static void
 153nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
 154{
 155	*(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
 156}
 157
 158static __u64
 159nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
 160			 int ncmax)
 161{
 162	return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
 163}
 164
 165static void
 166nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
 167			 int ncmax)
 168{
 169	*(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
 170}
 171
 172static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
 173				  int level, int nchildren, int ncmax,
 174				  const __u64 *keys, const __u64 *ptrs)
 175{
 176	__le64 *dkeys;
 177	__le64 *dptrs;
 178	int i;
 179
 180	nilfs_btree_node_set_flags(node, flags);
 181	nilfs_btree_node_set_level(node, level);
 182	nilfs_btree_node_set_nchildren(node, nchildren);
 183
 184	dkeys = nilfs_btree_node_dkeys(node);
 185	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 186	for (i = 0; i < nchildren; i++) {
 187		dkeys[i] = cpu_to_le64(keys[i]);
 188		dptrs[i] = cpu_to_le64(ptrs[i]);
 189	}
 190}
 191
 192/* Assume the buffer heads corresponding to left and right are locked. */
 193static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
 194				       struct nilfs_btree_node *right,
 195				       int n, int lncmax, int rncmax)
 196{
 197	__le64 *ldkeys, *rdkeys;
 198	__le64 *ldptrs, *rdptrs;
 199	int lnchildren, rnchildren;
 200
 201	ldkeys = nilfs_btree_node_dkeys(left);
 202	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 203	lnchildren = nilfs_btree_node_get_nchildren(left);
 204
 205	rdkeys = nilfs_btree_node_dkeys(right);
 206	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 207	rnchildren = nilfs_btree_node_get_nchildren(right);
 208
 209	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
 210	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
 211	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
 212	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
 213
 214	lnchildren += n;
 215	rnchildren -= n;
 216	nilfs_btree_node_set_nchildren(left, lnchildren);
 217	nilfs_btree_node_set_nchildren(right, rnchildren);
 218}
 219
 220/* Assume that the buffer heads corresponding to left and right are locked. */
 221static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
 222					struct nilfs_btree_node *right,
 223					int n, int lncmax, int rncmax)
 224{
 225	__le64 *ldkeys, *rdkeys;
 226	__le64 *ldptrs, *rdptrs;
 227	int lnchildren, rnchildren;
 228
 229	ldkeys = nilfs_btree_node_dkeys(left);
 230	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 231	lnchildren = nilfs_btree_node_get_nchildren(left);
 232
 233	rdkeys = nilfs_btree_node_dkeys(right);
 234	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 235	rnchildren = nilfs_btree_node_get_nchildren(right);
 236
 237	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
 238	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
 239	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
 240	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
 241
 242	lnchildren -= n;
 243	rnchildren += n;
 244	nilfs_btree_node_set_nchildren(left, lnchildren);
 245	nilfs_btree_node_set_nchildren(right, rnchildren);
 246}
 247
 248/* Assume that the buffer head corresponding to node is locked. */
 249static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
 250				    __u64 key, __u64 ptr, int ncmax)
 251{
 252	__le64 *dkeys;
 253	__le64 *dptrs;
 254	int nchildren;
 255
 256	dkeys = nilfs_btree_node_dkeys(node);
 257	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 258	nchildren = nilfs_btree_node_get_nchildren(node);
 259	if (index < nchildren) {
 260		memmove(dkeys + index + 1, dkeys + index,
 261			(nchildren - index) * sizeof(*dkeys));
 262		memmove(dptrs + index + 1, dptrs + index,
 263			(nchildren - index) * sizeof(*dptrs));
 264	}
 265	dkeys[index] = cpu_to_le64(key);
 266	dptrs[index] = cpu_to_le64(ptr);
 267	nchildren++;
 268	nilfs_btree_node_set_nchildren(node, nchildren);
 269}
 270
 271/* Assume that the buffer head corresponding to node is locked. */
 272static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
 273				    __u64 *keyp, __u64 *ptrp, int ncmax)
 274{
 275	__u64 key;
 276	__u64 ptr;
 277	__le64 *dkeys;
 278	__le64 *dptrs;
 279	int nchildren;
 280
 281	dkeys = nilfs_btree_node_dkeys(node);
 282	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 283	key = le64_to_cpu(dkeys[index]);
 284	ptr = le64_to_cpu(dptrs[index]);
 285	nchildren = nilfs_btree_node_get_nchildren(node);
 286	if (keyp != NULL)
 287		*keyp = key;
 288	if (ptrp != NULL)
 289		*ptrp = ptr;
 290
 291	if (index < nchildren - 1) {
 292		memmove(dkeys + index, dkeys + index + 1,
 293			(nchildren - index - 1) * sizeof(*dkeys));
 294		memmove(dptrs + index, dptrs + index + 1,
 295			(nchildren - index - 1) * sizeof(*dptrs));
 296	}
 297	nchildren--;
 298	nilfs_btree_node_set_nchildren(node, nchildren);
 299}
 300
 301static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
 302				   __u64 key, int *indexp)
 303{
 304	__u64 nkey;
 305	int index, low, high, s;
 306
 307	/* binary search */
 308	low = 0;
 309	high = nilfs_btree_node_get_nchildren(node) - 1;
 310	index = 0;
 311	s = 0;
 312	while (low <= high) {
 313		index = (low + high) / 2;
 314		nkey = nilfs_btree_node_get_key(node, index);
 315		if (nkey == key) {
 316			s = 0;
 317			goto out;
 318		} else if (nkey < key) {
 319			low = index + 1;
 320			s = -1;
 321		} else {
 322			high = index - 1;
 323			s = 1;
 324		}
 325	}
 326
 327	/* adjust index */
 328	if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
 329		if (s > 0 && index > 0)
 330			index--;
 331	} else if (s < 0)
 332		index++;
 333
 334 out:
 335	*indexp = index;
 336
 337	return s == 0;
 338}
 339
 340/**
 341 * nilfs_btree_node_broken - verify consistency of btree node
 342 * @node: btree node block to be examined
 343 * @size: node size (in bytes)
 
 344 * @blocknr: block number
 345 *
 346 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 347 */
 348static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
 349				   size_t size, sector_t blocknr)
 
 350{
 351	int level, flags, nchildren;
 352	int ret = 0;
 353
 354	level = nilfs_btree_node_get_level(node);
 355	flags = nilfs_btree_node_get_flags(node);
 356	nchildren = nilfs_btree_node_get_nchildren(node);
 357
 358	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
 359		     level >= NILFS_BTREE_LEVEL_MAX ||
 360		     (flags & NILFS_BTREE_NODE_ROOT) ||
 361		     nchildren < 0 ||
 362		     nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
 363		printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): "
 364		       "level = %d, flags = 0x%x, nchildren = %d\n",
 365		       (unsigned long long)blocknr, level, flags, nchildren);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 366		ret = 1;
 367	}
 368	return ret;
 369}
 370
 371int nilfs_btree_broken_node_block(struct buffer_head *bh)
 372{
 
 373	int ret;
 374
 375	if (buffer_nilfs_checked(bh))
 376		return 0;
 377
 
 378	ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
 379				       bh->b_size, bh->b_blocknr);
 380	if (likely(!ret))
 381		set_buffer_nilfs_checked(bh);
 382	return ret;
 383}
 384
 385static struct nilfs_btree_node *
 386nilfs_btree_get_root(const struct nilfs_bmap *btree)
 387{
 388	return (struct nilfs_btree_node *)btree->b_u.u_data;
 389}
 390
 391static struct nilfs_btree_node *
 392nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
 393{
 394	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
 395}
 396
 397static struct nilfs_btree_node *
 398nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
 399{
 400	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
 401}
 402
 403static int nilfs_btree_height(const struct nilfs_bmap *btree)
 404{
 405	return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
 406}
 407
 408static struct nilfs_btree_node *
 409nilfs_btree_get_node(const struct nilfs_bmap *btree,
 410		     const struct nilfs_btree_path *path,
 411		     int level, int *ncmaxp)
 412{
 413	struct nilfs_btree_node *node;
 414
 415	if (level == nilfs_btree_height(btree) - 1) {
 416		node = nilfs_btree_get_root(btree);
 417		*ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
 418	} else {
 419		node = nilfs_btree_get_nonroot_node(path, level);
 420		*ncmaxp = nilfs_btree_nchildren_per_block(btree);
 421	}
 422	return node;
 423}
 424
 425static int
 426nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
 427{
 428	if (unlikely(nilfs_btree_node_get_level(node) != level)) {
 429		dump_stack();
 430		printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
 431		       nilfs_btree_node_get_level(node), level);
 
 
 432		return 1;
 433	}
 434	return 0;
 435}
 436
 437struct nilfs_btree_readahead_info {
 438	struct nilfs_btree_node *node;	/* parent node */
 439	int max_ra_blocks;		/* max nof blocks to read ahead */
 440	int index;			/* current index on the parent node */
 441	int ncmax;			/* nof children in the parent node */
 442};
 443
 444static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 445				   struct buffer_head **bhp,
 446				   const struct nilfs_btree_readahead_info *ra)
 447{
 448	struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
 449	struct buffer_head *bh, *ra_bh;
 450	sector_t submit_ptr = 0;
 451	int ret;
 452
 453	ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr);
 
 454	if (ret) {
 455		if (ret != -EEXIST)
 456			return ret;
 457		goto out_check;
 458	}
 459
 460	if (ra) {
 461		int i, n;
 462		__u64 ptr2;
 463
 464		/* read ahead sibling nodes */
 465		for (n = ra->max_ra_blocks, i = ra->index + 1;
 466		     n > 0 && i < ra->ncmax; n--, i++) {
 467			ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
 468
 469			ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA,
 
 470							&ra_bh, &submit_ptr);
 471			if (likely(!ret || ret == -EEXIST))
 472				brelse(ra_bh);
 473			else if (ret != -EBUSY)
 474				break;
 475			if (!buffer_locked(bh))
 476				goto out_no_wait;
 477		}
 478	}
 479
 480	wait_on_buffer(bh);
 481
 482 out_no_wait:
 483	if (!buffer_uptodate(bh)) {
 
 
 
 484		brelse(bh);
 485		return -EIO;
 486	}
 487
 488 out_check:
 489	if (nilfs_btree_broken_node_block(bh)) {
 490		clear_buffer_uptodate(bh);
 491		brelse(bh);
 492		return -EINVAL;
 493	}
 494
 495	*bhp = bh;
 496	return 0;
 497}
 498
 499static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 500				   struct buffer_head **bhp)
 501{
 502	return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
 503}
 504
 505static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
 506				 struct nilfs_btree_path *path,
 507				 __u64 key, __u64 *ptrp, int minlevel,
 508				 int readahead)
 509{
 510	struct nilfs_btree_node *node;
 511	struct nilfs_btree_readahead_info p, *ra;
 512	__u64 ptr;
 513	int level, index, found, ncmax, ret;
 514
 515	node = nilfs_btree_get_root(btree);
 516	level = nilfs_btree_node_get_level(node);
 517	if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
 518		return -ENOENT;
 519
 520	found = nilfs_btree_node_lookup(node, key, &index);
 521	ptr = nilfs_btree_node_get_ptr(node, index,
 522				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 523	path[level].bp_bh = NULL;
 524	path[level].bp_index = index;
 525
 526	ncmax = nilfs_btree_nchildren_per_block(btree);
 527
 528	while (--level >= minlevel) {
 529		ra = NULL;
 530		if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
 531			p.node = nilfs_btree_get_node(btree, path, level + 1,
 532						      &p.ncmax);
 533			p.index = index;
 534			p.max_ra_blocks = 7;
 535			ra = &p;
 536		}
 537		ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
 538					      ra);
 539		if (ret < 0)
 540			return ret;
 541
 542		node = nilfs_btree_get_nonroot_node(path, level);
 543		if (nilfs_btree_bad_node(node, level))
 544			return -EINVAL;
 545		if (!found)
 546			found = nilfs_btree_node_lookup(node, key, &index);
 547		else
 548			index = 0;
 549		if (index < ncmax) {
 550			ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 551		} else {
 552			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
 553			/* insert */
 554			ptr = NILFS_BMAP_INVALID_PTR;
 555		}
 556		path[level].bp_index = index;
 557	}
 558	if (!found)
 559		return -ENOENT;
 560
 561	if (ptrp != NULL)
 562		*ptrp = ptr;
 563
 564	return 0;
 565}
 566
 567static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
 568				      struct nilfs_btree_path *path,
 569				      __u64 *keyp, __u64 *ptrp)
 570{
 571	struct nilfs_btree_node *node;
 572	__u64 ptr;
 573	int index, level, ncmax, ret;
 574
 575	node = nilfs_btree_get_root(btree);
 576	index = nilfs_btree_node_get_nchildren(node) - 1;
 577	if (index < 0)
 578		return -ENOENT;
 579	level = nilfs_btree_node_get_level(node);
 580	ptr = nilfs_btree_node_get_ptr(node, index,
 581				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 582	path[level].bp_bh = NULL;
 583	path[level].bp_index = index;
 584	ncmax = nilfs_btree_nchildren_per_block(btree);
 585
 586	for (level--; level > 0; level--) {
 587		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
 588		if (ret < 0)
 589			return ret;
 590		node = nilfs_btree_get_nonroot_node(path, level);
 591		if (nilfs_btree_bad_node(node, level))
 592			return -EINVAL;
 593		index = nilfs_btree_node_get_nchildren(node) - 1;
 594		ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 595		path[level].bp_index = index;
 596	}
 597
 598	if (keyp != NULL)
 599		*keyp = nilfs_btree_node_get_key(node, index);
 600	if (ptrp != NULL)
 601		*ptrp = ptr;
 602
 603	return 0;
 604}
 605
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 606static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
 607			      __u64 key, int level, __u64 *ptrp)
 608{
 609	struct nilfs_btree_path *path;
 610	int ret;
 611
 612	path = nilfs_btree_alloc_path();
 613	if (path == NULL)
 614		return -ENOMEM;
 615
 616	ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
 617
 618	nilfs_btree_free_path(path);
 619
 620	return ret;
 621}
 622
 623static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
 624				     __u64 key, __u64 *ptrp, unsigned maxblocks)
 
 625{
 626	struct nilfs_btree_path *path;
 627	struct nilfs_btree_node *node;
 628	struct inode *dat = NULL;
 629	__u64 ptr, ptr2;
 630	sector_t blocknr;
 631	int level = NILFS_BTREE_LEVEL_NODE_MIN;
 632	int ret, cnt, index, maxlevel, ncmax;
 633	struct nilfs_btree_readahead_info p;
 634
 635	path = nilfs_btree_alloc_path();
 636	if (path == NULL)
 637		return -ENOMEM;
 638
 639	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
 640	if (ret < 0)
 641		goto out;
 642
 643	if (NILFS_BMAP_USE_VBN(btree)) {
 644		dat = nilfs_bmap_get_dat(btree);
 645		ret = nilfs_dat_translate(dat, ptr, &blocknr);
 646		if (ret < 0)
 647			goto out;
 648		ptr = blocknr;
 649	}
 650	cnt = 1;
 651	if (cnt == maxblocks)
 652		goto end;
 653
 654	maxlevel = nilfs_btree_height(btree) - 1;
 655	node = nilfs_btree_get_node(btree, path, level, &ncmax);
 656	index = path[level].bp_index + 1;
 657	for (;;) {
 658		while (index < nilfs_btree_node_get_nchildren(node)) {
 659			if (nilfs_btree_node_get_key(node, index) !=
 660			    key + cnt)
 661				goto end;
 662			ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
 663			if (dat) {
 664				ret = nilfs_dat_translate(dat, ptr2, &blocknr);
 665				if (ret < 0)
 666					goto out;
 667				ptr2 = blocknr;
 668			}
 669			if (ptr2 != ptr + cnt || ++cnt == maxblocks)
 670				goto end;
 671			index++;
 672			continue;
 673		}
 674		if (level == maxlevel)
 675			break;
 676
 677		/* look-up right sibling node */
 678		p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
 679		p.index = path[level + 1].bp_index + 1;
 680		p.max_ra_blocks = 7;
 681		if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
 682		    nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
 683			break;
 684		ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
 685		path[level + 1].bp_index = p.index;
 686
 687		brelse(path[level].bp_bh);
 688		path[level].bp_bh = NULL;
 689
 690		ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
 691					      &p);
 692		if (ret < 0)
 693			goto out;
 694		node = nilfs_btree_get_nonroot_node(path, level);
 695		ncmax = nilfs_btree_nchildren_per_block(btree);
 696		index = 0;
 697		path[level].bp_index = index;
 698	}
 699 end:
 700	*ptrp = ptr;
 701	ret = cnt;
 702 out:
 703	nilfs_btree_free_path(path);
 704	return ret;
 705}
 706
 707static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
 708				    struct nilfs_btree_path *path,
 709				    int level, __u64 key)
 710{
 711	if (level < nilfs_btree_height(btree) - 1) {
 712		do {
 713			nilfs_btree_node_set_key(
 714				nilfs_btree_get_nonroot_node(path, level),
 715				path[level].bp_index, key);
 716			if (!buffer_dirty(path[level].bp_bh))
 717				mark_buffer_dirty(path[level].bp_bh);
 718		} while ((path[level].bp_index == 0) &&
 719			 (++level < nilfs_btree_height(btree) - 1));
 720	}
 721
 722	/* root */
 723	if (level == nilfs_btree_height(btree) - 1) {
 724		nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
 725					 path[level].bp_index, key);
 726	}
 727}
 728
 729static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
 730				  struct nilfs_btree_path *path,
 731				  int level, __u64 *keyp, __u64 *ptrp)
 732{
 733	struct nilfs_btree_node *node;
 734	int ncblk;
 735
 736	if (level < nilfs_btree_height(btree) - 1) {
 737		node = nilfs_btree_get_nonroot_node(path, level);
 738		ncblk = nilfs_btree_nchildren_per_block(btree);
 739		nilfs_btree_node_insert(node, path[level].bp_index,
 740					*keyp, *ptrp, ncblk);
 741		if (!buffer_dirty(path[level].bp_bh))
 742			mark_buffer_dirty(path[level].bp_bh);
 743
 744		if (path[level].bp_index == 0)
 745			nilfs_btree_promote_key(btree, path, level + 1,
 746						nilfs_btree_node_get_key(node,
 747									 0));
 748	} else {
 749		node = nilfs_btree_get_root(btree);
 750		nilfs_btree_node_insert(node, path[level].bp_index,
 751					*keyp, *ptrp,
 752					NILFS_BTREE_ROOT_NCHILDREN_MAX);
 753	}
 754}
 755
 756static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
 757				   struct nilfs_btree_path *path,
 758				   int level, __u64 *keyp, __u64 *ptrp)
 759{
 760	struct nilfs_btree_node *node, *left;
 761	int nchildren, lnchildren, n, move, ncblk;
 762
 763	node = nilfs_btree_get_nonroot_node(path, level);
 764	left = nilfs_btree_get_sib_node(path, level);
 765	nchildren = nilfs_btree_node_get_nchildren(node);
 766	lnchildren = nilfs_btree_node_get_nchildren(left);
 767	ncblk = nilfs_btree_nchildren_per_block(btree);
 768	move = 0;
 769
 770	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
 771	if (n > path[level].bp_index) {
 772		/* move insert point */
 773		n--;
 774		move = 1;
 775	}
 776
 777	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
 778
 779	if (!buffer_dirty(path[level].bp_bh))
 780		mark_buffer_dirty(path[level].bp_bh);
 781	if (!buffer_dirty(path[level].bp_sib_bh))
 782		mark_buffer_dirty(path[level].bp_sib_bh);
 783
 784	nilfs_btree_promote_key(btree, path, level + 1,
 785				nilfs_btree_node_get_key(node, 0));
 786
 787	if (move) {
 788		brelse(path[level].bp_bh);
 789		path[level].bp_bh = path[level].bp_sib_bh;
 790		path[level].bp_sib_bh = NULL;
 791		path[level].bp_index += lnchildren;
 792		path[level + 1].bp_index--;
 793	} else {
 794		brelse(path[level].bp_sib_bh);
 795		path[level].bp_sib_bh = NULL;
 796		path[level].bp_index -= n;
 797	}
 798
 799	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 800}
 801
 802static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
 803				    struct nilfs_btree_path *path,
 804				    int level, __u64 *keyp, __u64 *ptrp)
 805{
 806	struct nilfs_btree_node *node, *right;
 807	int nchildren, rnchildren, n, move, ncblk;
 808
 809	node = nilfs_btree_get_nonroot_node(path, level);
 810	right = nilfs_btree_get_sib_node(path, level);
 811	nchildren = nilfs_btree_node_get_nchildren(node);
 812	rnchildren = nilfs_btree_node_get_nchildren(right);
 813	ncblk = nilfs_btree_nchildren_per_block(btree);
 814	move = 0;
 815
 816	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
 817	if (n > nchildren - path[level].bp_index) {
 818		/* move insert point */
 819		n--;
 820		move = 1;
 821	}
 822
 823	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 824
 825	if (!buffer_dirty(path[level].bp_bh))
 826		mark_buffer_dirty(path[level].bp_bh);
 827	if (!buffer_dirty(path[level].bp_sib_bh))
 828		mark_buffer_dirty(path[level].bp_sib_bh);
 829
 830	path[level + 1].bp_index++;
 831	nilfs_btree_promote_key(btree, path, level + 1,
 832				nilfs_btree_node_get_key(right, 0));
 833	path[level + 1].bp_index--;
 834
 835	if (move) {
 836		brelse(path[level].bp_bh);
 837		path[level].bp_bh = path[level].bp_sib_bh;
 838		path[level].bp_sib_bh = NULL;
 839		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 840		path[level + 1].bp_index++;
 841	} else {
 842		brelse(path[level].bp_sib_bh);
 843		path[level].bp_sib_bh = NULL;
 844	}
 845
 846	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 847}
 848
 849static void nilfs_btree_split(struct nilfs_bmap *btree,
 850			      struct nilfs_btree_path *path,
 851			      int level, __u64 *keyp, __u64 *ptrp)
 852{
 853	struct nilfs_btree_node *node, *right;
 854	__u64 newkey;
 855	__u64 newptr;
 856	int nchildren, n, move, ncblk;
 857
 858	node = nilfs_btree_get_nonroot_node(path, level);
 859	right = nilfs_btree_get_sib_node(path, level);
 860	nchildren = nilfs_btree_node_get_nchildren(node);
 861	ncblk = nilfs_btree_nchildren_per_block(btree);
 862	move = 0;
 863
 864	n = (nchildren + 1) / 2;
 865	if (n > nchildren - path[level].bp_index) {
 866		n--;
 867		move = 1;
 868	}
 869
 870	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 871
 872	if (!buffer_dirty(path[level].bp_bh))
 873		mark_buffer_dirty(path[level].bp_bh);
 874	if (!buffer_dirty(path[level].bp_sib_bh))
 875		mark_buffer_dirty(path[level].bp_sib_bh);
 876
 877	newkey = nilfs_btree_node_get_key(right, 0);
 878	newptr = path[level].bp_newreq.bpr_ptr;
 879
 880	if (move) {
 881		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 882		nilfs_btree_node_insert(right, path[level].bp_index,
 883					*keyp, *ptrp, ncblk);
 884
 885		*keyp = nilfs_btree_node_get_key(right, 0);
 886		*ptrp = path[level].bp_newreq.bpr_ptr;
 887
 888		brelse(path[level].bp_bh);
 889		path[level].bp_bh = path[level].bp_sib_bh;
 890		path[level].bp_sib_bh = NULL;
 891	} else {
 892		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 893
 894		*keyp = nilfs_btree_node_get_key(right, 0);
 895		*ptrp = path[level].bp_newreq.bpr_ptr;
 896
 897		brelse(path[level].bp_sib_bh);
 898		path[level].bp_sib_bh = NULL;
 899	}
 900
 901	path[level + 1].bp_index++;
 902}
 903
 904static void nilfs_btree_grow(struct nilfs_bmap *btree,
 905			     struct nilfs_btree_path *path,
 906			     int level, __u64 *keyp, __u64 *ptrp)
 907{
 908	struct nilfs_btree_node *root, *child;
 909	int n, ncblk;
 910
 911	root = nilfs_btree_get_root(btree);
 912	child = nilfs_btree_get_sib_node(path, level);
 913	ncblk = nilfs_btree_nchildren_per_block(btree);
 914
 915	n = nilfs_btree_node_get_nchildren(root);
 916
 917	nilfs_btree_node_move_right(root, child, n,
 918				    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
 919	nilfs_btree_node_set_level(root, level + 1);
 920
 921	if (!buffer_dirty(path[level].bp_sib_bh))
 922		mark_buffer_dirty(path[level].bp_sib_bh);
 923
 924	path[level].bp_bh = path[level].bp_sib_bh;
 925	path[level].bp_sib_bh = NULL;
 926
 927	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 928
 929	*keyp = nilfs_btree_node_get_key(child, 0);
 930	*ptrp = path[level].bp_newreq.bpr_ptr;
 931}
 932
 933static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
 934				   const struct nilfs_btree_path *path)
 935{
 936	struct nilfs_btree_node *node;
 937	int level, ncmax;
 938
 939	if (path == NULL)
 940		return NILFS_BMAP_INVALID_PTR;
 941
 942	/* left sibling */
 943	level = NILFS_BTREE_LEVEL_NODE_MIN;
 944	if (path[level].bp_index > 0) {
 945		node = nilfs_btree_get_node(btree, path, level, &ncmax);
 946		return nilfs_btree_node_get_ptr(node,
 947						path[level].bp_index - 1,
 948						ncmax);
 949	}
 950
 951	/* parent */
 952	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
 953	if (level <= nilfs_btree_height(btree) - 1) {
 954		node = nilfs_btree_get_node(btree, path, level, &ncmax);
 955		return nilfs_btree_node_get_ptr(node, path[level].bp_index,
 956						ncmax);
 957	}
 958
 959	return NILFS_BMAP_INVALID_PTR;
 960}
 961
 962static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
 963				       const struct nilfs_btree_path *path,
 964				       __u64 key)
 965{
 966	__u64 ptr;
 967
 968	ptr = nilfs_bmap_find_target_seq(btree, key);
 969	if (ptr != NILFS_BMAP_INVALID_PTR)
 970		/* sequential access */
 971		return ptr;
 972	else {
 973		ptr = nilfs_btree_find_near(btree, path);
 974		if (ptr != NILFS_BMAP_INVALID_PTR)
 975			/* near */
 976			return ptr;
 977	}
 978	/* block group */
 979	return nilfs_bmap_find_target_in_group(btree);
 980}
 981
 982static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
 983				      struct nilfs_btree_path *path,
 984				      int *levelp, __u64 key, __u64 ptr,
 985				      struct nilfs_bmap_stats *stats)
 986{
 987	struct buffer_head *bh;
 988	struct nilfs_btree_node *node, *parent, *sib;
 989	__u64 sibptr;
 990	int pindex, level, ncmax, ncblk, ret;
 991	struct inode *dat = NULL;
 992
 993	stats->bs_nblocks = 0;
 994	level = NILFS_BTREE_LEVEL_DATA;
 995
 996	/* allocate a new ptr for data block */
 997	if (NILFS_BMAP_USE_VBN(btree)) {
 998		path[level].bp_newreq.bpr_ptr =
 999			nilfs_btree_find_target_v(btree, path, key);
1000		dat = nilfs_bmap_get_dat(btree);
1001	}
1002
1003	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1004	if (ret < 0)
1005		goto err_out_data;
1006
1007	ncblk = nilfs_btree_nchildren_per_block(btree);
1008
1009	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1010	     level < nilfs_btree_height(btree) - 1;
1011	     level++) {
1012		node = nilfs_btree_get_nonroot_node(path, level);
1013		if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1014			path[level].bp_op = nilfs_btree_do_insert;
1015			stats->bs_nblocks++;
1016			goto out;
1017		}
1018
1019		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1020		pindex = path[level + 1].bp_index;
1021
1022		/* left sibling */
1023		if (pindex > 0) {
1024			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1025							  ncmax);
1026			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1027			if (ret < 0)
1028				goto err_out_child_node;
1029			sib = (struct nilfs_btree_node *)bh->b_data;
1030			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1031				path[level].bp_sib_bh = bh;
1032				path[level].bp_op = nilfs_btree_carry_left;
1033				stats->bs_nblocks++;
1034				goto out;
1035			} else {
1036				brelse(bh);
1037			}
1038		}
1039
1040		/* right sibling */
1041		if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1042			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1043							  ncmax);
1044			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1045			if (ret < 0)
1046				goto err_out_child_node;
1047			sib = (struct nilfs_btree_node *)bh->b_data;
1048			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1049				path[level].bp_sib_bh = bh;
1050				path[level].bp_op = nilfs_btree_carry_right;
1051				stats->bs_nblocks++;
1052				goto out;
1053			} else {
1054				brelse(bh);
1055			}
1056		}
1057
1058		/* split */
1059		path[level].bp_newreq.bpr_ptr =
1060			path[level - 1].bp_newreq.bpr_ptr + 1;
1061		ret = nilfs_bmap_prepare_alloc_ptr(btree,
1062						   &path[level].bp_newreq, dat);
1063		if (ret < 0)
1064			goto err_out_child_node;
1065		ret = nilfs_btree_get_new_block(btree,
1066						path[level].bp_newreq.bpr_ptr,
1067						&bh);
1068		if (ret < 0)
1069			goto err_out_curr_node;
1070
1071		stats->bs_nblocks++;
1072
1073		sib = (struct nilfs_btree_node *)bh->b_data;
1074		nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1075		path[level].bp_sib_bh = bh;
1076		path[level].bp_op = nilfs_btree_split;
1077	}
1078
1079	/* root */
1080	node = nilfs_btree_get_root(btree);
1081	if (nilfs_btree_node_get_nchildren(node) <
1082	    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1083		path[level].bp_op = nilfs_btree_do_insert;
1084		stats->bs_nblocks++;
1085		goto out;
1086	}
1087
1088	/* grow */
1089	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1090	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1091	if (ret < 0)
1092		goto err_out_child_node;
1093	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1094					&bh);
1095	if (ret < 0)
1096		goto err_out_curr_node;
1097
1098	nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1099			      0, level, 0, ncblk, NULL, NULL);
1100	path[level].bp_sib_bh = bh;
1101	path[level].bp_op = nilfs_btree_grow;
1102
1103	level++;
1104	path[level].bp_op = nilfs_btree_do_insert;
1105
1106	/* a newly-created node block and a data block are added */
1107	stats->bs_nblocks += 2;
1108
1109	/* success */
1110 out:
1111	*levelp = level;
1112	return ret;
1113
1114	/* error */
1115 err_out_curr_node:
1116	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1117 err_out_child_node:
1118	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1119		nilfs_btnode_delete(path[level].bp_sib_bh);
1120		nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1121
1122	}
1123
1124	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1125 err_out_data:
1126	*levelp = level;
1127	stats->bs_nblocks = 0;
1128	return ret;
1129}
1130
1131static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1132				      struct nilfs_btree_path *path,
1133				      int maxlevel, __u64 key, __u64 ptr)
1134{
1135	struct inode *dat = NULL;
1136	int level;
1137
1138	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1139	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1140	if (NILFS_BMAP_USE_VBN(btree)) {
1141		nilfs_bmap_set_target_v(btree, key, ptr);
1142		dat = nilfs_bmap_get_dat(btree);
1143	}
1144
1145	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1146		nilfs_bmap_commit_alloc_ptr(btree,
1147					    &path[level - 1].bp_newreq, dat);
1148		path[level].bp_op(btree, path, level, &key, &ptr);
1149	}
1150
1151	if (!nilfs_bmap_dirty(btree))
1152		nilfs_bmap_set_dirty(btree);
1153}
1154
1155static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1156{
1157	struct nilfs_btree_path *path;
1158	struct nilfs_bmap_stats stats;
1159	int level, ret;
1160
1161	path = nilfs_btree_alloc_path();
1162	if (path == NULL)
1163		return -ENOMEM;
1164
1165	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1166				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1167	if (ret != -ENOENT) {
1168		if (ret == 0)
1169			ret = -EEXIST;
1170		goto out;
1171	}
1172
1173	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1174	if (ret < 0)
1175		goto out;
1176	nilfs_btree_commit_insert(btree, path, level, key, ptr);
1177	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1178
1179 out:
1180	nilfs_btree_free_path(path);
1181	return ret;
1182}
1183
1184static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1185				  struct nilfs_btree_path *path,
1186				  int level, __u64 *keyp, __u64 *ptrp)
1187{
1188	struct nilfs_btree_node *node;
1189	int ncblk;
1190
1191	if (level < nilfs_btree_height(btree) - 1) {
1192		node = nilfs_btree_get_nonroot_node(path, level);
1193		ncblk = nilfs_btree_nchildren_per_block(btree);
1194		nilfs_btree_node_delete(node, path[level].bp_index,
1195					keyp, ptrp, ncblk);
1196		if (!buffer_dirty(path[level].bp_bh))
1197			mark_buffer_dirty(path[level].bp_bh);
1198		if (path[level].bp_index == 0)
1199			nilfs_btree_promote_key(btree, path, level + 1,
1200				nilfs_btree_node_get_key(node, 0));
1201	} else {
1202		node = nilfs_btree_get_root(btree);
1203		nilfs_btree_node_delete(node, path[level].bp_index,
1204					keyp, ptrp,
1205					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1206	}
1207}
1208
1209static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1210				    struct nilfs_btree_path *path,
1211				    int level, __u64 *keyp, __u64 *ptrp)
1212{
1213	struct nilfs_btree_node *node, *left;
1214	int nchildren, lnchildren, n, ncblk;
1215
1216	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1217
1218	node = nilfs_btree_get_nonroot_node(path, level);
1219	left = nilfs_btree_get_sib_node(path, level);
1220	nchildren = nilfs_btree_node_get_nchildren(node);
1221	lnchildren = nilfs_btree_node_get_nchildren(left);
1222	ncblk = nilfs_btree_nchildren_per_block(btree);
1223
1224	n = (nchildren + lnchildren) / 2 - nchildren;
1225
1226	nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1227
1228	if (!buffer_dirty(path[level].bp_bh))
1229		mark_buffer_dirty(path[level].bp_bh);
1230	if (!buffer_dirty(path[level].bp_sib_bh))
1231		mark_buffer_dirty(path[level].bp_sib_bh);
1232
1233	nilfs_btree_promote_key(btree, path, level + 1,
1234				nilfs_btree_node_get_key(node, 0));
1235
1236	brelse(path[level].bp_sib_bh);
1237	path[level].bp_sib_bh = NULL;
1238	path[level].bp_index += n;
1239}
1240
1241static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1242				     struct nilfs_btree_path *path,
1243				     int level, __u64 *keyp, __u64 *ptrp)
1244{
1245	struct nilfs_btree_node *node, *right;
1246	int nchildren, rnchildren, n, ncblk;
1247
1248	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1249
1250	node = nilfs_btree_get_nonroot_node(path, level);
1251	right = nilfs_btree_get_sib_node(path, level);
1252	nchildren = nilfs_btree_node_get_nchildren(node);
1253	rnchildren = nilfs_btree_node_get_nchildren(right);
1254	ncblk = nilfs_btree_nchildren_per_block(btree);
1255
1256	n = (nchildren + rnchildren) / 2 - nchildren;
1257
1258	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1259
1260	if (!buffer_dirty(path[level].bp_bh))
1261		mark_buffer_dirty(path[level].bp_bh);
1262	if (!buffer_dirty(path[level].bp_sib_bh))
1263		mark_buffer_dirty(path[level].bp_sib_bh);
1264
1265	path[level + 1].bp_index++;
1266	nilfs_btree_promote_key(btree, path, level + 1,
1267				nilfs_btree_node_get_key(right, 0));
1268	path[level + 1].bp_index--;
1269
1270	brelse(path[level].bp_sib_bh);
1271	path[level].bp_sib_bh = NULL;
1272}
1273
1274static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1275				    struct nilfs_btree_path *path,
1276				    int level, __u64 *keyp, __u64 *ptrp)
1277{
1278	struct nilfs_btree_node *node, *left;
1279	int n, ncblk;
1280
1281	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1282
1283	node = nilfs_btree_get_nonroot_node(path, level);
1284	left = nilfs_btree_get_sib_node(path, level);
1285	ncblk = nilfs_btree_nchildren_per_block(btree);
1286
1287	n = nilfs_btree_node_get_nchildren(node);
1288
1289	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1290
1291	if (!buffer_dirty(path[level].bp_sib_bh))
1292		mark_buffer_dirty(path[level].bp_sib_bh);
1293
1294	nilfs_btnode_delete(path[level].bp_bh);
1295	path[level].bp_bh = path[level].bp_sib_bh;
1296	path[level].bp_sib_bh = NULL;
1297	path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1298}
1299
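/*
 * Merge the right sibling into the current node: the sibling's entries
 * are appended here and its node block is deleted.  The parent index is
 * advanced so that the pointer removed one level up is the one to the
 * right sibling.
 */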
1300static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1301				     struct nilfs_btree_path *path,
1302				     int level, __u64 *keyp, __u64 *ptrp)
1303{
1304	struct nilfs_btree_node *node, *right;
1305	int n, ncblk;
1306
1307	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1308
1309	node = nilfs_btree_get_nonroot_node(path, level);
1310	right = nilfs_btree_get_sib_node(path, level);
1311	ncblk = nilfs_btree_nchildren_per_block(btree);
1312
1313	n = nilfs_btree_node_get_nchildren(right);
1314
1315	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1316
1317	if (!buffer_dirty(path[level].bp_bh))
1318		mark_buffer_dirty(path[level].bp_bh);
1319
1320	nilfs_btnode_delete(path[level].bp_sib_bh);
1321	path[level].bp_sib_bh = NULL;
1322	path[level + 1].bp_index++;
1323}
1324
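/*
 * Reduce the tree height by one: when the root is left with a single
 * child whose entries fit into the root node, copy them into the root,
 * lower the root's level, and delete the child's block.
 */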
1325static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1326			       struct nilfs_btree_path *path,
1327			       int level, __u64 *keyp, __u64 *ptrp)
1328{
1329	struct nilfs_btree_node *root, *child;
1330	int n, ncblk;
1331
1332	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1333
1334	root = nilfs_btree_get_root(btree);
1335	child = nilfs_btree_get_nonroot_node(path, level);
1336	ncblk = nilfs_btree_nchildren_per_block(btree);
1337
1338	nilfs_btree_node_delete(root, 0, NULL, NULL,
1339				NILFS_BTREE_ROOT_NCHILDREN_MAX);
1340	nilfs_btree_node_set_level(root, level);
1341	n = nilfs_btree_node_get_nchildren(child);
1342	nilfs_btree_node_move_left(root, child, n,
1343				   NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1344
1345	nilfs_btnode_delete(path[level].bp_bh);
1346	path[level].bp_bh = NULL;
1347}
1348
1349static void nilfs_btree_nop(struct nilfs_bmap *btree,
1350			    struct nilfs_btree_path *path,
1351			    int level, __u64 *keyp, __u64 *ptrp)
1352{
1353}
1354
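/*
 * Walk up from the leaf level and decide how the delete will be carried
 * out at each level: a plain delete while the node keeps at least the
 * minimum number of children, otherwise borrowing from or concatenation
 * with a sibling, and a root shrink when the root's only child becomes
 * small enough to fold back into the root.  The chosen operation is
 * recorded in bp_op, the block count estimate is accumulated in stats,
 * and the pointers to be released are prepared against the DAT where
 * needed.
 */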
1355static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1356				      struct nilfs_btree_path *path,
1357				      int *levelp,
1358				      struct nilfs_bmap_stats *stats,
1359				      struct inode *dat)
1360{
1361	struct buffer_head *bh;
1362	struct nilfs_btree_node *node, *parent, *sib;
1363	__u64 sibptr;
1364	int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1365
1366	ret = 0;
1367	stats->bs_nblocks = 0;
1368	ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1369	ncblk = nilfs_btree_nchildren_per_block(btree);
1370
1371	for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1372	     level < nilfs_btree_height(btree) - 1;
1373	     level++) {
1374		node = nilfs_btree_get_nonroot_node(path, level);
1375		path[level].bp_oldreq.bpr_ptr =
1376			nilfs_btree_node_get_ptr(node, dindex, ncblk);
1377		ret = nilfs_bmap_prepare_end_ptr(btree,
1378						 &path[level].bp_oldreq, dat);
1379		if (ret < 0)
1380			goto err_out_child_node;
1381
1382		if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1383			path[level].bp_op = nilfs_btree_do_delete;
1384			stats->bs_nblocks++;
1385			goto out;
1386		}
1387
1388		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1389		pindex = path[level + 1].bp_index;
1390		dindex = pindex;
1391
1392		if (pindex > 0) {
1393			/* left sibling */
1394			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1395							  ncmax);
1396			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1397			if (ret < 0)
1398				goto err_out_curr_node;
1399			sib = (struct nilfs_btree_node *)bh->b_data;
1400			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1401				path[level].bp_sib_bh = bh;
1402				path[level].bp_op = nilfs_btree_borrow_left;
1403				stats->bs_nblocks++;
1404				goto out;
1405			} else {
1406				path[level].bp_sib_bh = bh;
1407				path[level].bp_op = nilfs_btree_concat_left;
1408				stats->bs_nblocks++;
1409				/* continue; */
1410			}
1411		} else if (pindex <
1412			   nilfs_btree_node_get_nchildren(parent) - 1) {
1413			/* right sibling */
1414			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1415							  ncmax);
1416			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1417			if (ret < 0)
1418				goto err_out_curr_node;
1419			sib = (struct nilfs_btree_node *)bh->b_data;
1420			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1421				path[level].bp_sib_bh = bh;
1422				path[level].bp_op = nilfs_btree_borrow_right;
1423				stats->bs_nblocks++;
1424				goto out;
1425			} else {
1426				path[level].bp_sib_bh = bh;
1427				path[level].bp_op = nilfs_btree_concat_right;
1428				stats->bs_nblocks++;
1429				/*
1430				 * When merging right sibling node
1431				 * into the current node, pointer to
1432				 * the right sibling node must be
1433				 * terminated instead.  The adjustment
1434				 * below is required for that.
1435				 */
1436				dindex = pindex + 1;
1437				/* continue; */
1438			}
1439		} else {
1440			/* no siblings */
1441			/* the only child of the root node */
1442			WARN_ON(level != nilfs_btree_height(btree) - 2);
1443			if (nilfs_btree_node_get_nchildren(node) - 1 <=
1444			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1445				path[level].bp_op = nilfs_btree_shrink;
1446				stats->bs_nblocks += 2;
1447				level++;
1448				path[level].bp_op = nilfs_btree_nop;
1449				goto shrink_root_child;
1450			} else {
1451				path[level].bp_op = nilfs_btree_do_delete;
1452				stats->bs_nblocks++;
1453				goto out;
1454			}
1455		}
1456	}
1457
1458	/* child of the root node is deleted */
1459	path[level].bp_op = nilfs_btree_do_delete;
1460	stats->bs_nblocks++;
1461
1462shrink_root_child:
1463	node = nilfs_btree_get_root(btree);
1464	path[level].bp_oldreq.bpr_ptr =
1465		nilfs_btree_node_get_ptr(node, dindex,
1466					 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1467
1468	ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1469	if (ret < 0)
1470		goto err_out_child_node;
1471
1472	/* success */
1473 out:
1474	*levelp = level;
1475	return ret;
1476
1477	/* error */
1478 err_out_curr_node:
1479	nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1480 err_out_child_node:
1481	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1482		brelse(path[level].bp_sib_bh);
1483		nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1484	}
1485	*levelp = level;
1486	stats->bs_nblocks = 0;
1487	return ret;
1488}
1489
1490static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1491				      struct nilfs_btree_path *path,
1492				      int maxlevel, struct inode *dat)
1493{
1494	int level;
1495
1496	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1497		nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1498		path[level].bp_op(btree, path, level, NULL, NULL);
1499	}
1500
1501	if (!nilfs_bmap_dirty(btree))
1502		nilfs_bmap_set_dirty(btree);
1503}
1504
1505static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1506
1507{
1508	struct nilfs_btree_path *path;
1509	struct nilfs_bmap_stats stats;
1510	struct inode *dat;
1511	int level, ret;
1512
1513	path = nilfs_btree_alloc_path();
1514	if (path == NULL)
1515		return -ENOMEM;
1516
1517	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1518				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1519	if (ret < 0)
1520		goto out;
1521
1522
1523	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1524
1525	ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1526	if (ret < 0)
1527		goto out;
1528	nilfs_btree_commit_delete(btree, path, level, dat);
1529	nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1530
1531out:
1532	nilfs_btree_free_path(path);
1533	return ret;
1534}
1535
1536static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1537{
1538	struct nilfs_btree_path *path;
1539	int ret;
1540
1541	path = nilfs_btree_alloc_path();
1542	if (path == NULL)
1543		return -ENOMEM;
1544
1545	ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1546
1547	nilfs_btree_free_path(path);
1548
1549	return ret;
1550}
1551
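/*
 * Check whether the B-tree could be converted back to a direct mapping
 * once the given key is deleted: the tree must already be small (height
 * two, or three with a lone child), the key must be the current last
 * key, and the remaining largest key must stay below
 * NILFS_BMAP_LARGE_LOW.
 */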
1552static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1553{
1554	struct buffer_head *bh;
1555	struct nilfs_btree_node *root, *node;
1556	__u64 maxkey, nextmaxkey;
1557	__u64 ptr;
1558	int nchildren, ret;
1559
1560	root = nilfs_btree_get_root(btree);
1561	switch (nilfs_btree_height(btree)) {
1562	case 2:
1563		bh = NULL;
1564		node = root;
1565		break;
1566	case 3:
1567		nchildren = nilfs_btree_node_get_nchildren(root);
1568		if (nchildren > 1)
1569			return 0;
1570		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1571					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1572		ret = nilfs_btree_get_block(btree, ptr, &bh);
1573		if (ret < 0)
1574			return ret;
1575		node = (struct nilfs_btree_node *)bh->b_data;
1576		break;
1577	default:
1578		return 0;
1579	}
1580
1581	nchildren = nilfs_btree_node_get_nchildren(node);
1582	maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1583	nextmaxkey = (nchildren > 1) ?
1584		nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1585	if (bh != NULL)
1586		brelse(bh);
1587
1588	return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1589}
1590
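/*
 * Copy up to @nitems key/pointer pairs out of the (already shrunken)
 * tree into @keys and @ptrs, reading the lone level-1 node when the
 * tree still has height three.  Returns the number of pairs copied,
 * or a negative error code.
 */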
1591static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1592				   __u64 *keys, __u64 *ptrs, int nitems)
1593{
1594	struct buffer_head *bh;
1595	struct nilfs_btree_node *node, *root;
1596	__le64 *dkeys;
1597	__le64 *dptrs;
1598	__u64 ptr;
1599	int nchildren, ncmax, i, ret;
1600
1601	root = nilfs_btree_get_root(btree);
1602	switch (nilfs_btree_height(btree)) {
1603	case 2:
1604		bh = NULL;
1605		node = root;
1606		ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1607		break;
1608	case 3:
1609		nchildren = nilfs_btree_node_get_nchildren(root);
1610		WARN_ON(nchildren > 1);
1611		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1612					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1613		ret = nilfs_btree_get_block(btree, ptr, &bh);
1614		if (ret < 0)
1615			return ret;
1616		node = (struct nilfs_btree_node *)bh->b_data;
1617		ncmax = nilfs_btree_nchildren_per_block(btree);
1618		break;
1619	default:
1620		node = NULL;
1621		return -EINVAL;
1622	}
1623
1624	nchildren = nilfs_btree_node_get_nchildren(node);
1625	if (nchildren < nitems)
1626		nitems = nchildren;
1627	dkeys = nilfs_btree_node_dkeys(node);
1628	dptrs = nilfs_btree_node_dptrs(node, ncmax);
1629	for (i = 0; i < nitems; i++) {
1630		keys[i] = le64_to_cpu(dkeys[i]);
1631		ptrs[i] = le64_to_cpu(dptrs[i]);
1632	}
1633
1634	if (bh != NULL)
1635		brelse(bh);
1636
1637	return nitems;
1638}
1639
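/*
 * Reserve the block pointers needed to turn the bmap into a B-tree:
 * @dreq is always allocated for the data block being inserted, and
 * @nreq plus a fresh btree node block (returned through @bhp) are
 * allocated as well when the entries will not all fit into the root.
 */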
1640static int
1641nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1642				       union nilfs_bmap_ptr_req *dreq,
1643				       union nilfs_bmap_ptr_req *nreq,
1644				       struct buffer_head **bhp,
1645				       struct nilfs_bmap_stats *stats)
1646{
1647	struct buffer_head *bh;
1648	struct inode *dat = NULL;
1649	int ret;
1650
1651	stats->bs_nblocks = 0;
1652
1653	/* for data */
1654	/* cannot find near ptr */
1655	if (NILFS_BMAP_USE_VBN(btree)) {
1656		dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1657		dat = nilfs_bmap_get_dat(btree);
1658	}
1659
1660	ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1661	if (ret < 0)
1662		return ret;
1663
1664	*bhp = NULL;
1665	stats->bs_nblocks++;
1666	if (nreq != NULL) {
1667		nreq->bpr_ptr = dreq->bpr_ptr + 1;
1668		ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1669		if (ret < 0)
1670			goto err_out_dreq;
1671
1672		ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1673		if (ret < 0)
1674			goto err_out_nreq;
1675
1676		*bhp = bh;
1677		stats->bs_nblocks++;
1678	}
1679
1680	/* success */
1681	return 0;
1682
1683	/* error */
1684 err_out_nreq:
1685	nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1686 err_out_dreq:
1687	nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1688	stats->bs_nblocks = 0;
1689	return ret;
1690
1691}
1692
1693static void
1694nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1695				      __u64 key, __u64 ptr,
1696				      const __u64 *keys, const __u64 *ptrs,
1697				      int n,
1698				      union nilfs_bmap_ptr_req *dreq,
1699				      union nilfs_bmap_ptr_req *nreq,
1700				      struct buffer_head *bh)
1701{
1702	struct nilfs_btree_node *node;
1703	struct inode *dat;
1704	__u64 tmpptr;
1705	int ncblk;
1706
1707	/* free resources */
1708	if (btree->b_ops->bop_clear != NULL)
1709		btree->b_ops->bop_clear(btree);
1710
1711	/* ptr must be a pointer to a buffer head. */
1712	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1713
1714	/* convert and insert */
1715	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1716	nilfs_btree_init(btree);
1717	if (nreq != NULL) {
1718		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1719		nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1720
1721		/* create child node at level 1 */
1722		node = (struct nilfs_btree_node *)bh->b_data;
1723		ncblk = nilfs_btree_nchildren_per_block(btree);
1724		nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1725		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1726		if (!buffer_dirty(bh))
1727			mark_buffer_dirty(bh);
1728		if (!nilfs_bmap_dirty(btree))
1729			nilfs_bmap_set_dirty(btree);
1730
1731		brelse(bh);
1732
1733		/* create root node at level 2 */
1734		node = nilfs_btree_get_root(btree);
1735		tmpptr = nreq->bpr_ptr;
1736		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1737				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1738				      &keys[0], &tmpptr);
1739	} else {
1740		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1741
1742		/* create root node at level 1 */
1743		node = nilfs_btree_get_root(btree);
1744		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1745				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1746				      keys, ptrs);
1747		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1748					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1749		if (!nilfs_bmap_dirty(btree))
1750			nilfs_bmap_set_dirty(btree);
1751	}
1752
1753	if (NILFS_BMAP_USE_VBN(btree))
1754		nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1755}
1756
1757/**
1758 * nilfs_btree_convert_and_insert - convert bmap to a B-tree and insert a new entry
1759 * @btree: bmap structure being converted in place into a B-tree
1760 * @key: key of the new entry to insert after the conversion
1761 * @ptr: pointer (block address) associated with @key
1762 * @keys: keys of the entries carried over from the original mapping
1763 * @ptrs: pointers associated with @keys
1764 * @n: number of entries in @keys and @ptrs
1765 */
1766int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1767				   __u64 key, __u64 ptr,
1768				   const __u64 *keys, const __u64 *ptrs, int n)
1769{
1770	struct buffer_head *bh;
1771	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1772	struct nilfs_bmap_stats stats;
1773	int ret;
1774
1775	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1776		di = &dreq;
1777		ni = NULL;
1778	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1779			   1 << btree->b_inode->i_blkbits)) {
1780		di = &dreq;
1781		ni = &nreq;
1782	} else {
1783		di = NULL;
1784		ni = NULL;
1785		BUG();
1786	}
1787
1788	ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1789						     &stats);
1790	if (ret < 0)
1791		return ret;
1792	nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1793					      di, ni, bh);
1794	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1795	return 0;
1796}
1797
1798static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1799				   struct nilfs_btree_path *path,
1800				   int level,
1801				   struct buffer_head *bh)
1802{
1803	while ((++level < nilfs_btree_height(btree) - 1) &&
1804	       !buffer_dirty(path[level].bp_bh))
1805		mark_buffer_dirty(path[level].bp_bh);
1806
1807	return 0;
1808}
1809
1810static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1811					struct nilfs_btree_path *path,
1812					int level, struct inode *dat)
1813{
1814	struct nilfs_btree_node *parent;
1815	int ncmax, ret;
1816
1817	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1818	path[level].bp_oldreq.bpr_ptr =
1819		nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1820					 ncmax);
1821	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1822	ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1823				       &path[level].bp_newreq.bpr_req);
1824	if (ret < 0)
1825		return ret;
1826
1827	if (buffer_nilfs_node(path[level].bp_bh)) {
1828		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1829		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1830		path[level].bp_ctxt.bh = path[level].bp_bh;
1831		ret = nilfs_btnode_prepare_change_key(
1832			&NILFS_BMAP_I(btree)->i_btnode_cache,
1833			&path[level].bp_ctxt);
1834		if (ret < 0) {
1835			nilfs_dat_abort_update(dat,
1836					       &path[level].bp_oldreq.bpr_req,
1837					       &path[level].bp_newreq.bpr_req);
1838			return ret;
1839		}
1840	}
1841
1842	return 0;
1843}
1844
1845static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1846					struct nilfs_btree_path *path,
1847					int level, struct inode *dat)
1848{
1849	struct nilfs_btree_node *parent;
1850	int ncmax;
1851
1852	nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1853				&path[level].bp_newreq.bpr_req,
1854				btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1855
1856	if (buffer_nilfs_node(path[level].bp_bh)) {
1857		nilfs_btnode_commit_change_key(
1858			&NILFS_BMAP_I(btree)->i_btnode_cache,
1859			&path[level].bp_ctxt);
1860		path[level].bp_bh = path[level].bp_ctxt.bh;
1861	}
1862	set_buffer_nilfs_volatile(path[level].bp_bh);
1863
1864	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1865	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1866				 path[level].bp_newreq.bpr_ptr, ncmax);
1867}
1868
1869static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1870				       struct nilfs_btree_path *path,
1871				       int level, struct inode *dat)
1872{
1873	nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1874			       &path[level].bp_newreq.bpr_req);
1875	if (buffer_nilfs_node(path[level].bp_bh))
1876		nilfs_btnode_abort_change_key(
1877			&NILFS_BMAP_I(btree)->i_btnode_cache,
1878			&path[level].bp_ctxt);
1879}
1880
1881static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1882					   struct nilfs_btree_path *path,
1883					   int minlevel, int *maxlevelp,
1884					   struct inode *dat)
1885{
1886	int level, ret;
1887
1888	level = minlevel;
1889	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1890		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1891		if (ret < 0)
1892			return ret;
1893	}
1894	while ((++level < nilfs_btree_height(btree) - 1) &&
1895	       !buffer_dirty(path[level].bp_bh)) {
1896
1897		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1898		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1899		if (ret < 0)
1900			goto out;
1901	}
1902
1903	/* success */
1904	*maxlevelp = level - 1;
1905	return 0;
1906
1907	/* error */
1908 out:
1909	while (--level > minlevel)
1910		nilfs_btree_abort_update_v(btree, path, level, dat);
1911	if (!buffer_nilfs_volatile(path[level].bp_bh))
1912		nilfs_btree_abort_update_v(btree, path, level, dat);
1913	return ret;
1914}
1915
1916static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
1917					   struct nilfs_btree_path *path,
1918					   int minlevel, int maxlevel,
1919					   struct buffer_head *bh,
1920					   struct inode *dat)
1921{
1922	int level;
1923
1924	if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1925		nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1926
1927	for (level = minlevel + 1; level <= maxlevel; level++)
1928		nilfs_btree_commit_update_v(btree, path, level, dat);
1929}
1930
1931static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
1932				   struct nilfs_btree_path *path,
1933				   int level, struct buffer_head *bh)
1934{
1935	int maxlevel = 0, ret;
1936	struct nilfs_btree_node *parent;
1937	struct inode *dat = nilfs_bmap_get_dat(btree);
1938	__u64 ptr;
1939	int ncmax;
1940
1941	get_bh(bh);
1942	path[level].bp_bh = bh;
1943	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1944					      dat);
1945	if (ret < 0)
1946		goto out;
1947
1948	if (buffer_nilfs_volatile(path[level].bp_bh)) {
1949		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1950		ptr = nilfs_btree_node_get_ptr(parent,
1951					       path[level + 1].bp_index,
1952					       ncmax);
1953		ret = nilfs_dat_mark_dirty(dat, ptr);
1954		if (ret < 0)
1955			goto out;
1956	}
1957
1958	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1959
1960 out:
1961	brelse(path[level].bp_bh);
1962	path[level].bp_bh = NULL;
1963	return ret;
1964}
1965
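/*
 * Propagate the dirty state of a data or btree node buffer toward the
 * root: look up the path for the buffer's first key, then either mark
 * the parent node buffers dirty (physical addressing) or prepare new
 * virtual block addresses through the DAT (virtual addressing).
 */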
1966static int nilfs_btree_propagate(struct nilfs_bmap *btree,
1967				 struct buffer_head *bh)
1968{
1969	struct nilfs_btree_path *path;
1970	struct nilfs_btree_node *node;
1971	__u64 key;
1972	int level, ret;
1973
1974	WARN_ON(!buffer_dirty(bh));
1975
1976	path = nilfs_btree_alloc_path();
1977	if (path == NULL)
1978		return -ENOMEM;
1979
1980	if (buffer_nilfs_node(bh)) {
1981		node = (struct nilfs_btree_node *)bh->b_data;
1982		key = nilfs_btree_node_get_key(node, 0);
1983		level = nilfs_btree_node_get_level(node);
1984	} else {
1985		key = nilfs_bmap_data_get_key(btree, bh);
1986		level = NILFS_BTREE_LEVEL_DATA;
1987	}
1988
1989	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
1990	if (ret < 0) {
1991		if (unlikely(ret == -ENOENT))
1992			printk(KERN_CRIT "%s: key = %llu, level = %d\n",
1993			       __func__, (unsigned long long)key, level);
1994		goto out;
1995	}
1996
1997	ret = NILFS_BMAP_USE_VBN(btree) ?
1998		nilfs_btree_propagate_v(btree, path, level, bh) :
1999		nilfs_btree_propagate_p(btree, path, level, bh);
2000
2001 out:
2002	nilfs_btree_free_path(path);
2003
2004	return ret;
2005}
2006
2007static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2008				    struct buffer_head *bh)
2009{
2010	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2011}
2012
2013static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2014					 struct list_head *lists,
2015					 struct buffer_head *bh)
2016{
2017	struct list_head *head;
2018	struct buffer_head *cbh;
2019	struct nilfs_btree_node *node, *cnode;
2020	__u64 key, ckey;
2021	int level;
2022
2023	get_bh(bh);
2024	node = (struct nilfs_btree_node *)bh->b_data;
2025	key = nilfs_btree_node_get_key(node, 0);
2026	level = nilfs_btree_node_get_level(node);
2027	if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2028	    level >= NILFS_BTREE_LEVEL_MAX) {
2029		dump_stack();
2030		printk(KERN_WARNING
2031		       "%s: invalid btree level: %d (key=%llu, ino=%lu, "
2032		       "blocknr=%llu)\n",
2033		       __func__, level, (unsigned long long)key,
2034		       NILFS_BMAP_I(btree)->vfs_inode.i_ino,
2035		       (unsigned long long)bh->b_blocknr);
2036		return;
2037	}
2038
2039	list_for_each(head, &lists[level]) {
2040		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2041		cnode = (struct nilfs_btree_node *)cbh->b_data;
2042		ckey = nilfs_btree_node_get_key(cnode, 0);
2043		if (key < ckey)
2044			break;
2045	}
2046	list_add_tail(&bh->b_assoc_buffers, head);
2047}
2048
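/*
 * Collect every dirty btree node buffer from the btnode page cache and
 * append them to @listp, lower levels first and ordered by first key
 * within each level.
 */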
2049static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2050					     struct list_head *listp)
2051{
2052	struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
2053	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2054	struct pagevec pvec;
2055	struct buffer_head *bh, *head;
2056	pgoff_t index = 0;
2057	int level, i;
2058
2059	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2060	     level < NILFS_BTREE_LEVEL_MAX;
2061	     level++)
2062		INIT_LIST_HEAD(&lists[level]);
2063
2064	pagevec_init(&pvec, 0);
2065
2066	while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2067				  PAGEVEC_SIZE)) {
2068		for (i = 0; i < pagevec_count(&pvec); i++) {
2069			bh = head = page_buffers(pvec.pages[i]);
2070			do {
2071				if (buffer_dirty(bh))
2072					nilfs_btree_add_dirty_buffer(btree,
2073								     lists, bh);
2074			} while ((bh = bh->b_this_page) != head);
2075		}
2076		pagevec_release(&pvec);
2077		cond_resched();
2078	}
2079
2080	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2081	     level < NILFS_BTREE_LEVEL_MAX;
2082	     level++)
2083		list_splice_tail(&lists[level], listp);
2084}
2085
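/*
 * Bind a buffer to its final disk block number for a bmap using
 * physical addressing: rewrite the parent pointer to @blocknr, move the
 * btnode cache key for node buffers, and fill in the on-disk binfo with
 * the key and level.
 */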
2086static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2087				struct nilfs_btree_path *path,
2088				int level,
2089				struct buffer_head **bh,
2090				sector_t blocknr,
2091				union nilfs_binfo *binfo)
2092{
2093	struct nilfs_btree_node *parent;
2094	__u64 key;
2095	__u64 ptr;
2096	int ncmax, ret;
2097
2098	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2099	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2100				       ncmax);
2101	if (buffer_nilfs_node(*bh)) {
2102		path[level].bp_ctxt.oldkey = ptr;
2103		path[level].bp_ctxt.newkey = blocknr;
2104		path[level].bp_ctxt.bh = *bh;
2105		ret = nilfs_btnode_prepare_change_key(
2106			&NILFS_BMAP_I(btree)->i_btnode_cache,
2107			&path[level].bp_ctxt);
2108		if (ret < 0)
2109			return ret;
2110		nilfs_btnode_commit_change_key(
2111			&NILFS_BMAP_I(btree)->i_btnode_cache,
2112			&path[level].bp_ctxt);
2113		*bh = path[level].bp_ctxt.bh;
2114	}
2115
2116	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2117				 ncmax);
2118
2119	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2120	/* on-disk format */
2121	binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2122	binfo->bi_dat.bi_level = level;
2123
2124	return 0;
2125}
2126
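/*
 * Bind a buffer to its final disk block number for a bmap using virtual
 * addressing: register @blocknr in the DAT as the location of the
 * buffer's virtual block number, and record that virtual address and
 * the key in the on-disk binfo.
 */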
2127static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2128				struct nilfs_btree_path *path,
2129				int level,
2130				struct buffer_head **bh,
2131				sector_t blocknr,
2132				union nilfs_binfo *binfo)
2133{
2134	struct nilfs_btree_node *parent;
2135	struct inode *dat = nilfs_bmap_get_dat(btree);
2136	__u64 key;
2137	__u64 ptr;
2138	union nilfs_bmap_ptr_req req;
2139	int ncmax, ret;
2140
2141	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2142	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2143				       ncmax);
2144	req.bpr_ptr = ptr;
2145	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2146	if (ret < 0)
2147		return ret;
2148	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2149
2150	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2151	/* on-disk format */
2152	binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2153	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2154
2155	return 0;
2156}
2157
2158static int nilfs_btree_assign(struct nilfs_bmap *btree,
2159			      struct buffer_head **bh,
2160			      sector_t blocknr,
2161			      union nilfs_binfo *binfo)
2162{
2163	struct nilfs_btree_path *path;
2164	struct nilfs_btree_node *node;
2165	__u64 key;
2166	int level, ret;
2167
2168	path = nilfs_btree_alloc_path();
2169	if (path == NULL)
2170		return -ENOMEM;
2171
2172	if (buffer_nilfs_node(*bh)) {
2173		node = (struct nilfs_btree_node *)(*bh)->b_data;
2174		key = nilfs_btree_node_get_key(node, 0);
2175		level = nilfs_btree_node_get_level(node);
2176	} else {
2177		key = nilfs_bmap_data_get_key(btree, *bh);
2178		level = NILFS_BTREE_LEVEL_DATA;
2179	}
2180
2181	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2182	if (ret < 0) {
2183		WARN_ON(ret == -ENOENT);
2184		goto out;
2185	}
2186
2187	ret = NILFS_BMAP_USE_VBN(btree) ?
2188		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2189		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2190
2191 out:
2192	nilfs_btree_free_path(path);
2193
2194	return ret;
2195}
2196
2197static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2198				 struct buffer_head **bh,
2199				 sector_t blocknr,
2200				 union nilfs_binfo *binfo)
2201{
2202	struct nilfs_btree_node *node;
2203	__u64 key;
2204	int ret;
2205
2206	ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2207			     blocknr);
2208	if (ret < 0)
2209		return ret;
2210
2211	if (buffer_nilfs_node(*bh)) {
2212		node = (struct nilfs_btree_node *)(*bh)->b_data;
2213		key = nilfs_btree_node_get_key(node, 0);
2214	} else
2215		key = nilfs_bmap_data_get_key(btree, *bh);
2216
2217	/* on-disk format */
2218	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2219	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2220
2221	return 0;
2222}
2223
2224static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2225{
2226	struct buffer_head *bh;
2227	struct nilfs_btree_path *path;
2228	__u64 ptr;
2229	int ret;
2230
2231	path = nilfs_btree_alloc_path();
2232	if (path == NULL)
2233		return -ENOMEM;
2234
2235	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2236	if (ret < 0) {
2237		WARN_ON(ret == -ENOENT);
2238		goto out;
2239	}
2240	ret = nilfs_btree_get_block(btree, ptr, &bh);
2241	if (ret < 0) {
2242		WARN_ON(ret == -ENOENT);
2243		goto out;
2244	}
2245
2246	if (!buffer_dirty(bh))
2247		mark_buffer_dirty(bh);
2248	brelse(bh);
2249	if (!nilfs_bmap_dirty(btree))
2250		nilfs_bmap_set_dirty(btree);
2251
2252 out:
2253	nilfs_btree_free_path(path);
2254	return ret;
2255}
2256
2257static const struct nilfs_bmap_operations nilfs_btree_ops = {
2258	.bop_lookup		=	nilfs_btree_lookup,
2259	.bop_lookup_contig	=	nilfs_btree_lookup_contig,
2260	.bop_insert		=	nilfs_btree_insert,
2261	.bop_delete		=	nilfs_btree_delete,
2262	.bop_clear		=	NULL,
2263
2264	.bop_propagate		=	nilfs_btree_propagate,
2265
2266	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2267
2268	.bop_assign		=	nilfs_btree_assign,
2269	.bop_mark		=	nilfs_btree_mark,
2270
2271	.bop_last_key		=	nilfs_btree_last_key,
2272	.bop_check_insert	=	NULL,
2273	.bop_check_delete	=	nilfs_btree_check_delete,
2274	.bop_gather_data	=	nilfs_btree_gather_data,
2275};
2276
2277static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2278	.bop_lookup		=	NULL,
2279	.bop_lookup_contig	=	NULL,
2280	.bop_insert		=	NULL,
2281	.bop_delete		=	NULL,
2282	.bop_clear		=	NULL,
2283
2284	.bop_propagate		=	nilfs_btree_propagate_gc,
2285
2286	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2287
2288	.bop_assign		=	nilfs_btree_assign_gc,
2289	.bop_mark		=	NULL,
2290
2291	.bop_last_key		=	NULL,
2292	.bop_check_insert	=	NULL,
2293	.bop_check_delete	=	NULL,
2294	.bop_gather_data	=	NULL,
2295};
2296
2297int nilfs_btree_init(struct nilfs_bmap *bmap)
2298{
2299	bmap->b_ops = &nilfs_btree_ops;
2300	bmap->b_nchildren_per_block =
2301		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2302	return 0;
2303}
2304
2305void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2306{
2307	bmap->b_ops = &nilfs_btree_ops_gc;
2308	bmap->b_nchildren_per_block =
2309		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2310}