   1/*
   2 * btree.c - NILFS B-tree.
   3 *
   4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; either version 2 of the License, or
   9 * (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * Written by Koji Sato.
  17 */
  18
  19#include <linux/slab.h>
  20#include <linux/string.h>
  21#include <linux/errno.h>
  22#include <linux/pagevec.h>
  23#include "nilfs.h"
  24#include "page.h"
  25#include "btnode.h"
  26#include "btree.h"
  27#include "alloc.h"
  28#include "dat.h"
  29
  30static void __nilfs_btree_init(struct nilfs_bmap *bmap);
  31
  32static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
  33{
  34	struct nilfs_btree_path *path;
  35	int level = NILFS_BTREE_LEVEL_DATA;
  36
  37	path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
  38	if (path == NULL)
  39		goto out;
  40
  41	for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
  42		path[level].bp_bh = NULL;
  43		path[level].bp_sib_bh = NULL;
  44		path[level].bp_index = 0;
  45		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  46		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  47		path[level].bp_op = NULL;
  48	}
  49
  50out:
  51	return path;
  52}
  53
  54static void nilfs_btree_free_path(struct nilfs_btree_path *path)
  55{
  56	int level = NILFS_BTREE_LEVEL_DATA;
  57
  58	for (; level < NILFS_BTREE_LEVEL_MAX; level++)
  59		brelse(path[level].bp_bh);
  60
  61	kmem_cache_free(nilfs_btree_path_cache, path);
  62}
  63
  64/*
  65 * B-tree node operations
  66 */
  67static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
  68				     __u64 ptr, struct buffer_head **bhp)
  69{
  70	struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
  71	struct buffer_head *bh;
  72
  73	bh = nilfs_btnode_create_block(btnc, ptr);
  74	if (!bh)
  75		return -ENOMEM;
  76
  77	set_buffer_nilfs_volatile(bh);
  78	*bhp = bh;
  79	return 0;
  80}
  81
  82static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
  83{
  84	return node->bn_flags;
  85}
  86
  87static void
  88nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
  89{
  90	node->bn_flags = flags;
  91}
  92
  93static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
  94{
  95	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
  96}
  97
  98static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
  99{
 100	return node->bn_level;
 101}
 102
 103static void
 104nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
 105{
 106	node->bn_level = level;
 107}
 108
 109static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
 110{
 111	return le16_to_cpu(node->bn_nchildren);
 112}
 113
 114static void
 115nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
 116{
 117	node->bn_nchildren = cpu_to_le16(nchildren);
 118}
 119
 120static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
 121{
 122	return i_blocksize(btree->b_inode);
 123}
 124
 125static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
 126{
 127	return btree->b_nchildren_per_block;
 128}
 129
 130static __le64 *
 131nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
 132{
 133	return (__le64 *)((char *)(node + 1) +
 134			  (nilfs_btree_node_root(node) ?
 135			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
 136}
 137
 138static __le64 *
 139nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
 140{
 141	return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
 142}
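/*
 * Layout sketch implied by the two accessors above (illustrative, field
 * widths not to scale):
 *
 *   root node (embedded in the bmap):
 *     [ nilfs_btree_node header ][ key[0..ncmax-1] ][ ptr[0..ncmax-1] ]
 *
 *   non-root node (one disk block):
 *     [ header ][ extra pad ][ key[0..ncmax-1] ][ ptr[0..ncmax-1] ]
 *
 * Keys and pointers are both little-endian 64-bit values, so the pointer
 * array simply begins ncmax slots after the start of the key array.
 */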
 143
 144static __u64
 145nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
 146{
 147	return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
 148}
 149
 150static void
 151nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
 152{
 153	*(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
 154}
 155
 156static __u64
 157nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
 158			 int ncmax)
 159{
 160	return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
 161}
 162
 163static void
 164nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
 165			 int ncmax)
 166{
 167	*(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
 168}
 169
 170static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
 171				  int level, int nchildren, int ncmax,
 172				  const __u64 *keys, const __u64 *ptrs)
 173{
 174	__le64 *dkeys;
 175	__le64 *dptrs;
 176	int i;
 177
 178	nilfs_btree_node_set_flags(node, flags);
 179	nilfs_btree_node_set_level(node, level);
 180	nilfs_btree_node_set_nchildren(node, nchildren);
 181
 182	dkeys = nilfs_btree_node_dkeys(node);
 183	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 184	for (i = 0; i < nchildren; i++) {
 185		dkeys[i] = cpu_to_le64(keys[i]);
 186		dptrs[i] = cpu_to_le64(ptrs[i]);
 187	}
 188}
 189
 190/* Assume the buffer heads corresponding to left and right are locked. */
 191static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
 192				       struct nilfs_btree_node *right,
 193				       int n, int lncmax, int rncmax)
 194{
 195	__le64 *ldkeys, *rdkeys;
 196	__le64 *ldptrs, *rdptrs;
 197	int lnchildren, rnchildren;
 198
 199	ldkeys = nilfs_btree_node_dkeys(left);
 200	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 201	lnchildren = nilfs_btree_node_get_nchildren(left);
 202
 203	rdkeys = nilfs_btree_node_dkeys(right);
 204	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 205	rnchildren = nilfs_btree_node_get_nchildren(right);
 206
 207	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
 208	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
 209	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
 210	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
 211
 212	lnchildren += n;
 213	rnchildren -= n;
 214	nilfs_btree_node_set_nchildren(left, lnchildren);
 215	nilfs_btree_node_set_nchildren(right, rnchildren);
 216}
 217
 218/* Assume that the buffer heads corresponding to left and right are locked. */
 219static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
 220					struct nilfs_btree_node *right,
 221					int n, int lncmax, int rncmax)
 222{
 223	__le64 *ldkeys, *rdkeys;
 224	__le64 *ldptrs, *rdptrs;
 225	int lnchildren, rnchildren;
 226
 227	ldkeys = nilfs_btree_node_dkeys(left);
 228	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 229	lnchildren = nilfs_btree_node_get_nchildren(left);
 230
 231	rdkeys = nilfs_btree_node_dkeys(right);
 232	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 233	rnchildren = nilfs_btree_node_get_nchildren(right);
 234
 235	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
 236	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
 237	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
 238	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
 239
 240	lnchildren -= n;
 241	rnchildren += n;
 242	nilfs_btree_node_set_nchildren(left, lnchildren);
 243	nilfs_btree_node_set_nchildren(right, rnchildren);
 244}
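/*
 * Worked example for the two move helpers above, with n = 2 and
 * illustrative keys only (pointers travel with their keys):
 *
 *   move_left:  left [10 20]        right [30 40 50 60]
 *           ->  left [10 20 30 40]  right [50 60]
 *
 *   move_right: left [10 20 30 40]  right [50 60]
 *           ->  left [10 20]        right [30 40 50 60]
 *
 * Both nodes' child counts are updated to reflect the shift.
 */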
 245
 246/* Assume that the buffer head corresponding to node is locked. */
 247static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
 248				    __u64 key, __u64 ptr, int ncmax)
 249{
 250	__le64 *dkeys;
 251	__le64 *dptrs;
 252	int nchildren;
 253
 254	dkeys = nilfs_btree_node_dkeys(node);
 255	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 256	nchildren = nilfs_btree_node_get_nchildren(node);
 257	if (index < nchildren) {
 258		memmove(dkeys + index + 1, dkeys + index,
 259			(nchildren - index) * sizeof(*dkeys));
 260		memmove(dptrs + index + 1, dptrs + index,
 261			(nchildren - index) * sizeof(*dptrs));
 262	}
 263	dkeys[index] = cpu_to_le64(key);
 264	dptrs[index] = cpu_to_le64(ptr);
 265	nchildren++;
 266	nilfs_btree_node_set_nchildren(node, nchildren);
 267}
 268
 269/* Assume that the buffer head corresponding to node is locked. */
 270static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
 271				    __u64 *keyp, __u64 *ptrp, int ncmax)
 272{
 273	__u64 key;
 274	__u64 ptr;
 275	__le64 *dkeys;
 276	__le64 *dptrs;
 277	int nchildren;
 278
 279	dkeys = nilfs_btree_node_dkeys(node);
 280	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 281	key = le64_to_cpu(dkeys[index]);
 282	ptr = le64_to_cpu(dptrs[index]);
 283	nchildren = nilfs_btree_node_get_nchildren(node);
 284	if (keyp != NULL)
 285		*keyp = key;
 286	if (ptrp != NULL)
 287		*ptrp = ptr;
 288
 289	if (index < nchildren - 1) {
 290		memmove(dkeys + index, dkeys + index + 1,
 291			(nchildren - index - 1) * sizeof(*dkeys));
 292		memmove(dptrs + index, dptrs + index + 1,
 293			(nchildren - index - 1) * sizeof(*dptrs));
 294	}
 295	nchildren--;
 296	nilfs_btree_node_set_nchildren(node, nchildren);
 297}
 298
 299static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
 300				   __u64 key, int *indexp)
 301{
 302	__u64 nkey;
 303	int index, low, high, s;
 304
 305	/* binary search */
 306	low = 0;
 307	high = nilfs_btree_node_get_nchildren(node) - 1;
 308	index = 0;
 309	s = 0;
 310	while (low <= high) {
 311		index = (low + high) / 2;
 312		nkey = nilfs_btree_node_get_key(node, index);
 313		if (nkey == key) {
 314			s = 0;
 315			goto out;
 316		} else if (nkey < key) {
 317			low = index + 1;
 318			s = -1;
 319		} else {
 320			high = index - 1;
 321			s = 1;
 322		}
 323	}
 324
 325	/* adjust index */
 326	if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
 327		if (s > 0 && index > 0)
 328			index--;
 329	} else if (s < 0)
 330		index++;
 331
 332 out:
 333	*indexp = index;
 334
 335	return s == 0;
 336}
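/*
 * Worked example for the lookup above, assuming a node holding the keys
 * [10, 30, 50] (values are illustrative):
 *
 *   key = 30                 -> returns 1, *indexp = 1 (exact match)
 *   key = 40, leaf level     -> returns 0, *indexp = 2 (insertion point)
 *   key = 40, internal level -> returns 0, *indexp = 1 (child covering 30..)
 *
 * Internal levels round the index down so the descent follows the child
 * whose first key is <= the search key; the leaf level rounds up to the
 * position a new key would be inserted at.
 */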
 337
 338/**
 339 * nilfs_btree_node_broken - verify consistency of btree node
 340 * @node: btree node block to be examined
 341 * @size: node size (in bytes)
 342 * @inode: host inode of btree
 343 * @blocknr: block number
 344 *
 345 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 346 */
 347static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
 348				   size_t size, struct inode *inode,
 349				   sector_t blocknr)
 350{
 351	int level, flags, nchildren;
 352	int ret = 0;
 353
 354	level = nilfs_btree_node_get_level(node);
 355	flags = nilfs_btree_node_get_flags(node);
 356	nchildren = nilfs_btree_node_get_nchildren(node);
 357
 358	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
 359		     level >= NILFS_BTREE_LEVEL_MAX ||
 360		     (flags & NILFS_BTREE_NODE_ROOT) ||
 361		     nchildren < 0 ||
 362		     nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
 363		nilfs_msg(inode->i_sb, KERN_CRIT,
 364			  "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
 365			  inode->i_ino, (unsigned long long)blocknr, level,
 366			  flags, nchildren);
 367		ret = 1;
 368	}
 369	return ret;
 370}
 371
 372/**
 373 * nilfs_btree_root_broken - verify consistency of btree root node
 374 * @node: btree root node to be examined
 375 * @inode: host inode of btree
 376 *
 377 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 378 */
 379static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
 380				   struct inode *inode)
 381{
 382	int level, flags, nchildren;
 383	int ret = 0;
 384
 385	level = nilfs_btree_node_get_level(node);
 386	flags = nilfs_btree_node_get_flags(node);
 387	nchildren = nilfs_btree_node_get_nchildren(node);
 388
 389	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
 390		     level >= NILFS_BTREE_LEVEL_MAX ||
 391		     nchildren < 0 ||
 392		     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
 393		nilfs_msg(inode->i_sb, KERN_CRIT,
 394			  "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
 395			  inode->i_ino, level, flags, nchildren);
 396		ret = 1;
 397	}
 398	return ret;
 399}
 400
 401int nilfs_btree_broken_node_block(struct buffer_head *bh)
 402{
 403	struct inode *inode;
 404	int ret;
 405
 406	if (buffer_nilfs_checked(bh))
 407		return 0;
 408
 409	inode = bh->b_page->mapping->host;
 410	ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
 411				      bh->b_size, inode, bh->b_blocknr);
 412	if (likely(!ret))
 413		set_buffer_nilfs_checked(bh);
 414	return ret;
 415}
 416
 417static struct nilfs_btree_node *
 418nilfs_btree_get_root(const struct nilfs_bmap *btree)
 419{
 420	return (struct nilfs_btree_node *)btree->b_u.u_data;
 421}
 422
 423static struct nilfs_btree_node *
 424nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
 425{
 426	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
 427}
 428
 429static struct nilfs_btree_node *
 430nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
 431{
 432	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
 433}
 434
 435static int nilfs_btree_height(const struct nilfs_bmap *btree)
 436{
 437	return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
 438}
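/*
 * Note on the level numbering used throughout this file: level 0
 * (NILFS_BTREE_LEVEL_DATA) stands for data blocks, level 1
 * (NILFS_BTREE_LEVEL_NODE_MIN) for the lowest b-tree node blocks, and the
 * root carries the highest level, so the height computed above is the
 * root's level plus one.  A root at level 2, for example, gives height 3:
 * root -> one layer of node blocks -> data blocks.
 */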
 439
 440static struct nilfs_btree_node *
 441nilfs_btree_get_node(const struct nilfs_bmap *btree,
 442		     const struct nilfs_btree_path *path,
 443		     int level, int *ncmaxp)
 444{
 445	struct nilfs_btree_node *node;
 446
 447	if (level == nilfs_btree_height(btree) - 1) {
 448		node = nilfs_btree_get_root(btree);
 449		*ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
 450	} else {
 451		node = nilfs_btree_get_nonroot_node(path, level);
 452		*ncmaxp = nilfs_btree_nchildren_per_block(btree);
 453	}
 454	return node;
 455}
 456
 457static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
 458				struct nilfs_btree_node *node, int level)
 459{
 460	if (unlikely(nilfs_btree_node_get_level(node) != level)) {
 461		dump_stack();
 462		nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
 463			  "btree level mismatch (ino=%lu): %d != %d",
 464			  btree->b_inode->i_ino,
 465			  nilfs_btree_node_get_level(node), level);
 466		return 1;
 467	}
 468	return 0;
 469}
 470
  471struct nilfs_btree_readahead_info {
  472	struct nilfs_btree_node *node;	/* parent node */
  473	int max_ra_blocks;		/* max number of blocks to read ahead */
  474	int index;			/* current index on the parent node */
  475	int ncmax;			/* number of children in the parent node */
  476};
 477
 478static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 479				   struct buffer_head **bhp,
 480				   const struct nilfs_btree_readahead_info *ra)
 481{
 482	struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
 483	struct buffer_head *bh, *ra_bh;
 484	sector_t submit_ptr = 0;
 485	int ret;
 486
 487	ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh,
 488					&submit_ptr);
 489	if (ret) {
 490		if (ret != -EEXIST)
 491			return ret;
 492		goto out_check;
 493	}
 494
 495	if (ra) {
 496		int i, n;
 497		__u64 ptr2;
 498
 499		/* read ahead sibling nodes */
 500		for (n = ra->max_ra_blocks, i = ra->index + 1;
 501		     n > 0 && i < ra->ncmax; n--, i++) {
 502			ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
 503
 504			ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
 505							REQ_OP_READ, REQ_RAHEAD,
 506							&ra_bh, &submit_ptr);
 507			if (likely(!ret || ret == -EEXIST))
 508				brelse(ra_bh);
 509			else if (ret != -EBUSY)
 510				break;
 511			if (!buffer_locked(bh))
 512				goto out_no_wait;
 513		}
 514	}
 515
 516	wait_on_buffer(bh);
 517
 518 out_no_wait:
 519	if (!buffer_uptodate(bh)) {
 520		nilfs_msg(btree->b_inode->i_sb, KERN_ERR,
 521			  "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
 522			  btree->b_inode->i_ino, (unsigned long long)ptr);
 523		brelse(bh);
 524		return -EIO;
 525	}
 526
 527 out_check:
 528	if (nilfs_btree_broken_node_block(bh)) {
 529		clear_buffer_uptodate(bh);
 530		brelse(bh);
 531		return -EINVAL;
 532	}
 533
 534	*bhp = bh;
 535	return 0;
 536}
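/*
 * Informal reading of the readahead loop above: while the requested block
 * is still in flight (its buffer locked), up to ra->max_ra_blocks child
 * blocks that follow it under the same parent are submitted with
 * REQ_RAHEAD and released immediately; as soon as the wanted buffer
 * unlocks, submission stops and the function goes on to validate it.
 */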
 537
 538static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 539				   struct buffer_head **bhp)
 540{
 541	return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
 542}
 543
 544static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
 545				 struct nilfs_btree_path *path,
 546				 __u64 key, __u64 *ptrp, int minlevel,
 547				 int readahead)
 548{
 549	struct nilfs_btree_node *node;
 550	struct nilfs_btree_readahead_info p, *ra;
 551	__u64 ptr;
 552	int level, index, found, ncmax, ret;
 553
 554	node = nilfs_btree_get_root(btree);
 555	level = nilfs_btree_node_get_level(node);
 556	if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
 557		return -ENOENT;
 558
 559	found = nilfs_btree_node_lookup(node, key, &index);
 560	ptr = nilfs_btree_node_get_ptr(node, index,
 561				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 562	path[level].bp_bh = NULL;
 563	path[level].bp_index = index;
 564
 565	ncmax = nilfs_btree_nchildren_per_block(btree);
 566
 567	while (--level >= minlevel) {
 568		ra = NULL;
 569		if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
 570			p.node = nilfs_btree_get_node(btree, path, level + 1,
 571						      &p.ncmax);
 572			p.index = index;
 573			p.max_ra_blocks = 7;
 574			ra = &p;
 575		}
 576		ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
 577					      ra);
 578		if (ret < 0)
 579			return ret;
 580
 581		node = nilfs_btree_get_nonroot_node(path, level);
 582		if (nilfs_btree_bad_node(btree, node, level))
 583			return -EINVAL;
 584		if (!found)
 585			found = nilfs_btree_node_lookup(node, key, &index);
 586		else
 587			index = 0;
 588		if (index < ncmax) {
 589			ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 590		} else {
 591			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
 592			/* insert */
 593			ptr = NILFS_BMAP_INVALID_PTR;
 594		}
 595		path[level].bp_index = index;
 596	}
 597	if (!found)
 598		return -ENOENT;
 599
 600	if (ptrp != NULL)
 601		*ptrp = ptr;
 602
 603	return 0;
 604}
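/*
 * Descent sketch for the lookup above: starting from the root, each step
 * binary-searches the key, records the chosen index (and, for non-root
 * levels, the buffer head) in path[level], and follows the selected
 * pointer one level down until minlevel is reached.  A caller passing
 * minlevel == NILFS_BTREE_LEVEL_NODE_MIN thus ends up with a full
 * root-to-leaf path, which the insert and delete code below reuses.
 */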
 605
 606static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
 607				      struct nilfs_btree_path *path,
 608				      __u64 *keyp, __u64 *ptrp)
 609{
 610	struct nilfs_btree_node *node;
 611	__u64 ptr;
 612	int index, level, ncmax, ret;
 613
 614	node = nilfs_btree_get_root(btree);
 615	index = nilfs_btree_node_get_nchildren(node) - 1;
 616	if (index < 0)
 617		return -ENOENT;
 618	level = nilfs_btree_node_get_level(node);
 619	ptr = nilfs_btree_node_get_ptr(node, index,
 620				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 621	path[level].bp_bh = NULL;
 622	path[level].bp_index = index;
 623	ncmax = nilfs_btree_nchildren_per_block(btree);
 624
 625	for (level--; level > 0; level--) {
 626		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
 627		if (ret < 0)
 628			return ret;
 629		node = nilfs_btree_get_nonroot_node(path, level);
 630		if (nilfs_btree_bad_node(btree, node, level))
 631			return -EINVAL;
 632		index = nilfs_btree_node_get_nchildren(node) - 1;
 633		ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 634		path[level].bp_index = index;
 635	}
 636
 637	if (keyp != NULL)
 638		*keyp = nilfs_btree_node_get_key(node, index);
 639	if (ptrp != NULL)
 640		*ptrp = ptr;
 641
 642	return 0;
 643}
 644
 645/**
 646 * nilfs_btree_get_next_key - get next valid key from btree path array
 647 * @btree: bmap struct of btree
 648 * @path: array of nilfs_btree_path struct
 649 * @minlevel: start level
 650 * @nextkey: place to store the next valid key
 651 *
 652 * Return Value: If a next key was found, 0 is returned. Otherwise,
 653 * -ENOENT is returned.
 654 */
 655static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
 656				    const struct nilfs_btree_path *path,
 657				    int minlevel, __u64 *nextkey)
 658{
 659	struct nilfs_btree_node *node;
 660	int maxlevel = nilfs_btree_height(btree) - 1;
 661	int index, next_adj, level;
 662
 663	/* Next index is already set to bp_index for leaf nodes. */
 664	next_adj = 0;
 665	for (level = minlevel; level <= maxlevel; level++) {
 666		if (level == maxlevel)
 667			node = nilfs_btree_get_root(btree);
 668		else
 669			node = nilfs_btree_get_nonroot_node(path, level);
 670
 671		index = path[level].bp_index + next_adj;
 672		if (index < nilfs_btree_node_get_nchildren(node)) {
 673			/* Next key is in this node */
 674			*nextkey = nilfs_btree_node_get_key(node, index);
 675			return 0;
 676		}
 677		/* For non-leaf nodes, next index is stored at bp_index + 1. */
 678		next_adj = 1;
 679	}
 680	return -ENOENT;
 681}
 682
 683static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
 684			      __u64 key, int level, __u64 *ptrp)
 685{
 686	struct nilfs_btree_path *path;
 687	int ret;
 688
 689	path = nilfs_btree_alloc_path();
 690	if (path == NULL)
 691		return -ENOMEM;
 692
 693	ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
 694
 695	nilfs_btree_free_path(path);
 696
 697	return ret;
 698}
 699
 700static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
 701				     __u64 key, __u64 *ptrp,
 702				     unsigned int maxblocks)
 703{
 704	struct nilfs_btree_path *path;
 705	struct nilfs_btree_node *node;
 706	struct inode *dat = NULL;
 707	__u64 ptr, ptr2;
 708	sector_t blocknr;
 709	int level = NILFS_BTREE_LEVEL_NODE_MIN;
 710	int ret, cnt, index, maxlevel, ncmax;
 711	struct nilfs_btree_readahead_info p;
 712
 713	path = nilfs_btree_alloc_path();
 714	if (path == NULL)
 715		return -ENOMEM;
 716
 717	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
 718	if (ret < 0)
 719		goto out;
 720
 721	if (NILFS_BMAP_USE_VBN(btree)) {
 722		dat = nilfs_bmap_get_dat(btree);
 723		ret = nilfs_dat_translate(dat, ptr, &blocknr);
 724		if (ret < 0)
 725			goto out;
 726		ptr = blocknr;
 727	}
 728	cnt = 1;
 729	if (cnt == maxblocks)
 730		goto end;
 731
 732	maxlevel = nilfs_btree_height(btree) - 1;
 733	node = nilfs_btree_get_node(btree, path, level, &ncmax);
 734	index = path[level].bp_index + 1;
 735	for (;;) {
 736		while (index < nilfs_btree_node_get_nchildren(node)) {
 737			if (nilfs_btree_node_get_key(node, index) !=
 738			    key + cnt)
 739				goto end;
 740			ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
 741			if (dat) {
 742				ret = nilfs_dat_translate(dat, ptr2, &blocknr);
 743				if (ret < 0)
 744					goto out;
 745				ptr2 = blocknr;
 746			}
 747			if (ptr2 != ptr + cnt || ++cnt == maxblocks)
 748				goto end;
 749			index++;
 750			continue;
 751		}
 752		if (level == maxlevel)
 753			break;
 754
 755		/* look-up right sibling node */
 756		p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
 757		p.index = path[level + 1].bp_index + 1;
 758		p.max_ra_blocks = 7;
 759		if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
 760		    nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
 761			break;
 762		ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
 763		path[level + 1].bp_index = p.index;
 764
 765		brelse(path[level].bp_bh);
 766		path[level].bp_bh = NULL;
 767
 768		ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
 769					      &p);
 770		if (ret < 0)
 771			goto out;
 772		node = nilfs_btree_get_nonroot_node(path, level);
 773		ncmax = nilfs_btree_nchildren_per_block(btree);
 774		index = 0;
 775		path[level].bp_index = index;
 776	}
 777 end:
 778	*ptrp = ptr;
 779	ret = cnt;
 780 out:
 781	nilfs_btree_free_path(path);
 782	return ret;
 783}
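/*
 * Usage sketch for the contiguous lookup above: on success the return
 * value is the number of blocks starting at @key whose translated disk
 * addresses are consecutive (capped at @maxblocks), and *ptrp holds the
 * address of the first one.  For example, keys 100..103 mapping to blocks
 * 5000..5003 with maxblocks >= 4 would yield a return value of 4 and
 * *ptrp == 5000 (illustrative numbers).
 */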
 784
 785static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
 786				    struct nilfs_btree_path *path,
 787				    int level, __u64 key)
 788{
 789	if (level < nilfs_btree_height(btree) - 1) {
 790		do {
 791			nilfs_btree_node_set_key(
 792				nilfs_btree_get_nonroot_node(path, level),
 793				path[level].bp_index, key);
 794			if (!buffer_dirty(path[level].bp_bh))
 795				mark_buffer_dirty(path[level].bp_bh);
 796		} while ((path[level].bp_index == 0) &&
 797			 (++level < nilfs_btree_height(btree) - 1));
 798	}
 799
 800	/* root */
 801	if (level == nilfs_btree_height(btree) - 1) {
 802		nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
 803					 path[level].bp_index, key);
 804	}
 805}
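/*
 * Note on key promotion above: when an operation changes the smallest key
 * of a node (bp_index == 0), the separator key stored in its parent must
 * be rewritten to match, and if that parent entry is itself the first one,
 * the update keeps propagating upward until the index is non-zero or the
 * root has been reached.
 */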
 806
 807static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
 808				  struct nilfs_btree_path *path,
 809				  int level, __u64 *keyp, __u64 *ptrp)
 810{
 811	struct nilfs_btree_node *node;
 812	int ncblk;
 813
 814	if (level < nilfs_btree_height(btree) - 1) {
 815		node = nilfs_btree_get_nonroot_node(path, level);
 816		ncblk = nilfs_btree_nchildren_per_block(btree);
 817		nilfs_btree_node_insert(node, path[level].bp_index,
 818					*keyp, *ptrp, ncblk);
 819		if (!buffer_dirty(path[level].bp_bh))
 820			mark_buffer_dirty(path[level].bp_bh);
 821
 822		if (path[level].bp_index == 0)
 823			nilfs_btree_promote_key(btree, path, level + 1,
 824						nilfs_btree_node_get_key(node,
 825									 0));
 826	} else {
 827		node = nilfs_btree_get_root(btree);
 828		nilfs_btree_node_insert(node, path[level].bp_index,
 829					*keyp, *ptrp,
 830					NILFS_BTREE_ROOT_NCHILDREN_MAX);
 831	}
 832}
 833
 834static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
 835				   struct nilfs_btree_path *path,
 836				   int level, __u64 *keyp, __u64 *ptrp)
 837{
 838	struct nilfs_btree_node *node, *left;
 839	int nchildren, lnchildren, n, move, ncblk;
 840
 841	node = nilfs_btree_get_nonroot_node(path, level);
 842	left = nilfs_btree_get_sib_node(path, level);
 843	nchildren = nilfs_btree_node_get_nchildren(node);
 844	lnchildren = nilfs_btree_node_get_nchildren(left);
 845	ncblk = nilfs_btree_nchildren_per_block(btree);
 846	move = 0;
 847
 848	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
 849	if (n > path[level].bp_index) {
 850		/* move insert point */
 851		n--;
 852		move = 1;
 853	}
 854
 855	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
 856
 857	if (!buffer_dirty(path[level].bp_bh))
 858		mark_buffer_dirty(path[level].bp_bh);
 859	if (!buffer_dirty(path[level].bp_sib_bh))
 860		mark_buffer_dirty(path[level].bp_sib_bh);
 861
 862	nilfs_btree_promote_key(btree, path, level + 1,
 863				nilfs_btree_node_get_key(node, 0));
 864
 865	if (move) {
 866		brelse(path[level].bp_bh);
 867		path[level].bp_bh = path[level].bp_sib_bh;
 868		path[level].bp_sib_bh = NULL;
 869		path[level].bp_index += lnchildren;
 870		path[level + 1].bp_index--;
 871	} else {
 872		brelse(path[level].bp_sib_bh);
 873		path[level].bp_sib_bh = NULL;
 874		path[level].bp_index -= n;
 875	}
 876
 877	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 878}
 879
 880static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
 881				    struct nilfs_btree_path *path,
 882				    int level, __u64 *keyp, __u64 *ptrp)
 883{
 884	struct nilfs_btree_node *node, *right;
 885	int nchildren, rnchildren, n, move, ncblk;
 886
 887	node = nilfs_btree_get_nonroot_node(path, level);
 888	right = nilfs_btree_get_sib_node(path, level);
 889	nchildren = nilfs_btree_node_get_nchildren(node);
 890	rnchildren = nilfs_btree_node_get_nchildren(right);
 891	ncblk = nilfs_btree_nchildren_per_block(btree);
 892	move = 0;
 893
 894	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
 895	if (n > nchildren - path[level].bp_index) {
 896		/* move insert point */
 897		n--;
 898		move = 1;
 899	}
 900
 901	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 902
 903	if (!buffer_dirty(path[level].bp_bh))
 904		mark_buffer_dirty(path[level].bp_bh);
 905	if (!buffer_dirty(path[level].bp_sib_bh))
 906		mark_buffer_dirty(path[level].bp_sib_bh);
 907
 908	path[level + 1].bp_index++;
 909	nilfs_btree_promote_key(btree, path, level + 1,
 910				nilfs_btree_node_get_key(right, 0));
 911	path[level + 1].bp_index--;
 912
 913	if (move) {
 914		brelse(path[level].bp_bh);
 915		path[level].bp_bh = path[level].bp_sib_bh;
 916		path[level].bp_sib_bh = NULL;
 917		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 918		path[level + 1].bp_index++;
 919	} else {
 920		brelse(path[level].bp_sib_bh);
 921		path[level].bp_sib_bh = NULL;
 922	}
 923
 924	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 925}
 926
 927static void nilfs_btree_split(struct nilfs_bmap *btree,
 928			      struct nilfs_btree_path *path,
 929			      int level, __u64 *keyp, __u64 *ptrp)
 930{
 931	struct nilfs_btree_node *node, *right;
 932	int nchildren, n, move, ncblk;
 933
 934	node = nilfs_btree_get_nonroot_node(path, level);
 935	right = nilfs_btree_get_sib_node(path, level);
 936	nchildren = nilfs_btree_node_get_nchildren(node);
 937	ncblk = nilfs_btree_nchildren_per_block(btree);
 938	move = 0;
 939
 940	n = (nchildren + 1) / 2;
 941	if (n > nchildren - path[level].bp_index) {
 942		n--;
 943		move = 1;
 944	}
 945
 946	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 947
 948	if (!buffer_dirty(path[level].bp_bh))
 949		mark_buffer_dirty(path[level].bp_bh);
 950	if (!buffer_dirty(path[level].bp_sib_bh))
 951		mark_buffer_dirty(path[level].bp_sib_bh);
 952
 953	if (move) {
 954		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 955		nilfs_btree_node_insert(right, path[level].bp_index,
 956					*keyp, *ptrp, ncblk);
 957
 958		*keyp = nilfs_btree_node_get_key(right, 0);
 959		*ptrp = path[level].bp_newreq.bpr_ptr;
 960
 961		brelse(path[level].bp_bh);
 962		path[level].bp_bh = path[level].bp_sib_bh;
 963		path[level].bp_sib_bh = NULL;
 964	} else {
 965		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 966
 967		*keyp = nilfs_btree_node_get_key(right, 0);
 968		*ptrp = path[level].bp_newreq.bpr_ptr;
 969
 970		brelse(path[level].bp_sib_bh);
 971		path[level].bp_sib_bh = NULL;
 972	}
 973
 974	path[level + 1].bp_index++;
 975}
 976
 977static void nilfs_btree_grow(struct nilfs_bmap *btree,
 978			     struct nilfs_btree_path *path,
 979			     int level, __u64 *keyp, __u64 *ptrp)
 980{
 981	struct nilfs_btree_node *root, *child;
 982	int n, ncblk;
 983
 984	root = nilfs_btree_get_root(btree);
 985	child = nilfs_btree_get_sib_node(path, level);
 986	ncblk = nilfs_btree_nchildren_per_block(btree);
 987
 988	n = nilfs_btree_node_get_nchildren(root);
 989
 990	nilfs_btree_node_move_right(root, child, n,
 991				    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
 992	nilfs_btree_node_set_level(root, level + 1);
 993
 994	if (!buffer_dirty(path[level].bp_sib_bh))
 995		mark_buffer_dirty(path[level].bp_sib_bh);
 996
 997	path[level].bp_bh = path[level].bp_sib_bh;
 998	path[level].bp_sib_bh = NULL;
 999
1000	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
1001
1002	*keyp = nilfs_btree_node_get_key(child, 0);
1003	*ptrp = path[level].bp_newreq.bpr_ptr;
1004}
1005
1006static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
1007				   const struct nilfs_btree_path *path)
1008{
1009	struct nilfs_btree_node *node;
1010	int level, ncmax;
1011
1012	if (path == NULL)
1013		return NILFS_BMAP_INVALID_PTR;
1014
1015	/* left sibling */
1016	level = NILFS_BTREE_LEVEL_NODE_MIN;
1017	if (path[level].bp_index > 0) {
1018		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1019		return nilfs_btree_node_get_ptr(node,
1020						path[level].bp_index - 1,
1021						ncmax);
1022	}
1023
1024	/* parent */
1025	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1026	if (level <= nilfs_btree_height(btree) - 1) {
1027		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1028		return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1029						ncmax);
1030	}
1031
1032	return NILFS_BMAP_INVALID_PTR;
1033}
1034
1035static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1036				       const struct nilfs_btree_path *path,
1037				       __u64 key)
1038{
1039	__u64 ptr;
1040
1041	ptr = nilfs_bmap_find_target_seq(btree, key);
1042	if (ptr != NILFS_BMAP_INVALID_PTR)
1043		/* sequential access */
1044		return ptr;
1045
1046	ptr = nilfs_btree_find_near(btree, path);
1047	if (ptr != NILFS_BMAP_INVALID_PTR)
1048		/* near */
1049		return ptr;
1050
1051	/* block group */
1052	return nilfs_bmap_find_target_in_group(btree);
1053}
1054
1055static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1056				      struct nilfs_btree_path *path,
1057				      int *levelp, __u64 key, __u64 ptr,
1058				      struct nilfs_bmap_stats *stats)
1059{
1060	struct buffer_head *bh;
1061	struct nilfs_btree_node *node, *parent, *sib;
1062	__u64 sibptr;
1063	int pindex, level, ncmax, ncblk, ret;
1064	struct inode *dat = NULL;
1065
1066	stats->bs_nblocks = 0;
1067	level = NILFS_BTREE_LEVEL_DATA;
1068
1069	/* allocate a new ptr for data block */
1070	if (NILFS_BMAP_USE_VBN(btree)) {
1071		path[level].bp_newreq.bpr_ptr =
1072			nilfs_btree_find_target_v(btree, path, key);
1073		dat = nilfs_bmap_get_dat(btree);
1074	}
1075
1076	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1077	if (ret < 0)
1078		goto err_out_data;
1079
1080	ncblk = nilfs_btree_nchildren_per_block(btree);
1081
1082	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1083	     level < nilfs_btree_height(btree) - 1;
1084	     level++) {
1085		node = nilfs_btree_get_nonroot_node(path, level);
1086		if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1087			path[level].bp_op = nilfs_btree_do_insert;
1088			stats->bs_nblocks++;
1089			goto out;
1090		}
1091
1092		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1093		pindex = path[level + 1].bp_index;
1094
1095		/* left sibling */
1096		if (pindex > 0) {
1097			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1098							  ncmax);
1099			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1100			if (ret < 0)
1101				goto err_out_child_node;
1102			sib = (struct nilfs_btree_node *)bh->b_data;
1103			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1104				path[level].bp_sib_bh = bh;
1105				path[level].bp_op = nilfs_btree_carry_left;
1106				stats->bs_nblocks++;
1107				goto out;
1108			} else {
1109				brelse(bh);
1110			}
1111		}
1112
1113		/* right sibling */
1114		if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1115			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1116							  ncmax);
1117			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1118			if (ret < 0)
1119				goto err_out_child_node;
1120			sib = (struct nilfs_btree_node *)bh->b_data;
1121			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1122				path[level].bp_sib_bh = bh;
1123				path[level].bp_op = nilfs_btree_carry_right;
1124				stats->bs_nblocks++;
1125				goto out;
1126			} else {
1127				brelse(bh);
1128			}
1129		}
1130
1131		/* split */
1132		path[level].bp_newreq.bpr_ptr =
1133			path[level - 1].bp_newreq.bpr_ptr + 1;
1134		ret = nilfs_bmap_prepare_alloc_ptr(btree,
1135						   &path[level].bp_newreq, dat);
1136		if (ret < 0)
1137			goto err_out_child_node;
1138		ret = nilfs_btree_get_new_block(btree,
1139						path[level].bp_newreq.bpr_ptr,
1140						&bh);
1141		if (ret < 0)
1142			goto err_out_curr_node;
1143
1144		stats->bs_nblocks++;
1145
1146		sib = (struct nilfs_btree_node *)bh->b_data;
1147		nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1148		path[level].bp_sib_bh = bh;
1149		path[level].bp_op = nilfs_btree_split;
1150	}
1151
1152	/* root */
1153	node = nilfs_btree_get_root(btree);
1154	if (nilfs_btree_node_get_nchildren(node) <
1155	    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1156		path[level].bp_op = nilfs_btree_do_insert;
1157		stats->bs_nblocks++;
1158		goto out;
1159	}
1160
1161	/* grow */
1162	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1163	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1164	if (ret < 0)
1165		goto err_out_child_node;
1166	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1167					&bh);
1168	if (ret < 0)
1169		goto err_out_curr_node;
1170
1171	nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1172			      0, level, 0, ncblk, NULL, NULL);
1173	path[level].bp_sib_bh = bh;
1174	path[level].bp_op = nilfs_btree_grow;
1175
1176	level++;
1177	path[level].bp_op = nilfs_btree_do_insert;
1178
1179	/* a newly-created node block and a data block are added */
1180	stats->bs_nblocks += 2;
1181
1182	/* success */
1183 out:
1184	*levelp = level;
1185	return ret;
1186
1187	/* error */
1188 err_out_curr_node:
1189	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1190 err_out_child_node:
1191	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1192		nilfs_btnode_delete(path[level].bp_sib_bh);
1193		nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1194
1195	}
1196
1197	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1198 err_out_data:
1199	*levelp = level;
1200	stats->bs_nblocks = 0;
1201	return ret;
1202}
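/*
 * Summary of the insertion strategy chosen above, cheapest option first at
 * each level: insert directly if the node has room; otherwise shift
 * entries into a left or right sibling that has room (carry_left /
 * carry_right); otherwise allocate a new sibling and split; and if even
 * the root is full, grow the tree by one level.  Each case records the
 * chosen operation in path[level].bp_op so that
 * nilfs_btree_commit_insert() can replay the plan bottom-up.
 */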
1203
1204static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1205				      struct nilfs_btree_path *path,
1206				      int maxlevel, __u64 key, __u64 ptr)
1207{
1208	struct inode *dat = NULL;
1209	int level;
1210
1211	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1212	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1213	if (NILFS_BMAP_USE_VBN(btree)) {
1214		nilfs_bmap_set_target_v(btree, key, ptr);
1215		dat = nilfs_bmap_get_dat(btree);
1216	}
1217
1218	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1219		nilfs_bmap_commit_alloc_ptr(btree,
1220					    &path[level - 1].bp_newreq, dat);
1221		path[level].bp_op(btree, path, level, &key, &ptr);
1222	}
1223
1224	if (!nilfs_bmap_dirty(btree))
1225		nilfs_bmap_set_dirty(btree);
1226}
1227
1228static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1229{
1230	struct nilfs_btree_path *path;
1231	struct nilfs_bmap_stats stats;
1232	int level, ret;
1233
1234	path = nilfs_btree_alloc_path();
1235	if (path == NULL)
1236		return -ENOMEM;
1237
1238	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1239				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1240	if (ret != -ENOENT) {
1241		if (ret == 0)
1242			ret = -EEXIST;
1243		goto out;
1244	}
1245
1246	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1247	if (ret < 0)
1248		goto out;
1249	nilfs_btree_commit_insert(btree, path, level, key, ptr);
1250	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1251
1252 out:
1253	nilfs_btree_free_path(path);
1254	return ret;
1255}
1256
1257static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1258				  struct nilfs_btree_path *path,
1259				  int level, __u64 *keyp, __u64 *ptrp)
1260{
1261	struct nilfs_btree_node *node;
1262	int ncblk;
1263
1264	if (level < nilfs_btree_height(btree) - 1) {
1265		node = nilfs_btree_get_nonroot_node(path, level);
1266		ncblk = nilfs_btree_nchildren_per_block(btree);
1267		nilfs_btree_node_delete(node, path[level].bp_index,
1268					keyp, ptrp, ncblk);
1269		if (!buffer_dirty(path[level].bp_bh))
1270			mark_buffer_dirty(path[level].bp_bh);
1271		if (path[level].bp_index == 0)
1272			nilfs_btree_promote_key(btree, path, level + 1,
1273				nilfs_btree_node_get_key(node, 0));
1274	} else {
1275		node = nilfs_btree_get_root(btree);
1276		nilfs_btree_node_delete(node, path[level].bp_index,
1277					keyp, ptrp,
1278					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1279	}
1280}
1281
1282static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1283				    struct nilfs_btree_path *path,
1284				    int level, __u64 *keyp, __u64 *ptrp)
1285{
1286	struct nilfs_btree_node *node, *left;
1287	int nchildren, lnchildren, n, ncblk;
1288
1289	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1290
1291	node = nilfs_btree_get_nonroot_node(path, level);
1292	left = nilfs_btree_get_sib_node(path, level);
1293	nchildren = nilfs_btree_node_get_nchildren(node);
1294	lnchildren = nilfs_btree_node_get_nchildren(left);
1295	ncblk = nilfs_btree_nchildren_per_block(btree);
1296
1297	n = (nchildren + lnchildren) / 2 - nchildren;
1298
1299	nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1300
1301	if (!buffer_dirty(path[level].bp_bh))
1302		mark_buffer_dirty(path[level].bp_bh);
1303	if (!buffer_dirty(path[level].bp_sib_bh))
1304		mark_buffer_dirty(path[level].bp_sib_bh);
1305
1306	nilfs_btree_promote_key(btree, path, level + 1,
1307				nilfs_btree_node_get_key(node, 0));
1308
1309	brelse(path[level].bp_sib_bh);
1310	path[level].bp_sib_bh = NULL;
1311	path[level].bp_index += n;
1312}
1313
1314static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1315				     struct nilfs_btree_path *path,
1316				     int level, __u64 *keyp, __u64 *ptrp)
1317{
1318	struct nilfs_btree_node *node, *right;
1319	int nchildren, rnchildren, n, ncblk;
1320
1321	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1322
1323	node = nilfs_btree_get_nonroot_node(path, level);
1324	right = nilfs_btree_get_sib_node(path, level);
1325	nchildren = nilfs_btree_node_get_nchildren(node);
1326	rnchildren = nilfs_btree_node_get_nchildren(right);
1327	ncblk = nilfs_btree_nchildren_per_block(btree);
1328
1329	n = (nchildren + rnchildren) / 2 - nchildren;
1330
1331	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1332
1333	if (!buffer_dirty(path[level].bp_bh))
1334		mark_buffer_dirty(path[level].bp_bh);
1335	if (!buffer_dirty(path[level].bp_sib_bh))
1336		mark_buffer_dirty(path[level].bp_sib_bh);
1337
1338	path[level + 1].bp_index++;
1339	nilfs_btree_promote_key(btree, path, level + 1,
1340				nilfs_btree_node_get_key(right, 0));
1341	path[level + 1].bp_index--;
1342
1343	brelse(path[level].bp_sib_bh);
1344	path[level].bp_sib_bh = NULL;
1345}
1346
1347static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1348				    struct nilfs_btree_path *path,
1349				    int level, __u64 *keyp, __u64 *ptrp)
1350{
1351	struct nilfs_btree_node *node, *left;
1352	int n, ncblk;
1353
1354	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1355
1356	node = nilfs_btree_get_nonroot_node(path, level);
1357	left = nilfs_btree_get_sib_node(path, level);
1358	ncblk = nilfs_btree_nchildren_per_block(btree);
1359
1360	n = nilfs_btree_node_get_nchildren(node);
1361
1362	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1363
1364	if (!buffer_dirty(path[level].bp_sib_bh))
1365		mark_buffer_dirty(path[level].bp_sib_bh);
1366
1367	nilfs_btnode_delete(path[level].bp_bh);
1368	path[level].bp_bh = path[level].bp_sib_bh;
1369	path[level].bp_sib_bh = NULL;
1370	path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1371}
1372
1373static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1374				     struct nilfs_btree_path *path,
1375				     int level, __u64 *keyp, __u64 *ptrp)
1376{
1377	struct nilfs_btree_node *node, *right;
1378	int n, ncblk;
1379
1380	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1381
1382	node = nilfs_btree_get_nonroot_node(path, level);
1383	right = nilfs_btree_get_sib_node(path, level);
1384	ncblk = nilfs_btree_nchildren_per_block(btree);
1385
1386	n = nilfs_btree_node_get_nchildren(right);
1387
1388	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1389
1390	if (!buffer_dirty(path[level].bp_bh))
1391		mark_buffer_dirty(path[level].bp_bh);
1392
1393	nilfs_btnode_delete(path[level].bp_sib_bh);
1394	path[level].bp_sib_bh = NULL;
1395	path[level + 1].bp_index++;
1396}
1397
1398static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1399			       struct nilfs_btree_path *path,
1400			       int level, __u64 *keyp, __u64 *ptrp)
1401{
1402	struct nilfs_btree_node *root, *child;
1403	int n, ncblk;
1404
1405	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1406
1407	root = nilfs_btree_get_root(btree);
1408	child = nilfs_btree_get_nonroot_node(path, level);
1409	ncblk = nilfs_btree_nchildren_per_block(btree);
1410
1411	nilfs_btree_node_delete(root, 0, NULL, NULL,
1412				NILFS_BTREE_ROOT_NCHILDREN_MAX);
1413	nilfs_btree_node_set_level(root, level);
1414	n = nilfs_btree_node_get_nchildren(child);
1415	nilfs_btree_node_move_left(root, child, n,
1416				   NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1417
1418	nilfs_btnode_delete(path[level].bp_bh);
1419	path[level].bp_bh = NULL;
1420}
1421
1422static void nilfs_btree_nop(struct nilfs_bmap *btree,
1423			    struct nilfs_btree_path *path,
1424			    int level, __u64 *keyp, __u64 *ptrp)
1425{
1426}
1427
1428static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1429				      struct nilfs_btree_path *path,
1430				      int *levelp,
1431				      struct nilfs_bmap_stats *stats,
1432				      struct inode *dat)
1433{
1434	struct buffer_head *bh;
1435	struct nilfs_btree_node *node, *parent, *sib;
1436	__u64 sibptr;
1437	int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1438
1439	ret = 0;
1440	stats->bs_nblocks = 0;
1441	ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1442	ncblk = nilfs_btree_nchildren_per_block(btree);
1443
1444	for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1445	     level < nilfs_btree_height(btree) - 1;
1446	     level++) {
1447		node = nilfs_btree_get_nonroot_node(path, level);
1448		path[level].bp_oldreq.bpr_ptr =
1449			nilfs_btree_node_get_ptr(node, dindex, ncblk);
1450		ret = nilfs_bmap_prepare_end_ptr(btree,
1451						 &path[level].bp_oldreq, dat);
1452		if (ret < 0)
1453			goto err_out_child_node;
1454
1455		if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1456			path[level].bp_op = nilfs_btree_do_delete;
1457			stats->bs_nblocks++;
1458			goto out;
1459		}
1460
1461		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1462		pindex = path[level + 1].bp_index;
1463		dindex = pindex;
1464
1465		if (pindex > 0) {
1466			/* left sibling */
1467			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1468							  ncmax);
1469			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1470			if (ret < 0)
1471				goto err_out_curr_node;
1472			sib = (struct nilfs_btree_node *)bh->b_data;
1473			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1474				path[level].bp_sib_bh = bh;
1475				path[level].bp_op = nilfs_btree_borrow_left;
1476				stats->bs_nblocks++;
1477				goto out;
1478			} else {
1479				path[level].bp_sib_bh = bh;
1480				path[level].bp_op = nilfs_btree_concat_left;
1481				stats->bs_nblocks++;
1482				/* continue; */
1483			}
1484		} else if (pindex <
1485			   nilfs_btree_node_get_nchildren(parent) - 1) {
1486			/* right sibling */
1487			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1488							  ncmax);
1489			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1490			if (ret < 0)
1491				goto err_out_curr_node;
1492			sib = (struct nilfs_btree_node *)bh->b_data;
1493			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1494				path[level].bp_sib_bh = bh;
1495				path[level].bp_op = nilfs_btree_borrow_right;
1496				stats->bs_nblocks++;
1497				goto out;
1498			} else {
1499				path[level].bp_sib_bh = bh;
1500				path[level].bp_op = nilfs_btree_concat_right;
1501				stats->bs_nblocks++;
1502				/*
1503				 * When merging right sibling node
1504				 * into the current node, pointer to
1505				 * the right sibling node must be
1506				 * terminated instead.  The adjustment
1507				 * below is required for that.
1508				 */
1509				dindex = pindex + 1;
1510				/* continue; */
1511			}
1512		} else {
1513			/* no siblings */
1514			/* the only child of the root node */
1515			WARN_ON(level != nilfs_btree_height(btree) - 2);
1516			if (nilfs_btree_node_get_nchildren(node) - 1 <=
1517			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1518				path[level].bp_op = nilfs_btree_shrink;
1519				stats->bs_nblocks += 2;
1520				level++;
1521				path[level].bp_op = nilfs_btree_nop;
1522				goto shrink_root_child;
1523			} else {
1524				path[level].bp_op = nilfs_btree_do_delete;
1525				stats->bs_nblocks++;
1526				goto out;
1527			}
1528		}
1529	}
1530
1531	/* child of the root node is deleted */
1532	path[level].bp_op = nilfs_btree_do_delete;
1533	stats->bs_nblocks++;
1534
1535shrink_root_child:
1536	node = nilfs_btree_get_root(btree);
1537	path[level].bp_oldreq.bpr_ptr =
1538		nilfs_btree_node_get_ptr(node, dindex,
1539					 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1540
1541	ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1542	if (ret < 0)
1543		goto err_out_child_node;
1544
1545	/* success */
1546 out:
1547	*levelp = level;
1548	return ret;
1549
1550	/* error */
1551 err_out_curr_node:
1552	nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1553 err_out_child_node:
1554	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1555		brelse(path[level].bp_sib_bh);
1556		nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1557	}
1558	*levelp = level;
1559	stats->bs_nblocks = 0;
1560	return ret;
1561}
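/*
 * Summary of the deletion strategy chosen above, mirroring the insert
 * path: delete in place if the node still has more than the minimum
 * number of children; otherwise borrow entries from a sibling that can
 * spare them (borrow_left / borrow_right); otherwise merge with a sibling
 * (concat_left / concat_right); and when the root is left with a single
 * child small enough to fit, pull that child's entries into the root and
 * shrink the tree by one level.
 */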
1562
1563static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1564				      struct nilfs_btree_path *path,
1565				      int maxlevel, struct inode *dat)
1566{
1567	int level;
1568
1569	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1570		nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1571		path[level].bp_op(btree, path, level, NULL, NULL);
1572	}
1573
1574	if (!nilfs_bmap_dirty(btree))
1575		nilfs_bmap_set_dirty(btree);
1576}
1577
1578static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1579
1580{
1581	struct nilfs_btree_path *path;
1582	struct nilfs_bmap_stats stats;
1583	struct inode *dat;
1584	int level, ret;
1585
1586	path = nilfs_btree_alloc_path();
1587	if (path == NULL)
1588		return -ENOMEM;
1589
1590	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1591				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1592	if (ret < 0)
1593		goto out;
1594
1595
1596	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1597
1598	ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1599	if (ret < 0)
1600		goto out;
1601	nilfs_btree_commit_delete(btree, path, level, dat);
1602	nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1603
1604out:
1605	nilfs_btree_free_path(path);
1606	return ret;
1607}
1608
1609static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1610				__u64 *keyp)
1611{
1612	struct nilfs_btree_path *path;
1613	const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1614	int ret;
1615
1616	path = nilfs_btree_alloc_path();
1617	if (!path)
1618		return -ENOMEM;
1619
1620	ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1621	if (!ret)
1622		*keyp = start;
1623	else if (ret == -ENOENT)
1624		ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1625
1626	nilfs_btree_free_path(path);
1627	return ret;
1628}
1629
1630static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1631{
1632	struct nilfs_btree_path *path;
1633	int ret;
1634
1635	path = nilfs_btree_alloc_path();
1636	if (path == NULL)
1637		return -ENOMEM;
1638
1639	ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1640
1641	nilfs_btree_free_path(path);
1642
1643	return ret;
1644}
1645
1646static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1647{
1648	struct buffer_head *bh;
1649	struct nilfs_btree_node *root, *node;
1650	__u64 maxkey, nextmaxkey;
1651	__u64 ptr;
1652	int nchildren, ret;
1653
1654	root = nilfs_btree_get_root(btree);
1655	switch (nilfs_btree_height(btree)) {
1656	case 2:
1657		bh = NULL;
1658		node = root;
1659		break;
1660	case 3:
1661		nchildren = nilfs_btree_node_get_nchildren(root);
1662		if (nchildren > 1)
1663			return 0;
1664		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1665					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1666		ret = nilfs_btree_get_block(btree, ptr, &bh);
1667		if (ret < 0)
1668			return ret;
1669		node = (struct nilfs_btree_node *)bh->b_data;
1670		break;
1671	default:
1672		return 0;
1673	}
1674
1675	nchildren = nilfs_btree_node_get_nchildren(node);
1676	maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1677	nextmaxkey = (nchildren > 1) ?
1678		nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1679	if (bh != NULL)
1680		brelse(bh);
1681
1682	return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1683}
1684
1685static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1686				   __u64 *keys, __u64 *ptrs, int nitems)
1687{
1688	struct buffer_head *bh;
1689	struct nilfs_btree_node *node, *root;
1690	__le64 *dkeys;
1691	__le64 *dptrs;
1692	__u64 ptr;
1693	int nchildren, ncmax, i, ret;
1694
1695	root = nilfs_btree_get_root(btree);
1696	switch (nilfs_btree_height(btree)) {
1697	case 2:
1698		bh = NULL;
1699		node = root;
1700		ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1701		break;
1702	case 3:
1703		nchildren = nilfs_btree_node_get_nchildren(root);
1704		WARN_ON(nchildren > 1);
1705		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1706					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1707		ret = nilfs_btree_get_block(btree, ptr, &bh);
1708		if (ret < 0)
1709			return ret;
1710		node = (struct nilfs_btree_node *)bh->b_data;
1711		ncmax = nilfs_btree_nchildren_per_block(btree);
1712		break;
1713	default:
1714		node = NULL;
1715		return -EINVAL;
1716	}
1717
1718	nchildren = nilfs_btree_node_get_nchildren(node);
1719	if (nchildren < nitems)
1720		nitems = nchildren;
1721	dkeys = nilfs_btree_node_dkeys(node);
1722	dptrs = nilfs_btree_node_dptrs(node, ncmax);
1723	for (i = 0; i < nitems; i++) {
1724		keys[i] = le64_to_cpu(dkeys[i]);
1725		ptrs[i] = le64_to_cpu(dptrs[i]);
1726	}
1727
1728	if (bh != NULL)
1729		brelse(bh);
1730
1731	return nitems;
1732}
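/*
 * Usage sketch: together with nilfs_btree_check_delete() above, this
 * supports converting a shrunken b-tree back into a direct mapping; it
 * copies up to @nitems key/pointer pairs out of the tree's single
 * bottom-level node (the root itself for height 2, its only child for
 * height 3) and returns the number of pairs copied.
 */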
1733
1734static int
1735nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1736				       union nilfs_bmap_ptr_req *dreq,
1737				       union nilfs_bmap_ptr_req *nreq,
1738				       struct buffer_head **bhp,
1739				       struct nilfs_bmap_stats *stats)
1740{
1741	struct buffer_head *bh;
1742	struct inode *dat = NULL;
1743	int ret;
1744
1745	stats->bs_nblocks = 0;
1746
1747	/* for data */
1748	/* cannot find near ptr */
1749	if (NILFS_BMAP_USE_VBN(btree)) {
1750		dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1751		dat = nilfs_bmap_get_dat(btree);
1752	}
1753
1754	ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1755	if (ret < 0)
1756		return ret;
1757
1758	*bhp = NULL;
1759	stats->bs_nblocks++;
1760	if (nreq != NULL) {
1761		nreq->bpr_ptr = dreq->bpr_ptr + 1;
1762		ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1763		if (ret < 0)
1764			goto err_out_dreq;
1765
1766		ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1767		if (ret < 0)
1768			goto err_out_nreq;
1769
1770		*bhp = bh;
1771		stats->bs_nblocks++;
1772	}
1773
1774	/* success */
1775	return 0;
1776
1777	/* error */
1778 err_out_nreq:
1779	nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1780 err_out_dreq:
1781	nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1782	stats->bs_nblocks = 0;
1783	return ret;
1784
1785}
1786
1787static void
1788nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1789				      __u64 key, __u64 ptr,
1790				      const __u64 *keys, const __u64 *ptrs,
1791				      int n,
1792				      union nilfs_bmap_ptr_req *dreq,
1793				      union nilfs_bmap_ptr_req *nreq,
1794				      struct buffer_head *bh)
1795{
1796	struct nilfs_btree_node *node;
1797	struct inode *dat;
1798	__u64 tmpptr;
1799	int ncblk;
1800
1801	/* free resources */
1802	if (btree->b_ops->bop_clear != NULL)
1803		btree->b_ops->bop_clear(btree);
1804
1805	/* ptr must be a pointer to a buffer head. */
1806	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1807
1808	/* convert and insert */
1809	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1810	__nilfs_btree_init(btree);
1811	if (nreq != NULL) {
1812		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1813		nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1814
1815		/* create child node at level 1 */
1816		node = (struct nilfs_btree_node *)bh->b_data;
1817		ncblk = nilfs_btree_nchildren_per_block(btree);
1818		nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1819		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1820		if (!buffer_dirty(bh))
1821			mark_buffer_dirty(bh);
1822		if (!nilfs_bmap_dirty(btree))
1823			nilfs_bmap_set_dirty(btree);
1824
1825		brelse(bh);
1826
1827		/* create root node at level 2 */
1828		node = nilfs_btree_get_root(btree);
1829		tmpptr = nreq->bpr_ptr;
1830		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1831				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1832				      &keys[0], &tmpptr);
1833	} else {
1834		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1835
1836		/* create root node at level 1 */
1837		node = nilfs_btree_get_root(btree);
1838		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1839				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1840				      keys, ptrs);
1841		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1842					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1843		if (!nilfs_bmap_dirty(btree))
1844			nilfs_bmap_set_dirty(btree);
1845	}
1846
1847	if (NILFS_BMAP_USE_VBN(btree))
1848		nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1849}
1850
1851/**
1852 * nilfs_btree_convert_and_insert - convert a direct bmap to a B-tree and insert a record
1853 * @btree: bmap struct of the B-tree
1854 * @key: key of the new record to be inserted
1855 * @ptr: pointer (block address) to be associated with @key
1856 * @keys: array of keys taken over from the direct mapping
1857 * @ptrs: array of pointers taken over from the direct mapping
1858 * @n: number of entries in @keys and @ptrs
1859 */
1860int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1861				   __u64 key, __u64 ptr,
1862				   const __u64 *keys, const __u64 *ptrs, int n)
1863{
1864	struct buffer_head *bh = NULL;
1865	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1866	struct nilfs_bmap_stats stats;
1867	int ret;
1868
1869	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1870		di = &dreq;
1871		ni = NULL;
1872	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1873			   nilfs_btree_node_size(btree))) {
1874		di = &dreq;
1875		ni = &nreq;
1876	} else {
1877		di = NULL;
1878		ni = NULL;
1879		BUG();
1880	}
1881
1882	ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1883						     &stats);
1884	if (ret < 0)
1885		return ret;
1886	nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1887					      di, ni, bh);
1888	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1889	return 0;
1890}
1891
1892static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1893				   struct nilfs_btree_path *path,
1894				   int level,
1895				   struct buffer_head *bh)
1896{
1897	while ((++level < nilfs_btree_height(btree) - 1) &&
1898	       !buffer_dirty(path[level].bp_bh))
1899		mark_buffer_dirty(path[level].bp_bh);
1900
1901	return 0;
1902}
1903
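/*
 * The *_update_v helpers below reassign the virtual block address of one
 * b-tree node through the DAT (prepare/commit/abort), moving the block to
 * its new key in the btnode cache when needed.  prepare_propagate_v() and
 * commit_propagate_v() apply this from @minlevel upward while the ancestor
 * buffers are not yet dirty.
 */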
1904static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1905					struct nilfs_btree_path *path,
1906					int level, struct inode *dat)
1907{
1908	struct nilfs_btree_node *parent;
1909	int ncmax, ret;
1910
1911	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1912	path[level].bp_oldreq.bpr_ptr =
1913		nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1914					 ncmax);
1915	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1916	ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1917				       &path[level].bp_newreq.bpr_req);
1918	if (ret < 0)
1919		return ret;
1920
1921	if (buffer_nilfs_node(path[level].bp_bh)) {
1922		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1923		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1924		path[level].bp_ctxt.bh = path[level].bp_bh;
1925		ret = nilfs_btnode_prepare_change_key(
1926			&NILFS_BMAP_I(btree)->i_btnode_cache,
1927			&path[level].bp_ctxt);
1928		if (ret < 0) {
1929			nilfs_dat_abort_update(dat,
1930					       &path[level].bp_oldreq.bpr_req,
1931					       &path[level].bp_newreq.bpr_req);
1932			return ret;
1933		}
1934	}
1935
1936	return 0;
1937}
1938
1939static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1940					struct nilfs_btree_path *path,
1941					int level, struct inode *dat)
1942{
1943	struct nilfs_btree_node *parent;
1944	int ncmax;
1945
1946	nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1947				&path[level].bp_newreq.bpr_req,
1948				btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1949
1950	if (buffer_nilfs_node(path[level].bp_bh)) {
1951		nilfs_btnode_commit_change_key(
1952			&NILFS_BMAP_I(btree)->i_btnode_cache,
1953			&path[level].bp_ctxt);
1954		path[level].bp_bh = path[level].bp_ctxt.bh;
1955	}
1956	set_buffer_nilfs_volatile(path[level].bp_bh);
1957
1958	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1959	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1960				 path[level].bp_newreq.bpr_ptr, ncmax);
1961}
1962
1963static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1964				       struct nilfs_btree_path *path,
1965				       int level, struct inode *dat)
1966{
1967	nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1968			       &path[level].bp_newreq.bpr_req);
1969	if (buffer_nilfs_node(path[level].bp_bh))
1970		nilfs_btnode_abort_change_key(
1971			&NILFS_BMAP_I(btree)->i_btnode_cache,
1972			&path[level].bp_ctxt);
1973}
1974
1975static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1976					   struct nilfs_btree_path *path,
1977					   int minlevel, int *maxlevelp,
1978					   struct inode *dat)
1979{
1980	int level, ret;
1981
1982	level = minlevel;
1983	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1984		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1985		if (ret < 0)
1986			return ret;
1987	}
1988	while ((++level < nilfs_btree_height(btree) - 1) &&
1989	       !buffer_dirty(path[level].bp_bh)) {
1990
1991		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1992		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1993		if (ret < 0)
1994			goto out;
1995	}
1996
1997	/* success */
1998	*maxlevelp = level - 1;
1999	return 0;
2000
2001	/* error */
2002 out:
2003	while (--level > minlevel)
2004		nilfs_btree_abort_update_v(btree, path, level, dat);
2005	if (!buffer_nilfs_volatile(path[level].bp_bh))
2006		nilfs_btree_abort_update_v(btree, path, level, dat);
2007	return ret;
2008}
2009
2010static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2011					   struct nilfs_btree_path *path,
2012					   int minlevel, int maxlevel,
2013					   struct buffer_head *bh,
2014					   struct inode *dat)
2015{
2016	int level;
2017
2018	if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2019		nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2020
2021	for (level = minlevel + 1; level <= maxlevel; level++)
2022		nilfs_btree_commit_update_v(btree, path, level, dat);
2023}
2024
2025static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2026				   struct nilfs_btree_path *path,
2027				   int level, struct buffer_head *bh)
2028{
2029	int maxlevel = 0, ret;
2030	struct nilfs_btree_node *parent;
2031	struct inode *dat = nilfs_bmap_get_dat(btree);
2032	__u64 ptr;
2033	int ncmax;
2034
2035	get_bh(bh);
2036	path[level].bp_bh = bh;
2037	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2038					      dat);
2039	if (ret < 0)
2040		goto out;
2041
2042	if (buffer_nilfs_volatile(path[level].bp_bh)) {
2043		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2044		ptr = nilfs_btree_node_get_ptr(parent,
2045					       path[level + 1].bp_index,
2046					       ncmax);
2047		ret = nilfs_dat_mark_dirty(dat, ptr);
2048		if (ret < 0)
2049			goto out;
2050	}
2051
2052	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2053
2054 out:
2055	brelse(path[level].bp_bh);
2056	path[level].bp_bh = NULL;
2057	return ret;
2058}
2059
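/*
 * nilfs_btree_propagate - propagate the dirty state of buffer @bh to its
 * ancestor node blocks.  The path for the buffer's first key is looked up,
 * then either the parent buffers are simply marked dirty (physical block
 * addresses) or their virtual block addresses are updated through the DAT
 * (virtual block addresses).
 */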
2060static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2061				 struct buffer_head *bh)
2062{
2063	struct nilfs_btree_path *path;
2064	struct nilfs_btree_node *node;
2065	__u64 key;
2066	int level, ret;
2067
2068	WARN_ON(!buffer_dirty(bh));
2069
2070	path = nilfs_btree_alloc_path();
2071	if (path == NULL)
2072		return -ENOMEM;
2073
2074	if (buffer_nilfs_node(bh)) {
2075		node = (struct nilfs_btree_node *)bh->b_data;
2076		key = nilfs_btree_node_get_key(node, 0);
2077		level = nilfs_btree_node_get_level(node);
2078	} else {
2079		key = nilfs_bmap_data_get_key(btree, bh);
2080		level = NILFS_BTREE_LEVEL_DATA;
2081	}
2082
2083	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2084	if (ret < 0) {
2085		if (unlikely(ret == -ENOENT))
2086			nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
2087				  "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
2088				  btree->b_inode->i_ino,
2089				  (unsigned long long)key, level);
2090		goto out;
2091	}
2092
2093	ret = NILFS_BMAP_USE_VBN(btree) ?
2094		nilfs_btree_propagate_v(btree, path, level, bh) :
2095		nilfs_btree_propagate_p(btree, path, level, bh);
2096
2097 out:
2098	nilfs_btree_free_path(path);
2099
2100	return ret;
2101}
2102
2103static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2104				    struct buffer_head *bh)
2105{
2106	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2107}
2108
2109static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2110					 struct list_head *lists,
2111					 struct buffer_head *bh)
2112{
2113	struct list_head *head;
2114	struct buffer_head *cbh;
2115	struct nilfs_btree_node *node, *cnode;
2116	__u64 key, ckey;
2117	int level;
2118
2119	get_bh(bh);
2120	node = (struct nilfs_btree_node *)bh->b_data;
2121	key = nilfs_btree_node_get_key(node, 0);
2122	level = nilfs_btree_node_get_level(node);
2123	if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2124	    level >= NILFS_BTREE_LEVEL_MAX) {
2125		dump_stack();
2126		nilfs_msg(btree->b_inode->i_sb, KERN_WARNING,
2127			  "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
2128			  level, (unsigned long long)key,
2129			  btree->b_inode->i_ino,
2130			  (unsigned long long)bh->b_blocknr);
2131		return;
2132	}
2133
2134	list_for_each(head, &lists[level]) {
2135		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2136		cnode = (struct nilfs_btree_node *)cbh->b_data;
2137		ckey = nilfs_btree_node_get_key(cnode, 0);
2138		if (key < ckey)
2139			break;
2140	}
2141	list_add_tail(&bh->b_assoc_buffers, head);
2142}
2143
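/*
 * nilfs_btree_lookup_dirty_buffers - collect the dirty b-tree node buffers
 * from the btnode page cache and splice them onto @listp, grouped by level
 * (lowest level first) and ordered by first key within each level.
 */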
2144static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2145					     struct list_head *listp)
2146{
2147	struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
2148	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2149	struct pagevec pvec;
2150	struct buffer_head *bh, *head;
2151	pgoff_t index = 0;
2152	int level, i;
2153
2154	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2155	     level < NILFS_BTREE_LEVEL_MAX;
2156	     level++)
2157		INIT_LIST_HEAD(&lists[level]);
2158
2159	pagevec_init(&pvec);
2160
2161	while (pagevec_lookup_tag(&pvec, btcache, &index,
2162					PAGECACHE_TAG_DIRTY)) {
2163		for (i = 0; i < pagevec_count(&pvec); i++) {
2164			bh = head = page_buffers(pvec.pages[i]);
2165			do {
2166				if (buffer_dirty(bh))
2167					nilfs_btree_add_dirty_buffer(btree,
2168								     lists, bh);
2169			} while ((bh = bh->b_this_page) != head);
2170		}
2171		pagevec_release(&pvec);
2172		cond_resched();
2173	}
2174
2175	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2176	     level < NILFS_BTREE_LEVEL_MAX;
2177	     level++)
2178		list_splice_tail(&lists[level], listp);
2179}
2180
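/*
 * nilfs_btree_assign_p - record the real block number of a block whose
 * parent stores physical addresses: the parent slot is rewritten to
 * @blocknr (node blocks are first re-keyed in the btnode cache), and the
 * on-disk binfo keeps the block offset and b-tree level.
 */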
2181static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2182				struct nilfs_btree_path *path,
2183				int level,
2184				struct buffer_head **bh,
2185				sector_t blocknr,
2186				union nilfs_binfo *binfo)
2187{
2188	struct nilfs_btree_node *parent;
2189	__u64 key;
2190	__u64 ptr;
2191	int ncmax, ret;
2192
2193	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2194	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2195				       ncmax);
2196	if (buffer_nilfs_node(*bh)) {
2197		path[level].bp_ctxt.oldkey = ptr;
2198		path[level].bp_ctxt.newkey = blocknr;
2199		path[level].bp_ctxt.bh = *bh;
2200		ret = nilfs_btnode_prepare_change_key(
2201			&NILFS_BMAP_I(btree)->i_btnode_cache,
2202			&path[level].bp_ctxt);
2203		if (ret < 0)
2204			return ret;
2205		nilfs_btnode_commit_change_key(
2206			&NILFS_BMAP_I(btree)->i_btnode_cache,
2207			&path[level].bp_ctxt);
2208		*bh = path[level].bp_ctxt.bh;
2209	}
2210
2211	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2212				 ncmax);
2213
2214	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2215	/* on-disk format */
2216	binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2217	binfo->bi_dat.bi_level = level;
2218
2219	return 0;
2220}
2221
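/*
 * nilfs_btree_assign_v - record the real block number of a block addressed
 * through the DAT: the virtual address held in the parent is left alone and
 * its DAT entry is started at @blocknr; binfo keeps the virtual block
 * number and the block offset.
 */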
2222static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2223				struct nilfs_btree_path *path,
2224				int level,
2225				struct buffer_head **bh,
2226				sector_t blocknr,
2227				union nilfs_binfo *binfo)
2228{
2229	struct nilfs_btree_node *parent;
2230	struct inode *dat = nilfs_bmap_get_dat(btree);
2231	__u64 key;
2232	__u64 ptr;
2233	union nilfs_bmap_ptr_req req;
2234	int ncmax, ret;
2235
2236	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2237	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2238				       ncmax);
2239	req.bpr_ptr = ptr;
2240	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2241	if (ret < 0)
2242		return ret;
2243	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2244
2245	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2246	/* on-disk format */
2247	binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2248	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2249
2250	return 0;
2251}
2252
2253static int nilfs_btree_assign(struct nilfs_bmap *btree,
2254			      struct buffer_head **bh,
2255			      sector_t blocknr,
2256			      union nilfs_binfo *binfo)
2257{
2258	struct nilfs_btree_path *path;
2259	struct nilfs_btree_node *node;
2260	__u64 key;
2261	int level, ret;
2262
2263	path = nilfs_btree_alloc_path();
2264	if (path == NULL)
2265		return -ENOMEM;
2266
2267	if (buffer_nilfs_node(*bh)) {
2268		node = (struct nilfs_btree_node *)(*bh)->b_data;
2269		key = nilfs_btree_node_get_key(node, 0);
2270		level = nilfs_btree_node_get_level(node);
2271	} else {
2272		key = nilfs_bmap_data_get_key(btree, *bh);
2273		level = NILFS_BTREE_LEVEL_DATA;
2274	}
2275
2276	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2277	if (ret < 0) {
2278		WARN_ON(ret == -ENOENT);
2279		goto out;
2280	}
2281
2282	ret = NILFS_BMAP_USE_VBN(btree) ?
2283		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2284		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2285
2286 out:
2287	nilfs_btree_free_path(path);
2288
2289	return ret;
2290}
2291
2292static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2293				 struct buffer_head **bh,
2294				 sector_t blocknr,
2295				 union nilfs_binfo *binfo)
2296{
2297	struct nilfs_btree_node *node;
2298	__u64 key;
2299	int ret;
2300
2301	ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2302			     blocknr);
2303	if (ret < 0)
2304		return ret;
2305
2306	if (buffer_nilfs_node(*bh)) {
2307		node = (struct nilfs_btree_node *)(*bh)->b_data;
2308		key = nilfs_btree_node_get_key(node, 0);
2309	} else
2310		key = nilfs_bmap_data_get_key(btree, *bh);
2311
2312	/* on-disk format */
2313	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2314	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2315
2316	return 0;
2317}
2318
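/*
 * nilfs_btree_mark - look up the block reached at @key/@level and mark its
 * buffer (and the bmap itself) dirty so that it is written out by the next
 * segment construction.
 */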
2319static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2320{
2321	struct buffer_head *bh;
2322	struct nilfs_btree_path *path;
2323	__u64 ptr;
2324	int ret;
2325
2326	path = nilfs_btree_alloc_path();
2327	if (path == NULL)
2328		return -ENOMEM;
2329
2330	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2331	if (ret < 0) {
2332		WARN_ON(ret == -ENOENT);
2333		goto out;
2334	}
2335	ret = nilfs_btree_get_block(btree, ptr, &bh);
2336	if (ret < 0) {
2337		WARN_ON(ret == -ENOENT);
2338		goto out;
2339	}
2340
2341	if (!buffer_dirty(bh))
2342		mark_buffer_dirty(bh);
2343	brelse(bh);
2344	if (!nilfs_bmap_dirty(btree))
2345		nilfs_bmap_set_dirty(btree);
2346
2347 out:
2348	nilfs_btree_free_path(path);
2349	return ret;
2350}
2351
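/*
 * Callback tables installed on the bmap by __nilfs_btree_init() and
 * nilfs_btree_init_gc() below; the generic bmap layer dispatches through
 * these bop_* pointers.  The GC variant only supports propagation,
 * dirty-buffer collection, and block assignment.
 */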
2352static const struct nilfs_bmap_operations nilfs_btree_ops = {
2353	.bop_lookup		=	nilfs_btree_lookup,
2354	.bop_lookup_contig	=	nilfs_btree_lookup_contig,
2355	.bop_insert		=	nilfs_btree_insert,
2356	.bop_delete		=	nilfs_btree_delete,
2357	.bop_clear		=	NULL,
2358
2359	.bop_propagate		=	nilfs_btree_propagate,
2360
2361	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2362
2363	.bop_assign		=	nilfs_btree_assign,
2364	.bop_mark		=	nilfs_btree_mark,
2365
2366	.bop_seek_key		=	nilfs_btree_seek_key,
2367	.bop_last_key		=	nilfs_btree_last_key,
2368
2369	.bop_check_insert	=	NULL,
2370	.bop_check_delete	=	nilfs_btree_check_delete,
2371	.bop_gather_data	=	nilfs_btree_gather_data,
2372};
2373
2374static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2375	.bop_lookup		=	NULL,
2376	.bop_lookup_contig	=	NULL,
2377	.bop_insert		=	NULL,
2378	.bop_delete		=	NULL,
2379	.bop_clear		=	NULL,
2380
2381	.bop_propagate		=	nilfs_btree_propagate_gc,
2382
2383	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2384
2385	.bop_assign		=	nilfs_btree_assign_gc,
2386	.bop_mark		=	NULL,
2387
2388	.bop_seek_key		=	NULL,
2389	.bop_last_key		=	NULL,
2390
2391	.bop_check_insert	=	NULL,
2392	.bop_check_delete	=	NULL,
2393	.bop_gather_data	=	NULL,
2394};
2395
2396static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2397{
2398	bmap->b_ops = &nilfs_btree_ops;
2399	bmap->b_nchildren_per_block =
2400		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2401}
2402
2403int nilfs_btree_init(struct nilfs_bmap *bmap)
2404{
2405	int ret = 0;
2406
2407	__nilfs_btree_init(bmap);
2408
2409	if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
2410		ret = -EIO;
2411	return ret;
2412}
2413
2414void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2415{
2416	bmap->b_ops = &nilfs_btree_ops_gc;
2417	bmap->b_nchildren_per_block =
2418		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2419}
v6.13.7
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * NILFS B-tree.
   4 *
   5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
   6 *
   7 * Written by Koji Sato.
   8 */
   9
  10#include <linux/slab.h>
  11#include <linux/string.h>
  12#include <linux/errno.h>
  13#include <linux/pagevec.h>
  14#include "nilfs.h"
  15#include "page.h"
  16#include "btnode.h"
  17#include "btree.h"
  18#include "alloc.h"
  19#include "dat.h"
  20
  21static void __nilfs_btree_init(struct nilfs_bmap *bmap);
  22
  23static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
  24{
  25	struct nilfs_btree_path *path;
  26	int level = NILFS_BTREE_LEVEL_DATA;
  27
  28	path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
  29	if (path == NULL)
  30		goto out;
  31
  32	for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
  33		path[level].bp_bh = NULL;
  34		path[level].bp_sib_bh = NULL;
  35		path[level].bp_index = 0;
  36		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  37		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  38		path[level].bp_op = NULL;
  39	}
  40
  41out:
  42	return path;
  43}
  44
  45static void nilfs_btree_free_path(struct nilfs_btree_path *path)
  46{
  47	int level = NILFS_BTREE_LEVEL_DATA;
  48
  49	for (; level < NILFS_BTREE_LEVEL_MAX; level++)
  50		brelse(path[level].bp_bh);
  51
  52	kmem_cache_free(nilfs_btree_path_cache, path);
  53}
  54
  55/*
  56 * B-tree node operations
  57 */
  58static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
  59				     __u64 ptr, struct buffer_head **bhp)
  60{
  61	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
  62	struct address_space *btnc = btnc_inode->i_mapping;
  63	struct buffer_head *bh;
  64
  65	bh = nilfs_btnode_create_block(btnc, ptr);
  66	if (IS_ERR(bh))
  67		return PTR_ERR(bh);
  68
  69	set_buffer_nilfs_volatile(bh);
  70	*bhp = bh;
  71	return 0;
  72}
  73
  74static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
  75{
  76	return node->bn_flags;
  77}
  78
  79static void
  80nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
  81{
  82	node->bn_flags = flags;
  83}
  84
  85static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
  86{
  87	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
  88}
  89
  90static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
  91{
  92	return node->bn_level;
  93}
  94
  95static void
  96nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
  97{
  98	node->bn_level = level;
  99}
 100
 101static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
 102{
 103	return le16_to_cpu(node->bn_nchildren);
 104}
 105
 106static void
 107nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
 108{
 109	node->bn_nchildren = cpu_to_le16(nchildren);
 110}
 111
 112static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
 113{
 114	return i_blocksize(btree->b_inode);
 115}
 116
 117static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
 118{
 119	return btree->b_nchildren_per_block;
 120}
 121
 122static __le64 *
 123nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
 124{
 125	return (__le64 *)((char *)(node + 1) +
 126			  (nilfs_btree_node_root(node) ?
 127			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
 128}
 129
 130static __le64 *
 131nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
 132{
 133	return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
 134}
 135
 136static __u64
 137nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
 138{
 139	return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
 140}
 141
 142static void
 143nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
 144{
 145	*(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
 146}
 147
 148static __u64
 149nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
 150			 int ncmax)
 151{
 152	return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
 153}
 154
 155static void
 156nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
 157			 int ncmax)
 158{
 159	*(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
 160}
 161
 162static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
 163				  int level, int nchildren, int ncmax,
 164				  const __u64 *keys, const __u64 *ptrs)
 165{
 166	__le64 *dkeys;
 167	__le64 *dptrs;
 168	int i;
 169
 170	nilfs_btree_node_set_flags(node, flags);
 171	nilfs_btree_node_set_level(node, level);
 172	nilfs_btree_node_set_nchildren(node, nchildren);
 173
 174	dkeys = nilfs_btree_node_dkeys(node);
 175	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 176	for (i = 0; i < nchildren; i++) {
 177		dkeys[i] = cpu_to_le64(keys[i]);
 178		dptrs[i] = cpu_to_le64(ptrs[i]);
 179	}
 180}
 181
 182/* Assume the buffer heads corresponding to left and right are locked. */
 183static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
 184				       struct nilfs_btree_node *right,
 185				       int n, int lncmax, int rncmax)
 186{
 187	__le64 *ldkeys, *rdkeys;
 188	__le64 *ldptrs, *rdptrs;
 189	int lnchildren, rnchildren;
 190
 191	ldkeys = nilfs_btree_node_dkeys(left);
 192	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 193	lnchildren = nilfs_btree_node_get_nchildren(left);
 194
 195	rdkeys = nilfs_btree_node_dkeys(right);
 196	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 197	rnchildren = nilfs_btree_node_get_nchildren(right);
 198
 199	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
 200	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
 201	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
 202	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
 203
 204	lnchildren += n;
 205	rnchildren -= n;
 206	nilfs_btree_node_set_nchildren(left, lnchildren);
 207	nilfs_btree_node_set_nchildren(right, rnchildren);
 208}
 209
 210/* Assume that the buffer heads corresponding to left and right are locked. */
 211static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
 212					struct nilfs_btree_node *right,
 213					int n, int lncmax, int rncmax)
 214{
 215	__le64 *ldkeys, *rdkeys;
 216	__le64 *ldptrs, *rdptrs;
 217	int lnchildren, rnchildren;
 218
 219	ldkeys = nilfs_btree_node_dkeys(left);
 220	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 221	lnchildren = nilfs_btree_node_get_nchildren(left);
 222
 223	rdkeys = nilfs_btree_node_dkeys(right);
 224	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 225	rnchildren = nilfs_btree_node_get_nchildren(right);
 226
 227	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
 228	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
 229	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
 230	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
 231
 232	lnchildren -= n;
 233	rnchildren += n;
 234	nilfs_btree_node_set_nchildren(left, lnchildren);
 235	nilfs_btree_node_set_nchildren(right, rnchildren);
 236}
 237
 238/* Assume that the buffer head corresponding to node is locked. */
 239static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
 240				    __u64 key, __u64 ptr, int ncmax)
 241{
 242	__le64 *dkeys;
 243	__le64 *dptrs;
 244	int nchildren;
 245
 246	dkeys = nilfs_btree_node_dkeys(node);
 247	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 248	nchildren = nilfs_btree_node_get_nchildren(node);
 249	if (index < nchildren) {
 250		memmove(dkeys + index + 1, dkeys + index,
 251			(nchildren - index) * sizeof(*dkeys));
 252		memmove(dptrs + index + 1, dptrs + index,
 253			(nchildren - index) * sizeof(*dptrs));
 254	}
 255	dkeys[index] = cpu_to_le64(key);
 256	dptrs[index] = cpu_to_le64(ptr);
 257	nchildren++;
 258	nilfs_btree_node_set_nchildren(node, nchildren);
 259}
 260
 261/* Assume that the buffer head corresponding to node is locked. */
 262static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
 263				    __u64 *keyp, __u64 *ptrp, int ncmax)
 264{
 265	__u64 key;
 266	__u64 ptr;
 267	__le64 *dkeys;
 268	__le64 *dptrs;
 269	int nchildren;
 270
 271	dkeys = nilfs_btree_node_dkeys(node);
 272	dptrs = nilfs_btree_node_dptrs(node, ncmax);
 273	key = le64_to_cpu(dkeys[index]);
 274	ptr = le64_to_cpu(dptrs[index]);
 275	nchildren = nilfs_btree_node_get_nchildren(node);
 276	if (keyp != NULL)
 277		*keyp = key;
 278	if (ptrp != NULL)
 279		*ptrp = ptr;
 280
 281	if (index < nchildren - 1) {
 282		memmove(dkeys + index, dkeys + index + 1,
 283			(nchildren - index - 1) * sizeof(*dkeys));
 284		memmove(dptrs + index, dptrs + index + 1,
 285			(nchildren - index - 1) * sizeof(*dptrs));
 286	}
 287	nchildren--;
 288	nilfs_btree_node_set_nchildren(node, nchildren);
 289}
 290
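/*
 * nilfs_btree_node_lookup - binary-search @key in @node.  Returns nonzero
 * iff the key is present and sets *@indexp to the matching slot; otherwise
 * *@indexp is the child slot to descend into (internal nodes) or the
 * insertion position (leaf level).  E.g. for a leaf holding keys {10,20,30},
 * looking up 20 returns 1 with *@indexp == 1, and looking up 25 returns 0
 * with *@indexp == 2.
 */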
 291static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
 292				   __u64 key, int *indexp)
 293{
 294	__u64 nkey;
 295	int index, low, high, s;
 296
 297	/* binary search */
 298	low = 0;
 299	high = nilfs_btree_node_get_nchildren(node) - 1;
 300	index = 0;
 301	s = 0;
 302	while (low <= high) {
 303		index = (low + high) / 2;
 304		nkey = nilfs_btree_node_get_key(node, index);
 305		if (nkey == key) {
 306			s = 0;
 307			goto out;
 308		} else if (nkey < key) {
 309			low = index + 1;
 310			s = -1;
 311		} else {
 312			high = index - 1;
 313			s = 1;
 314		}
 315	}
 316
 317	/* adjust index */
 318	if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
 319		if (s > 0 && index > 0)
 320			index--;
 321	} else if (s < 0)
 322		index++;
 323
 324 out:
 325	*indexp = index;
 326
 327	return s == 0;
 328}
 329
 330/**
 331 * nilfs_btree_node_broken - verify consistency of btree node
 332 * @node: btree node block to be examined
 333 * @size: node size (in bytes)
 334 * @inode: host inode of btree
 335 * @blocknr: block number
 336 *
 337 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 338 */
 339static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
 340				   size_t size, struct inode *inode,
 341				   sector_t blocknr)
 342{
 343	int level, flags, nchildren;
 344	int ret = 0;
 345
 346	level = nilfs_btree_node_get_level(node);
 347	flags = nilfs_btree_node_get_flags(node);
 348	nchildren = nilfs_btree_node_get_nchildren(node);
 349
 350	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
 351		     level >= NILFS_BTREE_LEVEL_MAX ||
 352		     (flags & NILFS_BTREE_NODE_ROOT) ||
 353		     nchildren <= 0 ||
 354		     nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
 355		nilfs_crit(inode->i_sb,
 356			   "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
 357			   inode->i_ino, (unsigned long long)blocknr, level,
 358			   flags, nchildren);
 359		ret = 1;
 360	}
 361	return ret;
 362}
 363
 364/**
 365 * nilfs_btree_root_broken - verify consistency of btree root node
 366 * @node: btree root node to be examined
 367 * @inode: host inode of btree
 368 *
 369 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 370 */
 371static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
 372				   struct inode *inode)
 373{
 374	int level, flags, nchildren;
 375	int ret = 0;
 376
 377	level = nilfs_btree_node_get_level(node);
 378	flags = nilfs_btree_node_get_flags(node);
 379	nchildren = nilfs_btree_node_get_nchildren(node);
 380
 381	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
 382		     level >= NILFS_BTREE_LEVEL_MAX ||
 383		     nchildren < 0 ||
 384		     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX ||
 385		     (nchildren == 0 && level > NILFS_BTREE_LEVEL_NODE_MIN))) {
 386		nilfs_crit(inode->i_sb,
 387			   "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
 388			   inode->i_ino, level, flags, nchildren);
 389		ret = 1;
 390	}
 391	return ret;
 392}
 393
 394int nilfs_btree_broken_node_block(struct buffer_head *bh)
 395{
 396	struct inode *inode;
 397	int ret;
 398
 399	if (buffer_nilfs_checked(bh))
 400		return 0;
 401
 402	inode = bh->b_folio->mapping->host;
 403	ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
 404				      bh->b_size, inode, bh->b_blocknr);
 405	if (likely(!ret))
 406		set_buffer_nilfs_checked(bh);
 407	return ret;
 408}
 409
 410static struct nilfs_btree_node *
 411nilfs_btree_get_root(const struct nilfs_bmap *btree)
 412{
 413	return (struct nilfs_btree_node *)btree->b_u.u_data;
 414}
 415
 416static struct nilfs_btree_node *
 417nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
 418{
 419	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
 420}
 421
 422static struct nilfs_btree_node *
 423nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
 424{
 425	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
 426}
 427
 428static int nilfs_btree_height(const struct nilfs_bmap *btree)
 429{
 430	return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
 431}
 432
 433static struct nilfs_btree_node *
 434nilfs_btree_get_node(const struct nilfs_bmap *btree,
 435		     const struct nilfs_btree_path *path,
 436		     int level, int *ncmaxp)
 437{
 438	struct nilfs_btree_node *node;
 439
 440	if (level == nilfs_btree_height(btree) - 1) {
 441		node = nilfs_btree_get_root(btree);
 442		*ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
 443	} else {
 444		node = nilfs_btree_get_nonroot_node(path, level);
 445		*ncmaxp = nilfs_btree_nchildren_per_block(btree);
 446	}
 447	return node;
 448}
 449
 450static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
 451				struct nilfs_btree_node *node, int level)
 452{
 453	if (unlikely(nilfs_btree_node_get_level(node) != level)) {
 454		dump_stack();
 455		nilfs_crit(btree->b_inode->i_sb,
 456			   "btree level mismatch (ino=%lu): %d != %d",
 457			   btree->b_inode->i_ino,
 458			   nilfs_btree_node_get_level(node), level);
 459		return 1;
 460	}
 461	return 0;
 462}
 463
 464struct nilfs_btree_readahead_info {
 465	struct nilfs_btree_node *node;	/* parent node */
 466	int max_ra_blocks;		/* max nof blocks to read ahead */
 467	int index;			/* current index on the parent node */
 468	int ncmax;			/* nof children in the parent node */
 469};
 470
 471static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 472				   struct buffer_head **bhp,
 473				   const struct nilfs_btree_readahead_info *ra)
 474{
 475	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
 476	struct address_space *btnc = btnc_inode->i_mapping;
 477	struct buffer_head *bh, *ra_bh;
 478	sector_t submit_ptr = 0;
 479	int ret;
 480
 481	ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh,
 482					&submit_ptr);
 483	if (ret) {
 484		if (likely(ret == -EEXIST))
 485			goto out_check;
 486		if (ret == -ENOENT) {
 487			/*
 488			 * Block address translation failed due to invalid
 489			 * value of 'ptr'.  In this case, return internal code
 490			 * -EINVAL (broken bmap) to notify bmap layer of fatal
 491			 * metadata corruption.
 492			 */
 493			ret = -EINVAL;
 494		}
 495		return ret;
 496	}
 497
 498	if (ra) {
 499		int i, n;
 500		__u64 ptr2;
 501
 502		/* read ahead sibling nodes */
 503		for (n = ra->max_ra_blocks, i = ra->index + 1;
 504		     n > 0 && i < ra->ncmax; n--, i++) {
 505			ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
 506
 507			ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
 508						REQ_OP_READ | REQ_RAHEAD,
 509						&ra_bh, &submit_ptr);
 510			if (likely(!ret || ret == -EEXIST))
 511				brelse(ra_bh);
 512			else if (ret != -EBUSY)
 513				break;
 514			if (!buffer_locked(bh))
 515				goto out_no_wait;
 516		}
 517	}
 518
 519	wait_on_buffer(bh);
 520
 521 out_no_wait:
 522	if (!buffer_uptodate(bh)) {
 523		nilfs_err(btree->b_inode->i_sb,
 524			  "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
 525			  btree->b_inode->i_ino, (unsigned long long)ptr);
 526		brelse(bh);
 527		return -EIO;
 528	}
 529
 530 out_check:
 531	if (nilfs_btree_broken_node_block(bh)) {
 532		clear_buffer_uptodate(bh);
 533		brelse(bh);
 534		return -EINVAL;
 535	}
 536
 537	*bhp = bh;
 538	return 0;
 539}
 540
 541static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 542				   struct buffer_head **bhp)
 543{
 544	return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
 545}
 546
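/*
 * nilfs_btree_do_lookup - walk from the root down to @minlevel following
 * @key, recording the buffer head and child index of every visited level in
 * @path.  Returns 0 and the pointer found at @minlevel through @ptrp, or
 * -ENOENT if the key is absent (other negative codes signal I/O errors or
 * corruption).  With @readahead set, sibling blocks of the lowest node
 * level are submitted for read-ahead.
 */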
 547static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
 548				 struct nilfs_btree_path *path,
 549				 __u64 key, __u64 *ptrp, int minlevel,
 550				 int readahead)
 551{
 552	struct nilfs_btree_node *node;
 553	struct nilfs_btree_readahead_info p, *ra;
 554	__u64 ptr;
 555	int level, index, found, ncmax, ret;
 556
 557	node = nilfs_btree_get_root(btree);
 558	level = nilfs_btree_node_get_level(node);
 559	if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
 560		return -ENOENT;
 561
 562	found = nilfs_btree_node_lookup(node, key, &index);
 563	ptr = nilfs_btree_node_get_ptr(node, index,
 564				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 565	path[level].bp_bh = NULL;
 566	path[level].bp_index = index;
 567
 568	ncmax = nilfs_btree_nchildren_per_block(btree);
 569
 570	while (--level >= minlevel) {
 571		ra = NULL;
 572		if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
 573			p.node = nilfs_btree_get_node(btree, path, level + 1,
 574						      &p.ncmax);
 575			p.index = index;
 576			p.max_ra_blocks = 7;
 577			ra = &p;
 578		}
 579		ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
 580					      ra);
 581		if (ret < 0)
 582			return ret;
 583
 584		node = nilfs_btree_get_nonroot_node(path, level);
 585		if (nilfs_btree_bad_node(btree, node, level))
 586			return -EINVAL;
 587		if (!found)
 588			found = nilfs_btree_node_lookup(node, key, &index);
 589		else
 590			index = 0;
 591		if (index < ncmax) {
 592			ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 593		} else {
 594			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
 595			/* insert */
 596			ptr = NILFS_BMAP_INVALID_PTR;
 597		}
 598		path[level].bp_index = index;
 599	}
 600	if (!found)
 601		return -ENOENT;
 602
 603	if (ptrp != NULL)
 604		*ptrp = ptr;
 605
 606	return 0;
 607}
 608
 609static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
 610				      struct nilfs_btree_path *path,
 611				      __u64 *keyp, __u64 *ptrp)
 612{
 613	struct nilfs_btree_node *node;
 614	__u64 ptr;
 615	int index, level, ncmax, ret;
 616
 617	node = nilfs_btree_get_root(btree);
 618	index = nilfs_btree_node_get_nchildren(node) - 1;
 619	if (index < 0)
 620		return -ENOENT;
 621	level = nilfs_btree_node_get_level(node);
 622	ptr = nilfs_btree_node_get_ptr(node, index,
 623				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 624	path[level].bp_bh = NULL;
 625	path[level].bp_index = index;
 626	ncmax = nilfs_btree_nchildren_per_block(btree);
 627
 628	for (level--; level > 0; level--) {
 629		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
 630		if (ret < 0)
 631			return ret;
 632		node = nilfs_btree_get_nonroot_node(path, level);
 633		if (nilfs_btree_bad_node(btree, node, level))
 634			return -EINVAL;
 635		index = nilfs_btree_node_get_nchildren(node) - 1;
 636		ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 637		path[level].bp_index = index;
 638	}
 639
 640	if (keyp != NULL)
 641		*keyp = nilfs_btree_node_get_key(node, index);
 642	if (ptrp != NULL)
 643		*ptrp = ptr;
 644
 645	return 0;
 646}
 647
 648/**
 649 * nilfs_btree_get_next_key - get next valid key from btree path array
 650 * @btree: bmap struct of btree
 651 * @path: array of nilfs_btree_path struct
 652 * @minlevel: start level
 653 * @nextkey: place to store the next valid key
 654 *
 655 * Return Value: If a next key was found, 0 is returned. Otherwise,
 656 * -ENOENT is returned.
 657 */
 658static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
 659				    const struct nilfs_btree_path *path,
 660				    int minlevel, __u64 *nextkey)
 661{
 662	struct nilfs_btree_node *node;
 663	int maxlevel = nilfs_btree_height(btree) - 1;
 664	int index, next_adj, level;
 665
 666	/* Next index is already set to bp_index for leaf nodes. */
 667	next_adj = 0;
 668	for (level = minlevel; level <= maxlevel; level++) {
 669		if (level == maxlevel)
 670			node = nilfs_btree_get_root(btree);
 671		else
 672			node = nilfs_btree_get_nonroot_node(path, level);
 673
 674		index = path[level].bp_index + next_adj;
 675		if (index < nilfs_btree_node_get_nchildren(node)) {
 676			/* Next key is in this node */
 677			*nextkey = nilfs_btree_node_get_key(node, index);
 678			return 0;
 679		}
 680		/* For non-leaf nodes, next index is stored at bp_index + 1. */
 681		next_adj = 1;
 682	}
 683	return -ENOENT;
 684}
 685
 686static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
 687			      __u64 key, int level, __u64 *ptrp)
 688{
 689	struct nilfs_btree_path *path;
 690	int ret;
 691
 692	path = nilfs_btree_alloc_path();
 693	if (path == NULL)
 694		return -ENOMEM;
 695
 696	ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
 697
 698	nilfs_btree_free_path(path);
 699
 700	return ret;
 701}
 702
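/*
 * nilfs_btree_lookup_contig - look up a run of consecutive blocks starting
 * at @key.  Leaf entries (continuing into right-sibling leaves) are scanned
 * while both the keys and the translated block numbers stay contiguous, up
 * to @maxblocks.  Returns the length of the run and stores the first block
 * number in *@ptrp.
 */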
 703static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
 704				     __u64 key, __u64 *ptrp,
 705				     unsigned int maxblocks)
 706{
 707	struct nilfs_btree_path *path;
 708	struct nilfs_btree_node *node;
 709	struct inode *dat = NULL;
 710	__u64 ptr, ptr2;
 711	sector_t blocknr;
 712	int level = NILFS_BTREE_LEVEL_NODE_MIN;
 713	int ret, cnt, index, maxlevel, ncmax;
 714	struct nilfs_btree_readahead_info p;
 715
 716	path = nilfs_btree_alloc_path();
 717	if (path == NULL)
 718		return -ENOMEM;
 719
 720	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
 721	if (ret < 0)
 722		goto out;
 723
 724	if (NILFS_BMAP_USE_VBN(btree)) {
 725		dat = nilfs_bmap_get_dat(btree);
 726		ret = nilfs_dat_translate(dat, ptr, &blocknr);
 727		if (ret < 0)
 728			goto dat_error;
 729		ptr = blocknr;
 730	}
 731	cnt = 1;
 732	if (cnt == maxblocks)
 733		goto end;
 734
 735	maxlevel = nilfs_btree_height(btree) - 1;
 736	node = nilfs_btree_get_node(btree, path, level, &ncmax);
 737	index = path[level].bp_index + 1;
 738	for (;;) {
 739		while (index < nilfs_btree_node_get_nchildren(node)) {
 740			if (nilfs_btree_node_get_key(node, index) !=
 741			    key + cnt)
 742				goto end;
 743			ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
 744			if (dat) {
 745				ret = nilfs_dat_translate(dat, ptr2, &blocknr);
 746				if (ret < 0)
 747					goto dat_error;
 748				ptr2 = blocknr;
 749			}
 750			if (ptr2 != ptr + cnt || ++cnt == maxblocks)
 751				goto end;
 752			index++;
 753		}
 754		if (level == maxlevel)
 755			break;
 756
 757		/* look-up right sibling node */
 758		p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
 759		p.index = path[level + 1].bp_index + 1;
 760		p.max_ra_blocks = 7;
 761		if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
 762		    nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
 763			break;
 764		ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
 765		path[level + 1].bp_index = p.index;
 766
 767		brelse(path[level].bp_bh);
 768		path[level].bp_bh = NULL;
 769
 770		ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
 771					      &p);
 772		if (ret < 0)
 773			goto out;
 774		node = nilfs_btree_get_nonroot_node(path, level);
 775		ncmax = nilfs_btree_nchildren_per_block(btree);
 776		index = 0;
 777		path[level].bp_index = index;
 778	}
 779 end:
 780	*ptrp = ptr;
 781	ret = cnt;
 782 out:
 783	nilfs_btree_free_path(path);
 784	return ret;
 785
 786 dat_error:
 787	if (ret == -ENOENT)
 788		ret = -EINVAL;  /* Notify bmap layer of metadata corruption */
 789	goto out;
 790}
 791
 792static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
 793				    struct nilfs_btree_path *path,
 794				    int level, __u64 key)
 795{
 796	if (level < nilfs_btree_height(btree) - 1) {
 797		do {
 798			nilfs_btree_node_set_key(
 799				nilfs_btree_get_nonroot_node(path, level),
 800				path[level].bp_index, key);
 801			if (!buffer_dirty(path[level].bp_bh))
 802				mark_buffer_dirty(path[level].bp_bh);
 803		} while ((path[level].bp_index == 0) &&
 804			 (++level < nilfs_btree_height(btree) - 1));
 805	}
 806
 807	/* root */
 808	if (level == nilfs_btree_height(btree) - 1) {
 809		nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
 810					 path[level].bp_index, key);
 811	}
 812}
 813
 814static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
 815				  struct nilfs_btree_path *path,
 816				  int level, __u64 *keyp, __u64 *ptrp)
 817{
 818	struct nilfs_btree_node *node;
 819	int ncblk;
 820
 821	if (level < nilfs_btree_height(btree) - 1) {
 822		node = nilfs_btree_get_nonroot_node(path, level);
 823		ncblk = nilfs_btree_nchildren_per_block(btree);
 824		nilfs_btree_node_insert(node, path[level].bp_index,
 825					*keyp, *ptrp, ncblk);
 826		if (!buffer_dirty(path[level].bp_bh))
 827			mark_buffer_dirty(path[level].bp_bh);
 828
 829		if (path[level].bp_index == 0)
 830			nilfs_btree_promote_key(btree, path, level + 1,
 831						nilfs_btree_node_get_key(node,
 832									 0));
 833	} else {
 834		node = nilfs_btree_get_root(btree);
 835		nilfs_btree_node_insert(node, path[level].bp_index,
 836					*keyp, *ptrp,
 837					NILFS_BTREE_ROOT_NCHILDREN_MAX);
 838	}
 839}
 840
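/*
 * nilfs_btree_carry_left - make room for an insertion by shifting entries
 * from the full node into its left sibling so that both end up roughly half
 * full, then redo the insertion at the adjusted index (possibly inside the
 * sibling, when the insertion point itself moved left).
 */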
 841static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
 842				   struct nilfs_btree_path *path,
 843				   int level, __u64 *keyp, __u64 *ptrp)
 844{
 845	struct nilfs_btree_node *node, *left;
 846	int nchildren, lnchildren, n, move, ncblk;
 847
 848	node = nilfs_btree_get_nonroot_node(path, level);
 849	left = nilfs_btree_get_sib_node(path, level);
 850	nchildren = nilfs_btree_node_get_nchildren(node);
 851	lnchildren = nilfs_btree_node_get_nchildren(left);
 852	ncblk = nilfs_btree_nchildren_per_block(btree);
 853	move = 0;
 854
 855	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
 856	if (n > path[level].bp_index) {
 857		/* move insert point */
 858		n--;
 859		move = 1;
 860	}
 861
 862	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
 863
 864	if (!buffer_dirty(path[level].bp_bh))
 865		mark_buffer_dirty(path[level].bp_bh);
 866	if (!buffer_dirty(path[level].bp_sib_bh))
 867		mark_buffer_dirty(path[level].bp_sib_bh);
 868
 869	nilfs_btree_promote_key(btree, path, level + 1,
 870				nilfs_btree_node_get_key(node, 0));
 871
 872	if (move) {
 873		brelse(path[level].bp_bh);
 874		path[level].bp_bh = path[level].bp_sib_bh;
 875		path[level].bp_sib_bh = NULL;
 876		path[level].bp_index += lnchildren;
 877		path[level + 1].bp_index--;
 878	} else {
 879		brelse(path[level].bp_sib_bh);
 880		path[level].bp_sib_bh = NULL;
 881		path[level].bp_index -= n;
 882	}
 883
 884	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 885}
 886
 887static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
 888				    struct nilfs_btree_path *path,
 889				    int level, __u64 *keyp, __u64 *ptrp)
 890{
 891	struct nilfs_btree_node *node, *right;
 892	int nchildren, rnchildren, n, move, ncblk;
 893
 894	node = nilfs_btree_get_nonroot_node(path, level);
 895	right = nilfs_btree_get_sib_node(path, level);
 896	nchildren = nilfs_btree_node_get_nchildren(node);
 897	rnchildren = nilfs_btree_node_get_nchildren(right);
 898	ncblk = nilfs_btree_nchildren_per_block(btree);
 899	move = 0;
 900
 901	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
 902	if (n > nchildren - path[level].bp_index) {
 903		/* move insert point */
 904		n--;
 905		move = 1;
 906	}
 907
 908	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 909
 910	if (!buffer_dirty(path[level].bp_bh))
 911		mark_buffer_dirty(path[level].bp_bh);
 912	if (!buffer_dirty(path[level].bp_sib_bh))
 913		mark_buffer_dirty(path[level].bp_sib_bh);
 914
 915	path[level + 1].bp_index++;
 916	nilfs_btree_promote_key(btree, path, level + 1,
 917				nilfs_btree_node_get_key(right, 0));
 918	path[level + 1].bp_index--;
 919
 920	if (move) {
 921		brelse(path[level].bp_bh);
 922		path[level].bp_bh = path[level].bp_sib_bh;
 923		path[level].bp_sib_bh = NULL;
 924		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 925		path[level + 1].bp_index++;
 926	} else {
 927		brelse(path[level].bp_sib_bh);
 928		path[level].bp_sib_bh = NULL;
 929	}
 930
 931	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 932}
 933
 934static void nilfs_btree_split(struct nilfs_bmap *btree,
 935			      struct nilfs_btree_path *path,
 936			      int level, __u64 *keyp, __u64 *ptrp)
 937{
 938	struct nilfs_btree_node *node, *right;
 939	int nchildren, n, move, ncblk;
 940
 941	node = nilfs_btree_get_nonroot_node(path, level);
 942	right = nilfs_btree_get_sib_node(path, level);
 943	nchildren = nilfs_btree_node_get_nchildren(node);
 944	ncblk = nilfs_btree_nchildren_per_block(btree);
 945	move = 0;
 946
 947	n = (nchildren + 1) / 2;
 948	if (n > nchildren - path[level].bp_index) {
 949		n--;
 950		move = 1;
 951	}
 952
 953	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 954
 955	if (!buffer_dirty(path[level].bp_bh))
 956		mark_buffer_dirty(path[level].bp_bh);
 957	if (!buffer_dirty(path[level].bp_sib_bh))
 958		mark_buffer_dirty(path[level].bp_sib_bh);
 959
 960	if (move) {
 961		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 962		nilfs_btree_node_insert(right, path[level].bp_index,
 963					*keyp, *ptrp, ncblk);
 964
 965		*keyp = nilfs_btree_node_get_key(right, 0);
 966		*ptrp = path[level].bp_newreq.bpr_ptr;
 967
 968		brelse(path[level].bp_bh);
 969		path[level].bp_bh = path[level].bp_sib_bh;
 970		path[level].bp_sib_bh = NULL;
 971	} else {
 972		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 973
 974		*keyp = nilfs_btree_node_get_key(right, 0);
 975		*ptrp = path[level].bp_newreq.bpr_ptr;
 976
 977		brelse(path[level].bp_sib_bh);
 978		path[level].bp_sib_bh = NULL;
 979	}
 980
 981	path[level + 1].bp_index++;
 982}
 983
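/*
 * nilfs_btree_grow - increase the height of the tree: every entry of the
 * full root is moved into a newly allocated child block, the root level is
 * bumped, and the pending insertion is redone inside that child; the key
 * and pointer of the new child are then handed back for insertion into the
 * root.
 */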
 984static void nilfs_btree_grow(struct nilfs_bmap *btree,
 985			     struct nilfs_btree_path *path,
 986			     int level, __u64 *keyp, __u64 *ptrp)
 987{
 988	struct nilfs_btree_node *root, *child;
 989	int n, ncblk;
 990
 991	root = nilfs_btree_get_root(btree);
 992	child = nilfs_btree_get_sib_node(path, level);
 993	ncblk = nilfs_btree_nchildren_per_block(btree);
 994
 995	n = nilfs_btree_node_get_nchildren(root);
 996
 997	nilfs_btree_node_move_right(root, child, n,
 998				    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
 999	nilfs_btree_node_set_level(root, level + 1);
1000
1001	if (!buffer_dirty(path[level].bp_sib_bh))
1002		mark_buffer_dirty(path[level].bp_sib_bh);
1003
1004	path[level].bp_bh = path[level].bp_sib_bh;
1005	path[level].bp_sib_bh = NULL;
1006
1007	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
1008
1009	*keyp = nilfs_btree_node_get_key(child, 0);
1010	*ptrp = path[level].bp_newreq.bpr_ptr;
1011}
1012
1013static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
1014				   const struct nilfs_btree_path *path)
1015{
1016	struct nilfs_btree_node *node;
1017	int level, ncmax;
1018
1019	if (path == NULL)
1020		return NILFS_BMAP_INVALID_PTR;
1021
1022	/* left sibling */
1023	level = NILFS_BTREE_LEVEL_NODE_MIN;
1024	if (path[level].bp_index > 0) {
1025		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1026		return nilfs_btree_node_get_ptr(node,
1027						path[level].bp_index - 1,
1028						ncmax);
1029	}
1030
1031	/* parent */
1032	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1033	if (level <= nilfs_btree_height(btree) - 1) {
1034		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1035		return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1036						ncmax);
1037	}
1038
1039	return NILFS_BMAP_INVALID_PTR;
1040}
1041
1042static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1043				       const struct nilfs_btree_path *path,
1044				       __u64 key)
1045{
1046	__u64 ptr;
1047
1048	ptr = nilfs_bmap_find_target_seq(btree, key);
1049	if (ptr != NILFS_BMAP_INVALID_PTR)
1050		/* sequential access */
1051		return ptr;
1052
1053	ptr = nilfs_btree_find_near(btree, path);
1054	if (ptr != NILFS_BMAP_INVALID_PTR)
1055		/* near */
1056		return ptr;
1057
1058	/* block group */
1059	return nilfs_bmap_find_target_in_group(btree);
1060}
1061
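/*
 * nilfs_btree_prepare_insert - plan an insertion without touching the tree:
 * for each level from the leaf upward pick the operation to apply (plain
 * insert, carry into a left or right sibling, split, or grow the root) and
 * pre-allocate the pointers any new node blocks will need.  The chosen
 * operations are stored in path[level].bp_op and replayed by
 * nilfs_btree_commit_insert().
 */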
1062static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1063				      struct nilfs_btree_path *path,
1064				      int *levelp, __u64 key, __u64 ptr,
1065				      struct nilfs_bmap_stats *stats)
1066{
1067	struct buffer_head *bh;
1068	struct nilfs_btree_node *node, *parent, *sib;
1069	__u64 sibptr;
1070	int pindex, level, ncmax, ncblk, ret;
1071	struct inode *dat = NULL;
1072
1073	stats->bs_nblocks = 0;
1074	level = NILFS_BTREE_LEVEL_DATA;
1075
1076	/* allocate a new ptr for data block */
1077	if (NILFS_BMAP_USE_VBN(btree)) {
1078		path[level].bp_newreq.bpr_ptr =
1079			nilfs_btree_find_target_v(btree, path, key);
1080		dat = nilfs_bmap_get_dat(btree);
1081	}
1082
1083	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1084	if (ret < 0)
1085		goto err_out_data;
1086
1087	ncblk = nilfs_btree_nchildren_per_block(btree);
1088
1089	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1090	     level < nilfs_btree_height(btree) - 1;
1091	     level++) {
1092		node = nilfs_btree_get_nonroot_node(path, level);
1093		if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1094			path[level].bp_op = nilfs_btree_do_insert;
1095			stats->bs_nblocks++;
1096			goto out;
1097		}
1098
1099		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1100		pindex = path[level + 1].bp_index;
1101
1102		/* left sibling */
1103		if (pindex > 0) {
1104			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1105							  ncmax);
1106			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1107			if (ret < 0)
1108				goto err_out_child_node;
1109			sib = (struct nilfs_btree_node *)bh->b_data;
1110			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1111				path[level].bp_sib_bh = bh;
1112				path[level].bp_op = nilfs_btree_carry_left;
1113				stats->bs_nblocks++;
1114				goto out;
1115			} else {
1116				brelse(bh);
1117			}
1118		}
1119
1120		/* right sibling */
1121		if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1122			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1123							  ncmax);
1124			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1125			if (ret < 0)
1126				goto err_out_child_node;
1127			sib = (struct nilfs_btree_node *)bh->b_data;
1128			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1129				path[level].bp_sib_bh = bh;
1130				path[level].bp_op = nilfs_btree_carry_right;
1131				stats->bs_nblocks++;
1132				goto out;
1133			} else {
1134				brelse(bh);
1135			}
1136		}
1137
1138		/* split */
1139		path[level].bp_newreq.bpr_ptr =
1140			path[level - 1].bp_newreq.bpr_ptr + 1;
1141		ret = nilfs_bmap_prepare_alloc_ptr(btree,
1142						   &path[level].bp_newreq, dat);
1143		if (ret < 0)
1144			goto err_out_child_node;
1145		ret = nilfs_btree_get_new_block(btree,
1146						path[level].bp_newreq.bpr_ptr,
1147						&bh);
1148		if (ret < 0)
1149			goto err_out_curr_node;
1150
1151		stats->bs_nblocks++;
1152
1153		sib = (struct nilfs_btree_node *)bh->b_data;
1154		nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1155		path[level].bp_sib_bh = bh;
1156		path[level].bp_op = nilfs_btree_split;
1157	}
1158
1159	/* root */
1160	node = nilfs_btree_get_root(btree);
1161	if (nilfs_btree_node_get_nchildren(node) <
1162	    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1163		path[level].bp_op = nilfs_btree_do_insert;
1164		stats->bs_nblocks++;
1165		goto out;
1166	}
1167
1168	/* grow */
1169	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1170	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1171	if (ret < 0)
1172		goto err_out_child_node;
1173	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1174					&bh);
1175	if (ret < 0)
1176		goto err_out_curr_node;
1177
1178	nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1179			      0, level, 0, ncblk, NULL, NULL);
1180	path[level].bp_sib_bh = bh;
1181	path[level].bp_op = nilfs_btree_grow;
1182
1183	level++;
1184	path[level].bp_op = nilfs_btree_do_insert;
1185
1186	/* a newly-created node block and a data block are added */
1187	stats->bs_nblocks += 2;
1188
1189	/* success */
1190 out:
1191	*levelp = level;
1192	return ret;
1193
1194	/* error */
1195 err_out_curr_node:
1196	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1197 err_out_child_node:
1198	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1199		nilfs_btnode_delete(path[level].bp_sib_bh);
1200		nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1201
1202	}
1203
1204	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1205 err_out_data:
1206	*levelp = level;
1207	stats->bs_nblocks = 0;
1208	return ret;
1209}
1210
1211static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1212				      struct nilfs_btree_path *path,
1213				      int maxlevel, __u64 key, __u64 ptr)
1214{
1215	struct inode *dat = NULL;
1216	int level;
1217
1218	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1219	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1220	if (NILFS_BMAP_USE_VBN(btree)) {
1221		nilfs_bmap_set_target_v(btree, key, ptr);
1222		dat = nilfs_bmap_get_dat(btree);
1223	}
1224
1225	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1226		nilfs_bmap_commit_alloc_ptr(btree,
1227					    &path[level - 1].bp_newreq, dat);
1228		path[level].bp_op(btree, path, level, &key, &ptr);
1229	}
1230
1231	if (!nilfs_bmap_dirty(btree))
1232		nilfs_bmap_set_dirty(btree);
1233}
1234
1235static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1236{
1237	struct nilfs_btree_path *path;
1238	struct nilfs_bmap_stats stats;
1239	int level, ret;
1240
1241	path = nilfs_btree_alloc_path();
1242	if (path == NULL)
1243		return -ENOMEM;
1244
1245	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1246				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1247	if (ret != -ENOENT) {
1248		if (ret == 0)
1249			ret = -EEXIST;
1250		goto out;
1251	}
1252
1253	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1254	if (ret < 0)
1255		goto out;
1256	nilfs_btree_commit_insert(btree, path, level, key, ptr);
1257	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1258
1259 out:
1260	nilfs_btree_free_path(path);
1261	return ret;
1262}
1263
1264static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1265				  struct nilfs_btree_path *path,
1266				  int level, __u64 *keyp, __u64 *ptrp)
1267{
1268	struct nilfs_btree_node *node;
1269	int ncblk;
1270
1271	if (level < nilfs_btree_height(btree) - 1) {
1272		node = nilfs_btree_get_nonroot_node(path, level);
1273		ncblk = nilfs_btree_nchildren_per_block(btree);
1274		nilfs_btree_node_delete(node, path[level].bp_index,
1275					keyp, ptrp, ncblk);
1276		if (!buffer_dirty(path[level].bp_bh))
1277			mark_buffer_dirty(path[level].bp_bh);
1278		if (path[level].bp_index == 0)
1279			nilfs_btree_promote_key(btree, path, level + 1,
1280				nilfs_btree_node_get_key(node, 0));
1281	} else {
1282		node = nilfs_btree_get_root(btree);
1283		nilfs_btree_node_delete(node, path[level].bp_index,
1284					keyp, ptrp,
1285					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1286	}
1287}
1288
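/**
 * nilfs_btree_borrow_left - delete an entry and rebalance with the left sibling
 * @btree: bmap struct of btree
 * @path: array of nilfs_btree_path struct
 * @level: level to operate on
 * @keyp: place to store the deleted key (optional)
 * @ptrp: place to store the deleted pointer (optional)
 *
 * After deleting the entry at @level, this function moves entries from the
 * left sibling so that the two nodes hold roughly the same number of
 * children, dirties both buffers, and updates the separator key in the
 * parent node.
 */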
1289static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1290				    struct nilfs_btree_path *path,
1291				    int level, __u64 *keyp, __u64 *ptrp)
1292{
1293	struct nilfs_btree_node *node, *left;
1294	int nchildren, lnchildren, n, ncblk;
1295
1296	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1297
1298	node = nilfs_btree_get_nonroot_node(path, level);
1299	left = nilfs_btree_get_sib_node(path, level);
1300	nchildren = nilfs_btree_node_get_nchildren(node);
1301	lnchildren = nilfs_btree_node_get_nchildren(left);
1302	ncblk = nilfs_btree_nchildren_per_block(btree);
1303
1304	n = (nchildren + lnchildren) / 2 - nchildren;
1305
1306	nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1307
1308	if (!buffer_dirty(path[level].bp_bh))
1309		mark_buffer_dirty(path[level].bp_bh);
1310	if (!buffer_dirty(path[level].bp_sib_bh))
1311		mark_buffer_dirty(path[level].bp_sib_bh);
1312
1313	nilfs_btree_promote_key(btree, path, level + 1,
1314				nilfs_btree_node_get_key(node, 0));
1315
1316	brelse(path[level].bp_sib_bh);
1317	path[level].bp_sib_bh = NULL;
1318	path[level].bp_index += n;
1319}
1320
1321static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1322				     struct nilfs_btree_path *path,
1323				     int level, __u64 *keyp, __u64 *ptrp)
1324{
1325	struct nilfs_btree_node *node, *right;
1326	int nchildren, rnchildren, n, ncblk;
1327
1328	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1329
1330	node = nilfs_btree_get_nonroot_node(path, level);
1331	right = nilfs_btree_get_sib_node(path, level);
1332	nchildren = nilfs_btree_node_get_nchildren(node);
1333	rnchildren = nilfs_btree_node_get_nchildren(right);
1334	ncblk = nilfs_btree_nchildren_per_block(btree);
1335
1336	n = (nchildren + rnchildren) / 2 - nchildren;
1337
1338	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1339
1340	if (!buffer_dirty(path[level].bp_bh))
1341		mark_buffer_dirty(path[level].bp_bh);
1342	if (!buffer_dirty(path[level].bp_sib_bh))
1343		mark_buffer_dirty(path[level].bp_sib_bh);
1344
1345	path[level + 1].bp_index++;
1346	nilfs_btree_promote_key(btree, path, level + 1,
1347				nilfs_btree_node_get_key(right, 0));
1348	path[level + 1].bp_index--;
1349
1350	brelse(path[level].bp_sib_bh);
1351	path[level].bp_sib_bh = NULL;
1352}
1353
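/**
 * nilfs_btree_concat_left - delete an entry and merge the node into its left sibling
 * @btree: bmap struct of btree
 * @path: array of nilfs_btree_path struct
 * @level: level to operate on
 * @keyp: place to store the deleted key (optional)
 * @ptrp: place to store the deleted pointer (optional)
 *
 * After deleting the entry at @level, all remaining children of the current
 * node are moved into the left sibling and the now empty node block is
 * deleted from the btree node cache.
 */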
1354static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1355				    struct nilfs_btree_path *path,
1356				    int level, __u64 *keyp, __u64 *ptrp)
1357{
1358	struct nilfs_btree_node *node, *left;
1359	int n, ncblk;
1360
1361	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1362
1363	node = nilfs_btree_get_nonroot_node(path, level);
1364	left = nilfs_btree_get_sib_node(path, level);
1365	ncblk = nilfs_btree_nchildren_per_block(btree);
1366
1367	n = nilfs_btree_node_get_nchildren(node);
1368
1369	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1370
1371	if (!buffer_dirty(path[level].bp_sib_bh))
1372		mark_buffer_dirty(path[level].bp_sib_bh);
1373
1374	nilfs_btnode_delete(path[level].bp_bh);
1375	path[level].bp_bh = path[level].bp_sib_bh;
1376	path[level].bp_sib_bh = NULL;
1377	path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1378}
1379
1380static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1381				     struct nilfs_btree_path *path,
1382				     int level, __u64 *keyp, __u64 *ptrp)
1383{
1384	struct nilfs_btree_node *node, *right;
1385	int n, ncblk;
1386
1387	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1388
1389	node = nilfs_btree_get_nonroot_node(path, level);
1390	right = nilfs_btree_get_sib_node(path, level);
1391	ncblk = nilfs_btree_nchildren_per_block(btree);
1392
1393	n = nilfs_btree_node_get_nchildren(right);
1394
1395	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1396
1397	if (!buffer_dirty(path[level].bp_bh))
1398		mark_buffer_dirty(path[level].bp_bh);
1399
1400	nilfs_btnode_delete(path[level].bp_sib_bh);
1401	path[level].bp_sib_bh = NULL;
1402	path[level + 1].bp_index++;
1403}
1404
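/**
 * nilfs_btree_shrink - delete an entry and reduce the height of the b-tree
 * @btree: bmap struct of btree
 * @path: array of nilfs_btree_path struct
 * @level: level of the root's only child
 * @keyp: place to store the deleted key (optional)
 * @ptrp: place to store the deleted pointer (optional)
 *
 * After deleting the entry at @level, the remaining children of the root's
 * only child node are pulled up into the root and the child node block is
 * deleted, shrinking the tree by one level.
 */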
1405static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1406			       struct nilfs_btree_path *path,
1407			       int level, __u64 *keyp, __u64 *ptrp)
1408{
1409	struct nilfs_btree_node *root, *child;
1410	int n, ncblk;
1411
1412	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1413
1414	root = nilfs_btree_get_root(btree);
1415	child = nilfs_btree_get_nonroot_node(path, level);
1416	ncblk = nilfs_btree_nchildren_per_block(btree);
1417
1418	nilfs_btree_node_delete(root, 0, NULL, NULL,
1419				NILFS_BTREE_ROOT_NCHILDREN_MAX);
1420	nilfs_btree_node_set_level(root, level);
1421	n = nilfs_btree_node_get_nchildren(child);
1422	nilfs_btree_node_move_left(root, child, n,
1423				   NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1424
1425	nilfs_btnode_delete(path[level].bp_bh);
1426	path[level].bp_bh = NULL;
1427}
1428
1429static void nilfs_btree_nop(struct nilfs_bmap *btree,
1430			    struct nilfs_btree_path *path,
1431			    int level, __u64 *keyp, __u64 *ptrp)
1432{
1433}
1434
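/**
 * nilfs_btree_prepare_delete - prepare the deletion of a key from a b-tree
 * @btree: bmap struct of btree
 * @path: array of nilfs_btree_path struct
 * @levelp: place to store the highest level that must be updated
 * @stats: buffer to store statistics (number of blocks to be released)
 * @dat: pointer to the DAT inode, or NULL for a btree without virtual
 *	 block addresses
 *
 * Walking up from the lowest node level, this function selects the operation
 * to apply at each level: a plain delete when the node keeps enough children,
 * borrowing from or concatenating with a sibling when it would underflow, and
 * shrinking the tree when the remaining children of the root's only child fit
 * in the root node.  It also prepares the deallocation of the block pointers
 * that will disappear.  On error, the prepared deallocations are aborted.
 *
 * Return: 0 on success, or a negative error code on failure.
 */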
1435static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1436				      struct nilfs_btree_path *path,
1437				      int *levelp,
1438				      struct nilfs_bmap_stats *stats,
1439				      struct inode *dat)
1440{
1441	struct buffer_head *bh;
1442	struct nilfs_btree_node *node, *parent, *sib;
1443	__u64 sibptr;
1444	int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1445
1446	ret = 0;
1447	stats->bs_nblocks = 0;
1448	ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1449	ncblk = nilfs_btree_nchildren_per_block(btree);
1450
1451	for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1452	     level < nilfs_btree_height(btree) - 1;
1453	     level++) {
1454		node = nilfs_btree_get_nonroot_node(path, level);
1455		path[level].bp_oldreq.bpr_ptr =
1456			nilfs_btree_node_get_ptr(node, dindex, ncblk);
1457		ret = nilfs_bmap_prepare_end_ptr(btree,
1458						 &path[level].bp_oldreq, dat);
1459		if (ret < 0)
1460			goto err_out_child_node;
1461
1462		if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1463			path[level].bp_op = nilfs_btree_do_delete;
1464			stats->bs_nblocks++;
1465			goto out;
1466		}
1467
1468		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1469		pindex = path[level + 1].bp_index;
1470		dindex = pindex;
1471
1472		if (pindex > 0) {
1473			/* left sibling */
1474			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1475							  ncmax);
1476			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1477			if (ret < 0)
1478				goto err_out_curr_node;
1479			sib = (struct nilfs_btree_node *)bh->b_data;
1480			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1481				path[level].bp_sib_bh = bh;
1482				path[level].bp_op = nilfs_btree_borrow_left;
1483				stats->bs_nblocks++;
1484				goto out;
1485			} else {
1486				path[level].bp_sib_bh = bh;
1487				path[level].bp_op = nilfs_btree_concat_left;
1488				stats->bs_nblocks++;
1489				/* continue; */
1490			}
1491		} else if (pindex <
1492			   nilfs_btree_node_get_nchildren(parent) - 1) {
1493			/* right sibling */
1494			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1495							  ncmax);
1496			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1497			if (ret < 0)
1498				goto err_out_curr_node;
1499			sib = (struct nilfs_btree_node *)bh->b_data;
1500			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1501				path[level].bp_sib_bh = bh;
1502				path[level].bp_op = nilfs_btree_borrow_right;
1503				stats->bs_nblocks++;
1504				goto out;
1505			} else {
1506				path[level].bp_sib_bh = bh;
1507				path[level].bp_op = nilfs_btree_concat_right;
1508				stats->bs_nblocks++;
1509				/*
1510				 * When merging the right sibling node
1511				 * into the current node, the pointer to
1512				 * the right sibling node must be
1513				 * terminated instead.  The adjustment
1514				 * below is required for that.
1515				 */
1516				dindex = pindex + 1;
1517				/* continue; */
1518			}
1519		} else {
1520			/* no siblings */
1521			/* the only child of the root node */
1522			WARN_ON(level != nilfs_btree_height(btree) - 2);
1523			if (nilfs_btree_node_get_nchildren(node) - 1 <=
1524			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1525				path[level].bp_op = nilfs_btree_shrink;
1526				stats->bs_nblocks += 2;
1527				level++;
1528				path[level].bp_op = nilfs_btree_nop;
1529				goto shrink_root_child;
1530			} else {
1531				path[level].bp_op = nilfs_btree_do_delete;
1532				stats->bs_nblocks++;
1533				goto out;
1534			}
1535		}
1536	}
1537
1538	/* child of the root node is deleted */
1539	path[level].bp_op = nilfs_btree_do_delete;
1540	stats->bs_nblocks++;
1541
1542shrink_root_child:
1543	node = nilfs_btree_get_root(btree);
1544	path[level].bp_oldreq.bpr_ptr =
1545		nilfs_btree_node_get_ptr(node, dindex,
1546					 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1547
1548	ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1549	if (ret < 0)
1550		goto err_out_child_node;
1551
1552	/* success */
1553 out:
1554	*levelp = level;
1555	return ret;
1556
1557	/* error */
1558 err_out_curr_node:
1559	nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1560 err_out_child_node:
1561	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1562		brelse(path[level].bp_sib_bh);
1563		nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1564	}
1565	*levelp = level;
1566	stats->bs_nblocks = 0;
1567	return ret;
1568}
1569
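/**
 * nilfs_btree_commit_delete - commit a deletion prepared on a tree path
 * @btree: bmap struct of btree
 * @path: array of nilfs_btree_path struct
 * @maxlevel: highest level to be updated
 * @dat: pointer to the DAT inode, or NULL
 *
 * Commits the block pointer deallocations and applies the per-level
 * operations selected by nilfs_btree_prepare_delete(), then marks the bmap
 * dirty.
 */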
1570static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1571				      struct nilfs_btree_path *path,
1572				      int maxlevel, struct inode *dat)
1573{
1574	int level;
1575
1576	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1577		nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1578		path[level].bp_op(btree, path, level, NULL, NULL);
1579	}
1580
1581	if (!nilfs_bmap_dirty(btree))
1582		nilfs_bmap_set_dirty(btree);
1583}
1584
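/**
 * nilfs_btree_delete - delete the record associated with a key from a b-tree
 * @btree: bmap struct of btree
 * @key: key to be deleted
 *
 * Return: 0 on success, or a negative error code on failure:
 * %-ENOENT if no record is associated with @key, %-ENOMEM if memory
 * allocation fails, or another error code propagated from the lookup or
 * prepare stage.
 */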
1585static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1587{
1588	struct nilfs_btree_path *path;
1589	struct nilfs_bmap_stats stats;
1590	struct inode *dat;
1591	int level, ret;
1592
1593	path = nilfs_btree_alloc_path();
1594	if (path == NULL)
1595		return -ENOMEM;
1596
1597	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1598				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1599	if (ret < 0)
1600		goto out;
1601
1603	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1604
1605	ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1606	if (ret < 0)
1607		goto out;
1608	nilfs_btree_commit_delete(btree, path, level, dat);
1609	nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1610
1611out:
1612	nilfs_btree_free_path(path);
1613	return ret;
1614}
1615
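/**
 * nilfs_btree_seek_key - find the smallest key at or above a start key
 * @btree: bmap struct of btree
 * @start: key to start the search from
 * @keyp: place to store the found key
 *
 * Return: 0 on success with the found key stored in @keyp, %-ENOENT if no
 * key at or above @start exists, or another negative error code on failure.
 */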
1616static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1617				__u64 *keyp)
1618{
1619	struct nilfs_btree_path *path;
1620	const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1621	int ret;
1622
1623	path = nilfs_btree_alloc_path();
1624	if (!path)
1625		return -ENOMEM;
1626
1627	ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1628	if (!ret)
1629		*keyp = start;
1630	else if (ret == -ENOENT)
1631		ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1632
1633	nilfs_btree_free_path(path);
1634	return ret;
1635}
1636
1637static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1638{
1639	struct nilfs_btree_path *path;
1640	int ret;
1641
1642	path = nilfs_btree_alloc_path();
1643	if (path == NULL)
1644		return -ENOMEM;
1645
1646	ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1647
1648	nilfs_btree_free_path(path);
1649
1650	return ret;
1651}
1652
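/**
 * nilfs_btree_check_delete - check whether the b-tree can shrink after a delete
 * @btree: bmap struct of btree
 * @key: key of the record about to be deleted
 *
 * Return: 1 if @key is the largest key in the tree and the second largest
 * key is below %NILFS_BMAP_LARGE_LOW (i.e. the remaining entries would fit
 * in a smaller mapping), 0 otherwise, or a negative error code on failure.
 */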
1653static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1654{
1655	struct buffer_head *bh;
1656	struct nilfs_btree_node *root, *node;
1657	__u64 maxkey, nextmaxkey;
1658	__u64 ptr;
1659	int nchildren, ret;
1660
1661	root = nilfs_btree_get_root(btree);
1662	nchildren = nilfs_btree_node_get_nchildren(root);
1663	if (unlikely(nchildren == 0))
1664		return 0;
1665
1666	switch (nilfs_btree_height(btree)) {
1667	case 2:
1668		bh = NULL;
1669		node = root;
1670		break;
1671	case 3:
1672		if (nchildren > 1)
1673			return 0;
1674		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1675					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1676		ret = nilfs_btree_get_block(btree, ptr, &bh);
1677		if (ret < 0)
1678			return ret;
1679		node = (struct nilfs_btree_node *)bh->b_data;
1680		nchildren = nilfs_btree_node_get_nchildren(node);
1681		break;
1682	default:
1683		return 0;
1684	}
1685
1686	maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1687	nextmaxkey = (nchildren > 1) ?
1688		nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1689	brelse(bh);
1690
1691	return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1692}
1693
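/**
 * nilfs_btree_gather_data - copy key-pointer pairs out of a small b-tree
 * @btree: bmap struct of btree
 * @keys: buffer to store the gathered keys
 * @ptrs: buffer to store the gathered pointers
 * @nitems: maximum number of items to gather
 *
 * Copies up to @nitems key-pointer pairs from the single leaf node of a
 * two or three level b-tree into @keys and @ptrs.
 *
 * Return: the number of gathered items on success, or a negative error code
 * on failure (%-EINVAL if the tree is too tall to be gathered).
 */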
1694static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1695				   __u64 *keys, __u64 *ptrs, int nitems)
1696{
1697	struct buffer_head *bh;
1698	struct nilfs_btree_node *node, *root;
1699	__le64 *dkeys;
1700	__le64 *dptrs;
1701	__u64 ptr;
1702	int nchildren, ncmax, i, ret;
1703
1704	root = nilfs_btree_get_root(btree);
1705	switch (nilfs_btree_height(btree)) {
1706	case 2:
1707		bh = NULL;
1708		node = root;
1709		ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1710		break;
1711	case 3:
1712		nchildren = nilfs_btree_node_get_nchildren(root);
1713		WARN_ON(nchildren > 1);
1714		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1715					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1716		ret = nilfs_btree_get_block(btree, ptr, &bh);
1717		if (ret < 0)
1718			return ret;
1719		node = (struct nilfs_btree_node *)bh->b_data;
1720		ncmax = nilfs_btree_nchildren_per_block(btree);
1721		break;
1722	default:
1723		node = NULL;
1724		return -EINVAL;
1725	}
1726
1727	nchildren = nilfs_btree_node_get_nchildren(node);
1728	if (nchildren < nitems)
1729		nitems = nchildren;
1730	dkeys = nilfs_btree_node_dkeys(node);
1731	dptrs = nilfs_btree_node_dptrs(node, ncmax);
1732	for (i = 0; i < nitems; i++) {
1733		keys[i] = le64_to_cpu(dkeys[i]);
1734		ptrs[i] = le64_to_cpu(dptrs[i]);
1735	}
1736
1737	brelse(bh);
1738
1739	return nitems;
1740}
1741
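/**
 * nilfs_btree_prepare_convert_and_insert - prepare conversion of a bmap into a b-tree
 * @btree: bmap struct of btree
 * @key: key of the new entry to be inserted
 * @dreq: pointer allocation request for the data block
 * @nreq: pointer allocation request for the new child node block, or NULL if
 *	  the entries fit in the root node
 * @bhp: place to store the buffer head of the new child node block
 * @stats: buffer to store statistics (number of blocks allocated)
 *
 * Attaches the btree node cache to the inode and prepares the block pointer
 * allocations needed for the conversion.  On error, the prepared allocations
 * are aborted.
 *
 * Return: 0 on success, or a negative error code on failure.
 */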
1742static int
1743nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1744				       union nilfs_bmap_ptr_req *dreq,
1745				       union nilfs_bmap_ptr_req *nreq,
1746				       struct buffer_head **bhp,
1747				       struct nilfs_bmap_stats *stats)
1748{
1749	struct buffer_head *bh;
1750	struct inode *dat = NULL;
1751	int ret;
1752
1753	stats->bs_nblocks = 0;
1754
1755	/* for data */
1756	/* cannot find near ptr */
1757	if (NILFS_BMAP_USE_VBN(btree)) {
1758		dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1759		dat = nilfs_bmap_get_dat(btree);
1760	}
1761
1762	ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode);
1763	if (ret < 0)
1764		return ret;
1765
1766	ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1767	if (ret < 0)
1768		return ret;
1769
1770	*bhp = NULL;
1771	stats->bs_nblocks++;
1772	if (nreq != NULL) {
1773		nreq->bpr_ptr = dreq->bpr_ptr + 1;
1774		ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1775		if (ret < 0)
1776			goto err_out_dreq;
1777
1778		ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1779		if (ret < 0)
1780			goto err_out_nreq;
1781
1782		*bhp = bh;
1783		stats->bs_nblocks++;
1784	}
1785
1786	/* success */
1787	return 0;
1788
1789	/* error */
1790 err_out_nreq:
1791	nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1792 err_out_dreq:
1793	nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1794	stats->bs_nblocks = 0;
1795	return ret;
1797}
1798
1799static void
1800nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1801				      __u64 key, __u64 ptr,
1802				      const __u64 *keys, const __u64 *ptrs,
1803				      int n,
1804				      union nilfs_bmap_ptr_req *dreq,
1805				      union nilfs_bmap_ptr_req *nreq,
1806				      struct buffer_head *bh)
1807{
1808	struct nilfs_btree_node *node;
1809	struct inode *dat;
1810	__u64 tmpptr;
1811	int ncblk;
1812
1813	/* free resources */
1814	if (btree->b_ops->bop_clear != NULL)
1815		btree->b_ops->bop_clear(btree);
1816
1817	/* ptr must be a pointer to a buffer head. */
1818	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1819
1820	/* convert and insert */
1821	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1822	__nilfs_btree_init(btree);
1823	if (nreq != NULL) {
1824		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1825		nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1826
1827		/* create child node at level 1 */
1828		node = (struct nilfs_btree_node *)bh->b_data;
1829		ncblk = nilfs_btree_nchildren_per_block(btree);
1830		nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1831		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1832		if (!buffer_dirty(bh))
1833			mark_buffer_dirty(bh);
1834		if (!nilfs_bmap_dirty(btree))
1835			nilfs_bmap_set_dirty(btree);
1836
1837		brelse(bh);
1838
1839		/* create root node at level 2 */
1840		node = nilfs_btree_get_root(btree);
1841		tmpptr = nreq->bpr_ptr;
1842		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1843				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1844				      &keys[0], &tmpptr);
1845	} else {
1846		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1847
1848		/* create root node at level 1 */
1849		node = nilfs_btree_get_root(btree);
1850		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1851				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1852				      keys, ptrs);
1853		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1854					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1855		if (!nilfs_bmap_dirty(btree))
1856			nilfs_bmap_set_dirty(btree);
1857	}
1858
1859	if (NILFS_BMAP_USE_VBN(btree))
1860		nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1861}
1862
1863/**
1864 * nilfs_btree_convert_and_insert - Convert and insert entries into a B-tree
1865 * @btree: NILFS B-tree structure
1866 * @key: Key of the new entry to be inserted
1867 * @ptr: Pointer (block number) associated with the key to be inserted
1868 * @keys: Array of keys to be inserted in addition to @key
1869 * @ptrs: Array of pointers associated with @keys
1870 * @n: Number of keys and pointers in @keys and @ptrs
1871 *
1872 * This function is used to insert a new entry specified by @key and @ptr,
1873 * along with additional entries specified by @keys and @ptrs arrays, into a
1874 * NILFS B-tree.
1875 * It prepares the necessary changes by allocating the required blocks and any
1876 * necessary intermediate nodes, and it converts the block mapping from another
1877 * form (currently, only direct mapping exists) into a B-tree.
1878 *
1879 * Return: 0 on success or a negative error code on failure.
1880 */
1881int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1882				   __u64 key, __u64 ptr,
1883				   const __u64 *keys, const __u64 *ptrs, int n)
1884{
1885	struct buffer_head *bh = NULL;
1886	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1887	struct nilfs_bmap_stats stats;
1888	int ret;
1889
1890	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1891		di = &dreq;
1892		ni = NULL;
1893	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1894			   nilfs_btree_node_size(btree))) {
1895		di = &dreq;
1896		ni = &nreq;
1897	} else {
1898		di = NULL;
1899		ni = NULL;
1900		BUG();
1901	}
1902
1903	ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1904						     &stats);
1905	if (ret < 0)
1906		return ret;
1907	nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1908					      di, ni, bh);
1909	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1910	return 0;
1911}
1912
1913static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1914				   struct nilfs_btree_path *path,
1915				   int level,
1916				   struct buffer_head *bh)
1917{
1918	while ((++level < nilfs_btree_height(btree) - 1) &&
1919	       !buffer_dirty(path[level].bp_bh))
1920		mark_buffer_dirty(path[level].bp_bh);
1921
1922	return 0;
1923}
1924
1925static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1926					struct nilfs_btree_path *path,
1927					int level, struct inode *dat)
1928{
1929	struct nilfs_btree_node *parent;
1930	int ncmax, ret;
1931
1932	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1933	path[level].bp_oldreq.bpr_ptr =
1934		nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1935					 ncmax);
1936	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1937	ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1938				       &path[level].bp_newreq.bpr_req);
1939	if (ret < 0)
1940		return ret;
1941
1942	if (buffer_nilfs_node(path[level].bp_bh)) {
1943		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1944		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1945		path[level].bp_ctxt.bh = path[level].bp_bh;
1946		ret = nilfs_btnode_prepare_change_key(
1947			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1948			&path[level].bp_ctxt);
1949		if (ret < 0) {
1950			nilfs_dat_abort_update(dat,
1951					       &path[level].bp_oldreq.bpr_req,
1952					       &path[level].bp_newreq.bpr_req);
1953			return ret;
1954		}
1955	}
1956
1957	return 0;
1958}
1959
1960static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1961					struct nilfs_btree_path *path,
1962					int level, struct inode *dat)
1963{
1964	struct nilfs_btree_node *parent;
1965	int ncmax;
1966
1967	nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1968				&path[level].bp_newreq.bpr_req,
1969				btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1970
1971	if (buffer_nilfs_node(path[level].bp_bh)) {
1972		nilfs_btnode_commit_change_key(
1973			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1974			&path[level].bp_ctxt);
1975		path[level].bp_bh = path[level].bp_ctxt.bh;
1976	}
1977	set_buffer_nilfs_volatile(path[level].bp_bh);
1978
1979	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1980	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1981				 path[level].bp_newreq.bpr_ptr, ncmax);
1982}
1983
1984static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1985				       struct nilfs_btree_path *path,
1986				       int level, struct inode *dat)
1987{
1988	nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1989			       &path[level].bp_newreq.bpr_req);
1990	if (buffer_nilfs_node(path[level].bp_bh))
1991		nilfs_btnode_abort_change_key(
1992			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1993			&path[level].bp_ctxt);
1994}
1995
1996static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1997					   struct nilfs_btree_path *path,
1998					   int minlevel, int *maxlevelp,
1999					   struct inode *dat)
2000{
2001	int level, ret;
2002
2003	level = minlevel;
2004	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
2005		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
2006		if (ret < 0)
2007			return ret;
2008	}
2009	while ((++level < nilfs_btree_height(btree) - 1) &&
2010	       !buffer_dirty(path[level].bp_bh)) {
2012		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
2013		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
2014		if (ret < 0)
2015			goto out;
2016	}
2017
2018	/* success */
2019	*maxlevelp = level - 1;
2020	return 0;
2021
2022	/* error */
2023 out:
2024	while (--level > minlevel)
2025		nilfs_btree_abort_update_v(btree, path, level, dat);
2026	if (!buffer_nilfs_volatile(path[level].bp_bh))
2027		nilfs_btree_abort_update_v(btree, path, level, dat);
2028	return ret;
2029}
2030
2031static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2032					   struct nilfs_btree_path *path,
2033					   int minlevel, int maxlevel,
2034					   struct buffer_head *bh,
2035					   struct inode *dat)
2036{
2037	int level;
2038
2039	if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2040		nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2041
2042	for (level = minlevel + 1; level <= maxlevel; level++)
2043		nilfs_btree_commit_update_v(btree, path, level, dat);
2044}
2045
2046static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2047				   struct nilfs_btree_path *path,
2048				   int level, struct buffer_head *bh)
2049{
2050	int maxlevel = 0, ret;
2051	struct nilfs_btree_node *parent;
2052	struct inode *dat = nilfs_bmap_get_dat(btree);
2053	__u64 ptr;
2054	int ncmax;
2055
2056	get_bh(bh);
2057	path[level].bp_bh = bh;
2058	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2059					      dat);
2060	if (ret < 0)
2061		goto out;
2062
2063	if (buffer_nilfs_volatile(path[level].bp_bh)) {
2064		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2065		ptr = nilfs_btree_node_get_ptr(parent,
2066					       path[level + 1].bp_index,
2067					       ncmax);
2068		ret = nilfs_dat_mark_dirty(dat, ptr);
2069		if (ret < 0)
2070			goto out;
2071	}
2072
2073	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2074
2075 out:
2076	brelse(path[level].bp_bh);
2077	path[level].bp_bh = NULL;
2078	return ret;
2079}
2080
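/**
 * nilfs_btree_propagate - propagate the dirty state of a buffer up the b-tree
 * @btree: bmap struct of btree
 * @bh: buffer head of the dirty data or node block
 *
 * Looks up the path from the root to the block held by @bh and, depending on
 * whether the tree uses virtual or physical block addresses, either prepares
 * DAT updates for the ancestor node blocks or simply marks them dirty, so
 * that the modification is written out together with its ancestors.
 *
 * Return: 0 on success, or a negative error code on failure.
 */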
2081static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2082				 struct buffer_head *bh)
2083{
2084	struct nilfs_btree_path *path;
2085	struct nilfs_btree_node *node;
2086	__u64 key;
2087	int level, ret;
2088
2089	WARN_ON(!buffer_dirty(bh));
2090
2091	path = nilfs_btree_alloc_path();
2092	if (path == NULL)
2093		return -ENOMEM;
2094
2095	if (buffer_nilfs_node(bh)) {
2096		node = (struct nilfs_btree_node *)bh->b_data;
2097		key = nilfs_btree_node_get_key(node, 0);
2098		level = nilfs_btree_node_get_level(node);
2099	} else {
2100		key = nilfs_bmap_data_get_key(btree, bh);
2101		level = NILFS_BTREE_LEVEL_DATA;
2102	}
2103
2104	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2105	if (ret < 0) {
2106		if (unlikely(ret == -ENOENT))
2107			nilfs_crit(btree->b_inode->i_sb,
2108				   "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
2109				   btree->b_inode->i_ino,
2110				   (unsigned long long)key, level);
2111		goto out;
2112	}
2113
2114	ret = NILFS_BMAP_USE_VBN(btree) ?
2115		nilfs_btree_propagate_v(btree, path, level, bh) :
2116		nilfs_btree_propagate_p(btree, path, level, bh);
2117
2118 out:
2119	nilfs_btree_free_path(path);
2120
2121	return ret;
2122}
2123
2124static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2125				    struct buffer_head *bh)
2126{
2127	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2128}
2129
2130static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2131					 struct list_head *lists,
2132					 struct buffer_head *bh)
2133{
2134	struct list_head *head;
2135	struct buffer_head *cbh;
2136	struct nilfs_btree_node *node, *cnode;
2137	__u64 key, ckey;
2138	int level;
2139
2140	get_bh(bh);
2141	node = (struct nilfs_btree_node *)bh->b_data;
2142	key = nilfs_btree_node_get_key(node, 0);
2143	level = nilfs_btree_node_get_level(node);
2144	if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2145	    level >= NILFS_BTREE_LEVEL_MAX) {
2146		dump_stack();
2147		nilfs_warn(btree->b_inode->i_sb,
2148			   "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
2149			   level, (unsigned long long)key,
2150			   btree->b_inode->i_ino,
2151			   (unsigned long long)bh->b_blocknr);
2152		return;
2153	}
2154
2155	list_for_each(head, &lists[level]) {
2156		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2157		cnode = (struct nilfs_btree_node *)cbh->b_data;
2158		ckey = nilfs_btree_node_get_key(cnode, 0);
2159		if (key < ckey)
2160			break;
2161	}
2162	list_add_tail(&bh->b_assoc_buffers, head);
2163}
2164
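/**
 * nilfs_btree_lookup_dirty_buffers - collect dirty b-tree node buffers
 * @btree: bmap struct of btree
 * @listp: list head to which the dirty buffers are added
 *
 * Scans the btree node cache for dirty buffers and appends them to @listp,
 * grouped by node level (lowest level first) and sorted by their first key
 * within each level.
 */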
2165static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2166					     struct list_head *listp)
2167{
2168	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
2169	struct address_space *btcache = btnc_inode->i_mapping;
2170	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2171	struct folio_batch fbatch;
2172	struct buffer_head *bh, *head;
2173	pgoff_t index = 0;
2174	int level, i;
2175
2176	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2177	     level < NILFS_BTREE_LEVEL_MAX;
2178	     level++)
2179		INIT_LIST_HEAD(&lists[level]);
2180
2181	folio_batch_init(&fbatch);
2182
2183	while (filemap_get_folios_tag(btcache, &index, (pgoff_t)-1,
2184				PAGECACHE_TAG_DIRTY, &fbatch)) {
2185		for (i = 0; i < folio_batch_count(&fbatch); i++) {
2186			bh = head = folio_buffers(fbatch.folios[i]);
2187			do {
2188				if (buffer_dirty(bh))
2189					nilfs_btree_add_dirty_buffer(btree,
2190								     lists, bh);
2191			} while ((bh = bh->b_this_page) != head);
2192		}
2193		folio_batch_release(&fbatch);
2194		cond_resched();
2195	}
2196
2197	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2198	     level < NILFS_BTREE_LEVEL_MAX;
2199	     level++)
2200		list_splice_tail(&lists[level], listp);
2201}
2202
2203static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2204				struct nilfs_btree_path *path,
2205				int level,
2206				struct buffer_head **bh,
2207				sector_t blocknr,
2208				union nilfs_binfo *binfo)
2209{
2210	struct nilfs_btree_node *parent;
2211	__u64 key;
2212	__u64 ptr;
2213	int ncmax, ret;
2214
2215	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2216	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2217				       ncmax);
2218	if (buffer_nilfs_node(*bh)) {
2219		path[level].bp_ctxt.oldkey = ptr;
2220		path[level].bp_ctxt.newkey = blocknr;
2221		path[level].bp_ctxt.bh = *bh;
2222		ret = nilfs_btnode_prepare_change_key(
2223			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2224			&path[level].bp_ctxt);
2225		if (ret < 0)
2226			return ret;
2227		nilfs_btnode_commit_change_key(
2228			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2229			&path[level].bp_ctxt);
2230		*bh = path[level].bp_ctxt.bh;
2231	}
2232
2233	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2234				 ncmax);
2235
2236	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2237	/* on-disk format */
2238	binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2239	binfo->bi_dat.bi_level = level;
2240	memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad));
2241
2242	return 0;
2243}
2244
2245static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2246				struct nilfs_btree_path *path,
2247				int level,
2248				struct buffer_head **bh,
2249				sector_t blocknr,
2250				union nilfs_binfo *binfo)
2251{
2252	struct nilfs_btree_node *parent;
2253	struct inode *dat = nilfs_bmap_get_dat(btree);
2254	__u64 key;
2255	__u64 ptr;
2256	union nilfs_bmap_ptr_req req;
2257	int ncmax, ret;
2258
2259	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2260	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2261				       ncmax);
2262	req.bpr_ptr = ptr;
2263	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2264	if (ret < 0)
2265		return ret;
2266	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2267
2268	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2269	/* on-disk format */
2270	binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2271	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2272
2273	return 0;
2274}
2275
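/**
 * nilfs_btree_assign - assign a real block address to a dirty b-tree block
 * @btree: bmap struct of btree
 * @bh: place holding the buffer head of the block; may be replaced when the
 *	key of a node block changes
 * @blocknr: disk block number to be assigned
 * @binfo: buffer to return the block information in on-disk format
 *
 * For a b-tree with physical addresses, the pointer in the parent node is
 * rewritten to @blocknr; for virtual addresses, the translation is registered
 * in the DAT.  The key and the level or virtual block number of the block are
 * stored in @binfo.
 *
 * Return: 0 on success, or a negative error code on failure.
 */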
2276static int nilfs_btree_assign(struct nilfs_bmap *btree,
2277			      struct buffer_head **bh,
2278			      sector_t blocknr,
2279			      union nilfs_binfo *binfo)
2280{
2281	struct nilfs_btree_path *path;
2282	struct nilfs_btree_node *node;
2283	__u64 key;
2284	int level, ret;
2285
2286	path = nilfs_btree_alloc_path();
2287	if (path == NULL)
2288		return -ENOMEM;
2289
2290	if (buffer_nilfs_node(*bh)) {
2291		node = (struct nilfs_btree_node *)(*bh)->b_data;
2292		key = nilfs_btree_node_get_key(node, 0);
2293		level = nilfs_btree_node_get_level(node);
2294	} else {
2295		key = nilfs_bmap_data_get_key(btree, *bh);
2296		level = NILFS_BTREE_LEVEL_DATA;
2297	}
2298
2299	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2300	if (ret < 0) {
2301		WARN_ON(ret == -ENOENT);
2302		goto out;
2303	}
2304
2305	ret = NILFS_BMAP_USE_VBN(btree) ?
2306		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2307		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2308
2309 out:
2310	nilfs_btree_free_path(path);
2311
2312	return ret;
2313}
2314
2315static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2316				 struct buffer_head **bh,
2317				 sector_t blocknr,
2318				 union nilfs_binfo *binfo)
2319{
2320	struct nilfs_btree_node *node;
2321	__u64 key;
2322	int ret;
2323
2324	ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2325			     blocknr);
2326	if (ret < 0)
2327		return ret;
2328
2329	if (buffer_nilfs_node(*bh)) {
2330		node = (struct nilfs_btree_node *)(*bh)->b_data;
2331		key = nilfs_btree_node_get_key(node, 0);
2332	} else
2333		key = nilfs_bmap_data_get_key(btree, *bh);
2334
2335	/* on-disk format */
2336	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2337	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2338
2339	return 0;
2340}
2341
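/**
 * nilfs_btree_mark - mark the b-tree block containing a key dirty
 * @btree: bmap struct of btree
 * @key: key contained in the block to be marked
 * @level: level of the block to be marked
 *
 * Looks up the block at @level that covers @key, marks its buffer dirty, and
 * marks the bmap dirty.
 *
 * Return: 0 on success, or a negative error code on failure.
 */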
2342static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2343{
2344	struct buffer_head *bh;
2345	struct nilfs_btree_path *path;
2346	__u64 ptr;
2347	int ret;
2348
2349	path = nilfs_btree_alloc_path();
2350	if (path == NULL)
2351		return -ENOMEM;
2352
2353	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2354	if (ret < 0) {
2355		WARN_ON(ret == -ENOENT);
2356		goto out;
2357	}
2358	ret = nilfs_btree_get_block(btree, ptr, &bh);
2359	if (ret < 0) {
2360		WARN_ON(ret == -ENOENT);
2361		goto out;
2362	}
2363
2364	if (!buffer_dirty(bh))
2365		mark_buffer_dirty(bh);
2366	brelse(bh);
2367	if (!nilfs_bmap_dirty(btree))
2368		nilfs_bmap_set_dirty(btree);
2369
2370 out:
2371	nilfs_btree_free_path(path);
2372	return ret;
2373}
2374
2375static const struct nilfs_bmap_operations nilfs_btree_ops = {
2376	.bop_lookup		=	nilfs_btree_lookup,
2377	.bop_lookup_contig	=	nilfs_btree_lookup_contig,
2378	.bop_insert		=	nilfs_btree_insert,
2379	.bop_delete		=	nilfs_btree_delete,
2380	.bop_clear		=	NULL,
2381
2382	.bop_propagate		=	nilfs_btree_propagate,
2383
2384	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2385
2386	.bop_assign		=	nilfs_btree_assign,
2387	.bop_mark		=	nilfs_btree_mark,
2388
2389	.bop_seek_key		=	nilfs_btree_seek_key,
2390	.bop_last_key		=	nilfs_btree_last_key,
2391
2392	.bop_check_insert	=	NULL,
2393	.bop_check_delete	=	nilfs_btree_check_delete,
2394	.bop_gather_data	=	nilfs_btree_gather_data,
2395};
2396
2397static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2398	.bop_lookup		=	NULL,
2399	.bop_lookup_contig	=	NULL,
2400	.bop_insert		=	NULL,
2401	.bop_delete		=	NULL,
2402	.bop_clear		=	NULL,
2403
2404	.bop_propagate		=	nilfs_btree_propagate_gc,
2405
2406	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2407
2408	.bop_assign		=	nilfs_btree_assign_gc,
2409	.bop_mark		=	NULL,
2410
2411	.bop_seek_key		=	NULL,
2412	.bop_last_key		=	NULL,
2413
2414	.bop_check_insert	=	NULL,
2415	.bop_check_delete	=	NULL,
2416	.bop_gather_data	=	NULL,
2417};
2418
2419static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2420{
2421	bmap->b_ops = &nilfs_btree_ops;
2422	bmap->b_nchildren_per_block =
2423		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2424}
2425
2426int nilfs_btree_init(struct nilfs_bmap *bmap)
2427{
2428	int ret = 0;
2429
2430	__nilfs_btree_init(bmap);
2431
2432	if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
2433		ret = -EIO;
2434	else
2435		ret = nilfs_attach_btree_node_cache(
2436			&NILFS_BMAP_I(bmap)->vfs_inode);
2437
2438	return ret;
2439}
2440
2441void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2442{
2443	bmap->b_ops = &nilfs_btree_ops_gc;
2444	bmap->b_nchildren_per_block =
2445		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2446}