   1/*
   2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
   3 */
   4
   5/* Now we have all buffers that must be used in balancing of the tree.	*/
   6/* Further calculations cannot cause schedule(), and thus the buffer	*/
   7/* tree will be stable until the balancing is finished.			*/
   8/* Balance the tree according to the analysis made before, using the	*/
   9/* buffers obtained above.						*/
  10
  11/**
  12 ** balance_leaf_when_delete
  13 ** balance_leaf
  14 ** do_balance
  15 **
  16 **/
  17
  18#include <asm/uaccess.h>
  19#include <linux/time.h>
  20#include "reiserfs.h"
  21#include <linux/buffer_head.h>
  22#include <linux/kernel.h>
  23
  24static inline void buffer_info_init_left(struct tree_balance *tb,
  25                                         struct buffer_info *bi)
  26{
  27	bi->tb          = tb;
  28	bi->bi_bh       = tb->L[0];
  29	bi->bi_parent   = tb->FL[0];
  30	bi->bi_position = get_left_neighbor_position(tb, 0);
  31}
  32
  33static inline void buffer_info_init_right(struct tree_balance *tb,
  34                                          struct buffer_info *bi)
  35{
  36	bi->tb          = tb;
  37	bi->bi_bh       = tb->R[0];
  38	bi->bi_parent   = tb->FR[0];
  39	bi->bi_position = get_right_neighbor_position(tb, 0);
  40}
  41
  42static inline void buffer_info_init_tbS0(struct tree_balance *tb,
  43                                         struct buffer_info *bi)
  44{
  45	bi->tb          = tb;
  46	bi->bi_bh        = PATH_PLAST_BUFFER(tb->tb_path);
  47	bi->bi_parent   = PATH_H_PPARENT(tb->tb_path, 0);
  48	bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
  49}
  50
  51static inline void buffer_info_init_bh(struct tree_balance *tb,
  52                                       struct buffer_info *bi,
  53                                       struct buffer_head *bh)
  54{
  55	bi->tb          = tb;
  56	bi->bi_bh       = bh;
  57	bi->bi_parent   = NULL;
  58	bi->bi_position = 0;
  59}
  60
  61inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
  62				       struct buffer_head *bh, int flag)
  63{
  64	journal_mark_dirty(tb->transaction_handle,
  65			   tb->transaction_handle->t_super, bh);
  66}
  67
  68#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
  69#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
  70
  71/* summary:
  72 if deleting something ( tb->insert_size[0] < 0 )
  73   return(balance_leaf_when_delete()); (flag d handled here)
  74 else
  75   if lnum is larger than 0 we put items into the left node
  76   if rnum is larger than 0 we put items into the right node
  77   if snum1 is larger than 0 we put items into the new node s1
  78   if snum2 is larger than 0 we put items into the new node s2
  79Note that all *num* values count new items being created.
  80
  81It would be easier to read balance_leaf() if each of these summary
  82lines was a separate procedure rather than being inlined.  I think
  83that there are many passages here and in balance_leaf_when_delete() in
  84which two calls to one procedure can replace two passages, and it
  85might save cache space and reduce software maintenance costs to do so.
  86
  87Vladimir made the perceptive comment that we should offload most of
  88the decision making in this function into fix_nodes/check_balance, and
  89then create some sort of structure in tb that says what actions should
  90be performed by do_balance.
  91
  92-Hans */
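
/*
 * Editor's note: the sketch below is illustrative only and is not part of
 * the original file.  It shows one shape the "structure in tb that says
 * what actions should be performed" suggested above could take; all names
 * in it are hypothetical.
 */
#if 0
struct leaf_balance_plan {
	int to_left;		/* items to shift into L[0]   (tb->lnum[0]) */
	int to_right;		/* items to shift into R[0]   (tb->rnum[0]) */
	int to_new[2];		/* items for new nodes S_new  (tb->s1num, tb->s2num) */
	int bytes_left;		/* split point in the boundary item (tb->lbytes) */
	int bytes_right;	/* split point in the boundary item (tb->rbytes) */
};
#endif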
  93
  94/* Balance leaf node in case of delete or cut: insert_size[0] < 0
  95 *
  96 * lnum, rnum can have values >= -1
  97 *	-1 means that the neighbor must be joined with S
  98 *	 0 means that nothing should be done with the neighbor
  99 *	>0 means to shift entirely or partly the specified number of items to the neighbor
 100 */
 101static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
 102{
 103	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
 104	int item_pos = PATH_LAST_POSITION(tb->tb_path);
 105	int pos_in_item = tb->tb_path->pos_in_item;
 106	struct buffer_info bi;
 107	int n;
 108	struct item_head *ih;
 109
 110	RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
 111	       "vs- 12000: level: wrong FR %z", tb->FR[0]);
 112	RFALSE(tb->blknum[0] > 1,
 113	       "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
 114	RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
 115	       "PAP-12010: tree can not be empty");
 116
 117	ih = B_N_PITEM_HEAD(tbS0, item_pos);
 118	buffer_info_init_tbS0(tb, &bi);
 119
 120	/* Delete or truncate the item */
 121
 122	switch (flag) {
 123	case M_DELETE:		/* delete item in S[0] */
 124
 125		RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
 126		       "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
 127		       -tb->insert_size[0], ih);
 128
 129		leaf_delete_items(&bi, 0, item_pos, 1, -1);
 130
 131		if (!item_pos && tb->CFL[0]) {
 132			if (B_NR_ITEMS(tbS0)) {
 133				replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
 134					    0);
 135			} else {
 136				if (!PATH_H_POSITION(tb->tb_path, 1))
 137					replace_key(tb, tb->CFL[0], tb->lkey[0],
 138						    PATH_H_PPARENT(tb->tb_path,
 139								   0), 0);
 140			}
 141		}
 142
 143		RFALSE(!item_pos && !tb->CFL[0],
 144		       "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
 145		       tb->L[0]);
 146
 147		break;
 148
 149	case M_CUT:{		/* cut item in S[0] */
 150			if (is_direntry_le_ih(ih)) {
 151
 152				/* UFS unlink semantics are such that you can only delete one directory entry at a time. */
 153				/* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
 154				tb->insert_size[0] = -1;
 155				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
 156						     -tb->insert_size[0]);
 157
 158				RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
 159				       "PAP-12030: can not change delimiting key. CFL[0]=%p",
 160				       tb->CFL[0]);
 161
 162				if (!item_pos && !pos_in_item && tb->CFL[0]) {
 163					replace_key(tb, tb->CFL[0], tb->lkey[0],
 164						    tbS0, 0);
 165				}
 166			} else {
 167				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
 168						     -tb->insert_size[0]);
 169
 170				RFALSE(!ih_item_len(ih),
 171				       "PAP-12035: cut must leave non-zero dynamic length of item");
 172			}
 173			break;
 174		}
 175
 176	default:
 177		print_cur_tb("12040");
 178		reiserfs_panic(tb->tb_sb, "PAP-12040",
 179			       "unexpected mode: %s(%d)",
 180			       (flag ==
 181				M_PASTE) ? "PASTE" : ((flag ==
 182						       M_INSERT) ? "INSERT" :
 183						      "UNKNOWN"), flag);
 184	}
 185
 186	/* the rule is that no shifting occurs unless it allows a node to be freed */
 187	n = B_NR_ITEMS(tbS0);
 188	if (tb->lnum[0]) {	/* L[0] takes part in balancing */
 189		if (tb->lnum[0] == -1) {	/* L[0] must be joined with S[0] */
 190			if (tb->rnum[0] == -1) {	/* R[0] must be also joined with S[0] */
 191				if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
 192					/* all contents of all the 3 buffers will be in L[0] */
 193					if (PATH_H_POSITION(tb->tb_path, 1) == 0
 194					    && 1 < B_NR_ITEMS(tb->FR[0]))
 195						replace_key(tb, tb->CFL[0],
 196							    tb->lkey[0],
 197							    tb->FR[0], 1);
 198
 199					leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
 200							-1, NULL);
 201					leaf_move_items(LEAF_FROM_R_TO_L, tb,
 202							B_NR_ITEMS(tb->R[0]),
 203							-1, NULL);
 204
 205					reiserfs_invalidate_buffer(tb, tbS0);
 206					reiserfs_invalidate_buffer(tb,
 207								   tb->R[0]);
 208
 209					return 0;
 210				}
 211				/* all contents of all the 3 buffers will be in R[0] */
 212				leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
 213						NULL);
 214				leaf_move_items(LEAF_FROM_L_TO_R, tb,
 215						B_NR_ITEMS(tb->L[0]), -1, NULL);
 216
 217				/* right_delimiting_key is correct in R[0] */
 218				replace_key(tb, tb->CFR[0], tb->rkey[0],
 219					    tb->R[0], 0);
 220
 221				reiserfs_invalidate_buffer(tb, tbS0);
 222				reiserfs_invalidate_buffer(tb, tb->L[0]);
 223
 224				return -1;
 225			}
 226
 227			RFALSE(tb->rnum[0] != 0,
 228			       "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
 229			/* all contents of L[0] and S[0] will be in L[0] */
 230			leaf_shift_left(tb, n, -1);
 231
 232			reiserfs_invalidate_buffer(tb, tbS0);
 233
 234			return 0;
 235		}
 236		/* part of the contents of S[0] will be in L[0] and the rest of S[0] will be in R[0] */
 237
 238		RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
 239		       (tb->lnum[0] + tb->rnum[0] > n + 1),
 240		       "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
 241		       tb->rnum[0], tb->lnum[0], n);
 242		RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
 243		       (tb->lbytes != -1 || tb->rbytes != -1),
 244		       "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
 245		       tb->rbytes, tb->lbytes);
 246		RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
 247		       (tb->lbytes < 1 || tb->rbytes != -1),
 248		       "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
 249		       tb->rbytes, tb->lbytes);
 250
 251		leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
 252		leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
 253
 254		reiserfs_invalidate_buffer(tb, tbS0);
 255
 256		return 0;
 257	}
 258
 259	if (tb->rnum[0] == -1) {
 260		/* all contents of R[0] and S[0] will be in R[0] */
 261		leaf_shift_right(tb, n, -1);
 262		reiserfs_invalidate_buffer(tb, tbS0);
 263		return 0;
 264	}
 265
 266	RFALSE(tb->rnum[0],
 267	       "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
 268	return 0;
 269}
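
/*
 * Editor's note: illustrative sketch only, not part of the original file.
 * It spells out the lnum[0]/rnum[0] convention documented above
 * balance_leaf_when_delete(); the enum and helper names are hypothetical.
 */
#if 0
enum neighbor_action {
	NEIGHBOR_JOIN,		/* -1: the neighbor must be joined with S[0] */
	NEIGHBOR_UNTOUCHED,	/*  0: nothing should be done with the neighbor */
	NEIGHBOR_SHIFT		/* >0: shift that many items to the neighbor */
};

static enum neighbor_action classify_neighbor(int num)
{
	if (num == -1)
		return NEIGHBOR_JOIN;
	if (num == 0)
		return NEIGHBOR_UNTOUCHED;
	return NEIGHBOR_SHIFT;
}
#endif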
 270
 271static int balance_leaf(struct tree_balance *tb, struct item_head *ih,	/* item header of inserted item (in little-endian, on-disk format) */
 272			const char *body,	/* body  of inserted item or bytes to paste */
 273			int flag,	/* i - insert, d - delete, c - cut, p - paste
 274					   (see comment to do_balance) */
 275			struct item_head *insert_key,	/* in our processing of one level we sometimes determine what
 276							   must be inserted into the next higher level.  This insertion
 277							   consists of a key or two keys and their corresponding
 278							   pointers */
 279			struct buffer_head **insert_ptr	/* inserted node-ptrs for the next level */
 280    )
 281{
 282	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
 283	int item_pos = PATH_LAST_POSITION(tb->tb_path);	/*  index into the array of item headers in S[0]
 284							   of the affected item */
 285	struct buffer_info bi;
 286	struct buffer_head *S_new[2];	/* new nodes allocated to hold what could not fit into S */
 287	int snum[2];		/* number of items that will be placed
 288				   into S_new (includes partially shifted
 289				   items) */
 290	int sbytes[2];		/* if an item is partially shifted into S_new then
 291				   if it is a directory item
 292				   it is the number of entries from the item that are shifted into S_new
 293				   else
 294				   it is the number of bytes from the item that are shifted into S_new
 295				 */
 296	int n, i;
 297	int ret_val;
 298	int pos_in_item;
 299	int zeros_num;
 300
 301	PROC_INFO_INC(tb->tb_sb, balance_at[0]);
 302
 303	/* Handle the delete/cut case (insert_size[0] < 0) */
 304	if (tb->insert_size[0] < 0)
 305		return balance_leaf_when_delete(tb, flag);
 306
 307	zeros_num = 0;
 308	if (flag == M_INSERT && !body)
 309		zeros_num = ih_item_len(ih);
 310
 311	pos_in_item = tb->tb_path->pos_in_item;
 312	/* for indirect item pos_in_item is measured in unformatted node
 313	   pointers. Recalculate to bytes */
 314	if (flag != M_INSERT
 315	    && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos)))
 316		pos_in_item *= UNFM_P_SIZE;
 317
 318	if (tb->lnum[0] > 0) {
 319		/* Shift lnum[0] items from S[0] to the left neighbor L[0] */
 320		if (item_pos < tb->lnum[0]) {
 321			/* new item or its part falls into L[0], shift it too */
 322			n = B_NR_ITEMS(tb->L[0]);
 323
 324			switch (flag) {
 325			case M_INSERT:	/* insert item into L[0] */
 326
 327				if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
 328					/* part of new item falls into L[0] */
 329					int new_item_len;
 330					int version;
 331
 332					ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, -1);
 333
 334					/* Calculate item length to insert to S[0] */
 335					new_item_len = ih_item_len(ih) - tb->lbytes;
 336					/* Calculate and check item length to insert to L[0] */
 337					put_ih_item_len(ih, ih_item_len(ih) - new_item_len);
 338
 339					RFALSE(ih_item_len(ih) <= 0,
 340					       "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
 341					       ih_item_len(ih));
 342
 343					/* Insert new item into L[0] */
 344					buffer_info_init_left(tb, &bi);
 345					leaf_insert_into_buf(&bi,
 346							n + item_pos - ret_val, ih, body,
 347							zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
 348
 349					version = ih_version(ih);
 350
 351					/* Calculate key component, item length and body to insert into S[0] */
 352					set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
 353							(tb-> lbytes << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
 354
 355					put_ih_item_len(ih, new_item_len);
 356					if (tb->lbytes > zeros_num) {
 357						body += (tb->lbytes - zeros_num);
 358						zeros_num = 0;
 359					} else
 360						zeros_num -= tb->lbytes;
 361
 362					RFALSE(ih_item_len(ih) <= 0,
 363					       "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
 364					       ih_item_len(ih));
 365				} else {
 366					/* new item in whole falls into L[0] */
 367					/* Shift lnum[0]-1 items to L[0] */
 368					ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes);
 369					/* Insert new item into L[0] */
 370					buffer_info_init_left(tb, &bi);
 371					leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, zeros_num);
 372					tb->insert_size[0] = 0;
 373					zeros_num = 0;
 374				}
 375				break;
 376
 377			case M_PASTE:	/* append item in L[0] */
 378
 379				if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
 380					/* we must shift the part of the appended item */
 381					if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) {
 382
 383						RFALSE(zeros_num,
 384						       "PAP-12090: invalid parameter in case of a directory");
 385						/* directory item */
 386						if (tb->lbytes > pos_in_item) {
 387							/* new directory entry falls into L[0] */
 388							struct item_head *pasted;
 389							int l_pos_in_item = pos_in_item;
 390
 391							/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
 392							ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes-1);
 393							if (ret_val && !item_pos) {
 394								pasted = B_N_PITEM_HEAD(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1);
 395								l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes -1);
 396							}
 397
 398							/* Append given directory entry to directory item */
 399							buffer_info_init_left(tb, &bi);
 400							leaf_paste_in_buffer(&bi, n + item_pos - ret_val, l_pos_in_item, tb->insert_size[0], body, zeros_num);
 401
 402							/* the previous call prepared space for the new entry; the following call pastes the entry itself */
 403
 404							/* when we have merged the directory item, pos_in_item has been adjusted as well */
 405
 406							/* paste new directory entry. 1 is entry number */
 407							leaf_paste_entries(&bi, n + item_pos - ret_val, l_pos_in_item,
 408									   1, (struct reiserfs_de_head *) body,
 409									   body + DEH_SIZE, tb->insert_size[0]);
 410							tb->insert_size[0] = 0;
 411						} else {
 412							/* new directory entry doesn't fall into L[0] */
 413							/* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
 414							leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
 415						}
 416						/* Calculate new position to append in item body */
 417						pos_in_item -= tb->lbytes;
 418					} else {
 419						/* regular object */
 420						RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb->lbytes);
 421						RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
 422						       "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
 423						       ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),pos_in_item);
 424
 425						if (tb->lbytes >= pos_in_item) {
 426							/* appended item will be in L[0] in whole */
 427							int l_n;
 428
 429							/* this many bytes must be appended to the last item of L[0] */
 430							l_n = tb->lbytes - pos_in_item;
 431
 432							/* Calculate new insert_size[0] */
 433							tb->insert_size[0] -= l_n;
 434
 435							RFALSE(tb->insert_size[0] <= 0,
 436							       "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
 437							       tb->insert_size[0]);
 438							ret_val = leaf_shift_left(tb, tb->lnum[0], ih_item_len
 439									    (B_N_PITEM_HEAD(tbS0, item_pos)));
 440							/* Append to body of item in L[0] */
 441							buffer_info_init_left(tb, &bi);
 442							leaf_paste_in_buffer
 443							    (&bi, n + item_pos - ret_val, ih_item_len
 444							     (B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val)),
 445							     l_n, body,
 446							     zeros_num > l_n ? l_n : zeros_num);
 447							/* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
 448							{
 449								int version;
 450								int temp_l = l_n;
 451
 452								RFALSE(ih_item_len(B_N_PITEM_HEAD(tbS0, 0)),
 453								     "PAP-12106: item length must be 0");
 454								RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY
 455								      (tb->L[0], n + item_pos - ret_val)),
 456								     "PAP-12107: items must be of the same file");
 457								if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) {
 458									temp_l = l_n << (tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT);
 459								}
 460								/* update key of first item in S0 */
 461								version = ih_version(B_N_PITEM_HEAD(tbS0, 0));
 462								set_le_key_k_offset(version, B_N_PKEY(tbS0, 0),
 463								     le_key_k_offset(version,B_N_PKEY(tbS0, 0)) + temp_l);
 464								/* update left delimiting key */
 465								set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
 466								     le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0])) + temp_l);
 467							}
 468
 469							/* Calculate new body, position in item and insert_size[0] */
 470							if (l_n > zeros_num) {
 471								body += (l_n - zeros_num);
 472								zeros_num = 0;
 473							} else
 474								zeros_num -= l_n;
 475							pos_in_item = 0;
 476
 477							RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1))
 478							     || !op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)
 479							     || !op_is_left_mergeable(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), tbS0->b_size),
 480							     "PAP-12120: item must be merge-able with left neighboring item");
 481						} else {	/* only part of the appended item will be in L[0] */
 482
 483							/* Calculate position in item for append in S[0] */
 484							pos_in_item -= tb->lbytes;
 485
 486							RFALSE(pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
 487
 488							/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
 489							leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
 490						}
 491					}
 492				} else {	/* appended item will be in L[0] in whole */
 493
 494					struct item_head *pasted;
 495
 496					if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) {	/* if we paste into first item of S[0] and it is left mergeable */
 497						/* then increment pos_in_item by the size of the last item in L[0] */
 498						pasted = B_N_PITEM_HEAD(tb->L[0], n - 1);
 499						if (is_direntry_le_ih(pasted))
 500							pos_in_item += ih_entry_count(pasted);
 501						else
 502							pos_in_item += ih_item_len(pasted);
 503					}
 504
 505					/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
 506					ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
 507					/* Append to body of item in L[0] */
 508					buffer_info_init_left(tb, &bi);
 509					leaf_paste_in_buffer(&bi, n + item_pos - ret_val,
 510							     pos_in_item,
 511							     tb->insert_size[0],
 512							     body, zeros_num);
 513
 514					/* if appended item is directory, paste entry */
 515					pasted = B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val);
 516					if (is_direntry_le_ih(pasted))
 517						leaf_paste_entries(&bi, n + item_pos - ret_val,
 518								   pos_in_item, 1,
 519								   (struct reiserfs_de_head *) body,
 520								   body + DEH_SIZE,
 521								   tb->insert_size[0]);
 522					/* if appended item is indirect item, put unformatted node into un list */
 523					if (is_indirect_le_ih(pasted))
 524						set_ih_free_space(pasted, 0);
 525					tb->insert_size[0] = 0;
 526					zeros_num = 0;
 527				}
 528				break;
 529			default:	/* cases d and t */
 530				reiserfs_panic(tb->tb_sb, "PAP-12130",
 531					       "lnum > 0: unexpected mode: "
 532					       " %s(%d)",
 533					       (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
 534			}
 535		} else {
 536			/* new item doesn't fall into L[0] */
 537			leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
 538		}
 539	}
 540
 541	/* tb->lnum[0] > 0 */
 542	/* Calculate new item position */
 543	item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
 544
 545	if (tb->rnum[0] > 0) {
 546		/* shift rnum[0] items from S[0] to the right neighbor R[0] */
 547		n = B_NR_ITEMS(tbS0);
 548		switch (flag) {
 549
 550		case M_INSERT:	/* insert item */
 551			if (n - tb->rnum[0] < item_pos) {	/* new item or its part falls to R[0] */
 552				if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) {	/* part of new item falls into R[0] */
 553					loff_t old_key_comp, old_len, r_zeros_number;
 554					const char *r_body;
 555					int version;
 556					loff_t offset;
 557
 558					leaf_shift_right(tb, tb->rnum[0] - 1, -1);
 559
 560					version = ih_version(ih);
 561					/* Remember key component and item length */
 562					old_key_comp = le_ih_k_offset(ih);
 563					old_len = ih_item_len(ih);
 564
 565					/* Calculate key component and item length to insert into R[0] */
 566					offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0));
 567					set_le_ih_k_offset(ih, offset);
 568					put_ih_item_len(ih, tb->rbytes);
 569					/* Insert part of the item into R[0] */
 570					buffer_info_init_right(tb, &bi);
 571					if ((old_len - tb->rbytes) > zeros_num) {
 572						r_zeros_number = 0;
 573						r_body = body + (old_len - tb->rbytes) - zeros_num;
 574					} else {
 575						r_body = body;
 576						r_zeros_number = zeros_num - (old_len - tb->rbytes);
 577						zeros_num -= r_zeros_number;
 578					}
 579
 580					leaf_insert_into_buf(&bi, 0, ih, r_body,
 581							     r_zeros_number);
 582
 583					/* Replace right delimiting key by first key in R[0] */
 584					replace_key(tb, tb->CFR[0], tb->rkey[0],
 585						    tb->R[0], 0);
 586
 587					/* Calculate key component and item length to insert into S[0] */
 588					set_le_ih_k_offset(ih, old_key_comp);
 589					put_ih_item_len(ih, old_len - tb->rbytes);
 590
 591					tb->insert_size[0] -= tb->rbytes;
 592
 593				} else {	/* whole new item falls into R[0] */
 594
 595					/* Shift rnum[0]-1 items to R[0] */
 596					ret_val = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
 597					/* Insert new item into R[0] */
 598					buffer_info_init_right(tb, &bi);
 599					leaf_insert_into_buf(&bi, item_pos - n + tb->rnum[0] - 1,
 600							     ih, body, zeros_num);
 601
 602					if (item_pos - n + tb->rnum[0] - 1 == 0) {
 603						replace_key(tb, tb->CFR[0],
 604							    tb->rkey[0],
 605							    tb->R[0], 0);
 606
 607					}
 608					zeros_num = tb->insert_size[0] = 0;
 609				}
 610			} else {	/* new item or part of it doesn't fall into R[0] */
 611
 612				leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
 613			}
 614			break;
 615
 616		case M_PASTE:	/* append item */
 617
 618			if (n - tb->rnum[0] <= item_pos) {	/* pasted item or part of it falls to R[0] */
 619				if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) {	/* we must shift the part of the appended item */
 620					if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) {	/* we append to directory item */
 621						int entry_count;
 622
 623						RFALSE(zeros_num,
 624						       "PAP-12145: invalid parameter in case of a directory");
 625						entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD
 626								  (tbS0, item_pos));
 627						if (entry_count - tb->rbytes <
 628						    pos_in_item)
 629							/* new directory entry falls into R[0] */
 630						{
 631							int paste_entry_position;
 632
 633							RFALSE(tb->rbytes - 1 >= entry_count || !tb-> insert_size[0],
 634							       "PAP-12150: not enough entries to shift to R[0]: rbytes=%d, entry_count=%d",
 635							       tb->rbytes, entry_count);
 636							/* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
 637							leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1);
 638							/* Paste given directory entry to directory item */
 639							paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
 640							buffer_info_init_right(tb, &bi);
 641							leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, zeros_num);
 642							/* paste entry */
 643							leaf_paste_entries(&bi, 0, paste_entry_position, 1,
 644									   (struct reiserfs_de_head *) body,
 645									   body + DEH_SIZE, tb->insert_size[0]);
 646
 647							if (paste_entry_position == 0) {
 648								/* change delimiting keys */
 649								replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0],0);
 650							}
 651
 652							tb->insert_size[0] = 0;
 653							pos_in_item++;
 654						} else {	/* new directory entry doesn't fall into R[0] */
 655
 656							leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
 657						}
 658					} else {	/* regular object */
 659
 660						int n_shift, n_rem, r_zeros_number;
 661						const char *r_body;
 662
 663						/* Calculate number of bytes which must be shifted from appended item */
 664						if ((n_shift = tb->rbytes - tb->insert_size[0]) < 0)
 665							n_shift = 0;
 666
 667						RFALSE(pos_in_item != ih_item_len
 668						       (B_N_PITEM_HEAD(tbS0, item_pos)),
 669						       "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
 670						       pos_in_item, ih_item_len
 671						       (B_N_PITEM_HEAD(tbS0, item_pos)));
 672
 673						leaf_shift_right(tb, tb->rnum[0], n_shift);
 674						/* Calculate number of bytes which must remain in body after appending to R[0] */
 675						if ((n_rem = tb->insert_size[0] - tb->rbytes) < 0)
 676							n_rem = 0;
 677
 678						{
 679							int version;
 680							unsigned long temp_rem = n_rem;
 681
 682							version = ih_version(B_N_PITEM_HEAD(tb->R[0], 0));
 683							if (is_indirect_le_key(version, B_N_PKEY(tb->R[0], 0))) {
 684								temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
 685							}
 686							set_le_key_k_offset(version, B_N_PKEY(tb->R[0], 0),
 687							     le_key_k_offset(version, B_N_PKEY(tb->R[0], 0)) + temp_rem);
 688							set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]),
 689							     le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])) + temp_rem);
 690						}
 691/*		  k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
 692		  k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
 693						do_balance_mark_internal_dirty(tb, tb->CFR[0], 0);
 694
 695						/* Append part of body into R[0] */
 696						buffer_info_init_right(tb, &bi);
 697						if (n_rem > zeros_num) {
 698							r_zeros_number = 0;
 699							r_body = body + n_rem - zeros_num;
 700						} else {
 701							r_body = body;
 702							r_zeros_number = zeros_num - n_rem;
 703							zeros_num -= r_zeros_number;
 704						}
 705
 706						leaf_paste_in_buffer(&bi, 0, n_shift,
 707								     tb->insert_size[0] - n_rem,
 708								     r_body, r_zeros_number);
 709
 710						if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->R[0], 0))) {
 711#if 0
 712							RFALSE(n_rem,
 713							       "PAP-12160: paste more than one unformatted node pointer");
 714#endif
 715							set_ih_free_space(B_N_PITEM_HEAD(tb->R[0], 0), 0);
 716						}
 717						tb->insert_size[0] = n_rem;
 718						if (!n_rem)
 719							pos_in_item++;
 720					}
 721				} else {	/* pasted item in whole falls into R[0] */
 722
 723					struct item_head *pasted;
 724
 725					ret_val = leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
 726					/* append item in R[0] */
 727					if (pos_in_item >= 0) {
 728						buffer_info_init_right(tb, &bi);
 729						leaf_paste_in_buffer(&bi, item_pos - n + tb->rnum[0], pos_in_item,
 730								     tb->insert_size[0], body, zeros_num);
 731					}
 732
 733					/* paste new entry, if item is directory item */
 734					pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]);
 735					if (is_direntry_le_ih(pasted) && pos_in_item >= 0) {
 736						leaf_paste_entries(&bi, item_pos - n + tb->rnum[0],
 737								   pos_in_item, 1,
 738								   (struct reiserfs_de_head *) body,
 739								   body + DEH_SIZE, tb->insert_size[0]);
 740						if (!pos_in_item) {
 741
 742							RFALSE(item_pos - n + tb->rnum[0],
 743							       "PAP-12165: directory item must be first item of node when pasting is in 0th position");
 744
 745							/* update delimiting keys */
 746							replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
 747						}
 748					}
 749
 750					if (is_indirect_le_ih(pasted))
 751						set_ih_free_space(pasted, 0);
 752					zeros_num = tb->insert_size[0] = 0;
 753				}
 754			} else {	/* new item doesn't fall into R[0] */
 755
 756				leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
 757			}
 758			break;
 759		default:	/* cases d and t */
 760			reiserfs_panic(tb->tb_sb, "PAP-12175",
 761				       "rnum > 0: unexpected mode: %s(%d)",
 762				       (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
 763		}
 764
 765	}
 766
 767	/* tb->rnum[0] > 0 */
 768	RFALSE(tb->blknum[0] > 3,
 769	       "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
 770	RFALSE(tb->blknum[0] < 0,
 771	       "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
 772
 773	/* if while adding to a node we discover that it can be split in two,
 774	   with the left part merged into the left neighbor and the right part
 775	   into the right neighbor, then the node itself is eliminated */
 776	if (tb->blknum[0] == 0) {	/* node S[0] is empty now */
 777
 778		RFALSE(!tb->lnum[0] || !tb->rnum[0],
 779		       "PAP-12190: lnum and rnum must not be zero");
 780		/* if insertion was done before the 0-th position in R[0], the
 781		   right delimiting key of tb->L[0] (the old left delimiting key
 782		   of S[0]) is not set correctly */
 783		if (tb->CFL[0]) {
 784			if (!tb->CFR[0])
 785				reiserfs_panic(tb->tb_sb, "vs-12195",
 786					       "CFR not initialized");
 787			copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
 788				 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
 789			do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
 790		}
 791
 792		reiserfs_invalidate_buffer(tb, tbS0);
 793		return 0;
 794	}
 795
 796	/* Fill new nodes that appear in place of S[0] */
 797
 798	/* I am told that this copying is because we need an array to enable
 799	   the looping code. -Hans */
 800	snum[0] = tb->s1num, snum[1] = tb->s2num;
 801	sbytes[0] = tb->s1bytes;
 802	sbytes[1] = tb->s2bytes;
 803	for (i = tb->blknum[0] - 2; i >= 0; i--) {
 804
 805		RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
 806		       snum[i]);
 807
 808		/* here we shift from S to S_new nodes */
 809
 810		S_new[i] = get_FEB(tb);
 811
 812		/* initialize block type and tree level */
 813		set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
 814
 815		n = B_NR_ITEMS(tbS0);
 816
 817		switch (flag) {
 818		case M_INSERT:	/* insert item */
 819
 820			if (n - snum[i] < item_pos) {	/* new item or its part falls to the first new node S_new[i] */
 821				if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) {	/* part of new item falls into S_new[i] */
 822					int old_key_comp, old_len, r_zeros_number;
 823					const char *r_body;
 824					int version;
 825
 826					/* Move snum[i]-1 items from S[0] to S_new[i] */
 827					leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
 828							snum[i] - 1, -1,
 829							S_new[i]);
 830					/* Remember key component and item length */
 831					version = ih_version(ih);
 832					old_key_comp = le_ih_k_offset(ih);
 833					old_len = ih_item_len(ih);
 834
 835					/* Calculate key component and item length to insert into S_new[i] */
 836					set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
 837							   ((old_len - sbytes[i]) << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
 838
 839					put_ih_item_len(ih, sbytes[i]);
 840
 841					/* Insert part of the item into S_new[i] before 0-th item */
 842					buffer_info_init_bh(tb, &bi, S_new[i]);
 843
 844					if ((old_len - sbytes[i]) > zeros_num) {
 845						r_zeros_number = 0;
 846						r_body = body + (old_len - sbytes[i]) - zeros_num;
 847					} else {
 848						r_body = body;
 849						r_zeros_number = zeros_num - (old_len - sbytes[i]);
 850						zeros_num -= r_zeros_number;
 851					}
 852
 853					leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeros_number);
 854
 855					/* Calculate key component and item length to insert into S[0] */
 856					set_le_ih_k_offset(ih, old_key_comp);
 857					put_ih_item_len(ih, old_len - sbytes[i]);
 858					tb->insert_size[0] -= sbytes[i];
 859				} else {	/* whole new item falls into S_new[i] */
 860
 861					/* Shift snum[i] - 1 items to S_new[i] (sbytes[i] of the split item) */
 862					leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
 863							snum[i] - 1, sbytes[i], S_new[i]);
 864
 865					/* Insert new item into S_new[i] */
 866					buffer_info_init_bh(tb, &bi, S_new[i]);
 867					leaf_insert_into_buf(&bi, item_pos - n + snum[i] - 1,
 868							     ih, body, zeros_num);
 869
 870					zeros_num = tb->insert_size[0] = 0;
 871				}
 872			}
 873
 874			else {	/* new item or its part does not fall into S_new[i] */
 875
 876				leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
 877						snum[i], sbytes[i], S_new[i]);
 878			}
 879			break;
 880
 881		case M_PASTE:	/* append item */
 882
 883			if (n - snum[i] <= item_pos) {	/* pasted item or part of it falls to S_new[i] */
 884				if (item_pos == n - snum[i] && sbytes[i] != -1) {	/* we must shift part of the appended item */
 885					struct item_head *aux_ih;
 886
 887					RFALSE(ih, "PAP-12210: ih must be 0");
 888
 889					aux_ih = B_N_PITEM_HEAD(tbS0, item_pos);
 890					if (is_direntry_le_ih(aux_ih)) {
 891						/* we append to directory item */
 892
 893						int entry_count;
 894
 895						entry_count = ih_entry_count(aux_ih);
 896
 897						if (entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count) {
 898							/* new directory entry falls into S_new[i] */
 899
 900							RFALSE(!tb->insert_size[0], "PAP-12215: insert_size is already 0");
 901							RFALSE(sbytes[i] - 1 >= entry_count,
 902							       "PAP-12220: there are not so many entries (%d), only %d",
 903							       sbytes[i] - 1, entry_count);
 904
 905							/* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
 906							leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i] - 1, S_new[i]);
 907							/* Paste given directory entry to directory item */
 908							buffer_info_init_bh(tb, &bi, S_new[i]);
 909							leaf_paste_in_buffer(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
 910							     tb->insert_size[0], body, zeros_num);
 911							/* paste new directory entry */
 912							leaf_paste_entries(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, 1,
 913									   (struct reiserfs_de_head *) body,
 914									   body + DEH_SIZE, tb->insert_size[0]);
 915							tb->insert_size[0] = 0;
 916							pos_in_item++;
 917						} else {	/* new directory entry doesn't fall into S_new[i] */
 918							leaf_move_items(LEAF_FROM_S_TO_SNEW,tb, snum[i], sbytes[i], S_new[i]);
 919						}
 920					} else {	/* regular object */
 921
 922						int n_shift, n_rem, r_zeros_number;
 923						const char *r_body;
 924
 925						RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)) || tb->insert_size[0] <= 0,
 926						       "PAP-12225: item too short or insert_size <= 0");
 927
 928						/* Calculate number of bytes which must be shifted from appended item */
 929						n_shift = sbytes[i] - tb->insert_size[0];
 930						if (n_shift < 0)
 931							n_shift = 0;
 932						leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
 933
 934						/* Calculate number of bytes which must remain in body after append to S_new[i] */
 935						n_rem = tb->insert_size[0] - sbytes[i];
 936						if (n_rem < 0)
 937							n_rem = 0;
 938						/* Append part of the body into S_new[i] */
 939						buffer_info_init_bh(tb, &bi, S_new[i]);
 940						if (n_rem > zeros_num) {
 941							r_zeros_number = 0;
 942							r_body = body + n_rem - zeros_num;
 943						} else {
 944							r_body = body;
 945							r_zeros_number = zeros_num - n_rem;
 946							zeros_num -= r_zeros_number;
 947						}
 948
 949						leaf_paste_in_buffer(&bi, 0, n_shift,
 950								     tb->insert_size[0] - n_rem,
 951								     r_body, r_zeros_number);
 952						{
 953							struct item_head *tmp;
 954
 955							tmp = B_N_PITEM_HEAD(S_new[i], 0);
 956							if (is_indirect_le_ih
 957							    (tmp)) {
 958								set_ih_free_space(tmp, 0);
 959								set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
 960							} else {
 961								set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + n_rem);
 962							}
 963						}
 964
 965						tb->insert_size[0] = n_rem;
 966						if (!n_rem)
 967							pos_in_item++;
 968					}
 969				} else
 970					/* item falls wholly into S_new[i] */
 971				{
 972					int leaf_mi;
 973					struct item_head *pasted;
 974
 975#ifdef CONFIG_REISERFS_CHECK
 976					struct item_head *ih_check = B_N_PITEM_HEAD(tbS0, item_pos);
 977
 978					if (!is_direntry_le_ih(ih_check)
 979					    && (pos_in_item != ih_item_len(ih_check)
 980						|| tb->insert_size[0] <= 0))
 981						reiserfs_panic(tb->tb_sb,
 982							     "PAP-12235",
 983							     "pos_in_item "
 984							     "must be equal "
 985							     "to ih_item_len");
 986#endif				/* CONFIG_REISERFS_CHECK */
 987
 988					leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW,
 989							    tb, snum[i],
 990							    sbytes[i],
 991							    S_new[i]);
 992
 993					RFALSE(leaf_mi,
 994					       "PAP-12240: unexpected value returned by leaf_move_items (%d)",
 995					       leaf_mi);
 996
 997					/* paste into item */
 998					buffer_info_init_bh(tb, &bi, S_new[i]);
 999					leaf_paste_in_buffer(&bi,
1000							     item_pos - n + snum[i],
1001							     pos_in_item,
1002							     tb->insert_size[0],
1003							     body, zeros_num);
1004
1005					pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
1006					if (is_direntry_le_ih(pasted)) {
1007						leaf_paste_entries(&bi,
1008								   item_pos - n + snum[i],
1009								   pos_in_item, 1,
1010								   (struct reiserfs_de_head *)body,
1011								   body + DEH_SIZE,
1012								   tb->insert_size[0]
1013						    );
1014					}
1015
1016					/* if we paste to indirect item update ih_free_space */
1017					if (is_indirect_le_ih(pasted))
1018						set_ih_free_space(pasted, 0);
1019					zeros_num = tb->insert_size[0] = 0;
1020				}
1021			}
1022
1023			else {	/* pasted item doesn't fall into S_new[i] */
1024
1025				leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1026						snum[i], sbytes[i], S_new[i]);
1027			}
1028			break;
1029		default:	/* cases d and t */
1030			reiserfs_panic(tb->tb_sb, "PAP-12245",
1031				       "blknum > 2: unexpected mode: %s(%d)",
1032				       (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1033		}
1034
1035		memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE);
1036		insert_ptr[i] = S_new[i];
1037
1038		RFALSE(!buffer_journaled(S_new[i])
1039		       || buffer_journal_dirty(S_new[i])
1040		       || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
1041		       i, S_new[i]);
1042	}
1043
1044	/* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1045	   affected item which remains in S */
1046	if (0 <= item_pos && item_pos < tb->s0num) {	/* if we must insert or append into buffer S[0] */
1047
1048		switch (flag) {
1049		case M_INSERT:	/* insert item into S[0] */
1050			buffer_info_init_tbS0(tb, &bi);
1051			leaf_insert_into_buf(&bi, item_pos, ih, body,
1052					     zeros_num);
1053
1054			/* If we insert the first key, change the delimiting key */
1055			if (item_pos == 0) {
1056				if (tb->CFL[0])	/* can be 0 in reiserfsck */
1057					replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1058			}
1059			break;
1060
1061		case M_PASTE:{	/* append item in S[0] */
1062				struct item_head *pasted;
1063
1064				pasted = B_N_PITEM_HEAD(tbS0, item_pos);
1065				/* if this is a directory item, the new entry may already have been pasted */
1066				if (is_direntry_le_ih(pasted)) {
1067					if (pos_in_item >= 0 && pos_in_item <= ih_entry_count(pasted)) {
1068
1069						RFALSE(!tb->insert_size[0],
1070						       "PAP-12260: insert_size is 0 already");
1071
1072						/* prepare space */
1073						buffer_info_init_tbS0(tb, &bi);
1074						leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1075								     tb->insert_size[0], body,
1076								     zeros_num);
1077
1078						/* paste entry */
1079						leaf_paste_entries(&bi, item_pos, pos_in_item, 1,
1080								   (struct reiserfs_de_head *)body,
1081								   body + DEH_SIZE,
1082								   tb->insert_size[0]);
1083						if (!item_pos && !pos_in_item) {
1084							RFALSE(!tb->CFL[0] || !tb->L[0],
1085							       "PAP-12270: CFL[0]/L[0] must be specified");
1086							if (tb->CFL[0])
1087								replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1088						}
1089						tb->insert_size[0] = 0;
1090					}
1091				} else {	/* regular object */
1092					if (pos_in_item == ih_item_len(pasted)) {
1093
1094						RFALSE(tb->insert_size[0] <= 0,
1095						       "PAP-12275: insert size must not be %d",
1096						       tb->insert_size[0]);
1097						buffer_info_init_tbS0(tb, &bi);
1098						leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1099								     tb->insert_size[0], body, zeros_num);
1100
1101						if (is_indirect_le_ih(pasted)) {
1102#if 0
1103							RFALSE(tb->
1104							       insert_size[0] !=
1105							       UNFM_P_SIZE,
1106							       "PAP-12280: insert_size for indirect item must be %d, not %d",
1107							       UNFM_P_SIZE,
1108							       tb->
1109							       insert_size[0]);
1110#endif
1111							set_ih_free_space(pasted, 0);
1112						}
1113						tb->insert_size[0] = 0;
1114					}
1115#ifdef CONFIG_REISERFS_CHECK
1116					else {
1117						if (tb->insert_size[0]) {
1118							print_cur_tb("12285");
1119							reiserfs_panic(tb->tb_sb,
1120							    "PAP-12285",
1121							    "insert_size "
1122							    "must be 0 "
1123							    "(%d)",
1124							    tb->insert_size[0]);
1125						}
1126					}
1127#endif				/* CONFIG_REISERFS_CHECK */
1128
1129				}
1130			}	/* case M_PASTE: */
1131		}
1132	}
1133#ifdef CONFIG_REISERFS_CHECK
1134	if (flag == M_PASTE && tb->insert_size[0]) {
1135		print_cur_tb("12290");
1136		reiserfs_panic(tb->tb_sb,
1137			       "PAP-12290", "insert_size is still not 0 (%d)",
1138			       tb->insert_size[0]);
1139	}
1140#endif				/* CONFIG_REISERFS_CHECK */
1141	return 0;
1142}				/* Leaf level of the tree is balanced (end of balance_leaf) */
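
/*
 * Editor's note: illustrative sketch only, not part of the original file.
 * It shows the zeros_num convention used by balance_leaf() above: an
 * M_INSERT with a NULL body means "insert ih_item_len(ih) bytes of
 * zeroes", and zeros_num is consumed as pieces of the item are placed
 * into L[0], R[0], S_new[] or S[0].  The helper name is hypothetical.
 */
#if 0
static void example_insert_zeroed_item(struct tree_balance *tb,
				       struct item_head *ih)
{
	/* body == NULL: balance_leaf() zero-fills the inserted item */
	do_balance(tb, ih, NULL, M_INSERT);
}
#endif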
1143
1144/* Make empty node */
1145void make_empty_node(struct buffer_info *bi)
1146{
1147	struct block_head *blkh;
1148
1149	RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1150
1151	blkh = B_BLK_HEAD(bi->bi_bh);
1152	set_blkh_nr_item(blkh, 0);
1153	set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
1154
1155	if (bi->bi_parent)
1156		B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0;	/* Endian safe if 0 */
1157}
1158
1159/* Get first empty buffer */
1160struct buffer_head *get_FEB(struct tree_balance *tb)
1161{
1162	int i;
1163	struct buffer_info bi;
1164
1165	for (i = 0; i < MAX_FEB_SIZE; i++)
1166		if (tb->FEB[i] != NULL)
1167			break;
1168
1169	if (i == MAX_FEB_SIZE)
1170		reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");
1171
1172	buffer_info_init_bh(tb, &bi, tb->FEB[i]);
1173	make_empty_node(&bi);
1174	set_buffer_uptodate(tb->FEB[i]);
1175	tb->used[i] = tb->FEB[i];
1176	tb->FEB[i] = NULL;
1177
1178	return tb->used[i];
1179}
1180
1181/* This is now used because reiserfs_free_block has to be able to
1182** schedule.
1183*/
1184static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
1185{
1186	int i;
1187
1188	if (buffer_dirty(bh))
1189		reiserfs_warning(tb->tb_sb, "reiserfs-12320",
1190				 "called with dirty buffer");
1191	for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
1192		if (!tb->thrown[i]) {
1193			tb->thrown[i] = bh;
1194			get_bh(bh);	/* free_thrown puts this */
1195			return;
1196		}
1197	reiserfs_warning(tb->tb_sb, "reiserfs-12321",
1198			 "too many thrown buffers");
1199}
1200
1201static void free_thrown(struct tree_balance *tb)
1202{
1203	int i;
1204	b_blocknr_t blocknr;
1205	for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
1206		if (tb->thrown[i]) {
1207			blocknr = tb->thrown[i]->b_blocknr;
1208			if (buffer_dirty(tb->thrown[i]))
1209				reiserfs_warning(tb->tb_sb, "reiserfs-12322",
1210						 "called with dirty buffer %d",
1211						 blocknr);
1212			brelse(tb->thrown[i]);	/* incremented in store_thrown */
1213			reiserfs_free_block(tb->transaction_handle, NULL,
1214					    blocknr, 0);
1215		}
1216	}
1217}
1218
1219void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
1220{
1221	struct block_head *blkh;
1222	blkh = B_BLK_HEAD(bh);
1223	set_blkh_level(blkh, FREE_LEVEL);
1224	set_blkh_nr_item(blkh, 0);
1225
1226	clear_buffer_dirty(bh);
1227	store_thrown(tb, bh);
1228}
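
/*
 * Editor's note: illustrative sketch only, not part of the original file.
 * It shows the deferred-free flow built from the helpers above: a node
 * dropped during balancing is only queued (store_thrown() keeps a
 * reference), and the blocks are actually released by free_thrown() once
 * balancing is done and reiserfs_free_block() is allowed to schedule.
 * The helper name is hypothetical.
 */
#if 0
static void example_drop_node(struct tree_balance *tb, struct buffer_head *bh)
{
	/* during balancing: mark the node free and queue it */
	reiserfs_invalidate_buffer(tb, bh);

	/* ... balancing continues without scheduling ... */

	/* after balancing (see do_balance_completed() below) */
	free_thrown(tb);
}
#endif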
1229
1230/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1231void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1232		 struct buffer_head *src, int n_src)
1233{
1234
1235	RFALSE(dest == NULL || src == NULL,
1236	       "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1237	       src, dest);
1238	RFALSE(!B_IS_KEYS_LEVEL(dest),
1239	       "vs-12310: invalid level (%z) for destination buffer. dest must be an internal node",
1240	       dest);
1241	RFALSE(n_dest < 0 || n_src < 0,
1242	       "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1243	RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1244	       "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1245	       n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1246
1247	if (B_IS_ITEMS_LEVEL(src))
1248		/* source buffer contains leaf node */
1249		memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
1250		       KEY_SIZE);
1251	else
1252		memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
1253		       KEY_SIZE);
1254
1255	do_balance_mark_internal_dirty(tb, dest, 0);
1256}
1257
1258int get_left_neighbor_position(struct tree_balance *tb, int h)
1259{
1260	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1261
1262	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
1263	       "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1264	       h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
1265
1266	if (Sh_position == 0)
1267		return B_NR_ITEMS(tb->FL[h]);
1268	else
1269		return Sh_position - 1;
1270}
1271
1272int get_right_neighbor_position(struct tree_balance *tb, int h)
1273{
1274	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1275
1276	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
1277	       "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1278	       h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
1279
1280	if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1281		return 0;
1282	else
1283		return Sh_position + 1;
1284}
1285
1286#ifdef CONFIG_REISERFS_CHECK
1287
1288int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
1289static void check_internal_node(struct super_block *s, struct buffer_head *bh,
1290				char *mes)
1291{
1292	struct disk_child *dc;
1293	int i;
1294
1295	RFALSE(!bh, "PAP-12336: bh == 0");
1296
1297	if (!bh || !B_IS_IN_TREE(bh))
1298		return;
1299
1300	RFALSE(!buffer_dirty(bh) &&
1301	       !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1302	       "PAP-12337: buffer (%b) must be dirty", bh);
1303	dc = B_N_CHILD(bh, 0);
1304
1305	for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
1306		if (!is_reusable(s, dc_block_number(dc), 1)) {
1307			print_cur_tb(mes);
1308			reiserfs_panic(s, "PAP-12338",
1309				       "invalid child pointer %y in %b",
1310				       dc, bh);
1311		}
1312	}
1313}
1314
1315static int locked_or_not_in_tree(struct tree_balance *tb,
1316				  struct buffer_head *bh, char *which)
1317{
1318	if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1319	    !B_IS_IN_TREE(bh)) {
1320		reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
1321		return 1;
1322	}
1323	return 0;
1324}
1325
1326static int check_before_balancing(struct tree_balance *tb)
1327{
1328	int retval = 0;
1329
1330	if (REISERFS_SB(tb->tb_sb)->cur_tb) {
1331		reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
1332			       "occurred based on cur_tb not being null at "
1333			       "this point in code. do_balance cannot properly "
1334			       "handle concurrent tree accesses on a same "
1335			       "mount point.");
1336	}
1337
1338	/* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1339	   prepped all of these for us). */
1340	if (tb->lnum[0]) {
1341		retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
1342		retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
1343		retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
1344		check_leaf(tb->L[0]);
1345	}
1346	if (tb->rnum[0]) {
1347		retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
1348		retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
1349		retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
1350		check_leaf(tb->R[0]);
1351	}
1352	retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
1353					"S[0]");
1354	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1355
1356	return retval;
1357}
1358
1359static void check_after_balance_leaf(struct tree_balance *tb)
1360{
1361	if (tb->lnum[0]) {
1362		if (B_FREE_SPACE(tb->L[0]) !=
1363		    MAX_CHILD_SIZE(tb->L[0]) -
1364		    dc_size(B_N_CHILD
1365			    (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
1366			print_cur_tb("12221");
1367			reiserfs_panic(tb->tb_sb, "PAP-12355",
1368				       "shift to left was incorrect");
1369		}
1370	}
1371	if (tb->rnum[0]) {
1372		if (B_FREE_SPACE(tb->R[0]) !=
1373		    MAX_CHILD_SIZE(tb->R[0]) -
1374		    dc_size(B_N_CHILD
1375			    (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
1376			print_cur_tb("12222");
1377			reiserfs_panic(tb->tb_sb, "PAP-12360",
1378				       "shift to right was incorrect");
1379		}
1380	}
1381	if (PATH_H_PBUFFER(tb->tb_path, 1) &&
1382	    (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
1383	     (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1384	      dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1385				PATH_H_POSITION(tb->tb_path, 1)))))) {
1386		int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
1387		int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1388			     dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1389					       PATH_H_POSITION(tb->tb_path,
1390							       1))));
1391		print_cur_tb("12223");
1392		reiserfs_warning(tb->tb_sb, "reiserfs-12363",
1393				 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1394				 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1395				 left,
1396				 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
1397				 PATH_H_PBUFFER(tb->tb_path, 1),
1398				 PATH_H_POSITION(tb->tb_path, 1),
1399				 dc_size(B_N_CHILD
1400					 (PATH_H_PBUFFER(tb->tb_path, 1),
1401					  PATH_H_POSITION(tb->tb_path, 1))),
1402				 right);
1403		reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
1404	}
1405}
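
/*
 * Editor's note: illustrative sketch only, not part of the original file.
 * It factors out the invariant checked above: a node's free space must
 * equal the maximal child size minus the size recorded for it in the
 * parent's disk_child entry.  The helper name is hypothetical.
 */
#if 0
static int node_size_matches_parent(struct buffer_head *bh,
				    struct buffer_head *parent, int position)
{
	return B_FREE_SPACE(bh) ==
	       MAX_CHILD_SIZE(bh) - dc_size(B_N_CHILD(parent, position));
}
#endif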
1406
1407static void check_leaf_level(struct tree_balance *tb)
1408{
1409	check_leaf(tb->L[0]);
1410	check_leaf(tb->R[0]);
1411	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1412}
1413
1414static void check_internal_levels(struct tree_balance *tb)
1415{
1416	int h;
1417
1418	/* check all internal nodes */
1419	for (h = 1; tb->insert_size[h]; h++) {
1420		check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1421				    "BAD BUFFER ON PATH");
1422		if (tb->lnum[h])
1423			check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1424		if (tb->rnum[h])
1425			check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1426	}
1427
1428}
1429
1430#endif
1431
1432/* Now we have all of the buffers that must be used in balancing of
1433   the tree.  We rely on the assumption that schedule() will not occur
1434   while do_balance works. ( Only interrupt handlers are acceptable.)
1435   We balance the tree according to the analysis made before this,
1436   using buffers already obtained.  For SMP support it will someday be
1437   necessary to add ordered locking of tb. */
1438
1439/* Some interesting rules of balancing:
1440
1441   we delete a maximum of two nodes per level per balancing: we never
1442   delete R; when we delete two of the three nodes L, S, R, we move
1443   them into R.
1444
1445   we only delete L if we are deleting two nodes; if we delete only
1446   one node, we delete S.
1447
1448   if we shift leaves then we shift as much as we can: this is a
1449   deliberate policy of extremism in node packing which results in
1450   higher average utilization after repeated random balance operations
1451   at the cost of more memory copies and more balancing as a result of
1452   small insertions to full nodes.
1453
1454   if we shift internal nodes we try to evenly balance the node
1455   utilization, with consequent less balancing at the cost of lower
1456   utilization.
1457
1458   one could argue that the policy for directories in leaves should be
1459   that of internal nodes, but we will wait until another day to
1460   evaluate this....  It would be nice to someday measure and prove
1461   these assumptions as to what is optimal....
1462
1463*/
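
/*
 * Editor's note: illustrative sketch only, not part of the original file.
 * It reduces the two packing policies described above to their arithmetic;
 * both helper names and parameters are hypothetical.
 */
#if 0
/* leaves: extremist packing - shift everything that fits into the neighbor */
static int leaf_items_to_shift(int items_that_fit)
{
	return items_that_fit;
}

/* internal nodes: shift so that the two nodes end up roughly even */
static int internal_items_to_shift(int src_items, int dst_items)
{
	return (src_items - dst_items) / 2;
}
#endif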
1464
1465static inline void do_balance_starts(struct tree_balance *tb)
1466{
1467	/* use print_cur_tb() to see initial state of struct
1468	   tree_balance */
1469
1470	/* store_print_tb (tb); */
1471
1472	/* do not delete, just comment it out */
1473/*    print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
1474	     "check");*/
1475	RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
1476#ifdef CONFIG_REISERFS_CHECK
1477	REISERFS_SB(tb->tb_sb)->cur_tb = tb;
1478#endif
1479}
1480
1481static inline void do_balance_completed(struct tree_balance *tb)
1482{
1483
1484#ifdef CONFIG_REISERFS_CHECK
1485	check_leaf_level(tb);
1486	check_internal_levels(tb);
1487	REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
1488#endif
1489
1490	/* reiserfs_free_block is no longer schedule safe.  So, we need to
1491	 ** put the buffers we want freed on the thrown list during do_balance,
1492	 ** and then free them now
1493	 */
1494
1495	REISERFS_SB(tb->tb_sb)->s_do_balance++;
1496
1497	/* release all nodes held to perform the balancing */
1498	unfix_nodes(tb);
1499
1500	free_thrown(tb);
1501}
1502
1503void do_balance(struct tree_balance *tb,	/* tree_balance structure */
1504		struct item_head *ih,	/* item header of inserted item */
1505		const char *body,	/* body  of inserted item or bytes to paste */
1506		int flag)
1507{				/* i - insert, d - delete
1508				   c - cut, p - paste
1509
1510				   Cut means delete part of an item
1511				   (includes removing an entry from a
1512				   directory).
1513
1514				   Delete means delete whole item.
1515
1516				   Insert means add a new item into the
1517				   tree.
1518
1519				   Paste means to append to the end of an
1520				   existing file or to insert a directory
1521				   entry.  */
1522	int child_pos,		/* position of a child node in its parent */
1523	 h;			/* level of the tree being processed */
1524	struct item_head insert_key[2];	/* in our processing of one level
1525					   we sometimes determine what
1526					   must be inserted into the next
1527					   higher level.  This insertion
1528					   consists of a key or two keys
1529					   and their corresponding
1530					   pointers */
1531	struct buffer_head *insert_ptr[2];	/* inserted node-ptrs for the next
1532						   level */
1533
1534	tb->tb_mode = flag;
1535	tb->need_balance_dirty = 0;
1536
1537	if (FILESYSTEM_CHANGED_TB(tb)) {
1538		reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
1539			       "changed");
1540	}
1541	/* if we have no real work to do  */
1542	if (!tb->insert_size[0]) {
1543		reiserfs_warning(tb->tb_sb, "PAP-12350",
1544				 "insert_size == 0, mode == %c", flag);
1545		unfix_nodes(tb);
1546		return;
1547	}
1548
1549	atomic_inc(&(fs_generation(tb->tb_sb)));
1550	do_balance_starts(tb);
1551
1552	/* balance_leaf() returns 0 except when combining L, R and S into
1553	   one node.  See balance_internal() for an explanation of this
1554	   line of code. */
1555	child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
1556	    balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
1557
1558#ifdef CONFIG_REISERFS_CHECK
1559	check_after_balance_leaf(tb);
1560#endif
1561
1562	/* Balance internal level of the tree. */
1563	for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
1564		child_pos =
1565		    balance_internal(tb, h, child_pos, insert_key, insert_ptr);
1566
1567	do_balance_completed(tb);
1568
1569}
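
/*
 * Editor's note: illustrative sketch only, not part of the original file.
 * It shows, schematically, how an insertion path drives do_balance() once
 * fix_nodes() has analysed the tree and pinned every buffer the balance
 * will touch (see fix_node.c and the real callers in stree.c).  The helper
 * name is hypothetical and error handling is omitted.
 */
#if 0
static void example_insert_item(struct tree_balance *tb,
				struct item_head *ih, const char *body)
{
	/* tb must have been prepared by fix_nodes() for an M_INSERT of ih/body */
	do_balance(tb, ih, body, M_INSERT);
	/* on return the tree is balanced and all fixed nodes are released */
}
#endif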