Linux Audio

Check our new training course

Loading...
v4.6
   1/*
   2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
   3 */
   4/* Reiserfs block (de)allocator, bitmap-based. */
   5
   6#include <linux/time.h>
   7#include "reiserfs.h"
   8#include <linux/errno.h>
   9#include <linux/buffer_head.h>
  10#include <linux/kernel.h>
  11#include <linux/pagemap.h>
  12#include <linux/vmalloc.h>
  13#include <linux/quotaops.h>
  14#include <linux/seq_file.h>
  15
  16#define PREALLOCATION_SIZE 9
  17
  18/* different reiserfs block allocator options */
  19
  20#define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits)
  21
  22#define  _ALLOC_concentrating_formatted_nodes 0
  23#define  _ALLOC_displacing_large_files 1
  24#define  _ALLOC_displacing_new_packing_localities 2
  25#define  _ALLOC_old_hashed_relocation 3
  26#define  _ALLOC_new_hashed_relocation 4
  27#define  _ALLOC_skip_busy 5
  28#define  _ALLOC_displace_based_on_dirid 6
  29#define  _ALLOC_hashed_formatted_nodes 7
  30#define  _ALLOC_old_way 8
  31#define  _ALLOC_hundredth_slices 9
  32#define  _ALLOC_dirid_groups 10
  33#define  _ALLOC_oid_groups 11
  34#define  _ALLOC_packing_groups 12
  35
  36#define  concentrating_formatted_nodes(s)	test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
  37#define  displacing_large_files(s)		test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
  38#define  displacing_new_packing_localities(s)	test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))
  39
  40#define SET_OPTION(optname) \
  41   do { \
  42	reiserfs_info(s, "block allocator option \"%s\" is set", #optname); \
  43	set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
  44    } while(0)
  45#define TEST_OPTION(optname, s) \
  46    test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
  47
  48static inline void get_bit_address(struct super_block *s,
  49				   b_blocknr_t block,
  50				   unsigned int *bmap_nr,
  51				   unsigned int *offset)
  52{
  53	/*
  54	 * It is in the bitmap block number equal to the block
  55	 * number divided by the number of bits in a block.
  56	 */
  57	*bmap_nr = block >> (s->s_blocksize_bits + 3);
  58	/* Within that bitmap block it is located at bit offset *offset. */
  59	*offset = block & ((s->s_blocksize << 3) - 1);
  60}
  61
  62int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
  63{
  64	unsigned int bmap, offset;
  65	unsigned int bmap_count = reiserfs_bmap_count(s);
  66
  67	if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
  68		reiserfs_error(s, "vs-4010",
  69			       "block number is out of range %lu (%u)",
  70			       block, SB_BLOCK_COUNT(s));
  71		return 0;
  72	}
  73
  74	get_bit_address(s, block, &bmap, &offset);
  75
  76	/*
  77	 * Old format filesystem? Unlikely, but the bitmaps are all
  78	 * up front so we need to account for it.
  79	 */
  80	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
  81			      &REISERFS_SB(s)->s_properties))) {
  82		b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
  83		if (block >= bmap1 &&
  84		    block <= bmap1 + bmap_count) {
  85			reiserfs_error(s, "vs-4019", "bitmap block %lu(%u) "
  86				       "can't be freed or reused",
  87				       block, bmap_count);
  88			return 0;
  89		}
  90	} else {
  91		if (offset == 0) {
  92			reiserfs_error(s, "vs-4020", "bitmap block %lu(%u) "
  93				       "can't be freed or reused",
  94				       block, bmap_count);
  95			return 0;
  96		}
  97	}
  98
  99	if (bmap >= bmap_count) {
 100		reiserfs_error(s, "vs-4030", "bitmap for requested block "
 101			       "is out of range: block=%lu, bitmap_nr=%u",
 102			       block, bmap);
 103		return 0;
 104	}
 105
 106	if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
 107		reiserfs_error(s, "vs-4050", "this is root block (%u), "
 108			       "it must be busy", SB_ROOT_BLOCK(s));
 109		return 0;
 110	}
 111
 112	return 1;
 113}
 114
 115/*
 116 * Searches in journal structures for a given block number (bmap, off).
 117 * If block is found in reiserfs journal it suggests next free block
 118 * candidate to test.
 119 */
 120static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
 121				      int off, int *next)
 122{
 123	b_blocknr_t tmp;
 124
 125	if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) {
 126		if (tmp) {	/* hint supplied */
 127			*next = tmp;
 128			PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
 129		} else {
 130			(*next) = off + 1;  /* inc offset to avoid looping. */
 131			PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
 132		}
 133		PROC_INFO_INC(s, scan_bitmap.retry);
 134		return 1;
 135	}
 136	return 0;
 137}
 138
 139/*
 140 * Searches for a window of zero bits with given minimum and maximum
 141 * lengths in one bitmap block
 142 */
 143static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
 144			     unsigned int bmap_n, int *beg, int boundary,
 145			     int min, int max, int unfm)
 146{
 147	struct super_block *s = th->t_super;
 148	struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
 149	struct buffer_head *bh;
 150	int end, next;
 151	int org = *beg;
 152
 153	BUG_ON(!th->t_trans_id);
 
 154	RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
 155	       "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
 156	PROC_INFO_INC(s, scan_bitmap.bmap);
 
 
 
 
 157
 158	if (!bi) {
 159		reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer "
 160			       "for bitmap %d", bmap_n);
 161		return 0;
 162	}
 163
 164	bh = reiserfs_read_bitmap_block(s, bmap_n);
 165	if (bh == NULL)
 166		return 0;
 167
 168	while (1) {
 169cont:
 170		if (bi->free_count < min) {
 171			brelse(bh);
 172			return 0;	/* No free blocks in this bitmap */
 173		}
 174
 175		/* search for a first zero bit -- beginning of a window */
 176		*beg = reiserfs_find_next_zero_le_bit
 177		    ((unsigned long *)(bh->b_data), boundary, *beg);
 178
 179		/*
 180		 * search for a zero bit fails or the rest of bitmap block
 181		 * cannot contain a zero window of minimum size
 182		 */
 183		if (*beg + min > boundary) {
 184			brelse(bh);
 185			return 0;
 186		}
 187
 188		if (unfm && is_block_in_journal(s, bmap_n, *beg, beg))
 189			continue;
 190		/* first zero bit found; we check next bits */
 191		for (end = *beg + 1;; end++) {
 192			if (end >= *beg + max || end >= boundary
 193			    || reiserfs_test_le_bit(end, bh->b_data)) {
 194				next = end;
 195				break;
 196			}
 197
 198			/*
 199			 * finding the other end of zero bit window requires
 200			 * looking into journal structures (in case of
 201			 * searching for free blocks for unformatted nodes)
 202			 */
 203			if (unfm && is_block_in_journal(s, bmap_n, end, &next))
 204				break;
 205		}
 206
 207		/*
 208		 * now (*beg) points to beginning of zero bits window,
 209		 * (end) points to one bit after the window end
 210		 */
 211
 212		/* found window of proper size */
 213		if (end - *beg >= min) {
 214			int i;
 215			reiserfs_prepare_for_journal(s, bh, 1);
 216			/*
 217			 * try to set all blocks used checking are
 218			 * they still free
 219			 */
 220			for (i = *beg; i < end; i++) {
 221				/* Don't check in journal again. */
 222				if (reiserfs_test_and_set_le_bit
 223				    (i, bh->b_data)) {
 224					/*
 225					 * bit was set by another process while
 226					 * we slept in prepare_for_journal()
 227					 */
 228					PROC_INFO_INC(s, scan_bitmap.stolen);
 229
 230					/*
 231					 * we can continue with smaller set
 232					 * of allocated blocks, if length of
 233					 * this set is more or equal to `min'
 234					 */
 235					if (i >= *beg + min) {
 236						end = i;
 237						break;
 238					}
 239
 240					/*
 241					 * otherwise we clear all bit
 242					 * were set ...
 243					 */
 244					while (--i >= *beg)
 245						reiserfs_clear_le_bit
 246						    (i, bh->b_data);
 247					reiserfs_restore_prepared_buffer(s, bh);
 248					*beg = org;
 249
 250					/*
 251					 * Search again in current block
 252					 * from beginning
 253					 */
 254					goto cont;
 255				}
 256			}
 257			bi->free_count -= (end - *beg);
 258			journal_mark_dirty(th, bh);
 259			brelse(bh);
 260
 261			/* free block count calculation */
 262			reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
 263						     1);
 264			PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
 265			journal_mark_dirty(th, SB_BUFFER_WITH_SB(s));
 266
 267			return end - (*beg);
 268		} else {
 269			*beg = next;
 270		}
 271	}
 272}
 273
 274static int bmap_hash_id(struct super_block *s, u32 id)
 275{
 276	char *hash_in = NULL;
 277	unsigned long hash;
 278	unsigned bm;
 279
 280	if (id <= 2) {
 281		bm = 1;
 282	} else {
 283		hash_in = (char *)(&id);
 284		hash = keyed_hash(hash_in, 4);
 285		bm = hash % reiserfs_bmap_count(s);
 286		if (!bm)
 287			bm = 1;
 288	}
 289	/* this can only be true when SB_BMAP_NR = 1 */
 290	if (bm >= reiserfs_bmap_count(s))
 291		bm = 0;
 292	return bm;
 293}
 294
 295/*
 296 * hashes the id and then returns > 0 if the block group for the
 297 * corresponding hash is full
 298 */
 299static inline int block_group_used(struct super_block *s, u32 id)
 300{
 301	int bm = bmap_hash_id(s, id);
 302	struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm];
 303
 304	/*
 305	 * If we don't have cached information on this bitmap block, we're
 306	 * going to have to load it later anyway. Loading it here allows us
 307	 * to make a better decision. This favors long-term performance gain
 308	 * with a better on-disk layout vs. a short term gain of skipping the
 309	 * read and potentially having a bad placement.
 310	 */
 311	if (info->free_count == UINT_MAX) {
 312		struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
 313		brelse(bh);
 314	}
 315
 316	if (info->free_count > ((s->s_blocksize << 3) * 60 / 100)) {
 317		return 0;
 318	}
 319	return 1;
 320}
 321
 322/*
 323 * the packing is returned in disk byte order
 324 */
 325__le32 reiserfs_choose_packing(struct inode * dir)
 326{
 327	__le32 packing;
 328	if (TEST_OPTION(packing_groups, dir->i_sb)) {
 329		u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
 330		/*
 331		 * some versions of reiserfsck expect packing locality 1 to be
 332		 * special
 333		 */
 334		if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir))
 335			packing = INODE_PKEY(dir)->k_objectid;
 336		else
 337			packing = INODE_PKEY(dir)->k_dir_id;
 338	} else
 339		packing = INODE_PKEY(dir)->k_objectid;
 340	return packing;
 341}
 342
 343/*
 344 * Tries to find contiguous zero bit window (given size) in given region of
 345 * bitmap and place new blocks there. Returns number of allocated blocks.
 346 */
 347static int scan_bitmap(struct reiserfs_transaction_handle *th,
 348		       b_blocknr_t * start, b_blocknr_t finish,
 349		       int min, int max, int unfm, sector_t file_block)
 350{
 351	int nr_allocated = 0;
 352	struct super_block *s = th->t_super;
 
 
 
 353	unsigned int bm, off;
 354	unsigned int end_bm, end_off;
 355	unsigned int off_max = s->s_blocksize << 3;
 356
 357	BUG_ON(!th->t_trans_id);
 358	PROC_INFO_INC(s, scan_bitmap.call);
 359
 360	/* No point in looking for more free blocks */
 361	if (SB_FREE_BLOCKS(s) <= 0)
 362		return 0;
 363
 364	get_bit_address(s, *start, &bm, &off);
 365	get_bit_address(s, finish, &end_bm, &end_off);
 366	if (bm > reiserfs_bmap_count(s))
 367		return 0;
 368	if (end_bm > reiserfs_bmap_count(s))
 369		end_bm = reiserfs_bmap_count(s);
 370
 371	/*
 372	 * When the bitmap is more than 10% free, anyone can allocate.
 373	 * When it's less than 10% free, only files that already use the
 374	 * bitmap are allowed. Once we pass 80% full, this restriction
 375	 * is lifted.
 376	 *
 377	 * We do this so that files that grow later still have space close to
 378	 * their original allocation. This improves locality, and presumably
 379	 * performance as a result.
 380	 *
 381	 * This is only an allocation policy and does not make up for getting a
 382	 * bad hint. Decent hinting must be implemented for this to work well.
 383	 */
 384	if (TEST_OPTION(skip_busy, s)
 385	    && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) {
 386		for (; bm < end_bm; bm++, off = 0) {
 387			if ((off && (!unfm || (file_block != 0)))
 388			    || SB_AP_BITMAP(s)[bm].free_count >
 389			    (s->s_blocksize << 3) / 10)
 390				nr_allocated =
 391				    scan_bitmap_block(th, bm, &off, off_max,
 392						      min, max, unfm);
 393			if (nr_allocated)
 394				goto ret;
 395		}
 396		/* we know from above that start is a reasonable number */
 397		get_bit_address(s, *start, &bm, &off);
 398	}
 399
 400	for (; bm < end_bm; bm++, off = 0) {
 401		nr_allocated =
 402		    scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
 403		if (nr_allocated)
 404			goto ret;
 405	}
 406
 407	nr_allocated =
 408	    scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
 409
 410ret:
 411	*start = bm * off_max + off;
 412	return nr_allocated;
 413
 414}
 415
 416static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
 417				 struct inode *inode, b_blocknr_t block,
 418				 int for_unformatted)
 419{
 420	struct super_block *s = th->t_super;
 421	struct reiserfs_super_block *rs;
 422	struct buffer_head *sbh, *bmbh;
 423	struct reiserfs_bitmap_info *apbi;
 424	unsigned int nr, offset;
 425
 426	BUG_ON(!th->t_trans_id);
 
 427	PROC_INFO_INC(s, free_block);
 
 428	rs = SB_DISK_SUPER_BLOCK(s);
 429	sbh = SB_BUFFER_WITH_SB(s);
 430	apbi = SB_AP_BITMAP(s);
 431
 432	get_bit_address(s, block, &nr, &offset);
 433
 434	if (nr >= reiserfs_bmap_count(s)) {
 435		reiserfs_error(s, "vs-4075", "block %lu is out of range",
 436			       block);
 437		return;
 438	}
 439
 440	bmbh = reiserfs_read_bitmap_block(s, nr);
 441	if (!bmbh)
 442		return;
 443
 444	reiserfs_prepare_for_journal(s, bmbh, 1);
 445
 446	/* clear bit for the given block in bit map */
 447	if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) {
 448		reiserfs_error(s, "vs-4080",
 449			       "block %lu: bit already cleared", block);
 450	}
 451	apbi[nr].free_count++;
 452	journal_mark_dirty(th, bmbh);
 453	brelse(bmbh);
 454
 455	reiserfs_prepare_for_journal(s, sbh, 1);
 456	/* update super block */
 457	set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
 458
 459	journal_mark_dirty(th, sbh);
 460	if (for_unformatted) {
 461		int depth = reiserfs_write_unlock_nested(s);
 462		dquot_free_block_nodirty(inode, 1);
 463		reiserfs_write_lock_nested(s, depth);
 464	}
 465}
 466
 467void reiserfs_free_block(struct reiserfs_transaction_handle *th,
 468			 struct inode *inode, b_blocknr_t block,
 469			 int for_unformatted)
 470{
 471	struct super_block *s = th->t_super;
 472
 473	BUG_ON(!th->t_trans_id);
 
 474	RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
 475	if (!is_reusable(s, block, 1))
 476		return;
 477
 478	if (block > sb_block_count(REISERFS_SB(s)->s_rs)) {
 479		reiserfs_error(th->t_super, "bitmap-4072",
 480			       "Trying to free block outside file system "
 481			       "boundaries (%lu > %lu)",
 482			       block, sb_block_count(REISERFS_SB(s)->s_rs));
 483		return;
 484	}
 485	/* mark it before we clear it, just in case */
 486	journal_mark_freed(th, s, block);
 487	_reiserfs_free_block(th, inode, block, for_unformatted);
 488}
 489
 490/* preallocated blocks don't need to be run through journal_mark_freed */
 491static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
 492					 struct inode *inode, b_blocknr_t block)
 493{
 494	BUG_ON(!th->t_trans_id);
 495	RFALSE(!th->t_super,
 496	       "vs-4060: trying to free block on nonexistent device");
 497	if (!is_reusable(th->t_super, block, 1))
 498		return;
 499	_reiserfs_free_block(th, inode, block, 1);
 500}
 501
 502static void __discard_prealloc(struct reiserfs_transaction_handle *th,
 503			       struct reiserfs_inode_info *ei)
 504{
 505	unsigned long save = ei->i_prealloc_block;
 506	int dirty = 0;
 507	struct inode *inode = &ei->vfs_inode;
 508
 509	BUG_ON(!th->t_trans_id);
 510#ifdef CONFIG_REISERFS_CHECK
 511	if (ei->i_prealloc_count < 0)
 512		reiserfs_error(th->t_super, "zam-4001",
 513			       "inode has negative prealloc blocks count.");
 514#endif
 515	while (ei->i_prealloc_count > 0) {
 516		reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
 517		ei->i_prealloc_block++;
 518		ei->i_prealloc_count--;
 519		dirty = 1;
 520	}
 521	if (dirty)
 522		reiserfs_update_sd(th, inode);
 523	ei->i_prealloc_block = save;
 524	list_del_init(&ei->i_prealloc_list);
 525}
 526
 527/* FIXME: It should be inline function */
 528void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
 529			       struct inode *inode)
 530{
 531	struct reiserfs_inode_info *ei = REISERFS_I(inode);
 532
 533	BUG_ON(!th->t_trans_id);
 534	if (ei->i_prealloc_count)
 535		__discard_prealloc(th, ei);
 536}
 537
 538void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
 539{
 540	struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
 541
 542	BUG_ON(!th->t_trans_id);
 
 543	while (!list_empty(plist)) {
 544		struct reiserfs_inode_info *ei;
 545		ei = list_entry(plist->next, struct reiserfs_inode_info,
 546				i_prealloc_list);
 547#ifdef CONFIG_REISERFS_CHECK
 548		if (!ei->i_prealloc_count) {
 549			reiserfs_error(th->t_super, "zam-4001",
 550				       "inode is in prealloc list but has "
 551				       "no preallocated blocks.");
 552		}
 553#endif
 554		__discard_prealloc(th, ei);
 555	}
 556}
 557
 558void reiserfs_init_alloc_options(struct super_block *s)
 559{
 560	set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
 561	set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
 562	set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s));
 563}
 564
 565/* block allocator related options are parsed here */
 566int reiserfs_parse_alloc_options(struct super_block *s, char *options)
 567{
 568	char *this_char, *value;
 569
 570	/* clear default settings */
 571	REISERFS_SB(s)->s_alloc_options.bits = 0;
 572
 573	while ((this_char = strsep(&options, ":")) != NULL) {
 574		if ((value = strchr(this_char, '=')) != NULL)
 575			*value++ = 0;
 576
 577		if (!strcmp(this_char, "concentrating_formatted_nodes")) {
 578			int temp;
 579			SET_OPTION(concentrating_formatted_nodes);
 580			temp = (value
 581				&& *value) ? simple_strtoul(value, &value,
 582							    0) : 10;
 583			if (temp <= 0 || temp > 100) {
 584				REISERFS_SB(s)->s_alloc_options.border = 10;
 585			} else {
 586				REISERFS_SB(s)->s_alloc_options.border =
 587				    100 / temp;
 588			}
 589			continue;
 590		}
 591		if (!strcmp(this_char, "displacing_large_files")) {
 592			SET_OPTION(displacing_large_files);
 593			REISERFS_SB(s)->s_alloc_options.large_file_size =
 594			    (value
 595			     && *value) ? simple_strtoul(value, &value, 0) : 16;
 596			continue;
 597		}
 598		if (!strcmp(this_char, "displacing_new_packing_localities")) {
 599			SET_OPTION(displacing_new_packing_localities);
 600			continue;
 601		}
 602
 603		if (!strcmp(this_char, "old_hashed_relocation")) {
 604			SET_OPTION(old_hashed_relocation);
 605			continue;
 606		}
 607
 608		if (!strcmp(this_char, "new_hashed_relocation")) {
 609			SET_OPTION(new_hashed_relocation);
 610			continue;
 611		}
 612
 613		if (!strcmp(this_char, "dirid_groups")) {
 614			SET_OPTION(dirid_groups);
 615			continue;
 616		}
 617		if (!strcmp(this_char, "oid_groups")) {
 618			SET_OPTION(oid_groups);
 619			continue;
 620		}
 621		if (!strcmp(this_char, "packing_groups")) {
 622			SET_OPTION(packing_groups);
 623			continue;
 624		}
 625		if (!strcmp(this_char, "hashed_formatted_nodes")) {
 626			SET_OPTION(hashed_formatted_nodes);
 627			continue;
 628		}
 629
 630		if (!strcmp(this_char, "skip_busy")) {
 631			SET_OPTION(skip_busy);
 632			continue;
 633		}
 634
 635		if (!strcmp(this_char, "hundredth_slices")) {
 636			SET_OPTION(hundredth_slices);
 637			continue;
 638		}
 639
 640		if (!strcmp(this_char, "old_way")) {
 641			SET_OPTION(old_way);
 642			continue;
 643		}
 644
 645		if (!strcmp(this_char, "displace_based_on_dirid")) {
 646			SET_OPTION(displace_based_on_dirid);
 647			continue;
 648		}
 649
 650		if (!strcmp(this_char, "preallocmin")) {
 651			REISERFS_SB(s)->s_alloc_options.preallocmin =
 652			    (value
 653			     && *value) ? simple_strtoul(value, &value, 0) : 4;
 654			continue;
 655		}
 656
 657		if (!strcmp(this_char, "preallocsize")) {
 658			REISERFS_SB(s)->s_alloc_options.preallocsize =
 659			    (value
 660			     && *value) ? simple_strtoul(value, &value,
 661							 0) :
 662			    PREALLOCATION_SIZE;
 663			continue;
 664		}
 665
 666		reiserfs_warning(s, "zam-4001", "unknown option - %s",
 667				 this_char);
 668		return 1;
 669	}
 670
 671	reiserfs_info(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
 672	return 0;
 673}
 674
 675static void print_sep(struct seq_file *seq, int *first)
 676{
 677	if (!*first)
 678		seq_puts(seq, ":");
 679	else
 680		*first = 0;
 681}
 682
 683void show_alloc_options(struct seq_file *seq, struct super_block *s)
 684{
 685	int first = 1;
 686
 687	if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) |
 688		(1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups)))
 689		return;
 690
 691	seq_puts(seq, ",alloc=");
 692
 693	if (TEST_OPTION(concentrating_formatted_nodes, s)) {
 694		print_sep(seq, &first);
 695		if (REISERFS_SB(s)->s_alloc_options.border != 10) {
 696			seq_printf(seq, "concentrating_formatted_nodes=%d",
 697				100 / REISERFS_SB(s)->s_alloc_options.border);
 698		} else
 699			seq_puts(seq, "concentrating_formatted_nodes");
 700	}
 701	if (TEST_OPTION(displacing_large_files, s)) {
 702		print_sep(seq, &first);
 703		if (REISERFS_SB(s)->s_alloc_options.large_file_size != 16) {
 704			seq_printf(seq, "displacing_large_files=%lu",
 705			    REISERFS_SB(s)->s_alloc_options.large_file_size);
 706		} else
 707			seq_puts(seq, "displacing_large_files");
 708	}
 709	if (TEST_OPTION(displacing_new_packing_localities, s)) {
 710		print_sep(seq, &first);
 711		seq_puts(seq, "displacing_new_packing_localities");
 712	}
 713	if (TEST_OPTION(old_hashed_relocation, s)) {
 714		print_sep(seq, &first);
 715		seq_puts(seq, "old_hashed_relocation");
 716	}
 717	if (TEST_OPTION(new_hashed_relocation, s)) {
 718		print_sep(seq, &first);
 719		seq_puts(seq, "new_hashed_relocation");
 720	}
 721	if (TEST_OPTION(dirid_groups, s)) {
 722		print_sep(seq, &first);
 723		seq_puts(seq, "dirid_groups");
 724	}
 725	if (TEST_OPTION(oid_groups, s)) {
 726		print_sep(seq, &first);
 727		seq_puts(seq, "oid_groups");
 728	}
 729	if (TEST_OPTION(packing_groups, s)) {
 730		print_sep(seq, &first);
 731		seq_puts(seq, "packing_groups");
 732	}
 733	if (TEST_OPTION(hashed_formatted_nodes, s)) {
 734		print_sep(seq, &first);
 735		seq_puts(seq, "hashed_formatted_nodes");
 736	}
 737	if (TEST_OPTION(skip_busy, s)) {
 738		print_sep(seq, &first);
 739		seq_puts(seq, "skip_busy");
 740	}
 741	if (TEST_OPTION(hundredth_slices, s)) {
 742		print_sep(seq, &first);
 743		seq_puts(seq, "hundredth_slices");
 744	}
 745	if (TEST_OPTION(old_way, s)) {
 746		print_sep(seq, &first);
 747		seq_puts(seq, "old_way");
 748	}
 749	if (TEST_OPTION(displace_based_on_dirid, s)) {
 750		print_sep(seq, &first);
 751		seq_puts(seq, "displace_based_on_dirid");
 752	}
 753	if (REISERFS_SB(s)->s_alloc_options.preallocmin != 0) {
 754		print_sep(seq, &first);
 755		seq_printf(seq, "preallocmin=%d",
 756				REISERFS_SB(s)->s_alloc_options.preallocmin);
 757	}
 758	if (REISERFS_SB(s)->s_alloc_options.preallocsize != 17) {
 759		print_sep(seq, &first);
 760		seq_printf(seq, "preallocsize=%d",
 761				REISERFS_SB(s)->s_alloc_options.preallocsize);
 762	}
 763}
 764
 765static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
 766{
 767	char *hash_in;
 768
 769	if (hint->formatted_node) {
 770		hash_in = (char *)&hint->key.k_dir_id;
 771	} else {
 772		if (!hint->inode) {
 773			/*hint->search_start = hint->beg;*/
 774			hash_in = (char *)&hint->key.k_dir_id;
 775		} else
 776		    if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 777			hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
 778		else
 779			hash_in =
 780			    (char *)(&INODE_PKEY(hint->inode)->k_objectid);
 781	}
 782
 783	hint->search_start =
 784	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
 785}
 786
 787/*
 788 * Relocation based on dirid, hashing them into a given bitmap block
 789 * files. Formatted nodes are unaffected, a separate policy covers them
 790 */
 791static void dirid_groups(reiserfs_blocknr_hint_t * hint)
 792{
 793	unsigned long hash;
 794	__u32 dirid = 0;
 795	int bm = 0;
 796	struct super_block *sb = hint->th->t_super;
 797
 798	if (hint->inode)
 799		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
 800	else if (hint->formatted_node)
 801		dirid = hint->key.k_dir_id;
 802
 803	if (dirid) {
 804		bm = bmap_hash_id(sb, dirid);
 805		hash = bm * (sb->s_blocksize << 3);
 806		/* give a portion of the block group to metadata */
 807		if (hint->inode)
 808			hash += sb->s_blocksize / 2;
 809		hint->search_start = hash;
 810	}
 811}
 812
 813/*
 814 * Relocation based on oid, hashing them into a given bitmap block
 815 * files. Formatted nodes are unaffected, a separate policy covers them
 816 */
 817static void oid_groups(reiserfs_blocknr_hint_t * hint)
 818{
 819	if (hint->inode) {
 820		unsigned long hash;
 821		__u32 oid;
 822		__u32 dirid;
 823		int bm;
 824
 825		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
 826
 827		/*
 828		 * keep the root dir and it's first set of subdirs close to
 829		 * the start of the disk
 830		 */
 831		if (dirid <= 2)
 832			hash = (hint->inode->i_sb->s_blocksize << 3);
 833		else {
 834			oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
 835			bm = bmap_hash_id(hint->inode->i_sb, oid);
 836			hash = bm * (hint->inode->i_sb->s_blocksize << 3);
 837		}
 838		hint->search_start = hash;
 839	}
 840}
 841
 842/*
 843 * returns 1 if it finds an indirect item and gets valid hint info
 844 * from it, otherwise 0
 845 */
 846static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
 847{
 848	struct treepath *path;
 849	struct buffer_head *bh;
 850	struct item_head *ih;
 851	int pos_in_item;
 852	__le32 *item;
 853	int ret = 0;
 854
 855	/*
 856	 * reiserfs code can call this function w/o pointer to path
 857	 * structure supplied; then we rely on supplied search_start
 858	 */
 859	if (!hint->path)
 860		return 0;
 861
 862	path = hint->path;
 863	bh = get_last_bh(path);
 864	RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
 865	ih = tp_item_head(path);
 866	pos_in_item = path->pos_in_item;
 867	item = tp_item_body(path);
 868
 869	hint->search_start = bh->b_blocknr;
 870
 871	/*
 872	 * for indirect item: go to left and look for the first non-hole entry
 873	 * in the indirect item
 874	 */
 875	if (!hint->formatted_node && is_indirect_le_ih(ih)) {
 
 
 876		if (pos_in_item == I_UNFM_NUM(ih))
 877			pos_in_item--;
 
 878		while (pos_in_item >= 0) {
 879			int t = get_block_num(item, pos_in_item);
 880			if (t) {
 881				hint->search_start = t;
 882				ret = 1;
 883				break;
 884			}
 885			pos_in_item--;
 886		}
 887	}
 888
 889	/* does result value fit into specified region? */
 890	return ret;
 891}
 892
 893/*
 894 * should be, if formatted node, then try to put on first part of the device
 895 * specified as number of percent with mount option device, else try to put
 896 * on last of device.  This is not to say it is good code to do so,
 897 * but the effect should be measured.
 898 */
 899static inline void set_border_in_hint(struct super_block *s,
 900				      reiserfs_blocknr_hint_t * hint)
 901{
 902	b_blocknr_t border =
 903	    SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
 904
 905	if (hint->formatted_node)
 906		hint->end = border - 1;
 907	else
 908		hint->beg = border;
 909}
 910
 911static inline void displace_large_file(reiserfs_blocknr_hint_t * hint)
 912{
 913	if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 914		hint->search_start =
 915		    hint->beg +
 916		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id),
 917			       4) % (hint->end - hint->beg);
 918	else
 919		hint->search_start =
 920		    hint->beg +
 921		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid),
 922			       4) % (hint->end - hint->beg);
 923}
 924
 925static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint)
 926{
 927	char *hash_in;
 928
 929	if (!hint->inode)
 930		hash_in = (char *)&hint->key.k_dir_id;
 931	else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 932		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
 933	else
 934		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
 935
 936	hint->search_start =
 937	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
 938}
 939
 940static inline int
 941this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *
 942						   hint)
 943{
 944	return hint->block ==
 945	    REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size;
 946}
 947
 948#ifdef DISPLACE_NEW_PACKING_LOCALITIES
 949static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)
 950{
 951	struct in_core_key *key = &hint->key;
 952
 953	hint->th->displace_new_blocks = 0;
 954	hint->search_start =
 955	    hint->beg + keyed_hash((char *)(&key->k_objectid),
 956				   4) % (hint->end - hint->beg);
 957}
 958#endif
 959
 960static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint)
 961{
 962	b_blocknr_t border;
 963	u32 hash_in;
 964
 965	if (hint->formatted_node || hint->inode == NULL) {
 966		return 0;
 967	}
 968
 969	hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
 970	border =
 971	    hint->beg + (u32) keyed_hash(((char *)(&hash_in)),
 972					 4) % (hint->end - hint->beg - 1);
 973	if (border > hint->search_start)
 974		hint->search_start = border;
 975
 976	return 1;
 977}
 978
 979static inline int old_way(reiserfs_blocknr_hint_t * hint)
 980{
 981	b_blocknr_t border;
 982
 983	if (hint->formatted_node || hint->inode == NULL) {
 984		return 0;
 985	}
 986
 987	border =
 988	    hint->beg +
 989	    le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end -
 990							      hint->beg);
 991	if (border > hint->search_start)
 992		hint->search_start = border;
 993
 994	return 1;
 995}
 996
 997static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint)
 998{
 999	struct in_core_key *key = &hint->key;
1000	b_blocknr_t slice_start;
1001
1002	slice_start =
1003	    (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100);
1004	if (slice_start > hint->search_start
1005	    || slice_start + (hint->end / 100) <= hint->search_start) {
1006		hint->search_start = slice_start;
1007	}
1008}
1009
1010static void determine_search_start(reiserfs_blocknr_hint_t * hint,
1011				   int amount_needed)
1012{
1013	struct super_block *s = hint->th->t_super;
1014	int unfm_hint;
1015
1016	hint->beg = 0;
1017	hint->end = SB_BLOCK_COUNT(s) - 1;
1018
1019	/* This is former border algorithm. Now with tunable border offset */
1020	if (concentrating_formatted_nodes(s))
1021		set_border_in_hint(s, hint);
1022
1023#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1024	/*
1025	 * whenever we create a new directory, we displace it.  At first
1026	 * we will hash for location, later we might look for a moderately
1027	 * empty place for it
1028	 */
1029	if (displacing_new_packing_localities(s)
1030	    && hint->th->displace_new_blocks) {
1031		displace_new_packing_locality(hint);
1032
1033		/*
1034		 * we do not continue determine_search_start,
1035		 * if new packing locality is being displaced
1036		 */
1037		return;
1038	}
1039#endif
1040
1041	/*
1042	 * all persons should feel encouraged to add more special cases
1043	 * here and test them
1044	 */
1045
1046	if (displacing_large_files(s) && !hint->formatted_node
1047	    && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
1048		displace_large_file(hint);
1049		return;
1050	}
1051
1052	/*
1053	 * if none of our special cases is relevant, use the left
1054	 * neighbor in the tree order of the new node we are allocating for
1055	 */
1056	if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
1057		hash_formatted_node(hint);
1058		return;
1059	}
1060
1061	unfm_hint = get_left_neighbor(hint);
1062
1063	/*
1064	 * Mimic old block allocator behaviour, that is if VFS allowed for
1065	 * preallocation, new blocks are displaced based on directory ID.
1066	 * Also, if suggested search_start is less than last preallocated
1067	 * block, we start searching from it, assuming that HDD dataflow
1068	 * is faster in forward direction
1069	 */
1070	if (TEST_OPTION(old_way, s)) {
1071		if (!hint->formatted_node) {
1072			if (!reiserfs_hashed_relocation(s))
1073				old_way(hint);
1074			else if (!reiserfs_no_unhashed_relocation(s))
1075				old_hashed_relocation(hint);
1076
1077			if (hint->inode
1078			    && hint->search_start <
1079			    REISERFS_I(hint->inode)->i_prealloc_block)
1080				hint->search_start =
1081				    REISERFS_I(hint->inode)->i_prealloc_block;
1082		}
1083		return;
1084	}
1085
1086	/* This is an approach proposed by Hans */
1087	if (TEST_OPTION(hundredth_slices, s)
1088	    && !(displacing_large_files(s) && !hint->formatted_node)) {
1089		hundredth_slices(hint);
1090		return;
1091	}
1092
1093	/* old_hashed_relocation only works on unformatted */
1094	if (!unfm_hint && !hint->formatted_node &&
1095	    TEST_OPTION(old_hashed_relocation, s)) {
1096		old_hashed_relocation(hint);
1097	}
1098
1099	/* new_hashed_relocation works with both formatted/unformatted nodes */
1100	if ((!unfm_hint || hint->formatted_node) &&
1101	    TEST_OPTION(new_hashed_relocation, s)) {
1102		new_hashed_relocation(hint);
1103	}
1104
1105	/* dirid grouping works only on unformatted nodes */
1106	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
1107		dirid_groups(hint);
1108	}
1109#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1110	if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
1111		dirid_groups(hint);
1112	}
1113#endif
1114
1115	/* oid grouping works only on unformatted nodes */
1116	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) {
1117		oid_groups(hint);
1118	}
1119	return;
1120}
1121
1122static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
1123{
1124	/* make minimum size a mount option and benchmark both ways */
1125	/* we preallocate blocks only for regular files, specific size */
1126	/* benchmark preallocating always and see what happens */
1127
1128	hint->prealloc_size = 0;
1129
1130	if (!hint->formatted_node && hint->preallocate) {
1131		if (S_ISREG(hint->inode->i_mode)
1132		    && hint->inode->i_size >=
1133		    REISERFS_SB(hint->th->t_super)->s_alloc_options.
1134		    preallocmin * hint->inode->i_sb->s_blocksize)
1135			hint->prealloc_size =
1136			    REISERFS_SB(hint->th->t_super)->s_alloc_options.
1137			    preallocsize - 1;
1138	}
1139	return CARRY_ON;
1140}
1141
 
 
1142static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
1143						 b_blocknr_t * new_blocknrs,
1144						 b_blocknr_t start,
1145						 b_blocknr_t finish, int min,
1146						 int amount_needed,
1147						 int prealloc_size)
1148{
1149	int rest = amount_needed;
1150	int nr_allocated;
1151
1152	while (rest > 0 && start <= finish) {
1153		nr_allocated = scan_bitmap(hint->th, &start, finish, min,
1154					   rest + prealloc_size,
1155					   !hint->formatted_node, hint->block);
1156
1157		if (nr_allocated == 0)	/* no new blocks allocated, return */
1158			break;
1159
1160		/* fill free_blocknrs array first */
1161		while (rest > 0 && nr_allocated > 0) {
1162			*new_blocknrs++ = start++;
1163			rest--;
1164			nr_allocated--;
1165		}
1166
1167		/* do we have something to fill prealloc. array also ? */
1168		if (nr_allocated > 0) {
1169			/*
1170			 * it means prealloc_size was greater that 0 and
1171			 * we do preallocation
1172			 */
1173			list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
1174				 &SB_JOURNAL(hint->th->t_super)->
1175				 j_prealloc_list);
1176			REISERFS_I(hint->inode)->i_prealloc_block = start;
1177			REISERFS_I(hint->inode)->i_prealloc_count =
1178			    nr_allocated;
1179			break;
1180		}
1181	}
1182
1183	return (amount_needed - rest);
1184}
1185
1186static inline int blocknrs_and_prealloc_arrays_from_search_start
1187    (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs,
1188     int amount_needed) {
1189	struct super_block *s = hint->th->t_super;
1190	b_blocknr_t start = hint->search_start;
1191	b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
1192	int passno = 0;
1193	int nr_allocated = 0;
1194	int depth;
1195
1196	determine_prealloc_size(hint);
1197	if (!hint->formatted_node) {
1198		int quota_ret;
1199#ifdef REISERQUOTA_DEBUG
1200		reiserfs_debug(s, REISERFS_DEBUG_CODE,
1201			       "reiserquota: allocating %d blocks id=%u",
1202			       amount_needed, hint->inode->i_uid);
1203#endif
1204		depth = reiserfs_write_unlock_nested(s);
1205		quota_ret =
1206		    dquot_alloc_block_nodirty(hint->inode, amount_needed);
1207		if (quota_ret) {	/* Quota exceeded? */
1208			reiserfs_write_lock_nested(s, depth);
1209			return QUOTA_EXCEEDED;
1210		}
1211		if (hint->preallocate && hint->prealloc_size) {
1212#ifdef REISERQUOTA_DEBUG
1213			reiserfs_debug(s, REISERFS_DEBUG_CODE,
1214				       "reiserquota: allocating (prealloc) %d blocks id=%u",
1215				       hint->prealloc_size, hint->inode->i_uid);
1216#endif
1217			quota_ret = dquot_prealloc_block_nodirty(hint->inode,
1218							 hint->prealloc_size);
1219			if (quota_ret)
1220				hint->preallocate = hint->prealloc_size = 0;
1221		}
1222		/* for unformatted nodes, force large allocations */
1223		reiserfs_write_lock_nested(s, depth);
1224	}
1225
1226	do {
1227		switch (passno++) {
1228		case 0:	/* Search from hint->search_start to end of disk */
1229			start = hint->search_start;
1230			finish = SB_BLOCK_COUNT(s) - 1;
1231			break;
1232		case 1:	/* Search from hint->beg to hint->search_start */
1233			start = hint->beg;
1234			finish = hint->search_start;
1235			break;
1236		case 2:	/* Last chance: Search from 0 to hint->beg */
1237			start = 0;
1238			finish = hint->beg;
1239			break;
1240		default:
1241			/* We've tried searching everywhere, not enough space */
1242			/* Free the blocks */
1243			if (!hint->formatted_node) {
1244#ifdef REISERQUOTA_DEBUG
1245				reiserfs_debug(s, REISERFS_DEBUG_CODE,
1246					       "reiserquota: freeing (nospace) %d blocks id=%u",
1247					       amount_needed +
1248					       hint->prealloc_size -
1249					       nr_allocated,
1250					       hint->inode->i_uid);
1251#endif
1252				/* Free not allocated blocks */
1253				depth = reiserfs_write_unlock_nested(s);
1254				dquot_free_block_nodirty(hint->inode,
1255					amount_needed + hint->prealloc_size -
1256					nr_allocated);
1257				reiserfs_write_lock_nested(s, depth);
1258			}
1259			while (nr_allocated--)
1260				reiserfs_free_block(hint->th, hint->inode,
1261						    new_blocknrs[nr_allocated],
1262						    !hint->formatted_node);
1263
1264			return NO_DISK_SPACE;
1265		}
1266	} while ((nr_allocated += allocate_without_wrapping_disk(hint,
1267								 new_blocknrs +
1268								 nr_allocated,
1269								 start, finish,
1270								 1,
1271								 amount_needed -
1272								 nr_allocated,
1273								 hint->
1274								 prealloc_size))
1275		 < amount_needed);
1276	if (!hint->formatted_node &&
1277	    amount_needed + hint->prealloc_size >
1278	    nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) {
1279		/* Some of preallocation blocks were not allocated */
1280#ifdef REISERQUOTA_DEBUG
1281		reiserfs_debug(s, REISERFS_DEBUG_CODE,
1282			       "reiserquota: freeing (failed prealloc) %d blocks id=%u",
1283			       amount_needed + hint->prealloc_size -
1284			       nr_allocated -
1285			       REISERFS_I(hint->inode)->i_prealloc_count,
1286			       hint->inode->i_uid);
1287#endif
1288
1289		depth = reiserfs_write_unlock_nested(s);
1290		dquot_free_block_nodirty(hint->inode, amount_needed +
1291					 hint->prealloc_size - nr_allocated -
1292					 REISERFS_I(hint->inode)->
1293					 i_prealloc_count);
1294		reiserfs_write_lock_nested(s, depth);
1295	}
1296
1297	return CARRY_ON;
1298}
1299
1300/* grab new blocknrs from preallocated list */
1301/* return amount still needed after using them */
1302static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
1303					      b_blocknr_t * new_blocknrs,
1304					      int amount_needed)
1305{
1306	struct inode *inode = hint->inode;
1307
1308	if (REISERFS_I(inode)->i_prealloc_count > 0) {
1309		while (amount_needed) {
1310
1311			*new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++;
1312			REISERFS_I(inode)->i_prealloc_count--;
1313
1314			amount_needed--;
1315
1316			if (REISERFS_I(inode)->i_prealloc_count <= 0) {
1317				list_del(&REISERFS_I(inode)->i_prealloc_list);
1318				break;
1319			}
1320		}
1321	}
1322	/* return amount still needed after using preallocated blocks */
1323	return amount_needed;
1324}
1325
1326int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint,
1327			       b_blocknr_t *new_blocknrs,
1328			       int amount_needed,
1329			       /* Amount of blocks we have already reserved */
1330			       int reserved_by_us)
1331{
1332	int initial_amount_needed = amount_needed;
1333	int ret;
1334	struct super_block *s = hint->th->t_super;
1335
1336	/* Check if there is enough space, taking into account reserved space */
1337	if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks <
1338	    amount_needed - reserved_by_us)
1339		return NO_DISK_SPACE;
1340	/* should this be if !hint->inode &&  hint->preallocate? */
1341	/* do you mean hint->formatted_node can be removed ? - Zam */
1342	/*
1343	 * hint->formatted_node cannot be removed because we try to access
1344	 * inode information here, and there is often no inode associated with
1345	 * metadata allocations - green
1346	 */
1347
1348	if (!hint->formatted_node && hint->preallocate) {
1349		amount_needed = use_preallocated_list_if_available
1350		    (hint, new_blocknrs, amount_needed);
1351
1352		/*
1353		 * We have all the block numbers we need from the
1354		 * prealloc list
1355		 */
1356		if (amount_needed == 0)
1357			return CARRY_ON;
1358		new_blocknrs += (initial_amount_needed - amount_needed);
1359	}
1360
1361	/* find search start and save it in hint structure */
1362	determine_search_start(hint, amount_needed);
1363	if (hint->search_start >= SB_BLOCK_COUNT(s))
1364		hint->search_start = SB_BLOCK_COUNT(s) - 1;
1365
1366	/* allocation itself; fill new_blocknrs and preallocation arrays */
1367	ret = blocknrs_and_prealloc_arrays_from_search_start
1368	    (hint, new_blocknrs, amount_needed);
1369
1370	/*
1371	 * We used prealloc. list to fill (partially) new_blocknrs array.
1372	 * If final allocation fails we need to return blocks back to
1373	 * prealloc. list or just free them. -- Zam (I chose second
1374	 * variant)
1375	 */
1376	if (ret != CARRY_ON) {
1377		while (amount_needed++ < initial_amount_needed) {
1378			reiserfs_free_block(hint->th, hint->inode,
1379					    *(--new_blocknrs), 1);
1380		}
1381	}
1382	return ret;
1383}
1384
1385void reiserfs_cache_bitmap_metadata(struct super_block *sb,
1386                                    struct buffer_head *bh,
1387                                    struct reiserfs_bitmap_info *info)
1388{
1389	unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size);
1390
1391	/* The first bit must ALWAYS be 1 */
1392	if (!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data))
1393		reiserfs_error(sb, "reiserfs-2025", "bitmap block %lu is "
1394			       "corrupted: first bit must be 1", bh->b_blocknr);
1395
1396	info->free_count = 0;
1397
1398	while (--cur >= (unsigned long *)bh->b_data) {
1399		/* 0 and ~0 are special, we can optimize for them */
1400		if (*cur == 0)
1401			info->free_count += BITS_PER_LONG;
1402		else if (*cur != ~0L)	/* A mix, investigate */
1403			info->free_count += BITS_PER_LONG - hweight_long(*cur);
1404	}
1405}
1406
1407struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1408                                               unsigned int bitmap)
1409{
1410	b_blocknr_t block = (sb->s_blocksize << 3) * bitmap;
1411	struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap;
1412	struct buffer_head *bh;
1413
1414	/*
1415	 * Way old format filesystems had the bitmaps packed up front.
1416	 * I doubt there are any of these left, but just in case...
1417	 */
1418	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
1419			      &REISERFS_SB(sb)->s_properties)))
1420		block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap;
1421	else if (bitmap == 0)
1422		block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
1423
 
1424	bh = sb_bread(sb, block);
 
1425	if (bh == NULL)
1426		reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
1427		                 "reading failed", __func__, block);
1428	else {
1429		if (buffer_locked(bh)) {
1430			int depth;
1431			PROC_INFO_INC(sb, scan_bitmap.wait);
1432			depth = reiserfs_write_unlock_nested(sb);
1433			__wait_on_buffer(bh);
1434			reiserfs_write_lock_nested(sb, depth);
1435		}
1436		BUG_ON(!buffer_uptodate(bh));
1437		BUG_ON(atomic_read(&bh->b_count) == 0);
1438
1439		if (info->free_count == UINT_MAX)
1440			reiserfs_cache_bitmap_metadata(sb, bh, info);
1441	}
1442
1443	return bh;
1444}
1445
1446int reiserfs_init_bitmap_cache(struct super_block *sb)
1447{
1448	struct reiserfs_bitmap_info *bitmap;
1449	unsigned int bmap_nr = reiserfs_bmap_count(sb);
1450
1451	bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
1452	if (bitmap == NULL)
1453		return -ENOMEM;
1454
1455	memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr);
1456
1457	SB_AP_BITMAP(sb) = bitmap;
1458
1459	return 0;
1460}
1461
1462void reiserfs_free_bitmap_cache(struct super_block *sb)
1463{
1464	if (SB_AP_BITMAP(sb)) {
1465		vfree(SB_AP_BITMAP(sb));
1466		SB_AP_BITMAP(sb) = NULL;
1467	}
1468}
v3.5.6
   1/*
   2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
   3 */
   4/* Reiserfs block (de)allocator, bitmap-based. */
   5
   6#include <linux/time.h>
   7#include "reiserfs.h"
   8#include <linux/errno.h>
   9#include <linux/buffer_head.h>
  10#include <linux/kernel.h>
  11#include <linux/pagemap.h>
  12#include <linux/vmalloc.h>
  13#include <linux/quotaops.h>
  14#include <linux/seq_file.h>
  15
  16#define PREALLOCATION_SIZE 9
  17
  18/* different reiserfs block allocator options */
  19
  20#define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits)
  21
  22#define  _ALLOC_concentrating_formatted_nodes 0
  23#define  _ALLOC_displacing_large_files 1
  24#define  _ALLOC_displacing_new_packing_localities 2
  25#define  _ALLOC_old_hashed_relocation 3
  26#define  _ALLOC_new_hashed_relocation 4
  27#define  _ALLOC_skip_busy 5
  28#define  _ALLOC_displace_based_on_dirid 6
  29#define  _ALLOC_hashed_formatted_nodes 7
  30#define  _ALLOC_old_way 8
  31#define  _ALLOC_hundredth_slices 9
  32#define  _ALLOC_dirid_groups 10
  33#define  _ALLOC_oid_groups 11
  34#define  _ALLOC_packing_groups 12
  35
  36#define  concentrating_formatted_nodes(s)	test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
  37#define  displacing_large_files(s)		test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
  38#define  displacing_new_packing_localities(s)	test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))
  39
  40#define SET_OPTION(optname) \
  41   do { \
  42	reiserfs_info(s, "block allocator option \"%s\" is set", #optname); \
  43	set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
  44    } while(0)
  45#define TEST_OPTION(optname, s) \
  46    test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
  47
  48static inline void get_bit_address(struct super_block *s,
  49				   b_blocknr_t block,
  50				   unsigned int *bmap_nr,
  51				   unsigned int *offset)
  52{
  53	/* It is in the bitmap block number equal to the block
  54	 * number divided by the number of bits in a block. */
 
 
  55	*bmap_nr = block >> (s->s_blocksize_bits + 3);
  56	/* Within that bitmap block it is located at bit offset *offset. */
  57	*offset = block & ((s->s_blocksize << 3) - 1);
  58}
  59
  60int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
  61{
  62	unsigned int bmap, offset;
  63	unsigned int bmap_count = reiserfs_bmap_count(s);
  64
  65	if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
  66		reiserfs_error(s, "vs-4010",
  67			       "block number is out of range %lu (%u)",
  68			       block, SB_BLOCK_COUNT(s));
  69		return 0;
  70	}
  71
  72	get_bit_address(s, block, &bmap, &offset);
  73
  74	/* Old format filesystem? Unlikely, but the bitmaps are all up front so
  75	 * we need to account for it. */
 
 
  76	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
  77			      &(REISERFS_SB(s)->s_properties)))) {
  78		b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
  79		if (block >= bmap1 &&
  80		    block <= bmap1 + bmap_count) {
  81			reiserfs_error(s, "vs-4019", "bitmap block %lu(%u) "
  82				       "can't be freed or reused",
  83				       block, bmap_count);
  84			return 0;
  85		}
  86	} else {
  87		if (offset == 0) {
  88			reiserfs_error(s, "vs-4020", "bitmap block %lu(%u) "
  89				       "can't be freed or reused",
  90				       block, bmap_count);
  91			return 0;
  92		}
  93	}
  94
  95	if (bmap >= bmap_count) {
  96		reiserfs_error(s, "vs-4030", "bitmap for requested block "
  97			       "is out of range: block=%lu, bitmap_nr=%u",
  98			       block, bmap);
  99		return 0;
 100	}
 101
 102	if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
 103		reiserfs_error(s, "vs-4050", "this is root block (%u), "
 104			       "it must be busy", SB_ROOT_BLOCK(s));
 105		return 0;
 106	}
 107
 108	return 1;
 109}
 110
 111/* searches in journal structures for a given block number (bmap, off). If block
 112   is found in reiserfs journal it suggests next free block candidate to test. */
 
 
 
 113static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
 114				      int off, int *next)
 115{
 116	b_blocknr_t tmp;
 117
 118	if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) {
 119		if (tmp) {	/* hint supplied */
 120			*next = tmp;
 121			PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
 122		} else {
 123			(*next) = off + 1;	/* inc offset to avoid looping. */
 124			PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
 125		}
 126		PROC_INFO_INC(s, scan_bitmap.retry);
 127		return 1;
 128	}
 129	return 0;
 130}
 131
 132/* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
 133 * block; */
 
 
 134static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
 135			     unsigned int bmap_n, int *beg, int boundary,
 136			     int min, int max, int unfm)
 137{
 138	struct super_block *s = th->t_super;
 139	struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
 140	struct buffer_head *bh;
 141	int end, next;
 142	int org = *beg;
 143
 144	BUG_ON(!th->t_trans_id);
 145
 146	RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
 147	       "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
 148	PROC_INFO_INC(s, scan_bitmap.bmap);
 149/* this is unclear and lacks comments, explain how journal bitmaps
 150   work here for the reader.  Convey a sense of the design here. What
 151   is a window? */
 152/* - I mean `a window of zero bits' as in description of this function - Zam. */
 153
 154	if (!bi) {
 155		reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer "
 156			       "for bitmap %d", bmap_n);
 157		return 0;
 158	}
 159
 160	bh = reiserfs_read_bitmap_block(s, bmap_n);
 161	if (bh == NULL)
 162		return 0;
 163
 164	while (1) {
 165	      cont:
 166		if (bi->free_count < min) {
 167			brelse(bh);
 168			return 0;	// No free blocks in this bitmap
 169		}
 170
 171		/* search for a first zero bit -- beginning of a window */
 172		*beg = reiserfs_find_next_zero_le_bit
 173		    ((unsigned long *)(bh->b_data), boundary, *beg);
 174
 175		if (*beg + min > boundary) {	/* search for a zero bit fails or the rest of bitmap block
 176						 * cannot contain a zero window of minimum size */
 
 
 
 177			brelse(bh);
 178			return 0;
 179		}
 180
 181		if (unfm && is_block_in_journal(s, bmap_n, *beg, beg))
 182			continue;
 183		/* first zero bit found; we check next bits */
 184		for (end = *beg + 1;; end++) {
 185			if (end >= *beg + max || end >= boundary
 186			    || reiserfs_test_le_bit(end, bh->b_data)) {
 187				next = end;
 188				break;
 189			}
 190			/* finding the other end of zero bit window requires looking into journal structures (in
 191			 * case of searching for free blocks for unformatted nodes) */
 
 
 
 
 192			if (unfm && is_block_in_journal(s, bmap_n, end, &next))
 193				break;
 194		}
 195
 196		/* now (*beg) points to beginning of zero bits window,
 197		 * (end) points to one bit after the window end */
 198		if (end - *beg >= min) {	/* it seems we have found window of proper size */
 
 
 
 
 199			int i;
 200			reiserfs_prepare_for_journal(s, bh, 1);
 201			/* try to set all blocks used checking are they still free */
 
 
 
 202			for (i = *beg; i < end; i++) {
 203				/* It seems that we should not check in journal again. */
 204				if (reiserfs_test_and_set_le_bit
 205				    (i, bh->b_data)) {
 206					/* bit was set by another process
 207					 * while we slept in prepare_for_journal() */
 
 
 208					PROC_INFO_INC(s, scan_bitmap.stolen);
 209					if (i >= *beg + min) {	/* we can continue with smaller set of allocated blocks,
 210								 * if length of this set is more or equal to `min' */
 
 
 
 
 
 211						end = i;
 212						break;
 213					}
 214					/* otherwise we clear all bit were set ... */
 
 
 
 
 215					while (--i >= *beg)
 216						reiserfs_clear_le_bit
 217						    (i, bh->b_data);
 218					reiserfs_restore_prepared_buffer(s, bh);
 219					*beg = org;
 220					/* ... and search again in current block from beginning */
 
 
 
 
 221					goto cont;
 222				}
 223			}
 224			bi->free_count -= (end - *beg);
 225			journal_mark_dirty(th, s, bh);
 226			brelse(bh);
 227
 228			/* free block count calculation */
 229			reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
 230						     1);
 231			PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
 232			journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
 233
 234			return end - (*beg);
 235		} else {
 236			*beg = next;
 237		}
 238	}
 239}
 240
 241static int bmap_hash_id(struct super_block *s, u32 id)
 242{
 243	char *hash_in = NULL;
 244	unsigned long hash;
 245	unsigned bm;
 246
 247	if (id <= 2) {
 248		bm = 1;
 249	} else {
 250		hash_in = (char *)(&id);
 251		hash = keyed_hash(hash_in, 4);
 252		bm = hash % reiserfs_bmap_count(s);
 253		if (!bm)
 254			bm = 1;
 255	}
 256	/* this can only be true when SB_BMAP_NR = 1 */
 257	if (bm >= reiserfs_bmap_count(s))
 258		bm = 0;
 259	return bm;
 260}
 261
 262/*
 263 * hashes the id and then returns > 0 if the block group for the
 264 * corresponding hash is full
 265 */
 266static inline int block_group_used(struct super_block *s, u32 id)
 267{
 268	int bm = bmap_hash_id(s, id);
 269	struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm];
 270
 271	/* If we don't have cached information on this bitmap block, we're
 
 272	 * going to have to load it later anyway. Loading it here allows us
 273	 * to make a better decision. This favors long-term performance gain
 274	 * with a better on-disk layout vs. a short term gain of skipping the
 275	 * read and potentially having a bad placement. */
 
 276	if (info->free_count == UINT_MAX) {
 277		struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
 278		brelse(bh);
 279	}
 280
 281	if (info->free_count > ((s->s_blocksize << 3) * 60 / 100)) {
 282		return 0;
 283	}
 284	return 1;
 285}
 286
 287/*
 288 * the packing is returned in disk byte order
 289 */
 290__le32 reiserfs_choose_packing(struct inode * dir)
 291{
 292	__le32 packing;
 293	if (TEST_OPTION(packing_groups, dir->i_sb)) {
 294		u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
 295		/*
 296		 * some versions of reiserfsck expect packing locality 1 to be
 297		 * special
 298		 */
 299		if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir))
 300			packing = INODE_PKEY(dir)->k_objectid;
 301		else
 302			packing = INODE_PKEY(dir)->k_dir_id;
 303	} else
 304		packing = INODE_PKEY(dir)->k_objectid;
 305	return packing;
 306}
 307
 308/* Tries to find contiguous zero bit window (given size) in given region of
 309 * bitmap and place new blocks there. Returns number of allocated blocks. */
 
 
 310static int scan_bitmap(struct reiserfs_transaction_handle *th,
 311		       b_blocknr_t * start, b_blocknr_t finish,
 312		       int min, int max, int unfm, sector_t file_block)
 313{
 314	int nr_allocated = 0;
 315	struct super_block *s = th->t_super;
 316	/* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
 317	 * - Hans, it is not a block number - Zam. */
 318
 319	unsigned int bm, off;
 320	unsigned int end_bm, end_off;
 321	unsigned int off_max = s->s_blocksize << 3;
 322
 323	BUG_ON(!th->t_trans_id);
 
 324
 325	PROC_INFO_INC(s, scan_bitmap.call);
 326	if (SB_FREE_BLOCKS(s) <= 0)
 327		return 0;	// No point in looking for more free blocks
 328
 329	get_bit_address(s, *start, &bm, &off);
 330	get_bit_address(s, finish, &end_bm, &end_off);
 331	if (bm > reiserfs_bmap_count(s))
 332		return 0;
 333	if (end_bm > reiserfs_bmap_count(s))
 334		end_bm = reiserfs_bmap_count(s);
 335
 336	/* When the bitmap is more than 10% free, anyone can allocate.
 
 337	 * When it's less than 10% free, only files that already use the
 338	 * bitmap are allowed. Once we pass 80% full, this restriction
 339	 * is lifted.
 340	 *
 341	 * We do this so that files that grow later still have space close to
 342	 * their original allocation. This improves locality, and presumably
 343	 * performance as a result.
 344	 *
 345	 * This is only an allocation policy and does not make up for getting a
 346	 * bad hint. Decent hinting must be implemented for this to work well.
 347	 */
 348	if (TEST_OPTION(skip_busy, s)
 349	    && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) {
 350		for (; bm < end_bm; bm++, off = 0) {
 351			if ((off && (!unfm || (file_block != 0)))
 352			    || SB_AP_BITMAP(s)[bm].free_count >
 353			    (s->s_blocksize << 3) / 10)
 354				nr_allocated =
 355				    scan_bitmap_block(th, bm, &off, off_max,
 356						      min, max, unfm);
 357			if (nr_allocated)
 358				goto ret;
 359		}
 360		/* we know from above that start is a reasonable number */
 361		get_bit_address(s, *start, &bm, &off);
 362	}
 363
 364	for (; bm < end_bm; bm++, off = 0) {
 365		nr_allocated =
 366		    scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
 367		if (nr_allocated)
 368			goto ret;
 369	}
 370
 371	nr_allocated =
 372	    scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
 373
 374      ret:
 375	*start = bm * off_max + off;
 376	return nr_allocated;
 377
 378}
 379
 380static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
 381				 struct inode *inode, b_blocknr_t block,
 382				 int for_unformatted)
 383{
 384	struct super_block *s = th->t_super;
 385	struct reiserfs_super_block *rs;
 386	struct buffer_head *sbh, *bmbh;
 387	struct reiserfs_bitmap_info *apbi;
 388	unsigned int nr, offset;
 389
 390	BUG_ON(!th->t_trans_id);
 391
 392	PROC_INFO_INC(s, free_block);
 393
 394	rs = SB_DISK_SUPER_BLOCK(s);
 395	sbh = SB_BUFFER_WITH_SB(s);
 396	apbi = SB_AP_BITMAP(s);
 397
 398	get_bit_address(s, block, &nr, &offset);
 399
 400	if (nr >= reiserfs_bmap_count(s)) {
 401		reiserfs_error(s, "vs-4075", "block %lu is out of range",
 402			       block);
 403		return;
 404	}
 405
 406	bmbh = reiserfs_read_bitmap_block(s, nr);
 407	if (!bmbh)
 408		return;
 409
 410	reiserfs_prepare_for_journal(s, bmbh, 1);
 411
 412	/* clear bit for the given block in bit map */
 413	if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) {
 414		reiserfs_error(s, "vs-4080",
 415			       "block %lu: bit already cleared", block);
 416	}
 417	apbi[nr].free_count++;
 418	journal_mark_dirty(th, s, bmbh);
 419	brelse(bmbh);
 420
 421	reiserfs_prepare_for_journal(s, sbh, 1);
 422	/* update super block */
 423	set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
 424
 425	journal_mark_dirty(th, s, sbh);
 426	if (for_unformatted)
 
 427		dquot_free_block_nodirty(inode, 1);
 
 
 428}
 429
 430void reiserfs_free_block(struct reiserfs_transaction_handle *th,
 431			 struct inode *inode, b_blocknr_t block,
 432			 int for_unformatted)
 433{
 434	struct super_block *s = th->t_super;
 
 435	BUG_ON(!th->t_trans_id);
 436
 437	RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
 438	if (!is_reusable(s, block, 1))
 439		return;
 440
 441	if (block > sb_block_count(REISERFS_SB(s)->s_rs)) {
 442		reiserfs_error(th->t_super, "bitmap-4072",
 443			       "Trying to free block outside file system "
 444			       "boundaries (%lu > %lu)",
 445			       block, sb_block_count(REISERFS_SB(s)->s_rs));
 446		return;
 447	}
 448	/* mark it before we clear it, just in case */
 449	journal_mark_freed(th, s, block);
 450	_reiserfs_free_block(th, inode, block, for_unformatted);
 451}
 452
 453/* preallocated blocks don't need to be run through journal_mark_freed */
 454static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
 455					 struct inode *inode, b_blocknr_t block)
 456{
 457	BUG_ON(!th->t_trans_id);
 458	RFALSE(!th->t_super,
 459	       "vs-4060: trying to free block on nonexistent device");
 460	if (!is_reusable(th->t_super, block, 1))
 461		return;
 462	_reiserfs_free_block(th, inode, block, 1);
 463}
 464
 465static void __discard_prealloc(struct reiserfs_transaction_handle *th,
 466			       struct reiserfs_inode_info *ei)
 467{
 468	unsigned long save = ei->i_prealloc_block;
 469	int dirty = 0;
 470	struct inode *inode = &ei->vfs_inode;
 
 471	BUG_ON(!th->t_trans_id);
 472#ifdef CONFIG_REISERFS_CHECK
 473	if (ei->i_prealloc_count < 0)
 474		reiserfs_error(th->t_super, "zam-4001",
 475			       "inode has negative prealloc blocks count.");
 476#endif
 477	while (ei->i_prealloc_count > 0) {
 478		reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
 479		ei->i_prealloc_block++;
 480		ei->i_prealloc_count--;
 481		dirty = 1;
 482	}
 483	if (dirty)
 484		reiserfs_update_sd(th, inode);
 485	ei->i_prealloc_block = save;
 486	list_del_init(&(ei->i_prealloc_list));
 487}
 488
 489/* FIXME: It should be inline function */
 490void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
 491			       struct inode *inode)
 492{
 493	struct reiserfs_inode_info *ei = REISERFS_I(inode);
 
 494	BUG_ON(!th->t_trans_id);
 495	if (ei->i_prealloc_count)
 496		__discard_prealloc(th, ei);
 497}
 498
 499void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
 500{
 501	struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
 502
 503	BUG_ON(!th->t_trans_id);
 504
 505	while (!list_empty(plist)) {
 506		struct reiserfs_inode_info *ei;
 507		ei = list_entry(plist->next, struct reiserfs_inode_info,
 508				i_prealloc_list);
 509#ifdef CONFIG_REISERFS_CHECK
 510		if (!ei->i_prealloc_count) {
 511			reiserfs_error(th->t_super, "zam-4001",
 512				       "inode is in prealloc list but has "
 513				       "no preallocated blocks.");
 514		}
 515#endif
 516		__discard_prealloc(th, ei);
 517	}
 518}
 519
 520void reiserfs_init_alloc_options(struct super_block *s)
 521{
 522	set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
 523	set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
 524	set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s));
 525}
 526
 527/* block allocator related options are parsed here */
 528int reiserfs_parse_alloc_options(struct super_block *s, char *options)
 529{
 530	char *this_char, *value;
 531
 532	REISERFS_SB(s)->s_alloc_options.bits = 0;	/* clear default settings */
 
 533
 534	while ((this_char = strsep(&options, ":")) != NULL) {
 535		if ((value = strchr(this_char, '=')) != NULL)
 536			*value++ = 0;
 537
 538		if (!strcmp(this_char, "concentrating_formatted_nodes")) {
 539			int temp;
 540			SET_OPTION(concentrating_formatted_nodes);
 541			temp = (value
 542				&& *value) ? simple_strtoul(value, &value,
 543							    0) : 10;
 544			if (temp <= 0 || temp > 100) {
 545				REISERFS_SB(s)->s_alloc_options.border = 10;
 546			} else {
 547				REISERFS_SB(s)->s_alloc_options.border =
 548				    100 / temp;
 549			}
 550			continue;
 551		}
 552		if (!strcmp(this_char, "displacing_large_files")) {
 553			SET_OPTION(displacing_large_files);
 554			REISERFS_SB(s)->s_alloc_options.large_file_size =
 555			    (value
 556			     && *value) ? simple_strtoul(value, &value, 0) : 16;
 557			continue;
 558		}
 559		if (!strcmp(this_char, "displacing_new_packing_localities")) {
 560			SET_OPTION(displacing_new_packing_localities);
 561			continue;
 562		};
 563
 564		if (!strcmp(this_char, "old_hashed_relocation")) {
 565			SET_OPTION(old_hashed_relocation);
 566			continue;
 567		}
 568
 569		if (!strcmp(this_char, "new_hashed_relocation")) {
 570			SET_OPTION(new_hashed_relocation);
 571			continue;
 572		}
 573
 574		if (!strcmp(this_char, "dirid_groups")) {
 575			SET_OPTION(dirid_groups);
 576			continue;
 577		}
 578		if (!strcmp(this_char, "oid_groups")) {
 579			SET_OPTION(oid_groups);
 580			continue;
 581		}
 582		if (!strcmp(this_char, "packing_groups")) {
 583			SET_OPTION(packing_groups);
 584			continue;
 585		}
 586		if (!strcmp(this_char, "hashed_formatted_nodes")) {
 587			SET_OPTION(hashed_formatted_nodes);
 588			continue;
 589		}
 590
 591		if (!strcmp(this_char, "skip_busy")) {
 592			SET_OPTION(skip_busy);
 593			continue;
 594		}
 595
 596		if (!strcmp(this_char, "hundredth_slices")) {
 597			SET_OPTION(hundredth_slices);
 598			continue;
 599		}
 600
 601		if (!strcmp(this_char, "old_way")) {
 602			SET_OPTION(old_way);
 603			continue;
 604		}
 605
 606		if (!strcmp(this_char, "displace_based_on_dirid")) {
 607			SET_OPTION(displace_based_on_dirid);
 608			continue;
 609		}
 610
 611		if (!strcmp(this_char, "preallocmin")) {
 612			REISERFS_SB(s)->s_alloc_options.preallocmin =
 613			    (value
 614			     && *value) ? simple_strtoul(value, &value, 0) : 4;
 615			continue;
 616		}
 617
 618		if (!strcmp(this_char, "preallocsize")) {
 619			REISERFS_SB(s)->s_alloc_options.preallocsize =
 620			    (value
 621			     && *value) ? simple_strtoul(value, &value,
 622							 0) :
 623			    PREALLOCATION_SIZE;
 624			continue;
 625		}
 626
 627		reiserfs_warning(s, "zam-4001", "unknown option - %s",
 628				 this_char);
 629		return 1;
 630	}
 631
 632	reiserfs_info(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
 633	return 0;
 634}
 635
 636static void print_sep(struct seq_file *seq, int *first)
 637{
 638	if (!*first)
 639		seq_puts(seq, ":");
 640	else
 641		*first = 0;
 642}
 643
 644void show_alloc_options(struct seq_file *seq, struct super_block *s)
 645{
 646	int first = 1;
 647
 648	if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) |
 649		(1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups)))
 650		return;
 651
 652	seq_puts(seq, ",alloc=");
 653
 654	if (TEST_OPTION(concentrating_formatted_nodes, s)) {
 655		print_sep(seq, &first);
 656		if (REISERFS_SB(s)->s_alloc_options.border != 10) {
 657			seq_printf(seq, "concentrating_formatted_nodes=%d",
 658				100 / REISERFS_SB(s)->s_alloc_options.border);
 659		} else
 660			seq_puts(seq, "concentrating_formatted_nodes");
 661	}
 662	if (TEST_OPTION(displacing_large_files, s)) {
 663		print_sep(seq, &first);
 664		if (REISERFS_SB(s)->s_alloc_options.large_file_size != 16) {
 665			seq_printf(seq, "displacing_large_files=%lu",
 666			    REISERFS_SB(s)->s_alloc_options.large_file_size);
 667		} else
 668			seq_puts(seq, "displacing_large_files");
 669	}
 670	if (TEST_OPTION(displacing_new_packing_localities, s)) {
 671		print_sep(seq, &first);
 672		seq_puts(seq, "displacing_new_packing_localities");
 673	}
 674	if (TEST_OPTION(old_hashed_relocation, s)) {
 675		print_sep(seq, &first);
 676		seq_puts(seq, "old_hashed_relocation");
 677	}
 678	if (TEST_OPTION(new_hashed_relocation, s)) {
 679		print_sep(seq, &first);
 680		seq_puts(seq, "new_hashed_relocation");
 681	}
 682	if (TEST_OPTION(dirid_groups, s)) {
 683		print_sep(seq, &first);
 684		seq_puts(seq, "dirid_groups");
 685	}
 686	if (TEST_OPTION(oid_groups, s)) {
 687		print_sep(seq, &first);
 688		seq_puts(seq, "oid_groups");
 689	}
 690	if (TEST_OPTION(packing_groups, s)) {
 691		print_sep(seq, &first);
 692		seq_puts(seq, "packing_groups");
 693	}
 694	if (TEST_OPTION(hashed_formatted_nodes, s)) {
 695		print_sep(seq, &first);
 696		seq_puts(seq, "hashed_formatted_nodes");
 697	}
 698	if (TEST_OPTION(skip_busy, s)) {
 699		print_sep(seq, &first);
 700		seq_puts(seq, "skip_busy");
 701	}
 702	if (TEST_OPTION(hundredth_slices, s)) {
 703		print_sep(seq, &first);
 704		seq_puts(seq, "hundredth_slices");
 705	}
 706	if (TEST_OPTION(old_way, s)) {
 707		print_sep(seq, &first);
 708		seq_puts(seq, "old_way");
 709	}
 710	if (TEST_OPTION(displace_based_on_dirid, s)) {
 711		print_sep(seq, &first);
 712		seq_puts(seq, "displace_based_on_dirid");
 713	}
 714	if (REISERFS_SB(s)->s_alloc_options.preallocmin != 0) {
 715		print_sep(seq, &first);
 716		seq_printf(seq, "preallocmin=%d",
 717				REISERFS_SB(s)->s_alloc_options.preallocmin);
 718	}
 719	if (REISERFS_SB(s)->s_alloc_options.preallocsize != 17) {
 720		print_sep(seq, &first);
 721		seq_printf(seq, "preallocsize=%d",
 722				REISERFS_SB(s)->s_alloc_options.preallocsize);
 723	}
 724}
 725
 726static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
 727{
 728	char *hash_in;
 
 729	if (hint->formatted_node) {
 730		hash_in = (char *)&hint->key.k_dir_id;
 731	} else {
 732		if (!hint->inode) {
 733			//hint->search_start = hint->beg;
 734			hash_in = (char *)&hint->key.k_dir_id;
 735		} else
 736		    if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 737			hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
 738		else
 739			hash_in =
 740			    (char *)(&INODE_PKEY(hint->inode)->k_objectid);
 741	}
 742
 743	hint->search_start =
 744	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
 745}
 746
 747/*
 748 * Relocation based on dirid, hashing them into a given bitmap block
 749 * files. Formatted nodes are unaffected, a separate policy covers them
 750 */
 751static void dirid_groups(reiserfs_blocknr_hint_t * hint)
 752{
 753	unsigned long hash;
 754	__u32 dirid = 0;
 755	int bm = 0;
 756	struct super_block *sb = hint->th->t_super;
 
 757	if (hint->inode)
 758		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
 759	else if (hint->formatted_node)
 760		dirid = hint->key.k_dir_id;
 761
 762	if (dirid) {
 763		bm = bmap_hash_id(sb, dirid);
 764		hash = bm * (sb->s_blocksize << 3);
 765		/* give a portion of the block group to metadata */
 766		if (hint->inode)
 767			hash += sb->s_blocksize / 2;
 768		hint->search_start = hash;
 769	}
 770}
 771
 772/*
 773 * Relocation based on oid, hashing them into a given bitmap block
 774 * files. Formatted nodes are unaffected, a separate policy covers them
 775 */
 776static void oid_groups(reiserfs_blocknr_hint_t * hint)
 777{
 778	if (hint->inode) {
 779		unsigned long hash;
 780		__u32 oid;
 781		__u32 dirid;
 782		int bm;
 783
 784		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
 785
 786		/* keep the root dir and it's first set of subdirs close to
 
 787		 * the start of the disk
 788		 */
 789		if (dirid <= 2)
 790			hash = (hint->inode->i_sb->s_blocksize << 3);
 791		else {
 792			oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
 793			bm = bmap_hash_id(hint->inode->i_sb, oid);
 794			hash = bm * (hint->inode->i_sb->s_blocksize << 3);
 795		}
 796		hint->search_start = hash;
 797	}
 798}
 799
 800/* returns 1 if it finds an indirect item and gets valid hint info
 
 801 * from it, otherwise 0
 802 */
 803static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
 804{
 805	struct treepath *path;
 806	struct buffer_head *bh;
 807	struct item_head *ih;
 808	int pos_in_item;
 809	__le32 *item;
 810	int ret = 0;
 811
 812	if (!hint->path)	/* reiserfs code can call this function w/o pointer to path
 813				 * structure supplied; then we rely on supplied search_start */
 
 
 
 814		return 0;
 815
 816	path = hint->path;
 817	bh = get_last_bh(path);
 818	RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
 819	ih = get_ih(path);
 820	pos_in_item = path->pos_in_item;
 821	item = get_item(path);
 822
 823	hint->search_start = bh->b_blocknr;
 824
 
 
 
 
 825	if (!hint->formatted_node && is_indirect_le_ih(ih)) {
 826		/* for indirect item: go to left and look for the first non-hole entry
 827		   in the indirect item */
 828		if (pos_in_item == I_UNFM_NUM(ih))
 829			pos_in_item--;
 830//          pos_in_item = I_UNFM_NUM (ih) - 1;
 831		while (pos_in_item >= 0) {
 832			int t = get_block_num(item, pos_in_item);
 833			if (t) {
 834				hint->search_start = t;
 835				ret = 1;
 836				break;
 837			}
 838			pos_in_item--;
 839		}
 840	}
 841
 842	/* does result value fit into specified region? */
 843	return ret;
 844}
 845
 846/* should be, if formatted node, then try to put on first part of the device
 847   specified as number of percent with mount option device, else try to put
 848   on last of device.  This is not to say it is good code to do so,
 849   but the effect should be measured.  */
 
 
 850static inline void set_border_in_hint(struct super_block *s,
 851				      reiserfs_blocknr_hint_t * hint)
 852{
 853	b_blocknr_t border =
 854	    SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
 855
 856	if (hint->formatted_node)
 857		hint->end = border - 1;
 858	else
 859		hint->beg = border;
 860}
 861
 862static inline void displace_large_file(reiserfs_blocknr_hint_t * hint)
 863{
 864	if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 865		hint->search_start =
 866		    hint->beg +
 867		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id),
 868			       4) % (hint->end - hint->beg);
 869	else
 870		hint->search_start =
 871		    hint->beg +
 872		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid),
 873			       4) % (hint->end - hint->beg);
 874}
 875
 876static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint)
 877{
 878	char *hash_in;
 879
 880	if (!hint->inode)
 881		hash_in = (char *)&hint->key.k_dir_id;
 882	else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 883		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
 884	else
 885		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
 886
 887	hint->search_start =
 888	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
 889}
 890
 891static inline int
 892this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *
 893						   hint)
 894{
 895	return hint->block ==
 896	    REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size;
 897}
 898
 899#ifdef DISPLACE_NEW_PACKING_LOCALITIES
 900static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)
 901{
 902	struct in_core_key *key = &hint->key;
 903
 904	hint->th->displace_new_blocks = 0;
 905	hint->search_start =
 906	    hint->beg + keyed_hash((char *)(&key->k_objectid),
 907				   4) % (hint->end - hint->beg);
 908}
 909#endif
 910
 911static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint)
 912{
 913	b_blocknr_t border;
 914	u32 hash_in;
 915
 916	if (hint->formatted_node || hint->inode == NULL) {
 917		return 0;
 918	}
 919
 920	hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
 921	border =
 922	    hint->beg + (u32) keyed_hash(((char *)(&hash_in)),
 923					 4) % (hint->end - hint->beg - 1);
 924	if (border > hint->search_start)
 925		hint->search_start = border;
 926
 927	return 1;
 928}
 929
 930static inline int old_way(reiserfs_blocknr_hint_t * hint)
 931{
 932	b_blocknr_t border;
 933
 934	if (hint->formatted_node || hint->inode == NULL) {
 935		return 0;
 936	}
 937
 938	border =
 939	    hint->beg +
 940	    le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end -
 941							      hint->beg);
 942	if (border > hint->search_start)
 943		hint->search_start = border;
 944
 945	return 1;
 946}
 947
 948static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint)
 949{
 950	struct in_core_key *key = &hint->key;
 951	b_blocknr_t slice_start;
 952
 953	slice_start =
 954	    (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100);
 955	if (slice_start > hint->search_start
 956	    || slice_start + (hint->end / 100) <= hint->search_start) {
 957		hint->search_start = slice_start;
 958	}
 959}
 960
 961static void determine_search_start(reiserfs_blocknr_hint_t * hint,
 962				   int amount_needed)
 963{
 964	struct super_block *s = hint->th->t_super;
 965	int unfm_hint;
 966
 967	hint->beg = 0;
 968	hint->end = SB_BLOCK_COUNT(s) - 1;
 969
 970	/* This is former border algorithm. Now with tunable border offset */
 971	if (concentrating_formatted_nodes(s))
 972		set_border_in_hint(s, hint);
 973
 974#ifdef DISPLACE_NEW_PACKING_LOCALITIES
 975	/* whenever we create a new directory, we displace it.  At first we will
 976	   hash for location, later we might look for a moderately empty place for
 977	   it */
 
 
 978	if (displacing_new_packing_localities(s)
 979	    && hint->th->displace_new_blocks) {
 980		displace_new_packing_locality(hint);
 981
 982		/* we do not continue determine_search_start,
 983		 * if new packing locality is being displaced */
 
 
 984		return;
 985	}
 986#endif
 987
 988	/* all persons should feel encouraged to add more special cases here and
 989	 * test them */
 
 
 990
 991	if (displacing_large_files(s) && !hint->formatted_node
 992	    && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
 993		displace_large_file(hint);
 994		return;
 995	}
 996
 997	/* if none of our special cases is relevant, use the left neighbor in the
 998	   tree order of the new node we are allocating for */
 
 
 999	if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
1000		hash_formatted_node(hint);
1001		return;
1002	}
1003
1004	unfm_hint = get_left_neighbor(hint);
1005
1006	/* Mimic old block allocator behaviour, that is if VFS allowed for preallocation,
1007	   new blocks are displaced based on directory ID. Also, if suggested search_start
1008	   is less than last preallocated block, we start searching from it, assuming that
1009	   HDD dataflow is faster in forward direction */
 
 
 
1010	if (TEST_OPTION(old_way, s)) {
1011		if (!hint->formatted_node) {
1012			if (!reiserfs_hashed_relocation(s))
1013				old_way(hint);
1014			else if (!reiserfs_no_unhashed_relocation(s))
1015				old_hashed_relocation(hint);
1016
1017			if (hint->inode
1018			    && hint->search_start <
1019			    REISERFS_I(hint->inode)->i_prealloc_block)
1020				hint->search_start =
1021				    REISERFS_I(hint->inode)->i_prealloc_block;
1022		}
1023		return;
1024	}
1025
1026	/* This is an approach proposed by Hans */
1027	if (TEST_OPTION(hundredth_slices, s)
1028	    && !(displacing_large_files(s) && !hint->formatted_node)) {
1029		hundredth_slices(hint);
1030		return;
1031	}
1032
1033	/* old_hashed_relocation only works on unformatted */
1034	if (!unfm_hint && !hint->formatted_node &&
1035	    TEST_OPTION(old_hashed_relocation, s)) {
1036		old_hashed_relocation(hint);
1037	}
 
1038	/* new_hashed_relocation works with both formatted/unformatted nodes */
1039	if ((!unfm_hint || hint->formatted_node) &&
1040	    TEST_OPTION(new_hashed_relocation, s)) {
1041		new_hashed_relocation(hint);
1042	}
 
1043	/* dirid grouping works only on unformatted nodes */
1044	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
1045		dirid_groups(hint);
1046	}
1047#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1048	if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
1049		dirid_groups(hint);
1050	}
1051#endif
1052
1053	/* oid grouping works only on unformatted nodes */
1054	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) {
1055		oid_groups(hint);
1056	}
1057	return;
1058}
1059
1060static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
1061{
1062	/* make minimum size a mount option and benchmark both ways */
1063	/* we preallocate blocks only for regular files, specific size */
1064	/* benchmark preallocating always and see what happens */
1065
1066	hint->prealloc_size = 0;
1067
1068	if (!hint->formatted_node && hint->preallocate) {
1069		if (S_ISREG(hint->inode->i_mode)
1070		    && hint->inode->i_size >=
1071		    REISERFS_SB(hint->th->t_super)->s_alloc_options.
1072		    preallocmin * hint->inode->i_sb->s_blocksize)
1073			hint->prealloc_size =
1074			    REISERFS_SB(hint->th->t_super)->s_alloc_options.
1075			    preallocsize - 1;
1076	}
1077	return CARRY_ON;
1078}
1079
1080/* XXX I know it could be merged with upper-level function;
1081   but may be result function would be too complex. */
1082static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
1083						 b_blocknr_t * new_blocknrs,
1084						 b_blocknr_t start,
1085						 b_blocknr_t finish, int min,
1086						 int amount_needed,
1087						 int prealloc_size)
1088{
1089	int rest = amount_needed;
1090	int nr_allocated;
1091
1092	while (rest > 0 && start <= finish) {
1093		nr_allocated = scan_bitmap(hint->th, &start, finish, min,
1094					   rest + prealloc_size,
1095					   !hint->formatted_node, hint->block);
1096
1097		if (nr_allocated == 0)	/* no new blocks allocated, return */
1098			break;
1099
1100		/* fill free_blocknrs array first */
1101		while (rest > 0 && nr_allocated > 0) {
1102			*new_blocknrs++ = start++;
1103			rest--;
1104			nr_allocated--;
1105		}
1106
1107		/* do we have something to fill prealloc. array also ? */
1108		if (nr_allocated > 0) {
1109			/* it means prealloc_size was greater that 0 and we do preallocation */
 
 
 
1110			list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
1111				 &SB_JOURNAL(hint->th->t_super)->
1112				 j_prealloc_list);
1113			REISERFS_I(hint->inode)->i_prealloc_block = start;
1114			REISERFS_I(hint->inode)->i_prealloc_count =
1115			    nr_allocated;
1116			break;
1117		}
1118	}
1119
1120	return (amount_needed - rest);
1121}
1122
1123static inline int blocknrs_and_prealloc_arrays_from_search_start
1124    (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs,
1125     int amount_needed) {
1126	struct super_block *s = hint->th->t_super;
1127	b_blocknr_t start = hint->search_start;
1128	b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
1129	int passno = 0;
1130	int nr_allocated = 0;
 
1131
1132	determine_prealloc_size(hint);
1133	if (!hint->formatted_node) {
1134		int quota_ret;
1135#ifdef REISERQUOTA_DEBUG
1136		reiserfs_debug(s, REISERFS_DEBUG_CODE,
1137			       "reiserquota: allocating %d blocks id=%u",
1138			       amount_needed, hint->inode->i_uid);
1139#endif
 
1140		quota_ret =
1141		    dquot_alloc_block_nodirty(hint->inode, amount_needed);
1142		if (quota_ret)	/* Quota exceeded? */
 
1143			return QUOTA_EXCEEDED;
 
1144		if (hint->preallocate && hint->prealloc_size) {
1145#ifdef REISERQUOTA_DEBUG
1146			reiserfs_debug(s, REISERFS_DEBUG_CODE,
1147				       "reiserquota: allocating (prealloc) %d blocks id=%u",
1148				       hint->prealloc_size, hint->inode->i_uid);
1149#endif
1150			quota_ret = dquot_prealloc_block_nodirty(hint->inode,
1151							 hint->prealloc_size);
1152			if (quota_ret)
1153				hint->preallocate = hint->prealloc_size = 0;
1154		}
1155		/* for unformatted nodes, force large allocations */
 
1156	}
1157
1158	do {
1159		switch (passno++) {
1160		case 0:	/* Search from hint->search_start to end of disk */
1161			start = hint->search_start;
1162			finish = SB_BLOCK_COUNT(s) - 1;
1163			break;
1164		case 1:	/* Search from hint->beg to hint->search_start */
1165			start = hint->beg;
1166			finish = hint->search_start;
1167			break;
1168		case 2:	/* Last chance: Search from 0 to hint->beg */
1169			start = 0;
1170			finish = hint->beg;
1171			break;
1172		default:	/* We've tried searching everywhere, not enough space */
 
1173			/* Free the blocks */
1174			if (!hint->formatted_node) {
1175#ifdef REISERQUOTA_DEBUG
1176				reiserfs_debug(s, REISERFS_DEBUG_CODE,
1177					       "reiserquota: freeing (nospace) %d blocks id=%u",
1178					       amount_needed +
1179					       hint->prealloc_size -
1180					       nr_allocated,
1181					       hint->inode->i_uid);
1182#endif
1183				/* Free not allocated blocks */
 
1184				dquot_free_block_nodirty(hint->inode,
1185					amount_needed + hint->prealloc_size -
1186					nr_allocated);
 
1187			}
1188			while (nr_allocated--)
1189				reiserfs_free_block(hint->th, hint->inode,
1190						    new_blocknrs[nr_allocated],
1191						    !hint->formatted_node);
1192
1193			return NO_DISK_SPACE;
1194		}
1195	} while ((nr_allocated += allocate_without_wrapping_disk(hint,
1196								 new_blocknrs +
1197								 nr_allocated,
1198								 start, finish,
1199								 1,
1200								 amount_needed -
1201								 nr_allocated,
1202								 hint->
1203								 prealloc_size))
1204		 < amount_needed);
1205	if (!hint->formatted_node &&
1206	    amount_needed + hint->prealloc_size >
1207	    nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) {
1208		/* Some of preallocation blocks were not allocated */
1209#ifdef REISERQUOTA_DEBUG
1210		reiserfs_debug(s, REISERFS_DEBUG_CODE,
1211			       "reiserquota: freeing (failed prealloc) %d blocks id=%u",
1212			       amount_needed + hint->prealloc_size -
1213			       nr_allocated -
1214			       REISERFS_I(hint->inode)->i_prealloc_count,
1215			       hint->inode->i_uid);
1216#endif
 
 
1217		dquot_free_block_nodirty(hint->inode, amount_needed +
1218					 hint->prealloc_size - nr_allocated -
1219					 REISERFS_I(hint->inode)->
1220					 i_prealloc_count);
 
1221	}
1222
1223	return CARRY_ON;
1224}
1225
1226/* grab new blocknrs from preallocated list */
1227/* return amount still needed after using them */
1228static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
1229					      b_blocknr_t * new_blocknrs,
1230					      int amount_needed)
1231{
1232	struct inode *inode = hint->inode;
1233
1234	if (REISERFS_I(inode)->i_prealloc_count > 0) {
1235		while (amount_needed) {
1236
1237			*new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++;
1238			REISERFS_I(inode)->i_prealloc_count--;
1239
1240			amount_needed--;
1241
1242			if (REISERFS_I(inode)->i_prealloc_count <= 0) {
1243				list_del(&REISERFS_I(inode)->i_prealloc_list);
1244				break;
1245			}
1246		}
1247	}
1248	/* return amount still needed after using preallocated blocks */
1249	return amount_needed;
1250}
1251
1252int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, int amount_needed, int reserved_by_us	/* Amount of blocks we have
1253																	   already reserved */ )
 
 
 
1254{
1255	int initial_amount_needed = amount_needed;
1256	int ret;
1257	struct super_block *s = hint->th->t_super;
1258
1259	/* Check if there is enough space, taking into account reserved space */
1260	if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks <
1261	    amount_needed - reserved_by_us)
1262		return NO_DISK_SPACE;
1263	/* should this be if !hint->inode &&  hint->preallocate? */
1264	/* do you mean hint->formatted_node can be removed ? - Zam */
1265	/* hint->formatted_node cannot be removed because we try to access
1266	   inode information here, and there is often no inode assotiated with
1267	   metadata allocations - green */
 
 
1268
1269	if (!hint->formatted_node && hint->preallocate) {
1270		amount_needed = use_preallocated_list_if_available
1271		    (hint, new_blocknrs, amount_needed);
1272		if (amount_needed == 0)	/* all blocknrs we need we got from
1273					   prealloc. list */
 
 
 
 
1274			return CARRY_ON;
1275		new_blocknrs += (initial_amount_needed - amount_needed);
1276	}
1277
1278	/* find search start and save it in hint structure */
1279	determine_search_start(hint, amount_needed);
1280	if (hint->search_start >= SB_BLOCK_COUNT(s))
1281		hint->search_start = SB_BLOCK_COUNT(s) - 1;
1282
1283	/* allocation itself; fill new_blocknrs and preallocation arrays */
1284	ret = blocknrs_and_prealloc_arrays_from_search_start
1285	    (hint, new_blocknrs, amount_needed);
1286
1287	/* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we
1288	 * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second
1289	 * variant) */
1290
 
 
1291	if (ret != CARRY_ON) {
1292		while (amount_needed++ < initial_amount_needed) {
1293			reiserfs_free_block(hint->th, hint->inode,
1294					    *(--new_blocknrs), 1);
1295		}
1296	}
1297	return ret;
1298}
1299
1300void reiserfs_cache_bitmap_metadata(struct super_block *sb,
1301                                    struct buffer_head *bh,
1302                                    struct reiserfs_bitmap_info *info)
1303{
1304	unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size);
1305
1306	/* The first bit must ALWAYS be 1 */
1307	if (!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data))
1308		reiserfs_error(sb, "reiserfs-2025", "bitmap block %lu is "
1309			       "corrupted: first bit must be 1", bh->b_blocknr);
1310
1311	info->free_count = 0;
1312
1313	while (--cur >= (unsigned long *)bh->b_data) {
1314		/* 0 and ~0 are special, we can optimize for them */
1315		if (*cur == 0)
1316			info->free_count += BITS_PER_LONG;
1317		else if (*cur != ~0L)	/* A mix, investigate */
1318			info->free_count += BITS_PER_LONG - hweight_long(*cur);
1319	}
1320}
1321
1322struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1323                                               unsigned int bitmap)
1324{
1325	b_blocknr_t block = (sb->s_blocksize << 3) * bitmap;
1326	struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap;
1327	struct buffer_head *bh;
1328
1329	/* Way old format filesystems had the bitmaps packed up front.
1330	 * I doubt there are any of these left, but just in case... */
 
 
1331	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
1332	                      &(REISERFS_SB(sb)->s_properties))))
1333		block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap;
1334	else if (bitmap == 0)
1335		block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
1336
1337	reiserfs_write_unlock(sb);
1338	bh = sb_bread(sb, block);
1339	reiserfs_write_lock(sb);
1340	if (bh == NULL)
1341		reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
1342		                 "reading failed", __func__, block);
1343	else {
1344		if (buffer_locked(bh)) {
 
1345			PROC_INFO_INC(sb, scan_bitmap.wait);
1346			reiserfs_write_unlock(sb);
1347			__wait_on_buffer(bh);
1348			reiserfs_write_lock(sb);
1349		}
1350		BUG_ON(!buffer_uptodate(bh));
1351		BUG_ON(atomic_read(&bh->b_count) == 0);
1352
1353		if (info->free_count == UINT_MAX)
1354			reiserfs_cache_bitmap_metadata(sb, bh, info);
1355	}
1356
1357	return bh;
1358}
1359
1360int reiserfs_init_bitmap_cache(struct super_block *sb)
1361{
1362	struct reiserfs_bitmap_info *bitmap;
1363	unsigned int bmap_nr = reiserfs_bmap_count(sb);
1364
1365	bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
1366	if (bitmap == NULL)
1367		return -ENOMEM;
1368
1369	memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr);
1370
1371	SB_AP_BITMAP(sb) = bitmap;
1372
1373	return 0;
1374}
1375
1376void reiserfs_free_bitmap_cache(struct super_block *sb)
1377{
1378	if (SB_AP_BITMAP(sb)) {
1379		vfree(SB_AP_BITMAP(sb));
1380		SB_AP_BITMAP(sb) = NULL;
1381	}
1382}