   1/*
   2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
   3 */
   4/* Reiserfs block (de)allocator, bitmap-based. */
   5
   6#include <linux/time.h>
   7#include "reiserfs.h"
   8#include <linux/errno.h>
   9#include <linux/buffer_head.h>
  10#include <linux/kernel.h>
  11#include <linux/pagemap.h>
  12#include <linux/vmalloc.h>
  13#include <linux/quotaops.h>
  14#include <linux/seq_file.h>
  15
  16#define PREALLOCATION_SIZE 9
  17
  18/* different reiserfs block allocator options */
  19
  20#define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits)
  21
  22#define  _ALLOC_concentrating_formatted_nodes 0
  23#define  _ALLOC_displacing_large_files 1
  24#define  _ALLOC_displacing_new_packing_localities 2
  25#define  _ALLOC_old_hashed_relocation 3
  26#define  _ALLOC_new_hashed_relocation 4
  27#define  _ALLOC_skip_busy 5
  28#define  _ALLOC_displace_based_on_dirid 6
  29#define  _ALLOC_hashed_formatted_nodes 7
  30#define  _ALLOC_old_way 8
  31#define  _ALLOC_hundredth_slices 9
  32#define  _ALLOC_dirid_groups 10
  33#define  _ALLOC_oid_groups 11
  34#define  _ALLOC_packing_groups 12
  35
  36#define  concentrating_formatted_nodes(s)	test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
  37#define  displacing_large_files(s)		test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
  38#define  displacing_new_packing_localities(s)	test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))
  39
  40#define SET_OPTION(optname) \
  41   do { \
  42	reiserfs_info(s, "block allocator option \"%s\" is set", #optname); \
  43	set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
  44    } while(0)
  45#define TEST_OPTION(optname, s) \
  46    test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
  47
  48static inline void get_bit_address(struct super_block *s,
  49				   b_blocknr_t block,
  50				   unsigned int *bmap_nr,
  51				   unsigned int *offset)
  52{
  53	/*
   54	 * The block is in the bitmap block whose number equals the block
   55	 * number divided by the number of bits in a block.
  56	 */
  57	*bmap_nr = block >> (s->s_blocksize_bits + 3);
  58	/* Within that bitmap block it is located at bit offset *offset. */
  59	*offset = block & ((s->s_blocksize << 3) - 1);
  60}
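/*
 * Worked example (illustrative only, assuming a 4 KiB block size):
 * s_blocksize_bits is 12, so each bitmap block covers 4096 << 3 = 32768
 * blocks, and block 100000 maps to bmap_nr = 100000 >> 15 = 3 with
 * offset = 100000 & 32767 = 1696.
 */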
  61
  62int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
  63{
  64	unsigned int bmap, offset;
  65	unsigned int bmap_count = reiserfs_bmap_count(s);
  66
  67	if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
  68		reiserfs_error(s, "vs-4010",
  69			       "block number is out of range %lu (%u)",
  70			       block, SB_BLOCK_COUNT(s));
  71		return 0;
  72	}
  73
  74	get_bit_address(s, block, &bmap, &offset);
  75
  76	/*
  77	 * Old format filesystem? Unlikely, but the bitmaps are all
  78	 * up front so we need to account for it.
  79	 */
  80	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
  81			      &REISERFS_SB(s)->s_properties))) {
  82		b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
  83		if (block >= bmap1 &&
  84		    block <= bmap1 + bmap_count) {
  85			reiserfs_error(s, "vs-4019", "bitmap block %lu(%u) "
  86				       "can't be freed or reused",
  87				       block, bmap_count);
  88			return 0;
  89		}
  90	} else {
  91		if (offset == 0) {
  92			reiserfs_error(s, "vs-4020", "bitmap block %lu(%u) "
  93				       "can't be freed or reused",
  94				       block, bmap_count);
  95			return 0;
  96		}
  97	}
  98
  99	if (bmap >= bmap_count) {
 100		reiserfs_error(s, "vs-4030", "bitmap for requested block "
 101			       "is out of range: block=%lu, bitmap_nr=%u",
 102			       block, bmap);
 103		return 0;
 104	}
 105
 106	if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
 107		reiserfs_error(s, "vs-4050", "this is root block (%u), "
 108			       "it must be busy", SB_ROOT_BLOCK(s));
 109		return 0;
 110	}
 111
 112	return 1;
 113}
 114
 115/*
 116 * Searches in journal structures for a given block number (bmap, off).
 117 * If block is found in reiserfs journal it suggests next free block
 118 * candidate to test.
 119 */
 120static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
 121				      int off, int *next)
 122{
 123	b_blocknr_t tmp;
 124
 125	if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) {
 126		if (tmp) {	/* hint supplied */
 127			*next = tmp;
 128			PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
 129		} else {
 130			(*next) = off + 1;  /* inc offset to avoid looping. */
 131			PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
 132		}
 133		PROC_INFO_INC(s, scan_bitmap.retry);
 134		return 1;
 135	}
 136	return 0;
 137}
 138
 139/*
 140 * Searches for a window of zero bits with given minimum and maximum
 141 * lengths in one bitmap block
 142 */
 143static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
 144			     unsigned int bmap_n, int *beg, int boundary,
 145			     int min, int max, int unfm)
 146{
 147	struct super_block *s = th->t_super;
 148	struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
 149	struct buffer_head *bh;
 150	int end, next;
 151	int org = *beg;
 152
 153	BUG_ON(!th->t_trans_id);
 154	RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
 155	       "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
 156	PROC_INFO_INC(s, scan_bitmap.bmap);
 157
 158	if (!bi) {
 159		reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer "
 160			       "for bitmap %d", bmap_n);
 161		return 0;
 162	}
 163
 164	bh = reiserfs_read_bitmap_block(s, bmap_n);
 165	if (bh == NULL)
 166		return 0;
 167
 168	while (1) {
 169cont:
 170		if (bi->free_count < min) {
 171			brelse(bh);
 172			return 0;	/* No free blocks in this bitmap */
 173		}
 174
 175		/* search for a first zero bit -- beginning of a window */
 176		*beg = reiserfs_find_next_zero_le_bit
 177		    ((unsigned long *)(bh->b_data), boundary, *beg);
 178
 179		/*
  180		 * the search for a zero bit failed, or the rest of the bitmap
  181		 * block cannot contain a zero window of the minimum size
 182		 */
 183		if (*beg + min > boundary) {
 184			brelse(bh);
 185			return 0;
 186		}
 187
 188		if (unfm && is_block_in_journal(s, bmap_n, *beg, beg))
 189			continue;
 190		/* first zero bit found; we check next bits */
 191		for (end = *beg + 1;; end++) {
 192			if (end >= *beg + max || end >= boundary
 193			    || reiserfs_test_le_bit(end, bh->b_data)) {
 194				next = end;
 195				break;
 196			}
 197
 198			/*
 199			 * finding the other end of zero bit window requires
 200			 * looking into journal structures (in case of
 201			 * searching for free blocks for unformatted nodes)
 202			 */
 203			if (unfm && is_block_in_journal(s, bmap_n, end, &next))
 204				break;
 205		}
 206
 207		/*
 208		 * now (*beg) points to beginning of zero bits window,
 209		 * (end) points to one bit after the window end
 210		 */
 211
 212		/* found window of proper size */
 213		if (end - *beg >= min) {
 214			int i;
 215			reiserfs_prepare_for_journal(s, bh, 1);
 216			/*
  217			 * try to mark all blocks as used, checking whether
  218			 * they are still free
 219			 */
 220			for (i = *beg; i < end; i++) {
 221				/* Don't check in journal again. */
 222				if (reiserfs_test_and_set_le_bit
 223				    (i, bh->b_data)) {
 224					/*
 225					 * bit was set by another process while
 226					 * we slept in prepare_for_journal()
 227					 */
 228					PROC_INFO_INC(s, scan_bitmap.stolen);
 229
 230					/*
  231					 * we can continue with a smaller set
  232					 * of allocated blocks, if the length of
  233					 * this set is at least `min'
 234					 */
 235					if (i >= *beg + min) {
 236						end = i;
 237						break;
 238					}
 239
 240					/*
  241					 * otherwise we clear all the bits
  242					 * that were set ...
 243					 */
 244					while (--i >= *beg)
 245						reiserfs_clear_le_bit
 246						    (i, bh->b_data);
 247					reiserfs_restore_prepared_buffer(s, bh);
 248					*beg = org;
 249
 250					/*
 251					 * Search again in current block
 252					 * from beginning
 253					 */
 254					goto cont;
 255				}
 256			}
 257			bi->free_count -= (end - *beg);
 258			journal_mark_dirty(th, bh);
 259			brelse(bh);
 260
 261			/* free block count calculation */
 262			reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
 263						     1);
 264			PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
 265			journal_mark_dirty(th, SB_BUFFER_WITH_SB(s));
 266
 267			return end - (*beg);
 268		} else {
 269			*beg = next;
 270		}
 271	}
 272}
 273
 274static int bmap_hash_id(struct super_block *s, u32 id)
 275{
 276	char *hash_in = NULL;
 277	unsigned long hash;
 278	unsigned bm;
 279
 280	if (id <= 2) {
 281		bm = 1;
 282	} else {
 283		hash_in = (char *)(&id);
 284		hash = keyed_hash(hash_in, 4);
 285		bm = hash % reiserfs_bmap_count(s);
 286		if (!bm)
 287			bm = 1;
 288	}
 289	/* this can only be true when SB_BMAP_NR = 1 */
 290	if (bm >= reiserfs_bmap_count(s))
 291		bm = 0;
 292	return bm;
 293}
 294
 295/*
 296 * hashes the id and then returns > 0 if the block group for the
 297 * corresponding hash is full
 298 */
 299static inline int block_group_used(struct super_block *s, u32 id)
 300{
 301	int bm = bmap_hash_id(s, id);
 302	struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm];
 303
 304	/*
 305	 * If we don't have cached information on this bitmap block, we're
 306	 * going to have to load it later anyway. Loading it here allows us
 307	 * to make a better decision. This favors long-term performance gain
 308	 * with a better on-disk layout vs. a short term gain of skipping the
 309	 * read and potentially having a bad placement.
 310	 */
 311	if (info->free_count == UINT_MAX) {
 312		struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
 313		brelse(bh);
 314	}
 315
 316	if (info->free_count > ((s->s_blocksize << 3) * 60 / 100)) {
 317		return 0;
 318	}
 319	return 1;
 320}
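/*
 * Illustration (assuming a 4 KiB block size): a bitmap block covers
 * 4096 << 3 = 32768 blocks, so a group counts as "used" once its
 * free_count drops to 32768 * 60 / 100 = 19660 free blocks or fewer.
 */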
 321
 322/*
 323 * the packing is returned in disk byte order
 324 */
 325__le32 reiserfs_choose_packing(struct inode * dir)
 326{
 327	__le32 packing;
 328	if (TEST_OPTION(packing_groups, dir->i_sb)) {
 329		u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
 330		/*
 331		 * some versions of reiserfsck expect packing locality 1 to be
 332		 * special
 333		 */
 334		if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir))
 335			packing = INODE_PKEY(dir)->k_objectid;
 336		else
 337			packing = INODE_PKEY(dir)->k_dir_id;
 338	} else
 339		packing = INODE_PKEY(dir)->k_objectid;
 340	return packing;
 341}
 342
 343/*
 344 * Tries to find contiguous zero bit window (given size) in given region of
 345 * bitmap and place new blocks there. Returns number of allocated blocks.
 346 */
 347static int scan_bitmap(struct reiserfs_transaction_handle *th,
 348		       b_blocknr_t * start, b_blocknr_t finish,
 349		       int min, int max, int unfm, sector_t file_block)
 350{
 351	int nr_allocated = 0;
 352	struct super_block *s = th->t_super;
 353	unsigned int bm, off;
 354	unsigned int end_bm, end_off;
 355	unsigned int off_max = s->s_blocksize << 3;
 356
 357	BUG_ON(!th->t_trans_id);
 358	PROC_INFO_INC(s, scan_bitmap.call);
 359
 360	/* No point in looking for more free blocks */
 361	if (SB_FREE_BLOCKS(s) <= 0)
 362		return 0;
 363
 364	get_bit_address(s, *start, &bm, &off);
 365	get_bit_address(s, finish, &end_bm, &end_off);
 366	if (bm > reiserfs_bmap_count(s))
 367		return 0;
 368	if (end_bm > reiserfs_bmap_count(s))
 369		end_bm = reiserfs_bmap_count(s);
 370
 371	/*
 372	 * When the bitmap is more than 10% free, anyone can allocate.
 373	 * When it's less than 10% free, only files that already use the
 374	 * bitmap are allowed. Once we pass 80% full, this restriction
 375	 * is lifted.
 376	 *
 377	 * We do this so that files that grow later still have space close to
 378	 * their original allocation. This improves locality, and presumably
 379	 * performance as a result.
 380	 *
 381	 * This is only an allocation policy and does not make up for getting a
 382	 * bad hint. Decent hinting must be implemented for this to work well.
 383	 */
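	/*
	 * Illustration of the two thresholds used below (assuming a 4 KiB
	 * block size): the skip_busy pass only runs while SB_FREE_BLOCKS(s)
	 * exceeds SB_BLOCK_COUNT(s) / 20 (5% free), and its per-group check
	 * compares free_count against (4096 << 3) / 10 = 3276 bits (10% of
	 * a bitmap group).
	 */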
 384	if (TEST_OPTION(skip_busy, s)
 385	    && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) {
 386		for (; bm < end_bm; bm++, off = 0) {
 387			if ((off && (!unfm || (file_block != 0)))
 388			    || SB_AP_BITMAP(s)[bm].free_count >
 389			    (s->s_blocksize << 3) / 10)
 390				nr_allocated =
 391				    scan_bitmap_block(th, bm, &off, off_max,
 392						      min, max, unfm);
 393			if (nr_allocated)
 394				goto ret;
 395		}
 396		/* we know from above that start is a reasonable number */
 397		get_bit_address(s, *start, &bm, &off);
 398	}
 399
 400	for (; bm < end_bm; bm++, off = 0) {
 401		nr_allocated =
 402		    scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
 403		if (nr_allocated)
 404			goto ret;
 405	}
 406
 407	nr_allocated =
 408	    scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
 409
 410ret:
 411	*start = bm * off_max + off;
 412	return nr_allocated;
 413
 414}
 415
 416static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
 417				 struct inode *inode, b_blocknr_t block,
 418				 int for_unformatted)
 419{
 420	struct super_block *s = th->t_super;
 421	struct reiserfs_super_block *rs;
 422	struct buffer_head *sbh, *bmbh;
 423	struct reiserfs_bitmap_info *apbi;
 424	unsigned int nr, offset;
 425
 426	BUG_ON(!th->t_trans_id);
 427	PROC_INFO_INC(s, free_block);
 428	rs = SB_DISK_SUPER_BLOCK(s);
 429	sbh = SB_BUFFER_WITH_SB(s);
 430	apbi = SB_AP_BITMAP(s);
 431
 432	get_bit_address(s, block, &nr, &offset);
 433
 434	if (nr >= reiserfs_bmap_count(s)) {
 435		reiserfs_error(s, "vs-4075", "block %lu is out of range",
 436			       block);
 437		return;
 438	}
 439
 440	bmbh = reiserfs_read_bitmap_block(s, nr);
 441	if (!bmbh)
 442		return;
 443
 444	reiserfs_prepare_for_journal(s, bmbh, 1);
 445
 446	/* clear bit for the given block in bit map */
 447	if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) {
 448		reiserfs_error(s, "vs-4080",
 449			       "block %lu: bit already cleared", block);
 450	}
 451	apbi[nr].free_count++;
 452	journal_mark_dirty(th, bmbh);
 453	brelse(bmbh);
 454
 455	reiserfs_prepare_for_journal(s, sbh, 1);
 456	/* update super block */
 457	set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
 458
 459	journal_mark_dirty(th, sbh);
 460	if (for_unformatted) {
 461		int depth = reiserfs_write_unlock_nested(s);
 462		dquot_free_block_nodirty(inode, 1);
 463		reiserfs_write_lock_nested(s, depth);
 464	}
 465}
 466
 467void reiserfs_free_block(struct reiserfs_transaction_handle *th,
 468			 struct inode *inode, b_blocknr_t block,
 469			 int for_unformatted)
 470{
 471	struct super_block *s = th->t_super;
 472
 473	BUG_ON(!th->t_trans_id);
 474	RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
 475	if (!is_reusable(s, block, 1))
 476		return;
 477
 478	if (block > sb_block_count(REISERFS_SB(s)->s_rs)) {
 479		reiserfs_error(th->t_super, "bitmap-4072",
 480			       "Trying to free block outside file system "
 481			       "boundaries (%lu > %lu)",
 482			       block, sb_block_count(REISERFS_SB(s)->s_rs));
 483		return;
 484	}
 485	/* mark it before we clear it, just in case */
 486	journal_mark_freed(th, s, block);
 487	_reiserfs_free_block(th, inode, block, for_unformatted);
 488}
 489
 490/* preallocated blocks don't need to be run through journal_mark_freed */
 491static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
 492					 struct inode *inode, b_blocknr_t block)
 493{
 494	BUG_ON(!th->t_trans_id);
 495	RFALSE(!th->t_super,
 496	       "vs-4060: trying to free block on nonexistent device");
 497	if (!is_reusable(th->t_super, block, 1))
 498		return;
 499	_reiserfs_free_block(th, inode, block, 1);
 500}
 501
 502static void __discard_prealloc(struct reiserfs_transaction_handle *th,
 503			       struct reiserfs_inode_info *ei)
 504{
 505	unsigned long save = ei->i_prealloc_block;
 506	int dirty = 0;
 507	struct inode *inode = &ei->vfs_inode;
 508
 509	BUG_ON(!th->t_trans_id);
 510#ifdef CONFIG_REISERFS_CHECK
 511	if (ei->i_prealloc_count < 0)
 512		reiserfs_error(th->t_super, "zam-4001",
 513			       "inode has negative prealloc blocks count.");
 514#endif
 515	while (ei->i_prealloc_count > 0) {
 516		b_blocknr_t block_to_free;
 517
 518		/*
 519		 * reiserfs_free_prealloc_block can drop the write lock,
 520		 * which could allow another caller to free the same block.
 521		 * We can protect against it by modifying the prealloc
 522		 * state before calling it.
 523		 */
 524		block_to_free = ei->i_prealloc_block++;
 525		ei->i_prealloc_count--;
 526		reiserfs_free_prealloc_block(th, inode, block_to_free);
 527		dirty = 1;
 528	}
 529	if (dirty)
 530		reiserfs_update_sd(th, inode);
 531	ei->i_prealloc_block = save;
 532	list_del_init(&ei->i_prealloc_list);
 533}
 534
  535/* FIXME: This should be an inline function */
 536void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
 537			       struct inode *inode)
 538{
 539	struct reiserfs_inode_info *ei = REISERFS_I(inode);
 540
 541	BUG_ON(!th->t_trans_id);
 542	if (ei->i_prealloc_count)
 543		__discard_prealloc(th, ei);
 544}
 545
 546void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
 547{
 548	struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
 549
 550	BUG_ON(!th->t_trans_id);
 551	while (!list_empty(plist)) {
 552		struct reiserfs_inode_info *ei;
 553		ei = list_entry(plist->next, struct reiserfs_inode_info,
 554				i_prealloc_list);
 555#ifdef CONFIG_REISERFS_CHECK
 556		if (!ei->i_prealloc_count) {
 557			reiserfs_error(th->t_super, "zam-4001",
 558				       "inode is in prealloc list but has "
 559				       "no preallocated blocks.");
 560		}
 561#endif
 562		__discard_prealloc(th, ei);
 563	}
 564}
 565
 566void reiserfs_init_alloc_options(struct super_block *s)
 567{
 568	set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
 569	set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
 570	set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s));
 571}
 572
 573/* block allocator related options are parsed here */
 574int reiserfs_parse_alloc_options(struct super_block *s, char *options)
 575{
 576	char *this_char, *value;
 577
 578	/* clear default settings */
 579	REISERFS_SB(s)->s_alloc_options.bits = 0;
 580
 581	while ((this_char = strsep(&options, ":")) != NULL) {
 582		if ((value = strchr(this_char, '=')) != NULL)
 583			*value++ = 0;
 584
 585		if (!strcmp(this_char, "concentrating_formatted_nodes")) {
 586			int temp;
 587			SET_OPTION(concentrating_formatted_nodes);
 588			temp = (value
 589				&& *value) ? simple_strtoul(value, &value,
 590							    0) : 10;
 591			if (temp <= 0 || temp > 100) {
 592				REISERFS_SB(s)->s_alloc_options.border = 10;
 593			} else {
 594				REISERFS_SB(s)->s_alloc_options.border =
 595				    100 / temp;
 596			}
 597			continue;
 598		}
 599		if (!strcmp(this_char, "displacing_large_files")) {
 600			SET_OPTION(displacing_large_files);
 601			REISERFS_SB(s)->s_alloc_options.large_file_size =
 602			    (value
 603			     && *value) ? simple_strtoul(value, &value, 0) : 16;
 604			continue;
 605		}
 606		if (!strcmp(this_char, "displacing_new_packing_localities")) {
 607			SET_OPTION(displacing_new_packing_localities);
 608			continue;
 609		}
 610
 611		if (!strcmp(this_char, "old_hashed_relocation")) {
 612			SET_OPTION(old_hashed_relocation);
 613			continue;
 614		}
 615
 616		if (!strcmp(this_char, "new_hashed_relocation")) {
 617			SET_OPTION(new_hashed_relocation);
 618			continue;
 619		}
 620
 621		if (!strcmp(this_char, "dirid_groups")) {
 622			SET_OPTION(dirid_groups);
 623			continue;
 624		}
 625		if (!strcmp(this_char, "oid_groups")) {
 626			SET_OPTION(oid_groups);
 627			continue;
 628		}
 629		if (!strcmp(this_char, "packing_groups")) {
 630			SET_OPTION(packing_groups);
 631			continue;
 632		}
 633		if (!strcmp(this_char, "hashed_formatted_nodes")) {
 634			SET_OPTION(hashed_formatted_nodes);
 635			continue;
 636		}
 637
 638		if (!strcmp(this_char, "skip_busy")) {
 639			SET_OPTION(skip_busy);
 640			continue;
 641		}
 642
 643		if (!strcmp(this_char, "hundredth_slices")) {
 644			SET_OPTION(hundredth_slices);
 645			continue;
 646		}
 647
 648		if (!strcmp(this_char, "old_way")) {
 649			SET_OPTION(old_way);
 650			continue;
 651		}
 652
 653		if (!strcmp(this_char, "displace_based_on_dirid")) {
 654			SET_OPTION(displace_based_on_dirid);
 655			continue;
 656		}
 657
 658		if (!strcmp(this_char, "preallocmin")) {
 659			REISERFS_SB(s)->s_alloc_options.preallocmin =
 660			    (value
 661			     && *value) ? simple_strtoul(value, &value, 0) : 4;
 662			continue;
 663		}
 664
 665		if (!strcmp(this_char, "preallocsize")) {
 666			REISERFS_SB(s)->s_alloc_options.preallocsize =
 667			    (value
 668			     && *value) ? simple_strtoul(value, &value,
 669							 0) :
 670			    PREALLOCATION_SIZE;
 671			continue;
 672		}
 673
 674		reiserfs_warning(s, "zam-4001", "unknown option - %s",
 675				 this_char);
 676		return 1;
 677	}
 678
 679	reiserfs_info(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
 680	return 0;
 681}
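/*
 * Illustration (hypothetical; assumes the usual "alloc=" mount-option
 * wiring in super.c): mounting with
 *	-o alloc=skip_busy:dirid_groups:preallocsize=17
 * would reset the bit options above and then set only skip_busy and
 * dirid_groups, plus preallocsize = 17, since the string is split on
 * ':' and '=' introduces a value.
 */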
 682
 683static void print_sep(struct seq_file *seq, int *first)
 684{
 685	if (!*first)
 686		seq_puts(seq, ":");
 687	else
 688		*first = 0;
 689}
 690
 691void show_alloc_options(struct seq_file *seq, struct super_block *s)
 692{
 693	int first = 1;
 694
 695	if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) |
 696		(1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups)))
 697		return;
 698
 699	seq_puts(seq, ",alloc=");
 700
 701	if (TEST_OPTION(concentrating_formatted_nodes, s)) {
 702		print_sep(seq, &first);
 703		if (REISERFS_SB(s)->s_alloc_options.border != 10) {
 704			seq_printf(seq, "concentrating_formatted_nodes=%d",
 705				100 / REISERFS_SB(s)->s_alloc_options.border);
 706		} else
 707			seq_puts(seq, "concentrating_formatted_nodes");
 708	}
 709	if (TEST_OPTION(displacing_large_files, s)) {
 710		print_sep(seq, &first);
 711		if (REISERFS_SB(s)->s_alloc_options.large_file_size != 16) {
 712			seq_printf(seq, "displacing_large_files=%lu",
 713			    REISERFS_SB(s)->s_alloc_options.large_file_size);
 714		} else
 715			seq_puts(seq, "displacing_large_files");
 716	}
 717	if (TEST_OPTION(displacing_new_packing_localities, s)) {
 718		print_sep(seq, &first);
 719		seq_puts(seq, "displacing_new_packing_localities");
 720	}
 721	if (TEST_OPTION(old_hashed_relocation, s)) {
 722		print_sep(seq, &first);
 723		seq_puts(seq, "old_hashed_relocation");
 724	}
 725	if (TEST_OPTION(new_hashed_relocation, s)) {
 726		print_sep(seq, &first);
 727		seq_puts(seq, "new_hashed_relocation");
 728	}
 729	if (TEST_OPTION(dirid_groups, s)) {
 730		print_sep(seq, &first);
 731		seq_puts(seq, "dirid_groups");
 732	}
 733	if (TEST_OPTION(oid_groups, s)) {
 734		print_sep(seq, &first);
 735		seq_puts(seq, "oid_groups");
 736	}
 737	if (TEST_OPTION(packing_groups, s)) {
 738		print_sep(seq, &first);
 739		seq_puts(seq, "packing_groups");
 740	}
 741	if (TEST_OPTION(hashed_formatted_nodes, s)) {
 742		print_sep(seq, &first);
 743		seq_puts(seq, "hashed_formatted_nodes");
 744	}
 745	if (TEST_OPTION(skip_busy, s)) {
 746		print_sep(seq, &first);
 747		seq_puts(seq, "skip_busy");
 748	}
 749	if (TEST_OPTION(hundredth_slices, s)) {
 750		print_sep(seq, &first);
 751		seq_puts(seq, "hundredth_slices");
 752	}
 753	if (TEST_OPTION(old_way, s)) {
 754		print_sep(seq, &first);
 755		seq_puts(seq, "old_way");
 756	}
 757	if (TEST_OPTION(displace_based_on_dirid, s)) {
 758		print_sep(seq, &first);
 759		seq_puts(seq, "displace_based_on_dirid");
 760	}
 761	if (REISERFS_SB(s)->s_alloc_options.preallocmin != 0) {
 762		print_sep(seq, &first);
 763		seq_printf(seq, "preallocmin=%d",
 764				REISERFS_SB(s)->s_alloc_options.preallocmin);
 765	}
 766	if (REISERFS_SB(s)->s_alloc_options.preallocsize != 17) {
 767		print_sep(seq, &first);
 768		seq_printf(seq, "preallocsize=%d",
 769				REISERFS_SB(s)->s_alloc_options.preallocsize);
 770	}
 771}
 772
 773static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
 774{
 775	char *hash_in;
 776
 777	if (hint->formatted_node) {
 778		hash_in = (char *)&hint->key.k_dir_id;
 779	} else {
 780		if (!hint->inode) {
 781			/*hint->search_start = hint->beg;*/
 782			hash_in = (char *)&hint->key.k_dir_id;
 783		} else
 784		    if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 785			hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
 786		else
 787			hash_in =
 788			    (char *)(&INODE_PKEY(hint->inode)->k_objectid);
 789	}
 790
 791	hint->search_start =
 792	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
 793}
 794
 795/*
  796 * Relocation based on dirid: files are hashed into a given bitmap
  797 * block. Formatted nodes are unaffected; a separate policy covers them.
 798 */
 799static void dirid_groups(reiserfs_blocknr_hint_t * hint)
 800{
 801	unsigned long hash;
 802	__u32 dirid = 0;
 803	int bm = 0;
 804	struct super_block *sb = hint->th->t_super;
 805
 806	if (hint->inode)
 807		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
 808	else if (hint->formatted_node)
 809		dirid = hint->key.k_dir_id;
 810
 811	if (dirid) {
 812		bm = bmap_hash_id(sb, dirid);
 813		hash = bm * (sb->s_blocksize << 3);
 814		/* give a portion of the block group to metadata */
 815		if (hint->inode)
 816			hash += sb->s_blocksize / 2;
 817		hint->search_start = hash;
 818	}
 819}
 820
 821/*
  822 * Relocation based on oid: files are hashed into a given bitmap
  823 * block. Formatted nodes are unaffected; a separate policy covers them.
 824 */
 825static void oid_groups(reiserfs_blocknr_hint_t * hint)
 826{
 827	if (hint->inode) {
 828		unsigned long hash;
 829		__u32 oid;
 830		__u32 dirid;
 831		int bm;
 832
 833		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
 834
 835		/*
  836		 * keep the root dir and its first set of subdirs close to
 837		 * the start of the disk
 838		 */
 839		if (dirid <= 2)
 840			hash = (hint->inode->i_sb->s_blocksize << 3);
 841		else {
 842			oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
 843			bm = bmap_hash_id(hint->inode->i_sb, oid);
 844			hash = bm * (hint->inode->i_sb->s_blocksize << 3);
 845		}
 846		hint->search_start = hash;
 847	}
 848}
 849
 850/*
 851 * returns 1 if it finds an indirect item and gets valid hint info
 852 * from it, otherwise 0
 853 */
 854static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
 855{
 856	struct treepath *path;
 857	struct buffer_head *bh;
 858	struct item_head *ih;
 859	int pos_in_item;
 860	__le32 *item;
 861	int ret = 0;
 862
 863	/*
  864	 * reiserfs code can call this function without a pointer to a path
  865	 * structure; in that case we rely on the supplied search_start
 866	 */
 867	if (!hint->path)
 868		return 0;
 869
 870	path = hint->path;
 871	bh = get_last_bh(path);
 872	RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
 873	ih = tp_item_head(path);
 874	pos_in_item = path->pos_in_item;
 875	item = tp_item_body(path);
 876
 877	hint->search_start = bh->b_blocknr;
 878
 879	/*
 880	 * for indirect item: go to left and look for the first non-hole entry
 881	 * in the indirect item
 882	 */
 883	if (!hint->formatted_node && is_indirect_le_ih(ih)) {
 884		if (pos_in_item == I_UNFM_NUM(ih))
 885			pos_in_item--;
 886		while (pos_in_item >= 0) {
 887			int t = get_block_num(item, pos_in_item);
 888			if (t) {
 889				hint->search_start = t;
 890				ret = 1;
 891				break;
 892			}
 893			pos_in_item--;
 894		}
 895	}
 896
 897	/* does result value fit into specified region? */
 898	return ret;
 899}
 900
 901/*
  902 * If this is a formatted node, try to put it in the first part of the device,
  903 * sized as the percentage given by the mount option; otherwise try to put
  904 * it in the rest of the device.  This is not to say it is good code to do so,
 905 * but the effect should be measured.
 906 */
 907static inline void set_border_in_hint(struct super_block *s,
 908				      reiserfs_blocknr_hint_t * hint)
 909{
 910	b_blocknr_t border =
 911	    SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
 912
 913	if (hint->formatted_node)
 914		hint->end = border - 1;
 915	else
 916		hint->beg = border;
 917}
 918
 919static inline void displace_large_file(reiserfs_blocknr_hint_t * hint)
 920{
 921	if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 922		hint->search_start =
 923		    hint->beg +
 924		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id),
 925			       4) % (hint->end - hint->beg);
 926	else
 927		hint->search_start =
 928		    hint->beg +
 929		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid),
 930			       4) % (hint->end - hint->beg);
 931}
 932
 933static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint)
 934{
 935	char *hash_in;
 936
 937	if (!hint->inode)
 938		hash_in = (char *)&hint->key.k_dir_id;
 939	else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 940		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
 941	else
 942		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
 943
 944	hint->search_start =
 945	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
 946}
 947
 948static inline int
 949this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *
 950						   hint)
 951{
 952	return hint->block ==
 953	    REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size;
 954}
 955
 956#ifdef DISPLACE_NEW_PACKING_LOCALITIES
 957static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)
 958{
 959	struct in_core_key *key = &hint->key;
 960
 961	hint->th->displace_new_blocks = 0;
 962	hint->search_start =
 963	    hint->beg + keyed_hash((char *)(&key->k_objectid),
 964				   4) % (hint->end - hint->beg);
 965}
 966#endif
 967
 968static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint)
 969{
 970	b_blocknr_t border;
 971	u32 hash_in;
 972
 973	if (hint->formatted_node || hint->inode == NULL) {
 974		return 0;
 975	}
 976
 977	hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
 978	border =
 979	    hint->beg + (u32) keyed_hash(((char *)(&hash_in)),
 980					 4) % (hint->end - hint->beg - 1);
 981	if (border > hint->search_start)
 982		hint->search_start = border;
 983
 984	return 1;
 985}
 986
 987static inline int old_way(reiserfs_blocknr_hint_t * hint)
 988{
 989	b_blocknr_t border;
 990
 991	if (hint->formatted_node || hint->inode == NULL) {
 992		return 0;
 993	}
 994
 995	border =
 996	    hint->beg +
 997	    le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end -
 998							      hint->beg);
 999	if (border > hint->search_start)
1000		hint->search_start = border;
1001
1002	return 1;
1003}
1004
1005static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint)
1006{
1007	struct in_core_key *key = &hint->key;
1008	b_blocknr_t slice_start;
1009
1010	slice_start =
1011	    (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100);
1012	if (slice_start > hint->search_start
1013	    || slice_start + (hint->end / 100) <= hint->search_start) {
1014		hint->search_start = slice_start;
1015	}
1016}
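/*
 * Illustration: with hint->end = 1000000, each slice spans
 * 1000000 / 100 = 10000 blocks; a key hashing to slice 37 moves
 * search_start to block 370000 unless it already lies inside that slice.
 */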
1017
1018static void determine_search_start(reiserfs_blocknr_hint_t * hint,
1019				   int amount_needed)
1020{
1021	struct super_block *s = hint->th->t_super;
1022	int unfm_hint;
1023
1024	hint->beg = 0;
1025	hint->end = SB_BLOCK_COUNT(s) - 1;
1026
 1027	/* This is the former border algorithm, now with a tunable border offset */
1028	if (concentrating_formatted_nodes(s))
1029		set_border_in_hint(s, hint);
1030
1031#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1032	/*
1033	 * whenever we create a new directory, we displace it.  At first
1034	 * we will hash for location, later we might look for a moderately
1035	 * empty place for it
1036	 */
1037	if (displacing_new_packing_localities(s)
1038	    && hint->th->displace_new_blocks) {
1039		displace_new_packing_locality(hint);
1040
1041		/*
1042		 * we do not continue determine_search_start,
1043		 * if new packing locality is being displaced
1044		 */
1045		return;
1046	}
1047#endif
1048
1049	/*
1050	 * all persons should feel encouraged to add more special cases
1051	 * here and test them
1052	 */
1053
1054	if (displacing_large_files(s) && !hint->formatted_node
1055	    && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
1056		displace_large_file(hint);
1057		return;
1058	}
1059
1060	/*
1061	 * if none of our special cases is relevant, use the left
1062	 * neighbor in the tree order of the new node we are allocating for
1063	 */
1064	if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
1065		hash_formatted_node(hint);
1066		return;
1067	}
1068
1069	unfm_hint = get_left_neighbor(hint);
1070
1071	/*
 1072	 * Mimic old block allocator behaviour: if the VFS allowed preallocation,
 1073	 * new blocks are displaced based on directory ID.
1074	 * Also, if suggested search_start is less than last preallocated
1075	 * block, we start searching from it, assuming that HDD dataflow
1076	 * is faster in forward direction
1077	 */
1078	if (TEST_OPTION(old_way, s)) {
1079		if (!hint->formatted_node) {
1080			if (!reiserfs_hashed_relocation(s))
1081				old_way(hint);
1082			else if (!reiserfs_no_unhashed_relocation(s))
1083				old_hashed_relocation(hint);
1084
1085			if (hint->inode
1086			    && hint->search_start <
1087			    REISERFS_I(hint->inode)->i_prealloc_block)
1088				hint->search_start =
1089				    REISERFS_I(hint->inode)->i_prealloc_block;
1090		}
1091		return;
1092	}
1093
1094	/* This is an approach proposed by Hans */
1095	if (TEST_OPTION(hundredth_slices, s)
1096	    && !(displacing_large_files(s) && !hint->formatted_node)) {
1097		hundredth_slices(hint);
1098		return;
1099	}
1100
1101	/* old_hashed_relocation only works on unformatted */
1102	if (!unfm_hint && !hint->formatted_node &&
1103	    TEST_OPTION(old_hashed_relocation, s)) {
1104		old_hashed_relocation(hint);
1105	}
1106
1107	/* new_hashed_relocation works with both formatted/unformatted nodes */
1108	if ((!unfm_hint || hint->formatted_node) &&
1109	    TEST_OPTION(new_hashed_relocation, s)) {
1110		new_hashed_relocation(hint);
1111	}
1112
1113	/* dirid grouping works only on unformatted nodes */
1114	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
1115		dirid_groups(hint);
1116	}
1117#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1118	if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
1119		dirid_groups(hint);
1120	}
1121#endif
1122
1123	/* oid grouping works only on unformatted nodes */
1124	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) {
1125		oid_groups(hint);
1126	}
1127	return;
1128}
1129
1130static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
1131{
1132	/* make minimum size a mount option and benchmark both ways */
1133	/* we preallocate blocks only for regular files, specific size */
1134	/* benchmark preallocating always and see what happens */
1135
1136	hint->prealloc_size = 0;
1137
1138	if (!hint->formatted_node && hint->preallocate) {
1139		if (S_ISREG(hint->inode->i_mode) && !IS_PRIVATE(hint->inode)
1140		    && hint->inode->i_size >=
1141		    REISERFS_SB(hint->th->t_super)->s_alloc_options.
1142		    preallocmin * hint->inode->i_sb->s_blocksize)
1143			hint->prealloc_size =
1144			    REISERFS_SB(hint->th->t_super)->s_alloc_options.
1145			    preallocsize - 1;
1146	}
1147	return CARRY_ON;
1148}
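/*
 * Illustration: with the defaults implied by show_alloc_options() above
 * (preallocmin == 0, preallocsize == 17), a preallocating write to a
 * regular, non-private file ends up with hint->prealloc_size = 16, i.e.
 * up to 16 blocks are requested beyond amount_needed.
 */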
1149
1150static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
1151						 b_blocknr_t * new_blocknrs,
1152						 b_blocknr_t start,
1153						 b_blocknr_t finish, int min,
1154						 int amount_needed,
1155						 int prealloc_size)
1156{
1157	int rest = amount_needed;
1158	int nr_allocated;
1159
1160	while (rest > 0 && start <= finish) {
1161		nr_allocated = scan_bitmap(hint->th, &start, finish, min,
1162					   rest + prealloc_size,
1163					   !hint->formatted_node, hint->block);
1164
1165		if (nr_allocated == 0)	/* no new blocks allocated, return */
1166			break;
1167
1168		/* fill free_blocknrs array first */
1169		while (rest > 0 && nr_allocated > 0) {
1170			*new_blocknrs++ = start++;
1171			rest--;
1172			nr_allocated--;
1173		}
1174
1175		/* do we have something to fill prealloc. array also ? */
1176		if (nr_allocated > 0) {
1177			/*
 1178			 * it means prealloc_size was greater than 0 and
1179			 * we do preallocation
1180			 */
1181			list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
1182				 &SB_JOURNAL(hint->th->t_super)->
1183				 j_prealloc_list);
1184			REISERFS_I(hint->inode)->i_prealloc_block = start;
1185			REISERFS_I(hint->inode)->i_prealloc_count =
1186			    nr_allocated;
1187			break;
1188		}
1189	}
1190
1191	return (amount_needed - rest);
1192}
1193
1194static inline int blocknrs_and_prealloc_arrays_from_search_start
1195    (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs,
1196     int amount_needed) {
1197	struct super_block *s = hint->th->t_super;
1198	b_blocknr_t start = hint->search_start;
1199	b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
1200	int passno = 0;
1201	int nr_allocated = 0;
1202	int depth;
1203
1204	determine_prealloc_size(hint);
1205	if (!hint->formatted_node) {
1206		int quota_ret;
1207#ifdef REISERQUOTA_DEBUG
1208		reiserfs_debug(s, REISERFS_DEBUG_CODE,
1209			       "reiserquota: allocating %d blocks id=%u",
1210			       amount_needed, hint->inode->i_uid);
1211#endif
1212		depth = reiserfs_write_unlock_nested(s);
1213		quota_ret =
1214		    dquot_alloc_block_nodirty(hint->inode, amount_needed);
1215		if (quota_ret) {	/* Quota exceeded? */
1216			reiserfs_write_lock_nested(s, depth);
1217			return QUOTA_EXCEEDED;
1218		}
1219		if (hint->preallocate && hint->prealloc_size) {
1220#ifdef REISERQUOTA_DEBUG
1221			reiserfs_debug(s, REISERFS_DEBUG_CODE,
1222				       "reiserquota: allocating (prealloc) %d blocks id=%u",
1223				       hint->prealloc_size, hint->inode->i_uid);
1224#endif
1225			quota_ret = dquot_prealloc_block_nodirty(hint->inode,
1226							 hint->prealloc_size);
1227			if (quota_ret)
1228				hint->preallocate = hint->prealloc_size = 0;
1229		}
1230		/* for unformatted nodes, force large allocations */
1231		reiserfs_write_lock_nested(s, depth);
1232	}
1233
1234	do {
1235		switch (passno++) {
1236		case 0:	/* Search from hint->search_start to end of disk */
1237			start = hint->search_start;
1238			finish = SB_BLOCK_COUNT(s) - 1;
1239			break;
1240		case 1:	/* Search from hint->beg to hint->search_start */
1241			start = hint->beg;
1242			finish = hint->search_start;
1243			break;
1244		case 2:	/* Last chance: Search from 0 to hint->beg */
1245			start = 0;
1246			finish = hint->beg;
1247			break;
1248		default:
1249			/* We've tried searching everywhere, not enough space */
1250			/* Free the blocks */
1251			if (!hint->formatted_node) {
1252#ifdef REISERQUOTA_DEBUG
1253				reiserfs_debug(s, REISERFS_DEBUG_CODE,
1254					       "reiserquota: freeing (nospace) %d blocks id=%u",
1255					       amount_needed +
1256					       hint->prealloc_size -
1257					       nr_allocated,
1258					       hint->inode->i_uid);
1259#endif
1260				/* Free not allocated blocks */
1261				depth = reiserfs_write_unlock_nested(s);
1262				dquot_free_block_nodirty(hint->inode,
1263					amount_needed + hint->prealloc_size -
1264					nr_allocated);
1265				reiserfs_write_lock_nested(s, depth);
1266			}
1267			while (nr_allocated--)
1268				reiserfs_free_block(hint->th, hint->inode,
1269						    new_blocknrs[nr_allocated],
1270						    !hint->formatted_node);
1271
1272			return NO_DISK_SPACE;
1273		}
1274	} while ((nr_allocated += allocate_without_wrapping_disk(hint,
1275								 new_blocknrs +
1276								 nr_allocated,
1277								 start, finish,
1278								 1,
1279								 amount_needed -
1280								 nr_allocated,
1281								 hint->
1282								 prealloc_size))
1283		 < amount_needed);
1284	if (!hint->formatted_node &&
1285	    amount_needed + hint->prealloc_size >
1286	    nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) {
 1287		/* Some of the requested preallocation blocks were not allocated */
1288#ifdef REISERQUOTA_DEBUG
1289		reiserfs_debug(s, REISERFS_DEBUG_CODE,
1290			       "reiserquota: freeing (failed prealloc) %d blocks id=%u",
1291			       amount_needed + hint->prealloc_size -
1292			       nr_allocated -
1293			       REISERFS_I(hint->inode)->i_prealloc_count,
1294			       hint->inode->i_uid);
1295#endif
1296
1297		depth = reiserfs_write_unlock_nested(s);
1298		dquot_free_block_nodirty(hint->inode, amount_needed +
1299					 hint->prealloc_size - nr_allocated -
1300					 REISERFS_I(hint->inode)->
1301					 i_prealloc_count);
1302		reiserfs_write_lock_nested(s, depth);
1303	}
1304
1305	return CARRY_ON;
1306}
1307
1308/* grab new blocknrs from preallocated list */
1309/* return amount still needed after using them */
1310static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
1311					      b_blocknr_t * new_blocknrs,
1312					      int amount_needed)
1313{
1314	struct inode *inode = hint->inode;
1315
1316	if (REISERFS_I(inode)->i_prealloc_count > 0) {
1317		while (amount_needed) {
1318
1319			*new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++;
1320			REISERFS_I(inode)->i_prealloc_count--;
1321
1322			amount_needed--;
1323
1324			if (REISERFS_I(inode)->i_prealloc_count <= 0) {
1325				list_del(&REISERFS_I(inode)->i_prealloc_list);
1326				break;
1327			}
1328		}
1329	}
1330	/* return amount still needed after using preallocated blocks */
1331	return amount_needed;
1332}
1333
1334int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint,
1335			       b_blocknr_t *new_blocknrs,
1336			       int amount_needed,
1337			       /* Amount of blocks we have already reserved */
1338			       int reserved_by_us)
1339{
1340	int initial_amount_needed = amount_needed;
1341	int ret;
1342	struct super_block *s = hint->th->t_super;
1343
1344	/* Check if there is enough space, taking into account reserved space */
1345	if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks <
1346	    amount_needed - reserved_by_us)
1347		return NO_DISK_SPACE;
1348	/* should this be if !hint->inode &&  hint->preallocate? */
1349	/* do you mean hint->formatted_node can be removed ? - Zam */
1350	/*
1351	 * hint->formatted_node cannot be removed because we try to access
1352	 * inode information here, and there is often no inode associated with
1353	 * metadata allocations - green
1354	 */
1355
1356	if (!hint->formatted_node && hint->preallocate) {
1357		amount_needed = use_preallocated_list_if_available
1358		    (hint, new_blocknrs, amount_needed);
1359
1360		/*
1361		 * We have all the block numbers we need from the
1362		 * prealloc list
1363		 */
1364		if (amount_needed == 0)
1365			return CARRY_ON;
1366		new_blocknrs += (initial_amount_needed - amount_needed);
1367	}
1368
1369	/* find search start and save it in hint structure */
1370	determine_search_start(hint, amount_needed);
1371	if (hint->search_start >= SB_BLOCK_COUNT(s))
1372		hint->search_start = SB_BLOCK_COUNT(s) - 1;
1373
1374	/* allocation itself; fill new_blocknrs and preallocation arrays */
1375	ret = blocknrs_and_prealloc_arrays_from_search_start
1376	    (hint, new_blocknrs, amount_needed);
1377
1378	/*
1379	 * We used prealloc. list to fill (partially) new_blocknrs array.
1380	 * If final allocation fails we need to return blocks back to
1381	 * prealloc. list or just free them. -- Zam (I chose second
1382	 * variant)
1383	 */
1384	if (ret != CARRY_ON) {
1385		while (amount_needed++ < initial_amount_needed) {
1386			reiserfs_free_block(hint->th, hint->inode,
1387					    *(--new_blocknrs), 1);
1388		}
1389	}
1390	return ret;
1391}
1392
1393void reiserfs_cache_bitmap_metadata(struct super_block *sb,
1394                                    struct buffer_head *bh,
1395                                    struct reiserfs_bitmap_info *info)
1396{
1397	unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size);
1398
1399	/* The first bit must ALWAYS be 1 */
1400	if (!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data))
1401		reiserfs_error(sb, "reiserfs-2025", "bitmap block %lu is "
1402			       "corrupted: first bit must be 1", bh->b_blocknr);
1403
1404	info->free_count = 0;
1405
1406	while (--cur >= (unsigned long *)bh->b_data) {
1407		/* 0 and ~0 are special, we can optimize for them */
1408		if (*cur == 0)
1409			info->free_count += BITS_PER_LONG;
1410		else if (*cur != ~0L)	/* A mix, investigate */
1411			info->free_count += BITS_PER_LONG - hweight_long(*cur);
1412	}
1413}
1414
1415struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1416                                               unsigned int bitmap)
1417{
1418	b_blocknr_t block = (sb->s_blocksize << 3) * bitmap;
1419	struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap;
1420	struct buffer_head *bh;
1421
1422	/*
1423	 * Way old format filesystems had the bitmaps packed up front.
1424	 * I doubt there are any of these left, but just in case...
1425	 */
1426	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
1427			      &REISERFS_SB(sb)->s_properties)))
1428		block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap;
1429	else if (bitmap == 0)
1430		block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
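	/*
	 * Illustration (assuming a 4 KiB block size and the usual 64 KiB
	 * REISERFS_DISK_OFFSET_IN_BYTES): bitmap 0 is read from block
	 * (65536 >> 12) + 1 = 17, while bitmap N (N > 0) sits at block
	 * N * 32768, the first block of the group it describes.
	 */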
1431
1432	bh = sb_bread(sb, block);
1433	if (bh == NULL)
1434		reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
1435		                 "reading failed", __func__, block);
1436	else {
1437		if (buffer_locked(bh)) {
1438			int depth;
1439			PROC_INFO_INC(sb, scan_bitmap.wait);
1440			depth = reiserfs_write_unlock_nested(sb);
1441			__wait_on_buffer(bh);
1442			reiserfs_write_lock_nested(sb, depth);
1443		}
1444		BUG_ON(!buffer_uptodate(bh));
1445		BUG_ON(atomic_read(&bh->b_count) == 0);
1446
1447		if (info->free_count == UINT_MAX)
1448			reiserfs_cache_bitmap_metadata(sb, bh, info);
1449	}
1450
1451	return bh;
1452}
1453
1454int reiserfs_init_bitmap_cache(struct super_block *sb)
1455{
1456	struct reiserfs_bitmap_info *bitmap;
1457	unsigned int bmap_nr = reiserfs_bmap_count(sb);
1458
1459	bitmap = vmalloc(array_size(bmap_nr, sizeof(*bitmap)));
1460	if (bitmap == NULL)
1461		return -ENOMEM;
1462
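	/*
	 * Filling the array with 0xff leaves every free_count equal to
	 * UINT_MAX, the "not yet cached" sentinel tested in
	 * block_group_used() and reiserfs_read_bitmap_block() above.
	 */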
1463	memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr);
1464
1465	SB_AP_BITMAP(sb) = bitmap;
1466
1467	return 0;
1468}
1469
1470void reiserfs_free_bitmap_cache(struct super_block *sb)
1471{
1472	if (SB_AP_BITMAP(sb)) {
1473		vfree(SB_AP_BITMAP(sb));
1474		SB_AP_BITMAP(sb) = NULL;
1475	}
1476}
v4.6
   1/*
   2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
   3 */
   4/* Reiserfs block (de)allocator, bitmap-based. */
   5
   6#include <linux/time.h>
   7#include "reiserfs.h"
   8#include <linux/errno.h>
   9#include <linux/buffer_head.h>
  10#include <linux/kernel.h>
  11#include <linux/pagemap.h>
  12#include <linux/vmalloc.h>
  13#include <linux/quotaops.h>
  14#include <linux/seq_file.h>
  15
  16#define PREALLOCATION_SIZE 9
  17
  18/* different reiserfs block allocator options */
  19
  20#define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits)
  21
  22#define  _ALLOC_concentrating_formatted_nodes 0
  23#define  _ALLOC_displacing_large_files 1
  24#define  _ALLOC_displacing_new_packing_localities 2
  25#define  _ALLOC_old_hashed_relocation 3
  26#define  _ALLOC_new_hashed_relocation 4
  27#define  _ALLOC_skip_busy 5
  28#define  _ALLOC_displace_based_on_dirid 6
  29#define  _ALLOC_hashed_formatted_nodes 7
  30#define  _ALLOC_old_way 8
  31#define  _ALLOC_hundredth_slices 9
  32#define  _ALLOC_dirid_groups 10
  33#define  _ALLOC_oid_groups 11
  34#define  _ALLOC_packing_groups 12
  35
  36#define  concentrating_formatted_nodes(s)	test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
  37#define  displacing_large_files(s)		test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
  38#define  displacing_new_packing_localities(s)	test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))
  39
  40#define SET_OPTION(optname) \
  41   do { \
  42	reiserfs_info(s, "block allocator option \"%s\" is set", #optname); \
  43	set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
  44    } while(0)
  45#define TEST_OPTION(optname, s) \
  46    test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
  47
  48static inline void get_bit_address(struct super_block *s,
  49				   b_blocknr_t block,
  50				   unsigned int *bmap_nr,
  51				   unsigned int *offset)
  52{
  53	/*
  54	 * It is in the bitmap block number equal to the block
  55	 * number divided by the number of bits in a block.
  56	 */
  57	*bmap_nr = block >> (s->s_blocksize_bits + 3);
  58	/* Within that bitmap block it is located at bit offset *offset. */
  59	*offset = block & ((s->s_blocksize << 3) - 1);
  60}
  61
  62int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
  63{
  64	unsigned int bmap, offset;
  65	unsigned int bmap_count = reiserfs_bmap_count(s);
  66
  67	if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
  68		reiserfs_error(s, "vs-4010",
  69			       "block number is out of range %lu (%u)",
  70			       block, SB_BLOCK_COUNT(s));
  71		return 0;
  72	}
  73
  74	get_bit_address(s, block, &bmap, &offset);
  75
  76	/*
  77	 * Old format filesystem? Unlikely, but the bitmaps are all
  78	 * up front so we need to account for it.
  79	 */
  80	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
  81			      &REISERFS_SB(s)->s_properties))) {
  82		b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
  83		if (block >= bmap1 &&
  84		    block <= bmap1 + bmap_count) {
  85			reiserfs_error(s, "vs-4019", "bitmap block %lu(%u) "
  86				       "can't be freed or reused",
  87				       block, bmap_count);
  88			return 0;
  89		}
  90	} else {
  91		if (offset == 0) {
  92			reiserfs_error(s, "vs-4020", "bitmap block %lu(%u) "
  93				       "can't be freed or reused",
  94				       block, bmap_count);
  95			return 0;
  96		}
  97	}
  98
  99	if (bmap >= bmap_count) {
 100		reiserfs_error(s, "vs-4030", "bitmap for requested block "
 101			       "is out of range: block=%lu, bitmap_nr=%u",
 102			       block, bmap);
 103		return 0;
 104	}
 105
 106	if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
 107		reiserfs_error(s, "vs-4050", "this is root block (%u), "
 108			       "it must be busy", SB_ROOT_BLOCK(s));
 109		return 0;
 110	}
 111
 112	return 1;
 113}
 114
 115/*
 116 * Searches in journal structures for a given block number (bmap, off).
 117 * If block is found in reiserfs journal it suggests next free block
 118 * candidate to test.
 119 */
 120static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
 121				      int off, int *next)
 122{
 123	b_blocknr_t tmp;
 124
 125	if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) {
 126		if (tmp) {	/* hint supplied */
 127			*next = tmp;
 128			PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
 129		} else {
 130			(*next) = off + 1;  /* inc offset to avoid looping. */
 131			PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
 132		}
 133		PROC_INFO_INC(s, scan_bitmap.retry);
 134		return 1;
 135	}
 136	return 0;
 137}
 138
 139/*
 140 * Searches for a window of zero bits with given minimum and maximum
 141 * lengths in one bitmap block
 142 */
 143static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
 144			     unsigned int bmap_n, int *beg, int boundary,
 145			     int min, int max, int unfm)
 146{
 147	struct super_block *s = th->t_super;
 148	struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
 149	struct buffer_head *bh;
 150	int end, next;
 151	int org = *beg;
 152
 153	BUG_ON(!th->t_trans_id);
 154	RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
 155	       "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
 156	PROC_INFO_INC(s, scan_bitmap.bmap);
 157
 158	if (!bi) {
 159		reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer "
 160			       "for bitmap %d", bmap_n);
 161		return 0;
 162	}
 163
 164	bh = reiserfs_read_bitmap_block(s, bmap_n);
 165	if (bh == NULL)
 166		return 0;
 167
 168	while (1) {
 169cont:
 170		if (bi->free_count < min) {
 171			brelse(bh);
 172			return 0;	/* No free blocks in this bitmap */
 173		}
 174
 175		/* search for a first zero bit -- beginning of a window */
 176		*beg = reiserfs_find_next_zero_le_bit
 177		    ((unsigned long *)(bh->b_data), boundary, *beg);
 178
 179		/*
 180		 * search for a zero bit fails or the rest of bitmap block
 181		 * cannot contain a zero window of minimum size
 182		 */
 183		if (*beg + min > boundary) {
 184			brelse(bh);
 185			return 0;
 186		}
 187
 188		if (unfm && is_block_in_journal(s, bmap_n, *beg, beg))
 189			continue;
 190		/* first zero bit found; we check next bits */
 191		for (end = *beg + 1;; end++) {
 192			if (end >= *beg + max || end >= boundary
 193			    || reiserfs_test_le_bit(end, bh->b_data)) {
 194				next = end;
 195				break;
 196			}
 197
 198			/*
 199			 * finding the other end of zero bit window requires
 200			 * looking into journal structures (in case of
 201			 * searching for free blocks for unformatted nodes)
 202			 */
 203			if (unfm && is_block_in_journal(s, bmap_n, end, &next))
 204				break;
 205		}
 206
 207		/*
 208		 * now (*beg) points to beginning of zero bits window,
 209		 * (end) points to one bit after the window end
 210		 */
 211
 212		/* found window of proper size */
 213		if (end - *beg >= min) {
 214			int i;
 215			reiserfs_prepare_for_journal(s, bh, 1);
 216			/*
 217			 * try to set all blocks used checking are
 218			 * they still free
 219			 */
 220			for (i = *beg; i < end; i++) {
 221				/* Don't check in journal again. */
 222				if (reiserfs_test_and_set_le_bit
 223				    (i, bh->b_data)) {
 224					/*
 225					 * bit was set by another process while
 226					 * we slept in prepare_for_journal()
 227					 */
 228					PROC_INFO_INC(s, scan_bitmap.stolen);
 229
 230					/*
 231					 * we can continue with smaller set
 232					 * of allocated blocks, if length of
 233					 * this set is more or equal to `min'
 234					 */
 235					if (i >= *beg + min) {
 236						end = i;
 237						break;
 238					}
 239
 240					/*
 241					 * otherwise we clear all bit
 242					 * were set ...
 243					 */
 244					while (--i >= *beg)
 245						reiserfs_clear_le_bit
 246						    (i, bh->b_data);
 247					reiserfs_restore_prepared_buffer(s, bh);
 248					*beg = org;
 249
 250					/*
 251					 * Search again in current block
 252					 * from beginning
 253					 */
 254					goto cont;
 255				}
 256			}
 257			bi->free_count -= (end - *beg);
 258			journal_mark_dirty(th, bh);
 259			brelse(bh);
 260
 261			/* free block count calculation */
 262			reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
 263						     1);
 264			PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
 265			journal_mark_dirty(th, SB_BUFFER_WITH_SB(s));
 266
 267			return end - (*beg);
 268		} else {
 269			*beg = next;
 270		}
 271	}
 272}
 273
 274static int bmap_hash_id(struct super_block *s, u32 id)
 275{
 276	char *hash_in = NULL;
 277	unsigned long hash;
 278	unsigned bm;
 279
 280	if (id <= 2) {
 281		bm = 1;
 282	} else {
 283		hash_in = (char *)(&id);
 284		hash = keyed_hash(hash_in, 4);
 285		bm = hash % reiserfs_bmap_count(s);
 286		if (!bm)
 287			bm = 1;
 288	}
 289	/* this can only be true when SB_BMAP_NR = 1 */
 290	if (bm >= reiserfs_bmap_count(s))
 291		bm = 0;
 292	return bm;
 293}
 294
 295/*
 296 * hashes the id and then returns > 0 if the block group for the
 297 * corresponding hash is full
 298 */
 299static inline int block_group_used(struct super_block *s, u32 id)
 300{
 301	int bm = bmap_hash_id(s, id);
 302	struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm];
 303
 304	/*
 305	 * If we don't have cached information on this bitmap block, we're
 306	 * going to have to load it later anyway. Loading it here allows us
 307	 * to make a better decision. This favors long-term performance gain
 308	 * with a better on-disk layout vs. a short term gain of skipping the
 309	 * read and potentially having a bad placement.
 310	 */
 311	if (info->free_count == UINT_MAX) {
 312		struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
 313		brelse(bh);
 314	}
 315
 316	if (info->free_count > ((s->s_blocksize << 3) * 60 / 100)) {
 317		return 0;
 318	}
 319	return 1;
 320}
 321
 322/*
 323 * the packing is returned in disk byte order
 324 */
 325__le32 reiserfs_choose_packing(struct inode * dir)
 326{
 327	__le32 packing;
 328	if (TEST_OPTION(packing_groups, dir->i_sb)) {
 329		u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
 330		/*
 331		 * some versions of reiserfsck expect packing locality 1 to be
 332		 * special
 333		 */
 334		if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir))
 335			packing = INODE_PKEY(dir)->k_objectid;
 336		else
 337			packing = INODE_PKEY(dir)->k_dir_id;
 338	} else
 339		packing = INODE_PKEY(dir)->k_objectid;
 340	return packing;
 341}
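
/*
 * In other words: with packing_groups enabled, objects created in a
 * directory keep using that directory's packing locality (k_dir_id)
 * for as long as the block group hashed from it still has room; once
 * that group is getting full, or when the locality is the special
 * value 1, the directory's own objectid is used instead.
 */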
 342
 343/*
 344 * Tries to find contiguous zero bit window (given size) in given region of
 345 * bitmap and place new blocks there. Returns number of allocated blocks.
 346 */
 347static int scan_bitmap(struct reiserfs_transaction_handle *th,
 348		       b_blocknr_t * start, b_blocknr_t finish,
 349		       int min, int max, int unfm, sector_t file_block)
 350{
 351	int nr_allocated = 0;
 352	struct super_block *s = th->t_super;
 353	unsigned int bm, off;
 354	unsigned int end_bm, end_off;
 355	unsigned int off_max = s->s_blocksize << 3;
 356
 357	BUG_ON(!th->t_trans_id);
 358	PROC_INFO_INC(s, scan_bitmap.call);
 359
  360	/* no free blocks at all, no point in searching */
 361	if (SB_FREE_BLOCKS(s) <= 0)
 362		return 0;
 363
 364	get_bit_address(s, *start, &bm, &off);
 365	get_bit_address(s, finish, &end_bm, &end_off);
 366	if (bm > reiserfs_bmap_count(s))
 367		return 0;
 368	if (end_bm > reiserfs_bmap_count(s))
 369		end_bm = reiserfs_bmap_count(s);
 370
  371	/*
  372	 * When a bitmap block is more than 10% free, anyone can allocate
  373	 * from it. When it is less than 10% free, only files that already
  374	 * use that bitmap are allowed in. Once the filesystem drops below
  375	 * 5% free (SB_BLOCK_COUNT(s) / 20 below), the restriction is lifted.
  376	 *
  377	 * We do this so that files that grow later still have space close to
  378	 * their original allocation. This improves locality, and presumably
  379	 * performance as a result.
  380	 *
  381	 * This is only an allocation policy and does not make up for getting a
  382	 * bad hint. Decent hinting must be implemented for this to work well.
  383	 */
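	/*
	 * Concretely, with 4 KiB blocks (32768 bits per bitmap block) the
	 * first pass below only scans bitmap blocks that still have more
	 * than 32768 / 10 = 3276 free bits, unless the caller already has
	 * an offset inside the block, and the pass is attempted only while
	 * more than SB_BLOCK_COUNT(s) / 20 (5%) of the filesystem is free.
	 */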
 384	if (TEST_OPTION(skip_busy, s)
 385	    && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) {
 386		for (; bm < end_bm; bm++, off = 0) {
 387			if ((off && (!unfm || (file_block != 0)))
 388			    || SB_AP_BITMAP(s)[bm].free_count >
 389			    (s->s_blocksize << 3) / 10)
 390				nr_allocated =
 391				    scan_bitmap_block(th, bm, &off, off_max,
 392						      min, max, unfm);
 393			if (nr_allocated)
 394				goto ret;
 395		}
 396		/* we know from above that start is a reasonable number */
 397		get_bit_address(s, *start, &bm, &off);
 398	}
 399
 400	for (; bm < end_bm; bm++, off = 0) {
 401		nr_allocated =
 402		    scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
 403		if (nr_allocated)
 404			goto ret;
 405	}
 406
 407	nr_allocated =
 408	    scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
 409
 410ret:
 411	*start = bm * off_max + off;
 412	return nr_allocated;
 413
 414}
 415
 416static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
 417				 struct inode *inode, b_blocknr_t block,
 418				 int for_unformatted)
 419{
 420	struct super_block *s = th->t_super;
 421	struct reiserfs_super_block *rs;
 422	struct buffer_head *sbh, *bmbh;
 423	struct reiserfs_bitmap_info *apbi;
 424	unsigned int nr, offset;
 425
 426	BUG_ON(!th->t_trans_id);
 427	PROC_INFO_INC(s, free_block);
 428	rs = SB_DISK_SUPER_BLOCK(s);
 429	sbh = SB_BUFFER_WITH_SB(s);
 430	apbi = SB_AP_BITMAP(s);
 431
 432	get_bit_address(s, block, &nr, &offset);
 433
 434	if (nr >= reiserfs_bmap_count(s)) {
 435		reiserfs_error(s, "vs-4075", "block %lu is out of range",
 436			       block);
 437		return;
 438	}
 439
 440	bmbh = reiserfs_read_bitmap_block(s, nr);
 441	if (!bmbh)
 442		return;
 443
 444	reiserfs_prepare_for_journal(s, bmbh, 1);
 445
  446	/* clear the bit for the given block in the bitmap */
 447	if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) {
 448		reiserfs_error(s, "vs-4080",
 449			       "block %lu: bit already cleared", block);
 450	}
 451	apbi[nr].free_count++;
 452	journal_mark_dirty(th, bmbh);
 453	brelse(bmbh);
 454
 455	reiserfs_prepare_for_journal(s, sbh, 1);
 456	/* update super block */
 457	set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
 458
 459	journal_mark_dirty(th, sbh);
 460	if (for_unformatted) {
 461		int depth = reiserfs_write_unlock_nested(s);
 462		dquot_free_block_nodirty(inode, 1);
 463		reiserfs_write_lock_nested(s, depth);
 464	}
 465}
 466
 467void reiserfs_free_block(struct reiserfs_transaction_handle *th,
 468			 struct inode *inode, b_blocknr_t block,
 469			 int for_unformatted)
 470{
 471	struct super_block *s = th->t_super;
 472
 473	BUG_ON(!th->t_trans_id);
 474	RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
 475	if (!is_reusable(s, block, 1))
 476		return;
 477
 478	if (block > sb_block_count(REISERFS_SB(s)->s_rs)) {
 479		reiserfs_error(th->t_super, "bitmap-4072",
 480			       "Trying to free block outside file system "
 481			       "boundaries (%lu > %lu)",
 482			       block, sb_block_count(REISERFS_SB(s)->s_rs));
 483		return;
 484	}
 485	/* mark it before we clear it, just in case */
 486	journal_mark_freed(th, s, block);
 487	_reiserfs_free_block(th, inode, block, for_unformatted);
 488}
 489
 490/* preallocated blocks don't need to be run through journal_mark_freed */
 491static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
 492					 struct inode *inode, b_blocknr_t block)
 493{
 494	BUG_ON(!th->t_trans_id);
 495	RFALSE(!th->t_super,
 496	       "vs-4060: trying to free block on nonexistent device");
 497	if (!is_reusable(th->t_super, block, 1))
 498		return;
 499	_reiserfs_free_block(th, inode, block, 1);
 500}
 501
 502static void __discard_prealloc(struct reiserfs_transaction_handle *th,
 503			       struct reiserfs_inode_info *ei)
 504{
 505	unsigned long save = ei->i_prealloc_block;
 506	int dirty = 0;
 507	struct inode *inode = &ei->vfs_inode;
 508
 509	BUG_ON(!th->t_trans_id);
 510#ifdef CONFIG_REISERFS_CHECK
 511	if (ei->i_prealloc_count < 0)
 512		reiserfs_error(th->t_super, "zam-4001",
 513			       "inode has negative prealloc blocks count.");
 514#endif
 515	while (ei->i_prealloc_count > 0) {
 516		reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
 517		ei->i_prealloc_block++;
 518		ei->i_prealloc_count--;
 519		dirty = 1;
 520	}
 521	if (dirty)
 522		reiserfs_update_sd(th, inode);
 523	ei->i_prealloc_block = save;
 524	list_del_init(&ei->i_prealloc_list);
 525}
 526
  527/* FIXME: this should be an inline function */
 528void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
 529			       struct inode *inode)
 530{
 531	struct reiserfs_inode_info *ei = REISERFS_I(inode);
 532
 533	BUG_ON(!th->t_trans_id);
 534	if (ei->i_prealloc_count)
 535		__discard_prealloc(th, ei);
 536}
 537
 538void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
 539{
 540	struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
 541
 542	BUG_ON(!th->t_trans_id);
 543	while (!list_empty(plist)) {
 544		struct reiserfs_inode_info *ei;
 545		ei = list_entry(plist->next, struct reiserfs_inode_info,
 546				i_prealloc_list);
 547#ifdef CONFIG_REISERFS_CHECK
 548		if (!ei->i_prealloc_count) {
 549			reiserfs_error(th->t_super, "zam-4001",
 550				       "inode is in prealloc list but has "
 551				       "no preallocated blocks.");
 552		}
 553#endif
 554		__discard_prealloc(th, ei);
 555	}
 556}
 557
 558void reiserfs_init_alloc_options(struct super_block *s)
 559{
 560	set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
 561	set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
 562	set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s));
 563}
 564
 565/* block allocator related options are parsed here */
 566int reiserfs_parse_alloc_options(struct super_block *s, char *options)
 567{
 568	char *this_char, *value;
 569
 570	/* clear default settings */
 571	REISERFS_SB(s)->s_alloc_options.bits = 0;
 572
 573	while ((this_char = strsep(&options, ":")) != NULL) {
 574		if ((value = strchr(this_char, '=')) != NULL)
 575			*value++ = 0;
 576
 577		if (!strcmp(this_char, "concentrating_formatted_nodes")) {
 578			int temp;
 579			SET_OPTION(concentrating_formatted_nodes);
 580			temp = (value
 581				&& *value) ? simple_strtoul(value, &value,
 582							    0) : 10;
 583			if (temp <= 0 || temp > 100) {
 584				REISERFS_SB(s)->s_alloc_options.border = 10;
 585			} else {
 586				REISERFS_SB(s)->s_alloc_options.border =
 587				    100 / temp;
 588			}
 589			continue;
 590		}
 591		if (!strcmp(this_char, "displacing_large_files")) {
 592			SET_OPTION(displacing_large_files);
 593			REISERFS_SB(s)->s_alloc_options.large_file_size =
 594			    (value
 595			     && *value) ? simple_strtoul(value, &value, 0) : 16;
 596			continue;
 597		}
 598		if (!strcmp(this_char, "displacing_new_packing_localities")) {
 599			SET_OPTION(displacing_new_packing_localities);
 600			continue;
 601		}
 602
 603		if (!strcmp(this_char, "old_hashed_relocation")) {
 604			SET_OPTION(old_hashed_relocation);
 605			continue;
 606		}
 607
 608		if (!strcmp(this_char, "new_hashed_relocation")) {
 609			SET_OPTION(new_hashed_relocation);
 610			continue;
 611		}
 612
 613		if (!strcmp(this_char, "dirid_groups")) {
 614			SET_OPTION(dirid_groups);
 615			continue;
 616		}
 617		if (!strcmp(this_char, "oid_groups")) {
 618			SET_OPTION(oid_groups);
 619			continue;
 620		}
 621		if (!strcmp(this_char, "packing_groups")) {
 622			SET_OPTION(packing_groups);
 623			continue;
 624		}
 625		if (!strcmp(this_char, "hashed_formatted_nodes")) {
 626			SET_OPTION(hashed_formatted_nodes);
 627			continue;
 628		}
 629
 630		if (!strcmp(this_char, "skip_busy")) {
 631			SET_OPTION(skip_busy);
 632			continue;
 633		}
 634
 635		if (!strcmp(this_char, "hundredth_slices")) {
 636			SET_OPTION(hundredth_slices);
 637			continue;
 638		}
 639
 640		if (!strcmp(this_char, "old_way")) {
 641			SET_OPTION(old_way);
 642			continue;
 643		}
 644
 645		if (!strcmp(this_char, "displace_based_on_dirid")) {
 646			SET_OPTION(displace_based_on_dirid);
 647			continue;
 648		}
 649
 650		if (!strcmp(this_char, "preallocmin")) {
 651			REISERFS_SB(s)->s_alloc_options.preallocmin =
 652			    (value
 653			     && *value) ? simple_strtoul(value, &value, 0) : 4;
 654			continue;
 655		}
 656
 657		if (!strcmp(this_char, "preallocsize")) {
 658			REISERFS_SB(s)->s_alloc_options.preallocsize =
 659			    (value
 660			     && *value) ? simple_strtoul(value, &value,
 661							 0) :
 662			    PREALLOCATION_SIZE;
 663			continue;
 664		}
 665
 666		reiserfs_warning(s, "zam-4001", "unknown option - %s",
 667				 this_char);
 668		return 1;
 669	}
 670
 671	reiserfs_info(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
 672	return 0;
 673}
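
/*
 * Illustrative example: an options string like "skip_busy:preallocsize=8"
 * (as passed in via the alloc= mount option) clears all other bits, sets
 * _ALLOC_skip_busy and makes the allocator hand out up to 8 preallocation
 * blocks at a time instead of the default PREALLOCATION_SIZE.  An
 * unrecognized keyword makes this function return 1 so the caller can
 * reject the options, with the warning above logged.
 */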
 674
 675static void print_sep(struct seq_file *seq, int *first)
 676{
 677	if (!*first)
 678		seq_puts(seq, ":");
 679	else
 680		*first = 0;
 681}
 682
 683void show_alloc_options(struct seq_file *seq, struct super_block *s)
 684{
 685	int first = 1;
 686
 687	if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) |
 688		(1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups)))
 689		return;
 690
 691	seq_puts(seq, ",alloc=");
 692
 693	if (TEST_OPTION(concentrating_formatted_nodes, s)) {
 694		print_sep(seq, &first);
 695		if (REISERFS_SB(s)->s_alloc_options.border != 10) {
 696			seq_printf(seq, "concentrating_formatted_nodes=%d",
 697				100 / REISERFS_SB(s)->s_alloc_options.border);
 698		} else
 699			seq_puts(seq, "concentrating_formatted_nodes");
 700	}
 701	if (TEST_OPTION(displacing_large_files, s)) {
 702		print_sep(seq, &first);
 703		if (REISERFS_SB(s)->s_alloc_options.large_file_size != 16) {
 704			seq_printf(seq, "displacing_large_files=%lu",
 705			    REISERFS_SB(s)->s_alloc_options.large_file_size);
 706		} else
 707			seq_puts(seq, "displacing_large_files");
 708	}
 709	if (TEST_OPTION(displacing_new_packing_localities, s)) {
 710		print_sep(seq, &first);
 711		seq_puts(seq, "displacing_new_packing_localities");
 712	}
 713	if (TEST_OPTION(old_hashed_relocation, s)) {
 714		print_sep(seq, &first);
 715		seq_puts(seq, "old_hashed_relocation");
 716	}
 717	if (TEST_OPTION(new_hashed_relocation, s)) {
 718		print_sep(seq, &first);
 719		seq_puts(seq, "new_hashed_relocation");
 720	}
 721	if (TEST_OPTION(dirid_groups, s)) {
 722		print_sep(seq, &first);
 723		seq_puts(seq, "dirid_groups");
 724	}
 725	if (TEST_OPTION(oid_groups, s)) {
 726		print_sep(seq, &first);
 727		seq_puts(seq, "oid_groups");
 728	}
 729	if (TEST_OPTION(packing_groups, s)) {
 730		print_sep(seq, &first);
 731		seq_puts(seq, "packing_groups");
 732	}
 733	if (TEST_OPTION(hashed_formatted_nodes, s)) {
 734		print_sep(seq, &first);
 735		seq_puts(seq, "hashed_formatted_nodes");
 736	}
 737	if (TEST_OPTION(skip_busy, s)) {
 738		print_sep(seq, &first);
 739		seq_puts(seq, "skip_busy");
 740	}
 741	if (TEST_OPTION(hundredth_slices, s)) {
 742		print_sep(seq, &first);
 743		seq_puts(seq, "hundredth_slices");
 744	}
 745	if (TEST_OPTION(old_way, s)) {
 746		print_sep(seq, &first);
 747		seq_puts(seq, "old_way");
 748	}
 749	if (TEST_OPTION(displace_based_on_dirid, s)) {
 750		print_sep(seq, &first);
 751		seq_puts(seq, "displace_based_on_dirid");
 752	}
 753	if (REISERFS_SB(s)->s_alloc_options.preallocmin != 0) {
 754		print_sep(seq, &first);
 755		seq_printf(seq, "preallocmin=%d",
 756				REISERFS_SB(s)->s_alloc_options.preallocmin);
 757	}
 758	if (REISERFS_SB(s)->s_alloc_options.preallocsize != 17) {
 759		print_sep(seq, &first);
 760		seq_printf(seq, "preallocsize=%d",
 761				REISERFS_SB(s)->s_alloc_options.preallocsize);
 762	}
 763}
 764
 765static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
 766{
 767	char *hash_in;
 768
 769	if (hint->formatted_node) {
 770		hash_in = (char *)&hint->key.k_dir_id;
 771	} else {
 772		if (!hint->inode) {
 773			/*hint->search_start = hint->beg;*/
 774			hash_in = (char *)&hint->key.k_dir_id;
 775		} else
 776		    if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 777			hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
 778		else
 779			hash_in =
 780			    (char *)(&INODE_PKEY(hint->inode)->k_objectid);
 781	}
 782
 783	hint->search_start =
 784	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
 785}
 786
  787/*
  788 * Relocation based on dirid: files are hashed into a given bitmap
  789 * block. Formatted nodes are unaffected; a separate policy covers them.
  790 */
 791static void dirid_groups(reiserfs_blocknr_hint_t * hint)
 792{
 793	unsigned long hash;
 794	__u32 dirid = 0;
 795	int bm = 0;
 796	struct super_block *sb = hint->th->t_super;
 797
 798	if (hint->inode)
 799		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
 800	else if (hint->formatted_node)
 801		dirid = hint->key.k_dir_id;
 802
 803	if (dirid) {
 804		bm = bmap_hash_id(sb, dirid);
 805		hash = bm * (sb->s_blocksize << 3);
 806		/* give a portion of the block group to metadata */
 807		if (hint->inode)
 808			hash += sb->s_blocksize / 2;
 809		hint->search_start = hash;
 810	}
 811}
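
/*
 * With 4 KiB blocks, group n starts at block n * 32768, so for data
 * blocks (hint->inode set) the search above starts 4096 / 2 = 2048
 * bits into the chosen group, leaving the front of the group for
 * metadata allocated on behalf of the same directory.
 */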
 812
  813/*
  814 * Relocation based on oid: files are hashed into a given bitmap
  815 * block. Formatted nodes are unaffected; a separate policy covers them.
  816 */
 817static void oid_groups(reiserfs_blocknr_hint_t * hint)
 818{
 819	if (hint->inode) {
 820		unsigned long hash;
 821		__u32 oid;
 822		__u32 dirid;
 823		int bm;
 824
 825		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
 826
 827		/*
  828		 * keep the root dir and its first set of subdirs close to
 829		 * the start of the disk
 830		 */
 831		if (dirid <= 2)
 832			hash = (hint->inode->i_sb->s_blocksize << 3);
 833		else {
 834			oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
 835			bm = bmap_hash_id(hint->inode->i_sb, oid);
 836			hash = bm * (hint->inode->i_sb->s_blocksize << 3);
 837		}
 838		hint->search_start = hash;
 839	}
 840}
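
/*
 * Note that for dirid <= 2 the search starts at bit 32768 (with 4 KiB
 * blocks), i.e. at the beginning of bitmap group 1, matching the
 * special-casing of small ids in bmap_hash_id() above.
 */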
 841
 842/*
 843 * returns 1 if it finds an indirect item and gets valid hint info
 844 * from it, otherwise 0
 845 */
 846static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
 847{
 848	struct treepath *path;
 849	struct buffer_head *bh;
 850	struct item_head *ih;
 851	int pos_in_item;
 852	__le32 *item;
 853	int ret = 0;
 854
  855	/*
  856	 * reiserfs code can call this function without a path structure
  857	 * supplied; in that case we rely on the search_start in the hint
  858	 */
 859	if (!hint->path)
 860		return 0;
 861
 862	path = hint->path;
 863	bh = get_last_bh(path);
 864	RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
 865	ih = tp_item_head(path);
 866	pos_in_item = path->pos_in_item;
 867	item = tp_item_body(path);
 868
 869	hint->search_start = bh->b_blocknr;
 870
 871	/*
 872	 * for indirect item: go to left and look for the first non-hole entry
 873	 * in the indirect item
 874	 */
 875	if (!hint->formatted_node && is_indirect_le_ih(ih)) {
 876		if (pos_in_item == I_UNFM_NUM(ih))
 877			pos_in_item--;
 878		while (pos_in_item >= 0) {
 879			int t = get_block_num(item, pos_in_item);
 880			if (t) {
 881				hint->search_start = t;
 882				ret = 1;
 883				break;
 884			}
 885			pos_in_item--;
 886		}
 887	}
 888
 889	/* does result value fit into specified region? */
 890	return ret;
 891}
 892
  893/*
  894 * If this is a formatted node, try to put it in the first part of the
  895 * device, whose size is given as a percentage via the mount option;
  896 * otherwise try to put it in the rest of the device.  This is not to
  897 * say that doing so is good policy, but the effect should be measured.
  898 */
 899static inline void set_border_in_hint(struct super_block *s,
 900				      reiserfs_blocknr_hint_t * hint)
 901{
 902	b_blocknr_t border =
 903	    SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
 904
 905	if (hint->formatted_node)
 906		hint->end = border - 1;
 907	else
 908		hint->beg = border;
 909}
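
/*
 * Example: with the default concentrating_formatted_nodes border of 10
 * (alloc_options.border == 10), the border block is one tenth of the
 * device; formatted nodes are then confined to [0, border - 1] and
 * everything else starts searching at the border.
 */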
 910
 911static inline void displace_large_file(reiserfs_blocknr_hint_t * hint)
 912{
 913	if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 914		hint->search_start =
 915		    hint->beg +
 916		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id),
 917			       4) % (hint->end - hint->beg);
 918	else
 919		hint->search_start =
 920		    hint->beg +
 921		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid),
 922			       4) % (hint->end - hint->beg);
 923}
 924
 925static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint)
 926{
 927	char *hash_in;
 928
 929	if (!hint->inode)
 930		hash_in = (char *)&hint->key.k_dir_id;
 931	else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 932		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
 933	else
 934		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
 935
 936	hint->search_start =
 937	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
 938}
 939
 940static inline int
 941this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *
 942						   hint)
 943{
 944	return hint->block ==
 945	    REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size;
 946}
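
/*
 * With the default large_file_size of 16 (set when displacing_large_files
 * is enabled without a value), this fires when block 16 of the file is
 * being allocated, i.e. 64 KiB into the file at a 4 KiB block size.
 */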
 947
 948#ifdef DISPLACE_NEW_PACKING_LOCALITIES
 949static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)
 950{
 951	struct in_core_key *key = &hint->key;
 952
 953	hint->th->displace_new_blocks = 0;
 954	hint->search_start =
 955	    hint->beg + keyed_hash((char *)(&key->k_objectid),
 956				   4) % (hint->end - hint->beg);
 957}
 958#endif
 959
 960static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint)
 961{
 962	b_blocknr_t border;
 963	u32 hash_in;
 964
 965	if (hint->formatted_node || hint->inode == NULL) {
 966		return 0;
 967	}
 968
 969	hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
 970	border =
 971	    hint->beg + (u32) keyed_hash(((char *)(&hash_in)),
 972					 4) % (hint->end - hint->beg - 1);
 973	if (border > hint->search_start)
 974		hint->search_start = border;
 975
 976	return 1;
 977}
 978
 979static inline int old_way(reiserfs_blocknr_hint_t * hint)
 980{
 981	b_blocknr_t border;
 982
 983	if (hint->formatted_node || hint->inode == NULL) {
 984		return 0;
 985	}
 986
 987	border =
 988	    hint->beg +
 989	    le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end -
 990							      hint->beg);
 991	if (border > hint->search_start)
 992		hint->search_start = border;
 993
 994	return 1;
 995}
 996
 997static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint)
 998{
 999	struct in_core_key *key = &hint->key;
1000	b_blocknr_t slice_start;
1001
1002	slice_start =
1003	    (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100);
1004	if (slice_start > hint->search_start
1005	    || slice_start + (hint->end / 100) <= hint->search_start) {
1006		hint->search_start = slice_start;
1007	}
1008}
1009
1010static void determine_search_start(reiserfs_blocknr_hint_t * hint,
1011				   int amount_needed)
1012{
1013	struct super_block *s = hint->th->t_super;
1014	int unfm_hint;
1015
1016	hint->beg = 0;
1017	hint->end = SB_BLOCK_COUNT(s) - 1;
1018
 1019	/* This is the former border algorithm, now with a tunable border offset */
1020	if (concentrating_formatted_nodes(s))
1021		set_border_in_hint(s, hint);
1022
1023#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1024	/*
1025	 * whenever we create a new directory, we displace it.  At first
1026	 * we will hash for location, later we might look for a moderately
1027	 * empty place for it
1028	 */
1029	if (displacing_new_packing_localities(s)
1030	    && hint->th->displace_new_blocks) {
1031		displace_new_packing_locality(hint);
1032
1033		/*
1034		 * we do not continue determine_search_start,
1035		 * if new packing locality is being displaced
1036		 */
1037		return;
1038	}
1039#endif
1040
1041	/*
1042	 * all persons should feel encouraged to add more special cases
1043	 * here and test them
1044	 */
1045
1046	if (displacing_large_files(s) && !hint->formatted_node
1047	    && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
1048		displace_large_file(hint);
1049		return;
1050	}
1051
1052	/*
1053	 * if none of our special cases is relevant, use the left
1054	 * neighbor in the tree order of the new node we are allocating for
1055	 */
1056	if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
1057		hash_formatted_node(hint);
1058		return;
1059	}
1060
1061	unfm_hint = get_left_neighbor(hint);
1062
 1063	/*
 1064	 * Mimic the old block allocator behaviour: if the VFS allowed
 1065	 * preallocation, new blocks are displaced based on directory ID.
 1066	 * Also, if the suggested search_start is less than the last
 1067	 * preallocated block, we start searching from it, assuming that
 1068	 * HDD dataflow is faster in the forward direction.
 1069	 */
1070	if (TEST_OPTION(old_way, s)) {
1071		if (!hint->formatted_node) {
1072			if (!reiserfs_hashed_relocation(s))
1073				old_way(hint);
1074			else if (!reiserfs_no_unhashed_relocation(s))
1075				old_hashed_relocation(hint);
1076
1077			if (hint->inode
1078			    && hint->search_start <
1079			    REISERFS_I(hint->inode)->i_prealloc_block)
1080				hint->search_start =
1081				    REISERFS_I(hint->inode)->i_prealloc_block;
1082		}
1083		return;
1084	}
1085
1086	/* This is an approach proposed by Hans */
1087	if (TEST_OPTION(hundredth_slices, s)
1088	    && !(displacing_large_files(s) && !hint->formatted_node)) {
1089		hundredth_slices(hint);
1090		return;
1091	}
1092
1093	/* old_hashed_relocation only works on unformatted */
1094	if (!unfm_hint && !hint->formatted_node &&
1095	    TEST_OPTION(old_hashed_relocation, s)) {
1096		old_hashed_relocation(hint);
1097	}
1098
1099	/* new_hashed_relocation works with both formatted/unformatted nodes */
1100	if ((!unfm_hint || hint->formatted_node) &&
1101	    TEST_OPTION(new_hashed_relocation, s)) {
1102		new_hashed_relocation(hint);
1103	}
1104
1105	/* dirid grouping works only on unformatted nodes */
1106	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
1107		dirid_groups(hint);
1108	}
1109#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1110	if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
1111		dirid_groups(hint);
1112	}
1113#endif
1114
1115	/* oid grouping works only on unformatted nodes */
1116	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) {
1117		oid_groups(hint);
1118	}
1119	return;
1120}
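
/*
 * Rough precedence of the policies above: the border option only
 * narrows [beg, end]; displaced new packing localities, large-file
 * displacement, hashed formatted nodes, old_way and hundredth_slices
 * each return as soon as they apply; the remaining hashed relocation
 * and dirid/oid grouping hints do not return early, so a later one may
 * override the search_start chosen by an earlier one.
 */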
1121
1122static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
1123{
1124	/* make minimum size a mount option and benchmark both ways */
 1125	/* we preallocate blocks only for regular files of at least a minimum size */
1126	/* benchmark preallocating always and see what happens */
1127
1128	hint->prealloc_size = 0;
1129
1130	if (!hint->formatted_node && hint->preallocate) {
1131		if (S_ISREG(hint->inode->i_mode)
1132		    && hint->inode->i_size >=
1133		    REISERFS_SB(hint->th->t_super)->s_alloc_options.
1134		    preallocmin * hint->inode->i_sb->s_blocksize)
1135			hint->prealloc_size =
1136			    REISERFS_SB(hint->th->t_super)->s_alloc_options.
1137			    preallocsize - 1;
1138	}
1139	return CARRY_ON;
1140}
1141
1142static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
1143						 b_blocknr_t * new_blocknrs,
1144						 b_blocknr_t start,
1145						 b_blocknr_t finish, int min,
1146						 int amount_needed,
1147						 int prealloc_size)
1148{
1149	int rest = amount_needed;
1150	int nr_allocated;
1151
1152	while (rest > 0 && start <= finish) {
1153		nr_allocated = scan_bitmap(hint->th, &start, finish, min,
1154					   rest + prealloc_size,
1155					   !hint->formatted_node, hint->block);
1156
1157		if (nr_allocated == 0)	/* no new blocks allocated, return */
1158			break;
1159
1160		/* fill free_blocknrs array first */
1161		while (rest > 0 && nr_allocated > 0) {
1162			*new_blocknrs++ = start++;
1163			rest--;
1164			nr_allocated--;
1165		}
1166
1167		/* do we have something to fill prealloc. array also ? */
1168		if (nr_allocated > 0) {
 1169			/*
 1170			 * this means prealloc_size was greater than 0 and
 1171			 * we are doing preallocation
 1172			 */
1173			list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
1174				 &SB_JOURNAL(hint->th->t_super)->
1175				 j_prealloc_list);
1176			REISERFS_I(hint->inode)->i_prealloc_block = start;
1177			REISERFS_I(hint->inode)->i_prealloc_count =
1178			    nr_allocated;
1179			break;
1180		}
1181	}
1182
1183	return (amount_needed - rest);
1184}
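
/*
 * Illustrative call: with amount_needed = 3 and prealloc_size = 8,
 * scan_bitmap() may hand back up to 11 consecutive blocks starting at
 * `start'; the first three fill new_blocknrs[], and whatever is left
 * over is recorded as i_prealloc_block/i_prealloc_count for later use
 * by use_preallocated_list_if_available().
 */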
1185
1186static inline int blocknrs_and_prealloc_arrays_from_search_start
1187    (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs,
1188     int amount_needed) {
1189	struct super_block *s = hint->th->t_super;
1190	b_blocknr_t start = hint->search_start;
1191	b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
1192	int passno = 0;
1193	int nr_allocated = 0;
1194	int depth;
1195
1196	determine_prealloc_size(hint);
1197	if (!hint->formatted_node) {
1198		int quota_ret;
1199#ifdef REISERQUOTA_DEBUG
1200		reiserfs_debug(s, REISERFS_DEBUG_CODE,
1201			       "reiserquota: allocating %d blocks id=%u",
1202			       amount_needed, hint->inode->i_uid);
1203#endif
1204		depth = reiserfs_write_unlock_nested(s);
1205		quota_ret =
1206		    dquot_alloc_block_nodirty(hint->inode, amount_needed);
1207		if (quota_ret) {	/* Quota exceeded? */
1208			reiserfs_write_lock_nested(s, depth);
1209			return QUOTA_EXCEEDED;
1210		}
1211		if (hint->preallocate && hint->prealloc_size) {
1212#ifdef REISERQUOTA_DEBUG
1213			reiserfs_debug(s, REISERFS_DEBUG_CODE,
1214				       "reiserquota: allocating (prealloc) %d blocks id=%u",
1215				       hint->prealloc_size, hint->inode->i_uid);
1216#endif
1217			quota_ret = dquot_prealloc_block_nodirty(hint->inode,
1218							 hint->prealloc_size);
1219			if (quota_ret)
1220				hint->preallocate = hint->prealloc_size = 0;
1221		}
1222		/* for unformatted nodes, force large allocations */
1223		reiserfs_write_lock_nested(s, depth);
1224	}
1225
1226	do {
1227		switch (passno++) {
1228		case 0:	/* Search from hint->search_start to end of disk */
1229			start = hint->search_start;
1230			finish = SB_BLOCK_COUNT(s) - 1;
1231			break;
1232		case 1:	/* Search from hint->beg to hint->search_start */
1233			start = hint->beg;
1234			finish = hint->search_start;
1235			break;
1236		case 2:	/* Last chance: Search from 0 to hint->beg */
1237			start = 0;
1238			finish = hint->beg;
1239			break;
1240		default:
1241			/* We've tried searching everywhere, not enough space */
1242			/* Free the blocks */
1243			if (!hint->formatted_node) {
1244#ifdef REISERQUOTA_DEBUG
1245				reiserfs_debug(s, REISERFS_DEBUG_CODE,
1246					       "reiserquota: freeing (nospace) %d blocks id=%u",
1247					       amount_needed +
1248					       hint->prealloc_size -
1249					       nr_allocated,
1250					       hint->inode->i_uid);
1251#endif
 1252				/* release quota charged for the blocks we did not allocate */
1253				depth = reiserfs_write_unlock_nested(s);
1254				dquot_free_block_nodirty(hint->inode,
1255					amount_needed + hint->prealloc_size -
1256					nr_allocated);
1257				reiserfs_write_lock_nested(s, depth);
1258			}
1259			while (nr_allocated--)
1260				reiserfs_free_block(hint->th, hint->inode,
1261						    new_blocknrs[nr_allocated],
1262						    !hint->formatted_node);
1263
1264			return NO_DISK_SPACE;
1265		}
1266	} while ((nr_allocated += allocate_without_wrapping_disk(hint,
1267								 new_blocknrs +
1268								 nr_allocated,
1269								 start, finish,
1270								 1,
1271								 amount_needed -
1272								 nr_allocated,
1273								 hint->
1274								 prealloc_size))
1275		 < amount_needed);
1276	if (!hint->formatted_node &&
1277	    amount_needed + hint->prealloc_size >
1278	    nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) {
 1279		/* Some of the preallocation blocks were not allocated */
1280#ifdef REISERQUOTA_DEBUG
1281		reiserfs_debug(s, REISERFS_DEBUG_CODE,
1282			       "reiserquota: freeing (failed prealloc) %d blocks id=%u",
1283			       amount_needed + hint->prealloc_size -
1284			       nr_allocated -
1285			       REISERFS_I(hint->inode)->i_prealloc_count,
1286			       hint->inode->i_uid);
1287#endif
1288
1289		depth = reiserfs_write_unlock_nested(s);
1290		dquot_free_block_nodirty(hint->inode, amount_needed +
1291					 hint->prealloc_size - nr_allocated -
1292					 REISERFS_I(hint->inode)->
1293					 i_prealloc_count);
1294		reiserfs_write_lock_nested(s, depth);
1295	}
1296
1297	return CARRY_ON;
1298}
1299
1300/* grab new blocknrs from preallocated list */
1301/* return amount still needed after using them */
1302static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
1303					      b_blocknr_t * new_blocknrs,
1304					      int amount_needed)
1305{
1306	struct inode *inode = hint->inode;
1307
1308	if (REISERFS_I(inode)->i_prealloc_count > 0) {
1309		while (amount_needed) {
1310
1311			*new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++;
1312			REISERFS_I(inode)->i_prealloc_count--;
1313
1314			amount_needed--;
1315
1316			if (REISERFS_I(inode)->i_prealloc_count <= 0) {
1317				list_del(&REISERFS_I(inode)->i_prealloc_list);
1318				break;
1319			}
1320		}
1321	}
1322	/* return amount still needed after using preallocated blocks */
1323	return amount_needed;
1324}
1325
1326int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint,
1327			       b_blocknr_t *new_blocknrs,
1328			       int amount_needed,
1329			       /* Amount of blocks we have already reserved */
1330			       int reserved_by_us)
1331{
1332	int initial_amount_needed = amount_needed;
1333	int ret;
1334	struct super_block *s = hint->th->t_super;
1335
1336	/* Check if there is enough space, taking into account reserved space */
1337	if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks <
1338	    amount_needed - reserved_by_us)
1339		return NO_DISK_SPACE;
1340	/* should this be if !hint->inode &&  hint->preallocate? */
1341	/* do you mean hint->formatted_node can be removed ? - Zam */
1342	/*
1343	 * hint->formatted_node cannot be removed because we try to access
1344	 * inode information here, and there is often no inode associated with
1345	 * metadata allocations - green
1346	 */
1347
1348	if (!hint->formatted_node && hint->preallocate) {
1349		amount_needed = use_preallocated_list_if_available
1350		    (hint, new_blocknrs, amount_needed);
1351
1352		/*
1353		 * We have all the block numbers we need from the
1354		 * prealloc list
1355		 */
1356		if (amount_needed == 0)
1357			return CARRY_ON;
1358		new_blocknrs += (initial_amount_needed - amount_needed);
1359	}
1360
1361	/* find search start and save it in hint structure */
1362	determine_search_start(hint, amount_needed);
1363	if (hint->search_start >= SB_BLOCK_COUNT(s))
1364		hint->search_start = SB_BLOCK_COUNT(s) - 1;
1365
1366	/* allocation itself; fill new_blocknrs and preallocation arrays */
1367	ret = blocknrs_and_prealloc_arrays_from_search_start
1368	    (hint, new_blocknrs, amount_needed);
1369
1370	/*
1371	 * We used prealloc. list to fill (partially) new_blocknrs array.
1372	 * If final allocation fails we need to return blocks back to
1373	 * prealloc. list or just free them. -- Zam (I chose second
1374	 * variant)
1375	 */
1376	if (ret != CARRY_ON) {
1377		while (amount_needed++ < initial_amount_needed) {
1378			reiserfs_free_block(hint->th, hint->inode,
1379					    *(--new_blocknrs), 1);
1380		}
1381	}
1382	return ret;
1383}
1384
1385void reiserfs_cache_bitmap_metadata(struct super_block *sb,
1386                                    struct buffer_head *bh,
1387                                    struct reiserfs_bitmap_info *info)
1388{
1389	unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size);
1390
1391	/* The first bit must ALWAYS be 1 */
1392	if (!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data))
1393		reiserfs_error(sb, "reiserfs-2025", "bitmap block %lu is "
1394			       "corrupted: first bit must be 1", bh->b_blocknr);
1395
1396	info->free_count = 0;
1397
1398	while (--cur >= (unsigned long *)bh->b_data) {
1399		/* 0 and ~0 are special, we can optimize for them */
1400		if (*cur == 0)
1401			info->free_count += BITS_PER_LONG;
1402		else if (*cur != ~0L)	/* A mix, investigate */
1403			info->free_count += BITS_PER_LONG - hweight_long(*cur);
1404	}
1405}
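
/*
 * The loop above walks the bitmap one long word at a time from the end
 * of the buffer back to the start (512 words for a 4 KiB block on a
 * 64-bit machine): an all-zero word adds BITS_PER_LONG free blocks, an
 * all-ones word adds none, and mixed words are counted via
 * hweight_long().
 */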
1406
1407struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1408                                               unsigned int bitmap)
1409{
1410	b_blocknr_t block = (sb->s_blocksize << 3) * bitmap;
1411	struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap;
1412	struct buffer_head *bh;
1413
1414	/*
1415	 * Way old format filesystems had the bitmaps packed up front.
1416	 * I doubt there are any of these left, but just in case...
1417	 */
1418	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
1419			      &REISERFS_SB(sb)->s_properties)))
1420		block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap;
1421	else if (bitmap == 0)
1422		block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
1423
1424	bh = sb_bread(sb, block);
1425	if (bh == NULL)
1426		reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
1427		                 "reading failed", __func__, block);
1428	else {
1429		if (buffer_locked(bh)) {
1430			int depth;
1431			PROC_INFO_INC(sb, scan_bitmap.wait);
1432			depth = reiserfs_write_unlock_nested(sb);
1433			__wait_on_buffer(bh);
1434			reiserfs_write_lock_nested(sb, depth);
1435		}
1436		BUG_ON(!buffer_uptodate(bh));
1437		BUG_ON(atomic_read(&bh->b_count) == 0);
1438
1439		if (info->free_count == UINT_MAX)
1440			reiserfs_cache_bitmap_metadata(sb, bh, info);
1441	}
1442
1443	return bh;
1444}
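
/*
 * On current-format filesystems bitmap i normally sits at block
 * i * (blocksize * 8), i.e. at the first block it describes; the
 * exception is bitmap 0, which lives just past the reserved area at
 * the start of the device (block 17 with 4 KiB blocks, assuming the
 * usual 64 KiB REISERFS_DISK_OFFSET_IN_BYTES).
 */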
1445
1446int reiserfs_init_bitmap_cache(struct super_block *sb)
1447{
1448	struct reiserfs_bitmap_info *bitmap;
1449	unsigned int bmap_nr = reiserfs_bmap_count(sb);
1450
1451	bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
1452	if (bitmap == NULL)
1453		return -ENOMEM;
1454
1455	memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr);
1456
1457	SB_AP_BITMAP(sb) = bitmap;
1458
1459	return 0;
1460}
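
/*
 * The 0xff fill leaves every free_count at UINT_MAX, which the rest of
 * this file treats as "bitmap not read yet" (see block_group_used()
 * and reiserfs_read_bitmap_block()).
 */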
1461
1462void reiserfs_free_bitmap_cache(struct super_block *sb)
1463{
1464	if (SB_AP_BITMAP(sb)) {
1465		vfree(SB_AP_BITMAP(sb));
1466		SB_AP_BITMAP(sb) = NULL;
1467	}
1468}