Linux Audio

Check our new training course

Yocto / OpenEmbedded training

Mar 24-27, 2025, special US time zones
Register
Loading...
v5.4
   1/*
   2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
   3 */
   4/* Reiserfs block (de)allocator, bitmap-based. */
   5
   6#include <linux/time.h>
   7#include "reiserfs.h"
   8#include <linux/errno.h>
   9#include <linux/buffer_head.h>
  10#include <linux/kernel.h>
  11#include <linux/pagemap.h>
  12#include <linux/vmalloc.h>
  13#include <linux/quotaops.h>
  14#include <linux/seq_file.h>
  15
  16#define PREALLOCATION_SIZE 9
  17
  18/* different reiserfs block allocator options */
  19
  20#define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits)
  21
  22#define  _ALLOC_concentrating_formatted_nodes 0
  23#define  _ALLOC_displacing_large_files 1
  24#define  _ALLOC_displacing_new_packing_localities 2
  25#define  _ALLOC_old_hashed_relocation 3
  26#define  _ALLOC_new_hashed_relocation 4
  27#define  _ALLOC_skip_busy 5
  28#define  _ALLOC_displace_based_on_dirid 6
  29#define  _ALLOC_hashed_formatted_nodes 7
  30#define  _ALLOC_old_way 8
  31#define  _ALLOC_hundredth_slices 9
  32#define  _ALLOC_dirid_groups 10
  33#define  _ALLOC_oid_groups 11
  34#define  _ALLOC_packing_groups 12
  35
  36#define  concentrating_formatted_nodes(s)	test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
  37#define  displacing_large_files(s)		test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
  38#define  displacing_new_packing_localities(s)	test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))
  39
  40#define SET_OPTION(optname) \
  41   do { \
  42	reiserfs_info(s, "block allocator option \"%s\" is set", #optname); \
  43	set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
  44    } while(0)
  45#define TEST_OPTION(optname, s) \
  46    test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
  47
  48static inline void get_bit_address(struct super_block *s,
  49				   b_blocknr_t block,
  50				   unsigned int *bmap_nr,
  51				   unsigned int *offset)
  52{
  53	/*
  54	 * It is in the bitmap block number equal to the block
  55	 * number divided by the number of bits in a block.
  56	 */
  57	*bmap_nr = block >> (s->s_blocksize_bits + 3);
  58	/* Within that bitmap block it is located at bit offset *offset. */
  59	*offset = block & ((s->s_blocksize << 3) - 1);
  60}
  61
  62int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
  63{
  64	unsigned int bmap, offset;
  65	unsigned int bmap_count = reiserfs_bmap_count(s);
  66
  67	if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
  68		reiserfs_error(s, "vs-4010",
  69			       "block number is out of range %lu (%u)",
  70			       block, SB_BLOCK_COUNT(s));
  71		return 0;
  72	}
  73
  74	get_bit_address(s, block, &bmap, &offset);
  75
  76	/*
  77	 * Old format filesystem? Unlikely, but the bitmaps are all
  78	 * up front so we need to account for it.
  79	 */
  80	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
  81			      &REISERFS_SB(s)->s_properties))) {
  82		b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
  83		if (block >= bmap1 &&
  84		    block <= bmap1 + bmap_count) {
  85			reiserfs_error(s, "vs-4019", "bitmap block %lu(%u) "
  86				       "can't be freed or reused",
  87				       block, bmap_count);
  88			return 0;
  89		}
  90	} else {
  91		if (offset == 0) {
  92			reiserfs_error(s, "vs-4020", "bitmap block %lu(%u) "
  93				       "can't be freed or reused",
  94				       block, bmap_count);
  95			return 0;
  96		}
  97	}
  98
  99	if (bmap >= bmap_count) {
 100		reiserfs_error(s, "vs-4030", "bitmap for requested block "
 101			       "is out of range: block=%lu, bitmap_nr=%u",
 102			       block, bmap);
 103		return 0;
 104	}
 105
 106	if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
 107		reiserfs_error(s, "vs-4050", "this is root block (%u), "
 108			       "it must be busy", SB_ROOT_BLOCK(s));
 109		return 0;
 110	}
 111
 112	return 1;
 113}
 114
 115/*
 116 * Searches in journal structures for a given block number (bmap, off).
 117 * If block is found in reiserfs journal it suggests next free block
 118 * candidate to test.
 119 */
 120static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
 121				      int off, int *next)
 122{
 123	b_blocknr_t tmp;
 124
 125	if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) {
 126		if (tmp) {	/* hint supplied */
 127			*next = tmp;
 128			PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
 129		} else {
 130			(*next) = off + 1;  /* inc offset to avoid looping. */
 131			PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
 132		}
 133		PROC_INFO_INC(s, scan_bitmap.retry);
 134		return 1;
 135	}
 136	return 0;
 137}
 138
 139/*
 140 * Searches for a window of zero bits with given minimum and maximum
 141 * lengths in one bitmap block
 142 */
 143static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
 144			     unsigned int bmap_n, int *beg, int boundary,
 145			     int min, int max, int unfm)
 146{
 147	struct super_block *s = th->t_super;
 148	struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
 149	struct buffer_head *bh;
 150	int end, next;
 151	int org = *beg;
 152
 153	BUG_ON(!th->t_trans_id);
 
 154	RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
 155	       "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
 156	PROC_INFO_INC(s, scan_bitmap.bmap);
 
 
 
 
 157
 158	if (!bi) {
 159		reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer "
 160			       "for bitmap %d", bmap_n);
 161		return 0;
 162	}
 163
 164	bh = reiserfs_read_bitmap_block(s, bmap_n);
 165	if (bh == NULL)
 166		return 0;
 167
 168	while (1) {
 169cont:
 170		if (bi->free_count < min) {
 171			brelse(bh);
 172			return 0;	/* No free blocks in this bitmap */
 173		}
 174
 175		/* search for a first zero bit -- beginning of a window */
 176		*beg = reiserfs_find_next_zero_le_bit
 177		    ((unsigned long *)(bh->b_data), boundary, *beg);
 178
 179		/*
 180		 * search for a zero bit fails or the rest of bitmap block
 181		 * cannot contain a zero window of minimum size
 182		 */
 183		if (*beg + min > boundary) {
 184			brelse(bh);
 185			return 0;
 186		}
 187
 188		if (unfm && is_block_in_journal(s, bmap_n, *beg, beg))
 189			continue;
 190		/* first zero bit found; we check next bits */
 191		for (end = *beg + 1;; end++) {
 192			if (end >= *beg + max || end >= boundary
 193			    || reiserfs_test_le_bit(end, bh->b_data)) {
 194				next = end;
 195				break;
 196			}
 197
 198			/*
 199			 * finding the other end of zero bit window requires
 200			 * looking into journal structures (in case of
 201			 * searching for free blocks for unformatted nodes)
 202			 */
 203			if (unfm && is_block_in_journal(s, bmap_n, end, &next))
 204				break;
 205		}
 206
 207		/*
 208		 * now (*beg) points to beginning of zero bits window,
 209		 * (end) points to one bit after the window end
 210		 */
 211
 212		/* found window of proper size */
 213		if (end - *beg >= min) {
 214			int i;
 215			reiserfs_prepare_for_journal(s, bh, 1);
 216			/*
 217			 * try to set all blocks used checking are
 218			 * they still free
 219			 */
 220			for (i = *beg; i < end; i++) {
 221				/* Don't check in journal again. */
 222				if (reiserfs_test_and_set_le_bit
 223				    (i, bh->b_data)) {
 224					/*
 225					 * bit was set by another process while
 226					 * we slept in prepare_for_journal()
 227					 */
 228					PROC_INFO_INC(s, scan_bitmap.stolen);
 229
 230					/*
 231					 * we can continue with smaller set
 232					 * of allocated blocks, if length of
 233					 * this set is more or equal to `min'
 234					 */
 235					if (i >= *beg + min) {
 236						end = i;
 237						break;
 238					}
 239
 240					/*
 241					 * otherwise we clear all bit
 242					 * were set ...
 243					 */
 244					while (--i >= *beg)
 245						reiserfs_clear_le_bit
 246						    (i, bh->b_data);
 247					reiserfs_restore_prepared_buffer(s, bh);
 248					*beg = org;
 249
 250					/*
 251					 * Search again in current block
 252					 * from beginning
 253					 */
 254					goto cont;
 255				}
 256			}
 257			bi->free_count -= (end - *beg);
 258			journal_mark_dirty(th, bh);
 259			brelse(bh);
 260
 261			/* free block count calculation */
 262			reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
 263						     1);
 264			PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
 265			journal_mark_dirty(th, SB_BUFFER_WITH_SB(s));
 266
 267			return end - (*beg);
 268		} else {
 269			*beg = next;
 270		}
 271	}
 272}
 273
 274static int bmap_hash_id(struct super_block *s, u32 id)
 275{
 276	char *hash_in = NULL;
 277	unsigned long hash;
 278	unsigned bm;
 279
 280	if (id <= 2) {
 281		bm = 1;
 282	} else {
 283		hash_in = (char *)(&id);
 284		hash = keyed_hash(hash_in, 4);
 285		bm = hash % reiserfs_bmap_count(s);
 286		if (!bm)
 287			bm = 1;
 288	}
 289	/* this can only be true when SB_BMAP_NR = 1 */
 290	if (bm >= reiserfs_bmap_count(s))
 291		bm = 0;
 292	return bm;
 293}
 294
 295/*
 296 * hashes the id and then returns > 0 if the block group for the
 297 * corresponding hash is full
 298 */
 299static inline int block_group_used(struct super_block *s, u32 id)
 300{
 301	int bm = bmap_hash_id(s, id);
 302	struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm];
 303
 304	/*
 305	 * If we don't have cached information on this bitmap block, we're
 306	 * going to have to load it later anyway. Loading it here allows us
 307	 * to make a better decision. This favors long-term performance gain
 308	 * with a better on-disk layout vs. a short term gain of skipping the
 309	 * read and potentially having a bad placement.
 310	 */
 311	if (info->free_count == UINT_MAX) {
 312		struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
 313		brelse(bh);
 314	}
 315
 316	if (info->free_count > ((s->s_blocksize << 3) * 60 / 100)) {
 317		return 0;
 318	}
 319	return 1;
 320}
 321
 322/*
 323 * the packing is returned in disk byte order
 324 */
 325__le32 reiserfs_choose_packing(struct inode * dir)
 326{
 327	__le32 packing;
 328	if (TEST_OPTION(packing_groups, dir->i_sb)) {
 329		u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
 330		/*
 331		 * some versions of reiserfsck expect packing locality 1 to be
 332		 * special
 333		 */
 334		if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir))
 335			packing = INODE_PKEY(dir)->k_objectid;
 336		else
 337			packing = INODE_PKEY(dir)->k_dir_id;
 338	} else
 339		packing = INODE_PKEY(dir)->k_objectid;
 340	return packing;
 341}
 342
 343/*
 344 * Tries to find contiguous zero bit window (given size) in given region of
 345 * bitmap and place new blocks there. Returns number of allocated blocks.
 346 */
 347static int scan_bitmap(struct reiserfs_transaction_handle *th,
 348		       b_blocknr_t * start, b_blocknr_t finish,
 349		       int min, int max, int unfm, sector_t file_block)
 350{
 351	int nr_allocated = 0;
 352	struct super_block *s = th->t_super;
 
 
 
 353	unsigned int bm, off;
 354	unsigned int end_bm, end_off;
 355	unsigned int off_max = s->s_blocksize << 3;
 356
 357	BUG_ON(!th->t_trans_id);
 358	PROC_INFO_INC(s, scan_bitmap.call);
 359
 360	/* No point in looking for more free blocks */
 361	if (SB_FREE_BLOCKS(s) <= 0)
 362		return 0;
 363
 364	get_bit_address(s, *start, &bm, &off);
 365	get_bit_address(s, finish, &end_bm, &end_off);
 366	if (bm > reiserfs_bmap_count(s))
 367		return 0;
 368	if (end_bm > reiserfs_bmap_count(s))
 369		end_bm = reiserfs_bmap_count(s);
 370
 371	/*
 372	 * When the bitmap is more than 10% free, anyone can allocate.
 373	 * When it's less than 10% free, only files that already use the
 374	 * bitmap are allowed. Once we pass 80% full, this restriction
 375	 * is lifted.
 376	 *
 377	 * We do this so that files that grow later still have space close to
 378	 * their original allocation. This improves locality, and presumably
 379	 * performance as a result.
 380	 *
 381	 * This is only an allocation policy and does not make up for getting a
 382	 * bad hint. Decent hinting must be implemented for this to work well.
 383	 */
 384	if (TEST_OPTION(skip_busy, s)
 385	    && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) {
 386		for (; bm < end_bm; bm++, off = 0) {
 387			if ((off && (!unfm || (file_block != 0)))
 388			    || SB_AP_BITMAP(s)[bm].free_count >
 389			    (s->s_blocksize << 3) / 10)
 390				nr_allocated =
 391				    scan_bitmap_block(th, bm, &off, off_max,
 392						      min, max, unfm);
 393			if (nr_allocated)
 394				goto ret;
 395		}
 396		/* we know from above that start is a reasonable number */
 397		get_bit_address(s, *start, &bm, &off);
 398	}
 399
 400	for (; bm < end_bm; bm++, off = 0) {
 401		nr_allocated =
 402		    scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
 403		if (nr_allocated)
 404			goto ret;
 405	}
 406
 407	nr_allocated =
 408	    scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
 409
 410ret:
 411	*start = bm * off_max + off;
 412	return nr_allocated;
 413
 414}
 415
 416static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
 417				 struct inode *inode, b_blocknr_t block,
 418				 int for_unformatted)
 419{
 420	struct super_block *s = th->t_super;
 421	struct reiserfs_super_block *rs;
 422	struct buffer_head *sbh, *bmbh;
 423	struct reiserfs_bitmap_info *apbi;
 424	unsigned int nr, offset;
 425
 426	BUG_ON(!th->t_trans_id);
 
 427	PROC_INFO_INC(s, free_block);
 
 428	rs = SB_DISK_SUPER_BLOCK(s);
 429	sbh = SB_BUFFER_WITH_SB(s);
 430	apbi = SB_AP_BITMAP(s);
 431
 432	get_bit_address(s, block, &nr, &offset);
 433
 434	if (nr >= reiserfs_bmap_count(s)) {
 435		reiserfs_error(s, "vs-4075", "block %lu is out of range",
 436			       block);
 437		return;
 438	}
 439
 440	bmbh = reiserfs_read_bitmap_block(s, nr);
 441	if (!bmbh)
 442		return;
 443
 444	reiserfs_prepare_for_journal(s, bmbh, 1);
 445
 446	/* clear bit for the given block in bit map */
 447	if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) {
 448		reiserfs_error(s, "vs-4080",
 449			       "block %lu: bit already cleared", block);
 450	}
 451	apbi[nr].free_count++;
 452	journal_mark_dirty(th, bmbh);
 453	brelse(bmbh);
 454
 455	reiserfs_prepare_for_journal(s, sbh, 1);
 456	/* update super block */
 457	set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
 458
 459	journal_mark_dirty(th, sbh);
 460	if (for_unformatted) {
 461		int depth = reiserfs_write_unlock_nested(s);
 462		dquot_free_block_nodirty(inode, 1);
 463		reiserfs_write_lock_nested(s, depth);
 464	}
 465}
 466
 467void reiserfs_free_block(struct reiserfs_transaction_handle *th,
 468			 struct inode *inode, b_blocknr_t block,
 469			 int for_unformatted)
 470{
 471	struct super_block *s = th->t_super;
 472
 473	BUG_ON(!th->t_trans_id);
 
 474	RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
 475	if (!is_reusable(s, block, 1))
 476		return;
 477
 478	if (block > sb_block_count(REISERFS_SB(s)->s_rs)) {
 479		reiserfs_error(th->t_super, "bitmap-4072",
 480			       "Trying to free block outside file system "
 481			       "boundaries (%lu > %lu)",
 482			       block, sb_block_count(REISERFS_SB(s)->s_rs));
 483		return;
 484	}
 485	/* mark it before we clear it, just in case */
 486	journal_mark_freed(th, s, block);
 487	_reiserfs_free_block(th, inode, block, for_unformatted);
 488}
 489
 490/* preallocated blocks don't need to be run through journal_mark_freed */
 491static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
 492					 struct inode *inode, b_blocknr_t block)
 493{
 494	BUG_ON(!th->t_trans_id);
 495	RFALSE(!th->t_super,
 496	       "vs-4060: trying to free block on nonexistent device");
 497	if (!is_reusable(th->t_super, block, 1))
 498		return;
 499	_reiserfs_free_block(th, inode, block, 1);
 500}
 501
 502static void __discard_prealloc(struct reiserfs_transaction_handle *th,
 503			       struct reiserfs_inode_info *ei)
 504{
 505	unsigned long save = ei->i_prealloc_block;
 506	int dirty = 0;
 507	struct inode *inode = &ei->vfs_inode;
 508
 509	BUG_ON(!th->t_trans_id);
 510#ifdef CONFIG_REISERFS_CHECK
 511	if (ei->i_prealloc_count < 0)
 512		reiserfs_error(th->t_super, "zam-4001",
 513			       "inode has negative prealloc blocks count.");
 514#endif
 515	while (ei->i_prealloc_count > 0) {
 516		b_blocknr_t block_to_free;
 517
 518		/*
 519		 * reiserfs_free_prealloc_block can drop the write lock,
 520		 * which could allow another caller to free the same block.
 521		 * We can protect against it by modifying the prealloc
 522		 * state before calling it.
 523		 */
 524		block_to_free = ei->i_prealloc_block++;
 525		ei->i_prealloc_count--;
 526		reiserfs_free_prealloc_block(th, inode, block_to_free);
 527		dirty = 1;
 528	}
 529	if (dirty)
 530		reiserfs_update_sd(th, inode);
 531	ei->i_prealloc_block = save;
 532	list_del_init(&ei->i_prealloc_list);
 533}
 534
 535/* FIXME: It should be inline function */
 536void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
 537			       struct inode *inode)
 538{
 539	struct reiserfs_inode_info *ei = REISERFS_I(inode);
 540
 541	BUG_ON(!th->t_trans_id);
 542	if (ei->i_prealloc_count)
 543		__discard_prealloc(th, ei);
 544}
 545
 546void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
 547{
 548	struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
 549
 550	BUG_ON(!th->t_trans_id);
 
 551	while (!list_empty(plist)) {
 552		struct reiserfs_inode_info *ei;
 553		ei = list_entry(plist->next, struct reiserfs_inode_info,
 554				i_prealloc_list);
 555#ifdef CONFIG_REISERFS_CHECK
 556		if (!ei->i_prealloc_count) {
 557			reiserfs_error(th->t_super, "zam-4001",
 558				       "inode is in prealloc list but has "
 559				       "no preallocated blocks.");
 560		}
 561#endif
 562		__discard_prealloc(th, ei);
 563	}
 564}
 565
 566void reiserfs_init_alloc_options(struct super_block *s)
 567{
 568	set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
 569	set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
 570	set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s));
 571}
 572
 573/* block allocator related options are parsed here */
 574int reiserfs_parse_alloc_options(struct super_block *s, char *options)
 575{
 576	char *this_char, *value;
 577
 578	/* clear default settings */
 579	REISERFS_SB(s)->s_alloc_options.bits = 0;
 580
 581	while ((this_char = strsep(&options, ":")) != NULL) {
 582		if ((value = strchr(this_char, '=')) != NULL)
 583			*value++ = 0;
 584
 585		if (!strcmp(this_char, "concentrating_formatted_nodes")) {
 586			int temp;
 587			SET_OPTION(concentrating_formatted_nodes);
 588			temp = (value
 589				&& *value) ? simple_strtoul(value, &value,
 590							    0) : 10;
 591			if (temp <= 0 || temp > 100) {
 592				REISERFS_SB(s)->s_alloc_options.border = 10;
 593			} else {
 594				REISERFS_SB(s)->s_alloc_options.border =
 595				    100 / temp;
 596			}
 597			continue;
 598		}
 599		if (!strcmp(this_char, "displacing_large_files")) {
 600			SET_OPTION(displacing_large_files);
 601			REISERFS_SB(s)->s_alloc_options.large_file_size =
 602			    (value
 603			     && *value) ? simple_strtoul(value, &value, 0) : 16;
 604			continue;
 605		}
 606		if (!strcmp(this_char, "displacing_new_packing_localities")) {
 607			SET_OPTION(displacing_new_packing_localities);
 608			continue;
 609		}
 610
 611		if (!strcmp(this_char, "old_hashed_relocation")) {
 612			SET_OPTION(old_hashed_relocation);
 613			continue;
 614		}
 615
 616		if (!strcmp(this_char, "new_hashed_relocation")) {
 617			SET_OPTION(new_hashed_relocation);
 618			continue;
 619		}
 620
 621		if (!strcmp(this_char, "dirid_groups")) {
 622			SET_OPTION(dirid_groups);
 623			continue;
 624		}
 625		if (!strcmp(this_char, "oid_groups")) {
 626			SET_OPTION(oid_groups);
 627			continue;
 628		}
 629		if (!strcmp(this_char, "packing_groups")) {
 630			SET_OPTION(packing_groups);
 631			continue;
 632		}
 633		if (!strcmp(this_char, "hashed_formatted_nodes")) {
 634			SET_OPTION(hashed_formatted_nodes);
 635			continue;
 636		}
 637
 638		if (!strcmp(this_char, "skip_busy")) {
 639			SET_OPTION(skip_busy);
 640			continue;
 641		}
 642
 643		if (!strcmp(this_char, "hundredth_slices")) {
 644			SET_OPTION(hundredth_slices);
 645			continue;
 646		}
 647
 648		if (!strcmp(this_char, "old_way")) {
 649			SET_OPTION(old_way);
 650			continue;
 651		}
 652
 653		if (!strcmp(this_char, "displace_based_on_dirid")) {
 654			SET_OPTION(displace_based_on_dirid);
 655			continue;
 656		}
 657
 658		if (!strcmp(this_char, "preallocmin")) {
 659			REISERFS_SB(s)->s_alloc_options.preallocmin =
 660			    (value
 661			     && *value) ? simple_strtoul(value, &value, 0) : 4;
 662			continue;
 663		}
 664
 665		if (!strcmp(this_char, "preallocsize")) {
 666			REISERFS_SB(s)->s_alloc_options.preallocsize =
 667			    (value
 668			     && *value) ? simple_strtoul(value, &value,
 669							 0) :
 670			    PREALLOCATION_SIZE;
 671			continue;
 672		}
 673
 674		reiserfs_warning(s, "zam-4001", "unknown option - %s",
 675				 this_char);
 676		return 1;
 677	}
 678
 679	reiserfs_info(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
 680	return 0;
 681}
 682
 683static void print_sep(struct seq_file *seq, int *first)
 684{
 685	if (!*first)
 686		seq_puts(seq, ":");
 687	else
 688		*first = 0;
 689}
 690
 691void show_alloc_options(struct seq_file *seq, struct super_block *s)
 692{
 693	int first = 1;
 694
 695	if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) |
 696		(1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups)))
 697		return;
 698
 699	seq_puts(seq, ",alloc=");
 700
 701	if (TEST_OPTION(concentrating_formatted_nodes, s)) {
 702		print_sep(seq, &first);
 703		if (REISERFS_SB(s)->s_alloc_options.border != 10) {
 704			seq_printf(seq, "concentrating_formatted_nodes=%d",
 705				100 / REISERFS_SB(s)->s_alloc_options.border);
 706		} else
 707			seq_puts(seq, "concentrating_formatted_nodes");
 708	}
 709	if (TEST_OPTION(displacing_large_files, s)) {
 710		print_sep(seq, &first);
 711		if (REISERFS_SB(s)->s_alloc_options.large_file_size != 16) {
 712			seq_printf(seq, "displacing_large_files=%lu",
 713			    REISERFS_SB(s)->s_alloc_options.large_file_size);
 714		} else
 715			seq_puts(seq, "displacing_large_files");
 716	}
 717	if (TEST_OPTION(displacing_new_packing_localities, s)) {
 718		print_sep(seq, &first);
 719		seq_puts(seq, "displacing_new_packing_localities");
 720	}
 721	if (TEST_OPTION(old_hashed_relocation, s)) {
 722		print_sep(seq, &first);
 723		seq_puts(seq, "old_hashed_relocation");
 724	}
 725	if (TEST_OPTION(new_hashed_relocation, s)) {
 726		print_sep(seq, &first);
 727		seq_puts(seq, "new_hashed_relocation");
 728	}
 729	if (TEST_OPTION(dirid_groups, s)) {
 730		print_sep(seq, &first);
 731		seq_puts(seq, "dirid_groups");
 732	}
 733	if (TEST_OPTION(oid_groups, s)) {
 734		print_sep(seq, &first);
 735		seq_puts(seq, "oid_groups");
 736	}
 737	if (TEST_OPTION(packing_groups, s)) {
 738		print_sep(seq, &first);
 739		seq_puts(seq, "packing_groups");
 740	}
 741	if (TEST_OPTION(hashed_formatted_nodes, s)) {
 742		print_sep(seq, &first);
 743		seq_puts(seq, "hashed_formatted_nodes");
 744	}
 745	if (TEST_OPTION(skip_busy, s)) {
 746		print_sep(seq, &first);
 747		seq_puts(seq, "skip_busy");
 748	}
 749	if (TEST_OPTION(hundredth_slices, s)) {
 750		print_sep(seq, &first);
 751		seq_puts(seq, "hundredth_slices");
 752	}
 753	if (TEST_OPTION(old_way, s)) {
 754		print_sep(seq, &first);
 755		seq_puts(seq, "old_way");
 756	}
 757	if (TEST_OPTION(displace_based_on_dirid, s)) {
 758		print_sep(seq, &first);
 759		seq_puts(seq, "displace_based_on_dirid");
 760	}
 761	if (REISERFS_SB(s)->s_alloc_options.preallocmin != 0) {
 762		print_sep(seq, &first);
 763		seq_printf(seq, "preallocmin=%d",
 764				REISERFS_SB(s)->s_alloc_options.preallocmin);
 765	}
 766	if (REISERFS_SB(s)->s_alloc_options.preallocsize != 17) {
 767		print_sep(seq, &first);
 768		seq_printf(seq, "preallocsize=%d",
 769				REISERFS_SB(s)->s_alloc_options.preallocsize);
 770	}
 771}
 772
 773static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
 774{
 775	char *hash_in;
 776
 777	if (hint->formatted_node) {
 778		hash_in = (char *)&hint->key.k_dir_id;
 779	} else {
 780		if (!hint->inode) {
 781			/*hint->search_start = hint->beg;*/
 782			hash_in = (char *)&hint->key.k_dir_id;
 783		} else
 784		    if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 785			hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
 786		else
 787			hash_in =
 788			    (char *)(&INODE_PKEY(hint->inode)->k_objectid);
 789	}
 790
 791	hint->search_start =
 792	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
 793}
 794
 795/*
 796 * Relocation based on dirid, hashing them into a given bitmap block
 797 * files. Formatted nodes are unaffected, a separate policy covers them
 798 */
 799static void dirid_groups(reiserfs_blocknr_hint_t * hint)
 800{
 801	unsigned long hash;
 802	__u32 dirid = 0;
 803	int bm = 0;
 804	struct super_block *sb = hint->th->t_super;
 805
 806	if (hint->inode)
 807		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
 808	else if (hint->formatted_node)
 809		dirid = hint->key.k_dir_id;
 810
 811	if (dirid) {
 812		bm = bmap_hash_id(sb, dirid);
 813		hash = bm * (sb->s_blocksize << 3);
 814		/* give a portion of the block group to metadata */
 815		if (hint->inode)
 816			hash += sb->s_blocksize / 2;
 817		hint->search_start = hash;
 818	}
 819}
 820
 821/*
 822 * Relocation based on oid, hashing them into a given bitmap block
 823 * files. Formatted nodes are unaffected, a separate policy covers them
 824 */
 825static void oid_groups(reiserfs_blocknr_hint_t * hint)
 826{
 827	if (hint->inode) {
 828		unsigned long hash;
 829		__u32 oid;
 830		__u32 dirid;
 831		int bm;
 832
 833		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
 834
 835		/*
 836		 * keep the root dir and it's first set of subdirs close to
 837		 * the start of the disk
 838		 */
 839		if (dirid <= 2)
 840			hash = (hint->inode->i_sb->s_blocksize << 3);
 841		else {
 842			oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
 843			bm = bmap_hash_id(hint->inode->i_sb, oid);
 844			hash = bm * (hint->inode->i_sb->s_blocksize << 3);
 845		}
 846		hint->search_start = hash;
 847	}
 848}
 849
 850/*
 851 * returns 1 if it finds an indirect item and gets valid hint info
 852 * from it, otherwise 0
 853 */
 854static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
 855{
 856	struct treepath *path;
 857	struct buffer_head *bh;
 858	struct item_head *ih;
 859	int pos_in_item;
 860	__le32 *item;
 861	int ret = 0;
 862
 863	/*
 864	 * reiserfs code can call this function w/o pointer to path
 865	 * structure supplied; then we rely on supplied search_start
 866	 */
 867	if (!hint->path)
 868		return 0;
 869
 870	path = hint->path;
 871	bh = get_last_bh(path);
 872	RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
 873	ih = tp_item_head(path);
 874	pos_in_item = path->pos_in_item;
 875	item = tp_item_body(path);
 876
 877	hint->search_start = bh->b_blocknr;
 878
 879	/*
 880	 * for indirect item: go to left and look for the first non-hole entry
 881	 * in the indirect item
 882	 */
 883	if (!hint->formatted_node && is_indirect_le_ih(ih)) {
 
 
 884		if (pos_in_item == I_UNFM_NUM(ih))
 885			pos_in_item--;
 
 886		while (pos_in_item >= 0) {
 887			int t = get_block_num(item, pos_in_item);
 888			if (t) {
 889				hint->search_start = t;
 890				ret = 1;
 891				break;
 892			}
 893			pos_in_item--;
 894		}
 895	}
 896
 897	/* does result value fit into specified region? */
 898	return ret;
 899}
 900
 901/*
 902 * should be, if formatted node, then try to put on first part of the device
 903 * specified as number of percent with mount option device, else try to put
 904 * on last of device.  This is not to say it is good code to do so,
 905 * but the effect should be measured.
 906 */
 907static inline void set_border_in_hint(struct super_block *s,
 908				      reiserfs_blocknr_hint_t * hint)
 909{
 910	b_blocknr_t border =
 911	    SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
 912
 913	if (hint->formatted_node)
 914		hint->end = border - 1;
 915	else
 916		hint->beg = border;
 917}
 918
 919static inline void displace_large_file(reiserfs_blocknr_hint_t * hint)
 920{
 921	if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 922		hint->search_start =
 923		    hint->beg +
 924		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id),
 925			       4) % (hint->end - hint->beg);
 926	else
 927		hint->search_start =
 928		    hint->beg +
 929		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid),
 930			       4) % (hint->end - hint->beg);
 931}
 932
 933static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint)
 934{
 935	char *hash_in;
 936
 937	if (!hint->inode)
 938		hash_in = (char *)&hint->key.k_dir_id;
 939	else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 940		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
 941	else
 942		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
 943
 944	hint->search_start =
 945	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
 946}
 947
 948static inline int
 949this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *
 950						   hint)
 951{
 952	return hint->block ==
 953	    REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size;
 954}
 955
 956#ifdef DISPLACE_NEW_PACKING_LOCALITIES
 957static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)
 958{
 959	struct in_core_key *key = &hint->key;
 960
 961	hint->th->displace_new_blocks = 0;
 962	hint->search_start =
 963	    hint->beg + keyed_hash((char *)(&key->k_objectid),
 964				   4) % (hint->end - hint->beg);
 965}
 966#endif
 967
 968static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint)
 969{
 970	b_blocknr_t border;
 971	u32 hash_in;
 972
 973	if (hint->formatted_node || hint->inode == NULL) {
 974		return 0;
 975	}
 976
 977	hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
 978	border =
 979	    hint->beg + (u32) keyed_hash(((char *)(&hash_in)),
 980					 4) % (hint->end - hint->beg - 1);
 981	if (border > hint->search_start)
 982		hint->search_start = border;
 983
 984	return 1;
 985}
 986
 987static inline int old_way(reiserfs_blocknr_hint_t * hint)
 988{
 989	b_blocknr_t border;
 990
 991	if (hint->formatted_node || hint->inode == NULL) {
 992		return 0;
 993	}
 994
 995	border =
 996	    hint->beg +
 997	    le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end -
 998							      hint->beg);
 999	if (border > hint->search_start)
1000		hint->search_start = border;
1001
1002	return 1;
1003}
1004
1005static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint)
1006{
1007	struct in_core_key *key = &hint->key;
1008	b_blocknr_t slice_start;
1009
1010	slice_start =
1011	    (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100);
1012	if (slice_start > hint->search_start
1013	    || slice_start + (hint->end / 100) <= hint->search_start) {
1014		hint->search_start = slice_start;
1015	}
1016}
1017
1018static void determine_search_start(reiserfs_blocknr_hint_t * hint,
1019				   int amount_needed)
1020{
1021	struct super_block *s = hint->th->t_super;
1022	int unfm_hint;
1023
1024	hint->beg = 0;
1025	hint->end = SB_BLOCK_COUNT(s) - 1;
1026
1027	/* This is former border algorithm. Now with tunable border offset */
1028	if (concentrating_formatted_nodes(s))
1029		set_border_in_hint(s, hint);
1030
1031#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1032	/*
1033	 * whenever we create a new directory, we displace it.  At first
1034	 * we will hash for location, later we might look for a moderately
1035	 * empty place for it
1036	 */
1037	if (displacing_new_packing_localities(s)
1038	    && hint->th->displace_new_blocks) {
1039		displace_new_packing_locality(hint);
1040
1041		/*
1042		 * we do not continue determine_search_start,
1043		 * if new packing locality is being displaced
1044		 */
1045		return;
1046	}
1047#endif
1048
1049	/*
1050	 * all persons should feel encouraged to add more special cases
1051	 * here and test them
1052	 */
1053
1054	if (displacing_large_files(s) && !hint->formatted_node
1055	    && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
1056		displace_large_file(hint);
1057		return;
1058	}
1059
1060	/*
1061	 * if none of our special cases is relevant, use the left
1062	 * neighbor in the tree order of the new node we are allocating for
1063	 */
1064	if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
1065		hash_formatted_node(hint);
1066		return;
1067	}
1068
1069	unfm_hint = get_left_neighbor(hint);
1070
1071	/*
1072	 * Mimic old block allocator behaviour, that is if VFS allowed for
1073	 * preallocation, new blocks are displaced based on directory ID.
1074	 * Also, if suggested search_start is less than last preallocated
1075	 * block, we start searching from it, assuming that HDD dataflow
1076	 * is faster in forward direction
1077	 */
1078	if (TEST_OPTION(old_way, s)) {
1079		if (!hint->formatted_node) {
1080			if (!reiserfs_hashed_relocation(s))
1081				old_way(hint);
1082			else if (!reiserfs_no_unhashed_relocation(s))
1083				old_hashed_relocation(hint);
1084
1085			if (hint->inode
1086			    && hint->search_start <
1087			    REISERFS_I(hint->inode)->i_prealloc_block)
1088				hint->search_start =
1089				    REISERFS_I(hint->inode)->i_prealloc_block;
1090		}
1091		return;
1092	}
1093
1094	/* This is an approach proposed by Hans */
1095	if (TEST_OPTION(hundredth_slices, s)
1096	    && !(displacing_large_files(s) && !hint->formatted_node)) {
1097		hundredth_slices(hint);
1098		return;
1099	}
1100
1101	/* old_hashed_relocation only works on unformatted */
1102	if (!unfm_hint && !hint->formatted_node &&
1103	    TEST_OPTION(old_hashed_relocation, s)) {
1104		old_hashed_relocation(hint);
1105	}
1106
1107	/* new_hashed_relocation works with both formatted/unformatted nodes */
1108	if ((!unfm_hint || hint->formatted_node) &&
1109	    TEST_OPTION(new_hashed_relocation, s)) {
1110		new_hashed_relocation(hint);
1111	}
1112
1113	/* dirid grouping works only on unformatted nodes */
1114	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
1115		dirid_groups(hint);
1116	}
1117#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1118	if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
1119		dirid_groups(hint);
1120	}
1121#endif
1122
1123	/* oid grouping works only on unformatted nodes */
1124	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) {
1125		oid_groups(hint);
1126	}
1127	return;
1128}
1129
1130static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
1131{
1132	/* make minimum size a mount option and benchmark both ways */
1133	/* we preallocate blocks only for regular files, specific size */
1134	/* benchmark preallocating always and see what happens */
1135
1136	hint->prealloc_size = 0;
1137
1138	if (!hint->formatted_node && hint->preallocate) {
1139		if (S_ISREG(hint->inode->i_mode) && !IS_PRIVATE(hint->inode)
1140		    && hint->inode->i_size >=
1141		    REISERFS_SB(hint->th->t_super)->s_alloc_options.
1142		    preallocmin * hint->inode->i_sb->s_blocksize)
1143			hint->prealloc_size =
1144			    REISERFS_SB(hint->th->t_super)->s_alloc_options.
1145			    preallocsize - 1;
1146	}
1147	return CARRY_ON;
1148}
1149
 
 
1150static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
1151						 b_blocknr_t * new_blocknrs,
1152						 b_blocknr_t start,
1153						 b_blocknr_t finish, int min,
1154						 int amount_needed,
1155						 int prealloc_size)
1156{
1157	int rest = amount_needed;
1158	int nr_allocated;
1159
1160	while (rest > 0 && start <= finish) {
1161		nr_allocated = scan_bitmap(hint->th, &start, finish, min,
1162					   rest + prealloc_size,
1163					   !hint->formatted_node, hint->block);
1164
1165		if (nr_allocated == 0)	/* no new blocks allocated, return */
1166			break;
1167
1168		/* fill free_blocknrs array first */
1169		while (rest > 0 && nr_allocated > 0) {
1170			*new_blocknrs++ = start++;
1171			rest--;
1172			nr_allocated--;
1173		}
1174
1175		/* do we have something to fill prealloc. array also ? */
1176		if (nr_allocated > 0) {
1177			/*
1178			 * it means prealloc_size was greater that 0 and
1179			 * we do preallocation
1180			 */
1181			list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
1182				 &SB_JOURNAL(hint->th->t_super)->
1183				 j_prealloc_list);
1184			REISERFS_I(hint->inode)->i_prealloc_block = start;
1185			REISERFS_I(hint->inode)->i_prealloc_count =
1186			    nr_allocated;
1187			break;
1188		}
1189	}
1190
1191	return (amount_needed - rest);
1192}
1193
1194static inline int blocknrs_and_prealloc_arrays_from_search_start
1195    (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs,
1196     int amount_needed) {
1197	struct super_block *s = hint->th->t_super;
1198	b_blocknr_t start = hint->search_start;
1199	b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
1200	int passno = 0;
1201	int nr_allocated = 0;
1202	int depth;
1203
1204	determine_prealloc_size(hint);
1205	if (!hint->formatted_node) {
1206		int quota_ret;
1207#ifdef REISERQUOTA_DEBUG
1208		reiserfs_debug(s, REISERFS_DEBUG_CODE,
1209			       "reiserquota: allocating %d blocks id=%u",
1210			       amount_needed, hint->inode->i_uid);
1211#endif
1212		depth = reiserfs_write_unlock_nested(s);
1213		quota_ret =
1214		    dquot_alloc_block_nodirty(hint->inode, amount_needed);
1215		if (quota_ret) {	/* Quota exceeded? */
1216			reiserfs_write_lock_nested(s, depth);
1217			return QUOTA_EXCEEDED;
1218		}
1219		if (hint->preallocate && hint->prealloc_size) {
1220#ifdef REISERQUOTA_DEBUG
1221			reiserfs_debug(s, REISERFS_DEBUG_CODE,
1222				       "reiserquota: allocating (prealloc) %d blocks id=%u",
1223				       hint->prealloc_size, hint->inode->i_uid);
1224#endif
1225			quota_ret = dquot_prealloc_block_nodirty(hint->inode,
1226							 hint->prealloc_size);
1227			if (quota_ret)
1228				hint->preallocate = hint->prealloc_size = 0;
1229		}
1230		/* for unformatted nodes, force large allocations */
1231		reiserfs_write_lock_nested(s, depth);
1232	}
1233
1234	do {
1235		switch (passno++) {
1236		case 0:	/* Search from hint->search_start to end of disk */
1237			start = hint->search_start;
1238			finish = SB_BLOCK_COUNT(s) - 1;
1239			break;
1240		case 1:	/* Search from hint->beg to hint->search_start */
1241			start = hint->beg;
1242			finish = hint->search_start;
1243			break;
1244		case 2:	/* Last chance: Search from 0 to hint->beg */
1245			start = 0;
1246			finish = hint->beg;
1247			break;
1248		default:
1249			/* We've tried searching everywhere, not enough space */
1250			/* Free the blocks */
1251			if (!hint->formatted_node) {
1252#ifdef REISERQUOTA_DEBUG
1253				reiserfs_debug(s, REISERFS_DEBUG_CODE,
1254					       "reiserquota: freeing (nospace) %d blocks id=%u",
1255					       amount_needed +
1256					       hint->prealloc_size -
1257					       nr_allocated,
1258					       hint->inode->i_uid);
1259#endif
1260				/* Free not allocated blocks */
1261				depth = reiserfs_write_unlock_nested(s);
1262				dquot_free_block_nodirty(hint->inode,
1263					amount_needed + hint->prealloc_size -
1264					nr_allocated);
1265				reiserfs_write_lock_nested(s, depth);
1266			}
1267			while (nr_allocated--)
1268				reiserfs_free_block(hint->th, hint->inode,
1269						    new_blocknrs[nr_allocated],
1270						    !hint->formatted_node);
1271
1272			return NO_DISK_SPACE;
1273		}
1274	} while ((nr_allocated += allocate_without_wrapping_disk(hint,
1275								 new_blocknrs +
1276								 nr_allocated,
1277								 start, finish,
1278								 1,
1279								 amount_needed -
1280								 nr_allocated,
1281								 hint->
1282								 prealloc_size))
1283		 < amount_needed);
1284	if (!hint->formatted_node &&
1285	    amount_needed + hint->prealloc_size >
1286	    nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) {
1287		/* Some of preallocation blocks were not allocated */
1288#ifdef REISERQUOTA_DEBUG
1289		reiserfs_debug(s, REISERFS_DEBUG_CODE,
1290			       "reiserquota: freeing (failed prealloc) %d blocks id=%u",
1291			       amount_needed + hint->prealloc_size -
1292			       nr_allocated -
1293			       REISERFS_I(hint->inode)->i_prealloc_count,
1294			       hint->inode->i_uid);
1295#endif
1296
1297		depth = reiserfs_write_unlock_nested(s);
1298		dquot_free_block_nodirty(hint->inode, amount_needed +
1299					 hint->prealloc_size - nr_allocated -
1300					 REISERFS_I(hint->inode)->
1301					 i_prealloc_count);
1302		reiserfs_write_lock_nested(s, depth);
1303	}
1304
1305	return CARRY_ON;
1306}
1307
1308/* grab new blocknrs from preallocated list */
1309/* return amount still needed after using them */
1310static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
1311					      b_blocknr_t * new_blocknrs,
1312					      int amount_needed)
1313{
1314	struct inode *inode = hint->inode;
1315
1316	if (REISERFS_I(inode)->i_prealloc_count > 0) {
1317		while (amount_needed) {
1318
1319			*new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++;
1320			REISERFS_I(inode)->i_prealloc_count--;
1321
1322			amount_needed--;
1323
1324			if (REISERFS_I(inode)->i_prealloc_count <= 0) {
1325				list_del(&REISERFS_I(inode)->i_prealloc_list);
1326				break;
1327			}
1328		}
1329	}
1330	/* return amount still needed after using preallocated blocks */
1331	return amount_needed;
1332}
1333
1334int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint,
1335			       b_blocknr_t *new_blocknrs,
1336			       int amount_needed,
1337			       /* Amount of blocks we have already reserved */
1338			       int reserved_by_us)
1339{
1340	int initial_amount_needed = amount_needed;
1341	int ret;
1342	struct super_block *s = hint->th->t_super;
1343
1344	/* Check if there is enough space, taking into account reserved space */
1345	if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks <
1346	    amount_needed - reserved_by_us)
1347		return NO_DISK_SPACE;
1348	/* should this be if !hint->inode &&  hint->preallocate? */
1349	/* do you mean hint->formatted_node can be removed ? - Zam */
1350	/*
1351	 * hint->formatted_node cannot be removed because we try to access
1352	 * inode information here, and there is often no inode associated with
1353	 * metadata allocations - green
1354	 */
1355
1356	if (!hint->formatted_node && hint->preallocate) {
1357		amount_needed = use_preallocated_list_if_available
1358		    (hint, new_blocknrs, amount_needed);
1359
1360		/*
1361		 * We have all the block numbers we need from the
1362		 * prealloc list
1363		 */
1364		if (amount_needed == 0)
1365			return CARRY_ON;
1366		new_blocknrs += (initial_amount_needed - amount_needed);
1367	}
1368
1369	/* find search start and save it in hint structure */
1370	determine_search_start(hint, amount_needed);
1371	if (hint->search_start >= SB_BLOCK_COUNT(s))
1372		hint->search_start = SB_BLOCK_COUNT(s) - 1;
1373
1374	/* allocation itself; fill new_blocknrs and preallocation arrays */
1375	ret = blocknrs_and_prealloc_arrays_from_search_start
1376	    (hint, new_blocknrs, amount_needed);
1377
1378	/*
1379	 * We used prealloc. list to fill (partially) new_blocknrs array.
1380	 * If final allocation fails we need to return blocks back to
1381	 * prealloc. list or just free them. -- Zam (I chose second
1382	 * variant)
1383	 */
1384	if (ret != CARRY_ON) {
1385		while (amount_needed++ < initial_amount_needed) {
1386			reiserfs_free_block(hint->th, hint->inode,
1387					    *(--new_blocknrs), 1);
1388		}
1389	}
1390	return ret;
1391}
1392
1393void reiserfs_cache_bitmap_metadata(struct super_block *sb,
1394                                    struct buffer_head *bh,
1395                                    struct reiserfs_bitmap_info *info)
1396{
1397	unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size);
1398
1399	/* The first bit must ALWAYS be 1 */
1400	if (!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data))
1401		reiserfs_error(sb, "reiserfs-2025", "bitmap block %lu is "
1402			       "corrupted: first bit must be 1", bh->b_blocknr);
1403
1404	info->free_count = 0;
1405
1406	while (--cur >= (unsigned long *)bh->b_data) {
1407		/* 0 and ~0 are special, we can optimize for them */
1408		if (*cur == 0)
1409			info->free_count += BITS_PER_LONG;
1410		else if (*cur != ~0L)	/* A mix, investigate */
1411			info->free_count += BITS_PER_LONG - hweight_long(*cur);
1412	}
1413}
1414
1415struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1416                                               unsigned int bitmap)
1417{
1418	b_blocknr_t block = (sb->s_blocksize << 3) * bitmap;
1419	struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap;
1420	struct buffer_head *bh;
1421
1422	/*
1423	 * Way old format filesystems had the bitmaps packed up front.
1424	 * I doubt there are any of these left, but just in case...
1425	 */
1426	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
1427			      &REISERFS_SB(sb)->s_properties)))
1428		block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap;
1429	else if (bitmap == 0)
1430		block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
1431
1432	bh = sb_bread(sb, block);
1433	if (bh == NULL)
1434		reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
1435		                 "reading failed", __func__, block);
1436	else {
1437		if (buffer_locked(bh)) {
1438			int depth;
1439			PROC_INFO_INC(sb, scan_bitmap.wait);
1440			depth = reiserfs_write_unlock_nested(sb);
1441			__wait_on_buffer(bh);
1442			reiserfs_write_lock_nested(sb, depth);
1443		}
1444		BUG_ON(!buffer_uptodate(bh));
1445		BUG_ON(atomic_read(&bh->b_count) == 0);
1446
1447		if (info->free_count == UINT_MAX)
1448			reiserfs_cache_bitmap_metadata(sb, bh, info);
1449	}
1450
1451	return bh;
1452}
1453
1454int reiserfs_init_bitmap_cache(struct super_block *sb)
1455{
1456	struct reiserfs_bitmap_info *bitmap;
1457	unsigned int bmap_nr = reiserfs_bmap_count(sb);
1458
1459	bitmap = vmalloc(array_size(bmap_nr, sizeof(*bitmap)));
1460	if (bitmap == NULL)
1461		return -ENOMEM;
1462
1463	memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr);
1464
1465	SB_AP_BITMAP(sb) = bitmap;
1466
1467	return 0;
1468}
1469
1470void reiserfs_free_bitmap_cache(struct super_block *sb)
1471{
1472	if (SB_AP_BITMAP(sb)) {
1473		vfree(SB_AP_BITMAP(sb));
1474		SB_AP_BITMAP(sb) = NULL;
1475	}
1476}
v3.15
   1/*
   2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
   3 */
   4/* Reiserfs block (de)allocator, bitmap-based. */
   5
   6#include <linux/time.h>
   7#include "reiserfs.h"
   8#include <linux/errno.h>
   9#include <linux/buffer_head.h>
  10#include <linux/kernel.h>
  11#include <linux/pagemap.h>
  12#include <linux/vmalloc.h>
  13#include <linux/quotaops.h>
  14#include <linux/seq_file.h>
  15
  16#define PREALLOCATION_SIZE 9
  17
  18/* different reiserfs block allocator options */
  19
  20#define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits)
  21
  22#define  _ALLOC_concentrating_formatted_nodes 0
  23#define  _ALLOC_displacing_large_files 1
  24#define  _ALLOC_displacing_new_packing_localities 2
  25#define  _ALLOC_old_hashed_relocation 3
  26#define  _ALLOC_new_hashed_relocation 4
  27#define  _ALLOC_skip_busy 5
  28#define  _ALLOC_displace_based_on_dirid 6
  29#define  _ALLOC_hashed_formatted_nodes 7
  30#define  _ALLOC_old_way 8
  31#define  _ALLOC_hundredth_slices 9
  32#define  _ALLOC_dirid_groups 10
  33#define  _ALLOC_oid_groups 11
  34#define  _ALLOC_packing_groups 12
  35
  36#define  concentrating_formatted_nodes(s)	test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
  37#define  displacing_large_files(s)		test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
  38#define  displacing_new_packing_localities(s)	test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))
  39
  40#define SET_OPTION(optname) \
  41   do { \
  42	reiserfs_info(s, "block allocator option \"%s\" is set", #optname); \
  43	set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
  44    } while(0)
  45#define TEST_OPTION(optname, s) \
  46    test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
  47
  48static inline void get_bit_address(struct super_block *s,
  49				   b_blocknr_t block,
  50				   unsigned int *bmap_nr,
  51				   unsigned int *offset)
  52{
  53	/* It is in the bitmap block number equal to the block
  54	 * number divided by the number of bits in a block. */
 
 
  55	*bmap_nr = block >> (s->s_blocksize_bits + 3);
  56	/* Within that bitmap block it is located at bit offset *offset. */
  57	*offset = block & ((s->s_blocksize << 3) - 1);
  58}
  59
  60int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
  61{
  62	unsigned int bmap, offset;
  63	unsigned int bmap_count = reiserfs_bmap_count(s);
  64
  65	if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
  66		reiserfs_error(s, "vs-4010",
  67			       "block number is out of range %lu (%u)",
  68			       block, SB_BLOCK_COUNT(s));
  69		return 0;
  70	}
  71
  72	get_bit_address(s, block, &bmap, &offset);
  73
  74	/* Old format filesystem? Unlikely, but the bitmaps are all up front so
  75	 * we need to account for it. */
 
 
  76	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
  77			      &(REISERFS_SB(s)->s_properties)))) {
  78		b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
  79		if (block >= bmap1 &&
  80		    block <= bmap1 + bmap_count) {
  81			reiserfs_error(s, "vs-4019", "bitmap block %lu(%u) "
  82				       "can't be freed or reused",
  83				       block, bmap_count);
  84			return 0;
  85		}
  86	} else {
  87		if (offset == 0) {
  88			reiserfs_error(s, "vs-4020", "bitmap block %lu(%u) "
  89				       "can't be freed or reused",
  90				       block, bmap_count);
  91			return 0;
  92		}
  93	}
  94
  95	if (bmap >= bmap_count) {
  96		reiserfs_error(s, "vs-4030", "bitmap for requested block "
  97			       "is out of range: block=%lu, bitmap_nr=%u",
  98			       block, bmap);
  99		return 0;
 100	}
 101
 102	if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
 103		reiserfs_error(s, "vs-4050", "this is root block (%u), "
 104			       "it must be busy", SB_ROOT_BLOCK(s));
 105		return 0;
 106	}
 107
 108	return 1;
 109}
 110
 111/* searches in journal structures for a given block number (bmap, off). If block
 112   is found in reiserfs journal it suggests next free block candidate to test. */
 
 
 
 113static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
 114				      int off, int *next)
 115{
 116	b_blocknr_t tmp;
 117
 118	if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) {
 119		if (tmp) {	/* hint supplied */
 120			*next = tmp;
 121			PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
 122		} else {
 123			(*next) = off + 1;	/* inc offset to avoid looping. */
 124			PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
 125		}
 126		PROC_INFO_INC(s, scan_bitmap.retry);
 127		return 1;
 128	}
 129	return 0;
 130}
 131
 132/* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
 133 * block; */
 
 
 134static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
 135			     unsigned int bmap_n, int *beg, int boundary,
 136			     int min, int max, int unfm)
 137{
 138	struct super_block *s = th->t_super;
 139	struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
 140	struct buffer_head *bh;
 141	int end, next;
 142	int org = *beg;
 143
 144	BUG_ON(!th->t_trans_id);
 145
 146	RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
 147	       "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
 148	PROC_INFO_INC(s, scan_bitmap.bmap);
 149/* this is unclear and lacks comments, explain how journal bitmaps
 150   work here for the reader.  Convey a sense of the design here. What
 151   is a window? */
 152/* - I mean `a window of zero bits' as in description of this function - Zam. */
 153
 154	if (!bi) {
 155		reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer "
 156			       "for bitmap %d", bmap_n);
 157		return 0;
 158	}
 159
 160	bh = reiserfs_read_bitmap_block(s, bmap_n);
 161	if (bh == NULL)
 162		return 0;
 163
 164	while (1) {
 165	      cont:
 166		if (bi->free_count < min) {
 167			brelse(bh);
 168			return 0;	// No free blocks in this bitmap
 169		}
 170
 171		/* search for a first zero bit -- beginning of a window */
 172		*beg = reiserfs_find_next_zero_le_bit
 173		    ((unsigned long *)(bh->b_data), boundary, *beg);
 174
 175		if (*beg + min > boundary) {	/* search for a zero bit fails or the rest of bitmap block
 176						 * cannot contain a zero window of minimum size */
 
 
 
 177			brelse(bh);
 178			return 0;
 179		}
 180
 181		if (unfm && is_block_in_journal(s, bmap_n, *beg, beg))
 182			continue;
 183		/* first zero bit found; we check next bits */
 184		for (end = *beg + 1;; end++) {
 185			if (end >= *beg + max || end >= boundary
 186			    || reiserfs_test_le_bit(end, bh->b_data)) {
 187				next = end;
 188				break;
 189			}
 190			/* finding the other end of zero bit window requires looking into journal structures (in
 191			 * case of searching for free blocks for unformatted nodes) */
 
 
 
 
 192			if (unfm && is_block_in_journal(s, bmap_n, end, &next))
 193				break;
 194		}
 195
 196		/* now (*beg) points to beginning of zero bits window,
 197		 * (end) points to one bit after the window end */
 198		if (end - *beg >= min) {	/* it seems we have found window of proper size */
 
 
 
 
 199			int i;
 200			reiserfs_prepare_for_journal(s, bh, 1);
 201			/* try to set all blocks used checking are they still free */
 
 
 
 202			for (i = *beg; i < end; i++) {
 203				/* It seems that we should not check in journal again. */
 204				if (reiserfs_test_and_set_le_bit
 205				    (i, bh->b_data)) {
 206					/* bit was set by another process
 207					 * while we slept in prepare_for_journal() */
 
 
 208					PROC_INFO_INC(s, scan_bitmap.stolen);
 209					if (i >= *beg + min) {	/* we can continue with smaller set of allocated blocks,
 210								 * if length of this set is more or equal to `min' */
 
 
 
 
 
 211						end = i;
 212						break;
 213					}
 214					/* otherwise we clear all bit were set ... */
 
 
 
 
 215					while (--i >= *beg)
 216						reiserfs_clear_le_bit
 217						    (i, bh->b_data);
 218					reiserfs_restore_prepared_buffer(s, bh);
 219					*beg = org;
 220					/* ... and search again in current block from beginning */
 
 
 
 
 221					goto cont;
 222				}
 223			}
 224			bi->free_count -= (end - *beg);
 225			journal_mark_dirty(th, s, bh);
 226			brelse(bh);
 227
 228			/* free block count calculation */
 229			reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
 230						     1);
 231			PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
 232			journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
 233
 234			return end - (*beg);
 235		} else {
 236			*beg = next;
 237		}
 238	}
 239}
 240
 241static int bmap_hash_id(struct super_block *s, u32 id)
 242{
 243	char *hash_in = NULL;
 244	unsigned long hash;
 245	unsigned bm;
 246
 247	if (id <= 2) {
 248		bm = 1;
 249	} else {
 250		hash_in = (char *)(&id);
 251		hash = keyed_hash(hash_in, 4);
 252		bm = hash % reiserfs_bmap_count(s);
 253		if (!bm)
 254			bm = 1;
 255	}
 256	/* this can only be true when SB_BMAP_NR = 1 */
 257	if (bm >= reiserfs_bmap_count(s))
 258		bm = 0;
 259	return bm;
 260}
 261
 262/*
 263 * hashes the id and then returns > 0 if the block group for the
 264 * corresponding hash is full
 265 */
 266static inline int block_group_used(struct super_block *s, u32 id)
 267{
 268	int bm = bmap_hash_id(s, id);
 269	struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm];
 270
 271	/* If we don't have cached information on this bitmap block, we're
 
 272	 * going to have to load it later anyway. Loading it here allows us
 273	 * to make a better decision. This favors long-term performance gain
 274	 * with a better on-disk layout vs. a short term gain of skipping the
 275	 * read and potentially having a bad placement. */
 
 276	if (info->free_count == UINT_MAX) {
 277		struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
 278		brelse(bh);
 279	}
 280
 281	if (info->free_count > ((s->s_blocksize << 3) * 60 / 100)) {
 282		return 0;
 283	}
 284	return 1;
 285}
 286
 287/*
 288 * the packing is returned in disk byte order
 289 */
 290__le32 reiserfs_choose_packing(struct inode * dir)
 291{
 292	__le32 packing;
 293	if (TEST_OPTION(packing_groups, dir->i_sb)) {
 294		u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
 295		/*
 296		 * some versions of reiserfsck expect packing locality 1 to be
 297		 * special
 298		 */
 299		if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir))
 300			packing = INODE_PKEY(dir)->k_objectid;
 301		else
 302			packing = INODE_PKEY(dir)->k_dir_id;
 303	} else
 304		packing = INODE_PKEY(dir)->k_objectid;
 305	return packing;
 306}
 307
 308/* Tries to find contiguous zero bit window (given size) in given region of
 309 * bitmap and place new blocks there. Returns number of allocated blocks. */
 
 
 310static int scan_bitmap(struct reiserfs_transaction_handle *th,
 311		       b_blocknr_t * start, b_blocknr_t finish,
 312		       int min, int max, int unfm, sector_t file_block)
 313{
 314	int nr_allocated = 0;
 315	struct super_block *s = th->t_super;
 316	/* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
 317	 * - Hans, it is not a block number - Zam. */
 318
 319	unsigned int bm, off;
 320	unsigned int end_bm, end_off;
 321	unsigned int off_max = s->s_blocksize << 3;
 322
 323	BUG_ON(!th->t_trans_id);
 
 324
 325	PROC_INFO_INC(s, scan_bitmap.call);
 326	if (SB_FREE_BLOCKS(s) <= 0)
 327		return 0;	// No point in looking for more free blocks
 328
 329	get_bit_address(s, *start, &bm, &off);
 330	get_bit_address(s, finish, &end_bm, &end_off);
 331	if (bm > reiserfs_bmap_count(s))
 332		return 0;
 333	if (end_bm > reiserfs_bmap_count(s))
 334		end_bm = reiserfs_bmap_count(s);
 335
 336	/* When the bitmap is more than 10% free, anyone can allocate.
 
 337	 * When it's less than 10% free, only files that already use the
 338	 * bitmap are allowed. Once we pass 80% full, this restriction
 339	 * is lifted.
 340	 *
 341	 * We do this so that files that grow later still have space close to
 342	 * their original allocation. This improves locality, and presumably
 343	 * performance as a result.
 344	 *
 345	 * This is only an allocation policy and does not make up for getting a
 346	 * bad hint. Decent hinting must be implemented for this to work well.
 347	 */
 348	if (TEST_OPTION(skip_busy, s)
 349	    && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) {
 350		for (; bm < end_bm; bm++, off = 0) {
 351			if ((off && (!unfm || (file_block != 0)))
 352			    || SB_AP_BITMAP(s)[bm].free_count >
 353			    (s->s_blocksize << 3) / 10)
 354				nr_allocated =
 355				    scan_bitmap_block(th, bm, &off, off_max,
 356						      min, max, unfm);
 357			if (nr_allocated)
 358				goto ret;
 359		}
 360		/* we know from above that start is a reasonable number */
 361		get_bit_address(s, *start, &bm, &off);
 362	}
 363
 364	for (; bm < end_bm; bm++, off = 0) {
 365		nr_allocated =
 366		    scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
 367		if (nr_allocated)
 368			goto ret;
 369	}
 370
 371	nr_allocated =
 372	    scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
 373
 374      ret:
 375	*start = bm * off_max + off;
 376	return nr_allocated;
 377
 378}
 379
 380static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
 381				 struct inode *inode, b_blocknr_t block,
 382				 int for_unformatted)
 383{
 384	struct super_block *s = th->t_super;
 385	struct reiserfs_super_block *rs;
 386	struct buffer_head *sbh, *bmbh;
 387	struct reiserfs_bitmap_info *apbi;
 388	unsigned int nr, offset;
 389
 390	BUG_ON(!th->t_trans_id);
 391
 392	PROC_INFO_INC(s, free_block);
 393
 394	rs = SB_DISK_SUPER_BLOCK(s);
 395	sbh = SB_BUFFER_WITH_SB(s);
 396	apbi = SB_AP_BITMAP(s);
 397
 398	get_bit_address(s, block, &nr, &offset);
 399
 400	if (nr >= reiserfs_bmap_count(s)) {
 401		reiserfs_error(s, "vs-4075", "block %lu is out of range",
 402			       block);
 403		return;
 404	}
 405
 406	bmbh = reiserfs_read_bitmap_block(s, nr);
 407	if (!bmbh)
 408		return;
 409
 410	reiserfs_prepare_for_journal(s, bmbh, 1);
 411
 412	/* clear bit for the given block in bit map */
 413	if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) {
 414		reiserfs_error(s, "vs-4080",
 415			       "block %lu: bit already cleared", block);
 416	}
 417	apbi[nr].free_count++;
 418	journal_mark_dirty(th, s, bmbh);
 419	brelse(bmbh);
 420
 421	reiserfs_prepare_for_journal(s, sbh, 1);
 422	/* update super block */
 423	set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
 424
 425	journal_mark_dirty(th, s, sbh);
 426	if (for_unformatted) {
 427		int depth = reiserfs_write_unlock_nested(s);
 428		dquot_free_block_nodirty(inode, 1);
 429		reiserfs_write_lock_nested(s, depth);
 430	}
 431}
 432
 433void reiserfs_free_block(struct reiserfs_transaction_handle *th,
 434			 struct inode *inode, b_blocknr_t block,
 435			 int for_unformatted)
 436{
 437	struct super_block *s = th->t_super;
 
 438	BUG_ON(!th->t_trans_id);
 439
 440	RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
 441	if (!is_reusable(s, block, 1))
 442		return;
 443
 444	if (block > sb_block_count(REISERFS_SB(s)->s_rs)) {
 445		reiserfs_error(th->t_super, "bitmap-4072",
 446			       "Trying to free block outside file system "
 447			       "boundaries (%lu > %lu)",
 448			       block, sb_block_count(REISERFS_SB(s)->s_rs));
 449		return;
 450	}
 451	/* mark it before we clear it, just in case */
 452	journal_mark_freed(th, s, block);
 453	_reiserfs_free_block(th, inode, block, for_unformatted);
 454}
 455
 456/* preallocated blocks don't need to be run through journal_mark_freed */
 457static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
 458					 struct inode *inode, b_blocknr_t block)
 459{
 460	BUG_ON(!th->t_trans_id);
 461	RFALSE(!th->t_super,
 462	       "vs-4060: trying to free block on nonexistent device");
 463	if (!is_reusable(th->t_super, block, 1))
 464		return;
 465	_reiserfs_free_block(th, inode, block, 1);
 466}
 467
 468static void __discard_prealloc(struct reiserfs_transaction_handle *th,
 469			       struct reiserfs_inode_info *ei)
 470{
 471	unsigned long save = ei->i_prealloc_block;
 472	int dirty = 0;
 473	struct inode *inode = &ei->vfs_inode;
 
 474	BUG_ON(!th->t_trans_id);
 475#ifdef CONFIG_REISERFS_CHECK
 476	if (ei->i_prealloc_count < 0)
 477		reiserfs_error(th->t_super, "zam-4001",
 478			       "inode has negative prealloc blocks count.");
 479#endif
 480	while (ei->i_prealloc_count > 0) {
 481		reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
 482		ei->i_prealloc_block++;
 
 
 
 
 
 
 
 483		ei->i_prealloc_count--;
 
 484		dirty = 1;
 485	}
 486	if (dirty)
 487		reiserfs_update_sd(th, inode);
 488	ei->i_prealloc_block = save;
 489	list_del_init(&(ei->i_prealloc_list));
 490}
 491
 492/* FIXME: It should be inline function */
 493void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
 494			       struct inode *inode)
 495{
 496	struct reiserfs_inode_info *ei = REISERFS_I(inode);
 
 497	BUG_ON(!th->t_trans_id);
 498	if (ei->i_prealloc_count)
 499		__discard_prealloc(th, ei);
 500}
 501
 502void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
 503{
 504	struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
 505
 506	BUG_ON(!th->t_trans_id);
 507
 508	while (!list_empty(plist)) {
 509		struct reiserfs_inode_info *ei;
 510		ei = list_entry(plist->next, struct reiserfs_inode_info,
 511				i_prealloc_list);
 512#ifdef CONFIG_REISERFS_CHECK
 513		if (!ei->i_prealloc_count) {
 514			reiserfs_error(th->t_super, "zam-4001",
 515				       "inode is in prealloc list but has "
 516				       "no preallocated blocks.");
 517		}
 518#endif
 519		__discard_prealloc(th, ei);
 520	}
 521}
 522
 523void reiserfs_init_alloc_options(struct super_block *s)
 524{
 525	set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
 526	set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
 527	set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s));
 528}
 529
 530/* block allocator related options are parsed here */
 531int reiserfs_parse_alloc_options(struct super_block *s, char *options)
 532{
 533	char *this_char, *value;
 534
 535	REISERFS_SB(s)->s_alloc_options.bits = 0;	/* clear default settings */
 
 536
 537	while ((this_char = strsep(&options, ":")) != NULL) {
 538		if ((value = strchr(this_char, '=')) != NULL)
 539			*value++ = 0;
 540
 541		if (!strcmp(this_char, "concentrating_formatted_nodes")) {
 542			int temp;
 543			SET_OPTION(concentrating_formatted_nodes);
 544			temp = (value
 545				&& *value) ? simple_strtoul(value, &value,
 546							    0) : 10;
 547			if (temp <= 0 || temp > 100) {
 548				REISERFS_SB(s)->s_alloc_options.border = 10;
 549			} else {
 550				REISERFS_SB(s)->s_alloc_options.border =
 551				    100 / temp;
 552			}
 553			continue;
 554		}
 555		if (!strcmp(this_char, "displacing_large_files")) {
 556			SET_OPTION(displacing_large_files);
 557			REISERFS_SB(s)->s_alloc_options.large_file_size =
 558			    (value
 559			     && *value) ? simple_strtoul(value, &value, 0) : 16;
 560			continue;
 561		}
 562		if (!strcmp(this_char, "displacing_new_packing_localities")) {
 563			SET_OPTION(displacing_new_packing_localities);
 564			continue;
 565		};
 566
 567		if (!strcmp(this_char, "old_hashed_relocation")) {
 568			SET_OPTION(old_hashed_relocation);
 569			continue;
 570		}
 571
 572		if (!strcmp(this_char, "new_hashed_relocation")) {
 573			SET_OPTION(new_hashed_relocation);
 574			continue;
 575		}
 576
 577		if (!strcmp(this_char, "dirid_groups")) {
 578			SET_OPTION(dirid_groups);
 579			continue;
 580		}
 581		if (!strcmp(this_char, "oid_groups")) {
 582			SET_OPTION(oid_groups);
 583			continue;
 584		}
 585		if (!strcmp(this_char, "packing_groups")) {
 586			SET_OPTION(packing_groups);
 587			continue;
 588		}
 589		if (!strcmp(this_char, "hashed_formatted_nodes")) {
 590			SET_OPTION(hashed_formatted_nodes);
 591			continue;
 592		}
 593
 594		if (!strcmp(this_char, "skip_busy")) {
 595			SET_OPTION(skip_busy);
 596			continue;
 597		}
 598
 599		if (!strcmp(this_char, "hundredth_slices")) {
 600			SET_OPTION(hundredth_slices);
 601			continue;
 602		}
 603
 604		if (!strcmp(this_char, "old_way")) {
 605			SET_OPTION(old_way);
 606			continue;
 607		}
 608
 609		if (!strcmp(this_char, "displace_based_on_dirid")) {
 610			SET_OPTION(displace_based_on_dirid);
 611			continue;
 612		}
 613
 614		if (!strcmp(this_char, "preallocmin")) {
 615			REISERFS_SB(s)->s_alloc_options.preallocmin =
 616			    (value
 617			     && *value) ? simple_strtoul(value, &value, 0) : 4;
 618			continue;
 619		}
 620
 621		if (!strcmp(this_char, "preallocsize")) {
 622			REISERFS_SB(s)->s_alloc_options.preallocsize =
 623			    (value
 624			     && *value) ? simple_strtoul(value, &value,
 625							 0) :
 626			    PREALLOCATION_SIZE;
 627			continue;
 628		}
 629
 630		reiserfs_warning(s, "zam-4001", "unknown option - %s",
 631				 this_char);
 632		return 1;
 633	}
 634
 635	reiserfs_info(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
 636	return 0;
 637}
 638
 639static void print_sep(struct seq_file *seq, int *first)
 640{
 641	if (!*first)
 642		seq_puts(seq, ":");
 643	else
 644		*first = 0;
 645}
 646
 647void show_alloc_options(struct seq_file *seq, struct super_block *s)
 648{
 649	int first = 1;
 650
 651	if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) |
 652		(1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups)))
 653		return;
 654
 655	seq_puts(seq, ",alloc=");
 656
 657	if (TEST_OPTION(concentrating_formatted_nodes, s)) {
 658		print_sep(seq, &first);
 659		if (REISERFS_SB(s)->s_alloc_options.border != 10) {
 660			seq_printf(seq, "concentrating_formatted_nodes=%d",
 661				100 / REISERFS_SB(s)->s_alloc_options.border);
 662		} else
 663			seq_puts(seq, "concentrating_formatted_nodes");
 664	}
 665	if (TEST_OPTION(displacing_large_files, s)) {
 666		print_sep(seq, &first);
 667		if (REISERFS_SB(s)->s_alloc_options.large_file_size != 16) {
 668			seq_printf(seq, "displacing_large_files=%lu",
 669			    REISERFS_SB(s)->s_alloc_options.large_file_size);
 670		} else
 671			seq_puts(seq, "displacing_large_files");
 672	}
 673	if (TEST_OPTION(displacing_new_packing_localities, s)) {
 674		print_sep(seq, &first);
 675		seq_puts(seq, "displacing_new_packing_localities");
 676	}
 677	if (TEST_OPTION(old_hashed_relocation, s)) {
 678		print_sep(seq, &first);
 679		seq_puts(seq, "old_hashed_relocation");
 680	}
 681	if (TEST_OPTION(new_hashed_relocation, s)) {
 682		print_sep(seq, &first);
 683		seq_puts(seq, "new_hashed_relocation");
 684	}
 685	if (TEST_OPTION(dirid_groups, s)) {
 686		print_sep(seq, &first);
 687		seq_puts(seq, "dirid_groups");
 688	}
 689	if (TEST_OPTION(oid_groups, s)) {
 690		print_sep(seq, &first);
 691		seq_puts(seq, "oid_groups");
 692	}
 693	if (TEST_OPTION(packing_groups, s)) {
 694		print_sep(seq, &first);
 695		seq_puts(seq, "packing_groups");
 696	}
 697	if (TEST_OPTION(hashed_formatted_nodes, s)) {
 698		print_sep(seq, &first);
 699		seq_puts(seq, "hashed_formatted_nodes");
 700	}
 701	if (TEST_OPTION(skip_busy, s)) {
 702		print_sep(seq, &first);
 703		seq_puts(seq, "skip_busy");
 704	}
 705	if (TEST_OPTION(hundredth_slices, s)) {
 706		print_sep(seq, &first);
 707		seq_puts(seq, "hundredth_slices");
 708	}
 709	if (TEST_OPTION(old_way, s)) {
 710		print_sep(seq, &first);
 711		seq_puts(seq, "old_way");
 712	}
 713	if (TEST_OPTION(displace_based_on_dirid, s)) {
 714		print_sep(seq, &first);
 715		seq_puts(seq, "displace_based_on_dirid");
 716	}
 717	if (REISERFS_SB(s)->s_alloc_options.preallocmin != 0) {
 718		print_sep(seq, &first);
 719		seq_printf(seq, "preallocmin=%d",
 720				REISERFS_SB(s)->s_alloc_options.preallocmin);
 721	}
 722	if (REISERFS_SB(s)->s_alloc_options.preallocsize != 17) {
 723		print_sep(seq, &first);
 724		seq_printf(seq, "preallocsize=%d",
 725				REISERFS_SB(s)->s_alloc_options.preallocsize);
 726	}
 727}
 728
 729static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
 730{
 731	char *hash_in;
 
 732	if (hint->formatted_node) {
 733		hash_in = (char *)&hint->key.k_dir_id;
 734	} else {
 735		if (!hint->inode) {
 736			//hint->search_start = hint->beg;
 737			hash_in = (char *)&hint->key.k_dir_id;
 738		} else
 739		    if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 740			hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
 741		else
 742			hash_in =
 743			    (char *)(&INODE_PKEY(hint->inode)->k_objectid);
 744	}
 745
 746	hint->search_start =
 747	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
 748}
 749
 750/*
 751 * Relocation based on dirid, hashing them into a given bitmap block
 752 * files. Formatted nodes are unaffected, a separate policy covers them
 753 */
 754static void dirid_groups(reiserfs_blocknr_hint_t * hint)
 755{
 756	unsigned long hash;
 757	__u32 dirid = 0;
 758	int bm = 0;
 759	struct super_block *sb = hint->th->t_super;
 
 760	if (hint->inode)
 761		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
 762	else if (hint->formatted_node)
 763		dirid = hint->key.k_dir_id;
 764
 765	if (dirid) {
 766		bm = bmap_hash_id(sb, dirid);
 767		hash = bm * (sb->s_blocksize << 3);
 768		/* give a portion of the block group to metadata */
 769		if (hint->inode)
 770			hash += sb->s_blocksize / 2;
 771		hint->search_start = hash;
 772	}
 773}
 774
 775/*
 776 * Relocation based on oid, hashing them into a given bitmap block
 777 * files. Formatted nodes are unaffected, a separate policy covers them
 778 */
 779static void oid_groups(reiserfs_blocknr_hint_t * hint)
 780{
 781	if (hint->inode) {
 782		unsigned long hash;
 783		__u32 oid;
 784		__u32 dirid;
 785		int bm;
 786
 787		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
 788
 789		/* keep the root dir and it's first set of subdirs close to
 
 790		 * the start of the disk
 791		 */
 792		if (dirid <= 2)
 793			hash = (hint->inode->i_sb->s_blocksize << 3);
 794		else {
 795			oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
 796			bm = bmap_hash_id(hint->inode->i_sb, oid);
 797			hash = bm * (hint->inode->i_sb->s_blocksize << 3);
 798		}
 799		hint->search_start = hash;
 800	}
 801}
 802
 803/* returns 1 if it finds an indirect item and gets valid hint info
 
 804 * from it, otherwise 0
 805 */
 806static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
 807{
 808	struct treepath *path;
 809	struct buffer_head *bh;
 810	struct item_head *ih;
 811	int pos_in_item;
 812	__le32 *item;
 813	int ret = 0;
 814
 815	if (!hint->path)	/* reiserfs code can call this function w/o pointer to path
 816				 * structure supplied; then we rely on supplied search_start */
 
 
 
 817		return 0;
 818
 819	path = hint->path;
 820	bh = get_last_bh(path);
 821	RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
 822	ih = get_ih(path);
 823	pos_in_item = path->pos_in_item;
 824	item = get_item(path);
 825
 826	hint->search_start = bh->b_blocknr;
 827
 
 
 
 
 828	if (!hint->formatted_node && is_indirect_le_ih(ih)) {
 829		/* for indirect item: go to left and look for the first non-hole entry
 830		   in the indirect item */
 831		if (pos_in_item == I_UNFM_NUM(ih))
 832			pos_in_item--;
 833//          pos_in_item = I_UNFM_NUM (ih) - 1;
 834		while (pos_in_item >= 0) {
 835			int t = get_block_num(item, pos_in_item);
 836			if (t) {
 837				hint->search_start = t;
 838				ret = 1;
 839				break;
 840			}
 841			pos_in_item--;
 842		}
 843	}
 844
 845	/* does result value fit into specified region? */
 846	return ret;
 847}
 848
 849/* should be, if formatted node, then try to put on first part of the device
 850   specified as number of percent with mount option device, else try to put
 851   on last of device.  This is not to say it is good code to do so,
 852   but the effect should be measured.  */
 
 
 853static inline void set_border_in_hint(struct super_block *s,
 854				      reiserfs_blocknr_hint_t * hint)
 855{
 856	b_blocknr_t border =
 857	    SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
 858
 859	if (hint->formatted_node)
 860		hint->end = border - 1;
 861	else
 862		hint->beg = border;
 863}
 864
 865static inline void displace_large_file(reiserfs_blocknr_hint_t * hint)
 866{
 867	if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 868		hint->search_start =
 869		    hint->beg +
 870		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id),
 871			       4) % (hint->end - hint->beg);
 872	else
 873		hint->search_start =
 874		    hint->beg +
 875		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid),
 876			       4) % (hint->end - hint->beg);
 877}
 878
 879static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint)
 880{
 881	char *hash_in;
 882
 883	if (!hint->inode)
 884		hash_in = (char *)&hint->key.k_dir_id;
 885	else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
 886		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
 887	else
 888		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
 889
 890	hint->search_start =
 891	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
 892}
 893
 894static inline int
 895this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *
 896						   hint)
 897{
 898	return hint->block ==
 899	    REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size;
 900}
 901
 902#ifdef DISPLACE_NEW_PACKING_LOCALITIES
 903static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)
 904{
 905	struct in_core_key *key = &hint->key;
 906
 907	hint->th->displace_new_blocks = 0;
 908	hint->search_start =
 909	    hint->beg + keyed_hash((char *)(&key->k_objectid),
 910				   4) % (hint->end - hint->beg);
 911}
 912#endif
 913
 914static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint)
 915{
 916	b_blocknr_t border;
 917	u32 hash_in;
 918
 919	if (hint->formatted_node || hint->inode == NULL) {
 920		return 0;
 921	}
 922
 923	hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
 924	border =
 925	    hint->beg + (u32) keyed_hash(((char *)(&hash_in)),
 926					 4) % (hint->end - hint->beg - 1);
 927	if (border > hint->search_start)
 928		hint->search_start = border;
 929
 930	return 1;
 931}
 932
 933static inline int old_way(reiserfs_blocknr_hint_t * hint)
 934{
 935	b_blocknr_t border;
 936
 937	if (hint->formatted_node || hint->inode == NULL) {
 938		return 0;
 939	}
 940
 941	border =
 942	    hint->beg +
 943	    le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end -
 944							      hint->beg);
 945	if (border > hint->search_start)
 946		hint->search_start = border;
 947
 948	return 1;
 949}
 950
 951static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint)
 952{
 953	struct in_core_key *key = &hint->key;
 954	b_blocknr_t slice_start;
 955
 956	slice_start =
 957	    (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100);
 958	if (slice_start > hint->search_start
 959	    || slice_start + (hint->end / 100) <= hint->search_start) {
 960		hint->search_start = slice_start;
 961	}
 962}
 963
 964static void determine_search_start(reiserfs_blocknr_hint_t * hint,
 965				   int amount_needed)
 966{
 967	struct super_block *s = hint->th->t_super;
 968	int unfm_hint;
 969
 970	hint->beg = 0;
 971	hint->end = SB_BLOCK_COUNT(s) - 1;
 972
 973	/* This is former border algorithm. Now with tunable border offset */
 974	if (concentrating_formatted_nodes(s))
 975		set_border_in_hint(s, hint);
 976
 977#ifdef DISPLACE_NEW_PACKING_LOCALITIES
 978	/* whenever we create a new directory, we displace it.  At first we will
 979	   hash for location, later we might look for a moderately empty place for
 980	   it */
 
 
 981	if (displacing_new_packing_localities(s)
 982	    && hint->th->displace_new_blocks) {
 983		displace_new_packing_locality(hint);
 984
 985		/* we do not continue determine_search_start,
 986		 * if new packing locality is being displaced */
 
 
 987		return;
 988	}
 989#endif
 990
 991	/* all persons should feel encouraged to add more special cases here and
 992	 * test them */
 
 
 993
 994	if (displacing_large_files(s) && !hint->formatted_node
 995	    && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
 996		displace_large_file(hint);
 997		return;
 998	}
 999
1000	/* if none of our special cases is relevant, use the left neighbor in the
1001	   tree order of the new node we are allocating for */
 
 
1002	if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
1003		hash_formatted_node(hint);
1004		return;
1005	}
1006
1007	unfm_hint = get_left_neighbor(hint);
1008
1009	/* Mimic old block allocator behaviour, that is if VFS allowed for preallocation,
1010	   new blocks are displaced based on directory ID. Also, if suggested search_start
1011	   is less than last preallocated block, we start searching from it, assuming that
1012	   HDD dataflow is faster in forward direction */
 
 
 
1013	if (TEST_OPTION(old_way, s)) {
1014		if (!hint->formatted_node) {
1015			if (!reiserfs_hashed_relocation(s))
1016				old_way(hint);
1017			else if (!reiserfs_no_unhashed_relocation(s))
1018				old_hashed_relocation(hint);
1019
1020			if (hint->inode
1021			    && hint->search_start <
1022			    REISERFS_I(hint->inode)->i_prealloc_block)
1023				hint->search_start =
1024				    REISERFS_I(hint->inode)->i_prealloc_block;
1025		}
1026		return;
1027	}
1028
1029	/* This is an approach proposed by Hans */
1030	if (TEST_OPTION(hundredth_slices, s)
1031	    && !(displacing_large_files(s) && !hint->formatted_node)) {
1032		hundredth_slices(hint);
1033		return;
1034	}
1035
1036	/* old_hashed_relocation only works on unformatted */
1037	if (!unfm_hint && !hint->formatted_node &&
1038	    TEST_OPTION(old_hashed_relocation, s)) {
1039		old_hashed_relocation(hint);
1040	}
 
1041	/* new_hashed_relocation works with both formatted/unformatted nodes */
1042	if ((!unfm_hint || hint->formatted_node) &&
1043	    TEST_OPTION(new_hashed_relocation, s)) {
1044		new_hashed_relocation(hint);
1045	}
 
1046	/* dirid grouping works only on unformatted nodes */
1047	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
1048		dirid_groups(hint);
1049	}
1050#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1051	if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
1052		dirid_groups(hint);
1053	}
1054#endif
1055
1056	/* oid grouping works only on unformatted nodes */
1057	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) {
1058		oid_groups(hint);
1059	}
1060	return;
1061}
1062
1063static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
1064{
1065	/* make minimum size a mount option and benchmark both ways */
1066	/* we preallocate blocks only for regular files, specific size */
1067	/* benchmark preallocating always and see what happens */
1068
1069	hint->prealloc_size = 0;
1070
1071	if (!hint->formatted_node && hint->preallocate) {
1072		if (S_ISREG(hint->inode->i_mode)
1073		    && hint->inode->i_size >=
1074		    REISERFS_SB(hint->th->t_super)->s_alloc_options.
1075		    preallocmin * hint->inode->i_sb->s_blocksize)
1076			hint->prealloc_size =
1077			    REISERFS_SB(hint->th->t_super)->s_alloc_options.
1078			    preallocsize - 1;
1079	}
1080	return CARRY_ON;
1081}
1082
1083/* XXX I know it could be merged with upper-level function;
1084   but may be result function would be too complex. */
1085static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
1086						 b_blocknr_t * new_blocknrs,
1087						 b_blocknr_t start,
1088						 b_blocknr_t finish, int min,
1089						 int amount_needed,
1090						 int prealloc_size)
1091{
1092	int rest = amount_needed;
1093	int nr_allocated;
1094
1095	while (rest > 0 && start <= finish) {
1096		nr_allocated = scan_bitmap(hint->th, &start, finish, min,
1097					   rest + prealloc_size,
1098					   !hint->formatted_node, hint->block);
1099
1100		if (nr_allocated == 0)	/* no new blocks allocated, return */
1101			break;
1102
1103		/* fill free_blocknrs array first */
1104		while (rest > 0 && nr_allocated > 0) {
1105			*new_blocknrs++ = start++;
1106			rest--;
1107			nr_allocated--;
1108		}
1109
1110		/* do we have something to fill prealloc. array also ? */
1111		if (nr_allocated > 0) {
1112			/* it means prealloc_size was greater that 0 and we do preallocation */
 
 
 
1113			list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
1114				 &SB_JOURNAL(hint->th->t_super)->
1115				 j_prealloc_list);
1116			REISERFS_I(hint->inode)->i_prealloc_block = start;
1117			REISERFS_I(hint->inode)->i_prealloc_count =
1118			    nr_allocated;
1119			break;
1120		}
1121	}
1122
1123	return (amount_needed - rest);
1124}
1125
1126static inline int blocknrs_and_prealloc_arrays_from_search_start
1127    (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs,
1128     int amount_needed) {
1129	struct super_block *s = hint->th->t_super;
1130	b_blocknr_t start = hint->search_start;
1131	b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
1132	int passno = 0;
1133	int nr_allocated = 0;
1134	int depth;
1135
1136	determine_prealloc_size(hint);
1137	if (!hint->formatted_node) {
1138		int quota_ret;
1139#ifdef REISERQUOTA_DEBUG
1140		reiserfs_debug(s, REISERFS_DEBUG_CODE,
1141			       "reiserquota: allocating %d blocks id=%u",
1142			       amount_needed, hint->inode->i_uid);
1143#endif
1144		depth = reiserfs_write_unlock_nested(s);
1145		quota_ret =
1146		    dquot_alloc_block_nodirty(hint->inode, amount_needed);
1147		if (quota_ret) {	/* Quota exceeded? */
1148			reiserfs_write_lock_nested(s, depth);
1149			return QUOTA_EXCEEDED;
1150		}
1151		if (hint->preallocate && hint->prealloc_size) {
1152#ifdef REISERQUOTA_DEBUG
1153			reiserfs_debug(s, REISERFS_DEBUG_CODE,
1154				       "reiserquota: allocating (prealloc) %d blocks id=%u",
1155				       hint->prealloc_size, hint->inode->i_uid);
1156#endif
1157			quota_ret = dquot_prealloc_block_nodirty(hint->inode,
1158							 hint->prealloc_size);
1159			if (quota_ret)
1160				hint->preallocate = hint->prealloc_size = 0;
1161		}
1162		/* for unformatted nodes, force large allocations */
1163		reiserfs_write_lock_nested(s, depth);
1164	}
1165
1166	do {
1167		switch (passno++) {
1168		case 0:	/* Search from hint->search_start to end of disk */
1169			start = hint->search_start;
1170			finish = SB_BLOCK_COUNT(s) - 1;
1171			break;
1172		case 1:	/* Search from hint->beg to hint->search_start */
1173			start = hint->beg;
1174			finish = hint->search_start;
1175			break;
1176		case 2:	/* Last chance: Search from 0 to hint->beg */
1177			start = 0;
1178			finish = hint->beg;
1179			break;
1180		default:	/* We've tried searching everywhere, not enough space */
 
1181			/* Free the blocks */
1182			if (!hint->formatted_node) {
1183#ifdef REISERQUOTA_DEBUG
1184				reiserfs_debug(s, REISERFS_DEBUG_CODE,
1185					       "reiserquota: freeing (nospace) %d blocks id=%u",
1186					       amount_needed +
1187					       hint->prealloc_size -
1188					       nr_allocated,
1189					       hint->inode->i_uid);
1190#endif
1191				/* Free not allocated blocks */
1192				depth = reiserfs_write_unlock_nested(s);
1193				dquot_free_block_nodirty(hint->inode,
1194					amount_needed + hint->prealloc_size -
1195					nr_allocated);
1196				reiserfs_write_lock_nested(s, depth);
1197			}
1198			while (nr_allocated--)
1199				reiserfs_free_block(hint->th, hint->inode,
1200						    new_blocknrs[nr_allocated],
1201						    !hint->formatted_node);
1202
1203			return NO_DISK_SPACE;
1204		}
1205	} while ((nr_allocated += allocate_without_wrapping_disk(hint,
1206								 new_blocknrs +
1207								 nr_allocated,
1208								 start, finish,
1209								 1,
1210								 amount_needed -
1211								 nr_allocated,
1212								 hint->
1213								 prealloc_size))
1214		 < amount_needed);
1215	if (!hint->formatted_node &&
1216	    amount_needed + hint->prealloc_size >
1217	    nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) {
1218		/* Some of preallocation blocks were not allocated */
1219#ifdef REISERQUOTA_DEBUG
1220		reiserfs_debug(s, REISERFS_DEBUG_CODE,
1221			       "reiserquota: freeing (failed prealloc) %d blocks id=%u",
1222			       amount_needed + hint->prealloc_size -
1223			       nr_allocated -
1224			       REISERFS_I(hint->inode)->i_prealloc_count,
1225			       hint->inode->i_uid);
1226#endif
1227
1228		depth = reiserfs_write_unlock_nested(s);
1229		dquot_free_block_nodirty(hint->inode, amount_needed +
1230					 hint->prealloc_size - nr_allocated -
1231					 REISERFS_I(hint->inode)->
1232					 i_prealloc_count);
1233		reiserfs_write_lock_nested(s, depth);
1234	}
1235
1236	return CARRY_ON;
1237}
1238
1239/* grab new blocknrs from preallocated list */
1240/* return amount still needed after using them */
1241static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
1242					      b_blocknr_t * new_blocknrs,
1243					      int amount_needed)
1244{
1245	struct inode *inode = hint->inode;
1246
1247	if (REISERFS_I(inode)->i_prealloc_count > 0) {
1248		while (amount_needed) {
1249
1250			*new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++;
1251			REISERFS_I(inode)->i_prealloc_count--;
1252
1253			amount_needed--;
1254
1255			if (REISERFS_I(inode)->i_prealloc_count <= 0) {
1256				list_del(&REISERFS_I(inode)->i_prealloc_list);
1257				break;
1258			}
1259		}
1260	}
1261	/* return amount still needed after using preallocated blocks */
1262	return amount_needed;
1263}
1264
1265int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, int amount_needed, int reserved_by_us	/* Amount of blocks we have
1266																	   already reserved */ )
 
 
 
1267{
1268	int initial_amount_needed = amount_needed;
1269	int ret;
1270	struct super_block *s = hint->th->t_super;
1271
1272	/* Check if there is enough space, taking into account reserved space */
1273	if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks <
1274	    amount_needed - reserved_by_us)
1275		return NO_DISK_SPACE;
1276	/* should this be if !hint->inode &&  hint->preallocate? */
1277	/* do you mean hint->formatted_node can be removed ? - Zam */
1278	/* hint->formatted_node cannot be removed because we try to access
1279	   inode information here, and there is often no inode assotiated with
1280	   metadata allocations - green */
 
 
1281
1282	if (!hint->formatted_node && hint->preallocate) {
1283		amount_needed = use_preallocated_list_if_available
1284		    (hint, new_blocknrs, amount_needed);
1285		if (amount_needed == 0)	/* all blocknrs we need we got from
1286					   prealloc. list */
 
 
 
 
1287			return CARRY_ON;
1288		new_blocknrs += (initial_amount_needed - amount_needed);
1289	}
1290
1291	/* find search start and save it in hint structure */
1292	determine_search_start(hint, amount_needed);
1293	if (hint->search_start >= SB_BLOCK_COUNT(s))
1294		hint->search_start = SB_BLOCK_COUNT(s) - 1;
1295
1296	/* allocation itself; fill new_blocknrs and preallocation arrays */
1297	ret = blocknrs_and_prealloc_arrays_from_search_start
1298	    (hint, new_blocknrs, amount_needed);
1299
1300	/* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we
1301	 * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second
1302	 * variant) */
1303
 
 
1304	if (ret != CARRY_ON) {
1305		while (amount_needed++ < initial_amount_needed) {
1306			reiserfs_free_block(hint->th, hint->inode,
1307					    *(--new_blocknrs), 1);
1308		}
1309	}
1310	return ret;
1311}
1312
1313void reiserfs_cache_bitmap_metadata(struct super_block *sb,
1314                                    struct buffer_head *bh,
1315                                    struct reiserfs_bitmap_info *info)
1316{
1317	unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size);
1318
1319	/* The first bit must ALWAYS be 1 */
1320	if (!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data))
1321		reiserfs_error(sb, "reiserfs-2025", "bitmap block %lu is "
1322			       "corrupted: first bit must be 1", bh->b_blocknr);
1323
1324	info->free_count = 0;
1325
1326	while (--cur >= (unsigned long *)bh->b_data) {
1327		/* 0 and ~0 are special, we can optimize for them */
1328		if (*cur == 0)
1329			info->free_count += BITS_PER_LONG;
1330		else if (*cur != ~0L)	/* A mix, investigate */
1331			info->free_count += BITS_PER_LONG - hweight_long(*cur);
1332	}
1333}
1334
1335struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1336                                               unsigned int bitmap)
1337{
1338	b_blocknr_t block = (sb->s_blocksize << 3) * bitmap;
1339	struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap;
1340	struct buffer_head *bh;
1341
1342	/* Way old format filesystems had the bitmaps packed up front.
1343	 * I doubt there are any of these left, but just in case... */
 
 
1344	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
1345	                      &(REISERFS_SB(sb)->s_properties))))
1346		block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap;
1347	else if (bitmap == 0)
1348		block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
1349
1350	bh = sb_bread(sb, block);
1351	if (bh == NULL)
1352		reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
1353		                 "reading failed", __func__, block);
1354	else {
1355		if (buffer_locked(bh)) {
1356			int depth;
1357			PROC_INFO_INC(sb, scan_bitmap.wait);
1358			depth = reiserfs_write_unlock_nested(sb);
1359			__wait_on_buffer(bh);
1360			reiserfs_write_lock_nested(sb, depth);
1361		}
1362		BUG_ON(!buffer_uptodate(bh));
1363		BUG_ON(atomic_read(&bh->b_count) == 0);
1364
1365		if (info->free_count == UINT_MAX)
1366			reiserfs_cache_bitmap_metadata(sb, bh, info);
1367	}
1368
1369	return bh;
1370}
1371
1372int reiserfs_init_bitmap_cache(struct super_block *sb)
1373{
1374	struct reiserfs_bitmap_info *bitmap;
1375	unsigned int bmap_nr = reiserfs_bmap_count(sb);
1376
1377	bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
1378	if (bitmap == NULL)
1379		return -ENOMEM;
1380
1381	memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr);
1382
1383	SB_AP_BITMAP(sb) = bitmap;
1384
1385	return 0;
1386}
1387
1388void reiserfs_free_bitmap_cache(struct super_block *sb)
1389{
1390	if (SB_AP_BITMAP(sb)) {
1391		vfree(SB_AP_BITMAP(sb));
1392		SB_AP_BITMAP(sb) = NULL;
1393	}
1394}