Linux Audio

Check our new training course

Loading...
v3.1
  1/*
  2 *  linux/fs/ext3/ialloc.c
  3 *
  4 * Copyright (C) 1992, 1993, 1994, 1995
  5 * Remy Card (card@masi.ibp.fr)
  6 * Laboratoire MASI - Institut Blaise Pascal
  7 * Universite Pierre et Marie Curie (Paris VI)
  8 *
  9 *  BSD ufs-inspired inode and directory allocation by
 10 *  Stephen Tweedie (sct@redhat.com), 1993
 11 *  Big-endian to little-endian byte-swapping/bitmaps by
 12 *        David S. Miller (davem@caip.rutgers.edu), 1995
 13 */
 14
 15#include <linux/time.h>
 16#include <linux/fs.h>
 17#include <linux/jbd.h>
 18#include <linux/ext3_fs.h>
 19#include <linux/ext3_jbd.h>
 20#include <linux/stat.h>
 21#include <linux/string.h>
 22#include <linux/quotaops.h>
 23#include <linux/buffer_head.h>
 24#include <linux/random.h>
 25#include <linux/bitops.h>
 26#include <trace/events/ext3.h>
 27
 28#include <asm/byteorder.h>
 29
 
 30#include "xattr.h"
 31#include "acl.h"
 32
 33/*
 34 * ialloc.c contains the inodes allocation and deallocation routines
 35 */
 36
 37/*
 38 * The free inodes are managed by bitmaps.  A file system contains several
 39 * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 40 * block for inodes, N blocks for the inode table and data blocks.
 41 *
 42 * The file system contains group descriptors which are located after the
 43 * super block.  Each descriptor contains the number of the bitmap block and
 44 * the free blocks count in the block.
 45 */
 46
 47
 48/*
 49 * Read the inode allocation bitmap for a given block_group, reading
 50 * into the specified slot in the superblock's bitmap cache.
 51 *
 52 * Return buffer_head of bitmap on success or NULL.
 53 */
 54static struct buffer_head *
 55read_inode_bitmap(struct super_block * sb, unsigned long block_group)
 56{
 57	struct ext3_group_desc *desc;
 58	struct buffer_head *bh = NULL;
 59
 60	desc = ext3_get_group_desc(sb, block_group, NULL);
 61	if (!desc)
 62		goto error_out;
 63
 64	bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
 65	if (!bh)
 66		ext3_error(sb, "read_inode_bitmap",
 67			    "Cannot read inode bitmap - "
 68			    "block_group = %lu, inode_bitmap = %u",
 69			    block_group, le32_to_cpu(desc->bg_inode_bitmap));
 70error_out:
 71	return bh;
 72}
 73
 74/*
 75 * NOTE! When we get the inode, we're the only people
 76 * that have access to it, and as such there are no
 77 * race conditions we have to worry about. The inode
 78 * is not on the hash-lists, and it cannot be reached
 79 * through the filesystem because the directory entry
 80 * has been deleted earlier.
 81 *
 82 * HOWEVER: we must make sure that we get no aliases,
 83 * which means that we have to call "clear_inode()"
 84 * _before_ we mark the inode not in use in the inode
 85 * bitmaps. Otherwise a newly created file might use
 86 * the same inode number (not actually the same pointer
 87 * though), and then we'd have two inodes sharing the
 88 * same inode number and space on the harddisk.
 89 */
 90void ext3_free_inode (handle_t *handle, struct inode * inode)
 91{
 92	struct super_block * sb = inode->i_sb;
 93	int is_directory;
 94	unsigned long ino;
 95	struct buffer_head *bitmap_bh = NULL;
 96	struct buffer_head *bh2;
 97	unsigned long block_group;
 98	unsigned long bit;
 99	struct ext3_group_desc * gdp;
100	struct ext3_super_block * es;
101	struct ext3_sb_info *sbi;
102	int fatal = 0, err;
103
104	if (atomic_read(&inode->i_count) > 1) {
105		printk ("ext3_free_inode: inode has count=%d\n",
106					atomic_read(&inode->i_count));
107		return;
108	}
109	if (inode->i_nlink) {
110		printk ("ext3_free_inode: inode has nlink=%d\n",
111			inode->i_nlink);
112		return;
113	}
114	if (!sb) {
115		printk("ext3_free_inode: inode on nonexistent device\n");
116		return;
117	}
118	sbi = EXT3_SB(sb);
119
120	ino = inode->i_ino;
121	ext3_debug ("freeing inode %lu\n", ino);
122	trace_ext3_free_inode(inode);
123
124	is_directory = S_ISDIR(inode->i_mode);
125
126	es = EXT3_SB(sb)->s_es;
127	if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
128		ext3_error (sb, "ext3_free_inode",
129			    "reserved or nonexistent inode %lu", ino);
130		goto error_return;
131	}
132	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
133	bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
134	bitmap_bh = read_inode_bitmap(sb, block_group);
135	if (!bitmap_bh)
136		goto error_return;
137
138	BUFFER_TRACE(bitmap_bh, "get_write_access");
139	fatal = ext3_journal_get_write_access(handle, bitmap_bh);
140	if (fatal)
141		goto error_return;
142
143	/* Ok, now we can actually update the inode bitmaps.. */
144	if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
145					bit, bitmap_bh->b_data))
146		ext3_error (sb, "ext3_free_inode",
147			      "bit already cleared for inode %lu", ino);
148	else {
149		gdp = ext3_get_group_desc (sb, block_group, &bh2);
150
151		BUFFER_TRACE(bh2, "get_write_access");
152		fatal = ext3_journal_get_write_access(handle, bh2);
153		if (fatal) goto error_return;
154
155		if (gdp) {
156			spin_lock(sb_bgl_lock(sbi, block_group));
157			le16_add_cpu(&gdp->bg_free_inodes_count, 1);
158			if (is_directory)
159				le16_add_cpu(&gdp->bg_used_dirs_count, -1);
160			spin_unlock(sb_bgl_lock(sbi, block_group));
161			percpu_counter_inc(&sbi->s_freeinodes_counter);
162			if (is_directory)
163				percpu_counter_dec(&sbi->s_dirs_counter);
164
165		}
166		BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
167		err = ext3_journal_dirty_metadata(handle, bh2);
168		if (!fatal) fatal = err;
169	}
170	BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata");
171	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
172	if (!fatal)
173		fatal = err;
174
175error_return:
176	brelse(bitmap_bh);
177	ext3_std_error(sb, fatal);
178}
179
180/*
181 * There are two policies for allocating an inode.  If the new inode is
182 * a directory, then a forward search is made for a block group with both
183 * free space and a low directory-to-inode ratio; if that fails, then of
184 * the groups with above-average free space, that group with the fewest
185 * directories already is chosen.
186 *
 187 * For other inodes, search forward from the parent directory's block
188 * group to find a free inode.
189 */
190static int find_group_dir(struct super_block *sb, struct inode *parent)
191{
192	int ngroups = EXT3_SB(sb)->s_groups_count;
193	unsigned int freei, avefreei;
194	struct ext3_group_desc *desc, *best_desc = NULL;
195	int group, best_group = -1;
196
197	freei = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter);
198	avefreei = freei / ngroups;
199
200	for (group = 0; group < ngroups; group++) {
201		desc = ext3_get_group_desc (sb, group, NULL);
202		if (!desc || !desc->bg_free_inodes_count)
203			continue;
204		if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
205			continue;
206		if (!best_desc ||
207		    (le16_to_cpu(desc->bg_free_blocks_count) >
208		     le16_to_cpu(best_desc->bg_free_blocks_count))) {
209			best_group = group;
210			best_desc = desc;
211		}
212	}
213	return best_group;
214}
215
216/*
217 * Orlov's allocator for directories.
218 *
219 * We always try to spread first-level directories.
220 *
221 * If there are blockgroups with both free inodes and free blocks counts
222 * not worse than average we return one with smallest directory count.
223 * Otherwise we simply return a random group.
224 *
225 * For the rest rules look so:
226 *
227 * It's OK to put directory into a group unless
228 * it has too many directories already (max_dirs) or
229 * it has too few free inodes left (min_inodes) or
230 * it has too few free blocks left (min_blocks) or
231 * it's already running too large debt (max_debt).
232 * Parent's group is preferred, if it doesn't satisfy these
233 * conditions we search cyclically through the rest. If none
234 * of the groups look good we just look for a group with more
235 * free inodes than average (starting at parent's group).
236 *
237 * Debt is incremented each time we allocate a directory and decremented
238 * when we allocate an inode, within 0--255.
239 */
240
241#define INODE_COST 64
242#define BLOCK_COST 256
243
244static int find_group_orlov(struct super_block *sb, struct inode *parent)
245{
246	int parent_group = EXT3_I(parent)->i_block_group;
247	struct ext3_sb_info *sbi = EXT3_SB(sb);
248	struct ext3_super_block *es = sbi->s_es;
249	int ngroups = sbi->s_groups_count;
250	int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
251	unsigned int freei, avefreei;
252	ext3_fsblk_t freeb, avefreeb;
253	ext3_fsblk_t blocks_per_dir;
254	unsigned int ndirs;
255	int max_debt, max_dirs, min_inodes;
256	ext3_grpblk_t min_blocks;
257	int group = -1, i;
258	struct ext3_group_desc *desc;
259
260	freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
261	avefreei = freei / ngroups;
262	freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
263	avefreeb = freeb / ngroups;
264	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
265
266	if ((parent == sb->s_root->d_inode) ||
267	    (EXT3_I(parent)->i_flags & EXT3_TOPDIR_FL)) {
268		int best_ndir = inodes_per_group;
269		int best_group = -1;
270
271		get_random_bytes(&group, sizeof(group));
272		parent_group = (unsigned)group % ngroups;
273		for (i = 0; i < ngroups; i++) {
274			group = (parent_group + i) % ngroups;
275			desc = ext3_get_group_desc (sb, group, NULL);
276			if (!desc || !desc->bg_free_inodes_count)
277				continue;
278			if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
279				continue;
280			if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
281				continue;
282			if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
283				continue;
284			best_group = group;
285			best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
286		}
287		if (best_group >= 0)
288			return best_group;
289		goto fallback;
290	}
291
292	blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - freeb) / ndirs;
293
294	max_dirs = ndirs / ngroups + inodes_per_group / 16;
295	min_inodes = avefreei - inodes_per_group / 4;
296	min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4;
297
298	max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext3_fsblk_t)BLOCK_COST);
299	if (max_debt * INODE_COST > inodes_per_group)
300		max_debt = inodes_per_group / INODE_COST;
301	if (max_debt > 255)
302		max_debt = 255;
303	if (max_debt == 0)
304		max_debt = 1;
305
306	for (i = 0; i < ngroups; i++) {
307		group = (parent_group + i) % ngroups;
308		desc = ext3_get_group_desc (sb, group, NULL);
309		if (!desc || !desc->bg_free_inodes_count)
310			continue;
311		if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
312			continue;
313		if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
314			continue;
315		if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
316			continue;
317		return group;
318	}
319
320fallback:
321	for (i = 0; i < ngroups; i++) {
322		group = (parent_group + i) % ngroups;
323		desc = ext3_get_group_desc (sb, group, NULL);
324		if (!desc || !desc->bg_free_inodes_count)
325			continue;
326		if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
327			return group;
328	}
329
330	if (avefreei) {
331		/*
332		 * The free-inodes counter is approximate, and for really small
333		 * filesystems the above test can fail to find any blockgroups
334		 */
335		avefreei = 0;
336		goto fallback;
337	}
338
339	return -1;
340}
341
342static int find_group_other(struct super_block *sb, struct inode *parent)
343{
344	int parent_group = EXT3_I(parent)->i_block_group;
345	int ngroups = EXT3_SB(sb)->s_groups_count;
346	struct ext3_group_desc *desc;
347	int group, i;
348
349	/*
350	 * Try to place the inode in its parent directory
351	 */
352	group = parent_group;
353	desc = ext3_get_group_desc (sb, group, NULL);
354	if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
355			le16_to_cpu(desc->bg_free_blocks_count))
356		return group;
357
358	/*
359	 * We're going to place this inode in a different blockgroup from its
360	 * parent.  We want to cause files in a common directory to all land in
361	 * the same blockgroup.  But we want files which are in a different
362	 * directory which shares a blockgroup with our parent to land in a
363	 * different blockgroup.
364	 *
365	 * So add our directory's i_ino into the starting point for the hash.
366	 */
367	group = (group + parent->i_ino) % ngroups;
368
369	/*
370	 * Use a quadratic hash to find a group with a free inode and some free
371	 * blocks.
372	 */
373	for (i = 1; i < ngroups; i <<= 1) {
374		group += i;
375		if (group >= ngroups)
376			group -= ngroups;
377		desc = ext3_get_group_desc (sb, group, NULL);
378		if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
379				le16_to_cpu(desc->bg_free_blocks_count))
380			return group;
381	}
382
383	/*
384	 * That failed: try linear search for a free inode, even if that group
385	 * has no free blocks.
386	 */
387	group = parent_group;
388	for (i = 0; i < ngroups; i++) {
389		if (++group >= ngroups)
390			group = 0;
391		desc = ext3_get_group_desc (sb, group, NULL);
392		if (desc && le16_to_cpu(desc->bg_free_inodes_count))
393			return group;
394	}
395
396	return -1;
397}
398
399/*
400 * There are two policies for allocating an inode.  If the new inode is
401 * a directory, then a forward search is made for a block group with both
402 * free space and a low directory-to-inode ratio; if that fails, then of
403 * the groups with above-average free space, that group with the fewest
404 * directories already is chosen.
405 *
406 * For other inodes, search forward from the parent directory's block
407 * group to find a free inode.
408 */
409struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
410			     const struct qstr *qstr, int mode)
411{
412	struct super_block *sb;
413	struct buffer_head *bitmap_bh = NULL;
414	struct buffer_head *bh2;
415	int group;
416	unsigned long ino = 0;
417	struct inode * inode;
418	struct ext3_group_desc * gdp = NULL;
419	struct ext3_super_block * es;
420	struct ext3_inode_info *ei;
421	struct ext3_sb_info *sbi;
422	int err = 0;
423	struct inode *ret;
424	int i;
425
426	/* Cannot create files in a deleted directory */
427	if (!dir || !dir->i_nlink)
428		return ERR_PTR(-EPERM);
429
430	sb = dir->i_sb;
431	trace_ext3_request_inode(dir, mode);
432	inode = new_inode(sb);
433	if (!inode)
434		return ERR_PTR(-ENOMEM);
435	ei = EXT3_I(inode);
436
437	sbi = EXT3_SB(sb);
438	es = sbi->s_es;
439	if (S_ISDIR(mode)) {
440		if (test_opt (sb, OLDALLOC))
441			group = find_group_dir(sb, dir);
442		else
443			group = find_group_orlov(sb, dir);
444	} else
445		group = find_group_other(sb, dir);
446
447	err = -ENOSPC;
448	if (group == -1)
449		goto out;
450
451	for (i = 0; i < sbi->s_groups_count; i++) {
452		err = -EIO;
453
454		gdp = ext3_get_group_desc(sb, group, &bh2);
455		if (!gdp)
456			goto fail;
457
458		brelse(bitmap_bh);
459		bitmap_bh = read_inode_bitmap(sb, group);
460		if (!bitmap_bh)
461			goto fail;
462
463		ino = 0;
464
465repeat_in_this_group:
466		ino = ext3_find_next_zero_bit((unsigned long *)
467				bitmap_bh->b_data, EXT3_INODES_PER_GROUP(sb), ino);
468		if (ino < EXT3_INODES_PER_GROUP(sb)) {
469
470			BUFFER_TRACE(bitmap_bh, "get_write_access");
471			err = ext3_journal_get_write_access(handle, bitmap_bh);
472			if (err)
473				goto fail;
474
475			if (!ext3_set_bit_atomic(sb_bgl_lock(sbi, group),
476						ino, bitmap_bh->b_data)) {
477				/* we won it */
478				BUFFER_TRACE(bitmap_bh,
479					"call ext3_journal_dirty_metadata");
480				err = ext3_journal_dirty_metadata(handle,
481								bitmap_bh);
482				if (err)
483					goto fail;
484				goto got;
485			}
486			/* we lost it */
487			journal_release_buffer(handle, bitmap_bh);
488
489			if (++ino < EXT3_INODES_PER_GROUP(sb))
490				goto repeat_in_this_group;
491		}
492
493		/*
494		 * This case is possible in concurrent environment.  It is very
495		 * rare.  We cannot repeat the find_group_xxx() call because
496		 * that will simply return the same blockgroup, because the
497		 * group descriptor metadata has not yet been updated.
498		 * So we just go onto the next blockgroup.
499		 */
500		if (++group == sbi->s_groups_count)
501			group = 0;
502	}
503	err = -ENOSPC;
504	goto out;
505
506got:
507	ino += group * EXT3_INODES_PER_GROUP(sb) + 1;
508	if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
509		ext3_error (sb, "ext3_new_inode",
510			    "reserved inode or inode > inodes count - "
511			    "block_group = %d, inode=%lu", group, ino);
512		err = -EIO;
513		goto fail;
514	}
515
516	BUFFER_TRACE(bh2, "get_write_access");
517	err = ext3_journal_get_write_access(handle, bh2);
518	if (err) goto fail;
519	spin_lock(sb_bgl_lock(sbi, group));
520	le16_add_cpu(&gdp->bg_free_inodes_count, -1);
521	if (S_ISDIR(mode)) {
522		le16_add_cpu(&gdp->bg_used_dirs_count, 1);
523	}
524	spin_unlock(sb_bgl_lock(sbi, group));
525	BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
526	err = ext3_journal_dirty_metadata(handle, bh2);
527	if (err) goto fail;
528
529	percpu_counter_dec(&sbi->s_freeinodes_counter);
530	if (S_ISDIR(mode))
531		percpu_counter_inc(&sbi->s_dirs_counter);
532
533
534	if (test_opt(sb, GRPID)) {
535		inode->i_mode = mode;
536		inode->i_uid = current_fsuid();
537		inode->i_gid = dir->i_gid;
538	} else
539		inode_init_owner(inode, dir, mode);
540
541	inode->i_ino = ino;
542	/* This is the optimal IO size (for stat), not the fs block size */
543	inode->i_blocks = 0;
544	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
545
546	memset(ei->i_data, 0, sizeof(ei->i_data));
547	ei->i_dir_start_lookup = 0;
548	ei->i_disksize = 0;
549
550	ei->i_flags =
551		ext3_mask_flags(mode, EXT3_I(dir)->i_flags & EXT3_FL_INHERITED);
552#ifdef EXT3_FRAGMENTS
553	ei->i_faddr = 0;
554	ei->i_frag_no = 0;
555	ei->i_frag_size = 0;
556#endif
557	ei->i_file_acl = 0;
558	ei->i_dir_acl = 0;
559	ei->i_dtime = 0;
560	ei->i_block_alloc_info = NULL;
561	ei->i_block_group = group;
562
563	ext3_set_inode_flags(inode);
564	if (IS_DIRSYNC(inode))
565		handle->h_sync = 1;
566	if (insert_inode_locked(inode) < 0) {
567		err = -EINVAL;
568		goto fail_drop;
 
 
 
 
569	}
570	spin_lock(&sbi->s_next_gen_lock);
571	inode->i_generation = sbi->s_next_generation++;
572	spin_unlock(&sbi->s_next_gen_lock);
573
574	ei->i_state_flags = 0;
575	ext3_set_inode_state(inode, EXT3_STATE_NEW);
576
577	/* See comment in ext3_iget for explanation */
578	if (ino >= EXT3_FIRST_INO(sb) + 1 &&
579	    EXT3_INODE_SIZE(sb) > EXT3_GOOD_OLD_INODE_SIZE) {
580		ei->i_extra_isize =
581			sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE;
582	} else {
583		ei->i_extra_isize = 0;
584	}
585
586	ret = inode;
587	dquot_initialize(inode);
588	err = dquot_alloc_inode(inode);
589	if (err)
590		goto fail_drop;
591
592	err = ext3_init_acl(handle, inode, dir);
593	if (err)
594		goto fail_free_drop;
595
596	err = ext3_init_security(handle, inode, dir, qstr);
597	if (err)
598		goto fail_free_drop;
599
600	err = ext3_mark_inode_dirty(handle, inode);
601	if (err) {
602		ext3_std_error(sb, err);
603		goto fail_free_drop;
604	}
605
606	ext3_debug("allocating inode %lu\n", inode->i_ino);
607	trace_ext3_allocate_inode(inode, dir, mode);
608	goto really_out;
609fail:
610	ext3_std_error(sb, err);
611out:
612	iput(inode);
613	ret = ERR_PTR(err);
614really_out:
615	brelse(bitmap_bh);
616	return ret;
617
618fail_free_drop:
619	dquot_free_inode(inode);
620
621fail_drop:
622	dquot_drop(inode);
623	inode->i_flags |= S_NOQUOTA;
624	inode->i_nlink = 0;
625	unlock_new_inode(inode);
626	iput(inode);
627	brelse(bitmap_bh);
628	return ERR_PTR(err);
629}
630
631/* Verify that we are loading a valid orphan from disk */
632struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
633{
634	unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count);
635	unsigned long block_group;
636	int bit;
637	struct buffer_head *bitmap_bh;
638	struct inode *inode = NULL;
639	long err = -EIO;
640
641	/* Error cases - e2fsck has already cleaned up for us */
642	if (ino > max_ino) {
643		ext3_warning(sb, __func__,
644			     "bad orphan ino %lu!  e2fsck was run?", ino);
645		goto error;
646	}
647
648	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
649	bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
650	bitmap_bh = read_inode_bitmap(sb, block_group);
651	if (!bitmap_bh) {
652		ext3_warning(sb, __func__,
653			     "inode bitmap error for orphan %lu", ino);
654		goto error;
655	}
656
657	/* Having the inode bit set should be a 100% indicator that this
658	 * is a valid orphan (no e2fsck run on fs).  Orphans also include
659	 * inodes that were being truncated, so we can't check i_nlink==0.
660	 */
661	if (!ext3_test_bit(bit, bitmap_bh->b_data))
662		goto bad_orphan;
663
664	inode = ext3_iget(sb, ino);
665	if (IS_ERR(inode))
666		goto iget_failed;
667
668	/*
669	 * If the orphans has i_nlinks > 0 then it should be able to be
670	 * truncated, otherwise it won't be removed from the orphan list
671	 * during processing and an infinite loop will result.
672	 */
673	if (inode->i_nlink && !ext3_can_truncate(inode))
674		goto bad_orphan;
675
676	if (NEXT_ORPHAN(inode) > max_ino)
677		goto bad_orphan;
678	brelse(bitmap_bh);
679	return inode;
680
681iget_failed:
682	err = PTR_ERR(inode);
683	inode = NULL;
684bad_orphan:
685	ext3_warning(sb, __func__,
686		     "bad orphan inode %lu!  e2fsck was run?", ino);
687	printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
688	       bit, (unsigned long long)bitmap_bh->b_blocknr,
689	       ext3_test_bit(bit, bitmap_bh->b_data));
690	printk(KERN_NOTICE "inode=%p\n", inode);
691	if (inode) {
692		printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
693		       is_bad_inode(inode));
694		printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
695		       NEXT_ORPHAN(inode));
696		printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
697		printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
698		/* Avoid freeing blocks if we got a bad deleted inode */
699		if (inode->i_nlink == 0)
700			inode->i_blocks = 0;
701		iput(inode);
702	}
703	brelse(bitmap_bh);
704error:
705	return ERR_PTR(err);
706}
707
708unsigned long ext3_count_free_inodes (struct super_block * sb)
709{
710	unsigned long desc_count;
711	struct ext3_group_desc *gdp;
712	int i;
713#ifdef EXT3FS_DEBUG
714	struct ext3_super_block *es;
715	unsigned long bitmap_count, x;
716	struct buffer_head *bitmap_bh = NULL;
717
718	es = EXT3_SB(sb)->s_es;
719	desc_count = 0;
720	bitmap_count = 0;
721	gdp = NULL;
722	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
723		gdp = ext3_get_group_desc (sb, i, NULL);
724		if (!gdp)
725			continue;
726		desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
727		brelse(bitmap_bh);
728		bitmap_bh = read_inode_bitmap(sb, i);
729		if (!bitmap_bh)
730			continue;
731
732		x = ext3_count_free(bitmap_bh, EXT3_INODES_PER_GROUP(sb) / 8);
733		printk("group %d: stored = %d, counted = %lu\n",
734			i, le16_to_cpu(gdp->bg_free_inodes_count), x);
735		bitmap_count += x;
736	}
737	brelse(bitmap_bh);
738	printk("ext3_count_free_inodes: stored = %u, computed = %lu, %lu\n",
739		le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
740	return desc_count;
741#else
742	desc_count = 0;
743	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
744		gdp = ext3_get_group_desc (sb, i, NULL);
745		if (!gdp)
746			continue;
747		desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
748		cond_resched();
749	}
750	return desc_count;
751#endif
752}
753
754/* Called at mount-time, super-block is locked */
755unsigned long ext3_count_dirs (struct super_block * sb)
756{
757	unsigned long count = 0;
758	int i;
759
760	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
761		struct ext3_group_desc *gdp = ext3_get_group_desc (sb, i, NULL);
762		if (!gdp)
763			continue;
764		count += le16_to_cpu(gdp->bg_used_dirs_count);
765	}
766	return count;
767}
768
v3.5.6
  1/*
  2 *  linux/fs/ext3/ialloc.c
  3 *
  4 * Copyright (C) 1992, 1993, 1994, 1995
  5 * Remy Card (card@masi.ibp.fr)
  6 * Laboratoire MASI - Institut Blaise Pascal
  7 * Universite Pierre et Marie Curie (Paris VI)
  8 *
  9 *  BSD ufs-inspired inode and directory allocation by
 10 *  Stephen Tweedie (sct@redhat.com), 1993
 11 *  Big-endian to little-endian byte-swapping/bitmaps by
 12 *        David S. Miller (davem@caip.rutgers.edu), 1995
 13 */
 14
 
 
 
 
 
 
 
 15#include <linux/quotaops.h>
 
 16#include <linux/random.h>
 
 
 
 
 17
 18#include "ext3.h"
 19#include "xattr.h"
 20#include "acl.h"
 21
 22/*
 23 * ialloc.c contains the inodes allocation and deallocation routines
 24 */
 25
 26/*
 27 * The free inodes are managed by bitmaps.  A file system contains several
 28 * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 29 * block for inodes, N blocks for the inode table and data blocks.
 30 *
 31 * The file system contains group descriptors which are located after the
 32 * super block.  Each descriptor contains the number of the bitmap block and
 33 * the free blocks count in the block.
 34 */
 35
 36
 37/*
 38 * Read the inode allocation bitmap for a given block_group, reading
 39 * into the specified slot in the superblock's bitmap cache.
 40 *
 41 * Return buffer_head of bitmap on success or NULL.
 42 */
 43static struct buffer_head *
 44read_inode_bitmap(struct super_block * sb, unsigned long block_group)
 45{
 46	struct ext3_group_desc *desc;
 47	struct buffer_head *bh = NULL;
 48
 49	desc = ext3_get_group_desc(sb, block_group, NULL);
 50	if (!desc)
 51		goto error_out;
 52
 53	bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
 54	if (!bh)
 55		ext3_error(sb, "read_inode_bitmap",
 56			    "Cannot read inode bitmap - "
 57			    "block_group = %lu, inode_bitmap = %u",
 58			    block_group, le32_to_cpu(desc->bg_inode_bitmap));
 59error_out:
 60	return bh;
 61}
 62
 63/*
 64 * NOTE! When we get the inode, we're the only people
 65 * that have access to it, and as such there are no
 66 * race conditions we have to worry about. The inode
 67 * is not on the hash-lists, and it cannot be reached
 68 * through the filesystem because the directory entry
 69 * has been deleted earlier.
 70 *
 71 * HOWEVER: we must make sure that we get no aliases,
 72 * which means that we have to call "clear_inode()"
 73 * _before_ we mark the inode not in use in the inode
 74 * bitmaps. Otherwise a newly created file might use
 75 * the same inode number (not actually the same pointer
 76 * though), and then we'd have two inodes sharing the
 77 * same inode number and space on the harddisk.
 78 */
 79void ext3_free_inode (handle_t *handle, struct inode * inode)
 80{
 81	struct super_block * sb = inode->i_sb;
 82	int is_directory;
 83	unsigned long ino;
 84	struct buffer_head *bitmap_bh = NULL;
 85	struct buffer_head *bh2;
 86	unsigned long block_group;
 87	unsigned long bit;
 88	struct ext3_group_desc * gdp;
 89	struct ext3_super_block * es;
 90	struct ext3_sb_info *sbi;
 91	int fatal = 0, err;
 92
 93	if (atomic_read(&inode->i_count) > 1) {
 94		printk ("ext3_free_inode: inode has count=%d\n",
 95					atomic_read(&inode->i_count));
 96		return;
 97	}
 98	if (inode->i_nlink) {
 99		printk ("ext3_free_inode: inode has nlink=%d\n",
100			inode->i_nlink);
101		return;
102	}
103	if (!sb) {
104		printk("ext3_free_inode: inode on nonexistent device\n");
105		return;
106	}
107	sbi = EXT3_SB(sb);
108
109	ino = inode->i_ino;
110	ext3_debug ("freeing inode %lu\n", ino);
111	trace_ext3_free_inode(inode);
112
113	is_directory = S_ISDIR(inode->i_mode);
114
115	es = EXT3_SB(sb)->s_es;
116	if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
117		ext3_error (sb, "ext3_free_inode",
118			    "reserved or nonexistent inode %lu", ino);
119		goto error_return;
120	}
121	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
122	bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
123	bitmap_bh = read_inode_bitmap(sb, block_group);
124	if (!bitmap_bh)
125		goto error_return;
126
127	BUFFER_TRACE(bitmap_bh, "get_write_access");
128	fatal = ext3_journal_get_write_access(handle, bitmap_bh);
129	if (fatal)
130		goto error_return;
131
132	/* Ok, now we can actually update the inode bitmaps.. */
133	if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
134					bit, bitmap_bh->b_data))
135		ext3_error (sb, "ext3_free_inode",
136			      "bit already cleared for inode %lu", ino);
137	else {
138		gdp = ext3_get_group_desc (sb, block_group, &bh2);
139
140		BUFFER_TRACE(bh2, "get_write_access");
141		fatal = ext3_journal_get_write_access(handle, bh2);
142		if (fatal) goto error_return;
143
144		if (gdp) {
145			spin_lock(sb_bgl_lock(sbi, block_group));
146			le16_add_cpu(&gdp->bg_free_inodes_count, 1);
147			if (is_directory)
148				le16_add_cpu(&gdp->bg_used_dirs_count, -1);
149			spin_unlock(sb_bgl_lock(sbi, block_group));
150			percpu_counter_inc(&sbi->s_freeinodes_counter);
151			if (is_directory)
152				percpu_counter_dec(&sbi->s_dirs_counter);
153
154		}
155		BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
156		err = ext3_journal_dirty_metadata(handle, bh2);
157		if (!fatal) fatal = err;
158	}
159	BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata");
160	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
161	if (!fatal)
162		fatal = err;
163
164error_return:
165	brelse(bitmap_bh);
166	ext3_std_error(sb, fatal);
167}
168
169/*
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170 * Orlov's allocator for directories.
171 *
172 * We always try to spread first-level directories.
173 *
174 * If there are blockgroups with both free inodes and free blocks counts
175 * not worse than average we return one with smallest directory count.
176 * Otherwise we simply return a random group.
177 *
178 * For the rest rules look so:
179 *
180 * It's OK to put directory into a group unless
181 * it has too many directories already (max_dirs) or
182 * it has too few free inodes left (min_inodes) or
183 * it has too few free blocks left (min_blocks).
 
184 * Parent's group is preferred, if it doesn't satisfy these
185 * conditions we search cyclically through the rest. If none
186 * of the groups look good we just look for a group with more
187 * free inodes than average (starting at parent's group).
188 *
189 * Debt is incremented each time we allocate a directory and decremented
190 * when we allocate an inode, within 0--255.
191 */
192
 
 
 
193static int find_group_orlov(struct super_block *sb, struct inode *parent)
194{
195	int parent_group = EXT3_I(parent)->i_block_group;
196	struct ext3_sb_info *sbi = EXT3_SB(sb);
 
197	int ngroups = sbi->s_groups_count;
198	int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
199	unsigned int freei, avefreei;
200	ext3_fsblk_t freeb, avefreeb;
 
201	unsigned int ndirs;
202	int max_dirs, min_inodes;
203	ext3_grpblk_t min_blocks;
204	int group = -1, i;
205	struct ext3_group_desc *desc;
206
207	freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
208	avefreei = freei / ngroups;
209	freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
210	avefreeb = freeb / ngroups;
211	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
212
213	if ((parent == sb->s_root->d_inode) ||
214	    (EXT3_I(parent)->i_flags & EXT3_TOPDIR_FL)) {
215		int best_ndir = inodes_per_group;
216		int best_group = -1;
217
218		get_random_bytes(&group, sizeof(group));
219		parent_group = (unsigned)group % ngroups;
220		for (i = 0; i < ngroups; i++) {
221			group = (parent_group + i) % ngroups;
222			desc = ext3_get_group_desc (sb, group, NULL);
223			if (!desc || !desc->bg_free_inodes_count)
224				continue;
225			if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
226				continue;
227			if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
228				continue;
229			if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
230				continue;
231			best_group = group;
232			best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
233		}
234		if (best_group >= 0)
235			return best_group;
236		goto fallback;
237	}
238
 
 
239	max_dirs = ndirs / ngroups + inodes_per_group / 16;
240	min_inodes = avefreei - inodes_per_group / 4;
241	min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4;
242
 
 
 
 
 
 
 
 
243	for (i = 0; i < ngroups; i++) {
244		group = (parent_group + i) % ngroups;
245		desc = ext3_get_group_desc (sb, group, NULL);
246		if (!desc || !desc->bg_free_inodes_count)
247			continue;
248		if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
249			continue;
250		if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
251			continue;
252		if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
253			continue;
254		return group;
255	}
256
257fallback:
258	for (i = 0; i < ngroups; i++) {
259		group = (parent_group + i) % ngroups;
260		desc = ext3_get_group_desc (sb, group, NULL);
261		if (!desc || !desc->bg_free_inodes_count)
262			continue;
263		if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
264			return group;
265	}
266
267	if (avefreei) {
268		/*
269		 * The free-inodes counter is approximate, and for really small
270		 * filesystems the above test can fail to find any blockgroups
271		 */
272		avefreei = 0;
273		goto fallback;
274	}
275
276	return -1;
277}
278
279static int find_group_other(struct super_block *sb, struct inode *parent)
280{
281	int parent_group = EXT3_I(parent)->i_block_group;
282	int ngroups = EXT3_SB(sb)->s_groups_count;
283	struct ext3_group_desc *desc;
284	int group, i;
285
286	/*
287	 * Try to place the inode in its parent directory
288	 */
289	group = parent_group;
290	desc = ext3_get_group_desc (sb, group, NULL);
291	if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
292			le16_to_cpu(desc->bg_free_blocks_count))
293		return group;
294
295	/*
296	 * We're going to place this inode in a different blockgroup from its
297	 * parent.  We want to cause files in a common directory to all land in
298	 * the same blockgroup.  But we want files which are in a different
299	 * directory which shares a blockgroup with our parent to land in a
300	 * different blockgroup.
301	 *
302	 * So add our directory's i_ino into the starting point for the hash.
303	 */
304	group = (group + parent->i_ino) % ngroups;
305
306	/*
307	 * Use a quadratic hash to find a group with a free inode and some free
308	 * blocks.
309	 */
310	for (i = 1; i < ngroups; i <<= 1) {
311		group += i;
312		if (group >= ngroups)
313			group -= ngroups;
314		desc = ext3_get_group_desc (sb, group, NULL);
315		if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
316				le16_to_cpu(desc->bg_free_blocks_count))
317			return group;
318	}
319
320	/*
321	 * That failed: try linear search for a free inode, even if that group
322	 * has no free blocks.
323	 */
324	group = parent_group;
325	for (i = 0; i < ngroups; i++) {
326		if (++group >= ngroups)
327			group = 0;
328		desc = ext3_get_group_desc (sb, group, NULL);
329		if (desc && le16_to_cpu(desc->bg_free_inodes_count))
330			return group;
331	}
332
333	return -1;
334}
335
336/*
337 * There are two policies for allocating an inode.  If the new inode is
338 * a directory, then a forward search is made for a block group with both
339 * free space and a low directory-to-inode ratio; if that fails, then of
340 * the groups with above-average free space, that group with the fewest
341 * directories already is chosen.
342 *
343 * For other inodes, search forward from the parent directory's block
344 * group to find a free inode.
345 */
346struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
347			     const struct qstr *qstr, umode_t mode)
348{
349	struct super_block *sb;
350	struct buffer_head *bitmap_bh = NULL;
351	struct buffer_head *bh2;
352	int group;
353	unsigned long ino = 0;
354	struct inode * inode;
355	struct ext3_group_desc * gdp = NULL;
356	struct ext3_super_block * es;
357	struct ext3_inode_info *ei;
358	struct ext3_sb_info *sbi;
359	int err = 0;
360	struct inode *ret;
361	int i;
362
363	/* Cannot create files in a deleted directory */
364	if (!dir || !dir->i_nlink)
365		return ERR_PTR(-EPERM);
366
367	sb = dir->i_sb;
368	trace_ext3_request_inode(dir, mode);
369	inode = new_inode(sb);
370	if (!inode)
371		return ERR_PTR(-ENOMEM);
372	ei = EXT3_I(inode);
373
374	sbi = EXT3_SB(sb);
375	es = sbi->s_es;
376	if (S_ISDIR(mode))
377		group = find_group_orlov(sb, dir);
378	else
 
 
 
379		group = find_group_other(sb, dir);
380
381	err = -ENOSPC;
382	if (group == -1)
383		goto out;
384
385	for (i = 0; i < sbi->s_groups_count; i++) {
386		err = -EIO;
387
388		gdp = ext3_get_group_desc(sb, group, &bh2);
389		if (!gdp)
390			goto fail;
391
392		brelse(bitmap_bh);
393		bitmap_bh = read_inode_bitmap(sb, group);
394		if (!bitmap_bh)
395			goto fail;
396
397		ino = 0;
398
399repeat_in_this_group:
400		ino = ext3_find_next_zero_bit((unsigned long *)
401				bitmap_bh->b_data, EXT3_INODES_PER_GROUP(sb), ino);
402		if (ino < EXT3_INODES_PER_GROUP(sb)) {
403
404			BUFFER_TRACE(bitmap_bh, "get_write_access");
405			err = ext3_journal_get_write_access(handle, bitmap_bh);
406			if (err)
407				goto fail;
408
409			if (!ext3_set_bit_atomic(sb_bgl_lock(sbi, group),
410						ino, bitmap_bh->b_data)) {
411				/* we won it */
412				BUFFER_TRACE(bitmap_bh,
413					"call ext3_journal_dirty_metadata");
414				err = ext3_journal_dirty_metadata(handle,
415								bitmap_bh);
416				if (err)
417					goto fail;
418				goto got;
419			}
420			/* we lost it */
421			journal_release_buffer(handle, bitmap_bh);
422
423			if (++ino < EXT3_INODES_PER_GROUP(sb))
424				goto repeat_in_this_group;
425		}
426
427		/*
428		 * This case is possible in concurrent environment.  It is very
429		 * rare.  We cannot repeat the find_group_xxx() call because
430		 * that will simply return the same blockgroup, because the
431		 * group descriptor metadata has not yet been updated.
432		 * So we just go onto the next blockgroup.
433		 */
434		if (++group == sbi->s_groups_count)
435			group = 0;
436	}
437	err = -ENOSPC;
438	goto out;
439
440got:
441	ino += group * EXT3_INODES_PER_GROUP(sb) + 1;
442	if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
443		ext3_error (sb, "ext3_new_inode",
444			    "reserved inode or inode > inodes count - "
445			    "block_group = %d, inode=%lu", group, ino);
446		err = -EIO;
447		goto fail;
448	}
449
450	BUFFER_TRACE(bh2, "get_write_access");
451	err = ext3_journal_get_write_access(handle, bh2);
452	if (err) goto fail;
453	spin_lock(sb_bgl_lock(sbi, group));
454	le16_add_cpu(&gdp->bg_free_inodes_count, -1);
455	if (S_ISDIR(mode)) {
456		le16_add_cpu(&gdp->bg_used_dirs_count, 1);
457	}
458	spin_unlock(sb_bgl_lock(sbi, group));
459	BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
460	err = ext3_journal_dirty_metadata(handle, bh2);
461	if (err) goto fail;
462
463	percpu_counter_dec(&sbi->s_freeinodes_counter);
464	if (S_ISDIR(mode))
465		percpu_counter_inc(&sbi->s_dirs_counter);
466
467
468	if (test_opt(sb, GRPID)) {
469		inode->i_mode = mode;
470		inode->i_uid = current_fsuid();
471		inode->i_gid = dir->i_gid;
472	} else
473		inode_init_owner(inode, dir, mode);
474
475	inode->i_ino = ino;
476	/* This is the optimal IO size (for stat), not the fs block size */
477	inode->i_blocks = 0;
478	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
479
480	memset(ei->i_data, 0, sizeof(ei->i_data));
481	ei->i_dir_start_lookup = 0;
482	ei->i_disksize = 0;
483
484	ei->i_flags =
485		ext3_mask_flags(mode, EXT3_I(dir)->i_flags & EXT3_FL_INHERITED);
486#ifdef EXT3_FRAGMENTS
487	ei->i_faddr = 0;
488	ei->i_frag_no = 0;
489	ei->i_frag_size = 0;
490#endif
491	ei->i_file_acl = 0;
492	ei->i_dir_acl = 0;
493	ei->i_dtime = 0;
494	ei->i_block_alloc_info = NULL;
495	ei->i_block_group = group;
496
497	ext3_set_inode_flags(inode);
498	if (IS_DIRSYNC(inode))
499		handle->h_sync = 1;
500	if (insert_inode_locked(inode) < 0) {
501		/*
502		 * Likely a bitmap corruption causing inode to be allocated
503		 * twice.
504		 */
505		err = -EIO;
506		goto fail;
507	}
508	spin_lock(&sbi->s_next_gen_lock);
509	inode->i_generation = sbi->s_next_generation++;
510	spin_unlock(&sbi->s_next_gen_lock);
511
512	ei->i_state_flags = 0;
513	ext3_set_inode_state(inode, EXT3_STATE_NEW);
514
515	/* See comment in ext3_iget for explanation */
516	if (ino >= EXT3_FIRST_INO(sb) + 1 &&
517	    EXT3_INODE_SIZE(sb) > EXT3_GOOD_OLD_INODE_SIZE) {
518		ei->i_extra_isize =
519			sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE;
520	} else {
521		ei->i_extra_isize = 0;
522	}
523
524	ret = inode;
525	dquot_initialize(inode);
526	err = dquot_alloc_inode(inode);
527	if (err)
528		goto fail_drop;
529
530	err = ext3_init_acl(handle, inode, dir);
531	if (err)
532		goto fail_free_drop;
533
534	err = ext3_init_security(handle, inode, dir, qstr);
535	if (err)
536		goto fail_free_drop;
537
538	err = ext3_mark_inode_dirty(handle, inode);
539	if (err) {
540		ext3_std_error(sb, err);
541		goto fail_free_drop;
542	}
543
544	ext3_debug("allocating inode %lu\n", inode->i_ino);
545	trace_ext3_allocate_inode(inode, dir, mode);
546	goto really_out;
547fail:
548	ext3_std_error(sb, err);
549out:
550	iput(inode);
551	ret = ERR_PTR(err);
552really_out:
553	brelse(bitmap_bh);
554	return ret;
555
556fail_free_drop:
557	dquot_free_inode(inode);
558
559fail_drop:
560	dquot_drop(inode);
561	inode->i_flags |= S_NOQUOTA;
562	clear_nlink(inode);
563	unlock_new_inode(inode);
564	iput(inode);
565	brelse(bitmap_bh);
566	return ERR_PTR(err);
567}
568
569/* Verify that we are loading a valid orphan from disk */
570struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
571{
572	unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count);
573	unsigned long block_group;
574	int bit;
575	struct buffer_head *bitmap_bh;
576	struct inode *inode = NULL;
577	long err = -EIO;
578
579	/* Error cases - e2fsck has already cleaned up for us */
580	if (ino > max_ino) {
581		ext3_warning(sb, __func__,
582			     "bad orphan ino %lu!  e2fsck was run?", ino);
583		goto error;
584	}
585
586	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
587	bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
588	bitmap_bh = read_inode_bitmap(sb, block_group);
589	if (!bitmap_bh) {
590		ext3_warning(sb, __func__,
591			     "inode bitmap error for orphan %lu", ino);
592		goto error;
593	}
594
595	/* Having the inode bit set should be a 100% indicator that this
596	 * is a valid orphan (no e2fsck run on fs).  Orphans also include
597	 * inodes that were being truncated, so we can't check i_nlink==0.
598	 */
599	if (!ext3_test_bit(bit, bitmap_bh->b_data))
600		goto bad_orphan;
601
602	inode = ext3_iget(sb, ino);
603	if (IS_ERR(inode))
604		goto iget_failed;
605
606	/*
607	 * If the orphans has i_nlinks > 0 then it should be able to be
608	 * truncated, otherwise it won't be removed from the orphan list
609	 * during processing and an infinite loop will result.
610	 */
611	if (inode->i_nlink && !ext3_can_truncate(inode))
612		goto bad_orphan;
613
614	if (NEXT_ORPHAN(inode) > max_ino)
615		goto bad_orphan;
616	brelse(bitmap_bh);
617	return inode;
618
619iget_failed:
620	err = PTR_ERR(inode);
621	inode = NULL;
622bad_orphan:
623	ext3_warning(sb, __func__,
624		     "bad orphan inode %lu!  e2fsck was run?", ino);
625	printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
626	       bit, (unsigned long long)bitmap_bh->b_blocknr,
627	       ext3_test_bit(bit, bitmap_bh->b_data));
628	printk(KERN_NOTICE "inode=%p\n", inode);
629	if (inode) {
630		printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
631		       is_bad_inode(inode));
632		printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
633		       NEXT_ORPHAN(inode));
634		printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
635		printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
636		/* Avoid freeing blocks if we got a bad deleted inode */
637		if (inode->i_nlink == 0)
638			inode->i_blocks = 0;
639		iput(inode);
640	}
641	brelse(bitmap_bh);
642error:
643	return ERR_PTR(err);
644}
645
646unsigned long ext3_count_free_inodes (struct super_block * sb)
647{
648	unsigned long desc_count;
649	struct ext3_group_desc *gdp;
650	int i;
651#ifdef EXT3FS_DEBUG
652	struct ext3_super_block *es;
653	unsigned long bitmap_count, x;
654	struct buffer_head *bitmap_bh = NULL;
655
656	es = EXT3_SB(sb)->s_es;
657	desc_count = 0;
658	bitmap_count = 0;
659	gdp = NULL;
660	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
661		gdp = ext3_get_group_desc (sb, i, NULL);
662		if (!gdp)
663			continue;
664		desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
665		brelse(bitmap_bh);
666		bitmap_bh = read_inode_bitmap(sb, i);
667		if (!bitmap_bh)
668			continue;
669
670		x = ext3_count_free(bitmap_bh, EXT3_INODES_PER_GROUP(sb) / 8);
671		printk("group %d: stored = %d, counted = %lu\n",
672			i, le16_to_cpu(gdp->bg_free_inodes_count), x);
673		bitmap_count += x;
674	}
675	brelse(bitmap_bh);
676	printk("ext3_count_free_inodes: stored = %u, computed = %lu, %lu\n",
677		le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
678	return desc_count;
679#else
680	desc_count = 0;
681	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
682		gdp = ext3_get_group_desc (sb, i, NULL);
683		if (!gdp)
684			continue;
685		desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
686		cond_resched();
687	}
688	return desc_count;
689#endif
690}
691
692/* Called at mount-time, super-block is locked */
693unsigned long ext3_count_dirs (struct super_block * sb)
694{
695	unsigned long count = 0;
696	int i;
697
698	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
699		struct ext3_group_desc *gdp = ext3_get_group_desc (sb, i, NULL);
700		if (!gdp)
701			continue;
702		count += le16_to_cpu(gdp->bg_used_dirs_count);
703	}
704	return count;
705}
706