resize.c - fs/ext3/resize.c - Linux source code v3.5.6

Note: File does not exist in v6.2.
   1/*
   2 *  linux/fs/ext3/resize.c
   3 *
   4 * Support for resizing an ext3 filesystem while it is mounted.
   5 *
   6 * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com>
   7 *
   8 * This could probably be made into a module, because it is not often in use.
   9 */
  10
  11
  12#define EXT3FS_DEBUG
  13
  14#include "ext3.h"
  15
  16
  17#define outside(b, first, last)	((b) < (first) || (b) >= (last))
  18#define inside(b, first, last)	((b) >= (first) && (b) < (last))
  19
  20static int verify_group_input(struct super_block *sb,
  21			      struct ext3_new_group_data *input)
  22{
  23	struct ext3_sb_info *sbi = EXT3_SB(sb);
  24	struct ext3_super_block *es = sbi->s_es;
  25	ext3_fsblk_t start = le32_to_cpu(es->s_blocks_count);
  26	ext3_fsblk_t end = start + input->blocks_count;
  27	unsigned group = input->group;
  28	ext3_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
  29	unsigned overhead = ext3_bg_has_super(sb, group) ?
  30		(1 + ext3_bg_num_gdb(sb, group) +
  31		 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
  32	ext3_fsblk_t metaend = start + overhead;
  33	struct buffer_head *bh = NULL;
  34	ext3_grpblk_t free_blocks_count;
  35	int err = -EINVAL;
  36
  37	input->free_blocks_count = free_blocks_count =
  38		input->blocks_count - 2 - overhead - sbi->s_itb_per_group;
  39
  40	if (test_opt(sb, DEBUG))
  41		printk(KERN_DEBUG "EXT3-fs: adding %s group %u: %u blocks "
  42		       "(%d free, %u reserved)\n",
  43		       ext3_bg_has_super(sb, input->group) ? "normal" :
  44		       "no-super", input->group, input->blocks_count,
  45		       free_blocks_count, input->reserved_blocks);
  46
  47	if (group != sbi->s_groups_count)
  48		ext3_warning(sb, __func__,
  49			     "Cannot add at group %u (only %lu groups)",
  50			     input->group, sbi->s_groups_count);
  51	else if ((start - le32_to_cpu(es->s_first_data_block)) %
  52		 EXT3_BLOCKS_PER_GROUP(sb))
  53		ext3_warning(sb, __func__, "Last group not full");
  54	else if (input->reserved_blocks > input->blocks_count / 5)
  55		ext3_warning(sb, __func__, "Reserved blocks too high (%u)",
  56			     input->reserved_blocks);
  57	else if (free_blocks_count < 0)
  58		ext3_warning(sb, __func__, "Bad blocks count %u",
  59			     input->blocks_count);
  60	else if (!(bh = sb_bread(sb, end - 1)))
  61		ext3_warning(sb, __func__,
  62			     "Cannot read last block ("E3FSBLK")",
  63			     end - 1);
  64	else if (outside(input->block_bitmap, start, end))
  65		ext3_warning(sb, __func__,
  66			     "Block bitmap not in group (block %u)",
  67			     input->block_bitmap);
  68	else if (outside(input->inode_bitmap, start, end))
  69		ext3_warning(sb, __func__,
  70			     "Inode bitmap not in group (block %u)",
  71			     input->inode_bitmap);
  72	else if (outside(input->inode_table, start, end) ||
  73	         outside(itend - 1, start, end))
  74		ext3_warning(sb, __func__,
  75			     "Inode table not in group (blocks %u-"E3FSBLK")",
  76			     input->inode_table, itend - 1);
  77	else if (input->inode_bitmap == input->block_bitmap)
  78		ext3_warning(sb, __func__,
  79			     "Block bitmap same as inode bitmap (%u)",
  80			     input->block_bitmap);
  81	else if (inside(input->block_bitmap, input->inode_table, itend))
  82		ext3_warning(sb, __func__,
  83			     "Block bitmap (%u) in inode table (%u-"E3FSBLK")",
  84			     input->block_bitmap, input->inode_table, itend-1);
  85	else if (inside(input->inode_bitmap, input->inode_table, itend))
  86		ext3_warning(sb, __func__,
  87			     "Inode bitmap (%u) in inode table (%u-"E3FSBLK")",
  88			     input->inode_bitmap, input->inode_table, itend-1);
  89	else if (inside(input->block_bitmap, start, metaend))
  90		ext3_warning(sb, __func__,
  91			     "Block bitmap (%u) in GDT table"
  92			     " ("E3FSBLK"-"E3FSBLK")",
  93			     input->block_bitmap, start, metaend - 1);
  94	else if (inside(input->inode_bitmap, start, metaend))
  95		ext3_warning(sb, __func__,
  96			     "Inode bitmap (%u) in GDT table"
  97			     " ("E3FSBLK"-"E3FSBLK")",
  98			     input->inode_bitmap, start, metaend - 1);
  99	else if (inside(input->inode_table, start, metaend) ||
 100	         inside(itend - 1, start, metaend))
 101		ext3_warning(sb, __func__,
 102			     "Inode table (%u-"E3FSBLK") overlaps"
 103			     "GDT table ("E3FSBLK"-"E3FSBLK")",
 104			     input->inode_table, itend - 1, start, metaend - 1);
 105	else
 106		err = 0;
 107	brelse(bh);
 108
 109	return err;
 110}
 111
 112static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
 113				  ext3_fsblk_t blk)
 114{
 115	struct buffer_head *bh;
 116	int err;
 117
 118	bh = sb_getblk(sb, blk);
 119	if (!bh)
 120		return ERR_PTR(-EIO);
 121	if ((err = ext3_journal_get_write_access(handle, bh))) {
 122		brelse(bh);
 123		bh = ERR_PTR(err);
 124	} else {
 125		lock_buffer(bh);
 126		memset(bh->b_data, 0, sb->s_blocksize);
 127		set_buffer_uptodate(bh);
 128		unlock_buffer(bh);
 129	}
 130
 131	return bh;
 132}
 133
 134/*
 135 * To avoid calling the atomic setbit hundreds or thousands of times, we only
 136 * need to use it within a single byte (to ensure we get endianness right).
 137 * We can use memset for the rest of the bitmap as there are no other users.
 138 */
 139static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
 140{
 141	int i;
 142
 143	if (start_bit >= end_bit)
 144		return;
 145
 146	ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
 147	for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
 148		ext3_set_bit(i, bitmap);
 149	if (i < end_bit)
 150		memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
 151}
 152
 153/*
 154 * If we have fewer than thresh credits, extend by EXT3_MAX_TRANS_DATA.
 155 * If that fails, restart the transaction & regain write access for the
 156 * buffer head which is used for block_bitmap modifications.
 157 */
 158static int extend_or_restart_transaction(handle_t *handle, int thresh,
 159					 struct buffer_head *bh)
 160{
 161	int err;
 162
 163	if (handle->h_buffer_credits >= thresh)
 164		return 0;
 165
 166	err = ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA);
 167	if (err < 0)
 168		return err;
 169	if (err) {
 170		err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA);
 171		if (err)
 172			return err;
 173		err = ext3_journal_get_write_access(handle, bh);
 174		if (err)
 175			return err;
 176	}
 177
 178	return 0;
 179}
 180
 181/*
 182 * Set up the block and inode bitmaps, and the inode table for the new group.
 183 * This doesn't need to be part of the main transaction, since we are only
 184 * changing blocks outside the actual filesystem.  We still do journaling to
 185 * ensure the recovery is correct in case of a failure just after resize.
 186 * If any part of this fails, we simply abort the resize.
 187 */
 188static int setup_new_group_blocks(struct super_block *sb,
 189				  struct ext3_new_group_data *input)
 190{
 191	struct ext3_sb_info *sbi = EXT3_SB(sb);
 192	ext3_fsblk_t start = ext3_group_first_block_no(sb, input->group);
 193	int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
 194		le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0;
 195	unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group);
 196	struct buffer_head *bh;
 197	handle_t *handle;
 198	ext3_fsblk_t block;
 199	ext3_grpblk_t bit;
 200	int i;
 201	int err = 0, err2;
 202
 203	/* This transaction may be extended/restarted along the way */
 204	handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA);
 205
 206	if (IS_ERR(handle))
 207		return PTR_ERR(handle);
 208
 209	mutex_lock(&sbi->s_resize_lock);
 210	if (input->group != sbi->s_groups_count) {
 211		err = -EBUSY;
 212		goto exit_journal;
 213	}
 214
 215	if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) {
 216		err = PTR_ERR(bh);
 217		goto exit_journal;
 218	}
 219
 220	if (ext3_bg_has_super(sb, input->group)) {
 221		ext3_debug("mark backup superblock %#04lx (+0)\n", start);
 222		ext3_set_bit(0, bh->b_data);
 223	}
 224
 225	/* Copy all of the GDT blocks into the backup in this group */
 226	for (i = 0, bit = 1, block = start + 1;
 227	     i < gdblocks; i++, block++, bit++) {
 228		struct buffer_head *gdb;
 229
 230		ext3_debug("update backup group %#04lx (+%d)\n", block, bit);
 231
 232		err = extend_or_restart_transaction(handle, 1, bh);
 233		if (err)
 234			goto exit_bh;
 235
 236		gdb = sb_getblk(sb, block);
 237		if (!gdb) {
 238			err = -EIO;
 239			goto exit_bh;
 240		}
 241		if ((err = ext3_journal_get_write_access(handle, gdb))) {
 242			brelse(gdb);
 243			goto exit_bh;
 244		}
 245		lock_buffer(gdb);
 246		memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
 247		set_buffer_uptodate(gdb);
 248		unlock_buffer(gdb);
 249		err = ext3_journal_dirty_metadata(handle, gdb);
 250		if (err) {
 251			brelse(gdb);
 252			goto exit_bh;
 253		}
 254		ext3_set_bit(bit, bh->b_data);
 255		brelse(gdb);
 256	}
 257
 258	/* Zero out all of the reserved backup group descriptor table blocks */
 259	for (i = 0, bit = gdblocks + 1, block = start + bit;
 260	     i < reserved_gdb; i++, block++, bit++) {
 261		struct buffer_head *gdb;
 262
 263		ext3_debug("clear reserved block %#04lx (+%d)\n", block, bit);
 264
 265		err = extend_or_restart_transaction(handle, 1, bh);
 266		if (err)
 267			goto exit_bh;
 268
 269		if (IS_ERR(gdb = bclean(handle, sb, block))) {
 270			err = PTR_ERR(gdb);
 271			goto exit_bh;
 272		}
 273		err = ext3_journal_dirty_metadata(handle, gdb);
 274		if (err) {
 275			brelse(gdb);
 276			goto exit_bh;
 277		}
 278		ext3_set_bit(bit, bh->b_data);
 279		brelse(gdb);
 280	}
 281	ext3_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap,
 282		   input->block_bitmap - start);
 283	ext3_set_bit(input->block_bitmap - start, bh->b_data);
 284	ext3_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap,
 285		   input->inode_bitmap - start);
 286	ext3_set_bit(input->inode_bitmap - start, bh->b_data);
 287
 288	/* Zero out all of the inode table blocks */
 289	for (i = 0, block = input->inode_table, bit = block - start;
 290	     i < sbi->s_itb_per_group; i++, bit++, block++) {
 291		struct buffer_head *it;
 292
 293		ext3_debug("clear inode block %#04lx (+%d)\n", block, bit);
 294
 295		err = extend_or_restart_transaction(handle, 1, bh);
 296		if (err)
 297			goto exit_bh;
 298
 299		if (IS_ERR(it = bclean(handle, sb, block))) {
 300			err = PTR_ERR(it);
 301			goto exit_bh;
 302		}
 303		err = ext3_journal_dirty_metadata(handle, it);
 304		if (err) {
 305			brelse(it);
 306			goto exit_bh;
 307		}
 308		brelse(it);
 309		ext3_set_bit(bit, bh->b_data);
 310	}
 311
 312	err = extend_or_restart_transaction(handle, 2, bh);
 313	if (err)
 314		goto exit_bh;
 315
 316	mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb),
 317			bh->b_data);
 318	err = ext3_journal_dirty_metadata(handle, bh);
 319	if (err)
 320		goto exit_bh;
 321	brelse(bh);
 322
 323	/* Mark unused entries in inode bitmap used */
 324	ext3_debug("clear inode bitmap %#04x (+%ld)\n",
 325		   input->inode_bitmap, input->inode_bitmap - start);
 326	if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) {
 327		err = PTR_ERR(bh);
 328		goto exit_journal;
 329	}
 330
 331	mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb),
 332			bh->b_data);
 333	err = ext3_journal_dirty_metadata(handle, bh);
 334exit_bh:
 335	brelse(bh);
 336
 337exit_journal:
 338	mutex_unlock(&sbi->s_resize_lock);
 339	if ((err2 = ext3_journal_stop(handle)) && !err)
 340		err = err2;
 341
 342	return err;
 343}
 344
 345/*
 346 * Iterate through the groups which hold BACKUP superblock/GDT copies in an
 347 * ext3 filesystem.  The counters should be initialized to 1, 5, and 7 before
 348 * calling this for the first time.  In a sparse filesystem it will be the
 349 * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
 350 * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
 351 */
 352static unsigned ext3_list_backups(struct super_block *sb, unsigned *three,
 353				  unsigned *five, unsigned *seven)
 354{
 355	unsigned *min = three;
 356	int mult = 3;
 357	unsigned ret;
 358
 359	if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
 360					EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
 361		ret = *min;
 362		*min += 1;
 363		return ret;
 364	}
 365
 366	if (*five < *min) {
 367		min = five;
 368		mult = 5;
 369	}
 370	if (*seven < *min) {
 371		min = seven;
 372		mult = 7;
 373	}
 374
 375	ret = *min;
 376	*min *= mult;
 377
 378	return ret;
 379}
 380
 381/*
 382 * Check that all of the backup GDT blocks are held in the primary GDT block.
 383 * It is assumed that they are stored in group order.  Returns the number of
 384 * groups in current filesystem that have BACKUPS, or -ve error code.
 385 */
 386static int verify_reserved_gdb(struct super_block *sb,
 387			       struct buffer_head *primary)
 388{
 389	const ext3_fsblk_t blk = primary->b_blocknr;
 390	const unsigned long end = EXT3_SB(sb)->s_groups_count;
 391	unsigned three = 1;
 392	unsigned five = 5;
 393	unsigned seven = 7;
 394	unsigned grp;
 395	__le32 *p = (__le32 *)primary->b_data;
 396	int gdbackups = 0;
 397
 398	while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
 399		if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){
 400			ext3_warning(sb, __func__,
 401				     "reserved GDT "E3FSBLK
 402				     " missing grp %d ("E3FSBLK")",
 403				     blk, grp,
 404				     grp * EXT3_BLOCKS_PER_GROUP(sb) + blk);
 405			return -EINVAL;
 406		}
 407		if (++gdbackups > EXT3_ADDR_PER_BLOCK(sb))
 408			return -EFBIG;
 409	}
 410
 411	return gdbackups;
 412}
 413
 414/*
 415 * Called when we need to bring a reserved group descriptor table block into
 416 * use from the resize inode.  The primary copy of the new GDT block currently
 417 * is an indirect block (under the double indirect block in the resize inode).
 418 * The new backup GDT blocks will be stored as leaf blocks in this indirect
 419 * block, in group order.  Even though we know all the block numbers we need,
 420 * we check to ensure that the resize inode has actually reserved these blocks.
 421 *
 422 * Don't need to update the block bitmaps because the blocks are still in use.
 423 *
 424 * We get all of the error cases out of the way, so that we are sure to not
 425 * fail once we start modifying the data on disk, because JBD has no rollback.
 426 */
 427static int add_new_gdb(handle_t *handle, struct inode *inode,
 428		       struct ext3_new_group_data *input,
 429		       struct buffer_head **primary)
 430{
 431	struct super_block *sb = inode->i_sb;
 432	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
 433	unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
 434	ext3_fsblk_t gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
 435	struct buffer_head **o_group_desc, **n_group_desc;
 436	struct buffer_head *dind;
 437	int gdbackups;
 438	struct ext3_iloc iloc;
 439	__le32 *data;
 440	int err;
 441
 442	if (test_opt(sb, DEBUG))
 443		printk(KERN_DEBUG
 444		       "EXT3-fs: ext3_add_new_gdb: adding group block %lu\n",
 445		       gdb_num);
 446
 447	/*
 448	 * If we are not using the primary superblock/GDT copy don't resize,
 449	 * because the user tools have no way of handling this.  Probably a
 450	 * bad time to do it anyways.
 451	 */
 452	if (EXT3_SB(sb)->s_sbh->b_blocknr !=
 453	    le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) {
 454		ext3_warning(sb, __func__,
 455			"won't resize using backup superblock at %llu",
 456			(unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr);
 457		return -EPERM;
 458	}
 459
 460	*primary = sb_bread(sb, gdblock);
 461	if (!*primary)
 462		return -EIO;
 463
 464	if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) {
 465		err = gdbackups;
 466		goto exit_bh;
 467	}
 468
 469	data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
 470	dind = sb_bread(sb, le32_to_cpu(*data));
 471	if (!dind) {
 472		err = -EIO;
 473		goto exit_bh;
 474	}
 475
 476	data = (__le32 *)dind->b_data;
 477	if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
 478		ext3_warning(sb, __func__,
 479			     "new group %u GDT block "E3FSBLK" not reserved",
 480			     input->group, gdblock);
 481		err = -EINVAL;
 482		goto exit_dind;
 483	}
 484
 485	if ((err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh)))
 486		goto exit_dind;
 487
 488	if ((err = ext3_journal_get_write_access(handle, *primary)))
 489		goto exit_sbh;
 490
 491	if ((err = ext3_journal_get_write_access(handle, dind)))
 492		goto exit_primary;
 493
 494	/* ext3_reserve_inode_write() gets a reference on the iloc */
 495	if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
 496		goto exit_dindj;
 497
 498	n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
 499			GFP_NOFS);
 500	if (!n_group_desc) {
 501		err = -ENOMEM;
 502		ext3_warning (sb, __func__,
 503			      "not enough memory for %lu groups", gdb_num + 1);
 504		goto exit_inode;
 505	}
 506
 507	/*
 508	 * Finally, we have all of the possible failures behind us...
 509	 *
 510	 * Remove new GDT block from inode double-indirect block and clear out
 511	 * the new GDT block for use (which also "frees" the backup GDT blocks
 512	 * from the reserved inode).  We don't need to change the bitmaps for
 513	 * these blocks, because they are marked as in-use from being in the
 514	 * reserved inode, and will become GDT blocks (primary and backup).
 515	 */
 516	data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0;
 517	err = ext3_journal_dirty_metadata(handle, dind);
 518	if (err)
 519		goto exit_group_desc;
 520	brelse(dind);
 521	dind = NULL;
 522	inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
 523	err = ext3_mark_iloc_dirty(handle, inode, &iloc);
 524	if (err)
 525		goto exit_group_desc;
 526	memset((*primary)->b_data, 0, sb->s_blocksize);
 527	err = ext3_journal_dirty_metadata(handle, *primary);
 528	if (err)
 529		goto exit_group_desc;
 530
 531	o_group_desc = EXT3_SB(sb)->s_group_desc;
 532	memcpy(n_group_desc, o_group_desc,
 533	       EXT3_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
 534	n_group_desc[gdb_num] = *primary;
 535	EXT3_SB(sb)->s_group_desc = n_group_desc;
 536	EXT3_SB(sb)->s_gdb_count++;
 537	kfree(o_group_desc);
 538
 539	le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
 540	err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
 541	if (err)
 542		goto exit_inode;
 543
 544	return 0;
 545
 546exit_group_desc:
 547	kfree(n_group_desc);
 548exit_inode:
 549	//ext3_journal_release_buffer(handle, iloc.bh);
 550	brelse(iloc.bh);
 551exit_dindj:
 552	//ext3_journal_release_buffer(handle, dind);
 553exit_primary:
 554	//ext3_journal_release_buffer(handle, *primary);
 555exit_sbh:
 556	//ext3_journal_release_buffer(handle, *primary);
 557exit_dind:
 558	brelse(dind);
 559exit_bh:
 560	brelse(*primary);
 561
 562	ext3_debug("leaving with error %d\n", err);
 563	return err;
 564}
 565
 566/*
 567 * Called when we are adding a new group which has a backup copy of each of
 568 * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
 569 * We need to add these reserved backup GDT blocks to the resize inode, so
 570 * that they are kept for future resizing and not allocated to files.
 571 *
 572 * Each reserved backup GDT block will go into a different indirect block.
 573 * The indirect blocks are actually the primary reserved GDT blocks,
 574 * so we know in advance what their block numbers are.  We only get the
 575 * double-indirect block to verify it is pointing to the primary reserved
 576 * GDT blocks so we don't overwrite a data block by accident.  The reserved
 577 * backup GDT blocks are stored in their reserved primary GDT block.
 578 */
 579static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 580			      struct ext3_new_group_data *input)
 581{
 582	struct super_block *sb = inode->i_sb;
 583	int reserved_gdb =le16_to_cpu(EXT3_SB(sb)->s_es->s_reserved_gdt_blocks);
 584	struct buffer_head **primary;
 585	struct buffer_head *dind;
 586	struct ext3_iloc iloc;
 587	ext3_fsblk_t blk;
 588	__le32 *data, *end;
 589	int gdbackups = 0;
 590	int res, i;
 591	int err;
 592
 593	primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
 594	if (!primary)
 595		return -ENOMEM;
 596
 597	data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
 598	dind = sb_bread(sb, le32_to_cpu(*data));
 599	if (!dind) {
 600		err = -EIO;
 601		goto exit_free;
 602	}
 603
 604	blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count;
 605	data = (__le32 *)dind->b_data + (EXT3_SB(sb)->s_gdb_count %
 606					 EXT3_ADDR_PER_BLOCK(sb));
 607	end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb);
 608
 609	/* Get each reserved primary GDT block and verify it holds backups */
 610	for (res = 0; res < reserved_gdb; res++, blk++) {
 611		if (le32_to_cpu(*data) != blk) {
 612			ext3_warning(sb, __func__,
 613				     "reserved block "E3FSBLK
 614				     " not at offset %ld",
 615				     blk,
 616				     (long)(data - (__le32 *)dind->b_data));
 617			err = -EINVAL;
 618			goto exit_bh;
 619		}
 620		primary[res] = sb_bread(sb, blk);
 621		if (!primary[res]) {
 622			err = -EIO;
 623			goto exit_bh;
 624		}
 625		if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) {
 626			brelse(primary[res]);
 627			err = gdbackups;
 628			goto exit_bh;
 629		}
 630		if (++data >= end)
 631			data = (__le32 *)dind->b_data;
 632	}
 633
 634	for (i = 0; i < reserved_gdb; i++) {
 635		if ((err = ext3_journal_get_write_access(handle, primary[i]))) {
 636			/*
 637			int j;
 638			for (j = 0; j < i; j++)
 639				ext3_journal_release_buffer(handle, primary[j]);
 640			 */
 641			goto exit_bh;
 642		}
 643	}
 644
 645	if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
 646		goto exit_bh;
 647
 648	/*
 649	 * Finally we can add each of the reserved backup GDT blocks from
 650	 * the new group to its reserved primary GDT block.
 651	 */
 652	blk = input->group * EXT3_BLOCKS_PER_GROUP(sb);
 653	for (i = 0; i < reserved_gdb; i++) {
 654		int err2;
 655		data = (__le32 *)primary[i]->b_data;
 656		/* printk("reserving backup %lu[%u] = %lu\n",
 657		       primary[i]->b_blocknr, gdbackups,
 658		       blk + primary[i]->b_blocknr); */
 659		data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
 660		err2 = ext3_journal_dirty_metadata(handle, primary[i]);
 661		if (!err)
 662			err = err2;
 663	}
 664	inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9;
 665	ext3_mark_iloc_dirty(handle, inode, &iloc);
 666
 667exit_bh:
 668	while (--res >= 0)
 669		brelse(primary[res]);
 670	brelse(dind);
 671
 672exit_free:
 673	kfree(primary);
 674
 675	return err;
 676}
 677
 678/*
 679 * Update the backup copies of the ext3 metadata.  These don't need to be part
 680 * of the main resize transaction, because e2fsck will re-write them if there
 681 * is a problem (basically only OOM will cause a problem).  However, we
 682 * _should_ update the backups if possible, in case the primary gets trashed
 683 * for some reason and we need to run e2fsck from a backup superblock.  The
 684 * important part is that the new block and inode counts are in the backup
 685 * superblocks, and the location of the new group metadata in the GDT backups.
 686 *
 687 * We do not need take the s_resize_lock for this, because these
 688 * blocks are not otherwise touched by the filesystem code when it is
 689 * mounted.  We don't need to worry about last changing from
 690 * sbi->s_groups_count, because the worst that can happen is that we
 691 * do not copy the full number of backups at this time.  The resize
 692 * which changed s_groups_count will backup again.
 693 */
 694static void update_backups(struct super_block *sb,
 695			   int blk_off, char *data, int size)
 696{
 697	struct ext3_sb_info *sbi = EXT3_SB(sb);
 698	const unsigned long last = sbi->s_groups_count;
 699	const int bpg = EXT3_BLOCKS_PER_GROUP(sb);
 700	unsigned three = 1;
 701	unsigned five = 5;
 702	unsigned seven = 7;
 703	unsigned group;
 704	int rest = sb->s_blocksize - size;
 705	handle_t *handle;
 706	int err = 0, err2;
 707
 708	handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA);
 709	if (IS_ERR(handle)) {
 710		group = 1;
 711		err = PTR_ERR(handle);
 712		goto exit_err;
 713	}
 714
 715	while ((group = ext3_list_backups(sb, &three, &five, &seven)) < last) {
 716		struct buffer_head *bh;
 717
 718		/* Out of journal space, and can't get more - abort - so sad */
 719		if (handle->h_buffer_credits == 0 &&
 720		    ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA) &&
 721		    (err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA)))
 722			break;
 723
 724		bh = sb_getblk(sb, group * bpg + blk_off);
 725		if (!bh) {
 726			err = -EIO;
 727			break;
 728		}
 729		ext3_debug("update metadata backup %#04lx\n",
 730			  (unsigned long)bh->b_blocknr);
 731		if ((err = ext3_journal_get_write_access(handle, bh))) {
 732			brelse(bh);
 733			break;
 734		}
 735		lock_buffer(bh);
 736		memcpy(bh->b_data, data, size);
 737		if (rest)
 738			memset(bh->b_data + size, 0, rest);
 739		set_buffer_uptodate(bh);
 740		unlock_buffer(bh);
 741		err = ext3_journal_dirty_metadata(handle, bh);
 742		brelse(bh);
 743		if (err)
 744			break;
 745	}
 746	if ((err2 = ext3_journal_stop(handle)) && !err)
 747		err = err2;
 748
 749	/*
 750	 * Ugh! Need to have e2fsck write the backup copies.  It is too
 751	 * late to revert the resize, we shouldn't fail just because of
 752	 * the backup copies (they are only needed in case of corruption).
 753	 *
 754	 * However, if we got here we have a journal problem too, so we
 755	 * can't really start a transaction to mark the superblock.
 756	 * Chicken out and just set the flag on the hope it will be written
 757	 * to disk, and if not - we will simply wait until next fsck.
 758	 */
 759exit_err:
 760	if (err) {
 761		ext3_warning(sb, __func__,
 762			     "can't update backup for group %d (err %d), "
 763			     "forcing fsck on next reboot", group, err);
 764		sbi->s_mount_state &= ~EXT3_VALID_FS;
 765		sbi->s_es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
 766		mark_buffer_dirty(sbi->s_sbh);
 767	}
 768}
 769
 770/* Add group descriptor data to an existing or new group descriptor block.
 771 * Ensure we handle all possible error conditions _before_ we start modifying
 772 * the filesystem, because we cannot abort the transaction and not have it
 773 * write the data to disk.
 774 *
 775 * If we are on a GDT block boundary, we need to get the reserved GDT block.
 776 * Otherwise, we may need to add backup GDT blocks for a sparse group.
 777 *
 778 * We only need to hold the superblock lock while we are actually adding
 779 * in the new group's counts to the superblock.  Prior to that we have
 780 * not really "added" the group at all.  We re-check that we are still
 781 * adding in the last group in case things have changed since verifying.
 782 */
 783int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 784{
 785	struct ext3_sb_info *sbi = EXT3_SB(sb);
 786	struct ext3_super_block *es = sbi->s_es;
 787	int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
 788		le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
 789	struct buffer_head *primary = NULL;
 790	struct ext3_group_desc *gdp;
 791	struct inode *inode = NULL;
 792	handle_t *handle;
 793	int gdb_off, gdb_num;
 794	int err, err2;
 795
 796	gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
 797	gdb_off = input->group % EXT3_DESC_PER_BLOCK(sb);
 798
 799	if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb,
 800					EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
 801		ext3_warning(sb, __func__,
 802			     "Can't resize non-sparse filesystem further");
 803		return -EPERM;
 804	}
 805
 806	if (le32_to_cpu(es->s_blocks_count) + input->blocks_count <
 807	    le32_to_cpu(es->s_blocks_count)) {
 808		ext3_warning(sb, __func__, "blocks_count overflow\n");
 809		return -EINVAL;
 810	}
 811
 812	if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) <
 813	    le32_to_cpu(es->s_inodes_count)) {
 814		ext3_warning(sb, __func__, "inodes_count overflow\n");
 815		return -EINVAL;
 816	}
 817
 818	if (reserved_gdb || gdb_off == 0) {
 819		if (!EXT3_HAS_COMPAT_FEATURE(sb,
 820					     EXT3_FEATURE_COMPAT_RESIZE_INODE)
 821		    || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
 822			ext3_warning(sb, __func__,
 823				     "No reserved GDT blocks, can't resize");
 824			return -EPERM;
 825		}
 826		inode = ext3_iget(sb, EXT3_RESIZE_INO);
 827		if (IS_ERR(inode)) {
 828			ext3_warning(sb, __func__,
 829				     "Error opening resize inode");
 830			return PTR_ERR(inode);
 831		}
 832	}
 833
 834	if ((err = verify_group_input(sb, input)))
 835		goto exit_put;
 836
 837	if ((err = setup_new_group_blocks(sb, input)))
 838		goto exit_put;
 839
 840	/*
 841	 * We will always be modifying at least the superblock and a GDT
 842	 * block.  If we are adding a group past the last current GDT block,
 843	 * we will also modify the inode and the dindirect block.  If we
 844	 * are adding a group with superblock/GDT backups  we will also
 845	 * modify each of the reserved GDT dindirect blocks.
 846	 */
 847	handle = ext3_journal_start_sb(sb,
 848				       ext3_bg_has_super(sb, input->group) ?
 849				       3 + reserved_gdb : 4);
 850	if (IS_ERR(handle)) {
 851		err = PTR_ERR(handle);
 852		goto exit_put;
 853	}
 854
 855	mutex_lock(&sbi->s_resize_lock);
 856	if (input->group != sbi->s_groups_count) {
 857		ext3_warning(sb, __func__,
 858			     "multiple resizers run on filesystem!");
 859		err = -EBUSY;
 860		goto exit_journal;
 861	}
 862
 863	if ((err = ext3_journal_get_write_access(handle, sbi->s_sbh)))
 864		goto exit_journal;
 865
 866	/*
 867	 * We will only either add reserved group blocks to a backup group
 868	 * or remove reserved blocks for the first group in a new group block.
 869	 * Doing both would be mean more complex code, and sane people don't
 870	 * use non-sparse filesystems anymore.  This is already checked above.
 871	 */
 872	if (gdb_off) {
 873		primary = sbi->s_group_desc[gdb_num];
 874		if ((err = ext3_journal_get_write_access(handle, primary)))
 875			goto exit_journal;
 876
 877		if (reserved_gdb && ext3_bg_num_gdb(sb, input->group) &&
 878		    (err = reserve_backup_gdb(handle, inode, input)))
 879			goto exit_journal;
 880	} else if ((err = add_new_gdb(handle, inode, input, &primary)))
 881		goto exit_journal;
 882
 883	/*
 884	 * OK, now we've set up the new group.  Time to make it active.
 885	 *
 886	 * We do not lock all allocations via s_resize_lock
 887	 * so we have to be safe wrt. concurrent accesses the group
 888	 * data.  So we need to be careful to set all of the relevant
 889	 * group descriptor data etc. *before* we enable the group.
 890	 *
 891	 * The key field here is sbi->s_groups_count: as long as
 892	 * that retains its old value, nobody is going to access the new
 893	 * group.
 894	 *
 895	 * So first we update all the descriptor metadata for the new
 896	 * group; then we update the total disk blocks count; then we
 897	 * update the groups count to enable the group; then finally we
 898	 * update the free space counts so that the system can start
 899	 * using the new disk blocks.
 900	 */
 901
 902	/* Update group descriptor block for new group */
 903	gdp = (struct ext3_group_desc *)primary->b_data + gdb_off;
 904
 905	gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap);
 906	gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap);
 907	gdp->bg_inode_table = cpu_to_le32(input->inode_table);
 908	gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
 909	gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb));
 910
 911	/*
 912	 * Make the new blocks and inodes valid next.  We do this before
 913	 * increasing the group count so that once the group is enabled,
 914	 * all of its blocks and inodes are already valid.
 915	 *
 916	 * We always allocate group-by-group, then block-by-block or
 917	 * inode-by-inode within a group, so enabling these
 918	 * blocks/inodes before the group is live won't actually let us
 919	 * allocate the new space yet.
 920	 */
 921	le32_add_cpu(&es->s_blocks_count, input->blocks_count);
 922	le32_add_cpu(&es->s_inodes_count, EXT3_INODES_PER_GROUP(sb));
 923
 924	/*
 925	 * We need to protect s_groups_count against other CPUs seeing
 926	 * inconsistent state in the superblock.
 927	 *
 928	 * The precise rules we use are:
 929	 *
 930	 * * Writers of s_groups_count *must* hold s_resize_lock
 931	 * AND
 932	 * * Writers must perform a smp_wmb() after updating all dependent
 933	 *   data and before modifying the groups count
 934	 *
 935	 * * Readers must hold s_resize_lock over the access
 936	 * OR
 937	 * * Readers must perform an smp_rmb() after reading the groups count
 938	 *   and before reading any dependent data.
 939	 *
 940	 * NB. These rules can be relaxed when checking the group count
 941	 * while freeing data, as we can only allocate from a block
 942	 * group after serialising against the group count, and we can
 943	 * only then free after serialising in turn against that
 944	 * allocation.
 945	 */
 946	smp_wmb();
 947
 948	/* Update the global fs size fields */
 949	sbi->s_groups_count++;
 950
 951	err = ext3_journal_dirty_metadata(handle, primary);
 952	if (err)
 953		goto exit_journal;
 954
 955	/* Update the reserved block counts only once the new group is
 956	 * active. */
 957	le32_add_cpu(&es->s_r_blocks_count, input->reserved_blocks);
 958
 959	/* Update the free space counts */
 960	percpu_counter_add(&sbi->s_freeblocks_counter,
 961			   input->free_blocks_count);
 962	percpu_counter_add(&sbi->s_freeinodes_counter,
 963			   EXT3_INODES_PER_GROUP(sb));
 964
 965	err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
 966
 967exit_journal:
 968	mutex_unlock(&sbi->s_resize_lock);
 969	if ((err2 = ext3_journal_stop(handle)) && !err)
 970		err = err2;
 971	if (!err) {
 972		update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
 973			       sizeof(struct ext3_super_block));
 974		update_backups(sb, primary->b_blocknr, primary->b_data,
 975			       primary->b_size);
 976	}
 977exit_put:
 978	iput(inode);
 979	return err;
 980} /* ext3_group_add */
 981
 982/* Extend the filesystem to the new number of blocks specified.  This entry
 983 * point is only used to extend the current filesystem to the end of the last
 984 * existing group.  It can be accessed via ioctl, or by "remount,resize=<size>"
 985 * for emergencies (because it has no dependencies on reserved blocks).
 986 *
 987 * If we _really_ wanted, we could use default values to call ext3_group_add()
 988 * allow the "remount" trick to work for arbitrary resizing, assuming enough
 989 * GDT blocks are reserved to grow to the desired size.
 990 */
 991int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 992		      ext3_fsblk_t n_blocks_count)
 993{
 994	ext3_fsblk_t o_blocks_count;
 995	ext3_grpblk_t last;
 996	ext3_grpblk_t add;
 997	struct buffer_head * bh;
 998	handle_t *handle;
 999	int err;
1000	unsigned long freed_blocks;
1001
1002	/* We don't need to worry about locking wrt other resizers just
1003	 * yet: we're going to revalidate es->s_blocks_count after
1004	 * taking the s_resize_lock below. */
1005	o_blocks_count = le32_to_cpu(es->s_blocks_count);
1006
1007	if (test_opt(sb, DEBUG))
1008		printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK
1009		       " up to "E3FSBLK" blocks\n",
1010		       o_blocks_count, n_blocks_count);
1011
1012	if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
1013		return 0;
1014
1015	if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
1016		printk(KERN_ERR "EXT3-fs: filesystem on %s:"
1017			" too large to resize to "E3FSBLK" blocks safely\n",
1018			sb->s_id, n_blocks_count);
1019		if (sizeof(sector_t) < 8)
1020			ext3_warning(sb, __func__,
1021			"CONFIG_LBDAF not enabled\n");
1022		return -EINVAL;
1023	}
1024
1025	if (n_blocks_count < o_blocks_count) {
1026		ext3_warning(sb, __func__,
1027			     "can't shrink FS - resize aborted");
1028		return -EBUSY;
1029	}
1030
1031	/* Handle the remaining blocks in the last group only. */
1032	last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) %
1033		EXT3_BLOCKS_PER_GROUP(sb);
1034
1035	if (last == 0) {
1036		ext3_warning(sb, __func__,
1037			     "need to use ext2online to resize further");
1038		return -EPERM;
1039	}
1040
1041	add = EXT3_BLOCKS_PER_GROUP(sb) - last;
1042
1043	if (o_blocks_count + add < o_blocks_count) {
1044		ext3_warning(sb, __func__, "blocks_count overflow");
1045		return -EINVAL;
1046	}
1047
1048	if (o_blocks_count + add > n_blocks_count)
1049		add = n_blocks_count - o_blocks_count;
1050
1051	if (o_blocks_count + add < n_blocks_count)
1052		ext3_warning(sb, __func__,
1053			     "will only finish group ("E3FSBLK
1054			     " blocks, %u new)",
1055			     o_blocks_count + add, add);
1056
1057	/* See if the device is actually as big as what was requested */
1058	bh = sb_bread(sb, o_blocks_count + add -1);
1059	if (!bh) {
1060		ext3_warning(sb, __func__,
1061			     "can't read last block, resize aborted");
1062		return -ENOSPC;
1063	}
1064	brelse(bh);
1065
1066	/* We will update the superblock, one block bitmap, and
1067	 * one group descriptor via ext3_free_blocks().
1068	 */
1069	handle = ext3_journal_start_sb(sb, 3);
1070	if (IS_ERR(handle)) {
1071		err = PTR_ERR(handle);
1072		ext3_warning(sb, __func__, "error %d on journal start",err);
1073		goto exit_put;
1074	}
1075
1076	mutex_lock(&EXT3_SB(sb)->s_resize_lock);
1077	if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
1078		ext3_warning(sb, __func__,
1079			     "multiple resizers run on filesystem!");
1080		mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1081		ext3_journal_stop(handle);
1082		err = -EBUSY;
1083		goto exit_put;
1084	}
1085
1086	if ((err = ext3_journal_get_write_access(handle,
1087						 EXT3_SB(sb)->s_sbh))) {
1088		ext3_warning(sb, __func__,
1089			     "error %d on journal write access", err);
1090		mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1091		ext3_journal_stop(handle);
1092		goto exit_put;
1093	}
1094	es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
1095	err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
1096	mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1097	if (err) {
1098		ext3_warning(sb, __func__,
1099			     "error %d on journal dirty metadata", err);
1100		ext3_journal_stop(handle);
1101		goto exit_put;
1102	}
1103	ext3_debug("freeing blocks "E3FSBLK" through "E3FSBLK"\n",
1104		   o_blocks_count, o_blocks_count + add);
1105	ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
1106	ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n",
1107		   o_blocks_count, o_blocks_count + add);
1108	if ((err = ext3_journal_stop(handle)))
1109		goto exit_put;
1110	if (test_opt(sb, DEBUG))
1111		printk(KERN_DEBUG "EXT3-fs: extended group to %u blocks\n",
1112		       le32_to_cpu(es->s_blocks_count));
1113	update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es,
1114		       sizeof(struct ext3_super_block));
1115exit_put:
1116	return err;
1117} /* ext3_group_extend */