Linux Audio

Check our new training course

Loading...
v6.8
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2015 Facebook.  All rights reserved.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
   4 */
   5
   6#include <linux/kernel.h>
   7#include <linux/sched/mm.h>
   8#include "messages.h"
   9#include "ctree.h"
  10#include "disk-io.h"
  11#include "locking.h"
  12#include "free-space-tree.h"
  13#include "transaction.h"
  14#include "block-group.h"
  15#include "fs.h"
  16#include "accessors.h"
  17#include "extent-tree.h"
  18#include "root-tree.h"
  19
  20static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
  21					struct btrfs_block_group *block_group,
 
  22					struct btrfs_path *path);
  23
  24static struct btrfs_root *btrfs_free_space_root(
  25				struct btrfs_block_group *block_group)
  26{
  27	struct btrfs_key key = {
  28		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
  29		.type = BTRFS_ROOT_ITEM_KEY,
  30		.offset = 0,
  31	};
  32
  33	if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
  34		key.offset = block_group->global_root_id;
  35	return btrfs_global_root(block_group->fs_info, &key);
  36}
  37
  38void set_free_space_tree_thresholds(struct btrfs_block_group *cache)
  39{
  40	u32 bitmap_range;
  41	size_t bitmap_size;
  42	u64 num_bitmaps, total_bitmap_size;
  43
  44	if (WARN_ON(cache->length == 0))
  45		btrfs_warn(cache->fs_info, "block group %llu length is zero",
  46			   cache->start);
  47
  48	/*
  49	 * We convert to bitmaps when the disk space required for using extents
  50	 * exceeds that required for using bitmaps.
  51	 */
  52	bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
  53	num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
 
  54	bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
  55	total_bitmap_size = num_bitmaps * bitmap_size;
  56	cache->bitmap_high_thresh = div_u64(total_bitmap_size,
  57					    sizeof(struct btrfs_item));
  58
  59	/*
  60	 * We allow for a small buffer between the high threshold and low
  61	 * threshold to avoid thrashing back and forth between the two formats.
  62	 */
  63	if (cache->bitmap_high_thresh > 100)
  64		cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
  65	else
  66		cache->bitmap_low_thresh = 0;
  67}
  68
  69static int add_new_free_space_info(struct btrfs_trans_handle *trans,
  70				   struct btrfs_block_group *block_group,
 
  71				   struct btrfs_path *path)
  72{
  73	struct btrfs_root *root = btrfs_free_space_root(block_group);
  74	struct btrfs_free_space_info *info;
  75	struct btrfs_key key;
  76	struct extent_buffer *leaf;
  77	int ret;
  78
  79	key.objectid = block_group->start;
  80	key.type = BTRFS_FREE_SPACE_INFO_KEY;
  81	key.offset = block_group->length;
  82
  83	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
  84	if (ret)
  85		goto out;
  86
  87	leaf = path->nodes[0];
  88	info = btrfs_item_ptr(leaf, path->slots[0],
  89			      struct btrfs_free_space_info);
  90	btrfs_set_free_space_extent_count(leaf, info, 0);
  91	btrfs_set_free_space_flags(leaf, info, 0);
  92	btrfs_mark_buffer_dirty(trans, leaf);
  93
  94	ret = 0;
  95out:
  96	btrfs_release_path(path);
  97	return ret;
  98}
  99
 100EXPORT_FOR_TESTS
 101struct btrfs_free_space_info *search_free_space_info(
 102		struct btrfs_trans_handle *trans,
 103		struct btrfs_block_group *block_group,
 104		struct btrfs_path *path, int cow)
 105{
 106	struct btrfs_fs_info *fs_info = block_group->fs_info;
 107	struct btrfs_root *root = btrfs_free_space_root(block_group);
 108	struct btrfs_key key;
 109	int ret;
 110
 111	key.objectid = block_group->start;
 112	key.type = BTRFS_FREE_SPACE_INFO_KEY;
 113	key.offset = block_group->length;
 114
 115	ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
 116	if (ret < 0)
 117		return ERR_PTR(ret);
 118	if (ret != 0) {
 119		btrfs_warn(fs_info, "missing free space info for %llu",
 120			   block_group->start);
 121		ASSERT(0);
 122		return ERR_PTR(-ENOENT);
 123	}
 124
 125	return btrfs_item_ptr(path->nodes[0], path->slots[0],
 126			      struct btrfs_free_space_info);
 127}
 128
 129/*
 130 * btrfs_search_slot() but we're looking for the greatest key less than the
 131 * passed key.
 132 */
 133static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
 134				  struct btrfs_root *root,
 135				  struct btrfs_key *key, struct btrfs_path *p,
 136				  int ins_len, int cow)
 137{
 138	int ret;
 139
 140	ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
 141	if (ret < 0)
 142		return ret;
 143
 144	if (ret == 0) {
 145		ASSERT(0);
 146		return -EIO;
 147	}
 148
 149	if (p->slots[0] == 0) {
 150		ASSERT(0);
 151		return -EIO;
 152	}
 153	p->slots[0]--;
 154
 155	return 0;
 156}
 157
 158static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
 159					 u64 size)
 160{
 161	return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
 162}
 163
 164static unsigned long *alloc_bitmap(u32 bitmap_size)
 165{
 166	unsigned long *ret;
 167	unsigned int nofs_flag;
 168	u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
 169
 170	/*
 171	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
 172	 * into the filesystem as the free space bitmap can be modified in the
 173	 * critical section of a transaction commit.
 174	 *
 175	 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
 176	 * know that recursion is unsafe.
 177	 */
 178	nofs_flag = memalloc_nofs_save();
 179	ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
 180	memalloc_nofs_restore(nofs_flag);
 181	return ret;
 182}
 183
 184static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
 185{
 186	u8 *p = ((u8 *)map) + BIT_BYTE(start);
 187	const unsigned int size = start + len;
 188	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
 189	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
 190
 191	while (len - bits_to_set >= 0) {
 192		*p |= mask_to_set;
 193		len -= bits_to_set;
 194		bits_to_set = BITS_PER_BYTE;
 195		mask_to_set = ~0;
 196		p++;
 197	}
 198	if (len) {
 199		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
 200		*p |= mask_to_set;
 201	}
 202}
 203
 204EXPORT_FOR_TESTS
 205int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 206				  struct btrfs_block_group *block_group,
 
 207				  struct btrfs_path *path)
 208{
 209	struct btrfs_fs_info *fs_info = trans->fs_info;
 210	struct btrfs_root *root = btrfs_free_space_root(block_group);
 211	struct btrfs_free_space_info *info;
 212	struct btrfs_key key, found_key;
 213	struct extent_buffer *leaf;
 214	unsigned long *bitmap;
 215	char *bitmap_cursor;
 216	u64 start, end;
 217	u64 bitmap_range, i;
 218	u32 bitmap_size, flags, expected_extent_count;
 219	u32 extent_count = 0;
 220	int done = 0, nr;
 221	int ret;
 222
 223	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
 
 224	bitmap = alloc_bitmap(bitmap_size);
 225	if (!bitmap) {
 226		ret = -ENOMEM;
 227		goto out;
 228	}
 229
 230	start = block_group->start;
 231	end = block_group->start + block_group->length;
 232
 233	key.objectid = end - 1;
 234	key.type = (u8)-1;
 235	key.offset = (u64)-1;
 236
 237	while (!done) {
 238		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 239		if (ret)
 240			goto out;
 241
 242		leaf = path->nodes[0];
 243		nr = 0;
 244		path->slots[0]++;
 245		while (path->slots[0] > 0) {
 246			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
 247
 248			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
 249				ASSERT(found_key.objectid == block_group->start);
 250				ASSERT(found_key.offset == block_group->length);
 251				done = 1;
 252				break;
 253			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
 254				u64 first, last;
 255
 256				ASSERT(found_key.objectid >= start);
 257				ASSERT(found_key.objectid < end);
 258				ASSERT(found_key.objectid + found_key.offset <= end);
 259
 260				first = div_u64(found_key.objectid - start,
 261						fs_info->sectorsize);
 262				last = div_u64(found_key.objectid + found_key.offset - start,
 263					       fs_info->sectorsize);
 264				le_bitmap_set(bitmap, first, last - first);
 265
 266				extent_count++;
 267				nr++;
 268				path->slots[0]--;
 269			} else {
 270				ASSERT(0);
 271			}
 272		}
 273
 274		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
 275		if (ret)
 276			goto out;
 277		btrfs_release_path(path);
 278	}
 279
 280	info = search_free_space_info(trans, block_group, path, 1);
 281	if (IS_ERR(info)) {
 282		ret = PTR_ERR(info);
 283		goto out;
 284	}
 285	leaf = path->nodes[0];
 286	flags = btrfs_free_space_flags(leaf, info);
 287	flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
 288	btrfs_set_free_space_flags(leaf, info, flags);
 289	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
 290	btrfs_mark_buffer_dirty(trans, leaf);
 291	btrfs_release_path(path);
 292
 293	if (extent_count != expected_extent_count) {
 294		btrfs_err(fs_info,
 295			  "incorrect extent count for %llu; counted %u, expected %u",
 296			  block_group->start, extent_count,
 297			  expected_extent_count);
 298		ASSERT(0);
 299		ret = -EIO;
 300		goto out;
 301	}
 302
 303	bitmap_cursor = (char *)bitmap;
 304	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
 305	i = start;
 306	while (i < end) {
 307		unsigned long ptr;
 308		u64 extent_size;
 309		u32 data_size;
 310
 311		extent_size = min(end - i, bitmap_range);
 312		data_size = free_space_bitmap_size(fs_info, extent_size);
 
 313
 314		key.objectid = i;
 315		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
 316		key.offset = extent_size;
 317
 318		ret = btrfs_insert_empty_item(trans, root, path, &key,
 319					      data_size);
 320		if (ret)
 321			goto out;
 322
 323		leaf = path->nodes[0];
 324		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 325		write_extent_buffer(leaf, bitmap_cursor, ptr,
 326				    data_size);
 327		btrfs_mark_buffer_dirty(trans, leaf);
 328		btrfs_release_path(path);
 329
 330		i += extent_size;
 331		bitmap_cursor += data_size;
 332	}
 333
 334	ret = 0;
 335out:
 336	kvfree(bitmap);
 337	if (ret)
 338		btrfs_abort_transaction(trans, ret);
 339	return ret;
 340}
 341
 342EXPORT_FOR_TESTS
 343int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 344				  struct btrfs_block_group *block_group,
 
 345				  struct btrfs_path *path)
 346{
 347	struct btrfs_fs_info *fs_info = trans->fs_info;
 348	struct btrfs_root *root = btrfs_free_space_root(block_group);
 349	struct btrfs_free_space_info *info;
 350	struct btrfs_key key, found_key;
 351	struct extent_buffer *leaf;
 352	unsigned long *bitmap;
 353	u64 start, end;
 
 
 
 354	u32 bitmap_size, flags, expected_extent_count;
 355	unsigned long nrbits, start_bit, end_bit;
 356	u32 extent_count = 0;
 357	int done = 0, nr;
 358	int ret;
 359
 360	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
 
 361	bitmap = alloc_bitmap(bitmap_size);
 362	if (!bitmap) {
 363		ret = -ENOMEM;
 364		goto out;
 365	}
 366
 367	start = block_group->start;
 368	end = block_group->start + block_group->length;
 369
 370	key.objectid = end - 1;
 371	key.type = (u8)-1;
 372	key.offset = (u64)-1;
 373
 374	while (!done) {
 375		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 376		if (ret)
 377			goto out;
 378
 379		leaf = path->nodes[0];
 380		nr = 0;
 381		path->slots[0]++;
 382		while (path->slots[0] > 0) {
 383			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
 384
 385			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
 386				ASSERT(found_key.objectid == block_group->start);
 387				ASSERT(found_key.offset == block_group->length);
 388				done = 1;
 389				break;
 390			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
 391				unsigned long ptr;
 392				char *bitmap_cursor;
 393				u32 bitmap_pos, data_size;
 394
 395				ASSERT(found_key.objectid >= start);
 396				ASSERT(found_key.objectid < end);
 397				ASSERT(found_key.objectid + found_key.offset <= end);
 398
 399				bitmap_pos = div_u64(found_key.objectid - start,
 400						     fs_info->sectorsize *
 401						     BITS_PER_BYTE);
 402				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
 403				data_size = free_space_bitmap_size(fs_info,
 404								found_key.offset);
 405
 406				ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
 407				read_extent_buffer(leaf, bitmap_cursor, ptr,
 408						   data_size);
 409
 410				nr++;
 411				path->slots[0]--;
 412			} else {
 413				ASSERT(0);
 414			}
 415		}
 416
 417		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
 418		if (ret)
 419			goto out;
 420		btrfs_release_path(path);
 421	}
 422
 423	info = search_free_space_info(trans, block_group, path, 1);
 424	if (IS_ERR(info)) {
 425		ret = PTR_ERR(info);
 426		goto out;
 427	}
 428	leaf = path->nodes[0];
 429	flags = btrfs_free_space_flags(leaf, info);
 430	flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
 431	btrfs_set_free_space_flags(leaf, info, flags);
 432	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
 433	btrfs_mark_buffer_dirty(trans, leaf);
 434	btrfs_release_path(path);
 435
 436	nrbits = block_group->length >> block_group->fs_info->sectorsize_bits;
 437	start_bit = find_next_bit_le(bitmap, nrbits, 0);
 
 
 
 
 
 
 
 
 438
 439	while (start_bit < nrbits) {
 440		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
 441		ASSERT(start_bit < end_bit);
 
 442
 443		key.objectid = start + start_bit * block_group->fs_info->sectorsize;
 
 
 
 
 
 
 
 444		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 445		key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;
 446
 447		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 448		if (ret)
 449			goto out;
 450		btrfs_release_path(path);
 451
 452		extent_count++;
 453
 454		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
 455	}
 456
 457	if (extent_count != expected_extent_count) {
 458		btrfs_err(fs_info,
 459			  "incorrect extent count for %llu; counted %u, expected %u",
 460			  block_group->start, extent_count,
 461			  expected_extent_count);
 462		ASSERT(0);
 463		ret = -EIO;
 464		goto out;
 465	}
 466
 467	ret = 0;
 468out:
 469	kvfree(bitmap);
 470	if (ret)
 471		btrfs_abort_transaction(trans, ret);
 472	return ret;
 473}
 474
 475static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
 476					  struct btrfs_block_group *block_group,
 
 477					  struct btrfs_path *path,
 478					  int new_extents)
 479{
 480	struct btrfs_free_space_info *info;
 481	u32 flags;
 482	u32 extent_count;
 483	int ret = 0;
 484
 485	if (new_extents == 0)
 486		return 0;
 487
 488	info = search_free_space_info(trans, block_group, path, 1);
 489	if (IS_ERR(info)) {
 490		ret = PTR_ERR(info);
 491		goto out;
 492	}
 493	flags = btrfs_free_space_flags(path->nodes[0], info);
 494	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
 495
 496	extent_count += new_extents;
 497	btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
 498	btrfs_mark_buffer_dirty(trans, path->nodes[0]);
 499	btrfs_release_path(path);
 500
 501	if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
 502	    extent_count > block_group->bitmap_high_thresh) {
 503		ret = convert_free_space_to_bitmaps(trans, block_group, path);
 
 504	} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
 505		   extent_count < block_group->bitmap_low_thresh) {
 506		ret = convert_free_space_to_extents(trans, block_group, path);
 
 507	}
 508
 509out:
 510	return ret;
 511}
 512
 513EXPORT_FOR_TESTS
 514int free_space_test_bit(struct btrfs_block_group *block_group,
 515			struct btrfs_path *path, u64 offset)
 516{
 517	struct extent_buffer *leaf;
 518	struct btrfs_key key;
 519	u64 found_start, found_end;
 520	unsigned long ptr, i;
 521
 522	leaf = path->nodes[0];
 523	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 524	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
 525
 526	found_start = key.objectid;
 527	found_end = key.objectid + key.offset;
 528	ASSERT(offset >= found_start && offset < found_end);
 529
 530	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 531	i = div_u64(offset - found_start,
 532		    block_group->fs_info->sectorsize);
 533	return !!extent_buffer_test_bit(leaf, ptr, i);
 534}
 535
 536static void free_space_set_bits(struct btrfs_trans_handle *trans,
 537				struct btrfs_block_group *block_group,
 538				struct btrfs_path *path, u64 *start, u64 *size,
 539				int bit)
 540{
 541	struct btrfs_fs_info *fs_info = block_group->fs_info;
 542	struct extent_buffer *leaf;
 543	struct btrfs_key key;
 544	u64 end = *start + *size;
 545	u64 found_start, found_end;
 546	unsigned long ptr, first, last;
 547
 548	leaf = path->nodes[0];
 549	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 550	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
 551
 552	found_start = key.objectid;
 553	found_end = key.objectid + key.offset;
 554	ASSERT(*start >= found_start && *start < found_end);
 555	ASSERT(end > found_start);
 556
 557	if (end > found_end)
 558		end = found_end;
 559
 560	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 561	first = (*start - found_start) >> fs_info->sectorsize_bits;
 562	last = (end - found_start) >> fs_info->sectorsize_bits;
 563	if (bit)
 564		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
 565	else
 566		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
 567	btrfs_mark_buffer_dirty(trans, leaf);
 568
 569	*size -= end - *start;
 570	*start = end;
 571}
 572
 573/*
 574 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
 575 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
 576 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
 577 * looking for.
 578 */
 579static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
 580				  struct btrfs_root *root, struct btrfs_path *p)
 581{
 582	struct btrfs_key key;
 583
 584	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
 585		p->slots[0]++;
 586		return 0;
 587	}
 588
 589	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
 590	btrfs_release_path(p);
 591
 592	key.objectid += key.offset;
 593	key.type = (u8)-1;
 594	key.offset = (u64)-1;
 595
 596	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
 597}
 598
 599/*
 600 * If remove is 1, then we are removing free space, thus clearing bits in the
 601 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
 602 * the bitmap.
 603 */
 604static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
 605				    struct btrfs_block_group *block_group,
 
 606				    struct btrfs_path *path,
 607				    u64 start, u64 size, int remove)
 608{
 609	struct btrfs_root *root = btrfs_free_space_root(block_group);
 610	struct btrfs_key key;
 611	u64 end = start + size;
 612	u64 cur_start, cur_size;
 613	int prev_bit, next_bit;
 614	int new_extents;
 615	int ret;
 616
 617	/*
 618	 * Read the bit for the block immediately before the extent of space if
 619	 * that block is within the block group.
 620	 */
 621	if (start > block_group->start) {
 622		u64 prev_block = start - block_group->fs_info->sectorsize;
 623
 624		key.objectid = prev_block;
 625		key.type = (u8)-1;
 626		key.offset = (u64)-1;
 627
 628		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
 629		if (ret)
 630			goto out;
 631
 632		prev_bit = free_space_test_bit(block_group, path, prev_block);
 633
 634		/* The previous block may have been in the previous bitmap. */
 635		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 636		if (start >= key.objectid + key.offset) {
 637			ret = free_space_next_bitmap(trans, root, path);
 638			if (ret)
 639				goto out;
 640		}
 641	} else {
 642		key.objectid = start;
 643		key.type = (u8)-1;
 644		key.offset = (u64)-1;
 645
 646		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
 647		if (ret)
 648			goto out;
 649
 650		prev_bit = -1;
 651	}
 652
 653	/*
 654	 * Iterate over all of the bitmaps overlapped by the extent of space,
 655	 * clearing/setting bits as required.
 656	 */
 657	cur_start = start;
 658	cur_size = size;
 659	while (1) {
 660		free_space_set_bits(trans, block_group, path, &cur_start, &cur_size,
 661				    !remove);
 662		if (cur_size == 0)
 663			break;
 664		ret = free_space_next_bitmap(trans, root, path);
 665		if (ret)
 666			goto out;
 667	}
 668
 669	/*
 670	 * Read the bit for the block immediately after the extent of space if
 671	 * that block is within the block group.
 672	 */
 673	if (end < block_group->start + block_group->length) {
 674		/* The next block may be in the next bitmap. */
 675		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 676		if (end >= key.objectid + key.offset) {
 677			ret = free_space_next_bitmap(trans, root, path);
 678			if (ret)
 679				goto out;
 680		}
 681
 682		next_bit = free_space_test_bit(block_group, path, end);
 683	} else {
 684		next_bit = -1;
 685	}
 686
 687	if (remove) {
 688		new_extents = -1;
 689		if (prev_bit == 1) {
 690			/* Leftover on the left. */
 691			new_extents++;
 692		}
 693		if (next_bit == 1) {
 694			/* Leftover on the right. */
 695			new_extents++;
 696		}
 697	} else {
 698		new_extents = 1;
 699		if (prev_bit == 1) {
 700			/* Merging with neighbor on the left. */
 701			new_extents--;
 702		}
 703		if (next_bit == 1) {
 704			/* Merging with neighbor on the right. */
 705			new_extents--;
 706		}
 707	}
 708
 709	btrfs_release_path(path);
 710	ret = update_free_space_extent_count(trans, block_group, path,
 711					     new_extents);
 712
 713out:
 714	return ret;
 715}
 716
 717static int remove_free_space_extent(struct btrfs_trans_handle *trans,
 718				    struct btrfs_block_group *block_group,
 
 719				    struct btrfs_path *path,
 720				    u64 start, u64 size)
 721{
 722	struct btrfs_root *root = btrfs_free_space_root(block_group);
 723	struct btrfs_key key;
 724	u64 found_start, found_end;
 725	u64 end = start + size;
 726	int new_extents = -1;
 727	int ret;
 728
 729	key.objectid = start;
 730	key.type = (u8)-1;
 731	key.offset = (u64)-1;
 732
 733	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 734	if (ret)
 735		goto out;
 736
 737	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 738
 739	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
 740
 741	found_start = key.objectid;
 742	found_end = key.objectid + key.offset;
 743	ASSERT(start >= found_start && end <= found_end);
 744
 745	/*
 746	 * Okay, now that we've found the free space extent which contains the
 747	 * free space that we are removing, there are four cases:
 748	 *
 749	 * 1. We're using the whole extent: delete the key we found and
 750	 * decrement the free space extent count.
 751	 * 2. We are using part of the extent starting at the beginning: delete
 752	 * the key we found and insert a new key representing the leftover at
 753	 * the end. There is no net change in the number of extents.
 754	 * 3. We are using part of the extent ending at the end: delete the key
 755	 * we found and insert a new key representing the leftover at the
 756	 * beginning. There is no net change in the number of extents.
 757	 * 4. We are using part of the extent in the middle: delete the key we
 758	 * found and insert two new keys representing the leftovers on each
 759	 * side. Where we used to have one extent, we now have two, so increment
 760	 * the extent count. We may need to convert the block group to bitmaps
 761	 * as a result.
 762	 */
 763
 764	/* Delete the existing key (cases 1-4). */
 765	ret = btrfs_del_item(trans, root, path);
 766	if (ret)
 767		goto out;
 768
 769	/* Add a key for leftovers at the beginning (cases 3 and 4). */
 770	if (start > found_start) {
 771		key.objectid = found_start;
 772		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 773		key.offset = start - found_start;
 774
 775		btrfs_release_path(path);
 776		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 777		if (ret)
 778			goto out;
 779		new_extents++;
 780	}
 781
 782	/* Add a key for leftovers at the end (cases 2 and 4). */
 783	if (end < found_end) {
 784		key.objectid = end;
 785		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 786		key.offset = found_end - end;
 787
 788		btrfs_release_path(path);
 789		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 790		if (ret)
 791			goto out;
 792		new_extents++;
 793	}
 794
 795	btrfs_release_path(path);
 796	ret = update_free_space_extent_count(trans, block_group, path,
 797					     new_extents);
 798
 799out:
 800	return ret;
 801}
 802
 803EXPORT_FOR_TESTS
 804int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
 805				  struct btrfs_block_group *block_group,
 
 806				  struct btrfs_path *path, u64 start, u64 size)
 807{
 808	struct btrfs_free_space_info *info;
 809	u32 flags;
 810	int ret;
 811
 812	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
 813		ret = __add_block_group_free_space(trans, block_group, path);
 
 814		if (ret)
 815			return ret;
 816	}
 817
 818	info = search_free_space_info(NULL, block_group, path, 0);
 819	if (IS_ERR(info))
 820		return PTR_ERR(info);
 821	flags = btrfs_free_space_flags(path->nodes[0], info);
 822	btrfs_release_path(path);
 823
 824	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
 825		return modify_free_space_bitmap(trans, block_group, path,
 826						start, size, 1);
 827	} else {
 828		return remove_free_space_extent(trans, block_group, path,
 829						start, size);
 830	}
 831}
 832
 833int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
 
 834				u64 start, u64 size)
 835{
 836	struct btrfs_block_group *block_group;
 837	struct btrfs_path *path;
 838	int ret;
 839
 840	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
 841		return 0;
 842
 843	path = btrfs_alloc_path();
 844	if (!path) {
 845		ret = -ENOMEM;
 846		goto out;
 847	}
 848
 849	block_group = btrfs_lookup_block_group(trans->fs_info, start);
 850	if (!block_group) {
 851		ASSERT(0);
 852		ret = -ENOENT;
 853		goto out;
 854	}
 855
 856	mutex_lock(&block_group->free_space_lock);
 857	ret = __remove_from_free_space_tree(trans, block_group, path, start,
 858					    size);
 859	mutex_unlock(&block_group->free_space_lock);
 860
 861	btrfs_put_block_group(block_group);
 862out:
 863	btrfs_free_path(path);
 864	if (ret)
 865		btrfs_abort_transaction(trans, ret);
 866	return ret;
 867}
 868
 869static int add_free_space_extent(struct btrfs_trans_handle *trans,
 870				 struct btrfs_block_group *block_group,
 
 871				 struct btrfs_path *path,
 872				 u64 start, u64 size)
 873{
 874	struct btrfs_root *root = btrfs_free_space_root(block_group);
 875	struct btrfs_key key, new_key;
 876	u64 found_start, found_end;
 877	u64 end = start + size;
 878	int new_extents = 1;
 879	int ret;
 880
 881	/*
 882	 * We are adding a new extent of free space, but we need to merge
 883	 * extents. There are four cases here:
 884	 *
 885	 * 1. The new extent does not have any immediate neighbors to merge
 886	 * with: add the new key and increment the free space extent count. We
 887	 * may need to convert the block group to bitmaps as a result.
 888	 * 2. The new extent has an immediate neighbor before it: remove the
 889	 * previous key and insert a new key combining both of them. There is no
 890	 * net change in the number of extents.
 891	 * 3. The new extent has an immediate neighbor after it: remove the next
 892	 * key and insert a new key combining both of them. There is no net
 893	 * change in the number of extents.
 894	 * 4. The new extent has immediate neighbors on both sides: remove both
 895	 * of the keys and insert a new key combining all of them. Where we used
 896	 * to have two extents, we now have one, so decrement the extent count.
 897	 */
 898
 899	new_key.objectid = start;
 900	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 901	new_key.offset = size;
 902
 903	/* Search for a neighbor on the left. */
 904	if (start == block_group->start)
 905		goto right;
 906	key.objectid = start - 1;
 907	key.type = (u8)-1;
 908	key.offset = (u64)-1;
 909
 910	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 911	if (ret)
 912		goto out;
 913
 914	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 915
 916	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
 917		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
 918		btrfs_release_path(path);
 919		goto right;
 920	}
 921
 922	found_start = key.objectid;
 923	found_end = key.objectid + key.offset;
 924	ASSERT(found_start >= block_group->start &&
 925	       found_end > block_group->start);
 926	ASSERT(found_start < start && found_end <= start);
 927
 928	/*
 929	 * Delete the neighbor on the left and absorb it into the new key (cases
 930	 * 2 and 4).
 931	 */
 932	if (found_end == start) {
 933		ret = btrfs_del_item(trans, root, path);
 934		if (ret)
 935			goto out;
 936		new_key.objectid = found_start;
 937		new_key.offset += key.offset;
 938		new_extents--;
 939	}
 940	btrfs_release_path(path);
 941
 942right:
 943	/* Search for a neighbor on the right. */
 944	if (end == block_group->start + block_group->length)
 945		goto insert;
 946	key.objectid = end;
 947	key.type = (u8)-1;
 948	key.offset = (u64)-1;
 949
 950	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 951	if (ret)
 952		goto out;
 953
 954	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 955
 956	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
 957		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
 958		btrfs_release_path(path);
 959		goto insert;
 960	}
 961
 962	found_start = key.objectid;
 963	found_end = key.objectid + key.offset;
 964	ASSERT(found_start >= block_group->start &&
 965	       found_end > block_group->start);
 966	ASSERT((found_start < start && found_end <= start) ||
 967	       (found_start >= end && found_end > end));
 968
 969	/*
 970	 * Delete the neighbor on the right and absorb it into the new key
 971	 * (cases 3 and 4).
 972	 */
 973	if (found_start == end) {
 974		ret = btrfs_del_item(trans, root, path);
 975		if (ret)
 976			goto out;
 977		new_key.offset += key.offset;
 978		new_extents--;
 979	}
 980	btrfs_release_path(path);
 981
 982insert:
 983	/* Insert the new key (cases 1-4). */
 984	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
 985	if (ret)
 986		goto out;
 987
 988	btrfs_release_path(path);
 989	ret = update_free_space_extent_count(trans, block_group, path,
 990					     new_extents);
 991
 992out:
 993	return ret;
 994}
 995
 996EXPORT_FOR_TESTS
 997int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
 998			     struct btrfs_block_group *block_group,
 
 999			     struct btrfs_path *path, u64 start, u64 size)
1000{
1001	struct btrfs_free_space_info *info;
1002	u32 flags;
1003	int ret;
1004
1005	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
1006		ret = __add_block_group_free_space(trans, block_group, path);
 
1007		if (ret)
1008			return ret;
1009	}
1010
1011	info = search_free_space_info(NULL, block_group, path, 0);
1012	if (IS_ERR(info))
1013		return PTR_ERR(info);
1014	flags = btrfs_free_space_flags(path->nodes[0], info);
1015	btrfs_release_path(path);
1016
1017	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
1018		return modify_free_space_bitmap(trans, block_group, path,
1019						start, size, 0);
1020	} else {
1021		return add_free_space_extent(trans, block_group, path, start,
1022					     size);
1023	}
1024}
1025
1026int add_to_free_space_tree(struct btrfs_trans_handle *trans,
 
1027			   u64 start, u64 size)
1028{
1029	struct btrfs_block_group *block_group;
1030	struct btrfs_path *path;
1031	int ret;
1032
1033	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1034		return 0;
1035
1036	path = btrfs_alloc_path();
1037	if (!path) {
1038		ret = -ENOMEM;
1039		goto out;
1040	}
1041
1042	block_group = btrfs_lookup_block_group(trans->fs_info, start);
1043	if (!block_group) {
1044		ASSERT(0);
1045		ret = -ENOENT;
1046		goto out;
1047	}
1048
1049	mutex_lock(&block_group->free_space_lock);
1050	ret = __add_to_free_space_tree(trans, block_group, path, start, size);
 
1051	mutex_unlock(&block_group->free_space_lock);
1052
1053	btrfs_put_block_group(block_group);
1054out:
1055	btrfs_free_path(path);
1056	if (ret)
1057		btrfs_abort_transaction(trans, ret);
1058	return ret;
1059}
1060
1061/*
1062 * Populate the free space tree by walking the extent tree. Operations on the
1063 * extent tree that happen as a result of writes to the free space tree will go
1064 * through the normal add/remove hooks.
1065 */
1066static int populate_free_space_tree(struct btrfs_trans_handle *trans,
1067				    struct btrfs_block_group *block_group)
 
1068{
1069	struct btrfs_root *extent_root;
1070	struct btrfs_path *path, *path2;
1071	struct btrfs_key key;
1072	u64 start, end;
1073	int ret;
1074
1075	path = btrfs_alloc_path();
1076	if (!path)
1077		return -ENOMEM;
1078	path->reada = READA_FORWARD;
1079
1080	path2 = btrfs_alloc_path();
1081	if (!path2) {
1082		btrfs_free_path(path);
1083		return -ENOMEM;
1084	}
1085
1086	ret = add_new_free_space_info(trans, block_group, path2);
1087	if (ret)
1088		goto out;
1089
1090	mutex_lock(&block_group->free_space_lock);
1091
1092	/*
1093	 * Iterate through all of the extent and metadata items in this block
1094	 * group, adding the free space between them and the free space at the
1095	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1096	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1097	 * contained in.
1098	 */
1099	key.objectid = block_group->start;
1100	key.type = BTRFS_EXTENT_ITEM_KEY;
1101	key.offset = 0;
1102
1103	extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
1104	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
1105	if (ret < 0)
1106		goto out_locked;
1107	ASSERT(ret == 0);
1108
1109	start = block_group->start;
1110	end = block_group->start + block_group->length;
1111	while (1) {
1112		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1113
1114		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
1115		    key.type == BTRFS_METADATA_ITEM_KEY) {
1116			if (key.objectid >= end)
1117				break;
1118
1119			if (start < key.objectid) {
1120				ret = __add_to_free_space_tree(trans,
1121							       block_group,
1122							       path2, start,
1123							       key.objectid -
1124							       start);
1125				if (ret)
1126					goto out_locked;
1127			}
1128			start = key.objectid;
1129			if (key.type == BTRFS_METADATA_ITEM_KEY)
1130				start += trans->fs_info->nodesize;
1131			else
1132				start += key.offset;
1133		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1134			if (key.objectid != block_group->start)
1135				break;
1136		}
1137
1138		ret = btrfs_next_item(extent_root, path);
1139		if (ret < 0)
1140			goto out_locked;
1141		if (ret)
1142			break;
1143	}
1144	if (start < end) {
1145		ret = __add_to_free_space_tree(trans, block_group, path2,
1146					       start, end - start);
1147		if (ret)
1148			goto out_locked;
1149	}
1150
1151	ret = 0;
1152out_locked:
1153	mutex_unlock(&block_group->free_space_lock);
1154out:
1155	btrfs_free_path(path2);
1156	btrfs_free_path(path);
1157	return ret;
1158}
1159
1160int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
1161{
1162	struct btrfs_trans_handle *trans;
1163	struct btrfs_root *tree_root = fs_info->tree_root;
1164	struct btrfs_root *free_space_root;
1165	struct btrfs_block_group *block_group;
1166	struct rb_node *node;
1167	int ret;
1168
1169	trans = btrfs_start_transaction(tree_root, 0);
1170	if (IS_ERR(trans))
1171		return PTR_ERR(trans);
1172
1173	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1174	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1175	free_space_root = btrfs_create_tree(trans,
1176					    BTRFS_FREE_SPACE_TREE_OBJECTID);
1177	if (IS_ERR(free_space_root)) {
1178		ret = PTR_ERR(free_space_root);
1179		goto abort;
1180	}
1181	ret = btrfs_global_root_insert(free_space_root);
1182	if (ret) {
1183		btrfs_put_root(free_space_root);
1184		goto abort;
1185	}
1186
1187	node = rb_first_cached(&fs_info->block_group_cache_tree);
1188	while (node) {
1189		block_group = rb_entry(node, struct btrfs_block_group,
1190				       cache_node);
1191		ret = populate_free_space_tree(trans, block_group);
1192		if (ret)
1193			goto abort;
1194		node = rb_next(node);
1195	}
1196
1197	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1198	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1199	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
 
1200	ret = btrfs_commit_transaction(trans);
 
 
1201
1202	/*
1203	 * Now that we've committed the transaction any reading of our commit
1204	 * root will be safe, so we can cache from the free space tree now.
1205	 */
1206	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1207	return ret;
1208
1209abort:
1210	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1211	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1212	btrfs_abort_transaction(trans, ret);
1213	btrfs_end_transaction(trans);
1214	return ret;
1215}
1216
1217static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1218				 struct btrfs_root *root)
1219{
1220	struct btrfs_path *path;
1221	struct btrfs_key key;
1222	int nr;
1223	int ret;
1224
1225	path = btrfs_alloc_path();
1226	if (!path)
1227		return -ENOMEM;
1228
 
 
1229	key.objectid = 0;
1230	key.type = 0;
1231	key.offset = 0;
1232
1233	while (1) {
1234		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1235		if (ret < 0)
1236			goto out;
1237
1238		nr = btrfs_header_nritems(path->nodes[0]);
1239		if (!nr)
1240			break;
1241
1242		path->slots[0] = 0;
1243		ret = btrfs_del_items(trans, root, path, 0, nr);
1244		if (ret)
1245			goto out;
1246
1247		btrfs_release_path(path);
1248	}
1249
1250	ret = 0;
1251out:
1252	btrfs_free_path(path);
1253	return ret;
1254}
1255
1256int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
1257{
1258	struct btrfs_trans_handle *trans;
1259	struct btrfs_root *tree_root = fs_info->tree_root;
1260	struct btrfs_key key = {
1261		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1262		.type = BTRFS_ROOT_ITEM_KEY,
1263		.offset = 0,
1264	};
1265	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1266	int ret;
1267
1268	trans = btrfs_start_transaction(tree_root, 0);
1269	if (IS_ERR(trans))
1270		return PTR_ERR(trans);
1271
1272	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1273	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
 
1274
1275	ret = clear_free_space_tree(trans, free_space_root);
1276	if (ret)
1277		goto abort;
1278
1279	ret = btrfs_del_root(trans, &free_space_root->root_key);
1280	if (ret)
1281		goto abort;
1282
1283	btrfs_global_root_delete(free_space_root);
1284
1285	spin_lock(&fs_info->trans_lock);
1286	list_del(&free_space_root->dirty_list);
1287	spin_unlock(&fs_info->trans_lock);
1288
1289	btrfs_tree_lock(free_space_root->node);
1290	btrfs_clear_buffer_dirty(trans, free_space_root->node);
1291	btrfs_tree_unlock(free_space_root->node);
1292	btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
1293			      free_space_root->node, 0, 1);
1294
1295	btrfs_put_root(free_space_root);
1296
1297	return btrfs_commit_transaction(trans);
1298
1299abort:
1300	btrfs_abort_transaction(trans, ret);
1301	btrfs_end_transaction(trans);
1302	return ret;
1303}
1304
1305int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
1306{
1307	struct btrfs_trans_handle *trans;
1308	struct btrfs_key key = {
1309		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1310		.type = BTRFS_ROOT_ITEM_KEY,
1311		.offset = 0,
1312	};
1313	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1314	struct rb_node *node;
1315	int ret;
1316
1317	trans = btrfs_start_transaction(free_space_root, 1);
1318	if (IS_ERR(trans))
1319		return PTR_ERR(trans);
1320
1321	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1322	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1323
1324	ret = clear_free_space_tree(trans, free_space_root);
1325	if (ret)
1326		goto abort;
1327
1328	node = rb_first_cached(&fs_info->block_group_cache_tree);
1329	while (node) {
1330		struct btrfs_block_group *block_group;
1331
1332		block_group = rb_entry(node, struct btrfs_block_group,
1333				       cache_node);
1334		ret = populate_free_space_tree(trans, block_group);
1335		if (ret)
1336			goto abort;
1337		node = rb_next(node);
1338	}
1339
1340	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1341	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1342	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1343
1344	ret = btrfs_commit_transaction(trans);
1345	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1346	return ret;
1347abort:
1348	btrfs_abort_transaction(trans, ret);
1349	btrfs_end_transaction(trans);
1350	return ret;
1351}
1352
1353static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
1354					struct btrfs_block_group *block_group,
 
1355					struct btrfs_path *path)
1356{
 
1357	int ret;
1358
1359	clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags);
 
1360
1361	ret = add_new_free_space_info(trans, block_group, path);
 
 
1362	if (ret)
1363		return ret;
1364
1365	return __add_to_free_space_tree(trans, block_group, path,
1366					block_group->start,
1367					block_group->length);
1368}
1369
1370int add_block_group_free_space(struct btrfs_trans_handle *trans,
1371			       struct btrfs_block_group *block_group)
 
1372{
1373	struct btrfs_fs_info *fs_info = trans->fs_info;
1374	struct btrfs_path *path = NULL;
1375	int ret = 0;
1376
1377	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1378		return 0;
1379
1380	mutex_lock(&block_group->free_space_lock);
1381	if (!test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags))
1382		goto out;
1383
1384	path = btrfs_alloc_path();
1385	if (!path) {
1386		ret = -ENOMEM;
1387		goto out;
1388	}
1389
1390	ret = __add_block_group_free_space(trans, block_group, path);
1391
1392out:
1393	btrfs_free_path(path);
1394	mutex_unlock(&block_group->free_space_lock);
1395	if (ret)
1396		btrfs_abort_transaction(trans, ret);
1397	return ret;
1398}
1399
1400int remove_block_group_free_space(struct btrfs_trans_handle *trans,
1401				  struct btrfs_block_group *block_group)
 
1402{
1403	struct btrfs_root *root = btrfs_free_space_root(block_group);
1404	struct btrfs_path *path;
1405	struct btrfs_key key, found_key;
1406	struct extent_buffer *leaf;
1407	u64 start, end;
1408	int done = 0, nr;
1409	int ret;
1410
1411	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1412		return 0;
1413
1414	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
1415		/* We never added this block group to the free space tree. */
1416		return 0;
1417	}
1418
1419	path = btrfs_alloc_path();
1420	if (!path) {
1421		ret = -ENOMEM;
1422		goto out;
1423	}
1424
1425	start = block_group->start;
1426	end = block_group->start + block_group->length;
1427
1428	key.objectid = end - 1;
1429	key.type = (u8)-1;
1430	key.offset = (u64)-1;
1431
1432	while (!done) {
1433		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
1434		if (ret)
1435			goto out;
1436
1437		leaf = path->nodes[0];
1438		nr = 0;
1439		path->slots[0]++;
1440		while (path->slots[0] > 0) {
1441			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
1442
1443			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
1444				ASSERT(found_key.objectid == block_group->start);
1445				ASSERT(found_key.offset == block_group->length);
1446				done = 1;
1447				nr++;
1448				path->slots[0]--;
1449				break;
1450			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
1451				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
1452				ASSERT(found_key.objectid >= start);
1453				ASSERT(found_key.objectid < end);
1454				ASSERT(found_key.objectid + found_key.offset <= end);
1455				nr++;
1456				path->slots[0]--;
1457			} else {
1458				ASSERT(0);
1459			}
1460		}
1461
1462		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
1463		if (ret)
1464			goto out;
1465		btrfs_release_path(path);
1466	}
1467
1468	ret = 0;
1469out:
1470	btrfs_free_path(path);
1471	if (ret)
1472		btrfs_abort_transaction(trans, ret);
1473	return ret;
1474}
1475
1476static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
1477				   struct btrfs_path *path,
1478				   u32 expected_extent_count)
1479{
1480	struct btrfs_block_group *block_group;
1481	struct btrfs_fs_info *fs_info;
1482	struct btrfs_root *root;
1483	struct btrfs_key key;
1484	int prev_bit = 0, bit;
1485	/* Initialize to silence GCC. */
1486	u64 extent_start = 0;
1487	u64 end, offset;
1488	u64 total_found = 0;
1489	u32 extent_count = 0;
1490	int ret;
1491
1492	block_group = caching_ctl->block_group;
1493	fs_info = block_group->fs_info;
1494	root = btrfs_free_space_root(block_group);
1495
1496	end = block_group->start + block_group->length;
1497
1498	while (1) {
1499		ret = btrfs_next_item(root, path);
1500		if (ret < 0)
1501			goto out;
1502		if (ret)
1503			break;
1504
1505		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1506
1507		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1508			break;
1509
1510		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
1511		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1512
 
 
1513		offset = key.objectid;
1514		while (offset < key.objectid + key.offset) {
1515			bit = free_space_test_bit(block_group, path, offset);
1516			if (prev_bit == 0 && bit == 1) {
1517				extent_start = offset;
1518			} else if (prev_bit == 1 && bit == 0) {
1519				u64 space_added;
1520
1521				ret = btrfs_add_new_free_space(block_group,
1522							       extent_start,
1523							       offset,
1524							       &space_added);
1525				if (ret)
1526					goto out;
1527				total_found += space_added;
1528				if (total_found > CACHING_CTL_WAKE_UP) {
1529					total_found = 0;
1530					wake_up(&caching_ctl->wait);
1531				}
1532				extent_count++;
1533			}
1534			prev_bit = bit;
1535			offset += fs_info->sectorsize;
1536		}
1537	}
1538	if (prev_bit == 1) {
1539		ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
1540		if (ret)
1541			goto out;
1542		extent_count++;
1543	}
1544
1545	if (extent_count != expected_extent_count) {
1546		btrfs_err(fs_info,
1547			  "incorrect extent count for %llu; counted %u, expected %u",
1548			  block_group->start, extent_count,
1549			  expected_extent_count);
1550		ASSERT(0);
1551		ret = -EIO;
1552		goto out;
1553	}
1554
 
 
1555	ret = 0;
1556out:
1557	return ret;
1558}
1559
1560static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
1561				   struct btrfs_path *path,
1562				   u32 expected_extent_count)
1563{
1564	struct btrfs_block_group *block_group;
1565	struct btrfs_fs_info *fs_info;
1566	struct btrfs_root *root;
1567	struct btrfs_key key;
1568	u64 end;
1569	u64 total_found = 0;
1570	u32 extent_count = 0;
1571	int ret;
1572
1573	block_group = caching_ctl->block_group;
1574	fs_info = block_group->fs_info;
1575	root = btrfs_free_space_root(block_group);
1576
1577	end = block_group->start + block_group->length;
1578
1579	while (1) {
1580		u64 space_added;
1581
1582		ret = btrfs_next_item(root, path);
1583		if (ret < 0)
1584			goto out;
1585		if (ret)
1586			break;
1587
1588		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1589
1590		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1591			break;
1592
1593		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
1594		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1595
1596		ret = btrfs_add_new_free_space(block_group, key.objectid,
1597					       key.objectid + key.offset,
1598					       &space_added);
1599		if (ret)
1600			goto out;
1601		total_found += space_added;
1602		if (total_found > CACHING_CTL_WAKE_UP) {
1603			total_found = 0;
1604			wake_up(&caching_ctl->wait);
1605		}
1606		extent_count++;
1607	}
1608
1609	if (extent_count != expected_extent_count) {
1610		btrfs_err(fs_info,
1611			  "incorrect extent count for %llu; counted %u, expected %u",
1612			  block_group->start, extent_count,
1613			  expected_extent_count);
1614		ASSERT(0);
1615		ret = -EIO;
1616		goto out;
1617	}
1618
 
 
1619	ret = 0;
1620out:
1621	return ret;
1622}
1623
1624int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1625{
1626	struct btrfs_block_group *block_group;
 
1627	struct btrfs_free_space_info *info;
1628	struct btrfs_path *path;
1629	u32 extent_count, flags;
1630	int ret;
1631
1632	block_group = caching_ctl->block_group;
 
1633
1634	path = btrfs_alloc_path();
1635	if (!path)
1636		return -ENOMEM;
1637
1638	/*
1639	 * Just like caching_thread() doesn't want to deadlock on the extent
1640	 * tree, we don't want to deadlock on the free space tree.
1641	 */
1642	path->skip_locking = 1;
1643	path->search_commit_root = 1;
1644	path->reada = READA_FORWARD;
1645
1646	info = search_free_space_info(NULL, block_group, path, 0);
1647	if (IS_ERR(info)) {
1648		ret = PTR_ERR(info);
1649		goto out;
1650	}
1651	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1652	flags = btrfs_free_space_flags(path->nodes[0], info);
1653
1654	/*
1655	 * We left path pointing to the free space info item, so now
1656	 * load_free_space_foo can just iterate through the free space tree from
1657	 * there.
1658	 */
1659	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1660		ret = load_free_space_bitmaps(caching_ctl, path, extent_count);
1661	else
1662		ret = load_free_space_extents(caching_ctl, path, extent_count);
1663
1664out:
1665	btrfs_free_path(path);
1666	return ret;
1667}
v4.10.11
 
   1/*
   2 * Copyright (C) 2015 Facebook.  All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public
   6 * License v2 as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public
  14 * License along with this program; if not, write to the
  15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16 * Boston, MA 02111-1307, USA.
  17 */
  18
  19#include <linux/kernel.h>
  20#include <linux/vmalloc.h>
 
  21#include "ctree.h"
  22#include "disk-io.h"
  23#include "locking.h"
  24#include "free-space-tree.h"
  25#include "transaction.h"
 
 
 
 
 
  26
  27static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
  28					struct btrfs_fs_info *fs_info,
  29					struct btrfs_block_group_cache *block_group,
  30					struct btrfs_path *path);
  31
  32void set_free_space_tree_thresholds(struct btrfs_block_group_cache *cache)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  33{
  34	u32 bitmap_range;
  35	size_t bitmap_size;
  36	u64 num_bitmaps, total_bitmap_size;
  37
 
 
 
 
  38	/*
  39	 * We convert to bitmaps when the disk space required for using extents
  40	 * exceeds that required for using bitmaps.
  41	 */
  42	bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
  43	num_bitmaps = div_u64(cache->key.offset + bitmap_range - 1,
  44			      bitmap_range);
  45	bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
  46	total_bitmap_size = num_bitmaps * bitmap_size;
  47	cache->bitmap_high_thresh = div_u64(total_bitmap_size,
  48					    sizeof(struct btrfs_item));
  49
  50	/*
  51	 * We allow for a small buffer between the high threshold and low
  52	 * threshold to avoid thrashing back and forth between the two formats.
  53	 */
  54	if (cache->bitmap_high_thresh > 100)
  55		cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
  56	else
  57		cache->bitmap_low_thresh = 0;
  58}
  59
  60static int add_new_free_space_info(struct btrfs_trans_handle *trans,
  61				   struct btrfs_fs_info *fs_info,
  62				   struct btrfs_block_group_cache *block_group,
  63				   struct btrfs_path *path)
  64{
  65	struct btrfs_root *root = fs_info->free_space_root;
  66	struct btrfs_free_space_info *info;
  67	struct btrfs_key key;
  68	struct extent_buffer *leaf;
  69	int ret;
  70
  71	key.objectid = block_group->key.objectid;
  72	key.type = BTRFS_FREE_SPACE_INFO_KEY;
  73	key.offset = block_group->key.offset;
  74
  75	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
  76	if (ret)
  77		goto out;
  78
  79	leaf = path->nodes[0];
  80	info = btrfs_item_ptr(leaf, path->slots[0],
  81			      struct btrfs_free_space_info);
  82	btrfs_set_free_space_extent_count(leaf, info, 0);
  83	btrfs_set_free_space_flags(leaf, info, 0);
  84	btrfs_mark_buffer_dirty(leaf);
  85
  86	ret = 0;
  87out:
  88	btrfs_release_path(path);
  89	return ret;
  90}
  91
  92struct btrfs_free_space_info *
  93search_free_space_info(struct btrfs_trans_handle *trans,
  94		       struct btrfs_fs_info *fs_info,
  95		       struct btrfs_block_group_cache *block_group,
  96		       struct btrfs_path *path, int cow)
  97{
  98	struct btrfs_root *root = fs_info->free_space_root;
 
  99	struct btrfs_key key;
 100	int ret;
 101
 102	key.objectid = block_group->key.objectid;
 103	key.type = BTRFS_FREE_SPACE_INFO_KEY;
 104	key.offset = block_group->key.offset;
 105
 106	ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
 107	if (ret < 0)
 108		return ERR_PTR(ret);
 109	if (ret != 0) {
 110		btrfs_warn(fs_info, "missing free space info for %llu",
 111			   block_group->key.objectid);
 112		ASSERT(0);
 113		return ERR_PTR(-ENOENT);
 114	}
 115
 116	return btrfs_item_ptr(path->nodes[0], path->slots[0],
 117			      struct btrfs_free_space_info);
 118}
 119
 120/*
 121 * btrfs_search_slot() but we're looking for the greatest key less than the
 122 * passed key.
 123 */
 124static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
 125				  struct btrfs_root *root,
 126				  struct btrfs_key *key, struct btrfs_path *p,
 127				  int ins_len, int cow)
 128{
 129	int ret;
 130
 131	ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
 132	if (ret < 0)
 133		return ret;
 134
 135	if (ret == 0) {
 136		ASSERT(0);
 137		return -EIO;
 138	}
 139
 140	if (p->slots[0] == 0) {
 141		ASSERT(0);
 142		return -EIO;
 143	}
 144	p->slots[0]--;
 145
 146	return 0;
 147}
 148
 149static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
 
 150{
 151	return DIV_ROUND_UP((u32)div_u64(size, sectorsize), BITS_PER_BYTE);
 152}
 153
 154static u8 *alloc_bitmap(u32 bitmap_size)
 155{
 156	void *mem;
 
 
 157
 158	/*
 159	 * The allocation size varies, observed numbers were < 4K up to 16K.
 160	 * Using vmalloc unconditionally would be too heavy, we'll try
 161	 * contiguous allocations first.
 
 
 
 162	 */
 163	if  (bitmap_size <= PAGE_SIZE)
 164		return kzalloc(bitmap_size, GFP_NOFS);
 
 
 
 165
 166	mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
 167	if (mem)
 168		return mem;
 
 
 
 169
 170	return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
 171			 PAGE_KERNEL);
 
 
 
 
 
 
 
 
 
 172}
 173
 
 174int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 175				  struct btrfs_fs_info *fs_info,
 176				  struct btrfs_block_group_cache *block_group,
 177				  struct btrfs_path *path)
 178{
 179	struct btrfs_root *root = fs_info->free_space_root;
 
 180	struct btrfs_free_space_info *info;
 181	struct btrfs_key key, found_key;
 182	struct extent_buffer *leaf;
 183	u8 *bitmap, *bitmap_cursor;
 
 184	u64 start, end;
 185	u64 bitmap_range, i;
 186	u32 bitmap_size, flags, expected_extent_count;
 187	u32 extent_count = 0;
 188	int done = 0, nr;
 189	int ret;
 190
 191	bitmap_size = free_space_bitmap_size(block_group->key.offset,
 192					     fs_info->sectorsize);
 193	bitmap = alloc_bitmap(bitmap_size);
 194	if (!bitmap) {
 195		ret = -ENOMEM;
 196		goto out;
 197	}
 198
 199	start = block_group->key.objectid;
 200	end = block_group->key.objectid + block_group->key.offset;
 201
 202	key.objectid = end - 1;
 203	key.type = (u8)-1;
 204	key.offset = (u64)-1;
 205
 206	while (!done) {
 207		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 208		if (ret)
 209			goto out;
 210
 211		leaf = path->nodes[0];
 212		nr = 0;
 213		path->slots[0]++;
 214		while (path->slots[0] > 0) {
 215			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
 216
 217			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
 218				ASSERT(found_key.objectid == block_group->key.objectid);
 219				ASSERT(found_key.offset == block_group->key.offset);
 220				done = 1;
 221				break;
 222			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
 223				u64 first, last;
 224
 225				ASSERT(found_key.objectid >= start);
 226				ASSERT(found_key.objectid < end);
 227				ASSERT(found_key.objectid + found_key.offset <= end);
 228
 229				first = div_u64(found_key.objectid - start,
 230						fs_info->sectorsize);
 231				last = div_u64(found_key.objectid + found_key.offset - start,
 232					       fs_info->sectorsize);
 233				le_bitmap_set(bitmap, first, last - first);
 234
 235				extent_count++;
 236				nr++;
 237				path->slots[0]--;
 238			} else {
 239				ASSERT(0);
 240			}
 241		}
 242
 243		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
 244		if (ret)
 245			goto out;
 246		btrfs_release_path(path);
 247	}
 248
 249	info = search_free_space_info(trans, fs_info, block_group, path, 1);
 250	if (IS_ERR(info)) {
 251		ret = PTR_ERR(info);
 252		goto out;
 253	}
 254	leaf = path->nodes[0];
 255	flags = btrfs_free_space_flags(leaf, info);
 256	flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
 257	btrfs_set_free_space_flags(leaf, info, flags);
 258	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
 259	btrfs_mark_buffer_dirty(leaf);
 260	btrfs_release_path(path);
 261
 262	if (extent_count != expected_extent_count) {
 263		btrfs_err(fs_info,
 264			  "incorrect extent count for %llu; counted %u, expected %u",
 265			  block_group->key.objectid, extent_count,
 266			  expected_extent_count);
 267		ASSERT(0);
 268		ret = -EIO;
 269		goto out;
 270	}
 271
 272	bitmap_cursor = bitmap;
 273	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
 274	i = start;
 275	while (i < end) {
 276		unsigned long ptr;
 277		u64 extent_size;
 278		u32 data_size;
 279
 280		extent_size = min(end - i, bitmap_range);
 281		data_size = free_space_bitmap_size(extent_size,
 282						   fs_info->sectorsize);
 283
 284		key.objectid = i;
 285		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
 286		key.offset = extent_size;
 287
 288		ret = btrfs_insert_empty_item(trans, root, path, &key,
 289					      data_size);
 290		if (ret)
 291			goto out;
 292
 293		leaf = path->nodes[0];
 294		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 295		write_extent_buffer(leaf, bitmap_cursor, ptr,
 296				    data_size);
 297		btrfs_mark_buffer_dirty(leaf);
 298		btrfs_release_path(path);
 299
 300		i += extent_size;
 301		bitmap_cursor += data_size;
 302	}
 303
 304	ret = 0;
 305out:
 306	kvfree(bitmap);
 307	if (ret)
 308		btrfs_abort_transaction(trans, ret);
 309	return ret;
 310}
 311
 
 312int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 313				  struct btrfs_fs_info *fs_info,
 314				  struct btrfs_block_group_cache *block_group,
 315				  struct btrfs_path *path)
 316{
 317	struct btrfs_root *root = fs_info->free_space_root;
 
 318	struct btrfs_free_space_info *info;
 319	struct btrfs_key key, found_key;
 320	struct extent_buffer *leaf;
 321	u8 *bitmap;
 322	u64 start, end;
 323	/* Initialize to silence GCC. */
 324	u64 extent_start = 0;
 325	u64 offset;
 326	u32 bitmap_size, flags, expected_extent_count;
 327	int prev_bit = 0, bit, bitnr;
 328	u32 extent_count = 0;
 329	int done = 0, nr;
 330	int ret;
 331
 332	bitmap_size = free_space_bitmap_size(block_group->key.offset,
 333					     fs_info->sectorsize);
 334	bitmap = alloc_bitmap(bitmap_size);
 335	if (!bitmap) {
 336		ret = -ENOMEM;
 337		goto out;
 338	}
 339
 340	start = block_group->key.objectid;
 341	end = block_group->key.objectid + block_group->key.offset;
 342
 343	key.objectid = end - 1;
 344	key.type = (u8)-1;
 345	key.offset = (u64)-1;
 346
 347	while (!done) {
 348		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 349		if (ret)
 350			goto out;
 351
 352		leaf = path->nodes[0];
 353		nr = 0;
 354		path->slots[0]++;
 355		while (path->slots[0] > 0) {
 356			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
 357
 358			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
 359				ASSERT(found_key.objectid == block_group->key.objectid);
 360				ASSERT(found_key.offset == block_group->key.offset);
 361				done = 1;
 362				break;
 363			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
 364				unsigned long ptr;
 365				u8 *bitmap_cursor;
 366				u32 bitmap_pos, data_size;
 367
 368				ASSERT(found_key.objectid >= start);
 369				ASSERT(found_key.objectid < end);
 370				ASSERT(found_key.objectid + found_key.offset <= end);
 371
 372				bitmap_pos = div_u64(found_key.objectid - start,
 373						     fs_info->sectorsize *
 374						     BITS_PER_BYTE);
 375				bitmap_cursor = bitmap + bitmap_pos;
 376				data_size = free_space_bitmap_size(found_key.offset,
 377								   fs_info->sectorsize);
 378
 379				ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
 380				read_extent_buffer(leaf, bitmap_cursor, ptr,
 381						   data_size);
 382
 383				nr++;
 384				path->slots[0]--;
 385			} else {
 386				ASSERT(0);
 387			}
 388		}
 389
 390		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
 391		if (ret)
 392			goto out;
 393		btrfs_release_path(path);
 394	}
 395
 396	info = search_free_space_info(trans, fs_info, block_group, path, 1);
 397	if (IS_ERR(info)) {
 398		ret = PTR_ERR(info);
 399		goto out;
 400	}
 401	leaf = path->nodes[0];
 402	flags = btrfs_free_space_flags(leaf, info);
 403	flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
 404	btrfs_set_free_space_flags(leaf, info, flags);
 405	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
 406	btrfs_mark_buffer_dirty(leaf);
 407	btrfs_release_path(path);
 408
 409	offset = start;
 410	bitnr = 0;
 411	while (offset < end) {
 412		bit = !!le_test_bit(bitnr, bitmap);
 413		if (prev_bit == 0 && bit == 1) {
 414			extent_start = offset;
 415		} else if (prev_bit == 1 && bit == 0) {
 416			key.objectid = extent_start;
 417			key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 418			key.offset = offset - extent_start;
 419
 420			ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 421			if (ret)
 422				goto out;
 423			btrfs_release_path(path);
 424
 425			extent_count++;
 426		}
 427		prev_bit = bit;
 428		offset += fs_info->sectorsize;
 429		bitnr++;
 430	}
 431	if (prev_bit == 1) {
 432		key.objectid = extent_start;
 433		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 434		key.offset = end - extent_start;
 435
 436		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 437		if (ret)
 438			goto out;
 439		btrfs_release_path(path);
 440
 441		extent_count++;
 
 
 442	}
 443
 444	if (extent_count != expected_extent_count) {
 445		btrfs_err(fs_info,
 446			  "incorrect extent count for %llu; counted %u, expected %u",
 447			  block_group->key.objectid, extent_count,
 448			  expected_extent_count);
 449		ASSERT(0);
 450		ret = -EIO;
 451		goto out;
 452	}
 453
 454	ret = 0;
 455out:
 456	kvfree(bitmap);
 457	if (ret)
 458		btrfs_abort_transaction(trans, ret);
 459	return ret;
 460}
 461
 462static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
 463					  struct btrfs_fs_info *fs_info,
 464					  struct btrfs_block_group_cache *block_group,
 465					  struct btrfs_path *path,
 466					  int new_extents)
 467{
 468	struct btrfs_free_space_info *info;
 469	u32 flags;
 470	u32 extent_count;
 471	int ret = 0;
 472
 473	if (new_extents == 0)
 474		return 0;
 475
 476	info = search_free_space_info(trans, fs_info, block_group, path, 1);
 477	if (IS_ERR(info)) {
 478		ret = PTR_ERR(info);
 479		goto out;
 480	}
 481	flags = btrfs_free_space_flags(path->nodes[0], info);
 482	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
 483
 484	extent_count += new_extents;
 485	btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
 486	btrfs_mark_buffer_dirty(path->nodes[0]);
 487	btrfs_release_path(path);
 488
 489	if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
 490	    extent_count > block_group->bitmap_high_thresh) {
 491		ret = convert_free_space_to_bitmaps(trans, fs_info, block_group,
 492						    path);
 493	} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
 494		   extent_count < block_group->bitmap_low_thresh) {
 495		ret = convert_free_space_to_extents(trans, fs_info, block_group,
 496						    path);
 497	}
 498
 499out:
 500	return ret;
 501}
 502
 503int free_space_test_bit(struct btrfs_block_group_cache *block_group,
 
 504			struct btrfs_path *path, u64 offset)
 505{
 506	struct extent_buffer *leaf;
 507	struct btrfs_key key;
 508	u64 found_start, found_end;
 509	unsigned long ptr, i;
 510
 511	leaf = path->nodes[0];
 512	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 513	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
 514
 515	found_start = key.objectid;
 516	found_end = key.objectid + key.offset;
 517	ASSERT(offset >= found_start && offset < found_end);
 518
 519	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 520	i = div_u64(offset - found_start,
 521		    block_group->fs_info->sectorsize);
 522	return !!extent_buffer_test_bit(leaf, ptr, i);
 523}
 524
 525static void free_space_set_bits(struct btrfs_block_group_cache *block_group,
 
 526				struct btrfs_path *path, u64 *start, u64 *size,
 527				int bit)
 528{
 529	struct btrfs_fs_info *fs_info = block_group->fs_info;
 530	struct extent_buffer *leaf;
 531	struct btrfs_key key;
 532	u64 end = *start + *size;
 533	u64 found_start, found_end;
 534	unsigned long ptr, first, last;
 535
 536	leaf = path->nodes[0];
 537	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 538	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
 539
 540	found_start = key.objectid;
 541	found_end = key.objectid + key.offset;
 542	ASSERT(*start >= found_start && *start < found_end);
 543	ASSERT(end > found_start);
 544
 545	if (end > found_end)
 546		end = found_end;
 547
 548	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 549	first = div_u64(*start - found_start, fs_info->sectorsize);
 550	last = div_u64(end - found_start, fs_info->sectorsize);
 551	if (bit)
 552		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
 553	else
 554		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
 555	btrfs_mark_buffer_dirty(leaf);
 556
 557	*size -= end - *start;
 558	*start = end;
 559}
 560
 561/*
 562 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
 563 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
 564 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
 565 * looking for.
 566 */
 567static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
 568				  struct btrfs_root *root, struct btrfs_path *p)
 569{
 570	struct btrfs_key key;
 571
 572	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
 573		p->slots[0]++;
 574		return 0;
 575	}
 576
 577	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
 578	btrfs_release_path(p);
 579
 580	key.objectid += key.offset;
 581	key.type = (u8)-1;
 582	key.offset = (u64)-1;
 583
 584	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
 585}
 586
 587/*
 588 * If remove is 1, then we are removing free space, thus clearing bits in the
 589 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
 590 * the bitmap.
 591 */
 592static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
 593				    struct btrfs_fs_info *fs_info,
 594				    struct btrfs_block_group_cache *block_group,
 595				    struct btrfs_path *path,
 596				    u64 start, u64 size, int remove)
 597{
 598	struct btrfs_root *root = fs_info->free_space_root;
 599	struct btrfs_key key;
 600	u64 end = start + size;
 601	u64 cur_start, cur_size;
 602	int prev_bit, next_bit;
 603	int new_extents;
 604	int ret;
 605
 606	/*
 607	 * Read the bit for the block immediately before the extent of space if
 608	 * that block is within the block group.
 609	 */
 610	if (start > block_group->key.objectid) {
 611		u64 prev_block = start - block_group->fs_info->sectorsize;
 612
 613		key.objectid = prev_block;
 614		key.type = (u8)-1;
 615		key.offset = (u64)-1;
 616
 617		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
 618		if (ret)
 619			goto out;
 620
 621		prev_bit = free_space_test_bit(block_group, path, prev_block);
 622
 623		/* The previous block may have been in the previous bitmap. */
 624		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 625		if (start >= key.objectid + key.offset) {
 626			ret = free_space_next_bitmap(trans, root, path);
 627			if (ret)
 628				goto out;
 629		}
 630	} else {
 631		key.objectid = start;
 632		key.type = (u8)-1;
 633		key.offset = (u64)-1;
 634
 635		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
 636		if (ret)
 637			goto out;
 638
 639		prev_bit = -1;
 640	}
 641
 642	/*
 643	 * Iterate over all of the bitmaps overlapped by the extent of space,
 644	 * clearing/setting bits as required.
 645	 */
 646	cur_start = start;
 647	cur_size = size;
 648	while (1) {
 649		free_space_set_bits(block_group, path, &cur_start, &cur_size,
 650				    !remove);
 651		if (cur_size == 0)
 652			break;
 653		ret = free_space_next_bitmap(trans, root, path);
 654		if (ret)
 655			goto out;
 656	}
 657
 658	/*
 659	 * Read the bit for the block immediately after the extent of space if
 660	 * that block is within the block group.
 661	 */
 662	if (end < block_group->key.objectid + block_group->key.offset) {
 663		/* The next block may be in the next bitmap. */
 664		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 665		if (end >= key.objectid + key.offset) {
 666			ret = free_space_next_bitmap(trans, root, path);
 667			if (ret)
 668				goto out;
 669		}
 670
 671		next_bit = free_space_test_bit(block_group, path, end);
 672	} else {
 673		next_bit = -1;
 674	}
 675
 676	if (remove) {
 677		new_extents = -1;
 678		if (prev_bit == 1) {
 679			/* Leftover on the left. */
 680			new_extents++;
 681		}
 682		if (next_bit == 1) {
 683			/* Leftover on the right. */
 684			new_extents++;
 685		}
 686	} else {
 687		new_extents = 1;
 688		if (prev_bit == 1) {
 689			/* Merging with neighbor on the left. */
 690			new_extents--;
 691		}
 692		if (next_bit == 1) {
 693			/* Merging with neighbor on the right. */
 694			new_extents--;
 695		}
 696	}
 697
 698	btrfs_release_path(path);
 699	ret = update_free_space_extent_count(trans, fs_info, block_group, path,
 700					     new_extents);
 701
 702out:
 703	return ret;
 704}
 705
 706static int remove_free_space_extent(struct btrfs_trans_handle *trans,
 707				    struct btrfs_fs_info *fs_info,
 708				    struct btrfs_block_group_cache *block_group,
 709				    struct btrfs_path *path,
 710				    u64 start, u64 size)
 711{
 712	struct btrfs_root *root = fs_info->free_space_root;
 713	struct btrfs_key key;
 714	u64 found_start, found_end;
 715	u64 end = start + size;
 716	int new_extents = -1;
 717	int ret;
 718
 719	key.objectid = start;
 720	key.type = (u8)-1;
 721	key.offset = (u64)-1;
 722
 723	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 724	if (ret)
 725		goto out;
 726
 727	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 728
 729	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
 730
 731	found_start = key.objectid;
 732	found_end = key.objectid + key.offset;
 733	ASSERT(start >= found_start && end <= found_end);
 734
 735	/*
 736	 * Okay, now that we've found the free space extent which contains the
 737	 * free space that we are removing, there are four cases:
 738	 *
 739	 * 1. We're using the whole extent: delete the key we found and
 740	 * decrement the free space extent count.
 741	 * 2. We are using part of the extent starting at the beginning: delete
 742	 * the key we found and insert a new key representing the leftover at
 743	 * the end. There is no net change in the number of extents.
 744	 * 3. We are using part of the extent ending at the end: delete the key
 745	 * we found and insert a new key representing the leftover at the
 746	 * beginning. There is no net change in the number of extents.
 747	 * 4. We are using part of the extent in the middle: delete the key we
 748	 * found and insert two new keys representing the leftovers on each
 749	 * side. Where we used to have one extent, we now have two, so increment
 750	 * the extent count. We may need to convert the block group to bitmaps
 751	 * as a result.
 752	 */
 753
 754	/* Delete the existing key (cases 1-4). */
 755	ret = btrfs_del_item(trans, root, path);
 756	if (ret)
 757		goto out;
 758
 759	/* Add a key for leftovers at the beginning (cases 3 and 4). */
 760	if (start > found_start) {
 761		key.objectid = found_start;
 762		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 763		key.offset = start - found_start;
 764
 765		btrfs_release_path(path);
 766		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 767		if (ret)
 768			goto out;
 769		new_extents++;
 770	}
 771
 772	/* Add a key for leftovers at the end (cases 2 and 4). */
 773	if (end < found_end) {
 774		key.objectid = end;
 775		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 776		key.offset = found_end - end;
 777
 778		btrfs_release_path(path);
 779		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 780		if (ret)
 781			goto out;
 782		new_extents++;
 783	}
 784
 785	btrfs_release_path(path);
 786	ret = update_free_space_extent_count(trans, fs_info, block_group, path,
 787					     new_extents);
 788
 789out:
 790	return ret;
 791}
 792
 
 793int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
 794				  struct btrfs_fs_info *fs_info,
 795				  struct btrfs_block_group_cache *block_group,
 796				  struct btrfs_path *path, u64 start, u64 size)
 797{
 798	struct btrfs_free_space_info *info;
 799	u32 flags;
 800	int ret;
 801
 802	if (block_group->needs_free_space) {
 803		ret = __add_block_group_free_space(trans, fs_info, block_group,
 804						   path);
 805		if (ret)
 806			return ret;
 807	}
 808
 809	info = search_free_space_info(NULL, fs_info, block_group, path, 0);
 810	if (IS_ERR(info))
 811		return PTR_ERR(info);
 812	flags = btrfs_free_space_flags(path->nodes[0], info);
 813	btrfs_release_path(path);
 814
 815	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
 816		return modify_free_space_bitmap(trans, fs_info, block_group,
 817						path, start, size, 1);
 818	} else {
 819		return remove_free_space_extent(trans, fs_info, block_group,
 820						path, start, size);
 821	}
 822}
 823
 824int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
 825				struct btrfs_fs_info *fs_info,
 826				u64 start, u64 size)
 827{
 828	struct btrfs_block_group_cache *block_group;
 829	struct btrfs_path *path;
 830	int ret;
 831
 832	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
 833		return 0;
 834
 835	path = btrfs_alloc_path();
 836	if (!path) {
 837		ret = -ENOMEM;
 838		goto out;
 839	}
 840
 841	block_group = btrfs_lookup_block_group(fs_info, start);
 842	if (!block_group) {
 843		ASSERT(0);
 844		ret = -ENOENT;
 845		goto out;
 846	}
 847
 848	mutex_lock(&block_group->free_space_lock);
 849	ret = __remove_from_free_space_tree(trans, fs_info, block_group, path,
 850					    start, size);
 851	mutex_unlock(&block_group->free_space_lock);
 852
 853	btrfs_put_block_group(block_group);
 854out:
 855	btrfs_free_path(path);
 856	if (ret)
 857		btrfs_abort_transaction(trans, ret);
 858	return ret;
 859}
 860
 861static int add_free_space_extent(struct btrfs_trans_handle *trans,
 862				 struct btrfs_fs_info *fs_info,
 863				 struct btrfs_block_group_cache *block_group,
 864				 struct btrfs_path *path,
 865				 u64 start, u64 size)
 866{
 867	struct btrfs_root *root = fs_info->free_space_root;
 868	struct btrfs_key key, new_key;
 869	u64 found_start, found_end;
 870	u64 end = start + size;
 871	int new_extents = 1;
 872	int ret;
 873
 874	/*
 875	 * We are adding a new extent of free space, but we need to merge
 876	 * extents. There are four cases here:
 877	 *
 878	 * 1. The new extent does not have any immediate neighbors to merge
 879	 * with: add the new key and increment the free space extent count. We
 880	 * may need to convert the block group to bitmaps as a result.
 881	 * 2. The new extent has an immediate neighbor before it: remove the
 882	 * previous key and insert a new key combining both of them. There is no
 883	 * net change in the number of extents.
 884	 * 3. The new extent has an immediate neighbor after it: remove the next
 885	 * key and insert a new key combining both of them. There is no net
 886	 * change in the number of extents.
 887	 * 4. The new extent has immediate neighbors on both sides: remove both
 888	 * of the keys and insert a new key combining all of them. Where we used
 889	 * to have two extents, we now have one, so decrement the extent count.
 890	 */
 891
 892	new_key.objectid = start;
 893	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 894	new_key.offset = size;
 895
 896	/* Search for a neighbor on the left. */
 897	if (start == block_group->key.objectid)
 898		goto right;
 899	key.objectid = start - 1;
 900	key.type = (u8)-1;
 901	key.offset = (u64)-1;
 902
 903	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 904	if (ret)
 905		goto out;
 906
 907	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 908
 909	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
 910		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
 911		btrfs_release_path(path);
 912		goto right;
 913	}
 914
 915	found_start = key.objectid;
 916	found_end = key.objectid + key.offset;
 917	ASSERT(found_start >= block_group->key.objectid &&
 918	       found_end > block_group->key.objectid);
 919	ASSERT(found_start < start && found_end <= start);
 920
 921	/*
 922	 * Delete the neighbor on the left and absorb it into the new key (cases
 923	 * 2 and 4).
 924	 */
 925	if (found_end == start) {
 926		ret = btrfs_del_item(trans, root, path);
 927		if (ret)
 928			goto out;
 929		new_key.objectid = found_start;
 930		new_key.offset += key.offset;
 931		new_extents--;
 932	}
 933	btrfs_release_path(path);
 934
 935right:
 936	/* Search for a neighbor on the right. */
 937	if (end == block_group->key.objectid + block_group->key.offset)
 938		goto insert;
 939	key.objectid = end;
 940	key.type = (u8)-1;
 941	key.offset = (u64)-1;
 942
 943	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 944	if (ret)
 945		goto out;
 946
 947	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 948
 949	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
 950		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
 951		btrfs_release_path(path);
 952		goto insert;
 953	}
 954
 955	found_start = key.objectid;
 956	found_end = key.objectid + key.offset;
 957	ASSERT(found_start >= block_group->key.objectid &&
 958	       found_end > block_group->key.objectid);
 959	ASSERT((found_start < start && found_end <= start) ||
 960	       (found_start >= end && found_end > end));
 961
 962	/*
 963	 * Delete the neighbor on the right and absorb it into the new key
 964	 * (cases 3 and 4).
 965	 */
 966	if (found_start == end) {
 967		ret = btrfs_del_item(trans, root, path);
 968		if (ret)
 969			goto out;
 970		new_key.offset += key.offset;
 971		new_extents--;
 972	}
 973	btrfs_release_path(path);
 974
 975insert:
 976	/* Insert the new key (cases 1-4). */
 977	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
 978	if (ret)
 979		goto out;
 980
 981	btrfs_release_path(path);
 982	ret = update_free_space_extent_count(trans, fs_info, block_group, path,
 983					     new_extents);
 984
 985out:
 986	return ret;
 987}
 988
 
 989int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
 990			     struct btrfs_fs_info *fs_info,
 991			     struct btrfs_block_group_cache *block_group,
 992			     struct btrfs_path *path, u64 start, u64 size)
 993{
 994	struct btrfs_free_space_info *info;
 995	u32 flags;
 996	int ret;
 997
 998	if (block_group->needs_free_space) {
 999		ret = __add_block_group_free_space(trans, fs_info, block_group,
1000						   path);
1001		if (ret)
1002			return ret;
1003	}
1004
1005	info = search_free_space_info(NULL, fs_info, block_group, path, 0);
1006	if (IS_ERR(info))
1007		return PTR_ERR(info);
1008	flags = btrfs_free_space_flags(path->nodes[0], info);
1009	btrfs_release_path(path);
1010
1011	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
1012		return modify_free_space_bitmap(trans, fs_info, block_group,
1013						path, start, size, 0);
1014	} else {
1015		return add_free_space_extent(trans, fs_info, block_group, path,
1016					     start, size);
1017	}
1018}
1019
1020int add_to_free_space_tree(struct btrfs_trans_handle *trans,
1021			   struct btrfs_fs_info *fs_info,
1022			   u64 start, u64 size)
1023{
1024	struct btrfs_block_group_cache *block_group;
1025	struct btrfs_path *path;
1026	int ret;
1027
1028	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1029		return 0;
1030
1031	path = btrfs_alloc_path();
1032	if (!path) {
1033		ret = -ENOMEM;
1034		goto out;
1035	}
1036
1037	block_group = btrfs_lookup_block_group(fs_info, start);
1038	if (!block_group) {
1039		ASSERT(0);
1040		ret = -ENOENT;
1041		goto out;
1042	}
1043
1044	mutex_lock(&block_group->free_space_lock);
1045	ret = __add_to_free_space_tree(trans, fs_info, block_group, path, start,
1046				       size);
1047	mutex_unlock(&block_group->free_space_lock);
1048
1049	btrfs_put_block_group(block_group);
1050out:
1051	btrfs_free_path(path);
1052	if (ret)
1053		btrfs_abort_transaction(trans, ret);
1054	return ret;
1055}
1056
1057/*
1058 * Populate the free space tree by walking the extent tree. Operations on the
1059 * extent tree that happen as a result of writes to the free space tree will go
1060 * through the normal add/remove hooks.
1061 */
1062static int populate_free_space_tree(struct btrfs_trans_handle *trans,
1063				    struct btrfs_fs_info *fs_info,
1064				    struct btrfs_block_group_cache *block_group)
1065{
1066	struct btrfs_root *extent_root = fs_info->extent_root;
1067	struct btrfs_path *path, *path2;
1068	struct btrfs_key key;
1069	u64 start, end;
1070	int ret;
1071
1072	path = btrfs_alloc_path();
1073	if (!path)
1074		return -ENOMEM;
1075	path->reada = 1;
1076
1077	path2 = btrfs_alloc_path();
1078	if (!path2) {
1079		btrfs_free_path(path);
1080		return -ENOMEM;
1081	}
1082
1083	ret = add_new_free_space_info(trans, fs_info, block_group, path2);
1084	if (ret)
1085		goto out;
1086
1087	mutex_lock(&block_group->free_space_lock);
1088
1089	/*
1090	 * Iterate through all of the extent and metadata items in this block
1091	 * group, adding the free space between them and the free space at the
1092	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1093	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1094	 * contained in.
1095	 */
1096	key.objectid = block_group->key.objectid;
1097	key.type = BTRFS_EXTENT_ITEM_KEY;
1098	key.offset = 0;
1099
 
1100	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
1101	if (ret < 0)
1102		goto out_locked;
1103	ASSERT(ret == 0);
1104
1105	start = block_group->key.objectid;
1106	end = block_group->key.objectid + block_group->key.offset;
1107	while (1) {
1108		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1109
1110		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
1111		    key.type == BTRFS_METADATA_ITEM_KEY) {
1112			if (key.objectid >= end)
1113				break;
1114
1115			if (start < key.objectid) {
1116				ret = __add_to_free_space_tree(trans, fs_info,
1117							       block_group,
1118							       path2, start,
1119							       key.objectid -
1120							       start);
1121				if (ret)
1122					goto out_locked;
1123			}
1124			start = key.objectid;
1125			if (key.type == BTRFS_METADATA_ITEM_KEY)
1126				start += fs_info->nodesize;
1127			else
1128				start += key.offset;
1129		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1130			if (key.objectid != block_group->key.objectid)
1131				break;
1132		}
1133
1134		ret = btrfs_next_item(extent_root, path);
1135		if (ret < 0)
1136			goto out_locked;
1137		if (ret)
1138			break;
1139	}
1140	if (start < end) {
1141		ret = __add_to_free_space_tree(trans, fs_info, block_group,
1142					       path2, start, end - start);
1143		if (ret)
1144			goto out_locked;
1145	}
1146
1147	ret = 0;
1148out_locked:
1149	mutex_unlock(&block_group->free_space_lock);
1150out:
1151	btrfs_free_path(path2);
1152	btrfs_free_path(path);
1153	return ret;
1154}
1155
1156int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
1157{
1158	struct btrfs_trans_handle *trans;
1159	struct btrfs_root *tree_root = fs_info->tree_root;
1160	struct btrfs_root *free_space_root;
1161	struct btrfs_block_group_cache *block_group;
1162	struct rb_node *node;
1163	int ret;
1164
1165	trans = btrfs_start_transaction(tree_root, 0);
1166	if (IS_ERR(trans))
1167		return PTR_ERR(trans);
1168
1169	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1170	free_space_root = btrfs_create_tree(trans, fs_info,
 
1171					    BTRFS_FREE_SPACE_TREE_OBJECTID);
1172	if (IS_ERR(free_space_root)) {
1173		ret = PTR_ERR(free_space_root);
1174		goto abort;
1175	}
1176	fs_info->free_space_root = free_space_root;
 
 
 
 
1177
1178	node = rb_first(&fs_info->block_group_cache_tree);
1179	while (node) {
1180		block_group = rb_entry(node, struct btrfs_block_group_cache,
1181				       cache_node);
1182		ret = populate_free_space_tree(trans, fs_info, block_group);
1183		if (ret)
1184			goto abort;
1185		node = rb_next(node);
1186	}
1187
1188	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1189	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1190	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1191
1192	ret = btrfs_commit_transaction(trans);
1193	if (ret)
1194		return ret;
1195
1196	return 0;
 
 
 
 
 
1197
1198abort:
1199	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
 
1200	btrfs_abort_transaction(trans, ret);
1201	btrfs_end_transaction(trans);
1202	return ret;
1203}
1204
1205static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1206				 struct btrfs_root *root)
1207{
1208	struct btrfs_path *path;
1209	struct btrfs_key key;
1210	int nr;
1211	int ret;
1212
1213	path = btrfs_alloc_path();
1214	if (!path)
1215		return -ENOMEM;
1216
1217	path->leave_spinning = 1;
1218
1219	key.objectid = 0;
1220	key.type = 0;
1221	key.offset = 0;
1222
1223	while (1) {
1224		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1225		if (ret < 0)
1226			goto out;
1227
1228		nr = btrfs_header_nritems(path->nodes[0]);
1229		if (!nr)
1230			break;
1231
1232		path->slots[0] = 0;
1233		ret = btrfs_del_items(trans, root, path, 0, nr);
1234		if (ret)
1235			goto out;
1236
1237		btrfs_release_path(path);
1238	}
1239
1240	ret = 0;
1241out:
1242	btrfs_free_path(path);
1243	return ret;
1244}
1245
1246int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
1247{
1248	struct btrfs_trans_handle *trans;
1249	struct btrfs_root *tree_root = fs_info->tree_root;
1250	struct btrfs_root *free_space_root = fs_info->free_space_root;
 
 
 
 
 
1251	int ret;
1252
1253	trans = btrfs_start_transaction(tree_root, 0);
1254	if (IS_ERR(trans))
1255		return PTR_ERR(trans);
1256
1257	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1258	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1259	fs_info->free_space_root = NULL;
1260
1261	ret = clear_free_space_tree(trans, free_space_root);
1262	if (ret)
1263		goto abort;
1264
1265	ret = btrfs_del_root(trans, tree_root, &free_space_root->root_key);
1266	if (ret)
1267		goto abort;
1268
 
 
 
1269	list_del(&free_space_root->dirty_list);
 
1270
1271	btrfs_tree_lock(free_space_root->node);
1272	clean_tree_block(trans, fs_info, free_space_root->node);
1273	btrfs_tree_unlock(free_space_root->node);
1274	btrfs_free_tree_block(trans, free_space_root, free_space_root->node,
1275			      0, 1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1276
1277	free_extent_buffer(free_space_root->node);
1278	free_extent_buffer(free_space_root->commit_root);
1279	kfree(free_space_root);
1280
1281	ret = btrfs_commit_transaction(trans);
 
 
 
1282	if (ret)
1283		return ret;
 
 
 
 
 
 
 
 
 
 
 
 
1284
1285	return 0;
 
 
1286
 
 
 
1287abort:
1288	btrfs_abort_transaction(trans, ret);
1289	btrfs_end_transaction(trans);
1290	return ret;
1291}
1292
1293static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
1294					struct btrfs_fs_info *fs_info,
1295					struct btrfs_block_group_cache *block_group,
1296					struct btrfs_path *path)
1297{
1298	u64 start, end;
1299	int ret;
1300
1301	start = block_group->key.objectid;
1302	end = block_group->key.objectid + block_group->key.offset;
1303
1304	block_group->needs_free_space = 0;
1305
1306	ret = add_new_free_space_info(trans, fs_info, block_group, path);
1307	if (ret)
1308		return ret;
1309
1310	return __add_to_free_space_tree(trans, fs_info, block_group, path,
1311					block_group->key.objectid,
1312					block_group->key.offset);
1313}
1314
1315int add_block_group_free_space(struct btrfs_trans_handle *trans,
1316			       struct btrfs_fs_info *fs_info,
1317			       struct btrfs_block_group_cache *block_group)
1318{
 
1319	struct btrfs_path *path = NULL;
1320	int ret = 0;
1321
1322	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1323		return 0;
1324
1325	mutex_lock(&block_group->free_space_lock);
1326	if (!block_group->needs_free_space)
1327		goto out;
1328
1329	path = btrfs_alloc_path();
1330	if (!path) {
1331		ret = -ENOMEM;
1332		goto out;
1333	}
1334
1335	ret = __add_block_group_free_space(trans, fs_info, block_group, path);
1336
1337out:
1338	btrfs_free_path(path);
1339	mutex_unlock(&block_group->free_space_lock);
1340	if (ret)
1341		btrfs_abort_transaction(trans, ret);
1342	return ret;
1343}
1344
1345int remove_block_group_free_space(struct btrfs_trans_handle *trans,
1346				  struct btrfs_fs_info *fs_info,
1347				  struct btrfs_block_group_cache *block_group)
1348{
1349	struct btrfs_root *root = fs_info->free_space_root;
1350	struct btrfs_path *path;
1351	struct btrfs_key key, found_key;
1352	struct extent_buffer *leaf;
1353	u64 start, end;
1354	int done = 0, nr;
1355	int ret;
1356
1357	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1358		return 0;
1359
1360	if (block_group->needs_free_space) {
1361		/* We never added this block group to the free space tree. */
1362		return 0;
1363	}
1364
1365	path = btrfs_alloc_path();
1366	if (!path) {
1367		ret = -ENOMEM;
1368		goto out;
1369	}
1370
1371	start = block_group->key.objectid;
1372	end = block_group->key.objectid + block_group->key.offset;
1373
1374	key.objectid = end - 1;
1375	key.type = (u8)-1;
1376	key.offset = (u64)-1;
1377
1378	while (!done) {
1379		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
1380		if (ret)
1381			goto out;
1382
1383		leaf = path->nodes[0];
1384		nr = 0;
1385		path->slots[0]++;
1386		while (path->slots[0] > 0) {
1387			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
1388
1389			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
1390				ASSERT(found_key.objectid == block_group->key.objectid);
1391				ASSERT(found_key.offset == block_group->key.offset);
1392				done = 1;
1393				nr++;
1394				path->slots[0]--;
1395				break;
1396			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
1397				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
1398				ASSERT(found_key.objectid >= start);
1399				ASSERT(found_key.objectid < end);
1400				ASSERT(found_key.objectid + found_key.offset <= end);
1401				nr++;
1402				path->slots[0]--;
1403			} else {
1404				ASSERT(0);
1405			}
1406		}
1407
1408		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
1409		if (ret)
1410			goto out;
1411		btrfs_release_path(path);
1412	}
1413
1414	ret = 0;
1415out:
1416	btrfs_free_path(path);
1417	if (ret)
1418		btrfs_abort_transaction(trans, ret);
1419	return ret;
1420}
1421
1422static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
1423				   struct btrfs_path *path,
1424				   u32 expected_extent_count)
1425{
1426	struct btrfs_block_group_cache *block_group;
1427	struct btrfs_fs_info *fs_info;
1428	struct btrfs_root *root;
1429	struct btrfs_key key;
1430	int prev_bit = 0, bit;
1431	/* Initialize to silence GCC. */
1432	u64 extent_start = 0;
1433	u64 end, offset;
1434	u64 total_found = 0;
1435	u32 extent_count = 0;
1436	int ret;
1437
1438	block_group = caching_ctl->block_group;
1439	fs_info = block_group->fs_info;
1440	root = fs_info->free_space_root;
1441
1442	end = block_group->key.objectid + block_group->key.offset;
1443
1444	while (1) {
1445		ret = btrfs_next_item(root, path);
1446		if (ret < 0)
1447			goto out;
1448		if (ret)
1449			break;
1450
1451		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1452
1453		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1454			break;
1455
1456		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
1457		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1458
1459		caching_ctl->progress = key.objectid;
1460
1461		offset = key.objectid;
1462		while (offset < key.objectid + key.offset) {
1463			bit = free_space_test_bit(block_group, path, offset);
1464			if (prev_bit == 0 && bit == 1) {
1465				extent_start = offset;
1466			} else if (prev_bit == 1 && bit == 0) {
1467				total_found += add_new_free_space(block_group,
1468								  fs_info,
1469								  extent_start,
1470								  offset);
 
 
 
 
 
1471				if (total_found > CACHING_CTL_WAKE_UP) {
1472					total_found = 0;
1473					wake_up(&caching_ctl->wait);
1474				}
1475				extent_count++;
1476			}
1477			prev_bit = bit;
1478			offset += fs_info->sectorsize;
1479		}
1480	}
1481	if (prev_bit == 1) {
1482		total_found += add_new_free_space(block_group, fs_info,
1483						  extent_start, end);
 
1484		extent_count++;
1485	}
1486
1487	if (extent_count != expected_extent_count) {
1488		btrfs_err(fs_info,
1489			  "incorrect extent count for %llu; counted %u, expected %u",
1490			  block_group->key.objectid, extent_count,
1491			  expected_extent_count);
1492		ASSERT(0);
1493		ret = -EIO;
1494		goto out;
1495	}
1496
1497	caching_ctl->progress = (u64)-1;
1498
1499	ret = 0;
1500out:
1501	return ret;
1502}
1503
1504static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
1505				   struct btrfs_path *path,
1506				   u32 expected_extent_count)
1507{
1508	struct btrfs_block_group_cache *block_group;
1509	struct btrfs_fs_info *fs_info;
1510	struct btrfs_root *root;
1511	struct btrfs_key key;
1512	u64 end;
1513	u64 total_found = 0;
1514	u32 extent_count = 0;
1515	int ret;
1516
1517	block_group = caching_ctl->block_group;
1518	fs_info = block_group->fs_info;
1519	root = fs_info->free_space_root;
1520
1521	end = block_group->key.objectid + block_group->key.offset;
1522
1523	while (1) {
 
 
1524		ret = btrfs_next_item(root, path);
1525		if (ret < 0)
1526			goto out;
1527		if (ret)
1528			break;
1529
1530		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1531
1532		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1533			break;
1534
1535		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
1536		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1537
1538		caching_ctl->progress = key.objectid;
1539
1540		total_found += add_new_free_space(block_group, fs_info,
1541						  key.objectid,
1542						  key.objectid + key.offset);
 
1543		if (total_found > CACHING_CTL_WAKE_UP) {
1544			total_found = 0;
1545			wake_up(&caching_ctl->wait);
1546		}
1547		extent_count++;
1548	}
1549
1550	if (extent_count != expected_extent_count) {
1551		btrfs_err(fs_info,
1552			  "incorrect extent count for %llu; counted %u, expected %u",
1553			  block_group->key.objectid, extent_count,
1554			  expected_extent_count);
1555		ASSERT(0);
1556		ret = -EIO;
1557		goto out;
1558	}
1559
1560	caching_ctl->progress = (u64)-1;
1561
1562	ret = 0;
1563out:
1564	return ret;
1565}
1566
1567int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1568{
1569	struct btrfs_block_group_cache *block_group;
1570	struct btrfs_fs_info *fs_info;
1571	struct btrfs_free_space_info *info;
1572	struct btrfs_path *path;
1573	u32 extent_count, flags;
1574	int ret;
1575
1576	block_group = caching_ctl->block_group;
1577	fs_info = block_group->fs_info;
1578
1579	path = btrfs_alloc_path();
1580	if (!path)
1581		return -ENOMEM;
1582
1583	/*
1584	 * Just like caching_thread() doesn't want to deadlock on the extent
1585	 * tree, we don't want to deadlock on the free space tree.
1586	 */
1587	path->skip_locking = 1;
1588	path->search_commit_root = 1;
1589	path->reada = 1;
1590
1591	info = search_free_space_info(NULL, fs_info, block_group, path, 0);
1592	if (IS_ERR(info)) {
1593		ret = PTR_ERR(info);
1594		goto out;
1595	}
1596	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1597	flags = btrfs_free_space_flags(path->nodes[0], info);
1598
1599	/*
1600	 * We left path pointing to the free space info item, so now
1601	 * load_free_space_foo can just iterate through the free space tree from
1602	 * there.
1603	 */
1604	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1605		ret = load_free_space_bitmaps(caching_ctl, path, extent_count);
1606	else
1607		ret = load_free_space_extents(caching_ctl, path, extent_count);
1608
1609out:
1610	btrfs_free_path(path);
1611	return ret;
1612}