Linux Audio

Check our new training course

Loading...
v6.2
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2015 Facebook.  All rights reserved.
   4 */
   5
   6#include <linux/kernel.h>
   7#include <linux/sched/mm.h>
   8#include "messages.h"
   9#include "ctree.h"
  10#include "disk-io.h"
  11#include "locking.h"
  12#include "free-space-tree.h"
  13#include "transaction.h"
  14#include "block-group.h"
  15#include "fs.h"
  16#include "accessors.h"
  17#include "extent-tree.h"
  18#include "root-tree.h"
  19
  20static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
  21					struct btrfs_block_group *block_group,
  22					struct btrfs_path *path);
  23
  24static struct btrfs_root *btrfs_free_space_root(
  25				struct btrfs_block_group *block_group)
  26{
  27	struct btrfs_key key = {
  28		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
  29		.type = BTRFS_ROOT_ITEM_KEY,
  30		.offset = 0,
  31	};
  32
  33	if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
  34		key.offset = block_group->global_root_id;
  35	return btrfs_global_root(block_group->fs_info, &key);
  36}
  37
  38void set_free_space_tree_thresholds(struct btrfs_block_group *cache)
  39{
  40	u32 bitmap_range;
  41	size_t bitmap_size;
  42	u64 num_bitmaps, total_bitmap_size;
  43
  44	if (WARN_ON(cache->length == 0))
  45		btrfs_warn(cache->fs_info, "block group %llu length is zero",
  46			   cache->start);
  47
  48	/*
  49	 * We convert to bitmaps when the disk space required for using extents
  50	 * exceeds that required for using bitmaps.
  51	 */
  52	bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
  53	num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
  54	bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
  55	total_bitmap_size = num_bitmaps * bitmap_size;
  56	cache->bitmap_high_thresh = div_u64(total_bitmap_size,
  57					    sizeof(struct btrfs_item));
  58
  59	/*
  60	 * We allow for a small buffer between the high threshold and low
  61	 * threshold to avoid thrashing back and forth between the two formats.
  62	 */
  63	if (cache->bitmap_high_thresh > 100)
  64		cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
  65	else
  66		cache->bitmap_low_thresh = 0;
  67}
  68
  69static int add_new_free_space_info(struct btrfs_trans_handle *trans,
  70				   struct btrfs_block_group *block_group,
  71				   struct btrfs_path *path)
  72{
  73	struct btrfs_root *root = btrfs_free_space_root(block_group);
  74	struct btrfs_free_space_info *info;
  75	struct btrfs_key key;
  76	struct extent_buffer *leaf;
  77	int ret;
  78
  79	key.objectid = block_group->start;
  80	key.type = BTRFS_FREE_SPACE_INFO_KEY;
  81	key.offset = block_group->length;
  82
  83	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
  84	if (ret)
  85		goto out;
  86
  87	leaf = path->nodes[0];
  88	info = btrfs_item_ptr(leaf, path->slots[0],
  89			      struct btrfs_free_space_info);
  90	btrfs_set_free_space_extent_count(leaf, info, 0);
  91	btrfs_set_free_space_flags(leaf, info, 0);
  92	btrfs_mark_buffer_dirty(leaf);
  93
  94	ret = 0;
  95out:
  96	btrfs_release_path(path);
  97	return ret;
  98}
  99
 100EXPORT_FOR_TESTS
 101struct btrfs_free_space_info *search_free_space_info(
 102		struct btrfs_trans_handle *trans,
 103		struct btrfs_block_group *block_group,
 104		struct btrfs_path *path, int cow)
 105{
 106	struct btrfs_fs_info *fs_info = block_group->fs_info;
 107	struct btrfs_root *root = btrfs_free_space_root(block_group);
 108	struct btrfs_key key;
 109	int ret;
 110
 111	key.objectid = block_group->start;
 112	key.type = BTRFS_FREE_SPACE_INFO_KEY;
 113	key.offset = block_group->length;
 114
 115	ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
 116	if (ret < 0)
 117		return ERR_PTR(ret);
 118	if (ret != 0) {
 119		btrfs_warn(fs_info, "missing free space info for %llu",
 120			   block_group->start);
 121		ASSERT(0);
 122		return ERR_PTR(-ENOENT);
 123	}
 124
 125	return btrfs_item_ptr(path->nodes[0], path->slots[0],
 126			      struct btrfs_free_space_info);
 127}
 128
 129/*
 130 * btrfs_search_slot() but we're looking for the greatest key less than the
 131 * passed key.
 132 */
 133static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
 134				  struct btrfs_root *root,
 135				  struct btrfs_key *key, struct btrfs_path *p,
 136				  int ins_len, int cow)
 137{
 138	int ret;
 139
 140	ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
 141	if (ret < 0)
 142		return ret;
 143
 144	if (ret == 0) {
 145		ASSERT(0);
 146		return -EIO;
 147	}
 148
 149	if (p->slots[0] == 0) {
 150		ASSERT(0);
 151		return -EIO;
 152	}
 153	p->slots[0]--;
 154
 155	return 0;
 156}
 157
 158static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
 159					 u64 size)
 160{
 161	return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
 162}
 163
 164static unsigned long *alloc_bitmap(u32 bitmap_size)
 165{
 166	unsigned long *ret;
 167	unsigned int nofs_flag;
 168	u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
 169
 170	/*
 171	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
 172	 * into the filesystem as the free space bitmap can be modified in the
 173	 * critical section of a transaction commit.
 174	 *
 175	 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
 176	 * know that recursion is unsafe.
 177	 */
 178	nofs_flag = memalloc_nofs_save();
 179	ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
 180	memalloc_nofs_restore(nofs_flag);
 181	return ret;
 182}
 183
 184static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
 185{
 186	u8 *p = ((u8 *)map) + BIT_BYTE(start);
 187	const unsigned int size = start + len;
 188	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
 189	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
 190
 191	while (len - bits_to_set >= 0) {
 192		*p |= mask_to_set;
 193		len -= bits_to_set;
 194		bits_to_set = BITS_PER_BYTE;
 195		mask_to_set = ~0;
 196		p++;
 197	}
 198	if (len) {
 199		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
 200		*p |= mask_to_set;
 201	}
 202}
 203
 204EXPORT_FOR_TESTS
 205int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 206				  struct btrfs_block_group *block_group,
 207				  struct btrfs_path *path)
 208{
 209	struct btrfs_fs_info *fs_info = trans->fs_info;
 210	struct btrfs_root *root = btrfs_free_space_root(block_group);
 211	struct btrfs_free_space_info *info;
 212	struct btrfs_key key, found_key;
 213	struct extent_buffer *leaf;
 214	unsigned long *bitmap;
 215	char *bitmap_cursor;
 216	u64 start, end;
 217	u64 bitmap_range, i;
 218	u32 bitmap_size, flags, expected_extent_count;
 219	u32 extent_count = 0;
 220	int done = 0, nr;
 221	int ret;
 222
 223	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
 
 224	bitmap = alloc_bitmap(bitmap_size);
 225	if (!bitmap) {
 226		ret = -ENOMEM;
 227		goto out;
 228	}
 229
 230	start = block_group->start;
 231	end = block_group->start + block_group->length;
 232
 233	key.objectid = end - 1;
 234	key.type = (u8)-1;
 235	key.offset = (u64)-1;
 236
 237	while (!done) {
 238		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 239		if (ret)
 240			goto out;
 241
 242		leaf = path->nodes[0];
 243		nr = 0;
 244		path->slots[0]++;
 245		while (path->slots[0] > 0) {
 246			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
 247
 248			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
 249				ASSERT(found_key.objectid == block_group->start);
 250				ASSERT(found_key.offset == block_group->length);
 251				done = 1;
 252				break;
 253			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
 254				u64 first, last;
 255
 256				ASSERT(found_key.objectid >= start);
 257				ASSERT(found_key.objectid < end);
 258				ASSERT(found_key.objectid + found_key.offset <= end);
 259
 260				first = div_u64(found_key.objectid - start,
 261						fs_info->sectorsize);
 262				last = div_u64(found_key.objectid + found_key.offset - start,
 263					       fs_info->sectorsize);
 264				le_bitmap_set(bitmap, first, last - first);
 265
 266				extent_count++;
 267				nr++;
 268				path->slots[0]--;
 269			} else {
 270				ASSERT(0);
 271			}
 272		}
 273
 274		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
 275		if (ret)
 276			goto out;
 277		btrfs_release_path(path);
 278	}
 279
 280	info = search_free_space_info(trans, block_group, path, 1);
 281	if (IS_ERR(info)) {
 282		ret = PTR_ERR(info);
 283		goto out;
 284	}
 285	leaf = path->nodes[0];
 286	flags = btrfs_free_space_flags(leaf, info);
 287	flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
 288	btrfs_set_free_space_flags(leaf, info, flags);
 289	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
 290	btrfs_mark_buffer_dirty(leaf);
 291	btrfs_release_path(path);
 292
 293	if (extent_count != expected_extent_count) {
 294		btrfs_err(fs_info,
 295			  "incorrect extent count for %llu; counted %u, expected %u",
 296			  block_group->start, extent_count,
 297			  expected_extent_count);
 298		ASSERT(0);
 299		ret = -EIO;
 300		goto out;
 301	}
 302
 303	bitmap_cursor = (char *)bitmap;
 304	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
 305	i = start;
 306	while (i < end) {
 307		unsigned long ptr;
 308		u64 extent_size;
 309		u32 data_size;
 310
 311		extent_size = min(end - i, bitmap_range);
 312		data_size = free_space_bitmap_size(fs_info, extent_size);
 
 313
 314		key.objectid = i;
 315		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
 316		key.offset = extent_size;
 317
 318		ret = btrfs_insert_empty_item(trans, root, path, &key,
 319					      data_size);
 320		if (ret)
 321			goto out;
 322
 323		leaf = path->nodes[0];
 324		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 325		write_extent_buffer(leaf, bitmap_cursor, ptr,
 326				    data_size);
 327		btrfs_mark_buffer_dirty(leaf);
 328		btrfs_release_path(path);
 329
 330		i += extent_size;
 331		bitmap_cursor += data_size;
 332	}
 333
 334	ret = 0;
 335out:
 336	kvfree(bitmap);
 337	if (ret)
 338		btrfs_abort_transaction(trans, ret);
 339	return ret;
 340}
 341
 342EXPORT_FOR_TESTS
 343int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 344				  struct btrfs_block_group *block_group,
 345				  struct btrfs_path *path)
 346{
 347	struct btrfs_fs_info *fs_info = trans->fs_info;
 348	struct btrfs_root *root = btrfs_free_space_root(block_group);
 349	struct btrfs_free_space_info *info;
 350	struct btrfs_key key, found_key;
 351	struct extent_buffer *leaf;
 352	unsigned long *bitmap;
 353	u64 start, end;
 354	u32 bitmap_size, flags, expected_extent_count;
 355	unsigned long nrbits, start_bit, end_bit;
 356	u32 extent_count = 0;
 357	int done = 0, nr;
 358	int ret;
 359
 360	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
 
 361	bitmap = alloc_bitmap(bitmap_size);
 362	if (!bitmap) {
 363		ret = -ENOMEM;
 364		goto out;
 365	}
 366
 367	start = block_group->start;
 368	end = block_group->start + block_group->length;
 369
 370	key.objectid = end - 1;
 371	key.type = (u8)-1;
 372	key.offset = (u64)-1;
 373
 374	while (!done) {
 375		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 376		if (ret)
 377			goto out;
 378
 379		leaf = path->nodes[0];
 380		nr = 0;
 381		path->slots[0]++;
 382		while (path->slots[0] > 0) {
 383			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
 384
 385			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
 386				ASSERT(found_key.objectid == block_group->start);
 387				ASSERT(found_key.offset == block_group->length);
 388				done = 1;
 389				break;
 390			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
 391				unsigned long ptr;
 392				char *bitmap_cursor;
 393				u32 bitmap_pos, data_size;
 394
 395				ASSERT(found_key.objectid >= start);
 396				ASSERT(found_key.objectid < end);
 397				ASSERT(found_key.objectid + found_key.offset <= end);
 398
 399				bitmap_pos = div_u64(found_key.objectid - start,
 400						     fs_info->sectorsize *
 401						     BITS_PER_BYTE);
 402				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
 403				data_size = free_space_bitmap_size(fs_info,
 404								found_key.offset);
 405
 406				ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
 407				read_extent_buffer(leaf, bitmap_cursor, ptr,
 408						   data_size);
 409
 410				nr++;
 411				path->slots[0]--;
 412			} else {
 413				ASSERT(0);
 414			}
 415		}
 416
 417		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
 418		if (ret)
 419			goto out;
 420		btrfs_release_path(path);
 421	}
 422
 423	info = search_free_space_info(trans, block_group, path, 1);
 424	if (IS_ERR(info)) {
 425		ret = PTR_ERR(info);
 426		goto out;
 427	}
 428	leaf = path->nodes[0];
 429	flags = btrfs_free_space_flags(leaf, info);
 430	flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
 431	btrfs_set_free_space_flags(leaf, info, flags);
 432	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
 433	btrfs_mark_buffer_dirty(leaf);
 434	btrfs_release_path(path);
 435
 436	nrbits = block_group->length >> block_group->fs_info->sectorsize_bits;
 437	start_bit = find_next_bit_le(bitmap, nrbits, 0);
 438
 439	while (start_bit < nrbits) {
 440		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
 441		ASSERT(start_bit < end_bit);
 442
 443		key.objectid = start + start_bit * block_group->fs_info->sectorsize;
 444		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 445		key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;
 446
 447		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 448		if (ret)
 449			goto out;
 450		btrfs_release_path(path);
 451
 452		extent_count++;
 453
 454		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
 455	}
 456
 457	if (extent_count != expected_extent_count) {
 458		btrfs_err(fs_info,
 459			  "incorrect extent count for %llu; counted %u, expected %u",
 460			  block_group->start, extent_count,
 461			  expected_extent_count);
 462		ASSERT(0);
 463		ret = -EIO;
 464		goto out;
 465	}
 466
 467	ret = 0;
 468out:
 469	kvfree(bitmap);
 470	if (ret)
 471		btrfs_abort_transaction(trans, ret);
 472	return ret;
 473}
 474
 475static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
 476					  struct btrfs_block_group *block_group,
 477					  struct btrfs_path *path,
 478					  int new_extents)
 479{
 480	struct btrfs_free_space_info *info;
 481	u32 flags;
 482	u32 extent_count;
 483	int ret = 0;
 484
 485	if (new_extents == 0)
 486		return 0;
 487
 488	info = search_free_space_info(trans, block_group, path, 1);
 489	if (IS_ERR(info)) {
 490		ret = PTR_ERR(info);
 491		goto out;
 492	}
 493	flags = btrfs_free_space_flags(path->nodes[0], info);
 494	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
 495
 496	extent_count += new_extents;
 497	btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
 498	btrfs_mark_buffer_dirty(path->nodes[0]);
 499	btrfs_release_path(path);
 500
 501	if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
 502	    extent_count > block_group->bitmap_high_thresh) {
 503		ret = convert_free_space_to_bitmaps(trans, block_group, path);
 504	} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
 505		   extent_count < block_group->bitmap_low_thresh) {
 506		ret = convert_free_space_to_extents(trans, block_group, path);
 507	}
 508
 509out:
 510	return ret;
 511}
 512
 513EXPORT_FOR_TESTS
 514int free_space_test_bit(struct btrfs_block_group *block_group,
 515			struct btrfs_path *path, u64 offset)
 516{
 517	struct extent_buffer *leaf;
 518	struct btrfs_key key;
 519	u64 found_start, found_end;
 520	unsigned long ptr, i;
 521
 522	leaf = path->nodes[0];
 523	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 524	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
 525
 526	found_start = key.objectid;
 527	found_end = key.objectid + key.offset;
 528	ASSERT(offset >= found_start && offset < found_end);
 529
 530	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 531	i = div_u64(offset - found_start,
 532		    block_group->fs_info->sectorsize);
 533	return !!extent_buffer_test_bit(leaf, ptr, i);
 534}
 535
 536static void free_space_set_bits(struct btrfs_block_group *block_group,
 537				struct btrfs_path *path, u64 *start, u64 *size,
 538				int bit)
 539{
 540	struct btrfs_fs_info *fs_info = block_group->fs_info;
 541	struct extent_buffer *leaf;
 542	struct btrfs_key key;
 543	u64 end = *start + *size;
 544	u64 found_start, found_end;
 545	unsigned long ptr, first, last;
 546
 547	leaf = path->nodes[0];
 548	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 549	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
 550
 551	found_start = key.objectid;
 552	found_end = key.objectid + key.offset;
 553	ASSERT(*start >= found_start && *start < found_end);
 554	ASSERT(end > found_start);
 555
 556	if (end > found_end)
 557		end = found_end;
 558
 559	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 560	first = (*start - found_start) >> fs_info->sectorsize_bits;
 561	last = (end - found_start) >> fs_info->sectorsize_bits;
 562	if (bit)
 563		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
 564	else
 565		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
 566	btrfs_mark_buffer_dirty(leaf);
 567
 568	*size -= end - *start;
 569	*start = end;
 570}
 571
 572/*
 573 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
 574 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
 575 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
 576 * looking for.
 577 */
 578static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
 579				  struct btrfs_root *root, struct btrfs_path *p)
 580{
 581	struct btrfs_key key;
 582
 583	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
 584		p->slots[0]++;
 585		return 0;
 586	}
 587
 588	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
 589	btrfs_release_path(p);
 590
 591	key.objectid += key.offset;
 592	key.type = (u8)-1;
 593	key.offset = (u64)-1;
 594
 595	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
 596}
 597
 598/*
 599 * If remove is 1, then we are removing free space, thus clearing bits in the
 600 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
 601 * the bitmap.
 602 */
 603static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
 604				    struct btrfs_block_group *block_group,
 605				    struct btrfs_path *path,
 606				    u64 start, u64 size, int remove)
 607{
 608	struct btrfs_root *root = btrfs_free_space_root(block_group);
 609	struct btrfs_key key;
 610	u64 end = start + size;
 611	u64 cur_start, cur_size;
 612	int prev_bit, next_bit;
 613	int new_extents;
 614	int ret;
 615
 616	/*
 617	 * Read the bit for the block immediately before the extent of space if
 618	 * that block is within the block group.
 619	 */
 620	if (start > block_group->start) {
 621		u64 prev_block = start - block_group->fs_info->sectorsize;
 622
 623		key.objectid = prev_block;
 624		key.type = (u8)-1;
 625		key.offset = (u64)-1;
 626
 627		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
 628		if (ret)
 629			goto out;
 630
 631		prev_bit = free_space_test_bit(block_group, path, prev_block);
 632
 633		/* The previous block may have been in the previous bitmap. */
 634		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 635		if (start >= key.objectid + key.offset) {
 636			ret = free_space_next_bitmap(trans, root, path);
 637			if (ret)
 638				goto out;
 639		}
 640	} else {
 641		key.objectid = start;
 642		key.type = (u8)-1;
 643		key.offset = (u64)-1;
 644
 645		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
 646		if (ret)
 647			goto out;
 648
 649		prev_bit = -1;
 650	}
 651
 652	/*
 653	 * Iterate over all of the bitmaps overlapped by the extent of space,
 654	 * clearing/setting bits as required.
 655	 */
 656	cur_start = start;
 657	cur_size = size;
 658	while (1) {
 659		free_space_set_bits(block_group, path, &cur_start, &cur_size,
 660				    !remove);
 661		if (cur_size == 0)
 662			break;
 663		ret = free_space_next_bitmap(trans, root, path);
 664		if (ret)
 665			goto out;
 666	}
 667
 668	/*
 669	 * Read the bit for the block immediately after the extent of space if
 670	 * that block is within the block group.
 671	 */
 672	if (end < block_group->start + block_group->length) {
 673		/* The next block may be in the next bitmap. */
 674		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 675		if (end >= key.objectid + key.offset) {
 676			ret = free_space_next_bitmap(trans, root, path);
 677			if (ret)
 678				goto out;
 679		}
 680
 681		next_bit = free_space_test_bit(block_group, path, end);
 682	} else {
 683		next_bit = -1;
 684	}
 685
 686	if (remove) {
 687		new_extents = -1;
 688		if (prev_bit == 1) {
 689			/* Leftover on the left. */
 690			new_extents++;
 691		}
 692		if (next_bit == 1) {
 693			/* Leftover on the right. */
 694			new_extents++;
 695		}
 696	} else {
 697		new_extents = 1;
 698		if (prev_bit == 1) {
 699			/* Merging with neighbor on the left. */
 700			new_extents--;
 701		}
 702		if (next_bit == 1) {
 703			/* Merging with neighbor on the right. */
 704			new_extents--;
 705		}
 706	}
 707
 708	btrfs_release_path(path);
 709	ret = update_free_space_extent_count(trans, block_group, path,
 710					     new_extents);
 711
 712out:
 713	return ret;
 714}
 715
 716static int remove_free_space_extent(struct btrfs_trans_handle *trans,
 717				    struct btrfs_block_group *block_group,
 718				    struct btrfs_path *path,
 719				    u64 start, u64 size)
 720{
 721	struct btrfs_root *root = btrfs_free_space_root(block_group);
 722	struct btrfs_key key;
 723	u64 found_start, found_end;
 724	u64 end = start + size;
 725	int new_extents = -1;
 726	int ret;
 727
 728	key.objectid = start;
 729	key.type = (u8)-1;
 730	key.offset = (u64)-1;
 731
 732	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 733	if (ret)
 734		goto out;
 735
 736	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 737
 738	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
 739
 740	found_start = key.objectid;
 741	found_end = key.objectid + key.offset;
 742	ASSERT(start >= found_start && end <= found_end);
 743
 744	/*
 745	 * Okay, now that we've found the free space extent which contains the
 746	 * free space that we are removing, there are four cases:
 747	 *
 748	 * 1. We're using the whole extent: delete the key we found and
 749	 * decrement the free space extent count.
 750	 * 2. We are using part of the extent starting at the beginning: delete
 751	 * the key we found and insert a new key representing the leftover at
 752	 * the end. There is no net change in the number of extents.
 753	 * 3. We are using part of the extent ending at the end: delete the key
 754	 * we found and insert a new key representing the leftover at the
 755	 * beginning. There is no net change in the number of extents.
 756	 * 4. We are using part of the extent in the middle: delete the key we
 757	 * found and insert two new keys representing the leftovers on each
 758	 * side. Where we used to have one extent, we now have two, so increment
 759	 * the extent count. We may need to convert the block group to bitmaps
 760	 * as a result.
 761	 */
 762
 763	/* Delete the existing key (cases 1-4). */
 764	ret = btrfs_del_item(trans, root, path);
 765	if (ret)
 766		goto out;
 767
 768	/* Add a key for leftovers at the beginning (cases 3 and 4). */
 769	if (start > found_start) {
 770		key.objectid = found_start;
 771		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 772		key.offset = start - found_start;
 773
 774		btrfs_release_path(path);
 775		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 776		if (ret)
 777			goto out;
 778		new_extents++;
 779	}
 780
 781	/* Add a key for leftovers at the end (cases 2 and 4). */
 782	if (end < found_end) {
 783		key.objectid = end;
 784		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 785		key.offset = found_end - end;
 786
 787		btrfs_release_path(path);
 788		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 789		if (ret)
 790			goto out;
 791		new_extents++;
 792	}
 793
 794	btrfs_release_path(path);
 795	ret = update_free_space_extent_count(trans, block_group, path,
 796					     new_extents);
 797
 798out:
 799	return ret;
 800}
 801
 802EXPORT_FOR_TESTS
 803int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
 804				  struct btrfs_block_group *block_group,
 805				  struct btrfs_path *path, u64 start, u64 size)
 806{
 807	struct btrfs_free_space_info *info;
 808	u32 flags;
 809	int ret;
 810
 811	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
 812		ret = __add_block_group_free_space(trans, block_group, path);
 813		if (ret)
 814			return ret;
 815	}
 816
 817	info = search_free_space_info(NULL, block_group, path, 0);
 818	if (IS_ERR(info))
 819		return PTR_ERR(info);
 820	flags = btrfs_free_space_flags(path->nodes[0], info);
 821	btrfs_release_path(path);
 822
 823	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
 824		return modify_free_space_bitmap(trans, block_group, path,
 825						start, size, 1);
 826	} else {
 827		return remove_free_space_extent(trans, block_group, path,
 828						start, size);
 829	}
 830}
 831
 832int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
 833				u64 start, u64 size)
 834{
 835	struct btrfs_block_group *block_group;
 836	struct btrfs_path *path;
 837	int ret;
 838
 839	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
 840		return 0;
 841
 842	path = btrfs_alloc_path();
 843	if (!path) {
 844		ret = -ENOMEM;
 845		goto out;
 846	}
 847
 848	block_group = btrfs_lookup_block_group(trans->fs_info, start);
 849	if (!block_group) {
 850		ASSERT(0);
 851		ret = -ENOENT;
 852		goto out;
 853	}
 854
 855	mutex_lock(&block_group->free_space_lock);
 856	ret = __remove_from_free_space_tree(trans, block_group, path, start,
 857					    size);
 858	mutex_unlock(&block_group->free_space_lock);
 859
 860	btrfs_put_block_group(block_group);
 861out:
 862	btrfs_free_path(path);
 863	if (ret)
 864		btrfs_abort_transaction(trans, ret);
 865	return ret;
 866}
 867
 868static int add_free_space_extent(struct btrfs_trans_handle *trans,
 869				 struct btrfs_block_group *block_group,
 870				 struct btrfs_path *path,
 871				 u64 start, u64 size)
 872{
 873	struct btrfs_root *root = btrfs_free_space_root(block_group);
 874	struct btrfs_key key, new_key;
 875	u64 found_start, found_end;
 876	u64 end = start + size;
 877	int new_extents = 1;
 878	int ret;
 879
 880	/*
 881	 * We are adding a new extent of free space, but we need to merge
 882	 * extents. There are four cases here:
 883	 *
 884	 * 1. The new extent does not have any immediate neighbors to merge
 885	 * with: add the new key and increment the free space extent count. We
 886	 * may need to convert the block group to bitmaps as a result.
 887	 * 2. The new extent has an immediate neighbor before it: remove the
 888	 * previous key and insert a new key combining both of them. There is no
 889	 * net change in the number of extents.
 890	 * 3. The new extent has an immediate neighbor after it: remove the next
 891	 * key and insert a new key combining both of them. There is no net
 892	 * change in the number of extents.
 893	 * 4. The new extent has immediate neighbors on both sides: remove both
 894	 * of the keys and insert a new key combining all of them. Where we used
 895	 * to have two extents, we now have one, so decrement the extent count.
 896	 */
 897
 898	new_key.objectid = start;
 899	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 900	new_key.offset = size;
 901
 902	/* Search for a neighbor on the left. */
 903	if (start == block_group->start)
 904		goto right;
 905	key.objectid = start - 1;
 906	key.type = (u8)-1;
 907	key.offset = (u64)-1;
 908
 909	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 910	if (ret)
 911		goto out;
 912
 913	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 914
 915	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
 916		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
 917		btrfs_release_path(path);
 918		goto right;
 919	}
 920
 921	found_start = key.objectid;
 922	found_end = key.objectid + key.offset;
 923	ASSERT(found_start >= block_group->start &&
 924	       found_end > block_group->start);
 925	ASSERT(found_start < start && found_end <= start);
 926
 927	/*
 928	 * Delete the neighbor on the left and absorb it into the new key (cases
 929	 * 2 and 4).
 930	 */
 931	if (found_end == start) {
 932		ret = btrfs_del_item(trans, root, path);
 933		if (ret)
 934			goto out;
 935		new_key.objectid = found_start;
 936		new_key.offset += key.offset;
 937		new_extents--;
 938	}
 939	btrfs_release_path(path);
 940
 941right:
 942	/* Search for a neighbor on the right. */
 943	if (end == block_group->start + block_group->length)
 944		goto insert;
 945	key.objectid = end;
 946	key.type = (u8)-1;
 947	key.offset = (u64)-1;
 948
 949	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 950	if (ret)
 951		goto out;
 952
 953	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 954
 955	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
 956		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
 957		btrfs_release_path(path);
 958		goto insert;
 959	}
 960
 961	found_start = key.objectid;
 962	found_end = key.objectid + key.offset;
 963	ASSERT(found_start >= block_group->start &&
 964	       found_end > block_group->start);
 965	ASSERT((found_start < start && found_end <= start) ||
 966	       (found_start >= end && found_end > end));
 967
 968	/*
 969	 * Delete the neighbor on the right and absorb it into the new key
 970	 * (cases 3 and 4).
 971	 */
 972	if (found_start == end) {
 973		ret = btrfs_del_item(trans, root, path);
 974		if (ret)
 975			goto out;
 976		new_key.offset += key.offset;
 977		new_extents--;
 978	}
 979	btrfs_release_path(path);
 980
 981insert:
 982	/* Insert the new key (cases 1-4). */
 983	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
 984	if (ret)
 985		goto out;
 986
 987	btrfs_release_path(path);
 988	ret = update_free_space_extent_count(trans, block_group, path,
 989					     new_extents);
 990
 991out:
 992	return ret;
 993}
 994
 995EXPORT_FOR_TESTS
 996int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
 997			     struct btrfs_block_group *block_group,
 998			     struct btrfs_path *path, u64 start, u64 size)
 999{
1000	struct btrfs_free_space_info *info;
1001	u32 flags;
1002	int ret;
1003
1004	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
1005		ret = __add_block_group_free_space(trans, block_group, path);
1006		if (ret)
1007			return ret;
1008	}
1009
1010	info = search_free_space_info(NULL, block_group, path, 0);
1011	if (IS_ERR(info))
1012		return PTR_ERR(info);
1013	flags = btrfs_free_space_flags(path->nodes[0], info);
1014	btrfs_release_path(path);
1015
1016	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
1017		return modify_free_space_bitmap(trans, block_group, path,
1018						start, size, 0);
1019	} else {
1020		return add_free_space_extent(trans, block_group, path, start,
1021					     size);
1022	}
1023}
1024
1025int add_to_free_space_tree(struct btrfs_trans_handle *trans,
1026			   u64 start, u64 size)
1027{
1028	struct btrfs_block_group *block_group;
1029	struct btrfs_path *path;
1030	int ret;
1031
1032	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1033		return 0;
1034
1035	path = btrfs_alloc_path();
1036	if (!path) {
1037		ret = -ENOMEM;
1038		goto out;
1039	}
1040
1041	block_group = btrfs_lookup_block_group(trans->fs_info, start);
1042	if (!block_group) {
1043		ASSERT(0);
1044		ret = -ENOENT;
1045		goto out;
1046	}
1047
1048	mutex_lock(&block_group->free_space_lock);
1049	ret = __add_to_free_space_tree(trans, block_group, path, start, size);
1050	mutex_unlock(&block_group->free_space_lock);
1051
1052	btrfs_put_block_group(block_group);
1053out:
1054	btrfs_free_path(path);
1055	if (ret)
1056		btrfs_abort_transaction(trans, ret);
1057	return ret;
1058}
1059
1060/*
1061 * Populate the free space tree by walking the extent tree. Operations on the
1062 * extent tree that happen as a result of writes to the free space tree will go
1063 * through the normal add/remove hooks.
1064 */
1065static int populate_free_space_tree(struct btrfs_trans_handle *trans,
1066				    struct btrfs_block_group *block_group)
1067{
1068	struct btrfs_root *extent_root;
1069	struct btrfs_path *path, *path2;
1070	struct btrfs_key key;
1071	u64 start, end;
1072	int ret;
1073
1074	path = btrfs_alloc_path();
1075	if (!path)
1076		return -ENOMEM;
1077	path->reada = READA_FORWARD;
1078
1079	path2 = btrfs_alloc_path();
1080	if (!path2) {
1081		btrfs_free_path(path);
1082		return -ENOMEM;
1083	}
1084
1085	ret = add_new_free_space_info(trans, block_group, path2);
1086	if (ret)
1087		goto out;
1088
1089	mutex_lock(&block_group->free_space_lock);
1090
1091	/*
1092	 * Iterate through all of the extent and metadata items in this block
1093	 * group, adding the free space between them and the free space at the
1094	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1095	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1096	 * contained in.
1097	 */
1098	key.objectid = block_group->start;
1099	key.type = BTRFS_EXTENT_ITEM_KEY;
1100	key.offset = 0;
1101
1102	extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
1103	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
1104	if (ret < 0)
1105		goto out_locked;
1106	ASSERT(ret == 0);
1107
1108	start = block_group->start;
1109	end = block_group->start + block_group->length;
1110	while (1) {
1111		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1112
1113		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
1114		    key.type == BTRFS_METADATA_ITEM_KEY) {
1115			if (key.objectid >= end)
1116				break;
1117
1118			if (start < key.objectid) {
1119				ret = __add_to_free_space_tree(trans,
1120							       block_group,
1121							       path2, start,
1122							       key.objectid -
1123							       start);
1124				if (ret)
1125					goto out_locked;
1126			}
1127			start = key.objectid;
1128			if (key.type == BTRFS_METADATA_ITEM_KEY)
1129				start += trans->fs_info->nodesize;
1130			else
1131				start += key.offset;
1132		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1133			if (key.objectid != block_group->start)
1134				break;
1135		}
1136
1137		ret = btrfs_next_item(extent_root, path);
1138		if (ret < 0)
1139			goto out_locked;
1140		if (ret)
1141			break;
1142	}
1143	if (start < end) {
1144		ret = __add_to_free_space_tree(trans, block_group, path2,
1145					       start, end - start);
1146		if (ret)
1147			goto out_locked;
1148	}
1149
1150	ret = 0;
1151out_locked:
1152	mutex_unlock(&block_group->free_space_lock);
1153out:
1154	btrfs_free_path(path2);
1155	btrfs_free_path(path);
1156	return ret;
1157}
1158
1159int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
1160{
1161	struct btrfs_trans_handle *trans;
1162	struct btrfs_root *tree_root = fs_info->tree_root;
1163	struct btrfs_root *free_space_root;
1164	struct btrfs_block_group *block_group;
1165	struct rb_node *node;
1166	int ret;
1167
1168	trans = btrfs_start_transaction(tree_root, 0);
1169	if (IS_ERR(trans))
1170		return PTR_ERR(trans);
1171
1172	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1173	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1174	free_space_root = btrfs_create_tree(trans,
1175					    BTRFS_FREE_SPACE_TREE_OBJECTID);
1176	if (IS_ERR(free_space_root)) {
1177		ret = PTR_ERR(free_space_root);
1178		goto abort;
1179	}
1180	ret = btrfs_global_root_insert(free_space_root);
1181	if (ret) {
1182		btrfs_put_root(free_space_root);
1183		goto abort;
1184	}
1185
1186	node = rb_first_cached(&fs_info->block_group_cache_tree);
1187	while (node) {
1188		block_group = rb_entry(node, struct btrfs_block_group,
1189				       cache_node);
1190		ret = populate_free_space_tree(trans, block_group);
1191		if (ret)
1192			goto abort;
1193		node = rb_next(node);
1194	}
1195
1196	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1197	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1198	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1199	ret = btrfs_commit_transaction(trans);
1200
1201	/*
1202	 * Now that we've committed the transaction any reading of our commit
1203	 * root will be safe, so we can cache from the free space tree now.
1204	 */
1205	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1206	return ret;
1207
1208abort:
1209	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1210	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1211	btrfs_abort_transaction(trans, ret);
1212	btrfs_end_transaction(trans);
1213	return ret;
1214}
1215
1216static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1217				 struct btrfs_root *root)
1218{
1219	struct btrfs_path *path;
1220	struct btrfs_key key;
1221	int nr;
1222	int ret;
1223
1224	path = btrfs_alloc_path();
1225	if (!path)
1226		return -ENOMEM;
1227
 
 
1228	key.objectid = 0;
1229	key.type = 0;
1230	key.offset = 0;
1231
1232	while (1) {
1233		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1234		if (ret < 0)
1235			goto out;
1236
1237		nr = btrfs_header_nritems(path->nodes[0]);
1238		if (!nr)
1239			break;
1240
1241		path->slots[0] = 0;
1242		ret = btrfs_del_items(trans, root, path, 0, nr);
1243		if (ret)
1244			goto out;
1245
1246		btrfs_release_path(path);
1247	}
1248
1249	ret = 0;
1250out:
1251	btrfs_free_path(path);
1252	return ret;
1253}
1254
1255int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
1256{
1257	struct btrfs_trans_handle *trans;
1258	struct btrfs_root *tree_root = fs_info->tree_root;
1259	struct btrfs_key key = {
1260		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1261		.type = BTRFS_ROOT_ITEM_KEY,
1262		.offset = 0,
1263	};
1264	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1265	int ret;
1266
1267	trans = btrfs_start_transaction(tree_root, 0);
1268	if (IS_ERR(trans))
1269		return PTR_ERR(trans);
1270
1271	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1272	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
 
1273
1274	ret = clear_free_space_tree(trans, free_space_root);
1275	if (ret)
1276		goto abort;
1277
1278	ret = btrfs_del_root(trans, &free_space_root->root_key);
1279	if (ret)
1280		goto abort;
1281
1282	btrfs_global_root_delete(free_space_root);
1283	list_del(&free_space_root->dirty_list);
1284
1285	btrfs_tree_lock(free_space_root->node);
1286	btrfs_clean_tree_block(free_space_root->node);
1287	btrfs_tree_unlock(free_space_root->node);
1288	btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
1289			      free_space_root->node, 0, 1);
1290
1291	btrfs_put_root(free_space_root);
1292
1293	return btrfs_commit_transaction(trans);
1294
1295abort:
1296	btrfs_abort_transaction(trans, ret);
1297	btrfs_end_transaction(trans);
1298	return ret;
1299}
1300
1301static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
1302					struct btrfs_block_group *block_group,
1303					struct btrfs_path *path)
1304{
1305	int ret;
1306
1307	clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags);
1308
1309	ret = add_new_free_space_info(trans, block_group, path);
1310	if (ret)
1311		return ret;
1312
1313	return __add_to_free_space_tree(trans, block_group, path,
1314					block_group->start,
1315					block_group->length);
1316}
1317
1318int add_block_group_free_space(struct btrfs_trans_handle *trans,
1319			       struct btrfs_block_group *block_group)
1320{
1321	struct btrfs_fs_info *fs_info = trans->fs_info;
1322	struct btrfs_path *path = NULL;
1323	int ret = 0;
1324
1325	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1326		return 0;
1327
1328	mutex_lock(&block_group->free_space_lock);
1329	if (!test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags))
1330		goto out;
1331
1332	path = btrfs_alloc_path();
1333	if (!path) {
1334		ret = -ENOMEM;
1335		goto out;
1336	}
1337
1338	ret = __add_block_group_free_space(trans, block_group, path);
1339
1340out:
1341	btrfs_free_path(path);
1342	mutex_unlock(&block_group->free_space_lock);
1343	if (ret)
1344		btrfs_abort_transaction(trans, ret);
1345	return ret;
1346}
1347
1348int remove_block_group_free_space(struct btrfs_trans_handle *trans,
1349				  struct btrfs_block_group *block_group)
1350{
1351	struct btrfs_root *root = btrfs_free_space_root(block_group);
1352	struct btrfs_path *path;
1353	struct btrfs_key key, found_key;
1354	struct extent_buffer *leaf;
1355	u64 start, end;
1356	int done = 0, nr;
1357	int ret;
1358
1359	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1360		return 0;
1361
1362	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
1363		/* We never added this block group to the free space tree. */
1364		return 0;
1365	}
1366
1367	path = btrfs_alloc_path();
1368	if (!path) {
1369		ret = -ENOMEM;
1370		goto out;
1371	}
1372
1373	start = block_group->start;
1374	end = block_group->start + block_group->length;
1375
1376	key.objectid = end - 1;
1377	key.type = (u8)-1;
1378	key.offset = (u64)-1;
1379
1380	while (!done) {
1381		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
1382		if (ret)
1383			goto out;
1384
1385		leaf = path->nodes[0];
1386		nr = 0;
1387		path->slots[0]++;
1388		while (path->slots[0] > 0) {
1389			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
1390
1391			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
1392				ASSERT(found_key.objectid == block_group->start);
1393				ASSERT(found_key.offset == block_group->length);
1394				done = 1;
1395				nr++;
1396				path->slots[0]--;
1397				break;
1398			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
1399				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
1400				ASSERT(found_key.objectid >= start);
1401				ASSERT(found_key.objectid < end);
1402				ASSERT(found_key.objectid + found_key.offset <= end);
1403				nr++;
1404				path->slots[0]--;
1405			} else {
1406				ASSERT(0);
1407			}
1408		}
1409
1410		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
1411		if (ret)
1412			goto out;
1413		btrfs_release_path(path);
1414	}
1415
1416	ret = 0;
1417out:
1418	btrfs_free_path(path);
1419	if (ret)
1420		btrfs_abort_transaction(trans, ret);
1421	return ret;
1422}
1423
1424static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
1425				   struct btrfs_path *path,
1426				   u32 expected_extent_count)
1427{
1428	struct btrfs_block_group *block_group;
1429	struct btrfs_fs_info *fs_info;
1430	struct btrfs_root *root;
1431	struct btrfs_key key;
1432	int prev_bit = 0, bit;
1433	/* Initialize to silence GCC. */
1434	u64 extent_start = 0;
1435	u64 end, offset;
1436	u64 total_found = 0;
1437	u32 extent_count = 0;
1438	int ret;
1439
1440	block_group = caching_ctl->block_group;
1441	fs_info = block_group->fs_info;
1442	root = btrfs_free_space_root(block_group);
1443
1444	end = block_group->start + block_group->length;
1445
1446	while (1) {
1447		ret = btrfs_next_item(root, path);
1448		if (ret < 0)
1449			goto out;
1450		if (ret)
1451			break;
1452
1453		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1454
1455		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1456			break;
1457
1458		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
1459		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1460
 
 
1461		offset = key.objectid;
1462		while (offset < key.objectid + key.offset) {
1463			bit = free_space_test_bit(block_group, path, offset);
1464			if (prev_bit == 0 && bit == 1) {
1465				extent_start = offset;
1466			} else if (prev_bit == 1 && bit == 0) {
1467				total_found += add_new_free_space(block_group,
1468								  extent_start,
1469								  offset);
1470				if (total_found > CACHING_CTL_WAKE_UP) {
1471					total_found = 0;
1472					wake_up(&caching_ctl->wait);
1473				}
1474				extent_count++;
1475			}
1476			prev_bit = bit;
1477			offset += fs_info->sectorsize;
1478		}
1479	}
1480	if (prev_bit == 1) {
1481		total_found += add_new_free_space(block_group, extent_start,
1482						  end);
1483		extent_count++;
1484	}
1485
1486	if (extent_count != expected_extent_count) {
1487		btrfs_err(fs_info,
1488			  "incorrect extent count for %llu; counted %u, expected %u",
1489			  block_group->start, extent_count,
1490			  expected_extent_count);
1491		ASSERT(0);
1492		ret = -EIO;
1493		goto out;
1494	}
1495
 
 
1496	ret = 0;
1497out:
1498	return ret;
1499}
1500
1501static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
1502				   struct btrfs_path *path,
1503				   u32 expected_extent_count)
1504{
1505	struct btrfs_block_group *block_group;
1506	struct btrfs_fs_info *fs_info;
1507	struct btrfs_root *root;
1508	struct btrfs_key key;
1509	u64 end;
1510	u64 total_found = 0;
1511	u32 extent_count = 0;
1512	int ret;
1513
1514	block_group = caching_ctl->block_group;
1515	fs_info = block_group->fs_info;
1516	root = btrfs_free_space_root(block_group);
1517
1518	end = block_group->start + block_group->length;
1519
1520	while (1) {
1521		ret = btrfs_next_item(root, path);
1522		if (ret < 0)
1523			goto out;
1524		if (ret)
1525			break;
1526
1527		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1528
1529		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1530			break;
1531
1532		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
1533		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1534
 
 
1535		total_found += add_new_free_space(block_group, key.objectid,
1536						  key.objectid + key.offset);
1537		if (total_found > CACHING_CTL_WAKE_UP) {
1538			total_found = 0;
1539			wake_up(&caching_ctl->wait);
1540		}
1541		extent_count++;
1542	}
1543
1544	if (extent_count != expected_extent_count) {
1545		btrfs_err(fs_info,
1546			  "incorrect extent count for %llu; counted %u, expected %u",
1547			  block_group->start, extent_count,
1548			  expected_extent_count);
1549		ASSERT(0);
1550		ret = -EIO;
1551		goto out;
1552	}
 
 
1553
1554	ret = 0;
1555out:
1556	return ret;
1557}
1558
1559int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1560{
1561	struct btrfs_block_group *block_group;
1562	struct btrfs_free_space_info *info;
1563	struct btrfs_path *path;
1564	u32 extent_count, flags;
1565	int ret;
1566
1567	block_group = caching_ctl->block_group;
1568
1569	path = btrfs_alloc_path();
1570	if (!path)
1571		return -ENOMEM;
1572
1573	/*
1574	 * Just like caching_thread() doesn't want to deadlock on the extent
1575	 * tree, we don't want to deadlock on the free space tree.
1576	 */
1577	path->skip_locking = 1;
1578	path->search_commit_root = 1;
1579	path->reada = READA_FORWARD;
1580
1581	info = search_free_space_info(NULL, block_group, path, 0);
1582	if (IS_ERR(info)) {
1583		ret = PTR_ERR(info);
1584		goto out;
1585	}
1586	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1587	flags = btrfs_free_space_flags(path->nodes[0], info);
1588
1589	/*
1590	 * We left path pointing to the free space info item, so now
1591	 * load_free_space_foo can just iterate through the free space tree from
1592	 * there.
1593	 */
1594	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1595		ret = load_free_space_bitmaps(caching_ctl, path, extent_count);
1596	else
1597		ret = load_free_space_extents(caching_ctl, path, extent_count);
1598
1599out:
1600	btrfs_free_path(path);
1601	return ret;
1602}
v5.9
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2015 Facebook.  All rights reserved.
   4 */
   5
   6#include <linux/kernel.h>
   7#include <linux/sched/mm.h>
 
   8#include "ctree.h"
   9#include "disk-io.h"
  10#include "locking.h"
  11#include "free-space-tree.h"
  12#include "transaction.h"
  13#include "block-group.h"
 
 
 
 
  14
  15static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
  16					struct btrfs_block_group *block_group,
  17					struct btrfs_path *path);
  18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  19void set_free_space_tree_thresholds(struct btrfs_block_group *cache)
  20{
  21	u32 bitmap_range;
  22	size_t bitmap_size;
  23	u64 num_bitmaps, total_bitmap_size;
  24
  25	if (WARN_ON(cache->length == 0))
  26		btrfs_warn(cache->fs_info, "block group %llu length is zero",
  27			   cache->start);
  28
  29	/*
  30	 * We convert to bitmaps when the disk space required for using extents
  31	 * exceeds that required for using bitmaps.
  32	 */
  33	bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
  34	num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
  35	bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
  36	total_bitmap_size = num_bitmaps * bitmap_size;
  37	cache->bitmap_high_thresh = div_u64(total_bitmap_size,
  38					    sizeof(struct btrfs_item));
  39
  40	/*
  41	 * We allow for a small buffer between the high threshold and low
  42	 * threshold to avoid thrashing back and forth between the two formats.
  43	 */
  44	if (cache->bitmap_high_thresh > 100)
  45		cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
  46	else
  47		cache->bitmap_low_thresh = 0;
  48}
  49
  50static int add_new_free_space_info(struct btrfs_trans_handle *trans,
  51				   struct btrfs_block_group *block_group,
  52				   struct btrfs_path *path)
  53{
  54	struct btrfs_root *root = trans->fs_info->free_space_root;
  55	struct btrfs_free_space_info *info;
  56	struct btrfs_key key;
  57	struct extent_buffer *leaf;
  58	int ret;
  59
  60	key.objectid = block_group->start;
  61	key.type = BTRFS_FREE_SPACE_INFO_KEY;
  62	key.offset = block_group->length;
  63
  64	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
  65	if (ret)
  66		goto out;
  67
  68	leaf = path->nodes[0];
  69	info = btrfs_item_ptr(leaf, path->slots[0],
  70			      struct btrfs_free_space_info);
  71	btrfs_set_free_space_extent_count(leaf, info, 0);
  72	btrfs_set_free_space_flags(leaf, info, 0);
  73	btrfs_mark_buffer_dirty(leaf);
  74
  75	ret = 0;
  76out:
  77	btrfs_release_path(path);
  78	return ret;
  79}
  80
  81EXPORT_FOR_TESTS
  82struct btrfs_free_space_info *search_free_space_info(
  83		struct btrfs_trans_handle *trans,
  84		struct btrfs_block_group *block_group,
  85		struct btrfs_path *path, int cow)
  86{
  87	struct btrfs_fs_info *fs_info = block_group->fs_info;
  88	struct btrfs_root *root = fs_info->free_space_root;
  89	struct btrfs_key key;
  90	int ret;
  91
  92	key.objectid = block_group->start;
  93	key.type = BTRFS_FREE_SPACE_INFO_KEY;
  94	key.offset = block_group->length;
  95
  96	ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
  97	if (ret < 0)
  98		return ERR_PTR(ret);
  99	if (ret != 0) {
 100		btrfs_warn(fs_info, "missing free space info for %llu",
 101			   block_group->start);
 102		ASSERT(0);
 103		return ERR_PTR(-ENOENT);
 104	}
 105
 106	return btrfs_item_ptr(path->nodes[0], path->slots[0],
 107			      struct btrfs_free_space_info);
 108}
 109
 110/*
 111 * btrfs_search_slot() but we're looking for the greatest key less than the
 112 * passed key.
 113 */
 114static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
 115				  struct btrfs_root *root,
 116				  struct btrfs_key *key, struct btrfs_path *p,
 117				  int ins_len, int cow)
 118{
 119	int ret;
 120
 121	ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
 122	if (ret < 0)
 123		return ret;
 124
 125	if (ret == 0) {
 126		ASSERT(0);
 127		return -EIO;
 128	}
 129
 130	if (p->slots[0] == 0) {
 131		ASSERT(0);
 132		return -EIO;
 133	}
 134	p->slots[0]--;
 135
 136	return 0;
 137}
 138
 139static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
 
 140{
 141	return DIV_ROUND_UP((u32)div_u64(size, sectorsize), BITS_PER_BYTE);
 142}
 143
 144static unsigned long *alloc_bitmap(u32 bitmap_size)
 145{
 146	unsigned long *ret;
 147	unsigned int nofs_flag;
 148	u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
 149
 150	/*
 151	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
 152	 * into the filesystem as the free space bitmap can be modified in the
 153	 * critical section of a transaction commit.
 154	 *
 155	 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
 156	 * know that recursion is unsafe.
 157	 */
 158	nofs_flag = memalloc_nofs_save();
 159	ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
 160	memalloc_nofs_restore(nofs_flag);
 161	return ret;
 162}
 163
 164static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
 165{
 166	u8 *p = ((u8 *)map) + BIT_BYTE(start);
 167	const unsigned int size = start + len;
 168	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
 169	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
 170
 171	while (len - bits_to_set >= 0) {
 172		*p |= mask_to_set;
 173		len -= bits_to_set;
 174		bits_to_set = BITS_PER_BYTE;
 175		mask_to_set = ~0;
 176		p++;
 177	}
 178	if (len) {
 179		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
 180		*p |= mask_to_set;
 181	}
 182}
 183
 184EXPORT_FOR_TESTS
 185int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 186				  struct btrfs_block_group *block_group,
 187				  struct btrfs_path *path)
 188{
 189	struct btrfs_fs_info *fs_info = trans->fs_info;
 190	struct btrfs_root *root = fs_info->free_space_root;
 191	struct btrfs_free_space_info *info;
 192	struct btrfs_key key, found_key;
 193	struct extent_buffer *leaf;
 194	unsigned long *bitmap;
 195	char *bitmap_cursor;
 196	u64 start, end;
 197	u64 bitmap_range, i;
 198	u32 bitmap_size, flags, expected_extent_count;
 199	u32 extent_count = 0;
 200	int done = 0, nr;
 201	int ret;
 202
 203	bitmap_size = free_space_bitmap_size(block_group->length,
 204					     fs_info->sectorsize);
 205	bitmap = alloc_bitmap(bitmap_size);
 206	if (!bitmap) {
 207		ret = -ENOMEM;
 208		goto out;
 209	}
 210
 211	start = block_group->start;
 212	end = block_group->start + block_group->length;
 213
 214	key.objectid = end - 1;
 215	key.type = (u8)-1;
 216	key.offset = (u64)-1;
 217
 218	while (!done) {
 219		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 220		if (ret)
 221			goto out;
 222
 223		leaf = path->nodes[0];
 224		nr = 0;
 225		path->slots[0]++;
 226		while (path->slots[0] > 0) {
 227			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
 228
 229			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
 230				ASSERT(found_key.objectid == block_group->start);
 231				ASSERT(found_key.offset == block_group->length);
 232				done = 1;
 233				break;
 234			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
 235				u64 first, last;
 236
 237				ASSERT(found_key.objectid >= start);
 238				ASSERT(found_key.objectid < end);
 239				ASSERT(found_key.objectid + found_key.offset <= end);
 240
 241				first = div_u64(found_key.objectid - start,
 242						fs_info->sectorsize);
 243				last = div_u64(found_key.objectid + found_key.offset - start,
 244					       fs_info->sectorsize);
 245				le_bitmap_set(bitmap, first, last - first);
 246
 247				extent_count++;
 248				nr++;
 249				path->slots[0]--;
 250			} else {
 251				ASSERT(0);
 252			}
 253		}
 254
 255		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
 256		if (ret)
 257			goto out;
 258		btrfs_release_path(path);
 259	}
 260
 261	info = search_free_space_info(trans, block_group, path, 1);
 262	if (IS_ERR(info)) {
 263		ret = PTR_ERR(info);
 264		goto out;
 265	}
 266	leaf = path->nodes[0];
 267	flags = btrfs_free_space_flags(leaf, info);
 268	flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
 269	btrfs_set_free_space_flags(leaf, info, flags);
 270	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
 271	btrfs_mark_buffer_dirty(leaf);
 272	btrfs_release_path(path);
 273
 274	if (extent_count != expected_extent_count) {
 275		btrfs_err(fs_info,
 276			  "incorrect extent count for %llu; counted %u, expected %u",
 277			  block_group->start, extent_count,
 278			  expected_extent_count);
 279		ASSERT(0);
 280		ret = -EIO;
 281		goto out;
 282	}
 283
 284	bitmap_cursor = (char *)bitmap;
 285	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
 286	i = start;
 287	while (i < end) {
 288		unsigned long ptr;
 289		u64 extent_size;
 290		u32 data_size;
 291
 292		extent_size = min(end - i, bitmap_range);
 293		data_size = free_space_bitmap_size(extent_size,
 294						   fs_info->sectorsize);
 295
 296		key.objectid = i;
 297		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
 298		key.offset = extent_size;
 299
 300		ret = btrfs_insert_empty_item(trans, root, path, &key,
 301					      data_size);
 302		if (ret)
 303			goto out;
 304
 305		leaf = path->nodes[0];
 306		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 307		write_extent_buffer(leaf, bitmap_cursor, ptr,
 308				    data_size);
 309		btrfs_mark_buffer_dirty(leaf);
 310		btrfs_release_path(path);
 311
 312		i += extent_size;
 313		bitmap_cursor += data_size;
 314	}
 315
 316	ret = 0;
 317out:
 318	kvfree(bitmap);
 319	if (ret)
 320		btrfs_abort_transaction(trans, ret);
 321	return ret;
 322}
 323
 324EXPORT_FOR_TESTS
 325int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 326				  struct btrfs_block_group *block_group,
 327				  struct btrfs_path *path)
 328{
 329	struct btrfs_fs_info *fs_info = trans->fs_info;
 330	struct btrfs_root *root = fs_info->free_space_root;
 331	struct btrfs_free_space_info *info;
 332	struct btrfs_key key, found_key;
 333	struct extent_buffer *leaf;
 334	unsigned long *bitmap;
 335	u64 start, end;
 336	u32 bitmap_size, flags, expected_extent_count;
 337	unsigned long nrbits, start_bit, end_bit;
 338	u32 extent_count = 0;
 339	int done = 0, nr;
 340	int ret;
 341
 342	bitmap_size = free_space_bitmap_size(block_group->length,
 343					     fs_info->sectorsize);
 344	bitmap = alloc_bitmap(bitmap_size);
 345	if (!bitmap) {
 346		ret = -ENOMEM;
 347		goto out;
 348	}
 349
 350	start = block_group->start;
 351	end = block_group->start + block_group->length;
 352
 353	key.objectid = end - 1;
 354	key.type = (u8)-1;
 355	key.offset = (u64)-1;
 356
 357	while (!done) {
 358		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 359		if (ret)
 360			goto out;
 361
 362		leaf = path->nodes[0];
 363		nr = 0;
 364		path->slots[0]++;
 365		while (path->slots[0] > 0) {
 366			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
 367
 368			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
 369				ASSERT(found_key.objectid == block_group->start);
 370				ASSERT(found_key.offset == block_group->length);
 371				done = 1;
 372				break;
 373			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
 374				unsigned long ptr;
 375				char *bitmap_cursor;
 376				u32 bitmap_pos, data_size;
 377
 378				ASSERT(found_key.objectid >= start);
 379				ASSERT(found_key.objectid < end);
 380				ASSERT(found_key.objectid + found_key.offset <= end);
 381
 382				bitmap_pos = div_u64(found_key.objectid - start,
 383						     fs_info->sectorsize *
 384						     BITS_PER_BYTE);
 385				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
 386				data_size = free_space_bitmap_size(found_key.offset,
 387								   fs_info->sectorsize);
 388
 389				ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
 390				read_extent_buffer(leaf, bitmap_cursor, ptr,
 391						   data_size);
 392
 393				nr++;
 394				path->slots[0]--;
 395			} else {
 396				ASSERT(0);
 397			}
 398		}
 399
 400		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
 401		if (ret)
 402			goto out;
 403		btrfs_release_path(path);
 404	}
 405
 406	info = search_free_space_info(trans, block_group, path, 1);
 407	if (IS_ERR(info)) {
 408		ret = PTR_ERR(info);
 409		goto out;
 410	}
 411	leaf = path->nodes[0];
 412	flags = btrfs_free_space_flags(leaf, info);
 413	flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
 414	btrfs_set_free_space_flags(leaf, info, flags);
 415	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
 416	btrfs_mark_buffer_dirty(leaf);
 417	btrfs_release_path(path);
 418
 419	nrbits = div_u64(block_group->length, block_group->fs_info->sectorsize);
 420	start_bit = find_next_bit_le(bitmap, nrbits, 0);
 421
 422	while (start_bit < nrbits) {
 423		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
 424		ASSERT(start_bit < end_bit);
 425
 426		key.objectid = start + start_bit * block_group->fs_info->sectorsize;
 427		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 428		key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;
 429
 430		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 431		if (ret)
 432			goto out;
 433		btrfs_release_path(path);
 434
 435		extent_count++;
 436
 437		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
 438	}
 439
 440	if (extent_count != expected_extent_count) {
 441		btrfs_err(fs_info,
 442			  "incorrect extent count for %llu; counted %u, expected %u",
 443			  block_group->start, extent_count,
 444			  expected_extent_count);
 445		ASSERT(0);
 446		ret = -EIO;
 447		goto out;
 448	}
 449
 450	ret = 0;
 451out:
 452	kvfree(bitmap);
 453	if (ret)
 454		btrfs_abort_transaction(trans, ret);
 455	return ret;
 456}
 457
 458static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
 459					  struct btrfs_block_group *block_group,
 460					  struct btrfs_path *path,
 461					  int new_extents)
 462{
 463	struct btrfs_free_space_info *info;
 464	u32 flags;
 465	u32 extent_count;
 466	int ret = 0;
 467
 468	if (new_extents == 0)
 469		return 0;
 470
 471	info = search_free_space_info(trans, block_group, path, 1);
 472	if (IS_ERR(info)) {
 473		ret = PTR_ERR(info);
 474		goto out;
 475	}
 476	flags = btrfs_free_space_flags(path->nodes[0], info);
 477	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
 478
 479	extent_count += new_extents;
 480	btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
 481	btrfs_mark_buffer_dirty(path->nodes[0]);
 482	btrfs_release_path(path);
 483
 484	if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
 485	    extent_count > block_group->bitmap_high_thresh) {
 486		ret = convert_free_space_to_bitmaps(trans, block_group, path);
 487	} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
 488		   extent_count < block_group->bitmap_low_thresh) {
 489		ret = convert_free_space_to_extents(trans, block_group, path);
 490	}
 491
 492out:
 493	return ret;
 494}
 495
 496EXPORT_FOR_TESTS
 497int free_space_test_bit(struct btrfs_block_group *block_group,
 498			struct btrfs_path *path, u64 offset)
 499{
 500	struct extent_buffer *leaf;
 501	struct btrfs_key key;
 502	u64 found_start, found_end;
 503	unsigned long ptr, i;
 504
 505	leaf = path->nodes[0];
 506	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 507	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
 508
 509	found_start = key.objectid;
 510	found_end = key.objectid + key.offset;
 511	ASSERT(offset >= found_start && offset < found_end);
 512
 513	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 514	i = div_u64(offset - found_start,
 515		    block_group->fs_info->sectorsize);
 516	return !!extent_buffer_test_bit(leaf, ptr, i);
 517}
 518
 519static void free_space_set_bits(struct btrfs_block_group *block_group,
 520				struct btrfs_path *path, u64 *start, u64 *size,
 521				int bit)
 522{
 523	struct btrfs_fs_info *fs_info = block_group->fs_info;
 524	struct extent_buffer *leaf;
 525	struct btrfs_key key;
 526	u64 end = *start + *size;
 527	u64 found_start, found_end;
 528	unsigned long ptr, first, last;
 529
 530	leaf = path->nodes[0];
 531	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 532	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
 533
 534	found_start = key.objectid;
 535	found_end = key.objectid + key.offset;
 536	ASSERT(*start >= found_start && *start < found_end);
 537	ASSERT(end > found_start);
 538
 539	if (end > found_end)
 540		end = found_end;
 541
 542	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 543	first = div_u64(*start - found_start, fs_info->sectorsize);
 544	last = div_u64(end - found_start, fs_info->sectorsize);
 545	if (bit)
 546		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
 547	else
 548		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
 549	btrfs_mark_buffer_dirty(leaf);
 550
 551	*size -= end - *start;
 552	*start = end;
 553}
 554
 555/*
 556 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
 557 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
 558 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
 559 * looking for.
 560 */
 561static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
 562				  struct btrfs_root *root, struct btrfs_path *p)
 563{
 564	struct btrfs_key key;
 565
 566	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
 567		p->slots[0]++;
 568		return 0;
 569	}
 570
 571	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
 572	btrfs_release_path(p);
 573
 574	key.objectid += key.offset;
 575	key.type = (u8)-1;
 576	key.offset = (u64)-1;
 577
 578	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
 579}
 580
 581/*
 582 * If remove is 1, then we are removing free space, thus clearing bits in the
 583 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
 584 * the bitmap.
 585 */
 586static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
 587				    struct btrfs_block_group *block_group,
 588				    struct btrfs_path *path,
 589				    u64 start, u64 size, int remove)
 590{
 591	struct btrfs_root *root = block_group->fs_info->free_space_root;
 592	struct btrfs_key key;
 593	u64 end = start + size;
 594	u64 cur_start, cur_size;
 595	int prev_bit, next_bit;
 596	int new_extents;
 597	int ret;
 598
 599	/*
 600	 * Read the bit for the block immediately before the extent of space if
 601	 * that block is within the block group.
 602	 */
 603	if (start > block_group->start) {
 604		u64 prev_block = start - block_group->fs_info->sectorsize;
 605
 606		key.objectid = prev_block;
 607		key.type = (u8)-1;
 608		key.offset = (u64)-1;
 609
 610		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
 611		if (ret)
 612			goto out;
 613
 614		prev_bit = free_space_test_bit(block_group, path, prev_block);
 615
 616		/* The previous block may have been in the previous bitmap. */
 617		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 618		if (start >= key.objectid + key.offset) {
 619			ret = free_space_next_bitmap(trans, root, path);
 620			if (ret)
 621				goto out;
 622		}
 623	} else {
 624		key.objectid = start;
 625		key.type = (u8)-1;
 626		key.offset = (u64)-1;
 627
 628		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
 629		if (ret)
 630			goto out;
 631
 632		prev_bit = -1;
 633	}
 634
 635	/*
 636	 * Iterate over all of the bitmaps overlapped by the extent of space,
 637	 * clearing/setting bits as required.
 638	 */
 639	cur_start = start;
 640	cur_size = size;
 641	while (1) {
 642		free_space_set_bits(block_group, path, &cur_start, &cur_size,
 643				    !remove);
 644		if (cur_size == 0)
 645			break;
 646		ret = free_space_next_bitmap(trans, root, path);
 647		if (ret)
 648			goto out;
 649	}
 650
 651	/*
 652	 * Read the bit for the block immediately after the extent of space if
 653	 * that block is within the block group.
 654	 */
 655	if (end < block_group->start + block_group->length) {
 656		/* The next block may be in the next bitmap. */
 657		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 658		if (end >= key.objectid + key.offset) {
 659			ret = free_space_next_bitmap(trans, root, path);
 660			if (ret)
 661				goto out;
 662		}
 663
 664		next_bit = free_space_test_bit(block_group, path, end);
 665	} else {
 666		next_bit = -1;
 667	}
 668
 669	if (remove) {
 670		new_extents = -1;
 671		if (prev_bit == 1) {
 672			/* Leftover on the left. */
 673			new_extents++;
 674		}
 675		if (next_bit == 1) {
 676			/* Leftover on the right. */
 677			new_extents++;
 678		}
 679	} else {
 680		new_extents = 1;
 681		if (prev_bit == 1) {
 682			/* Merging with neighbor on the left. */
 683			new_extents--;
 684		}
 685		if (next_bit == 1) {
 686			/* Merging with neighbor on the right. */
 687			new_extents--;
 688		}
 689	}
 690
 691	btrfs_release_path(path);
 692	ret = update_free_space_extent_count(trans, block_group, path,
 693					     new_extents);
 694
 695out:
 696	return ret;
 697}
 698
 699static int remove_free_space_extent(struct btrfs_trans_handle *trans,
 700				    struct btrfs_block_group *block_group,
 701				    struct btrfs_path *path,
 702				    u64 start, u64 size)
 703{
 704	struct btrfs_root *root = trans->fs_info->free_space_root;
 705	struct btrfs_key key;
 706	u64 found_start, found_end;
 707	u64 end = start + size;
 708	int new_extents = -1;
 709	int ret;
 710
 711	key.objectid = start;
 712	key.type = (u8)-1;
 713	key.offset = (u64)-1;
 714
 715	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 716	if (ret)
 717		goto out;
 718
 719	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 720
 721	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
 722
 723	found_start = key.objectid;
 724	found_end = key.objectid + key.offset;
 725	ASSERT(start >= found_start && end <= found_end);
 726
 727	/*
 728	 * Okay, now that we've found the free space extent which contains the
 729	 * free space that we are removing, there are four cases:
 730	 *
 731	 * 1. We're using the whole extent: delete the key we found and
 732	 * decrement the free space extent count.
 733	 * 2. We are using part of the extent starting at the beginning: delete
 734	 * the key we found and insert a new key representing the leftover at
 735	 * the end. There is no net change in the number of extents.
 736	 * 3. We are using part of the extent ending at the end: delete the key
 737	 * we found and insert a new key representing the leftover at the
 738	 * beginning. There is no net change in the number of extents.
 739	 * 4. We are using part of the extent in the middle: delete the key we
 740	 * found and insert two new keys representing the leftovers on each
 741	 * side. Where we used to have one extent, we now have two, so increment
 742	 * the extent count. We may need to convert the block group to bitmaps
 743	 * as a result.
 744	 */
 745
 746	/* Delete the existing key (cases 1-4). */
 747	ret = btrfs_del_item(trans, root, path);
 748	if (ret)
 749		goto out;
 750
 751	/* Add a key for leftovers at the beginning (cases 3 and 4). */
 752	if (start > found_start) {
 753		key.objectid = found_start;
 754		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 755		key.offset = start - found_start;
 756
 757		btrfs_release_path(path);
 758		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 759		if (ret)
 760			goto out;
 761		new_extents++;
 762	}
 763
 764	/* Add a key for leftovers at the end (cases 2 and 4). */
 765	if (end < found_end) {
 766		key.objectid = end;
 767		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 768		key.offset = found_end - end;
 769
 770		btrfs_release_path(path);
 771		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 772		if (ret)
 773			goto out;
 774		new_extents++;
 775	}
 776
 777	btrfs_release_path(path);
 778	ret = update_free_space_extent_count(trans, block_group, path,
 779					     new_extents);
 780
 781out:
 782	return ret;
 783}
 784
 785EXPORT_FOR_TESTS
 786int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
 787				  struct btrfs_block_group *block_group,
 788				  struct btrfs_path *path, u64 start, u64 size)
 789{
 790	struct btrfs_free_space_info *info;
 791	u32 flags;
 792	int ret;
 793
 794	if (block_group->needs_free_space) {
 795		ret = __add_block_group_free_space(trans, block_group, path);
 796		if (ret)
 797			return ret;
 798	}
 799
 800	info = search_free_space_info(NULL, block_group, path, 0);
 801	if (IS_ERR(info))
 802		return PTR_ERR(info);
 803	flags = btrfs_free_space_flags(path->nodes[0], info);
 804	btrfs_release_path(path);
 805
 806	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
 807		return modify_free_space_bitmap(trans, block_group, path,
 808						start, size, 1);
 809	} else {
 810		return remove_free_space_extent(trans, block_group, path,
 811						start, size);
 812	}
 813}
 814
 815int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
 816				u64 start, u64 size)
 817{
 818	struct btrfs_block_group *block_group;
 819	struct btrfs_path *path;
 820	int ret;
 821
 822	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
 823		return 0;
 824
 825	path = btrfs_alloc_path();
 826	if (!path) {
 827		ret = -ENOMEM;
 828		goto out;
 829	}
 830
 831	block_group = btrfs_lookup_block_group(trans->fs_info, start);
 832	if (!block_group) {
 833		ASSERT(0);
 834		ret = -ENOENT;
 835		goto out;
 836	}
 837
 838	mutex_lock(&block_group->free_space_lock);
 839	ret = __remove_from_free_space_tree(trans, block_group, path, start,
 840					    size);
 841	mutex_unlock(&block_group->free_space_lock);
 842
 843	btrfs_put_block_group(block_group);
 844out:
 845	btrfs_free_path(path);
 846	if (ret)
 847		btrfs_abort_transaction(trans, ret);
 848	return ret;
 849}
 850
 851static int add_free_space_extent(struct btrfs_trans_handle *trans,
 852				 struct btrfs_block_group *block_group,
 853				 struct btrfs_path *path,
 854				 u64 start, u64 size)
 855{
 856	struct btrfs_root *root = trans->fs_info->free_space_root;
 857	struct btrfs_key key, new_key;
 858	u64 found_start, found_end;
 859	u64 end = start + size;
 860	int new_extents = 1;
 861	int ret;
 862
 863	/*
 864	 * We are adding a new extent of free space, but we need to merge
 865	 * extents. There are four cases here:
 866	 *
 867	 * 1. The new extent does not have any immediate neighbors to merge
 868	 * with: add the new key and increment the free space extent count. We
 869	 * may need to convert the block group to bitmaps as a result.
 870	 * 2. The new extent has an immediate neighbor before it: remove the
 871	 * previous key and insert a new key combining both of them. There is no
 872	 * net change in the number of extents.
 873	 * 3. The new extent has an immediate neighbor after it: remove the next
 874	 * key and insert a new key combining both of them. There is no net
 875	 * change in the number of extents.
 876	 * 4. The new extent has immediate neighbors on both sides: remove both
 877	 * of the keys and insert a new key combining all of them. Where we used
 878	 * to have two extents, we now have one, so decrement the extent count.
 879	 */
 880
 881	new_key.objectid = start;
 882	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 883	new_key.offset = size;
 884
 885	/* Search for a neighbor on the left. */
 886	if (start == block_group->start)
 887		goto right;
 888	key.objectid = start - 1;
 889	key.type = (u8)-1;
 890	key.offset = (u64)-1;
 891
 892	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 893	if (ret)
 894		goto out;
 895
 896	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 897
 898	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
 899		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
 900		btrfs_release_path(path);
 901		goto right;
 902	}
 903
 904	found_start = key.objectid;
 905	found_end = key.objectid + key.offset;
 906	ASSERT(found_start >= block_group->start &&
 907	       found_end > block_group->start);
 908	ASSERT(found_start < start && found_end <= start);
 909
 910	/*
 911	 * Delete the neighbor on the left and absorb it into the new key (cases
 912	 * 2 and 4).
 913	 */
 914	if (found_end == start) {
 915		ret = btrfs_del_item(trans, root, path);
 916		if (ret)
 917			goto out;
 918		new_key.objectid = found_start;
 919		new_key.offset += key.offset;
 920		new_extents--;
 921	}
 922	btrfs_release_path(path);
 923
 924right:
 925	/* Search for a neighbor on the right. */
 926	if (end == block_group->start + block_group->length)
 927		goto insert;
 928	key.objectid = end;
 929	key.type = (u8)-1;
 930	key.offset = (u64)-1;
 931
 932	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 933	if (ret)
 934		goto out;
 935
 936	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 937
 938	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
 939		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
 940		btrfs_release_path(path);
 941		goto insert;
 942	}
 943
 944	found_start = key.objectid;
 945	found_end = key.objectid + key.offset;
 946	ASSERT(found_start >= block_group->start &&
 947	       found_end > block_group->start);
 948	ASSERT((found_start < start && found_end <= start) ||
 949	       (found_start >= end && found_end > end));
 950
 951	/*
 952	 * Delete the neighbor on the right and absorb it into the new key
 953	 * (cases 3 and 4).
 954	 */
 955	if (found_start == end) {
 956		ret = btrfs_del_item(trans, root, path);
 957		if (ret)
 958			goto out;
 959		new_key.offset += key.offset;
 960		new_extents--;
 961	}
 962	btrfs_release_path(path);
 963
 964insert:
 965	/* Insert the new key (cases 1-4). */
 966	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
 967	if (ret)
 968		goto out;
 969
 970	btrfs_release_path(path);
 971	ret = update_free_space_extent_count(trans, block_group, path,
 972					     new_extents);
 973
 974out:
 975	return ret;
 976}
 977
 978EXPORT_FOR_TESTS
 979int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
 980			     struct btrfs_block_group *block_group,
 981			     struct btrfs_path *path, u64 start, u64 size)
 982{
 983	struct btrfs_free_space_info *info;
 984	u32 flags;
 985	int ret;
 986
 987	if (block_group->needs_free_space) {
 988		ret = __add_block_group_free_space(trans, block_group, path);
 989		if (ret)
 990			return ret;
 991	}
 992
 993	info = search_free_space_info(NULL, block_group, path, 0);
 994	if (IS_ERR(info))
 995		return PTR_ERR(info);
 996	flags = btrfs_free_space_flags(path->nodes[0], info);
 997	btrfs_release_path(path);
 998
 999	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
1000		return modify_free_space_bitmap(trans, block_group, path,
1001						start, size, 0);
1002	} else {
1003		return add_free_space_extent(trans, block_group, path, start,
1004					     size);
1005	}
1006}
1007
1008int add_to_free_space_tree(struct btrfs_trans_handle *trans,
1009			   u64 start, u64 size)
1010{
1011	struct btrfs_block_group *block_group;
1012	struct btrfs_path *path;
1013	int ret;
1014
1015	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1016		return 0;
1017
1018	path = btrfs_alloc_path();
1019	if (!path) {
1020		ret = -ENOMEM;
1021		goto out;
1022	}
1023
1024	block_group = btrfs_lookup_block_group(trans->fs_info, start);
1025	if (!block_group) {
1026		ASSERT(0);
1027		ret = -ENOENT;
1028		goto out;
1029	}
1030
1031	mutex_lock(&block_group->free_space_lock);
1032	ret = __add_to_free_space_tree(trans, block_group, path, start, size);
1033	mutex_unlock(&block_group->free_space_lock);
1034
1035	btrfs_put_block_group(block_group);
1036out:
1037	btrfs_free_path(path);
1038	if (ret)
1039		btrfs_abort_transaction(trans, ret);
1040	return ret;
1041}
1042
1043/*
1044 * Populate the free space tree by walking the extent tree. Operations on the
1045 * extent tree that happen as a result of writes to the free space tree will go
1046 * through the normal add/remove hooks.
1047 */
1048static int populate_free_space_tree(struct btrfs_trans_handle *trans,
1049				    struct btrfs_block_group *block_group)
1050{
1051	struct btrfs_root *extent_root = trans->fs_info->extent_root;
1052	struct btrfs_path *path, *path2;
1053	struct btrfs_key key;
1054	u64 start, end;
1055	int ret;
1056
1057	path = btrfs_alloc_path();
1058	if (!path)
1059		return -ENOMEM;
1060	path->reada = READA_FORWARD;
1061
1062	path2 = btrfs_alloc_path();
1063	if (!path2) {
1064		btrfs_free_path(path);
1065		return -ENOMEM;
1066	}
1067
1068	ret = add_new_free_space_info(trans, block_group, path2);
1069	if (ret)
1070		goto out;
1071
1072	mutex_lock(&block_group->free_space_lock);
1073
1074	/*
1075	 * Iterate through all of the extent and metadata items in this block
1076	 * group, adding the free space between them and the free space at the
1077	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1078	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1079	 * contained in.
1080	 */
1081	key.objectid = block_group->start;
1082	key.type = BTRFS_EXTENT_ITEM_KEY;
1083	key.offset = 0;
1084
 
1085	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
1086	if (ret < 0)
1087		goto out_locked;
1088	ASSERT(ret == 0);
1089
1090	start = block_group->start;
1091	end = block_group->start + block_group->length;
1092	while (1) {
1093		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1094
1095		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
1096		    key.type == BTRFS_METADATA_ITEM_KEY) {
1097			if (key.objectid >= end)
1098				break;
1099
1100			if (start < key.objectid) {
1101				ret = __add_to_free_space_tree(trans,
1102							       block_group,
1103							       path2, start,
1104							       key.objectid -
1105							       start);
1106				if (ret)
1107					goto out_locked;
1108			}
1109			start = key.objectid;
1110			if (key.type == BTRFS_METADATA_ITEM_KEY)
1111				start += trans->fs_info->nodesize;
1112			else
1113				start += key.offset;
1114		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1115			if (key.objectid != block_group->start)
1116				break;
1117		}
1118
1119		ret = btrfs_next_item(extent_root, path);
1120		if (ret < 0)
1121			goto out_locked;
1122		if (ret)
1123			break;
1124	}
1125	if (start < end) {
1126		ret = __add_to_free_space_tree(trans, block_group, path2,
1127					       start, end - start);
1128		if (ret)
1129			goto out_locked;
1130	}
1131
1132	ret = 0;
1133out_locked:
1134	mutex_unlock(&block_group->free_space_lock);
1135out:
1136	btrfs_free_path(path2);
1137	btrfs_free_path(path);
1138	return ret;
1139}
1140
1141int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
1142{
1143	struct btrfs_trans_handle *trans;
1144	struct btrfs_root *tree_root = fs_info->tree_root;
1145	struct btrfs_root *free_space_root;
1146	struct btrfs_block_group *block_group;
1147	struct rb_node *node;
1148	int ret;
1149
1150	trans = btrfs_start_transaction(tree_root, 0);
1151	if (IS_ERR(trans))
1152		return PTR_ERR(trans);
1153
1154	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
 
1155	free_space_root = btrfs_create_tree(trans,
1156					    BTRFS_FREE_SPACE_TREE_OBJECTID);
1157	if (IS_ERR(free_space_root)) {
1158		ret = PTR_ERR(free_space_root);
1159		goto abort;
1160	}
1161	fs_info->free_space_root = free_space_root;
 
 
 
 
1162
1163	node = rb_first(&fs_info->block_group_cache_tree);
1164	while (node) {
1165		block_group = rb_entry(node, struct btrfs_block_group,
1166				       cache_node);
1167		ret = populate_free_space_tree(trans, block_group);
1168		if (ret)
1169			goto abort;
1170		node = rb_next(node);
1171	}
1172
1173	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1174	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1175	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
 
1176
1177	return btrfs_commit_transaction(trans);
 
 
 
 
 
1178
1179abort:
1180	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
 
1181	btrfs_abort_transaction(trans, ret);
1182	btrfs_end_transaction(trans);
1183	return ret;
1184}
1185
1186static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1187				 struct btrfs_root *root)
1188{
1189	struct btrfs_path *path;
1190	struct btrfs_key key;
1191	int nr;
1192	int ret;
1193
1194	path = btrfs_alloc_path();
1195	if (!path)
1196		return -ENOMEM;
1197
1198	path->leave_spinning = 1;
1199
1200	key.objectid = 0;
1201	key.type = 0;
1202	key.offset = 0;
1203
1204	while (1) {
1205		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1206		if (ret < 0)
1207			goto out;
1208
1209		nr = btrfs_header_nritems(path->nodes[0]);
1210		if (!nr)
1211			break;
1212
1213		path->slots[0] = 0;
1214		ret = btrfs_del_items(trans, root, path, 0, nr);
1215		if (ret)
1216			goto out;
1217
1218		btrfs_release_path(path);
1219	}
1220
1221	ret = 0;
1222out:
1223	btrfs_free_path(path);
1224	return ret;
1225}
1226
1227int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
1228{
1229	struct btrfs_trans_handle *trans;
1230	struct btrfs_root *tree_root = fs_info->tree_root;
1231	struct btrfs_root *free_space_root = fs_info->free_space_root;
 
 
 
 
 
1232	int ret;
1233
1234	trans = btrfs_start_transaction(tree_root, 0);
1235	if (IS_ERR(trans))
1236		return PTR_ERR(trans);
1237
1238	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1239	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1240	fs_info->free_space_root = NULL;
1241
1242	ret = clear_free_space_tree(trans, free_space_root);
1243	if (ret)
1244		goto abort;
1245
1246	ret = btrfs_del_root(trans, &free_space_root->root_key);
1247	if (ret)
1248		goto abort;
1249
 
1250	list_del(&free_space_root->dirty_list);
1251
1252	btrfs_tree_lock(free_space_root->node);
1253	btrfs_clean_tree_block(free_space_root->node);
1254	btrfs_tree_unlock(free_space_root->node);
1255	btrfs_free_tree_block(trans, free_space_root, free_space_root->node,
1256			      0, 1);
1257
1258	btrfs_put_root(free_space_root);
1259
1260	return btrfs_commit_transaction(trans);
1261
1262abort:
1263	btrfs_abort_transaction(trans, ret);
1264	btrfs_end_transaction(trans);
1265	return ret;
1266}
1267
1268static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
1269					struct btrfs_block_group *block_group,
1270					struct btrfs_path *path)
1271{
1272	int ret;
1273
1274	block_group->needs_free_space = 0;
1275
1276	ret = add_new_free_space_info(trans, block_group, path);
1277	if (ret)
1278		return ret;
1279
1280	return __add_to_free_space_tree(trans, block_group, path,
1281					block_group->start,
1282					block_group->length);
1283}
1284
1285int add_block_group_free_space(struct btrfs_trans_handle *trans,
1286			       struct btrfs_block_group *block_group)
1287{
1288	struct btrfs_fs_info *fs_info = trans->fs_info;
1289	struct btrfs_path *path = NULL;
1290	int ret = 0;
1291
1292	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1293		return 0;
1294
1295	mutex_lock(&block_group->free_space_lock);
1296	if (!block_group->needs_free_space)
1297		goto out;
1298
1299	path = btrfs_alloc_path();
1300	if (!path) {
1301		ret = -ENOMEM;
1302		goto out;
1303	}
1304
1305	ret = __add_block_group_free_space(trans, block_group, path);
1306
1307out:
1308	btrfs_free_path(path);
1309	mutex_unlock(&block_group->free_space_lock);
1310	if (ret)
1311		btrfs_abort_transaction(trans, ret);
1312	return ret;
1313}
1314
1315int remove_block_group_free_space(struct btrfs_trans_handle *trans,
1316				  struct btrfs_block_group *block_group)
1317{
1318	struct btrfs_root *root = trans->fs_info->free_space_root;
1319	struct btrfs_path *path;
1320	struct btrfs_key key, found_key;
1321	struct extent_buffer *leaf;
1322	u64 start, end;
1323	int done = 0, nr;
1324	int ret;
1325
1326	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1327		return 0;
1328
1329	if (block_group->needs_free_space) {
1330		/* We never added this block group to the free space tree. */
1331		return 0;
1332	}
1333
1334	path = btrfs_alloc_path();
1335	if (!path) {
1336		ret = -ENOMEM;
1337		goto out;
1338	}
1339
1340	start = block_group->start;
1341	end = block_group->start + block_group->length;
1342
1343	key.objectid = end - 1;
1344	key.type = (u8)-1;
1345	key.offset = (u64)-1;
1346
1347	while (!done) {
1348		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
1349		if (ret)
1350			goto out;
1351
1352		leaf = path->nodes[0];
1353		nr = 0;
1354		path->slots[0]++;
1355		while (path->slots[0] > 0) {
1356			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
1357
1358			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
1359				ASSERT(found_key.objectid == block_group->start);
1360				ASSERT(found_key.offset == block_group->length);
1361				done = 1;
1362				nr++;
1363				path->slots[0]--;
1364				break;
1365			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
1366				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
1367				ASSERT(found_key.objectid >= start);
1368				ASSERT(found_key.objectid < end);
1369				ASSERT(found_key.objectid + found_key.offset <= end);
1370				nr++;
1371				path->slots[0]--;
1372			} else {
1373				ASSERT(0);
1374			}
1375		}
1376
1377		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
1378		if (ret)
1379			goto out;
1380		btrfs_release_path(path);
1381	}
1382
1383	ret = 0;
1384out:
1385	btrfs_free_path(path);
1386	if (ret)
1387		btrfs_abort_transaction(trans, ret);
1388	return ret;
1389}
1390
1391static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
1392				   struct btrfs_path *path,
1393				   u32 expected_extent_count)
1394{
1395	struct btrfs_block_group *block_group;
1396	struct btrfs_fs_info *fs_info;
1397	struct btrfs_root *root;
1398	struct btrfs_key key;
1399	int prev_bit = 0, bit;
1400	/* Initialize to silence GCC. */
1401	u64 extent_start = 0;
1402	u64 end, offset;
1403	u64 total_found = 0;
1404	u32 extent_count = 0;
1405	int ret;
1406
1407	block_group = caching_ctl->block_group;
1408	fs_info = block_group->fs_info;
1409	root = fs_info->free_space_root;
1410
1411	end = block_group->start + block_group->length;
1412
1413	while (1) {
1414		ret = btrfs_next_item(root, path);
1415		if (ret < 0)
1416			goto out;
1417		if (ret)
1418			break;
1419
1420		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1421
1422		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1423			break;
1424
1425		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
1426		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1427
1428		caching_ctl->progress = key.objectid;
1429
1430		offset = key.objectid;
1431		while (offset < key.objectid + key.offset) {
1432			bit = free_space_test_bit(block_group, path, offset);
1433			if (prev_bit == 0 && bit == 1) {
1434				extent_start = offset;
1435			} else if (prev_bit == 1 && bit == 0) {
1436				total_found += add_new_free_space(block_group,
1437								  extent_start,
1438								  offset);
1439				if (total_found > CACHING_CTL_WAKE_UP) {
1440					total_found = 0;
1441					wake_up(&caching_ctl->wait);
1442				}
1443				extent_count++;
1444			}
1445			prev_bit = bit;
1446			offset += fs_info->sectorsize;
1447		}
1448	}
1449	if (prev_bit == 1) {
1450		total_found += add_new_free_space(block_group, extent_start,
1451						  end);
1452		extent_count++;
1453	}
1454
1455	if (extent_count != expected_extent_count) {
1456		btrfs_err(fs_info,
1457			  "incorrect extent count for %llu; counted %u, expected %u",
1458			  block_group->start, extent_count,
1459			  expected_extent_count);
1460		ASSERT(0);
1461		ret = -EIO;
1462		goto out;
1463	}
1464
1465	caching_ctl->progress = (u64)-1;
1466
1467	ret = 0;
1468out:
1469	return ret;
1470}
1471
1472static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
1473				   struct btrfs_path *path,
1474				   u32 expected_extent_count)
1475{
1476	struct btrfs_block_group *block_group;
1477	struct btrfs_fs_info *fs_info;
1478	struct btrfs_root *root;
1479	struct btrfs_key key;
1480	u64 end;
1481	u64 total_found = 0;
1482	u32 extent_count = 0;
1483	int ret;
1484
1485	block_group = caching_ctl->block_group;
1486	fs_info = block_group->fs_info;
1487	root = fs_info->free_space_root;
1488
1489	end = block_group->start + block_group->length;
1490
1491	while (1) {
1492		ret = btrfs_next_item(root, path);
1493		if (ret < 0)
1494			goto out;
1495		if (ret)
1496			break;
1497
1498		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1499
1500		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1501			break;
1502
1503		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
1504		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1505
1506		caching_ctl->progress = key.objectid;
1507
1508		total_found += add_new_free_space(block_group, key.objectid,
1509						  key.objectid + key.offset);
1510		if (total_found > CACHING_CTL_WAKE_UP) {
1511			total_found = 0;
1512			wake_up(&caching_ctl->wait);
1513		}
1514		extent_count++;
1515	}
1516
1517	if (extent_count != expected_extent_count) {
1518		btrfs_err(fs_info,
1519			  "incorrect extent count for %llu; counted %u, expected %u",
1520			  block_group->start, extent_count,
1521			  expected_extent_count);
1522		ASSERT(0);
1523		ret = -EIO;
1524		goto out;
1525	}
1526
1527	caching_ctl->progress = (u64)-1;
1528
1529	ret = 0;
1530out:
1531	return ret;
1532}
1533
1534int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1535{
1536	struct btrfs_block_group *block_group;
1537	struct btrfs_free_space_info *info;
1538	struct btrfs_path *path;
1539	u32 extent_count, flags;
1540	int ret;
1541
1542	block_group = caching_ctl->block_group;
1543
1544	path = btrfs_alloc_path();
1545	if (!path)
1546		return -ENOMEM;
1547
1548	/*
1549	 * Just like caching_thread() doesn't want to deadlock on the extent
1550	 * tree, we don't want to deadlock on the free space tree.
1551	 */
1552	path->skip_locking = 1;
1553	path->search_commit_root = 1;
1554	path->reada = READA_FORWARD;
1555
1556	info = search_free_space_info(NULL, block_group, path, 0);
1557	if (IS_ERR(info)) {
1558		ret = PTR_ERR(info);
1559		goto out;
1560	}
1561	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1562	flags = btrfs_free_space_flags(path->nodes[0], info);
1563
1564	/*
1565	 * We left path pointing to the free space info item, so now
1566	 * load_free_space_foo can just iterate through the free space tree from
1567	 * there.
1568	 */
1569	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1570		ret = load_free_space_bitmaps(caching_ctl, path, extent_count);
1571	else
1572		ret = load_free_space_extents(caching_ctl, path, extent_count);
1573
1574out:
1575	btrfs_free_path(path);
1576	return ret;
1577}