Linux Audio

Check our new training course

Loading...
v6.13.7
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2015 Facebook.  All rights reserved.
   4 */
   5
   6#include <linux/kernel.h>
   7#include <linux/sched/mm.h>
   8#include "messages.h"
   9#include "ctree.h"
  10#include "disk-io.h"
  11#include "locking.h"
  12#include "free-space-tree.h"
  13#include "transaction.h"
  14#include "block-group.h"
  15#include "fs.h"
  16#include "accessors.h"
  17#include "extent-tree.h"
  18#include "root-tree.h"
  19
  20static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
  21					struct btrfs_block_group *block_group,
  22					struct btrfs_path *path);
  23
  24static struct btrfs_root *btrfs_free_space_root(
  25				struct btrfs_block_group *block_group)
  26{
  27	struct btrfs_key key = {
  28		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
  29		.type = BTRFS_ROOT_ITEM_KEY,
  30		.offset = 0,
  31	};
  32
  33	if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
  34		key.offset = block_group->global_root_id;
  35	return btrfs_global_root(block_group->fs_info, &key);
  36}
  37
  38void set_free_space_tree_thresholds(struct btrfs_block_group *cache)
  39{
  40	u32 bitmap_range;
  41	size_t bitmap_size;
  42	u64 num_bitmaps, total_bitmap_size;
  43
  44	if (WARN_ON(cache->length == 0))
  45		btrfs_warn(cache->fs_info, "block group %llu length is zero",
  46			   cache->start);
  47
  48	/*
  49	 * We convert to bitmaps when the disk space required for using extents
  50	 * exceeds that required for using bitmaps.
  51	 */
  52	bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
  53	num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
 
  54	bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
  55	total_bitmap_size = num_bitmaps * bitmap_size;
  56	cache->bitmap_high_thresh = div_u64(total_bitmap_size,
  57					    sizeof(struct btrfs_item));
  58
  59	/*
  60	 * We allow for a small buffer between the high threshold and low
  61	 * threshold to avoid thrashing back and forth between the two formats.
  62	 */
  63	if (cache->bitmap_high_thresh > 100)
  64		cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
  65	else
  66		cache->bitmap_low_thresh = 0;
  67}
  68
  69static int add_new_free_space_info(struct btrfs_trans_handle *trans,
  70				   struct btrfs_block_group *block_group,
  71				   struct btrfs_path *path)
  72{
  73	struct btrfs_root *root = btrfs_free_space_root(block_group);
  74	struct btrfs_free_space_info *info;
  75	struct btrfs_key key;
  76	struct extent_buffer *leaf;
  77	int ret;
  78
  79	key.objectid = block_group->start;
  80	key.type = BTRFS_FREE_SPACE_INFO_KEY;
  81	key.offset = block_group->length;
  82
  83	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
  84	if (ret)
  85		goto out;
  86
  87	leaf = path->nodes[0];
  88	info = btrfs_item_ptr(leaf, path->slots[0],
  89			      struct btrfs_free_space_info);
  90	btrfs_set_free_space_extent_count(leaf, info, 0);
  91	btrfs_set_free_space_flags(leaf, info, 0);
  92	btrfs_mark_buffer_dirty(trans, leaf);
  93
  94	ret = 0;
  95out:
  96	btrfs_release_path(path);
  97	return ret;
  98}
  99
 100EXPORT_FOR_TESTS
 101struct btrfs_free_space_info *search_free_space_info(
 102		struct btrfs_trans_handle *trans,
 103		struct btrfs_block_group *block_group,
 104		struct btrfs_path *path, int cow)
 105{
 106	struct btrfs_fs_info *fs_info = block_group->fs_info;
 107	struct btrfs_root *root = btrfs_free_space_root(block_group);
 108	struct btrfs_key key;
 109	int ret;
 110
 111	key.objectid = block_group->start;
 112	key.type = BTRFS_FREE_SPACE_INFO_KEY;
 113	key.offset = block_group->length;
 114
 115	ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
 116	if (ret < 0)
 117		return ERR_PTR(ret);
 118	if (ret != 0) {
 119		btrfs_warn(fs_info, "missing free space info for %llu",
 120			   block_group->start);
 121		ASSERT(0);
 122		return ERR_PTR(-ENOENT);
 123	}
 124
 125	return btrfs_item_ptr(path->nodes[0], path->slots[0],
 126			      struct btrfs_free_space_info);
 127}
 128
 129/*
 130 * btrfs_search_slot() but we're looking for the greatest key less than the
 131 * passed key.
 132 */
 133static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
 134				  struct btrfs_root *root,
 135				  struct btrfs_key *key, struct btrfs_path *p,
 136				  int ins_len, int cow)
 137{
 138	int ret;
 139
 140	ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
 141	if (ret < 0)
 142		return ret;
 143
 144	if (ret == 0) {
 145		ASSERT(0);
 146		return -EIO;
 147	}
 148
 149	if (p->slots[0] == 0) {
 150		ASSERT(0);
 151		return -EIO;
 152	}
 153	p->slots[0]--;
 154
 155	return 0;
 156}
 157
 158static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
 159					 u64 size)
 160{
 161	return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
 162}
 163
 164static unsigned long *alloc_bitmap(u32 bitmap_size)
 165{
 166	unsigned long *ret;
 167	unsigned int nofs_flag;
 168	u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
 169
 170	/*
 171	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
 172	 * into the filesystem as the free space bitmap can be modified in the
 173	 * critical section of a transaction commit.
 174	 *
 175	 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
 176	 * know that recursion is unsafe.
 177	 */
 178	nofs_flag = memalloc_nofs_save();
 179	ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
 180	memalloc_nofs_restore(nofs_flag);
 181	return ret;
 182}
 183
 184static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
 185{
 186	u8 *p = ((u8 *)map) + BIT_BYTE(start);
 187	const unsigned int size = start + len;
 188	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
 189	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
 190
 191	while (len - bits_to_set >= 0) {
 192		*p |= mask_to_set;
 193		len -= bits_to_set;
 194		bits_to_set = BITS_PER_BYTE;
 195		mask_to_set = ~0;
 196		p++;
 197	}
 198	if (len) {
 199		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
 200		*p |= mask_to_set;
 201	}
 202}
 203
 204EXPORT_FOR_TESTS
 205int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 206				  struct btrfs_block_group *block_group,
 207				  struct btrfs_path *path)
 208{
 209	struct btrfs_fs_info *fs_info = trans->fs_info;
 210	struct btrfs_root *root = btrfs_free_space_root(block_group);
 211	struct btrfs_free_space_info *info;
 212	struct btrfs_key key, found_key;
 213	struct extent_buffer *leaf;
 214	unsigned long *bitmap;
 215	char *bitmap_cursor;
 216	u64 start, end;
 217	u64 bitmap_range, i;
 218	u32 bitmap_size, flags, expected_extent_count;
 219	u32 extent_count = 0;
 220	int done = 0, nr;
 221	int ret;
 222
 223	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
 
 224	bitmap = alloc_bitmap(bitmap_size);
 225	if (!bitmap) {
 226		ret = -ENOMEM;
 227		goto out;
 228	}
 229
 230	start = block_group->start;
 231	end = block_group->start + block_group->length;
 232
 233	key.objectid = end - 1;
 234	key.type = (u8)-1;
 235	key.offset = (u64)-1;
 236
 237	while (!done) {
 238		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 239		if (ret)
 240			goto out;
 241
 242		leaf = path->nodes[0];
 243		nr = 0;
 244		path->slots[0]++;
 245		while (path->slots[0] > 0) {
 246			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
 247
 248			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
 249				ASSERT(found_key.objectid == block_group->start);
 250				ASSERT(found_key.offset == block_group->length);
 251				done = 1;
 252				break;
 253			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
 254				u64 first, last;
 255
 256				ASSERT(found_key.objectid >= start);
 257				ASSERT(found_key.objectid < end);
 258				ASSERT(found_key.objectid + found_key.offset <= end);
 259
 260				first = div_u64(found_key.objectid - start,
 261						fs_info->sectorsize);
 262				last = div_u64(found_key.objectid + found_key.offset - start,
 263					       fs_info->sectorsize);
 264				le_bitmap_set(bitmap, first, last - first);
 265
 266				extent_count++;
 267				nr++;
 268				path->slots[0]--;
 269			} else {
 270				ASSERT(0);
 271			}
 272		}
 273
 274		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
 275		if (ret)
 276			goto out;
 277		btrfs_release_path(path);
 278	}
 279
 280	info = search_free_space_info(trans, block_group, path, 1);
 281	if (IS_ERR(info)) {
 282		ret = PTR_ERR(info);
 283		goto out;
 284	}
 285	leaf = path->nodes[0];
 286	flags = btrfs_free_space_flags(leaf, info);
 287	flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
 288	btrfs_set_free_space_flags(leaf, info, flags);
 289	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
 290	btrfs_mark_buffer_dirty(trans, leaf);
 291	btrfs_release_path(path);
 292
 293	if (extent_count != expected_extent_count) {
 294		btrfs_err(fs_info,
 295			  "incorrect extent count for %llu; counted %u, expected %u",
 296			  block_group->start, extent_count,
 297			  expected_extent_count);
 298		ASSERT(0);
 299		ret = -EIO;
 300		goto out;
 301	}
 302
 303	bitmap_cursor = (char *)bitmap;
 304	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
 305	i = start;
 306	while (i < end) {
 307		unsigned long ptr;
 308		u64 extent_size;
 309		u32 data_size;
 310
 311		extent_size = min(end - i, bitmap_range);
 312		data_size = free_space_bitmap_size(fs_info, extent_size);
 
 313
 314		key.objectid = i;
 315		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
 316		key.offset = extent_size;
 317
 318		ret = btrfs_insert_empty_item(trans, root, path, &key,
 319					      data_size);
 320		if (ret)
 321			goto out;
 322
 323		leaf = path->nodes[0];
 324		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 325		write_extent_buffer(leaf, bitmap_cursor, ptr,
 326				    data_size);
 327		btrfs_mark_buffer_dirty(trans, leaf);
 328		btrfs_release_path(path);
 329
 330		i += extent_size;
 331		bitmap_cursor += data_size;
 332	}
 333
 334	ret = 0;
 335out:
 336	kvfree(bitmap);
 337	if (ret)
 338		btrfs_abort_transaction(trans, ret);
 339	return ret;
 340}
 341
 342EXPORT_FOR_TESTS
 343int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 344				  struct btrfs_block_group *block_group,
 345				  struct btrfs_path *path)
 346{
 347	struct btrfs_fs_info *fs_info = trans->fs_info;
 348	struct btrfs_root *root = btrfs_free_space_root(block_group);
 349	struct btrfs_free_space_info *info;
 350	struct btrfs_key key, found_key;
 351	struct extent_buffer *leaf;
 352	unsigned long *bitmap;
 353	u64 start, end;
 354	u32 bitmap_size, flags, expected_extent_count;
 355	unsigned long nrbits, start_bit, end_bit;
 356	u32 extent_count = 0;
 357	int done = 0, nr;
 358	int ret;
 359
 360	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
 
 361	bitmap = alloc_bitmap(bitmap_size);
 362	if (!bitmap) {
 363		ret = -ENOMEM;
 364		goto out;
 365	}
 366
 367	start = block_group->start;
 368	end = block_group->start + block_group->length;
 369
 370	key.objectid = end - 1;
 371	key.type = (u8)-1;
 372	key.offset = (u64)-1;
 373
 374	while (!done) {
 375		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 376		if (ret)
 377			goto out;
 378
 379		leaf = path->nodes[0];
 380		nr = 0;
 381		path->slots[0]++;
 382		while (path->slots[0] > 0) {
 383			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
 384
 385			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
 386				ASSERT(found_key.objectid == block_group->start);
 387				ASSERT(found_key.offset == block_group->length);
 388				done = 1;
 389				break;
 390			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
 391				unsigned long ptr;
 392				char *bitmap_cursor;
 393				u32 bitmap_pos, data_size;
 394
 395				ASSERT(found_key.objectid >= start);
 396				ASSERT(found_key.objectid < end);
 397				ASSERT(found_key.objectid + found_key.offset <= end);
 398
 399				bitmap_pos = div_u64(found_key.objectid - start,
 400						     fs_info->sectorsize *
 401						     BITS_PER_BYTE);
 402				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
 403				data_size = free_space_bitmap_size(fs_info,
 404								found_key.offset);
 405
 406				ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
 407				read_extent_buffer(leaf, bitmap_cursor, ptr,
 408						   data_size);
 409
 410				nr++;
 411				path->slots[0]--;
 412			} else {
 413				ASSERT(0);
 414			}
 415		}
 416
 417		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
 418		if (ret)
 419			goto out;
 420		btrfs_release_path(path);
 421	}
 422
 423	info = search_free_space_info(trans, block_group, path, 1);
 424	if (IS_ERR(info)) {
 425		ret = PTR_ERR(info);
 426		goto out;
 427	}
 428	leaf = path->nodes[0];
 429	flags = btrfs_free_space_flags(leaf, info);
 430	flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
 431	btrfs_set_free_space_flags(leaf, info, flags);
 432	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
 433	btrfs_mark_buffer_dirty(trans, leaf);
 434	btrfs_release_path(path);
 435
 436	nrbits = block_group->length >> block_group->fs_info->sectorsize_bits;
 437	start_bit = find_next_bit_le(bitmap, nrbits, 0);
 438
 439	while (start_bit < nrbits) {
 440		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
 441		ASSERT(start_bit < end_bit);
 442
 443		key.objectid = start + start_bit * block_group->fs_info->sectorsize;
 444		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 445		key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;
 446
 447		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 448		if (ret)
 449			goto out;
 450		btrfs_release_path(path);
 451
 452		extent_count++;
 453
 454		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
 455	}
 456
 457	if (extent_count != expected_extent_count) {
 458		btrfs_err(fs_info,
 459			  "incorrect extent count for %llu; counted %u, expected %u",
 460			  block_group->start, extent_count,
 461			  expected_extent_count);
 462		ASSERT(0);
 463		ret = -EIO;
 464		goto out;
 465	}
 466
 467	ret = 0;
 468out:
 469	kvfree(bitmap);
 470	if (ret)
 471		btrfs_abort_transaction(trans, ret);
 472	return ret;
 473}
 474
 475static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
 476					  struct btrfs_block_group *block_group,
 477					  struct btrfs_path *path,
 478					  int new_extents)
 479{
 480	struct btrfs_free_space_info *info;
 481	u32 flags;
 482	u32 extent_count;
 483	int ret = 0;
 484
 485	if (new_extents == 0)
 486		return 0;
 487
 488	info = search_free_space_info(trans, block_group, path, 1);
 489	if (IS_ERR(info)) {
 490		ret = PTR_ERR(info);
 491		goto out;
 492	}
 493	flags = btrfs_free_space_flags(path->nodes[0], info);
 494	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
 495
 496	extent_count += new_extents;
 497	btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
 498	btrfs_mark_buffer_dirty(trans, path->nodes[0]);
 499	btrfs_release_path(path);
 500
 501	if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
 502	    extent_count > block_group->bitmap_high_thresh) {
 503		ret = convert_free_space_to_bitmaps(trans, block_group, path);
 504	} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
 505		   extent_count < block_group->bitmap_low_thresh) {
 506		ret = convert_free_space_to_extents(trans, block_group, path);
 507	}
 508
 509out:
 510	return ret;
 511}
 512
 513EXPORT_FOR_TESTS
 514int free_space_test_bit(struct btrfs_block_group *block_group,
 515			struct btrfs_path *path, u64 offset)
 516{
 517	struct extent_buffer *leaf;
 518	struct btrfs_key key;
 519	u64 found_start, found_end;
 520	unsigned long ptr, i;
 521
 522	leaf = path->nodes[0];
 523	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 524	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
 525
 526	found_start = key.objectid;
 527	found_end = key.objectid + key.offset;
 528	ASSERT(offset >= found_start && offset < found_end);
 529
 530	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 531	i = div_u64(offset - found_start,
 532		    block_group->fs_info->sectorsize);
 533	return !!extent_buffer_test_bit(leaf, ptr, i);
 534}
 535
 536static void free_space_set_bits(struct btrfs_trans_handle *trans,
 537				struct btrfs_block_group *block_group,
 538				struct btrfs_path *path, u64 *start, u64 *size,
 539				int bit)
 540{
 541	struct btrfs_fs_info *fs_info = block_group->fs_info;
 542	struct extent_buffer *leaf;
 543	struct btrfs_key key;
 544	u64 end = *start + *size;
 545	u64 found_start, found_end;
 546	unsigned long ptr, first, last;
 547
 548	leaf = path->nodes[0];
 549	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 550	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
 551
 552	found_start = key.objectid;
 553	found_end = key.objectid + key.offset;
 554	ASSERT(*start >= found_start && *start < found_end);
 555	ASSERT(end > found_start);
 556
 557	if (end > found_end)
 558		end = found_end;
 559
 560	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 561	first = (*start - found_start) >> fs_info->sectorsize_bits;
 562	last = (end - found_start) >> fs_info->sectorsize_bits;
 563	if (bit)
 564		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
 565	else
 566		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
 567	btrfs_mark_buffer_dirty(trans, leaf);
 568
 569	*size -= end - *start;
 570	*start = end;
 571}
 572
 573/*
 574 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
 575 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
 576 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
 577 * looking for.
 578 */
 579static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
 580				  struct btrfs_root *root, struct btrfs_path *p)
 581{
 582	struct btrfs_key key;
 583
 584	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
 585		p->slots[0]++;
 586		return 0;
 587	}
 588
 589	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
 590	btrfs_release_path(p);
 591
 592	key.objectid += key.offset;
 593	key.type = (u8)-1;
 594	key.offset = (u64)-1;
 595
 596	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
 597}
 598
 599/*
 600 * If remove is 1, then we are removing free space, thus clearing bits in the
 601 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
 602 * the bitmap.
 603 */
 604static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
 605				    struct btrfs_block_group *block_group,
 606				    struct btrfs_path *path,
 607				    u64 start, u64 size, int remove)
 608{
 609	struct btrfs_root *root = btrfs_free_space_root(block_group);
 610	struct btrfs_key key;
 611	u64 end = start + size;
 612	u64 cur_start, cur_size;
 613	int prev_bit, next_bit;
 614	int new_extents;
 615	int ret;
 616
 617	/*
 618	 * Read the bit for the block immediately before the extent of space if
 619	 * that block is within the block group.
 620	 */
 621	if (start > block_group->start) {
 622		u64 prev_block = start - block_group->fs_info->sectorsize;
 623
 624		key.objectid = prev_block;
 625		key.type = (u8)-1;
 626		key.offset = (u64)-1;
 627
 628		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
 629		if (ret)
 630			goto out;
 631
 632		prev_bit = free_space_test_bit(block_group, path, prev_block);
 633
 634		/* The previous block may have been in the previous bitmap. */
 635		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 636		if (start >= key.objectid + key.offset) {
 637			ret = free_space_next_bitmap(trans, root, path);
 638			if (ret)
 639				goto out;
 640		}
 641	} else {
 642		key.objectid = start;
 643		key.type = (u8)-1;
 644		key.offset = (u64)-1;
 645
 646		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
 647		if (ret)
 648			goto out;
 649
 650		prev_bit = -1;
 651	}
 652
 653	/*
 654	 * Iterate over all of the bitmaps overlapped by the extent of space,
 655	 * clearing/setting bits as required.
 656	 */
 657	cur_start = start;
 658	cur_size = size;
 659	while (1) {
 660		free_space_set_bits(trans, block_group, path, &cur_start, &cur_size,
 661				    !remove);
 662		if (cur_size == 0)
 663			break;
 664		ret = free_space_next_bitmap(trans, root, path);
 665		if (ret)
 666			goto out;
 667	}
 668
 669	/*
 670	 * Read the bit for the block immediately after the extent of space if
 671	 * that block is within the block group.
 672	 */
 673	if (end < block_group->start + block_group->length) {
 674		/* The next block may be in the next bitmap. */
 675		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 676		if (end >= key.objectid + key.offset) {
 677			ret = free_space_next_bitmap(trans, root, path);
 678			if (ret)
 679				goto out;
 680		}
 681
 682		next_bit = free_space_test_bit(block_group, path, end);
 683	} else {
 684		next_bit = -1;
 685	}
 686
 687	if (remove) {
 688		new_extents = -1;
 689		if (prev_bit == 1) {
 690			/* Leftover on the left. */
 691			new_extents++;
 692		}
 693		if (next_bit == 1) {
 694			/* Leftover on the right. */
 695			new_extents++;
 696		}
 697	} else {
 698		new_extents = 1;
 699		if (prev_bit == 1) {
 700			/* Merging with neighbor on the left. */
 701			new_extents--;
 702		}
 703		if (next_bit == 1) {
 704			/* Merging with neighbor on the right. */
 705			new_extents--;
 706		}
 707	}
 708
 709	btrfs_release_path(path);
 710	ret = update_free_space_extent_count(trans, block_group, path,
 711					     new_extents);
 712
 713out:
 714	return ret;
 715}
 716
 717static int remove_free_space_extent(struct btrfs_trans_handle *trans,
 718				    struct btrfs_block_group *block_group,
 719				    struct btrfs_path *path,
 720				    u64 start, u64 size)
 721{
 722	struct btrfs_root *root = btrfs_free_space_root(block_group);
 723	struct btrfs_key key;
 724	u64 found_start, found_end;
 725	u64 end = start + size;
 726	int new_extents = -1;
 727	int ret;
 728
 729	key.objectid = start;
 730	key.type = (u8)-1;
 731	key.offset = (u64)-1;
 732
 733	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 734	if (ret)
 735		goto out;
 736
 737	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 738
 739	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
 740
 741	found_start = key.objectid;
 742	found_end = key.objectid + key.offset;
 743	ASSERT(start >= found_start && end <= found_end);
 744
 745	/*
 746	 * Okay, now that we've found the free space extent which contains the
 747	 * free space that we are removing, there are four cases:
 748	 *
 749	 * 1. We're using the whole extent: delete the key we found and
 750	 * decrement the free space extent count.
 751	 * 2. We are using part of the extent starting at the beginning: delete
 752	 * the key we found and insert a new key representing the leftover at
 753	 * the end. There is no net change in the number of extents.
 754	 * 3. We are using part of the extent ending at the end: delete the key
 755	 * we found and insert a new key representing the leftover at the
 756	 * beginning. There is no net change in the number of extents.
 757	 * 4. We are using part of the extent in the middle: delete the key we
 758	 * found and insert two new keys representing the leftovers on each
 759	 * side. Where we used to have one extent, we now have two, so increment
 760	 * the extent count. We may need to convert the block group to bitmaps
 761	 * as a result.
 762	 */
 763
 764	/* Delete the existing key (cases 1-4). */
 765	ret = btrfs_del_item(trans, root, path);
 766	if (ret)
 767		goto out;
 768
 769	/* Add a key for leftovers at the beginning (cases 3 and 4). */
 770	if (start > found_start) {
 771		key.objectid = found_start;
 772		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 773		key.offset = start - found_start;
 774
 775		btrfs_release_path(path);
 776		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 777		if (ret)
 778			goto out;
 779		new_extents++;
 780	}
 781
 782	/* Add a key for leftovers at the end (cases 2 and 4). */
 783	if (end < found_end) {
 784		key.objectid = end;
 785		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 786		key.offset = found_end - end;
 787
 788		btrfs_release_path(path);
 789		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 790		if (ret)
 791			goto out;
 792		new_extents++;
 793	}
 794
 795	btrfs_release_path(path);
 796	ret = update_free_space_extent_count(trans, block_group, path,
 797					     new_extents);
 798
 799out:
 800	return ret;
 801}
 802
 803EXPORT_FOR_TESTS
 804int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
 805				  struct btrfs_block_group *block_group,
 806				  struct btrfs_path *path, u64 start, u64 size)
 807{
 808	struct btrfs_free_space_info *info;
 809	u32 flags;
 810	int ret;
 811
 812	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
 813		ret = __add_block_group_free_space(trans, block_group, path);
 814		if (ret)
 815			return ret;
 816	}
 817
 818	info = search_free_space_info(NULL, block_group, path, 0);
 819	if (IS_ERR(info))
 820		return PTR_ERR(info);
 821	flags = btrfs_free_space_flags(path->nodes[0], info);
 822	btrfs_release_path(path);
 823
 824	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
 825		return modify_free_space_bitmap(trans, block_group, path,
 826						start, size, 1);
 827	} else {
 828		return remove_free_space_extent(trans, block_group, path,
 829						start, size);
 830	}
 831}
 832
 833int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
 834				u64 start, u64 size)
 835{
 836	struct btrfs_block_group *block_group;
 837	struct btrfs_path *path;
 838	int ret;
 839
 840	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
 841		return 0;
 842
 843	path = btrfs_alloc_path();
 844	if (!path) {
 845		ret = -ENOMEM;
 846		goto out;
 847	}
 848
 849	block_group = btrfs_lookup_block_group(trans->fs_info, start);
 850	if (!block_group) {
 851		ASSERT(0);
 852		ret = -ENOENT;
 853		goto out;
 854	}
 855
 856	mutex_lock(&block_group->free_space_lock);
 857	ret = __remove_from_free_space_tree(trans, block_group, path, start,
 858					    size);
 859	mutex_unlock(&block_group->free_space_lock);
 860
 861	btrfs_put_block_group(block_group);
 862out:
 863	btrfs_free_path(path);
 864	if (ret)
 865		btrfs_abort_transaction(trans, ret);
 866	return ret;
 867}
 868
 869static int add_free_space_extent(struct btrfs_trans_handle *trans,
 870				 struct btrfs_block_group *block_group,
 871				 struct btrfs_path *path,
 872				 u64 start, u64 size)
 873{
 874	struct btrfs_root *root = btrfs_free_space_root(block_group);
 875	struct btrfs_key key, new_key;
 876	u64 found_start, found_end;
 877	u64 end = start + size;
 878	int new_extents = 1;
 879	int ret;
 880
 881	/*
 882	 * We are adding a new extent of free space, but we need to merge
 883	 * extents. There are four cases here:
 884	 *
 885	 * 1. The new extent does not have any immediate neighbors to merge
 886	 * with: add the new key and increment the free space extent count. We
 887	 * may need to convert the block group to bitmaps as a result.
 888	 * 2. The new extent has an immediate neighbor before it: remove the
 889	 * previous key and insert a new key combining both of them. There is no
 890	 * net change in the number of extents.
 891	 * 3. The new extent has an immediate neighbor after it: remove the next
 892	 * key and insert a new key combining both of them. There is no net
 893	 * change in the number of extents.
 894	 * 4. The new extent has immediate neighbors on both sides: remove both
 895	 * of the keys and insert a new key combining all of them. Where we used
 896	 * to have two extents, we now have one, so decrement the extent count.
 897	 */
 898
 899	new_key.objectid = start;
 900	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 901	new_key.offset = size;
 902
 903	/* Search for a neighbor on the left. */
 904	if (start == block_group->start)
 905		goto right;
 906	key.objectid = start - 1;
 907	key.type = (u8)-1;
 908	key.offset = (u64)-1;
 909
 910	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 911	if (ret)
 912		goto out;
 913
 914	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 915
 916	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
 917		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
 918		btrfs_release_path(path);
 919		goto right;
 920	}
 921
 922	found_start = key.objectid;
 923	found_end = key.objectid + key.offset;
 924	ASSERT(found_start >= block_group->start &&
 925	       found_end > block_group->start);
 926	ASSERT(found_start < start && found_end <= start);
 927
 928	/*
 929	 * Delete the neighbor on the left and absorb it into the new key (cases
 930	 * 2 and 4).
 931	 */
 932	if (found_end == start) {
 933		ret = btrfs_del_item(trans, root, path);
 934		if (ret)
 935			goto out;
 936		new_key.objectid = found_start;
 937		new_key.offset += key.offset;
 938		new_extents--;
 939	}
 940	btrfs_release_path(path);
 941
 942right:
 943	/* Search for a neighbor on the right. */
 944	if (end == block_group->start + block_group->length)
 945		goto insert;
 946	key.objectid = end;
 947	key.type = (u8)-1;
 948	key.offset = (u64)-1;
 949
 950	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 951	if (ret)
 952		goto out;
 953
 954	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 955
 956	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
 957		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
 958		btrfs_release_path(path);
 959		goto insert;
 960	}
 961
 962	found_start = key.objectid;
 963	found_end = key.objectid + key.offset;
 964	ASSERT(found_start >= block_group->start &&
 965	       found_end > block_group->start);
 966	ASSERT((found_start < start && found_end <= start) ||
 967	       (found_start >= end && found_end > end));
 968
 969	/*
 970	 * Delete the neighbor on the right and absorb it into the new key
 971	 * (cases 3 and 4).
 972	 */
 973	if (found_start == end) {
 974		ret = btrfs_del_item(trans, root, path);
 975		if (ret)
 976			goto out;
 977		new_key.offset += key.offset;
 978		new_extents--;
 979	}
 980	btrfs_release_path(path);
 981
 982insert:
 983	/* Insert the new key (cases 1-4). */
 984	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
 985	if (ret)
 986		goto out;
 987
 988	btrfs_release_path(path);
 989	ret = update_free_space_extent_count(trans, block_group, path,
 990					     new_extents);
 991
 992out:
 993	return ret;
 994}
 995
 996EXPORT_FOR_TESTS
 997int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
 998			     struct btrfs_block_group *block_group,
 999			     struct btrfs_path *path, u64 start, u64 size)
1000{
1001	struct btrfs_free_space_info *info;
1002	u32 flags;
1003	int ret;
1004
1005	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
1006		ret = __add_block_group_free_space(trans, block_group, path);
1007		if (ret)
1008			return ret;
1009	}
1010
1011	info = search_free_space_info(NULL, block_group, path, 0);
1012	if (IS_ERR(info))
1013		return PTR_ERR(info);
1014	flags = btrfs_free_space_flags(path->nodes[0], info);
1015	btrfs_release_path(path);
1016
1017	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
1018		return modify_free_space_bitmap(trans, block_group, path,
1019						start, size, 0);
1020	} else {
1021		return add_free_space_extent(trans, block_group, path, start,
1022					     size);
1023	}
1024}
1025
1026int add_to_free_space_tree(struct btrfs_trans_handle *trans,
1027			   u64 start, u64 size)
1028{
1029	struct btrfs_block_group *block_group;
1030	struct btrfs_path *path;
1031	int ret;
1032
1033	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1034		return 0;
1035
1036	path = btrfs_alloc_path();
1037	if (!path) {
1038		ret = -ENOMEM;
1039		goto out;
1040	}
1041
1042	block_group = btrfs_lookup_block_group(trans->fs_info, start);
1043	if (!block_group) {
1044		ASSERT(0);
1045		ret = -ENOENT;
1046		goto out;
1047	}
1048
1049	mutex_lock(&block_group->free_space_lock);
1050	ret = __add_to_free_space_tree(trans, block_group, path, start, size);
1051	mutex_unlock(&block_group->free_space_lock);
1052
1053	btrfs_put_block_group(block_group);
1054out:
1055	btrfs_free_path(path);
1056	if (ret)
1057		btrfs_abort_transaction(trans, ret);
1058	return ret;
1059}
1060
1061/*
1062 * Populate the free space tree by walking the extent tree. Operations on the
1063 * extent tree that happen as a result of writes to the free space tree will go
1064 * through the normal add/remove hooks.
1065 */
1066static int populate_free_space_tree(struct btrfs_trans_handle *trans,
1067				    struct btrfs_block_group *block_group)
1068{
1069	struct btrfs_root *extent_root;
1070	struct btrfs_path *path, *path2;
1071	struct btrfs_key key;
1072	u64 start, end;
1073	int ret;
1074
1075	path = btrfs_alloc_path();
1076	if (!path)
1077		return -ENOMEM;
1078	path->reada = READA_FORWARD;
1079
1080	path2 = btrfs_alloc_path();
1081	if (!path2) {
1082		btrfs_free_path(path);
1083		return -ENOMEM;
1084	}
1085
1086	ret = add_new_free_space_info(trans, block_group, path2);
1087	if (ret)
1088		goto out;
1089
1090	mutex_lock(&block_group->free_space_lock);
1091
1092	/*
1093	 * Iterate through all of the extent and metadata items in this block
1094	 * group, adding the free space between them and the free space at the
1095	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1096	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1097	 * contained in.
1098	 */
1099	key.objectid = block_group->start;
1100	key.type = BTRFS_EXTENT_ITEM_KEY;
1101	key.offset = 0;
1102
1103	extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
1104	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
1105	if (ret < 0)
1106		goto out_locked;
1107	ASSERT(ret == 0);
1108
1109	start = block_group->start;
1110	end = block_group->start + block_group->length;
1111	while (1) {
1112		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1113
1114		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
1115		    key.type == BTRFS_METADATA_ITEM_KEY) {
1116			if (key.objectid >= end)
1117				break;
1118
1119			if (start < key.objectid) {
1120				ret = __add_to_free_space_tree(trans,
1121							       block_group,
1122							       path2, start,
1123							       key.objectid -
1124							       start);
1125				if (ret)
1126					goto out_locked;
1127			}
1128			start = key.objectid;
1129			if (key.type == BTRFS_METADATA_ITEM_KEY)
1130				start += trans->fs_info->nodesize;
1131			else
1132				start += key.offset;
1133		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1134			if (key.objectid != block_group->start)
1135				break;
1136		}
1137
1138		ret = btrfs_next_item(extent_root, path);
1139		if (ret < 0)
1140			goto out_locked;
1141		if (ret)
1142			break;
1143	}
1144	if (start < end) {
1145		ret = __add_to_free_space_tree(trans, block_group, path2,
1146					       start, end - start);
1147		if (ret)
1148			goto out_locked;
1149	}
1150
1151	ret = 0;
1152out_locked:
1153	mutex_unlock(&block_group->free_space_lock);
1154out:
1155	btrfs_free_path(path2);
1156	btrfs_free_path(path);
1157	return ret;
1158}
1159
1160int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
1161{
1162	struct btrfs_trans_handle *trans;
1163	struct btrfs_root *tree_root = fs_info->tree_root;
1164	struct btrfs_root *free_space_root;
1165	struct btrfs_block_group *block_group;
1166	struct rb_node *node;
1167	int ret;
1168
1169	trans = btrfs_start_transaction(tree_root, 0);
1170	if (IS_ERR(trans))
1171		return PTR_ERR(trans);
1172
1173	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1174	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1175	free_space_root = btrfs_create_tree(trans,
1176					    BTRFS_FREE_SPACE_TREE_OBJECTID);
1177	if (IS_ERR(free_space_root)) {
1178		ret = PTR_ERR(free_space_root);
1179		btrfs_abort_transaction(trans, ret);
1180		btrfs_end_transaction(trans);
1181		goto out_clear;
1182	}
1183	ret = btrfs_global_root_insert(free_space_root);
1184	if (ret) {
1185		btrfs_put_root(free_space_root);
1186		btrfs_abort_transaction(trans, ret);
1187		btrfs_end_transaction(trans);
1188		goto out_clear;
1189	}
 
1190
1191	node = rb_first_cached(&fs_info->block_group_cache_tree);
1192	while (node) {
1193		block_group = rb_entry(node, struct btrfs_block_group,
1194				       cache_node);
1195		ret = populate_free_space_tree(trans, block_group);
1196		if (ret) {
1197			btrfs_abort_transaction(trans, ret);
1198			btrfs_end_transaction(trans);
1199			goto out_clear;
1200		}
1201		node = rb_next(node);
1202	}
1203
1204	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1205	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1206	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1207	ret = btrfs_commit_transaction(trans);
1208
1209	/*
1210	 * Now that we've committed the transaction any reading of our commit
1211	 * root will be safe, so we can cache from the free space tree now.
1212	 */
1213	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1214	return ret;
1215
1216out_clear:
1217	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1218	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
 
1219	return ret;
1220}
1221
1222static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1223				 struct btrfs_root *root)
1224{
1225	struct btrfs_path *path;
1226	struct btrfs_key key;
1227	int nr;
1228	int ret;
1229
1230	path = btrfs_alloc_path();
1231	if (!path)
1232		return -ENOMEM;
1233
 
 
1234	key.objectid = 0;
1235	key.type = 0;
1236	key.offset = 0;
1237
1238	while (1) {
1239		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1240		if (ret < 0)
1241			goto out;
1242
1243		nr = btrfs_header_nritems(path->nodes[0]);
1244		if (!nr)
1245			break;
1246
1247		path->slots[0] = 0;
1248		ret = btrfs_del_items(trans, root, path, 0, nr);
1249		if (ret)
1250			goto out;
1251
1252		btrfs_release_path(path);
1253	}
1254
1255	ret = 0;
1256out:
1257	btrfs_free_path(path);
1258	return ret;
1259}
1260
1261int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
1262{
1263	struct btrfs_trans_handle *trans;
1264	struct btrfs_root *tree_root = fs_info->tree_root;
1265	struct btrfs_key key = {
1266		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1267		.type = BTRFS_ROOT_ITEM_KEY,
1268		.offset = 0,
1269	};
1270	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1271	int ret;
1272
1273	trans = btrfs_start_transaction(tree_root, 0);
1274	if (IS_ERR(trans))
1275		return PTR_ERR(trans);
1276
1277	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1278	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
 
1279
1280	ret = clear_free_space_tree(trans, free_space_root);
1281	if (ret) {
1282		btrfs_abort_transaction(trans, ret);
1283		btrfs_end_transaction(trans);
1284		return ret;
1285	}
1286
1287	ret = btrfs_del_root(trans, &free_space_root->root_key);
1288	if (ret) {
1289		btrfs_abort_transaction(trans, ret);
1290		btrfs_end_transaction(trans);
1291		return ret;
1292	}
1293
1294	btrfs_global_root_delete(free_space_root);
1295
1296	spin_lock(&fs_info->trans_lock);
1297	list_del(&free_space_root->dirty_list);
1298	spin_unlock(&fs_info->trans_lock);
1299
1300	btrfs_tree_lock(free_space_root->node);
1301	btrfs_clear_buffer_dirty(trans, free_space_root->node);
1302	btrfs_tree_unlock(free_space_root->node);
1303	ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
1304				    free_space_root->node, 0, 1);
1305	btrfs_put_root(free_space_root);
1306	if (ret < 0) {
1307		btrfs_abort_transaction(trans, ret);
1308		btrfs_end_transaction(trans);
1309		return ret;
1310	}
1311
1312	return btrfs_commit_transaction(trans);
1313}
1314
1315int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
1316{
1317	struct btrfs_trans_handle *trans;
1318	struct btrfs_key key = {
1319		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1320		.type = BTRFS_ROOT_ITEM_KEY,
1321		.offset = 0,
1322	};
1323	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1324	struct rb_node *node;
1325	int ret;
1326
1327	trans = btrfs_start_transaction(free_space_root, 1);
1328	if (IS_ERR(trans))
1329		return PTR_ERR(trans);
1330
1331	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1332	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1333
1334	ret = clear_free_space_tree(trans, free_space_root);
1335	if (ret) {
1336		btrfs_abort_transaction(trans, ret);
1337		btrfs_end_transaction(trans);
1338		return ret;
1339	}
1340
1341	node = rb_first_cached(&fs_info->block_group_cache_tree);
1342	while (node) {
1343		struct btrfs_block_group *block_group;
1344
1345		block_group = rb_entry(node, struct btrfs_block_group,
1346				       cache_node);
1347		ret = populate_free_space_tree(trans, block_group);
1348		if (ret) {
1349			btrfs_abort_transaction(trans, ret);
1350			btrfs_end_transaction(trans);
1351			return ret;
1352		}
1353		node = rb_next(node);
1354	}
1355
1356	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1357	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1358	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1359
1360	ret = btrfs_commit_transaction(trans);
1361	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
 
1362	return ret;
1363}
1364
1365static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
1366					struct btrfs_block_group *block_group,
1367					struct btrfs_path *path)
1368{
1369	int ret;
1370
1371	clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags);
1372
1373	ret = add_new_free_space_info(trans, block_group, path);
1374	if (ret)
1375		return ret;
1376
1377	return __add_to_free_space_tree(trans, block_group, path,
1378					block_group->start,
1379					block_group->length);
1380}
1381
1382int add_block_group_free_space(struct btrfs_trans_handle *trans,
1383			       struct btrfs_block_group *block_group)
1384{
1385	struct btrfs_fs_info *fs_info = trans->fs_info;
1386	struct btrfs_path *path = NULL;
1387	int ret = 0;
1388
1389	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1390		return 0;
1391
1392	mutex_lock(&block_group->free_space_lock);
1393	if (!test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags))
1394		goto out;
1395
1396	path = btrfs_alloc_path();
1397	if (!path) {
1398		ret = -ENOMEM;
1399		goto out;
1400	}
1401
1402	ret = __add_block_group_free_space(trans, block_group, path);
1403
1404out:
1405	btrfs_free_path(path);
1406	mutex_unlock(&block_group->free_space_lock);
1407	if (ret)
1408		btrfs_abort_transaction(trans, ret);
1409	return ret;
1410}
1411
1412int remove_block_group_free_space(struct btrfs_trans_handle *trans,
1413				  struct btrfs_block_group *block_group)
1414{
1415	struct btrfs_root *root = btrfs_free_space_root(block_group);
1416	struct btrfs_path *path;
1417	struct btrfs_key key, found_key;
1418	struct extent_buffer *leaf;
1419	u64 start, end;
1420	int done = 0, nr;
1421	int ret;
1422
1423	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1424		return 0;
1425
1426	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
1427		/* We never added this block group to the free space tree. */
1428		return 0;
1429	}
1430
1431	path = btrfs_alloc_path();
1432	if (!path) {
1433		ret = -ENOMEM;
1434		goto out;
1435	}
1436
1437	start = block_group->start;
1438	end = block_group->start + block_group->length;
1439
1440	key.objectid = end - 1;
1441	key.type = (u8)-1;
1442	key.offset = (u64)-1;
1443
1444	while (!done) {
1445		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
1446		if (ret)
1447			goto out;
1448
1449		leaf = path->nodes[0];
1450		nr = 0;
1451		path->slots[0]++;
1452		while (path->slots[0] > 0) {
1453			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
1454
1455			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
1456				ASSERT(found_key.objectid == block_group->start);
1457				ASSERT(found_key.offset == block_group->length);
1458				done = 1;
1459				nr++;
1460				path->slots[0]--;
1461				break;
1462			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
1463				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
1464				ASSERT(found_key.objectid >= start);
1465				ASSERT(found_key.objectid < end);
1466				ASSERT(found_key.objectid + found_key.offset <= end);
1467				nr++;
1468				path->slots[0]--;
1469			} else {
1470				ASSERT(0);
1471			}
1472		}
1473
1474		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
1475		if (ret)
1476			goto out;
1477		btrfs_release_path(path);
1478	}
1479
1480	ret = 0;
1481out:
1482	btrfs_free_path(path);
1483	if (ret)
1484		btrfs_abort_transaction(trans, ret);
1485	return ret;
1486}
1487
1488static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
1489				   struct btrfs_path *path,
1490				   u32 expected_extent_count)
1491{
1492	struct btrfs_block_group *block_group;
1493	struct btrfs_fs_info *fs_info;
1494	struct btrfs_root *root;
1495	struct btrfs_key key;
1496	int prev_bit = 0, bit;
1497	/* Initialize to silence GCC. */
1498	u64 extent_start = 0;
1499	u64 end, offset;
1500	u64 total_found = 0;
1501	u32 extent_count = 0;
1502	int ret;
1503
1504	block_group = caching_ctl->block_group;
1505	fs_info = block_group->fs_info;
1506	root = btrfs_free_space_root(block_group);
1507
1508	end = block_group->start + block_group->length;
1509
1510	while (1) {
1511		ret = btrfs_next_item(root, path);
1512		if (ret < 0)
1513			goto out;
1514		if (ret)
1515			break;
1516
1517		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1518
1519		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1520			break;
1521
1522		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
1523		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1524
 
 
1525		offset = key.objectid;
1526		while (offset < key.objectid + key.offset) {
1527			bit = free_space_test_bit(block_group, path, offset);
1528			if (prev_bit == 0 && bit == 1) {
1529				extent_start = offset;
1530			} else if (prev_bit == 1 && bit == 0) {
1531				u64 space_added;
1532
1533				ret = btrfs_add_new_free_space(block_group,
1534							       extent_start,
1535							       offset,
1536							       &space_added);
1537				if (ret)
1538					goto out;
1539				total_found += space_added;
1540				if (total_found > CACHING_CTL_WAKE_UP) {
1541					total_found = 0;
1542					wake_up(&caching_ctl->wait);
1543				}
1544				extent_count++;
1545			}
1546			prev_bit = bit;
1547			offset += fs_info->sectorsize;
1548		}
1549	}
1550	if (prev_bit == 1) {
1551		ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
1552		if (ret)
1553			goto out;
1554		extent_count++;
1555	}
1556
1557	if (extent_count != expected_extent_count) {
1558		btrfs_err(fs_info,
1559			  "incorrect extent count for %llu; counted %u, expected %u",
1560			  block_group->start, extent_count,
1561			  expected_extent_count);
1562		ASSERT(0);
1563		ret = -EIO;
1564		goto out;
1565	}
1566
 
 
1567	ret = 0;
1568out:
1569	return ret;
1570}
1571
1572static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
1573				   struct btrfs_path *path,
1574				   u32 expected_extent_count)
1575{
1576	struct btrfs_block_group *block_group;
1577	struct btrfs_fs_info *fs_info;
1578	struct btrfs_root *root;
1579	struct btrfs_key key;
1580	u64 end;
1581	u64 total_found = 0;
1582	u32 extent_count = 0;
1583	int ret;
1584
1585	block_group = caching_ctl->block_group;
1586	fs_info = block_group->fs_info;
1587	root = btrfs_free_space_root(block_group);
1588
1589	end = block_group->start + block_group->length;
1590
1591	while (1) {
1592		u64 space_added;
1593
1594		ret = btrfs_next_item(root, path);
1595		if (ret < 0)
1596			goto out;
1597		if (ret)
1598			break;
1599
1600		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1601
1602		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1603			break;
1604
1605		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
1606		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1607
1608		ret = btrfs_add_new_free_space(block_group, key.objectid,
1609					       key.objectid + key.offset,
1610					       &space_added);
1611		if (ret)
1612			goto out;
1613		total_found += space_added;
1614		if (total_found > CACHING_CTL_WAKE_UP) {
1615			total_found = 0;
1616			wake_up(&caching_ctl->wait);
1617		}
1618		extent_count++;
1619	}
1620
1621	if (extent_count != expected_extent_count) {
1622		btrfs_err(fs_info,
1623			  "incorrect extent count for %llu; counted %u, expected %u",
1624			  block_group->start, extent_count,
1625			  expected_extent_count);
1626		ASSERT(0);
1627		ret = -EIO;
1628		goto out;
1629	}
1630
 
 
1631	ret = 0;
1632out:
1633	return ret;
1634}
1635
1636int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1637{
1638	struct btrfs_block_group *block_group;
1639	struct btrfs_free_space_info *info;
1640	struct btrfs_path *path;
1641	u32 extent_count, flags;
1642	int ret;
1643
1644	block_group = caching_ctl->block_group;
1645
1646	path = btrfs_alloc_path();
1647	if (!path)
1648		return -ENOMEM;
1649
1650	/*
1651	 * Just like caching_thread() doesn't want to deadlock on the extent
1652	 * tree, we don't want to deadlock on the free space tree.
1653	 */
1654	path->skip_locking = 1;
1655	path->search_commit_root = 1;
1656	path->reada = READA_FORWARD;
1657
1658	info = search_free_space_info(NULL, block_group, path, 0);
1659	if (IS_ERR(info)) {
1660		ret = PTR_ERR(info);
1661		goto out;
1662	}
1663	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1664	flags = btrfs_free_space_flags(path->nodes[0], info);
1665
1666	/*
1667	 * We left path pointing to the free space info item, so now
1668	 * load_free_space_foo can just iterate through the free space tree from
1669	 * there.
1670	 */
1671	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1672		ret = load_free_space_bitmaps(caching_ctl, path, extent_count);
1673	else
1674		ret = load_free_space_extents(caching_ctl, path, extent_count);
1675
1676out:
1677	btrfs_free_path(path);
1678	return ret;
1679}
v5.4
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2015 Facebook.  All rights reserved.
   4 */
   5
   6#include <linux/kernel.h>
   7#include <linux/sched/mm.h>
 
   8#include "ctree.h"
   9#include "disk-io.h"
  10#include "locking.h"
  11#include "free-space-tree.h"
  12#include "transaction.h"
  13#include "block-group.h"
 
 
 
 
  14
  15static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
  16					struct btrfs_block_group_cache *block_group,
  17					struct btrfs_path *path);
  18
  19void set_free_space_tree_thresholds(struct btrfs_block_group_cache *cache)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  20{
  21	u32 bitmap_range;
  22	size_t bitmap_size;
  23	u64 num_bitmaps, total_bitmap_size;
  24
 
 
 
 
  25	/*
  26	 * We convert to bitmaps when the disk space required for using extents
  27	 * exceeds that required for using bitmaps.
  28	 */
  29	bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
  30	num_bitmaps = div_u64(cache->key.offset + bitmap_range - 1,
  31			      bitmap_range);
  32	bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
  33	total_bitmap_size = num_bitmaps * bitmap_size;
  34	cache->bitmap_high_thresh = div_u64(total_bitmap_size,
  35					    sizeof(struct btrfs_item));
  36
  37	/*
  38	 * We allow for a small buffer between the high threshold and low
  39	 * threshold to avoid thrashing back and forth between the two formats.
  40	 */
  41	if (cache->bitmap_high_thresh > 100)
  42		cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
  43	else
  44		cache->bitmap_low_thresh = 0;
  45}
  46
  47static int add_new_free_space_info(struct btrfs_trans_handle *trans,
  48				   struct btrfs_block_group_cache *block_group,
  49				   struct btrfs_path *path)
  50{
  51	struct btrfs_root *root = trans->fs_info->free_space_root;
  52	struct btrfs_free_space_info *info;
  53	struct btrfs_key key;
  54	struct extent_buffer *leaf;
  55	int ret;
  56
  57	key.objectid = block_group->key.objectid;
  58	key.type = BTRFS_FREE_SPACE_INFO_KEY;
  59	key.offset = block_group->key.offset;
  60
  61	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
  62	if (ret)
  63		goto out;
  64
  65	leaf = path->nodes[0];
  66	info = btrfs_item_ptr(leaf, path->slots[0],
  67			      struct btrfs_free_space_info);
  68	btrfs_set_free_space_extent_count(leaf, info, 0);
  69	btrfs_set_free_space_flags(leaf, info, 0);
  70	btrfs_mark_buffer_dirty(leaf);
  71
  72	ret = 0;
  73out:
  74	btrfs_release_path(path);
  75	return ret;
  76}
  77
  78EXPORT_FOR_TESTS
  79struct btrfs_free_space_info *search_free_space_info(
  80		struct btrfs_trans_handle *trans,
  81		struct btrfs_block_group_cache *block_group,
  82		struct btrfs_path *path, int cow)
  83{
  84	struct btrfs_fs_info *fs_info = block_group->fs_info;
  85	struct btrfs_root *root = fs_info->free_space_root;
  86	struct btrfs_key key;
  87	int ret;
  88
  89	key.objectid = block_group->key.objectid;
  90	key.type = BTRFS_FREE_SPACE_INFO_KEY;
  91	key.offset = block_group->key.offset;
  92
  93	ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
  94	if (ret < 0)
  95		return ERR_PTR(ret);
  96	if (ret != 0) {
  97		btrfs_warn(fs_info, "missing free space info for %llu",
  98			   block_group->key.objectid);
  99		ASSERT(0);
 100		return ERR_PTR(-ENOENT);
 101	}
 102
 103	return btrfs_item_ptr(path->nodes[0], path->slots[0],
 104			      struct btrfs_free_space_info);
 105}
 106
 107/*
 108 * btrfs_search_slot() but we're looking for the greatest key less than the
 109 * passed key.
 110 */
 111static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
 112				  struct btrfs_root *root,
 113				  struct btrfs_key *key, struct btrfs_path *p,
 114				  int ins_len, int cow)
 115{
 116	int ret;
 117
 118	ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
 119	if (ret < 0)
 120		return ret;
 121
 122	if (ret == 0) {
 123		ASSERT(0);
 124		return -EIO;
 125	}
 126
 127	if (p->slots[0] == 0) {
 128		ASSERT(0);
 129		return -EIO;
 130	}
 131	p->slots[0]--;
 132
 133	return 0;
 134}
 135
 136static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
 
 137{
 138	return DIV_ROUND_UP((u32)div_u64(size, sectorsize), BITS_PER_BYTE);
 139}
 140
 141static unsigned long *alloc_bitmap(u32 bitmap_size)
 142{
 143	unsigned long *ret;
 144	unsigned int nofs_flag;
 145	u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
 146
 147	/*
 148	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
 149	 * into the filesystem as the free space bitmap can be modified in the
 150	 * critical section of a transaction commit.
 151	 *
 152	 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
 153	 * know that recursion is unsafe.
 154	 */
 155	nofs_flag = memalloc_nofs_save();
 156	ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
 157	memalloc_nofs_restore(nofs_flag);
 158	return ret;
 159}
 160
 161static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
 162{
 163	u8 *p = ((u8 *)map) + BIT_BYTE(start);
 164	const unsigned int size = start + len;
 165	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
 166	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
 167
 168	while (len - bits_to_set >= 0) {
 169		*p |= mask_to_set;
 170		len -= bits_to_set;
 171		bits_to_set = BITS_PER_BYTE;
 172		mask_to_set = ~0;
 173		p++;
 174	}
 175	if (len) {
 176		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
 177		*p |= mask_to_set;
 178	}
 179}
 180
 181EXPORT_FOR_TESTS
 182int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 183				  struct btrfs_block_group_cache *block_group,
 184				  struct btrfs_path *path)
 185{
 186	struct btrfs_fs_info *fs_info = trans->fs_info;
 187	struct btrfs_root *root = fs_info->free_space_root;
 188	struct btrfs_free_space_info *info;
 189	struct btrfs_key key, found_key;
 190	struct extent_buffer *leaf;
 191	unsigned long *bitmap;
 192	char *bitmap_cursor;
 193	u64 start, end;
 194	u64 bitmap_range, i;
 195	u32 bitmap_size, flags, expected_extent_count;
 196	u32 extent_count = 0;
 197	int done = 0, nr;
 198	int ret;
 199
 200	bitmap_size = free_space_bitmap_size(block_group->key.offset,
 201					     fs_info->sectorsize);
 202	bitmap = alloc_bitmap(bitmap_size);
 203	if (!bitmap) {
 204		ret = -ENOMEM;
 205		goto out;
 206	}
 207
 208	start = block_group->key.objectid;
 209	end = block_group->key.objectid + block_group->key.offset;
 210
 211	key.objectid = end - 1;
 212	key.type = (u8)-1;
 213	key.offset = (u64)-1;
 214
 215	while (!done) {
 216		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 217		if (ret)
 218			goto out;
 219
 220		leaf = path->nodes[0];
 221		nr = 0;
 222		path->slots[0]++;
 223		while (path->slots[0] > 0) {
 224			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
 225
 226			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
 227				ASSERT(found_key.objectid == block_group->key.objectid);
 228				ASSERT(found_key.offset == block_group->key.offset);
 229				done = 1;
 230				break;
 231			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
 232				u64 first, last;
 233
 234				ASSERT(found_key.objectid >= start);
 235				ASSERT(found_key.objectid < end);
 236				ASSERT(found_key.objectid + found_key.offset <= end);
 237
 238				first = div_u64(found_key.objectid - start,
 239						fs_info->sectorsize);
 240				last = div_u64(found_key.objectid + found_key.offset - start,
 241					       fs_info->sectorsize);
 242				le_bitmap_set(bitmap, first, last - first);
 243
 244				extent_count++;
 245				nr++;
 246				path->slots[0]--;
 247			} else {
 248				ASSERT(0);
 249			}
 250		}
 251
 252		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
 253		if (ret)
 254			goto out;
 255		btrfs_release_path(path);
 256	}
 257
 258	info = search_free_space_info(trans, block_group, path, 1);
 259	if (IS_ERR(info)) {
 260		ret = PTR_ERR(info);
 261		goto out;
 262	}
 263	leaf = path->nodes[0];
 264	flags = btrfs_free_space_flags(leaf, info);
 265	flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
 266	btrfs_set_free_space_flags(leaf, info, flags);
 267	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
 268	btrfs_mark_buffer_dirty(leaf);
 269	btrfs_release_path(path);
 270
 271	if (extent_count != expected_extent_count) {
 272		btrfs_err(fs_info,
 273			  "incorrect extent count for %llu; counted %u, expected %u",
 274			  block_group->key.objectid, extent_count,
 275			  expected_extent_count);
 276		ASSERT(0);
 277		ret = -EIO;
 278		goto out;
 279	}
 280
 281	bitmap_cursor = (char *)bitmap;
 282	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
 283	i = start;
 284	while (i < end) {
 285		unsigned long ptr;
 286		u64 extent_size;
 287		u32 data_size;
 288
 289		extent_size = min(end - i, bitmap_range);
 290		data_size = free_space_bitmap_size(extent_size,
 291						   fs_info->sectorsize);
 292
 293		key.objectid = i;
 294		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
 295		key.offset = extent_size;
 296
 297		ret = btrfs_insert_empty_item(trans, root, path, &key,
 298					      data_size);
 299		if (ret)
 300			goto out;
 301
 302		leaf = path->nodes[0];
 303		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 304		write_extent_buffer(leaf, bitmap_cursor, ptr,
 305				    data_size);
 306		btrfs_mark_buffer_dirty(leaf);
 307		btrfs_release_path(path);
 308
 309		i += extent_size;
 310		bitmap_cursor += data_size;
 311	}
 312
 313	ret = 0;
 314out:
 315	kvfree(bitmap);
 316	if (ret)
 317		btrfs_abort_transaction(trans, ret);
 318	return ret;
 319}
 320
 321EXPORT_FOR_TESTS
 322int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 323				  struct btrfs_block_group_cache *block_group,
 324				  struct btrfs_path *path)
 325{
 326	struct btrfs_fs_info *fs_info = trans->fs_info;
 327	struct btrfs_root *root = fs_info->free_space_root;
 328	struct btrfs_free_space_info *info;
 329	struct btrfs_key key, found_key;
 330	struct extent_buffer *leaf;
 331	unsigned long *bitmap;
 332	u64 start, end;
 333	u32 bitmap_size, flags, expected_extent_count;
 334	unsigned long nrbits, start_bit, end_bit;
 335	u32 extent_count = 0;
 336	int done = 0, nr;
 337	int ret;
 338
 339	bitmap_size = free_space_bitmap_size(block_group->key.offset,
 340					     fs_info->sectorsize);
 341	bitmap = alloc_bitmap(bitmap_size);
 342	if (!bitmap) {
 343		ret = -ENOMEM;
 344		goto out;
 345	}
 346
 347	start = block_group->key.objectid;
 348	end = block_group->key.objectid + block_group->key.offset;
 349
 350	key.objectid = end - 1;
 351	key.type = (u8)-1;
 352	key.offset = (u64)-1;
 353
 354	while (!done) {
 355		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 356		if (ret)
 357			goto out;
 358
 359		leaf = path->nodes[0];
 360		nr = 0;
 361		path->slots[0]++;
 362		while (path->slots[0] > 0) {
 363			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
 364
 365			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
 366				ASSERT(found_key.objectid == block_group->key.objectid);
 367				ASSERT(found_key.offset == block_group->key.offset);
 368				done = 1;
 369				break;
 370			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
 371				unsigned long ptr;
 372				char *bitmap_cursor;
 373				u32 bitmap_pos, data_size;
 374
 375				ASSERT(found_key.objectid >= start);
 376				ASSERT(found_key.objectid < end);
 377				ASSERT(found_key.objectid + found_key.offset <= end);
 378
 379				bitmap_pos = div_u64(found_key.objectid - start,
 380						     fs_info->sectorsize *
 381						     BITS_PER_BYTE);
 382				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
 383				data_size = free_space_bitmap_size(found_key.offset,
 384								   fs_info->sectorsize);
 385
 386				ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
 387				read_extent_buffer(leaf, bitmap_cursor, ptr,
 388						   data_size);
 389
 390				nr++;
 391				path->slots[0]--;
 392			} else {
 393				ASSERT(0);
 394			}
 395		}
 396
 397		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
 398		if (ret)
 399			goto out;
 400		btrfs_release_path(path);
 401	}
 402
 403	info = search_free_space_info(trans, block_group, path, 1);
 404	if (IS_ERR(info)) {
 405		ret = PTR_ERR(info);
 406		goto out;
 407	}
 408	leaf = path->nodes[0];
 409	flags = btrfs_free_space_flags(leaf, info);
 410	flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
 411	btrfs_set_free_space_flags(leaf, info, flags);
 412	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
 413	btrfs_mark_buffer_dirty(leaf);
 414	btrfs_release_path(path);
 415
 416	nrbits = div_u64(block_group->key.offset, block_group->fs_info->sectorsize);
 417	start_bit = find_next_bit_le(bitmap, nrbits, 0);
 418
 419	while (start_bit < nrbits) {
 420		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
 421		ASSERT(start_bit < end_bit);
 422
 423		key.objectid = start + start_bit * block_group->fs_info->sectorsize;
 424		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 425		key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;
 426
 427		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 428		if (ret)
 429			goto out;
 430		btrfs_release_path(path);
 431
 432		extent_count++;
 433
 434		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
 435	}
 436
 437	if (extent_count != expected_extent_count) {
 438		btrfs_err(fs_info,
 439			  "incorrect extent count for %llu; counted %u, expected %u",
 440			  block_group->key.objectid, extent_count,
 441			  expected_extent_count);
 442		ASSERT(0);
 443		ret = -EIO;
 444		goto out;
 445	}
 446
 447	ret = 0;
 448out:
 449	kvfree(bitmap);
 450	if (ret)
 451		btrfs_abort_transaction(trans, ret);
 452	return ret;
 453}
 454
 455static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
 456					  struct btrfs_block_group_cache *block_group,
 457					  struct btrfs_path *path,
 458					  int new_extents)
 459{
 460	struct btrfs_free_space_info *info;
 461	u32 flags;
 462	u32 extent_count;
 463	int ret = 0;
 464
 465	if (new_extents == 0)
 466		return 0;
 467
 468	info = search_free_space_info(trans, block_group, path, 1);
 469	if (IS_ERR(info)) {
 470		ret = PTR_ERR(info);
 471		goto out;
 472	}
 473	flags = btrfs_free_space_flags(path->nodes[0], info);
 474	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
 475
 476	extent_count += new_extents;
 477	btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
 478	btrfs_mark_buffer_dirty(path->nodes[0]);
 479	btrfs_release_path(path);
 480
 481	if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
 482	    extent_count > block_group->bitmap_high_thresh) {
 483		ret = convert_free_space_to_bitmaps(trans, block_group, path);
 484	} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
 485		   extent_count < block_group->bitmap_low_thresh) {
 486		ret = convert_free_space_to_extents(trans, block_group, path);
 487	}
 488
 489out:
 490	return ret;
 491}
 492
 493EXPORT_FOR_TESTS
 494int free_space_test_bit(struct btrfs_block_group_cache *block_group,
 495			struct btrfs_path *path, u64 offset)
 496{
 497	struct extent_buffer *leaf;
 498	struct btrfs_key key;
 499	u64 found_start, found_end;
 500	unsigned long ptr, i;
 501
 502	leaf = path->nodes[0];
 503	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 504	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
 505
 506	found_start = key.objectid;
 507	found_end = key.objectid + key.offset;
 508	ASSERT(offset >= found_start && offset < found_end);
 509
 510	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 511	i = div_u64(offset - found_start,
 512		    block_group->fs_info->sectorsize);
 513	return !!extent_buffer_test_bit(leaf, ptr, i);
 514}
 515
 516static void free_space_set_bits(struct btrfs_block_group_cache *block_group,
 
 517				struct btrfs_path *path, u64 *start, u64 *size,
 518				int bit)
 519{
 520	struct btrfs_fs_info *fs_info = block_group->fs_info;
 521	struct extent_buffer *leaf;
 522	struct btrfs_key key;
 523	u64 end = *start + *size;
 524	u64 found_start, found_end;
 525	unsigned long ptr, first, last;
 526
 527	leaf = path->nodes[0];
 528	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 529	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
 530
 531	found_start = key.objectid;
 532	found_end = key.objectid + key.offset;
 533	ASSERT(*start >= found_start && *start < found_end);
 534	ASSERT(end > found_start);
 535
 536	if (end > found_end)
 537		end = found_end;
 538
 539	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 540	first = div_u64(*start - found_start, fs_info->sectorsize);
 541	last = div_u64(end - found_start, fs_info->sectorsize);
 542	if (bit)
 543		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
 544	else
 545		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
 546	btrfs_mark_buffer_dirty(leaf);
 547
 548	*size -= end - *start;
 549	*start = end;
 550}
 551
 552/*
 553 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
 554 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
 555 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
 556 * looking for.
 557 */
 558static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
 559				  struct btrfs_root *root, struct btrfs_path *p)
 560{
 561	struct btrfs_key key;
 562
 563	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
 564		p->slots[0]++;
 565		return 0;
 566	}
 567
 568	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
 569	btrfs_release_path(p);
 570
 571	key.objectid += key.offset;
 572	key.type = (u8)-1;
 573	key.offset = (u64)-1;
 574
 575	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
 576}
 577
 578/*
 579 * If remove is 1, then we are removing free space, thus clearing bits in the
 580 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
 581 * the bitmap.
 582 */
 583static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
 584				    struct btrfs_block_group_cache *block_group,
 585				    struct btrfs_path *path,
 586				    u64 start, u64 size, int remove)
 587{
 588	struct btrfs_root *root = block_group->fs_info->free_space_root;
 589	struct btrfs_key key;
 590	u64 end = start + size;
 591	u64 cur_start, cur_size;
 592	int prev_bit, next_bit;
 593	int new_extents;
 594	int ret;
 595
 596	/*
 597	 * Read the bit for the block immediately before the extent of space if
 598	 * that block is within the block group.
 599	 */
 600	if (start > block_group->key.objectid) {
 601		u64 prev_block = start - block_group->fs_info->sectorsize;
 602
 603		key.objectid = prev_block;
 604		key.type = (u8)-1;
 605		key.offset = (u64)-1;
 606
 607		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
 608		if (ret)
 609			goto out;
 610
 611		prev_bit = free_space_test_bit(block_group, path, prev_block);
 612
 613		/* The previous block may have been in the previous bitmap. */
 614		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 615		if (start >= key.objectid + key.offset) {
 616			ret = free_space_next_bitmap(trans, root, path);
 617			if (ret)
 618				goto out;
 619		}
 620	} else {
 621		key.objectid = start;
 622		key.type = (u8)-1;
 623		key.offset = (u64)-1;
 624
 625		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
 626		if (ret)
 627			goto out;
 628
 629		prev_bit = -1;
 630	}
 631
 632	/*
 633	 * Iterate over all of the bitmaps overlapped by the extent of space,
 634	 * clearing/setting bits as required.
 635	 */
 636	cur_start = start;
 637	cur_size = size;
 638	while (1) {
 639		free_space_set_bits(block_group, path, &cur_start, &cur_size,
 640				    !remove);
 641		if (cur_size == 0)
 642			break;
 643		ret = free_space_next_bitmap(trans, root, path);
 644		if (ret)
 645			goto out;
 646	}
 647
 648	/*
 649	 * Read the bit for the block immediately after the extent of space if
 650	 * that block is within the block group.
 651	 */
 652	if (end < block_group->key.objectid + block_group->key.offset) {
 653		/* The next block may be in the next bitmap. */
 654		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 655		if (end >= key.objectid + key.offset) {
 656			ret = free_space_next_bitmap(trans, root, path);
 657			if (ret)
 658				goto out;
 659		}
 660
 661		next_bit = free_space_test_bit(block_group, path, end);
 662	} else {
 663		next_bit = -1;
 664	}
 665
 666	if (remove) {
 667		new_extents = -1;
 668		if (prev_bit == 1) {
 669			/* Leftover on the left. */
 670			new_extents++;
 671		}
 672		if (next_bit == 1) {
 673			/* Leftover on the right. */
 674			new_extents++;
 675		}
 676	} else {
 677		new_extents = 1;
 678		if (prev_bit == 1) {
 679			/* Merging with neighbor on the left. */
 680			new_extents--;
 681		}
 682		if (next_bit == 1) {
 683			/* Merging with neighbor on the right. */
 684			new_extents--;
 685		}
 686	}
 687
 688	btrfs_release_path(path);
 689	ret = update_free_space_extent_count(trans, block_group, path,
 690					     new_extents);
 691
 692out:
 693	return ret;
 694}
 695
 696static int remove_free_space_extent(struct btrfs_trans_handle *trans,
 697				    struct btrfs_block_group_cache *block_group,
 698				    struct btrfs_path *path,
 699				    u64 start, u64 size)
 700{
 701	struct btrfs_root *root = trans->fs_info->free_space_root;
 702	struct btrfs_key key;
 703	u64 found_start, found_end;
 704	u64 end = start + size;
 705	int new_extents = -1;
 706	int ret;
 707
 708	key.objectid = start;
 709	key.type = (u8)-1;
 710	key.offset = (u64)-1;
 711
 712	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 713	if (ret)
 714		goto out;
 715
 716	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 717
 718	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
 719
 720	found_start = key.objectid;
 721	found_end = key.objectid + key.offset;
 722	ASSERT(start >= found_start && end <= found_end);
 723
 724	/*
 725	 * Okay, now that we've found the free space extent which contains the
 726	 * free space that we are removing, there are four cases:
 727	 *
 728	 * 1. We're using the whole extent: delete the key we found and
 729	 * decrement the free space extent count.
 730	 * 2. We are using part of the extent starting at the beginning: delete
 731	 * the key we found and insert a new key representing the leftover at
 732	 * the end. There is no net change in the number of extents.
 733	 * 3. We are using part of the extent ending at the end: delete the key
 734	 * we found and insert a new key representing the leftover at the
 735	 * beginning. There is no net change in the number of extents.
 736	 * 4. We are using part of the extent in the middle: delete the key we
 737	 * found and insert two new keys representing the leftovers on each
 738	 * side. Where we used to have one extent, we now have two, so increment
 739	 * the extent count. We may need to convert the block group to bitmaps
 740	 * as a result.
 741	 */
 742
 743	/* Delete the existing key (cases 1-4). */
 744	ret = btrfs_del_item(trans, root, path);
 745	if (ret)
 746		goto out;
 747
 748	/* Add a key for leftovers at the beginning (cases 3 and 4). */
 749	if (start > found_start) {
 750		key.objectid = found_start;
 751		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 752		key.offset = start - found_start;
 753
 754		btrfs_release_path(path);
 755		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 756		if (ret)
 757			goto out;
 758		new_extents++;
 759	}
 760
 761	/* Add a key for leftovers at the end (cases 2 and 4). */
 762	if (end < found_end) {
 763		key.objectid = end;
 764		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 765		key.offset = found_end - end;
 766
 767		btrfs_release_path(path);
 768		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 769		if (ret)
 770			goto out;
 771		new_extents++;
 772	}
 773
 774	btrfs_release_path(path);
 775	ret = update_free_space_extent_count(trans, block_group, path,
 776					     new_extents);
 777
 778out:
 779	return ret;
 780}
 781
 782EXPORT_FOR_TESTS
 783int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
 784				  struct btrfs_block_group_cache *block_group,
 785				  struct btrfs_path *path, u64 start, u64 size)
 786{
 787	struct btrfs_free_space_info *info;
 788	u32 flags;
 789	int ret;
 790
 791	if (block_group->needs_free_space) {
 792		ret = __add_block_group_free_space(trans, block_group, path);
 793		if (ret)
 794			return ret;
 795	}
 796
 797	info = search_free_space_info(NULL, block_group, path, 0);
 798	if (IS_ERR(info))
 799		return PTR_ERR(info);
 800	flags = btrfs_free_space_flags(path->nodes[0], info);
 801	btrfs_release_path(path);
 802
 803	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
 804		return modify_free_space_bitmap(trans, block_group, path,
 805						start, size, 1);
 806	} else {
 807		return remove_free_space_extent(trans, block_group, path,
 808						start, size);
 809	}
 810}
 811
 812int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
 813				u64 start, u64 size)
 814{
 815	struct btrfs_block_group_cache *block_group;
 816	struct btrfs_path *path;
 817	int ret;
 818
 819	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
 820		return 0;
 821
 822	path = btrfs_alloc_path();
 823	if (!path) {
 824		ret = -ENOMEM;
 825		goto out;
 826	}
 827
 828	block_group = btrfs_lookup_block_group(trans->fs_info, start);
 829	if (!block_group) {
 830		ASSERT(0);
 831		ret = -ENOENT;
 832		goto out;
 833	}
 834
 835	mutex_lock(&block_group->free_space_lock);
 836	ret = __remove_from_free_space_tree(trans, block_group, path, start,
 837					    size);
 838	mutex_unlock(&block_group->free_space_lock);
 839
 840	btrfs_put_block_group(block_group);
 841out:
 842	btrfs_free_path(path);
 843	if (ret)
 844		btrfs_abort_transaction(trans, ret);
 845	return ret;
 846}
 847
 848static int add_free_space_extent(struct btrfs_trans_handle *trans,
 849				 struct btrfs_block_group_cache *block_group,
 850				 struct btrfs_path *path,
 851				 u64 start, u64 size)
 852{
 853	struct btrfs_root *root = trans->fs_info->free_space_root;
 854	struct btrfs_key key, new_key;
 855	u64 found_start, found_end;
 856	u64 end = start + size;
 857	int new_extents = 1;
 858	int ret;
 859
 860	/*
 861	 * We are adding a new extent of free space, but we need to merge
 862	 * extents. There are four cases here:
 863	 *
 864	 * 1. The new extent does not have any immediate neighbors to merge
 865	 * with: add the new key and increment the free space extent count. We
 866	 * may need to convert the block group to bitmaps as a result.
 867	 * 2. The new extent has an immediate neighbor before it: remove the
 868	 * previous key and insert a new key combining both of them. There is no
 869	 * net change in the number of extents.
 870	 * 3. The new extent has an immediate neighbor after it: remove the next
 871	 * key and insert a new key combining both of them. There is no net
 872	 * change in the number of extents.
 873	 * 4. The new extent has immediate neighbors on both sides: remove both
 874	 * of the keys and insert a new key combining all of them. Where we used
 875	 * to have two extents, we now have one, so decrement the extent count.
 876	 */
 877
 878	new_key.objectid = start;
 879	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
 880	new_key.offset = size;
 881
 882	/* Search for a neighbor on the left. */
 883	if (start == block_group->key.objectid)
 884		goto right;
 885	key.objectid = start - 1;
 886	key.type = (u8)-1;
 887	key.offset = (u64)-1;
 888
 889	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 890	if (ret)
 891		goto out;
 892
 893	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 894
 895	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
 896		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
 897		btrfs_release_path(path);
 898		goto right;
 899	}
 900
 901	found_start = key.objectid;
 902	found_end = key.objectid + key.offset;
 903	ASSERT(found_start >= block_group->key.objectid &&
 904	       found_end > block_group->key.objectid);
 905	ASSERT(found_start < start && found_end <= start);
 906
 907	/*
 908	 * Delete the neighbor on the left and absorb it into the new key (cases
 909	 * 2 and 4).
 910	 */
 911	if (found_end == start) {
 912		ret = btrfs_del_item(trans, root, path);
 913		if (ret)
 914			goto out;
 915		new_key.objectid = found_start;
 916		new_key.offset += key.offset;
 917		new_extents--;
 918	}
 919	btrfs_release_path(path);
 920
 921right:
 922	/* Search for a neighbor on the right. */
 923	if (end == block_group->key.objectid + block_group->key.offset)
 924		goto insert;
 925	key.objectid = end;
 926	key.type = (u8)-1;
 927	key.offset = (u64)-1;
 928
 929	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
 930	if (ret)
 931		goto out;
 932
 933	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 934
 935	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
 936		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
 937		btrfs_release_path(path);
 938		goto insert;
 939	}
 940
 941	found_start = key.objectid;
 942	found_end = key.objectid + key.offset;
 943	ASSERT(found_start >= block_group->key.objectid &&
 944	       found_end > block_group->key.objectid);
 945	ASSERT((found_start < start && found_end <= start) ||
 946	       (found_start >= end && found_end > end));
 947
 948	/*
 949	 * Delete the neighbor on the right and absorb it into the new key
 950	 * (cases 3 and 4).
 951	 */
 952	if (found_start == end) {
 953		ret = btrfs_del_item(trans, root, path);
 954		if (ret)
 955			goto out;
 956		new_key.offset += key.offset;
 957		new_extents--;
 958	}
 959	btrfs_release_path(path);
 960
 961insert:
 962	/* Insert the new key (cases 1-4). */
 963	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
 964	if (ret)
 965		goto out;
 966
 967	btrfs_release_path(path);
 968	ret = update_free_space_extent_count(trans, block_group, path,
 969					     new_extents);
 970
 971out:
 972	return ret;
 973}
 974
 975EXPORT_FOR_TESTS
 976int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
 977			     struct btrfs_block_group_cache *block_group,
 978			     struct btrfs_path *path, u64 start, u64 size)
 979{
 980	struct btrfs_free_space_info *info;
 981	u32 flags;
 982	int ret;
 983
 984	if (block_group->needs_free_space) {
 985		ret = __add_block_group_free_space(trans, block_group, path);
 986		if (ret)
 987			return ret;
 988	}
 989
 990	info = search_free_space_info(NULL, block_group, path, 0);
 991	if (IS_ERR(info))
 992		return PTR_ERR(info);
 993	flags = btrfs_free_space_flags(path->nodes[0], info);
 994	btrfs_release_path(path);
 995
 996	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
 997		return modify_free_space_bitmap(trans, block_group, path,
 998						start, size, 0);
 999	} else {
1000		return add_free_space_extent(trans, block_group, path, start,
1001					     size);
1002	}
1003}
1004
1005int add_to_free_space_tree(struct btrfs_trans_handle *trans,
1006			   u64 start, u64 size)
1007{
1008	struct btrfs_block_group_cache *block_group;
1009	struct btrfs_path *path;
1010	int ret;
1011
1012	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1013		return 0;
1014
1015	path = btrfs_alloc_path();
1016	if (!path) {
1017		ret = -ENOMEM;
1018		goto out;
1019	}
1020
1021	block_group = btrfs_lookup_block_group(trans->fs_info, start);
1022	if (!block_group) {
1023		ASSERT(0);
1024		ret = -ENOENT;
1025		goto out;
1026	}
1027
1028	mutex_lock(&block_group->free_space_lock);
1029	ret = __add_to_free_space_tree(trans, block_group, path, start, size);
1030	mutex_unlock(&block_group->free_space_lock);
1031
1032	btrfs_put_block_group(block_group);
1033out:
1034	btrfs_free_path(path);
1035	if (ret)
1036		btrfs_abort_transaction(trans, ret);
1037	return ret;
1038}
1039
1040/*
1041 * Populate the free space tree by walking the extent tree. Operations on the
1042 * extent tree that happen as a result of writes to the free space tree will go
1043 * through the normal add/remove hooks.
1044 */
1045static int populate_free_space_tree(struct btrfs_trans_handle *trans,
1046				    struct btrfs_block_group_cache *block_group)
1047{
1048	struct btrfs_root *extent_root = trans->fs_info->extent_root;
1049	struct btrfs_path *path, *path2;
1050	struct btrfs_key key;
1051	u64 start, end;
1052	int ret;
1053
1054	path = btrfs_alloc_path();
1055	if (!path)
1056		return -ENOMEM;
1057	path->reada = READA_FORWARD;
1058
1059	path2 = btrfs_alloc_path();
1060	if (!path2) {
1061		btrfs_free_path(path);
1062		return -ENOMEM;
1063	}
1064
1065	ret = add_new_free_space_info(trans, block_group, path2);
1066	if (ret)
1067		goto out;
1068
1069	mutex_lock(&block_group->free_space_lock);
1070
1071	/*
1072	 * Iterate through all of the extent and metadata items in this block
1073	 * group, adding the free space between them and the free space at the
1074	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1075	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1076	 * contained in.
1077	 */
1078	key.objectid = block_group->key.objectid;
1079	key.type = BTRFS_EXTENT_ITEM_KEY;
1080	key.offset = 0;
1081
 
1082	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
1083	if (ret < 0)
1084		goto out_locked;
1085	ASSERT(ret == 0);
1086
1087	start = block_group->key.objectid;
1088	end = block_group->key.objectid + block_group->key.offset;
1089	while (1) {
1090		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1091
1092		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
1093		    key.type == BTRFS_METADATA_ITEM_KEY) {
1094			if (key.objectid >= end)
1095				break;
1096
1097			if (start < key.objectid) {
1098				ret = __add_to_free_space_tree(trans,
1099							       block_group,
1100							       path2, start,
1101							       key.objectid -
1102							       start);
1103				if (ret)
1104					goto out_locked;
1105			}
1106			start = key.objectid;
1107			if (key.type == BTRFS_METADATA_ITEM_KEY)
1108				start += trans->fs_info->nodesize;
1109			else
1110				start += key.offset;
1111		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1112			if (key.objectid != block_group->key.objectid)
1113				break;
1114		}
1115
1116		ret = btrfs_next_item(extent_root, path);
1117		if (ret < 0)
1118			goto out_locked;
1119		if (ret)
1120			break;
1121	}
1122	if (start < end) {
1123		ret = __add_to_free_space_tree(trans, block_group, path2,
1124					       start, end - start);
1125		if (ret)
1126			goto out_locked;
1127	}
1128
1129	ret = 0;
1130out_locked:
1131	mutex_unlock(&block_group->free_space_lock);
1132out:
1133	btrfs_free_path(path2);
1134	btrfs_free_path(path);
1135	return ret;
1136}
1137
1138int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
1139{
1140	struct btrfs_trans_handle *trans;
1141	struct btrfs_root *tree_root = fs_info->tree_root;
1142	struct btrfs_root *free_space_root;
1143	struct btrfs_block_group_cache *block_group;
1144	struct rb_node *node;
1145	int ret;
1146
1147	trans = btrfs_start_transaction(tree_root, 0);
1148	if (IS_ERR(trans))
1149		return PTR_ERR(trans);
1150
1151	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
 
1152	free_space_root = btrfs_create_tree(trans,
1153					    BTRFS_FREE_SPACE_TREE_OBJECTID);
1154	if (IS_ERR(free_space_root)) {
1155		ret = PTR_ERR(free_space_root);
1156		goto abort;
 
 
 
 
 
 
 
 
 
1157	}
1158	fs_info->free_space_root = free_space_root;
1159
1160	node = rb_first(&fs_info->block_group_cache_tree);
1161	while (node) {
1162		block_group = rb_entry(node, struct btrfs_block_group_cache,
1163				       cache_node);
1164		ret = populate_free_space_tree(trans, block_group);
1165		if (ret)
1166			goto abort;
 
 
 
1167		node = rb_next(node);
1168	}
1169
1170	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1171	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1172	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
 
1173
1174	return btrfs_commit_transaction(trans);
 
 
 
 
 
1175
1176abort:
1177	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1178	btrfs_abort_transaction(trans, ret);
1179	btrfs_end_transaction(trans);
1180	return ret;
1181}
1182
1183static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1184				 struct btrfs_root *root)
1185{
1186	struct btrfs_path *path;
1187	struct btrfs_key key;
1188	int nr;
1189	int ret;
1190
1191	path = btrfs_alloc_path();
1192	if (!path)
1193		return -ENOMEM;
1194
1195	path->leave_spinning = 1;
1196
1197	key.objectid = 0;
1198	key.type = 0;
1199	key.offset = 0;
1200
1201	while (1) {
1202		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1203		if (ret < 0)
1204			goto out;
1205
1206		nr = btrfs_header_nritems(path->nodes[0]);
1207		if (!nr)
1208			break;
1209
1210		path->slots[0] = 0;
1211		ret = btrfs_del_items(trans, root, path, 0, nr);
1212		if (ret)
1213			goto out;
1214
1215		btrfs_release_path(path);
1216	}
1217
1218	ret = 0;
1219out:
1220	btrfs_free_path(path);
1221	return ret;
1222}
1223
1224int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
1225{
1226	struct btrfs_trans_handle *trans;
1227	struct btrfs_root *tree_root = fs_info->tree_root;
1228	struct btrfs_root *free_space_root = fs_info->free_space_root;
 
 
 
 
 
1229	int ret;
1230
1231	trans = btrfs_start_transaction(tree_root, 0);
1232	if (IS_ERR(trans))
1233		return PTR_ERR(trans);
1234
1235	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1236	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1237	fs_info->free_space_root = NULL;
1238
1239	ret = clear_free_space_tree(trans, free_space_root);
1240	if (ret)
1241		goto abort;
 
 
 
1242
1243	ret = btrfs_del_root(trans, &free_space_root->root_key);
1244	if (ret)
1245		goto abort;
 
 
 
 
 
1246
 
1247	list_del(&free_space_root->dirty_list);
 
1248
1249	btrfs_tree_lock(free_space_root->node);
1250	btrfs_clean_tree_block(free_space_root->node);
1251	btrfs_tree_unlock(free_space_root->node);
1252	btrfs_free_tree_block(trans, free_space_root, free_space_root->node,
1253			      0, 1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1254
1255	free_extent_buffer(free_space_root->node);
1256	free_extent_buffer(free_space_root->commit_root);
1257	kfree(free_space_root);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1258
1259	return btrfs_commit_transaction(trans);
 
 
1260
1261abort:
1262	btrfs_abort_transaction(trans, ret);
1263	btrfs_end_transaction(trans);
1264	return ret;
1265}
1266
1267static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
1268					struct btrfs_block_group_cache *block_group,
1269					struct btrfs_path *path)
1270{
1271	int ret;
1272
1273	block_group->needs_free_space = 0;
1274
1275	ret = add_new_free_space_info(trans, block_group, path);
1276	if (ret)
1277		return ret;
1278
1279	return __add_to_free_space_tree(trans, block_group, path,
1280					block_group->key.objectid,
1281					block_group->key.offset);
1282}
1283
1284int add_block_group_free_space(struct btrfs_trans_handle *trans,
1285			       struct btrfs_block_group_cache *block_group)
1286{
1287	struct btrfs_fs_info *fs_info = trans->fs_info;
1288	struct btrfs_path *path = NULL;
1289	int ret = 0;
1290
1291	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1292		return 0;
1293
1294	mutex_lock(&block_group->free_space_lock);
1295	if (!block_group->needs_free_space)
1296		goto out;
1297
1298	path = btrfs_alloc_path();
1299	if (!path) {
1300		ret = -ENOMEM;
1301		goto out;
1302	}
1303
1304	ret = __add_block_group_free_space(trans, block_group, path);
1305
1306out:
1307	btrfs_free_path(path);
1308	mutex_unlock(&block_group->free_space_lock);
1309	if (ret)
1310		btrfs_abort_transaction(trans, ret);
1311	return ret;
1312}
1313
1314int remove_block_group_free_space(struct btrfs_trans_handle *trans,
1315				  struct btrfs_block_group_cache *block_group)
1316{
1317	struct btrfs_root *root = trans->fs_info->free_space_root;
1318	struct btrfs_path *path;
1319	struct btrfs_key key, found_key;
1320	struct extent_buffer *leaf;
1321	u64 start, end;
1322	int done = 0, nr;
1323	int ret;
1324
1325	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1326		return 0;
1327
1328	if (block_group->needs_free_space) {
1329		/* We never added this block group to the free space tree. */
1330		return 0;
1331	}
1332
1333	path = btrfs_alloc_path();
1334	if (!path) {
1335		ret = -ENOMEM;
1336		goto out;
1337	}
1338
1339	start = block_group->key.objectid;
1340	end = block_group->key.objectid + block_group->key.offset;
1341
1342	key.objectid = end - 1;
1343	key.type = (u8)-1;
1344	key.offset = (u64)-1;
1345
1346	while (!done) {
1347		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
1348		if (ret)
1349			goto out;
1350
1351		leaf = path->nodes[0];
1352		nr = 0;
1353		path->slots[0]++;
1354		while (path->slots[0] > 0) {
1355			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
1356
1357			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
1358				ASSERT(found_key.objectid == block_group->key.objectid);
1359				ASSERT(found_key.offset == block_group->key.offset);
1360				done = 1;
1361				nr++;
1362				path->slots[0]--;
1363				break;
1364			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
1365				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
1366				ASSERT(found_key.objectid >= start);
1367				ASSERT(found_key.objectid < end);
1368				ASSERT(found_key.objectid + found_key.offset <= end);
1369				nr++;
1370				path->slots[0]--;
1371			} else {
1372				ASSERT(0);
1373			}
1374		}
1375
1376		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
1377		if (ret)
1378			goto out;
1379		btrfs_release_path(path);
1380	}
1381
1382	ret = 0;
1383out:
1384	btrfs_free_path(path);
1385	if (ret)
1386		btrfs_abort_transaction(trans, ret);
1387	return ret;
1388}
1389
1390static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
1391				   struct btrfs_path *path,
1392				   u32 expected_extent_count)
1393{
1394	struct btrfs_block_group_cache *block_group;
1395	struct btrfs_fs_info *fs_info;
1396	struct btrfs_root *root;
1397	struct btrfs_key key;
1398	int prev_bit = 0, bit;
1399	/* Initialize to silence GCC. */
1400	u64 extent_start = 0;
1401	u64 end, offset;
1402	u64 total_found = 0;
1403	u32 extent_count = 0;
1404	int ret;
1405
1406	block_group = caching_ctl->block_group;
1407	fs_info = block_group->fs_info;
1408	root = fs_info->free_space_root;
1409
1410	end = block_group->key.objectid + block_group->key.offset;
1411
1412	while (1) {
1413		ret = btrfs_next_item(root, path);
1414		if (ret < 0)
1415			goto out;
1416		if (ret)
1417			break;
1418
1419		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1420
1421		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1422			break;
1423
1424		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
1425		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1426
1427		caching_ctl->progress = key.objectid;
1428
1429		offset = key.objectid;
1430		while (offset < key.objectid + key.offset) {
1431			bit = free_space_test_bit(block_group, path, offset);
1432			if (prev_bit == 0 && bit == 1) {
1433				extent_start = offset;
1434			} else if (prev_bit == 1 && bit == 0) {
1435				total_found += add_new_free_space(block_group,
1436								  extent_start,
1437								  offset);
 
 
 
 
 
 
1438				if (total_found > CACHING_CTL_WAKE_UP) {
1439					total_found = 0;
1440					wake_up(&caching_ctl->wait);
1441				}
1442				extent_count++;
1443			}
1444			prev_bit = bit;
1445			offset += fs_info->sectorsize;
1446		}
1447	}
1448	if (prev_bit == 1) {
1449		total_found += add_new_free_space(block_group, extent_start,
1450						  end);
 
1451		extent_count++;
1452	}
1453
1454	if (extent_count != expected_extent_count) {
1455		btrfs_err(fs_info,
1456			  "incorrect extent count for %llu; counted %u, expected %u",
1457			  block_group->key.objectid, extent_count,
1458			  expected_extent_count);
1459		ASSERT(0);
1460		ret = -EIO;
1461		goto out;
1462	}
1463
1464	caching_ctl->progress = (u64)-1;
1465
1466	ret = 0;
1467out:
1468	return ret;
1469}
1470
1471static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
1472				   struct btrfs_path *path,
1473				   u32 expected_extent_count)
1474{
1475	struct btrfs_block_group_cache *block_group;
1476	struct btrfs_fs_info *fs_info;
1477	struct btrfs_root *root;
1478	struct btrfs_key key;
1479	u64 end;
1480	u64 total_found = 0;
1481	u32 extent_count = 0;
1482	int ret;
1483
1484	block_group = caching_ctl->block_group;
1485	fs_info = block_group->fs_info;
1486	root = fs_info->free_space_root;
1487
1488	end = block_group->key.objectid + block_group->key.offset;
1489
1490	while (1) {
 
 
1491		ret = btrfs_next_item(root, path);
1492		if (ret < 0)
1493			goto out;
1494		if (ret)
1495			break;
1496
1497		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1498
1499		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1500			break;
1501
1502		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
1503		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1504
1505		caching_ctl->progress = key.objectid;
1506
1507		total_found += add_new_free_space(block_group, key.objectid,
1508						  key.objectid + key.offset);
 
 
1509		if (total_found > CACHING_CTL_WAKE_UP) {
1510			total_found = 0;
1511			wake_up(&caching_ctl->wait);
1512		}
1513		extent_count++;
1514	}
1515
1516	if (extent_count != expected_extent_count) {
1517		btrfs_err(fs_info,
1518			  "incorrect extent count for %llu; counted %u, expected %u",
1519			  block_group->key.objectid, extent_count,
1520			  expected_extent_count);
1521		ASSERT(0);
1522		ret = -EIO;
1523		goto out;
1524	}
1525
1526	caching_ctl->progress = (u64)-1;
1527
1528	ret = 0;
1529out:
1530	return ret;
1531}
1532
1533int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1534{
1535	struct btrfs_block_group_cache *block_group;
1536	struct btrfs_free_space_info *info;
1537	struct btrfs_path *path;
1538	u32 extent_count, flags;
1539	int ret;
1540
1541	block_group = caching_ctl->block_group;
1542
1543	path = btrfs_alloc_path();
1544	if (!path)
1545		return -ENOMEM;
1546
1547	/*
1548	 * Just like caching_thread() doesn't want to deadlock on the extent
1549	 * tree, we don't want to deadlock on the free space tree.
1550	 */
1551	path->skip_locking = 1;
1552	path->search_commit_root = 1;
1553	path->reada = READA_FORWARD;
1554
1555	info = search_free_space_info(NULL, block_group, path, 0);
1556	if (IS_ERR(info)) {
1557		ret = PTR_ERR(info);
1558		goto out;
1559	}
1560	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1561	flags = btrfs_free_space_flags(path->nodes[0], info);
1562
1563	/*
1564	 * We left path pointing to the free space info item, so now
1565	 * load_free_space_foo can just iterate through the free space tree from
1566	 * there.
1567	 */
1568	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1569		ret = load_free_space_bitmaps(caching_ctl, path, extent_count);
1570	else
1571		ret = load_free_space_extents(caching_ctl, path, extent_count);
1572
1573out:
1574	btrfs_free_path(path);
1575	return ret;
1576}