Linux Audio

Check our new training course

Linux kernel drivers training

Mar 31-Apr 9, 2025, special US time zones
Register
Loading...
Note: File does not exist in v3.1.
   1/*
   2 * Copyright (C) 2011 STRATO.  All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public
   6 * License v2 as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public
  14 * License along with this program; if not, write to the
  15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16 * Boston, MA 02111-1307, USA.
  17 */
  18
  19#include <linux/sched.h>
  20#include <linux/pagemap.h>
  21#include <linux/writeback.h>
  22#include <linux/blkdev.h>
  23#include <linux/rbtree.h>
  24#include <linux/slab.h>
  25#include <linux/workqueue.h>
  26#include <linux/btrfs.h>
  27
  28#include "ctree.h"
  29#include "transaction.h"
  30#include "disk-io.h"
  31#include "locking.h"
  32#include "ulist.h"
  33#include "backref.h"
  34#include "extent_io.h"
  35
  36/* TODO XXX FIXME
  37 *  - subvol delete -> delete when ref goes to 0? delete limits also?
  38 *  - reorganize keys
  39 *  - compressed
  40 *  - sync
  41 *  - copy also limits on subvol creation
  42 *  - limit
  43 *  - caches fuer ulists
  44 *  - performance benchmarks
  45 *  - check all ioctl parameters
  46 */
  47
/*
 * one struct for each qgroup, organized in fs_info->qgroup_tree.
 */
struct btrfs_qgroup {
	u64 qgroupid;	/* unique id, key of the node in qgroup_tree */

	/*
	 * state: in-memory copy of the on-disk qgroup info item
	 */
	u64 rfer;	/* referenced */
	u64 rfer_cmpr;	/* referenced compressed */
	u64 excl;	/* exclusive */
	u64 excl_cmpr;	/* exclusive compressed */

	/*
	 * limits: in-memory copy of the on-disk qgroup limit item
	 */
	u64 lim_flags;	/* which limits are set */
	u64 max_rfer;	/* cap on referenced bytes */
	u64 max_excl;	/* cap on exclusive bytes */
	u64 rsv_rfer;	/* reserved referenced bytes */
	u64 rsv_excl;	/* reserved exclusive bytes */

	/*
	 * reservation tracking
	 */
	u64 reserved;	/* bytes currently reserved against this group */

	/*
	 * lists; each parent/member relation is a btrfs_qgroup_list
	 * linked into both groups below
	 */
	struct list_head groups;  /* groups this group is member of */
	struct list_head members; /* groups that are members of this group */
	struct list_head dirty;   /* dirty groups */
	struct rb_node node;	  /* tree of qgroups */

	/*
	 * temp variables for accounting operations
	 */
	u64 tag;
	u64 refcnt;
};
  90
/*
 * glue structure to represent the relations between qgroups.  One instance
 * links a single member qgroup to a single parent group: it sits on the
 * member's ->groups list via next_group and on the parent's ->members list
 * via next_member.
 */
struct btrfs_qgroup_list {
	struct list_head next_group;	/* entry in member->groups */
	struct list_head next_member;	/* entry in group->members */
	struct btrfs_qgroup *group;	/* the parent side of the relation */
	struct btrfs_qgroup *member;	/* the member side of the relation */
};
 100
/* forward declarations for the rescan machinery defined later in this file */
static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
		   int init_flags);
static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
 105
 106/* must be called with qgroup_ioctl_lock held */
 107static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
 108					   u64 qgroupid)
 109{
 110	struct rb_node *n = fs_info->qgroup_tree.rb_node;
 111	struct btrfs_qgroup *qgroup;
 112
 113	while (n) {
 114		qgroup = rb_entry(n, struct btrfs_qgroup, node);
 115		if (qgroup->qgroupid < qgroupid)
 116			n = n->rb_left;
 117		else if (qgroup->qgroupid > qgroupid)
 118			n = n->rb_right;
 119		else
 120			return qgroup;
 121	}
 122	return NULL;
 123}
 124
 125/* must be called with qgroup_lock held */
 126static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
 127					  u64 qgroupid)
 128{
 129	struct rb_node **p = &fs_info->qgroup_tree.rb_node;
 130	struct rb_node *parent = NULL;
 131	struct btrfs_qgroup *qgroup;
 132
 133	while (*p) {
 134		parent = *p;
 135		qgroup = rb_entry(parent, struct btrfs_qgroup, node);
 136
 137		if (qgroup->qgroupid < qgroupid)
 138			p = &(*p)->rb_left;
 139		else if (qgroup->qgroupid > qgroupid)
 140			p = &(*p)->rb_right;
 141		else
 142			return qgroup;
 143	}
 144
 145	qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
 146	if (!qgroup)
 147		return ERR_PTR(-ENOMEM);
 148
 149	qgroup->qgroupid = qgroupid;
 150	INIT_LIST_HEAD(&qgroup->groups);
 151	INIT_LIST_HEAD(&qgroup->members);
 152	INIT_LIST_HEAD(&qgroup->dirty);
 153
 154	rb_link_node(&qgroup->node, parent, p);
 155	rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
 156
 157	return qgroup;
 158}
 159
 160static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
 161{
 162	struct btrfs_qgroup_list *list;
 163
 164	list_del(&qgroup->dirty);
 165	while (!list_empty(&qgroup->groups)) {
 166		list = list_first_entry(&qgroup->groups,
 167					struct btrfs_qgroup_list, next_group);
 168		list_del(&list->next_group);
 169		list_del(&list->next_member);
 170		kfree(list);
 171	}
 172
 173	while (!list_empty(&qgroup->members)) {
 174		list = list_first_entry(&qgroup->members,
 175					struct btrfs_qgroup_list, next_member);
 176		list_del(&list->next_group);
 177		list_del(&list->next_member);
 178		kfree(list);
 179	}
 180	kfree(qgroup);
 181}
 182
 183/* must be called with qgroup_lock held */
 184static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
 185{
 186	struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
 187
 188	if (!qgroup)
 189		return -ENOENT;
 190
 191	rb_erase(&qgroup->node, &fs_info->qgroup_tree);
 192	__del_qgroup_rb(qgroup);
 193	return 0;
 194}
 195
 196/* must be called with qgroup_lock held */
 197static int add_relation_rb(struct btrfs_fs_info *fs_info,
 198			   u64 memberid, u64 parentid)
 199{
 200	struct btrfs_qgroup *member;
 201	struct btrfs_qgroup *parent;
 202	struct btrfs_qgroup_list *list;
 203
 204	member = find_qgroup_rb(fs_info, memberid);
 205	parent = find_qgroup_rb(fs_info, parentid);
 206	if (!member || !parent)
 207		return -ENOENT;
 208
 209	list = kzalloc(sizeof(*list), GFP_ATOMIC);
 210	if (!list)
 211		return -ENOMEM;
 212
 213	list->group = parent;
 214	list->member = member;
 215	list_add_tail(&list->next_group, &member->groups);
 216	list_add_tail(&list->next_member, &parent->members);
 217
 218	return 0;
 219}
 220
 221/* must be called with qgroup_lock held */
 222static int del_relation_rb(struct btrfs_fs_info *fs_info,
 223			   u64 memberid, u64 parentid)
 224{
 225	struct btrfs_qgroup *member;
 226	struct btrfs_qgroup *parent;
 227	struct btrfs_qgroup_list *list;
 228
 229	member = find_qgroup_rb(fs_info, memberid);
 230	parent = find_qgroup_rb(fs_info, parentid);
 231	if (!member || !parent)
 232		return -ENOENT;
 233
 234	list_for_each_entry(list, &member->groups, next_group) {
 235		if (list->group == parent) {
 236			list_del(&list->next_group);
 237			list_del(&list->next_member);
 238			kfree(list);
 239			return 0;
 240		}
 241	}
 242	return -ENOENT;
 243}
 244
 245/*
 246 * The full config is read in one go, only called from open_ctree()
 247 * It doesn't use any locking, as at this point we're still single-threaded
 248 */
 249int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
 250{
 251	struct btrfs_key key;
 252	struct btrfs_key found_key;
 253	struct btrfs_root *quota_root = fs_info->quota_root;
 254	struct btrfs_path *path = NULL;
 255	struct extent_buffer *l;
 256	int slot;
 257	int ret = 0;
 258	u64 flags = 0;
 259	u64 rescan_progress = 0;
 260
 261	if (!fs_info->quota_enabled)
 262		return 0;
 263
 264	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
 265	if (!fs_info->qgroup_ulist) {
 266		ret = -ENOMEM;
 267		goto out;
 268	}
 269
 270	path = btrfs_alloc_path();
 271	if (!path) {
 272		ret = -ENOMEM;
 273		goto out;
 274	}
 275
 276	/* default this to quota off, in case no status key is found */
 277	fs_info->qgroup_flags = 0;
 278
 279	/*
 280	 * pass 1: read status, all qgroup infos and limits
 281	 */
 282	key.objectid = 0;
 283	key.type = 0;
 284	key.offset = 0;
 285	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
 286	if (ret)
 287		goto out;
 288
 289	while (1) {
 290		struct btrfs_qgroup *qgroup;
 291
 292		slot = path->slots[0];
 293		l = path->nodes[0];
 294		btrfs_item_key_to_cpu(l, &found_key, slot);
 295
 296		if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
 297			struct btrfs_qgroup_status_item *ptr;
 298
 299			ptr = btrfs_item_ptr(l, slot,
 300					     struct btrfs_qgroup_status_item);
 301
 302			if (btrfs_qgroup_status_version(l, ptr) !=
 303			    BTRFS_QGROUP_STATUS_VERSION) {
 304				btrfs_err(fs_info,
 305				 "old qgroup version, quota disabled");
 306				goto out;
 307			}
 308			if (btrfs_qgroup_status_generation(l, ptr) !=
 309			    fs_info->generation) {
 310				flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 311				btrfs_err(fs_info,
 312					"qgroup generation mismatch, "
 313					"marked as inconsistent");
 314			}
 315			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
 316									  ptr);
 317			rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
 318			goto next1;
 319		}
 320
 321		if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
 322		    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
 323			goto next1;
 324
 325		qgroup = find_qgroup_rb(fs_info, found_key.offset);
 326		if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
 327		    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
 328			btrfs_err(fs_info, "inconsitent qgroup config");
 329			flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 330		}
 331		if (!qgroup) {
 332			qgroup = add_qgroup_rb(fs_info, found_key.offset);
 333			if (IS_ERR(qgroup)) {
 334				ret = PTR_ERR(qgroup);
 335				goto out;
 336			}
 337		}
 338		switch (found_key.type) {
 339		case BTRFS_QGROUP_INFO_KEY: {
 340			struct btrfs_qgroup_info_item *ptr;
 341
 342			ptr = btrfs_item_ptr(l, slot,
 343					     struct btrfs_qgroup_info_item);
 344			qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
 345			qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
 346			qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
 347			qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
 348			/* generation currently unused */
 349			break;
 350		}
 351		case BTRFS_QGROUP_LIMIT_KEY: {
 352			struct btrfs_qgroup_limit_item *ptr;
 353
 354			ptr = btrfs_item_ptr(l, slot,
 355					     struct btrfs_qgroup_limit_item);
 356			qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
 357			qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
 358			qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
 359			qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
 360			qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
 361			break;
 362		}
 363		}
 364next1:
 365		ret = btrfs_next_item(quota_root, path);
 366		if (ret < 0)
 367			goto out;
 368		if (ret)
 369			break;
 370	}
 371	btrfs_release_path(path);
 372
 373	/*
 374	 * pass 2: read all qgroup relations
 375	 */
 376	key.objectid = 0;
 377	key.type = BTRFS_QGROUP_RELATION_KEY;
 378	key.offset = 0;
 379	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
 380	if (ret)
 381		goto out;
 382	while (1) {
 383		slot = path->slots[0];
 384		l = path->nodes[0];
 385		btrfs_item_key_to_cpu(l, &found_key, slot);
 386
 387		if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
 388			goto next2;
 389
 390		if (found_key.objectid > found_key.offset) {
 391			/* parent <- member, not needed to build config */
 392			/* FIXME should we omit the key completely? */
 393			goto next2;
 394		}
 395
 396		ret = add_relation_rb(fs_info, found_key.objectid,
 397				      found_key.offset);
 398		if (ret == -ENOENT) {
 399			btrfs_warn(fs_info,
 400				"orphan qgroup relation 0x%llx->0x%llx",
 401				found_key.objectid, found_key.offset);
 402			ret = 0;	/* ignore the error */
 403		}
 404		if (ret)
 405			goto out;
 406next2:
 407		ret = btrfs_next_item(quota_root, path);
 408		if (ret < 0)
 409			goto out;
 410		if (ret)
 411			break;
 412	}
 413out:
 414	fs_info->qgroup_flags |= flags;
 415	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
 416		fs_info->quota_enabled = 0;
 417		fs_info->pending_quota_state = 0;
 418	} else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
 419		   ret >= 0) {
 420		ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
 421	}
 422	btrfs_free_path(path);
 423
 424	if (ret < 0) {
 425		ulist_free(fs_info->qgroup_ulist);
 426		fs_info->qgroup_ulist = NULL;
 427		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
 428	}
 429
 430	return ret < 0 ? ret : 0;
 431}
 432
/*
 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
 * first two are in single-threaded paths. And for the third one, we have set
 * quota_root to be null with qgroup_lock held before, so it is safe to clean
 * up the in-memory structures without qgroup_lock held.
 */
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct rb_node *n;
	struct btrfs_qgroup *qgroup;

	/* tear down every qgroup together with its relation lists */
	while ((n = rb_first(&fs_info->qgroup_tree))) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		rb_erase(n, &fs_info->qgroup_tree);
		__del_qgroup_rb(qgroup);
	}
	/*
	 * we call btrfs_free_qgroup_config() when umounting
	 * filesystem and disabling quota, so we set qgroup_ulist
	 * to be null here to avoid double free.
	 */
	ulist_free(fs_info->qgroup_ulist);
	fs_info->qgroup_ulist = NULL;
}
 457
 458static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
 459				    struct btrfs_root *quota_root,
 460				    u64 src, u64 dst)
 461{
 462	int ret;
 463	struct btrfs_path *path;
 464	struct btrfs_key key;
 465
 466	path = btrfs_alloc_path();
 467	if (!path)
 468		return -ENOMEM;
 469
 470	key.objectid = src;
 471	key.type = BTRFS_QGROUP_RELATION_KEY;
 472	key.offset = dst;
 473
 474	ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
 475
 476	btrfs_mark_buffer_dirty(path->nodes[0]);
 477
 478	btrfs_free_path(path);
 479	return ret;
 480}
 481
 482static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
 483				    struct btrfs_root *quota_root,
 484				    u64 src, u64 dst)
 485{
 486	int ret;
 487	struct btrfs_path *path;
 488	struct btrfs_key key;
 489
 490	path = btrfs_alloc_path();
 491	if (!path)
 492		return -ENOMEM;
 493
 494	key.objectid = src;
 495	key.type = BTRFS_QGROUP_RELATION_KEY;
 496	key.offset = dst;
 497
 498	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
 499	if (ret < 0)
 500		goto out;
 501
 502	if (ret > 0) {
 503		ret = -ENOENT;
 504		goto out;
 505	}
 506
 507	ret = btrfs_del_item(trans, quota_root, path);
 508out:
 509	btrfs_free_path(path);
 510	return ret;
 511}
 512
 513static int add_qgroup_item(struct btrfs_trans_handle *trans,
 514			   struct btrfs_root *quota_root, u64 qgroupid)
 515{
 516	int ret;
 517	struct btrfs_path *path;
 518	struct btrfs_qgroup_info_item *qgroup_info;
 519	struct btrfs_qgroup_limit_item *qgroup_limit;
 520	struct extent_buffer *leaf;
 521	struct btrfs_key key;
 522
 523	path = btrfs_alloc_path();
 524	if (!path)
 525		return -ENOMEM;
 526
 527	key.objectid = 0;
 528	key.type = BTRFS_QGROUP_INFO_KEY;
 529	key.offset = qgroupid;
 530
 531	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
 532				      sizeof(*qgroup_info));
 533	if (ret)
 534		goto out;
 535
 536	leaf = path->nodes[0];
 537	qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
 538				 struct btrfs_qgroup_info_item);
 539	btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
 540	btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
 541	btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
 542	btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
 543	btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
 544
 545	btrfs_mark_buffer_dirty(leaf);
 546
 547	btrfs_release_path(path);
 548
 549	key.type = BTRFS_QGROUP_LIMIT_KEY;
 550	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
 551				      sizeof(*qgroup_limit));
 552	if (ret)
 553		goto out;
 554
 555	leaf = path->nodes[0];
 556	qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
 557				  struct btrfs_qgroup_limit_item);
 558	btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
 559	btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
 560	btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
 561	btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
 562	btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
 563
 564	btrfs_mark_buffer_dirty(leaf);
 565
 566	ret = 0;
 567out:
 568	btrfs_free_path(path);
 569	return ret;
 570}
 571
 572static int del_qgroup_item(struct btrfs_trans_handle *trans,
 573			   struct btrfs_root *quota_root, u64 qgroupid)
 574{
 575	int ret;
 576	struct btrfs_path *path;
 577	struct btrfs_key key;
 578
 579	path = btrfs_alloc_path();
 580	if (!path)
 581		return -ENOMEM;
 582
 583	key.objectid = 0;
 584	key.type = BTRFS_QGROUP_INFO_KEY;
 585	key.offset = qgroupid;
 586	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
 587	if (ret < 0)
 588		goto out;
 589
 590	if (ret > 0) {
 591		ret = -ENOENT;
 592		goto out;
 593	}
 594
 595	ret = btrfs_del_item(trans, quota_root, path);
 596	if (ret)
 597		goto out;
 598
 599	btrfs_release_path(path);
 600
 601	key.type = BTRFS_QGROUP_LIMIT_KEY;
 602	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
 603	if (ret < 0)
 604		goto out;
 605
 606	if (ret > 0) {
 607		ret = -ENOENT;
 608		goto out;
 609	}
 610
 611	ret = btrfs_del_item(trans, quota_root, path);
 612
 613out:
 614	btrfs_free_path(path);
 615	return ret;
 616}
 617
 618static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
 619				    struct btrfs_root *root, u64 qgroupid,
 620				    u64 flags, u64 max_rfer, u64 max_excl,
 621				    u64 rsv_rfer, u64 rsv_excl)
 622{
 623	struct btrfs_path *path;
 624	struct btrfs_key key;
 625	struct extent_buffer *l;
 626	struct btrfs_qgroup_limit_item *qgroup_limit;
 627	int ret;
 628	int slot;
 629
 630	key.objectid = 0;
 631	key.type = BTRFS_QGROUP_LIMIT_KEY;
 632	key.offset = qgroupid;
 633
 634	path = btrfs_alloc_path();
 635	if (!path)
 636		return -ENOMEM;
 637
 638	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 639	if (ret > 0)
 640		ret = -ENOENT;
 641
 642	if (ret)
 643		goto out;
 644
 645	l = path->nodes[0];
 646	slot = path->slots[0];
 647	qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
 648	btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
 649	btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
 650	btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
 651	btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
 652	btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
 653
 654	btrfs_mark_buffer_dirty(l);
 655
 656out:
 657	btrfs_free_path(path);
 658	return ret;
 659}
 660
 661static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
 662				   struct btrfs_root *root,
 663				   struct btrfs_qgroup *qgroup)
 664{
 665	struct btrfs_path *path;
 666	struct btrfs_key key;
 667	struct extent_buffer *l;
 668	struct btrfs_qgroup_info_item *qgroup_info;
 669	int ret;
 670	int slot;
 671
 672	key.objectid = 0;
 673	key.type = BTRFS_QGROUP_INFO_KEY;
 674	key.offset = qgroup->qgroupid;
 675
 676	path = btrfs_alloc_path();
 677	if (!path)
 678		return -ENOMEM;
 679
 680	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 681	if (ret > 0)
 682		ret = -ENOENT;
 683
 684	if (ret)
 685		goto out;
 686
 687	l = path->nodes[0];
 688	slot = path->slots[0];
 689	qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
 690	btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
 691	btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
 692	btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
 693	btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
 694	btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
 695
 696	btrfs_mark_buffer_dirty(l);
 697
 698out:
 699	btrfs_free_path(path);
 700	return ret;
 701}
 702
 703static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
 704				     struct btrfs_fs_info *fs_info,
 705				    struct btrfs_root *root)
 706{
 707	struct btrfs_path *path;
 708	struct btrfs_key key;
 709	struct extent_buffer *l;
 710	struct btrfs_qgroup_status_item *ptr;
 711	int ret;
 712	int slot;
 713
 714	key.objectid = 0;
 715	key.type = BTRFS_QGROUP_STATUS_KEY;
 716	key.offset = 0;
 717
 718	path = btrfs_alloc_path();
 719	if (!path)
 720		return -ENOMEM;
 721
 722	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 723	if (ret > 0)
 724		ret = -ENOENT;
 725
 726	if (ret)
 727		goto out;
 728
 729	l = path->nodes[0];
 730	slot = path->slots[0];
 731	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
 732	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
 733	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
 734	btrfs_set_qgroup_status_rescan(l, ptr,
 735				fs_info->qgroup_rescan_progress.objectid);
 736
 737	btrfs_mark_buffer_dirty(l);
 738
 739out:
 740	btrfs_free_path(path);
 741	return ret;
 742}
 743
 744/*
 745 * called with qgroup_lock held
 746 */
 747static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
 748				  struct btrfs_root *root)
 749{
 750	struct btrfs_path *path;
 751	struct btrfs_key key;
 752	struct extent_buffer *leaf = NULL;
 753	int ret;
 754	int nr = 0;
 755
 756	path = btrfs_alloc_path();
 757	if (!path)
 758		return -ENOMEM;
 759
 760	path->leave_spinning = 1;
 761
 762	key.objectid = 0;
 763	key.offset = 0;
 764	key.type = 0;
 765
 766	while (1) {
 767		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 768		if (ret < 0)
 769			goto out;
 770		leaf = path->nodes[0];
 771		nr = btrfs_header_nritems(leaf);
 772		if (!nr)
 773			break;
 774		/*
 775		 * delete the leaf one by one
 776		 * since the whole tree is going
 777		 * to be deleted.
 778		 */
 779		path->slots[0] = 0;
 780		ret = btrfs_del_items(trans, root, path, 0, nr);
 781		if (ret)
 782			goto out;
 783
 784		btrfs_release_path(path);
 785	}
 786	ret = 0;
 787out:
 788	root->fs_info->pending_quota_state = 0;
 789	btrfs_free_path(path);
 790	return ret;
 791}
 792
/*
 * Enable quotas: create the quota tree with its status item, add a qgroup
 * item for every tree referenced by a ROOT_REF key in the tree root plus
 * one for the top-level fs tree, then publish the new quota_root under
 * qgroup_lock.  Serialized by fs_info->qgroup_ioctl_lock.
 *
 * Returns 0 on success (or if quotas were already enabled), a negative
 * errno otherwise; on failure the quota root and the ulist are torn down
 * again.
 */
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_path *path = NULL;
	struct btrfs_qgroup_status_item *ptr;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_qgroup *qgroup = NULL;
	int ret = 0;
	int slot;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (fs_info->quota_root) {
		/* already enabled, just make sure it sticks at commit */
		fs_info->pending_quota_state = 1;
		goto out;
	}

	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * initially create the quota tree
	 */
	quota_root = btrfs_create_tree(trans, fs_info,
				       BTRFS_QUOTA_TREE_OBJECTID);
	if (IS_ERR(quota_root)) {
		ret =  PTR_ERR(quota_root);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out_free_root;
	}

	/*
	 * Insert the status item.  Flags start as ON | INCONSISTENT; a
	 * rescan is needed before the numbers can be trusted.
	 */
	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*ptr));
	if (ret)
		goto out_free_path;

	leaf = path->nodes[0];
	ptr = btrfs_item_ptr(leaf, path->slots[0],
				 struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
	btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);

	btrfs_mark_buffer_dirty(leaf);

	/*
	 * Scan the tree root for ROOT_REF keys and create a qgroup (item
	 * plus rbtree node) for every referenced tree (the key offset).
	 */
	key.objectid = 0;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = 0;

	btrfs_release_path(path);
	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
	if (ret > 0)
		goto out_add_root;	/* no ROOT_REF keys at all */
	if (ret < 0)
		goto out_free_path;


	while (1) {
		slot = path->slots[0];
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		if (found_key.type == BTRFS_ROOT_REF_KEY) {
			ret = add_qgroup_item(trans, quota_root,
					      found_key.offset);
			if (ret)
				goto out_free_path;

			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				goto out_free_path;
			}
		}
		ret = btrfs_next_item(tree_root, path);
		if (ret < 0)
			goto out_free_path;
		if (ret)
			break;
	}

out_add_root:
	/* the top-level fs tree always gets a qgroup of its own */
	btrfs_release_path(path);
	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
	if (ret)
		goto out_free_path;

	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
	if (IS_ERR(qgroup)) {
		ret = PTR_ERR(qgroup);
		goto out_free_path;
	}
	/* publish the new quota root under the qgroup spinlock */
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_root = quota_root;
	fs_info->pending_quota_state = 1;
	spin_unlock(&fs_info->qgroup_lock);
out_free_path:
	btrfs_free_path(path);
out_free_root:
	if (ret) {
		/* undo btrfs_create_tree() on any failure */
		free_extent_buffer(quota_root->node);
		free_extent_buffer(quota_root->commit_root);
		kfree(quota_root);
	}
out:
	if (ret) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
	}
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
 923
/*
 * Disable quotas: unpublish quota_root under qgroup_lock, free the
 * in-memory config, empty the quota tree, delete its root item and free
 * its tree blocks.  Serialized by fs_info->qgroup_ioctl_lock.
 */
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *quota_root;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_root)
		goto out;	/* not enabled, nothing to do */
	/*
	 * Clear quota_root under the spinlock first, so the in-memory
	 * structures can then be torn down without holding it (see the
	 * comment above btrfs_free_qgroup_config()).
	 */
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_enabled = 0;
	fs_info->pending_quota_state = 0;
	quota_root = fs_info->quota_root;
	fs_info->quota_root = NULL;
	spin_unlock(&fs_info->qgroup_lock);

	btrfs_free_qgroup_config(fs_info);

	ret = btrfs_clean_quota_tree(trans, quota_root);
	if (ret)
		goto out;

	ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
	if (ret)
		goto out;

	list_del(&quota_root->dirty_list);

	/* free the now-empty root node of the quota tree */
	btrfs_tree_lock(quota_root->node);
	clean_tree_block(trans, tree_root, quota_root->node);
	btrfs_tree_unlock(quota_root->node);
	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);

	free_extent_buffer(quota_root->node);
	free_extent_buffer(quota_root->commit_root);
	kfree(quota_root);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
 965
 966static void qgroup_dirty(struct btrfs_fs_info *fs_info,
 967			 struct btrfs_qgroup *qgroup)
 968{
 969	if (list_empty(&qgroup->dirty))
 970		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
 971}
 972
 973int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 974			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
 975{
 976	struct btrfs_root *quota_root;
 977	struct btrfs_qgroup *parent;
 978	struct btrfs_qgroup *member;
 979	struct btrfs_qgroup_list *list;
 980	int ret = 0;
 981
 982	mutex_lock(&fs_info->qgroup_ioctl_lock);
 983	quota_root = fs_info->quota_root;
 984	if (!quota_root) {
 985		ret = -EINVAL;
 986		goto out;
 987	}
 988	member = find_qgroup_rb(fs_info, src);
 989	parent = find_qgroup_rb(fs_info, dst);
 990	if (!member || !parent) {
 991		ret = -EINVAL;
 992		goto out;
 993	}
 994
 995	/* check if such qgroup relation exist firstly */
 996	list_for_each_entry(list, &member->groups, next_group) {
 997		if (list->group == parent) {
 998			ret = -EEXIST;
 999			goto out;
1000		}
1001	}
1002
1003	ret = add_qgroup_relation_item(trans, quota_root, src, dst);
1004	if (ret)
1005		goto out;
1006
1007	ret = add_qgroup_relation_item(trans, quota_root, dst, src);
1008	if (ret) {
1009		del_qgroup_relation_item(trans, quota_root, src, dst);
1010		goto out;
1011	}
1012
1013	spin_lock(&fs_info->qgroup_lock);
1014	ret = add_relation_rb(quota_root->fs_info, src, dst);
1015	spin_unlock(&fs_info->qgroup_lock);
1016out:
1017	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1018	return ret;
1019}
1020
1021int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1022			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1023{
1024	struct btrfs_root *quota_root;
1025	struct btrfs_qgroup *parent;
1026	struct btrfs_qgroup *member;
1027	struct btrfs_qgroup_list *list;
1028	int ret = 0;
1029	int err;
1030
1031	mutex_lock(&fs_info->qgroup_ioctl_lock);
1032	quota_root = fs_info->quota_root;
1033	if (!quota_root) {
1034		ret = -EINVAL;
1035		goto out;
1036	}
1037
1038	member = find_qgroup_rb(fs_info, src);
1039	parent = find_qgroup_rb(fs_info, dst);
1040	if (!member || !parent) {
1041		ret = -EINVAL;
1042		goto out;
1043	}
1044
1045	/* check if such qgroup relation exist firstly */
1046	list_for_each_entry(list, &member->groups, next_group) {
1047		if (list->group == parent)
1048			goto exist;
1049	}
1050	ret = -ENOENT;
1051	goto out;
1052exist:
1053	ret = del_qgroup_relation_item(trans, quota_root, src, dst);
1054	err = del_qgroup_relation_item(trans, quota_root, dst, src);
1055	if (err && !ret)
1056		ret = err;
1057
1058	spin_lock(&fs_info->qgroup_lock);
1059	del_relation_rb(fs_info, src, dst);
1060	spin_unlock(&fs_info->qgroup_lock);
1061out:
1062	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1063	return ret;
1064}
1065
1066int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1067			struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
1068{
1069	struct btrfs_root *quota_root;
1070	struct btrfs_qgroup *qgroup;
1071	int ret = 0;
1072
1073	mutex_lock(&fs_info->qgroup_ioctl_lock);
1074	quota_root = fs_info->quota_root;
1075	if (!quota_root) {
1076		ret = -EINVAL;
1077		goto out;
1078	}
1079	qgroup = find_qgroup_rb(fs_info, qgroupid);
1080	if (qgroup) {
1081		ret = -EEXIST;
1082		goto out;
1083	}
1084
1085	ret = add_qgroup_item(trans, quota_root, qgroupid);
1086	if (ret)
1087		goto out;
1088
1089	spin_lock(&fs_info->qgroup_lock);
1090	qgroup = add_qgroup_rb(fs_info, qgroupid);
1091	spin_unlock(&fs_info->qgroup_lock);
1092
1093	if (IS_ERR(qgroup))
1094		ret = PTR_ERR(qgroup);
1095out:
1096	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1097	return ret;
1098}
1099
1100int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1101			struct btrfs_fs_info *fs_info, u64 qgroupid)
1102{
1103	struct btrfs_root *quota_root;
1104	struct btrfs_qgroup *qgroup;
1105	int ret = 0;
1106
1107	mutex_lock(&fs_info->qgroup_ioctl_lock);
1108	quota_root = fs_info->quota_root;
1109	if (!quota_root) {
1110		ret = -EINVAL;
1111		goto out;
1112	}
1113
1114	qgroup = find_qgroup_rb(fs_info, qgroupid);
1115	if (!qgroup) {
1116		ret = -ENOENT;
1117		goto out;
1118	} else {
1119		/* check if there are no relations to this qgroup */
1120		if (!list_empty(&qgroup->groups) ||
1121		    !list_empty(&qgroup->members)) {
1122			ret = -EBUSY;
1123			goto out;
1124		}
1125	}
1126	ret = del_qgroup_item(trans, quota_root, qgroupid);
1127
1128	spin_lock(&fs_info->qgroup_lock);
1129	del_qgroup_rb(quota_root->fs_info, qgroupid);
1130	spin_unlock(&fs_info->qgroup_lock);
1131out:
1132	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1133	return ret;
1134}
1135
1136int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1137		       struct btrfs_fs_info *fs_info, u64 qgroupid,
1138		       struct btrfs_qgroup_limit *limit)
1139{
1140	struct btrfs_root *quota_root;
1141	struct btrfs_qgroup *qgroup;
1142	int ret = 0;
1143
1144	mutex_lock(&fs_info->qgroup_ioctl_lock);
1145	quota_root = fs_info->quota_root;
1146	if (!quota_root) {
1147		ret = -EINVAL;
1148		goto out;
1149	}
1150
1151	qgroup = find_qgroup_rb(fs_info, qgroupid);
1152	if (!qgroup) {
1153		ret = -ENOENT;
1154		goto out;
1155	}
1156	ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
1157				       limit->flags, limit->max_rfer,
1158				       limit->max_excl, limit->rsv_rfer,
1159				       limit->rsv_excl);
1160	if (ret) {
1161		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1162		btrfs_info(fs_info, "unable to update quota limit for %llu",
1163		       qgroupid);
1164	}
1165
1166	spin_lock(&fs_info->qgroup_lock);
1167	qgroup->lim_flags = limit->flags;
1168	qgroup->max_rfer = limit->max_rfer;
1169	qgroup->max_excl = limit->max_excl;
1170	qgroup->rsv_rfer = limit->rsv_rfer;
1171	qgroup->rsv_excl = limit->rsv_excl;
1172	spin_unlock(&fs_info->qgroup_lock);
1173out:
1174	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1175	return ret;
1176}
1177
1178/*
1179 * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
1180 * the modification into a list that's later used by btrfs_end_transaction to
1181 * pass the recorded modifications on to btrfs_qgroup_account_ref.
1182 */
1183int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1184			    struct btrfs_delayed_ref_node *node,
1185			    struct btrfs_delayed_extent_op *extent_op)
1186{
1187	struct qgroup_update *u;
1188
1189	BUG_ON(!trans->delayed_ref_elem.seq);
1190	u = kmalloc(sizeof(*u), GFP_NOFS);
1191	if (!u)
1192		return -ENOMEM;
1193
1194	u->node = node;
1195	u->extent_op = extent_op;
1196	list_add_tail(&u->list, &trans->qgroup_ref_list);
1197
1198	return 0;
1199}
1200
/*
 * Step 1 of qgroup accounting: for each root in @roots (the roots that
 * referenced the extent at the chosen seq), walk that root's qgroup and,
 * transitively via the @tmp worklist, all of its parent groups, bumping
 * each group's refcnt.  A first visit sets refcnt to seq + 1; subsequent
 * visits increment it, so afterwards (refcnt - seq) is the number of old
 * roots reaching the group (compared against roots->nnodes in step 3).
 *
 * Caller holds fs_info->qgroup_lock, hence GFP_ATOMIC for ulist_add.
 * Returns 0 on success or a negative errno from ulist_add.
 */
static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	struct btrfs_qgroup *qg;
	int ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		/* roots without an own qgroup are not tracked */
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		ulist_reinit(tmp);
						/* XXX id not needed */
		ret = ulist_add(tmp, qg->qgroupid,
				(u64)(uintptr_t)qg, GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
			/* first visit in this pass sets refcnt to seq + 1 */
			if (qg->refcnt < seq)
				qg->refcnt = seq + 1;
			else
				++qg->refcnt;

			/* queue all parent groups for the same treatment */
			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(tmp, glist->group->qgroupid,
						(u64)(uintptr_t)glist->group,
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}

	return 0;
}
1246
/*
 * Step 2 of qgroup accounting: walk from @qgroup (the group of the root
 * the ref change belongs to) up through all its parent groups.  A group
 * not visited in step 1 (refcnt < seq) was not reachable from any of the
 * old roots, so its referenced count changes by sgn * num_bytes; if there
 * were no old roots at all (roots->nnodes == 0), its exclusive count
 * changes as well.  Every group walked here is tagged with @seq so that
 * step 3 can skip it.
 *
 * Caller holds fs_info->qgroup_lock, hence GFP_ATOMIC for ulist_add.
 * Returns 0 on success or a negative errno from ulist_add.
 */
static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq, int sgn, u64 num_bytes,
				    struct btrfs_qgroup *qgroup)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	struct btrfs_qgroup_list *glist;
	int ret;

	ulist_reinit(tmp);
	ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		return ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
		if (qg->refcnt < seq) {
			/* not visited by step 1 */
			qg->rfer += sgn * num_bytes;
			qg->rfer_cmpr += sgn * num_bytes;
			if (roots->nnodes == 0) {
				qg->excl += sgn * num_bytes;
				qg->excl_cmpr += sgn * num_bytes;
			}
			qgroup_dirty(fs_info, qg);
		}
		/* mark as seen from the new root; step 3 skips tagged groups */
		WARN_ON(qg->tag >= seq);
		qg->tag = seq;

		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				return ret;
		}
	}

	return 0;
}
1289
/*
 * Step 3 of qgroup accounting: walk once more from all old roots.  Groups
 * tagged by step 2 (tag == seq) are also reachable from the changed root
 * and are skipped.  Of the remainder, a group whose refcnt - seq equals
 * roots->nnodes is reached by every old root, i.e. the extent's exclusive
 * attribution to that group flips with this operation, so its excl
 * counters change by -sgn * num_bytes.
 *
 * Caller holds fs_info->qgroup_lock, hence GFP_ATOMIC for ulist_add.
 * Returns 0 on success or a negative errno from ulist_add.
 */
static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq, int sgn, u64 num_bytes)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	int ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		ulist_reinit(tmp);
		ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
		if (ret < 0)
			return ret;

		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
			/* reachable from the new root too: handled in step 2 */
			if (qg->tag == seq)
				continue;

			/* reached by every old root: exclusive share changes */
			if (qg->refcnt - seq == roots->nnodes) {
				qg->excl -= sgn * num_bytes;
				qg->excl_cmpr -= sgn * num_bytes;
				qgroup_dirty(fs_info, qg);
			}

			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(tmp, glist->group->qgroupid,
						(uintptr_t)glist->group,
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}

	return 0;
}
1338
1339/*
1340 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1341 * from the fs. First, all roots referencing the extent are searched, and
1342 * then the space is accounted accordingly to the different roots. The
1343 * accounting algorithm works in 3 steps documented inline.
1344 */
int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info,
			     struct btrfs_delayed_ref_node *node,
			     struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_root *quota_root;
	u64 ref_root;
	struct btrfs_qgroup *qgroup;
	struct ulist *roots = NULL;
	u64 seq;
	int ret = 0;
	int sgn;

	if (!fs_info->quota_enabled)
		return 0;

	BUG_ON(!fs_info->quota_root);

	/* extract the root this ref belongs to from the delayed ref node */
	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
		struct btrfs_delayed_tree_ref *ref;
		ref = btrfs_delayed_node_to_tree_ref(node);
		ref_root = ref->root;
	} else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
		   node->type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_delayed_data_ref *ref;
		ref = btrfs_delayed_node_to_data_ref(node);
		ref_root = ref->root;
	} else {
		BUG();
	}

	if (!is_fstree(ref_root)) {
		/*
		 * non-fs-trees are not being accounted
		 */
		return 0;
	}

	/* the direction of the operation determines the sign and the seq */
	switch (node->action) {
	case BTRFS_ADD_DELAYED_REF:
	case BTRFS_ADD_DELAYED_EXTENT:
		sgn = 1;
		seq = btrfs_tree_mod_seq_prev(node->seq);
		break;
	case BTRFS_DROP_DELAYED_REF:
		sgn = -1;
		seq = node->seq;
		break;
	case BTRFS_UPDATE_DELAYED_HEAD:
		return 0;
	default:
		BUG();
	}

	/*
	 * extents at or beyond the rescan cursor have not been processed by
	 * the rescan yet and will be accounted by it, so skip them here
	 */
	mutex_lock(&fs_info->qgroup_rescan_lock);
	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
			mutex_unlock(&fs_info->qgroup_rescan_lock);
			return 0;
		}
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	/*
	 * the delayed ref sequence number we pass depends on the direction of
	 * the operation. for add operations, we pass
	 * tree_mod_log_prev_seq(node->seq) to skip
	 * the delayed ref's current sequence number, because we need the state
	 * of the tree before the add operation. for delete operations, we pass
	 * (node->seq) to include the delayed ref's current sequence number,
	 * because we need the state of the tree after the delete operation.
	 */
	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
	if (ret < 0)
		return ret;

	spin_lock(&fs_info->qgroup_lock);

	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto unlock;

	/* the ref'ing root may have no qgroup; nothing to account then */
	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto unlock;

	/*
	 * step 1: for each old ref, visit all nodes once and inc refcnt
	 */
	ulist_reinit(fs_info->qgroup_ulist);
	seq = fs_info->qgroup_seq;
	fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */

	ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist,
				       seq);
	if (ret)
		goto unlock;

	/*
	 * step 2: walk from the new root
	 */
	ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist,
				       seq, sgn, node->num_bytes, qgroup);
	if (ret)
		goto unlock;

	/*
	 * step 3: walk again from old refs
	 */
	ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist,
				       seq, sgn, node->num_bytes);
	if (ret)
		goto unlock;

unlock:
	spin_unlock(&fs_info->qgroup_lock);
	ulist_free(roots);

	return ret;
}
1466
1467/*
1468 * called from commit_transaction. Writes all changed qgroups to disk.
1469 */
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
		      struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root = fs_info->quota_root;
	int ret = 0;
	int start_rescan_worker = 0;

	if (!quota_root)
		goto out;

	/* quota is being switched on with this commit: schedule a rescan */
	if (!fs_info->quota_enabled && fs_info->pending_quota_state)
		start_rescan_worker = 1;

	fs_info->quota_enabled = fs_info->pending_quota_state;

	/*
	 * write back all dirtied qgroups; the spinlock is dropped around
	 * each on-disk update and retaken before re-checking the list
	 */
	spin_lock(&fs_info->qgroup_lock);
	while (!list_empty(&fs_info->dirty_qgroups)) {
		struct btrfs_qgroup *qgroup;
		qgroup = list_first_entry(&fs_info->dirty_qgroups,
					  struct btrfs_qgroup, dirty);
		list_del_init(&qgroup->dirty);
		spin_unlock(&fs_info->qgroup_lock);
		ret = update_qgroup_info_item(trans, quota_root, qgroup);
		if (ret)
			fs_info->qgroup_flags |=
					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		spin_lock(&fs_info->qgroup_lock);
	}
	if (fs_info->quota_enabled)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
	else
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
	spin_unlock(&fs_info->qgroup_lock);

	ret = update_qgroup_status_item(trans, fs_info, quota_root);
	if (ret)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;

	if (!ret && start_rescan_worker) {
		ret = qgroup_rescan_init(fs_info, 0, 1);
		if (!ret) {
			qgroup_rescan_zero_tracking(fs_info);
			btrfs_queue_work(fs_info->qgroup_rescan_workers,
					 &fs_info->qgroup_rescan_work);
		}
		/* failing to start the rescan is not fatal for the commit */
		ret = 0;
	}

out:

	return ret;
}
1522
1523/*
1524 * copy the acounting information between qgroups. This is necessary when a
1525 * snapshot or a subvolume is created
1526 */
1527int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1528			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
1529			 struct btrfs_qgroup_inherit *inherit)
1530{
1531	int ret = 0;
1532	int i;
1533	u64 *i_qgroups;
1534	struct btrfs_root *quota_root = fs_info->quota_root;
1535	struct btrfs_qgroup *srcgroup;
1536	struct btrfs_qgroup *dstgroup;
1537	u32 level_size = 0;
1538	u64 nums;
1539
1540	mutex_lock(&fs_info->qgroup_ioctl_lock);
1541	if (!fs_info->quota_enabled)
1542		goto out;
1543
1544	if (!quota_root) {
1545		ret = -EINVAL;
1546		goto out;
1547	}
1548
1549	if (inherit) {
1550		i_qgroups = (u64 *)(inherit + 1);
1551		nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
1552		       2 * inherit->num_excl_copies;
1553		for (i = 0; i < nums; ++i) {
1554			srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
1555			if (!srcgroup) {
1556				ret = -EINVAL;
1557				goto out;
1558			}
1559			++i_qgroups;
1560		}
1561	}
1562
1563	/*
1564	 * create a tracking group for the subvol itself
1565	 */
1566	ret = add_qgroup_item(trans, quota_root, objectid);
1567	if (ret)
1568		goto out;
1569
1570	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
1571		ret = update_qgroup_limit_item(trans, quota_root, objectid,
1572					       inherit->lim.flags,
1573					       inherit->lim.max_rfer,
1574					       inherit->lim.max_excl,
1575					       inherit->lim.rsv_rfer,
1576					       inherit->lim.rsv_excl);
1577		if (ret)
1578			goto out;
1579	}
1580
1581	if (srcid) {
1582		struct btrfs_root *srcroot;
1583		struct btrfs_key srckey;
1584		int srcroot_level;
1585
1586		srckey.objectid = srcid;
1587		srckey.type = BTRFS_ROOT_ITEM_KEY;
1588		srckey.offset = (u64)-1;
1589		srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
1590		if (IS_ERR(srcroot)) {
1591			ret = PTR_ERR(srcroot);
1592			goto out;
1593		}
1594
1595		rcu_read_lock();
1596		srcroot_level = btrfs_header_level(srcroot->node);
1597		level_size = btrfs_level_size(srcroot, srcroot_level);
1598		rcu_read_unlock();
1599	}
1600
1601	/*
1602	 * add qgroup to all inherited groups
1603	 */
1604	if (inherit) {
1605		i_qgroups = (u64 *)(inherit + 1);
1606		for (i = 0; i < inherit->num_qgroups; ++i) {
1607			ret = add_qgroup_relation_item(trans, quota_root,
1608						       objectid, *i_qgroups);
1609			if (ret)
1610				goto out;
1611			ret = add_qgroup_relation_item(trans, quota_root,
1612						       *i_qgroups, objectid);
1613			if (ret)
1614				goto out;
1615			++i_qgroups;
1616		}
1617	}
1618
1619
1620	spin_lock(&fs_info->qgroup_lock);
1621
1622	dstgroup = add_qgroup_rb(fs_info, objectid);
1623	if (IS_ERR(dstgroup)) {
1624		ret = PTR_ERR(dstgroup);
1625		goto unlock;
1626	}
1627
1628	if (srcid) {
1629		srcgroup = find_qgroup_rb(fs_info, srcid);
1630		if (!srcgroup)
1631			goto unlock;
1632		dstgroup->rfer = srcgroup->rfer - level_size;
1633		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
1634		srcgroup->excl = level_size;
1635		srcgroup->excl_cmpr = level_size;
1636		qgroup_dirty(fs_info, dstgroup);
1637		qgroup_dirty(fs_info, srcgroup);
1638	}
1639
1640	if (!inherit)
1641		goto unlock;
1642
1643	i_qgroups = (u64 *)(inherit + 1);
1644	for (i = 0; i < inherit->num_qgroups; ++i) {
1645		ret = add_relation_rb(quota_root->fs_info, objectid,
1646				      *i_qgroups);
1647		if (ret)
1648			goto unlock;
1649		++i_qgroups;
1650	}
1651
1652	for (i = 0; i <  inherit->num_ref_copies; ++i) {
1653		struct btrfs_qgroup *src;
1654		struct btrfs_qgroup *dst;
1655
1656		src = find_qgroup_rb(fs_info, i_qgroups[0]);
1657		dst = find_qgroup_rb(fs_info, i_qgroups[1]);
1658
1659		if (!src || !dst) {
1660			ret = -EINVAL;
1661			goto unlock;
1662		}
1663
1664		dst->rfer = src->rfer - level_size;
1665		dst->rfer_cmpr = src->rfer_cmpr - level_size;
1666		i_qgroups += 2;
1667	}
1668	for (i = 0; i <  inherit->num_excl_copies; ++i) {
1669		struct btrfs_qgroup *src;
1670		struct btrfs_qgroup *dst;
1671
1672		src = find_qgroup_rb(fs_info, i_qgroups[0]);
1673		dst = find_qgroup_rb(fs_info, i_qgroups[1]);
1674
1675		if (!src || !dst) {
1676			ret = -EINVAL;
1677			goto unlock;
1678		}
1679
1680		dst->excl = src->excl + level_size;
1681		dst->excl_cmpr = src->excl_cmpr + level_size;
1682		i_qgroups += 2;
1683	}
1684
1685unlock:
1686	spin_unlock(&fs_info->qgroup_lock);
1687out:
1688	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1689	return ret;
1690}
1691
1692/*
1693 * reserve some space for a qgroup and all its parents. The reservation takes
1694 * place with start_transaction or dealloc_reserve, similar to ENOSPC
1695 * accounting. If not enough space is available, EDQUOT is returned.
1696 * We assume that the requested space is new for all qgroups.
1697 */
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 ref_root = root->root_key.objectid;
	int ret = 0;
	struct ulist_node *unode;
	struct ulist_iterator uiter;

	/* only fs trees are subject to quota accounting */
	if (!is_fstree(ref_root))
		return 0;

	if (num_bytes == 0)
		return 0;

	spin_lock(&fs_info->qgroup_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	/*
	 * in a first step, we check all affected qgroups if any limits would
	 * be exceeded
	 */
	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

		/* would the reservation push referenced space past the limit? */
		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
		    qg->reserved + (s64)qg->rfer + num_bytes >
		    qg->max_rfer) {
			ret = -EDQUOT;
			goto out;
		}

		/* same check against the exclusive limit */
		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
		    qg->reserved + (s64)qg->excl + num_bytes >
		    qg->max_excl) {
			ret = -EDQUOT;
			goto out;
		}

		/* parent groups must be checked (and later charged) as well */
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;
	/*
	 * no limits exceeded, now record the reservation into all qgroups
	 */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;

		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

		qg->reserved += num_bytes;
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
	return ret;
}
1778
/*
 * Release a reservation taken with btrfs_qgroup_reserve: subtract
 * @num_bytes from the reserved counter of the subvolume's qgroup and of
 * all its (transitive) parent groups.
 */
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	u64 ref_root = root->root_key.objectid;
	int ret = 0;

	/* only fs trees are subject to quota accounting */
	if (!is_fstree(ref_root))
		return;

	if (num_bytes == 0)
		return;

	spin_lock(&fs_info->qgroup_lock);

	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	/* walk the group and all its parents, undoing the reservation */
	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

		qg->reserved -= num_bytes;

		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
}
1831
1832void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1833{
1834	if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
1835		return;
1836	btrfs_err(trans->root->fs_info,
1837		"qgroups not uptodate in trans handle %p:  list is%s empty, "
1838		"seq is %#x.%x",
1839		trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
1840		(u32)(trans->delayed_ref_elem.seq >> 32),
1841		(u32)trans->delayed_ref_elem.seq);
1842	BUG();
1843}
1844
1845/*
1846 * returns < 0 on error, 0 when more leafs are to be scanned.
1847 * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
1848 */
static int
qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
		   struct btrfs_trans_handle *trans, struct ulist *tmp,
		   struct extent_buffer *scratch_leaf)
{
	struct btrfs_key found;
	struct ulist *roots = NULL;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct seq_list tree_mod_seq_elem = {};
	u64 seq;
	int slot;
	int ret;

	path->leave_spinning = 1;
	mutex_lock(&fs_info->qgroup_rescan_lock);
	ret = btrfs_search_slot_for_read(fs_info->extent_root,
					 &fs_info->qgroup_rescan_progress,
					 path, 1, 0);

	pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
		 fs_info->qgroup_rescan_progress.objectid,
		 fs_info->qgroup_rescan_progress.type,
		 fs_info->qgroup_rescan_progress.offset, ret);

	if (ret) {
		/*
		 * The rescan is about to end, we will not be scanning any
		 * further blocks. We cannot unset the RESCAN flag here, because
		 * we want to commit the transaction if everything went well.
		 * To make the live accounting work in this phase, we set our
		 * scan progress pointer such that every real extent objectid
		 * will be smaller.
		 */
		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
		btrfs_release_path(path);
		mutex_unlock(&fs_info->qgroup_rescan_lock);
		return ret;
	}

	/* advance the cursor past this leaf for the next invocation */
	btrfs_item_key_to_cpu(path->nodes[0], &found,
			      btrfs_header_nritems(path->nodes[0]) - 1);
	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;

	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
	/* take a private copy of the leaf so the path can be released */
	memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
	slot = path->slots[0];
	btrfs_release_path(path);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
		u64 num_bytes;

		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
		if (found.type != BTRFS_EXTENT_ITEM_KEY &&
		    found.type != BTRFS_METADATA_ITEM_KEY)
			continue;
		/* metadata items don't carry their size in the key offset */
		if (found.type == BTRFS_METADATA_ITEM_KEY)
			num_bytes = fs_info->extent_root->leafsize;
		else
			num_bytes = found.offset;

		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
					   tree_mod_seq_elem.seq, &roots);
		if (ret < 0)
			goto out;
		spin_lock(&fs_info->qgroup_lock);
		seq = fs_info->qgroup_seq;
		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */

		ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
		if (ret) {
			spin_unlock(&fs_info->qgroup_lock);
			ulist_free(roots);
			goto out;
		}

		/*
		 * step2 of btrfs_qgroup_account_ref works from a single root,
		 * we're doing all at once here.
		 */
		ulist_reinit(tmp);
		ULIST_ITER_INIT(&uiter);
		while ((unode = ulist_next(roots, &uiter))) {
			struct btrfs_qgroup *qg;

			qg = find_qgroup_rb(fs_info, unode->val);
			if (!qg)
				continue;

			ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
					GFP_ATOMIC);
			if (ret < 0) {
				spin_unlock(&fs_info->qgroup_lock);
				ulist_free(roots);
				goto out;
			}
		}

		/* this loop is similar to step 2 of btrfs_qgroup_account_ref */
		ULIST_ITER_INIT(&uiter);
		while ((unode = ulist_next(tmp, &uiter))) {
			struct btrfs_qgroup *qg;
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
			qg->rfer += num_bytes;
			qg->rfer_cmpr += num_bytes;
			WARN_ON(qg->tag >= seq);
			/* reached by every referencing root: count exclusive */
			if (qg->refcnt - seq == roots->nnodes) {
				qg->excl += num_bytes;
				qg->excl_cmpr += num_bytes;
			}
			qgroup_dirty(fs_info, qg);

			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(tmp, glist->group->qgroupid,
						(uintptr_t)glist->group,
						GFP_ATOMIC);
				if (ret < 0) {
					spin_unlock(&fs_info->qgroup_lock);
					ulist_free(roots);
					goto out;
				}
			}
		}

		spin_unlock(&fs_info->qgroup_lock);
		ulist_free(roots);
		ret = 0;
	}

out:
	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);

	return ret;
}
1986
/*
 * Worker for the qgroup rescan: repeatedly scans one extent tree leaf per
 * transaction via qgroup_rescan_leaf until it reports completion (> 0) or
 * an error, then updates the status flags and wakes any waiters.
 */
static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
{
	struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
						     qgroup_rescan_work);
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans = NULL;
	struct ulist *tmp = NULL;
	struct extent_buffer *scratch_leaf = NULL;
	int err = -ENOMEM;

	path = btrfs_alloc_path();
	if (!path)
		goto out;
	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp)
		goto out;
	scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
	if (!scratch_leaf)
		goto out;

	err = 0;
	while (!err) {
		trans = btrfs_start_transaction(fs_info->fs_root, 0);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
			break;
		}
		if (!fs_info->quota_enabled) {
			/* quota was disabled meanwhile; abort the rescan */
			err = -EINTR;
		} else {
			err = qgroup_rescan_leaf(fs_info, path, trans,
						 tmp, scratch_leaf);
		}
		/* commit only the final, successful pass; end otherwise */
		if (err > 0)
			btrfs_commit_transaction(trans, fs_info->fs_root);
		else
			btrfs_end_transaction(trans, fs_info->fs_root);
	}

out:
	kfree(scratch_leaf);
	ulist_free(tmp);
	btrfs_free_path(path);

	mutex_lock(&fs_info->qgroup_rescan_lock);
	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;

	/*
	 * err == 2 signals a completed scan per qgroup_rescan_leaf's
	 * contract, so the inconsistent flag may be dropped
	 */
	if (err == 2 &&
	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	} else if (err < 0) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	if (err >= 0) {
		btrfs_info(fs_info, "qgroup scan completed%s",
			err == 2 ? " (inconsistency flag cleared)" : "");
	} else {
		btrfs_err(fs_info, "qgroup scan failed with %d", err);
	}

	complete_all(&fs_info->qgroup_rescan_completion);
}
2051
2052/*
2053 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
2054 * memory required for the rescan context.
2055 */
2056static int
2057qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2058		   int init_flags)
2059{
2060	int ret = 0;
2061
2062	if (!init_flags &&
2063	    (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
2064	     !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
2065		ret = -EINVAL;
2066		goto err;
2067	}
2068
2069	mutex_lock(&fs_info->qgroup_rescan_lock);
2070	spin_lock(&fs_info->qgroup_lock);
2071
2072	if (init_flags) {
2073		if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2074			ret = -EINPROGRESS;
2075		else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
2076			ret = -EINVAL;
2077
2078		if (ret) {
2079			spin_unlock(&fs_info->qgroup_lock);
2080			mutex_unlock(&fs_info->qgroup_rescan_lock);
2081			goto err;
2082		}
2083
2084		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2085	}
2086
2087	memset(&fs_info->qgroup_rescan_progress, 0,
2088		sizeof(fs_info->qgroup_rescan_progress));
2089	fs_info->qgroup_rescan_progress.objectid = progress_objectid;
2090
2091	spin_unlock(&fs_info->qgroup_lock);
2092	mutex_unlock(&fs_info->qgroup_rescan_lock);
2093
2094	init_completion(&fs_info->qgroup_rescan_completion);
2095
2096	memset(&fs_info->qgroup_rescan_work, 0,
2097	       sizeof(fs_info->qgroup_rescan_work));
2098	btrfs_init_work(&fs_info->qgroup_rescan_work,
2099			btrfs_qgroup_rescan_worker, NULL, NULL);
2100
2101	if (ret) {
2102err:
2103		btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
2104		return ret;
2105	}
2106
2107	return 0;
2108}
2109
2110static void
2111qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
2112{
2113	struct rb_node *n;
2114	struct btrfs_qgroup *qgroup;
2115
2116	spin_lock(&fs_info->qgroup_lock);
2117	/* clear all current qgroup tracking information */
2118	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2119		qgroup = rb_entry(n, struct btrfs_qgroup, node);
2120		qgroup->rfer = 0;
2121		qgroup->rfer_cmpr = 0;
2122		qgroup->excl = 0;
2123		qgroup->excl_cmpr = 0;
2124	}
2125	spin_unlock(&fs_info->qgroup_lock);
2126}
2127
2128int
2129btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2130{
2131	int ret = 0;
2132	struct btrfs_trans_handle *trans;
2133
2134	ret = qgroup_rescan_init(fs_info, 0, 1);
2135	if (ret)
2136		return ret;
2137
2138	/*
2139	 * We have set the rescan_progress to 0, which means no more
2140	 * delayed refs will be accounted by btrfs_qgroup_account_ref.
2141	 * However, btrfs_qgroup_account_ref may be right after its call
2142	 * to btrfs_find_all_roots, in which case it would still do the
2143	 * accounting.
2144	 * To solve this, we're committing the transaction, which will
2145	 * ensure we run all delayed refs and only after that, we are
2146	 * going to clear all tracking information for a clean start.
2147	 */
2148
2149	trans = btrfs_join_transaction(fs_info->fs_root);
2150	if (IS_ERR(trans)) {
2151		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2152		return PTR_ERR(trans);
2153	}
2154	ret = btrfs_commit_transaction(trans, fs_info->fs_root);
2155	if (ret) {
2156		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2157		return ret;
2158	}
2159
2160	qgroup_rescan_zero_tracking(fs_info);
2161
2162	btrfs_queue_work(fs_info->qgroup_rescan_workers,
2163			 &fs_info->qgroup_rescan_work);
2164
2165	return 0;
2166}
2167
2168int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
2169{
2170	int running;
2171	int ret = 0;
2172
2173	mutex_lock(&fs_info->qgroup_rescan_lock);
2174	spin_lock(&fs_info->qgroup_lock);
2175	running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2176	spin_unlock(&fs_info->qgroup_lock);
2177	mutex_unlock(&fs_info->qgroup_rescan_lock);
2178
2179	if (running)
2180		ret = wait_for_completion_interruptible(
2181					&fs_info->qgroup_rescan_completion);
2182
2183	return ret;
2184}
2185
2186/*
2187 * this is only called from open_ctree where we're still single threaded, thus
2188 * locking is omitted here.
2189 */
2190void
2191btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
2192{
2193	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2194		btrfs_queue_work(fs_info->qgroup_rescan_workers,
2195				 &fs_info->qgroup_rescan_work);
2196}