Linux Audio

Check our new training course

Loading...
Note: File does not exist in v4.10.11.
   1// SPDX-License-Identifier: GPL-2.0
   2#ifndef NO_BCACHEFS_FS
   3
   4#include "bcachefs.h"
   5#include "acl.h"
   6#include "bkey_buf.h"
   7#include "btree_update.h"
   8#include "buckets.h"
   9#include "chardev.h"
  10#include "dirent.h"
  11#include "errcode.h"
  12#include "extents.h"
  13#include "fs.h"
  14#include "fs-common.h"
  15#include "fs-io.h"
  16#include "fs-ioctl.h"
  17#include "fs-io-buffered.h"
  18#include "fs-io-direct.h"
  19#include "fs-io-pagecache.h"
  20#include "fsck.h"
  21#include "inode.h"
  22#include "io_read.h"
  23#include "journal.h"
  24#include "keylist.h"
  25#include "quota.h"
  26#include "snapshot.h"
  27#include "super.h"
  28#include "xattr.h"
  29
  30#include <linux/aio.h>
  31#include <linux/backing-dev.h>
  32#include <linux/exportfs.h>
  33#include <linux/fiemap.h>
  34#include <linux/module.h>
  35#include <linux/pagemap.h>
  36#include <linux/posix_acl.h>
  37#include <linux/random.h>
  38#include <linux/seq_file.h>
  39#include <linux/statfs.h>
  40#include <linux/string.h>
  41#include <linux/xattr.h>
  42
  43static struct kmem_cache *bch2_inode_cache;
  44
  45static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
  46				struct bch_inode_info *,
  47				struct bch_inode_unpacked *,
  48				struct bch_subvolume *);
  49
  50void bch2_inode_update_after_write(struct btree_trans *trans,
  51				   struct bch_inode_info *inode,
  52				   struct bch_inode_unpacked *bi,
  53				   unsigned fields)
  54{
  55	struct bch_fs *c = trans->c;
  56
  57	BUG_ON(bi->bi_inum != inode->v.i_ino);
  58
  59	bch2_assert_pos_locked(trans, BTREE_ID_inodes,
  60			       POS(0, bi->bi_inum),
  61			       c->opts.inodes_use_key_cache);
  62
  63	set_nlink(&inode->v, bch2_inode_nlink_get(bi));
  64	i_uid_write(&inode->v, bi->bi_uid);
  65	i_gid_write(&inode->v, bi->bi_gid);
  66	inode->v.i_mode	= bi->bi_mode;
  67
  68	if (fields & ATTR_ATIME)
  69		inode_set_atime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_atime));
  70	if (fields & ATTR_MTIME)
  71		inode_set_mtime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_mtime));
  72	if (fields & ATTR_CTIME)
  73		inode_set_ctime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_ctime));
  74
  75	inode->ei_inode		= *bi;
  76
  77	bch2_inode_flags_to_vfs(inode);
  78}
  79
  80int __must_check bch2_write_inode(struct bch_fs *c,
  81				  struct bch_inode_info *inode,
  82				  inode_set_fn set,
  83				  void *p, unsigned fields)
  84{
  85	struct btree_trans *trans = bch2_trans_get(c);
  86	struct btree_iter iter = { NULL };
  87	struct bch_inode_unpacked inode_u;
  88	int ret;
  89retry:
  90	bch2_trans_begin(trans);
  91
  92	ret   = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode),
  93				BTREE_ITER_INTENT) ?:
  94		(set ? set(trans, inode, &inode_u, p) : 0) ?:
  95		bch2_inode_write(trans, &iter, &inode_u) ?:
  96		bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
  97
  98	/*
  99	 * the btree node lock protects inode->ei_inode, not ei_update_lock;
 100	 * this is important for inode updates via bchfs_write_index_update
 101	 */
 102	if (!ret)
 103		bch2_inode_update_after_write(trans, inode, &inode_u, fields);
 104
 105	bch2_trans_iter_exit(trans, &iter);
 106
 107	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
 108		goto retry;
 109
 110	bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
 111			     "inode %u:%llu not found when updating",
 112			     inode_inum(inode).subvol,
 113			     inode_inum(inode).inum);
 114
 115	bch2_trans_put(trans);
 116	return ret < 0 ? ret : 0;
 117}
 118
 119int bch2_fs_quota_transfer(struct bch_fs *c,
 120			   struct bch_inode_info *inode,
 121			   struct bch_qid new_qid,
 122			   unsigned qtypes,
 123			   enum quota_acct_mode mode)
 124{
 125	unsigned i;
 126	int ret;
 127
 128	qtypes &= enabled_qtypes(c);
 129
 130	for (i = 0; i < QTYP_NR; i++)
 131		if (new_qid.q[i] == inode->ei_qid.q[i])
 132			qtypes &= ~(1U << i);
 133
 134	if (!qtypes)
 135		return 0;
 136
 137	mutex_lock(&inode->ei_quota_lock);
 138
 139	ret = bch2_quota_transfer(c, qtypes, new_qid,
 140				  inode->ei_qid,
 141				  inode->v.i_blocks +
 142				  inode->ei_quota_reserved,
 143				  mode);
 144	if (!ret)
 145		for (i = 0; i < QTYP_NR; i++)
 146			if (qtypes & (1 << i))
 147				inode->ei_qid.q[i] = new_qid.q[i];
 148
 149	mutex_unlock(&inode->ei_quota_lock);
 150
 151	return ret;
 152}
 153
 154static int bch2_iget5_test(struct inode *vinode, void *p)
 155{
 156	struct bch_inode_info *inode = to_bch_ei(vinode);
 157	subvol_inum *inum = p;
 158
 159	return inode->ei_subvol == inum->subvol &&
 160		inode->ei_inode.bi_inum == inum->inum;
 161}
 162
 163static int bch2_iget5_set(struct inode *vinode, void *p)
 164{
 165	struct bch_inode_info *inode = to_bch_ei(vinode);
 166	subvol_inum *inum = p;
 167
 168	inode->v.i_ino		= inum->inum;
 169	inode->ei_subvol	= inum->subvol;
 170	inode->ei_inode.bi_inum	= inum->inum;
 171	return 0;
 172}
 173
 174static unsigned bch2_inode_hash(subvol_inum inum)
 175{
 176	return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL);
 177}
 178
 179struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
 180{
 181	struct bch_inode_unpacked inode_u;
 182	struct bch_inode_info *inode;
 183	struct btree_trans *trans;
 184	struct bch_subvolume subvol;
 185	int ret;
 186
 187	inode = to_bch_ei(iget5_locked(c->vfs_sb,
 188				       bch2_inode_hash(inum),
 189				       bch2_iget5_test,
 190				       bch2_iget5_set,
 191				       &inum));
 192	if (unlikely(!inode))
 193		return ERR_PTR(-ENOMEM);
 194	if (!(inode->v.i_state & I_NEW))
 195		return &inode->v;
 196
 197	trans = bch2_trans_get(c);
 198	ret = lockrestart_do(trans,
 199		bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
 200		bch2_inode_find_by_inum_trans(trans, inum, &inode_u));
 201
 202	if (!ret)
 203		bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
 204	bch2_trans_put(trans);
 205
 206	if (ret) {
 207		iget_failed(&inode->v);
 208		return ERR_PTR(bch2_err_class(ret));
 209	}
 210
 211	mutex_lock(&c->vfs_inodes_lock);
 212	list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
 213	mutex_unlock(&c->vfs_inodes_lock);
 214
 215	unlock_new_inode(&inode->v);
 216
 217	return &inode->v;
 218}
 219
 220struct bch_inode_info *
 221__bch2_create(struct mnt_idmap *idmap,
 222	      struct bch_inode_info *dir, struct dentry *dentry,
 223	      umode_t mode, dev_t rdev, subvol_inum snapshot_src,
 224	      unsigned flags)
 225{
 226	struct bch_fs *c = dir->v.i_sb->s_fs_info;
 227	struct btree_trans *trans;
 228	struct bch_inode_unpacked dir_u;
 229	struct bch_inode_info *inode, *old;
 230	struct bch_inode_unpacked inode_u;
 231	struct posix_acl *default_acl = NULL, *acl = NULL;
 232	subvol_inum inum;
 233	struct bch_subvolume subvol;
 234	u64 journal_seq = 0;
 235	int ret;
 236
 237	/*
 238	 * preallocate acls + vfs inode before btree transaction, so that
 239	 * nothing can fail after the transaction succeeds:
 240	 */
 241#ifdef CONFIG_BCACHEFS_POSIX_ACL
 242	ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl);
 243	if (ret)
 244		return ERR_PTR(ret);
 245#endif
 246	inode = to_bch_ei(new_inode(c->vfs_sb));
 247	if (unlikely(!inode)) {
 248		inode = ERR_PTR(-ENOMEM);
 249		goto err;
 250	}
 251
 252	bch2_inode_init_early(c, &inode_u);
 253
 254	if (!(flags & BCH_CREATE_TMPFILE))
 255		mutex_lock(&dir->ei_update_lock);
 256
 257	trans = bch2_trans_get(c);
 258retry:
 259	bch2_trans_begin(trans);
 260
 261	ret   = bch2_subvol_is_ro_trans(trans, dir->ei_subvol) ?:
 262		bch2_create_trans(trans,
 263				  inode_inum(dir), &dir_u, &inode_u,
 264				  !(flags & BCH_CREATE_TMPFILE)
 265				  ? &dentry->d_name : NULL,
 266				  from_kuid(i_user_ns(&dir->v), current_fsuid()),
 267				  from_kgid(i_user_ns(&dir->v), current_fsgid()),
 268				  mode, rdev,
 269				  default_acl, acl, snapshot_src, flags) ?:
 270		bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
 271				KEY_TYPE_QUOTA_PREALLOC);
 272	if (unlikely(ret))
 273		goto err_before_quota;
 274
 275	inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol;
 276	inum.inum = inode_u.bi_inum;
 277
 278	ret   = bch2_subvolume_get(trans, inum.subvol, true,
 279				   BTREE_ITER_WITH_UPDATES, &subvol) ?:
 280		bch2_trans_commit(trans, NULL, &journal_seq, 0);
 281	if (unlikely(ret)) {
 282		bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
 283				KEY_TYPE_QUOTA_WARN);
 284err_before_quota:
 285		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
 286			goto retry;
 287		goto err_trans;
 288	}
 289
 290	if (!(flags & BCH_CREATE_TMPFILE)) {
 291		bch2_inode_update_after_write(trans, dir, &dir_u,
 292					      ATTR_MTIME|ATTR_CTIME);
 293		mutex_unlock(&dir->ei_update_lock);
 294	}
 295
 296	bch2_iget5_set(&inode->v, &inum);
 297	bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
 298
 299	set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
 300	set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);
 301
 302	/*
 303	 * we must insert the new inode into the inode cache before calling
 304	 * bch2_trans_exit() and dropping locks, else we could race with another
 305	 * thread pulling the inode in and modifying it:
 306	 */
 307
 308	inode->v.i_state |= I_CREATING;
 309
 310	old = to_bch_ei(inode_insert5(&inode->v,
 311				      bch2_inode_hash(inum),
 312				      bch2_iget5_test,
 313				      bch2_iget5_set,
 314				      &inum));
 315	BUG_ON(!old);
 316
 317	if (unlikely(old != inode)) {
 318		/*
 319		 * We raced, another process pulled the new inode into cache
 320		 * before us:
 321		 */
 322		make_bad_inode(&inode->v);
 323		iput(&inode->v);
 324
 325		inode = old;
 326	} else {
 327		mutex_lock(&c->vfs_inodes_lock);
 328		list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
 329		mutex_unlock(&c->vfs_inodes_lock);
 330		/*
 331		 * we really don't want insert_inode_locked2() to be setting
 332		 * I_NEW...
 333		 */
 334		unlock_new_inode(&inode->v);
 335	}
 336
 337	bch2_trans_put(trans);
 338err:
 339	posix_acl_release(default_acl);
 340	posix_acl_release(acl);
 341	return inode;
 342err_trans:
 343	if (!(flags & BCH_CREATE_TMPFILE))
 344		mutex_unlock(&dir->ei_update_lock);
 345
 346	bch2_trans_put(trans);
 347	make_bad_inode(&inode->v);
 348	iput(&inode->v);
 349	inode = ERR_PTR(ret);
 350	goto err;
 351}
 352
 353/* methods */
 354
 355static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
 356				  unsigned int flags)
 357{
 358	struct bch_fs *c = vdir->i_sb->s_fs_info;
 359	struct bch_inode_info *dir = to_bch_ei(vdir);
 360	struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode);
 361	struct inode *vinode = NULL;
 362	subvol_inum inum = { .subvol = 1 };
 363	int ret;
 364
 365	ret = bch2_dirent_lookup(c, inode_inum(dir), &hash,
 366				 &dentry->d_name, &inum);
 367
 368	if (!ret)
 369		vinode = bch2_vfs_inode_get(c, inum);
 370
 371	return d_splice_alias(vinode, dentry);
 372}
 373
 374static int bch2_mknod(struct mnt_idmap *idmap,
 375		      struct inode *vdir, struct dentry *dentry,
 376		      umode_t mode, dev_t rdev)
 377{
 378	struct bch_inode_info *inode =
 379		__bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev,
 380			      (subvol_inum) { 0 }, 0);
 381
 382	if (IS_ERR(inode))
 383		return bch2_err_class(PTR_ERR(inode));
 384
 385	d_instantiate(dentry, &inode->v);
 386	return 0;
 387}
 388
 389static int bch2_create(struct mnt_idmap *idmap,
 390		       struct inode *vdir, struct dentry *dentry,
 391		       umode_t mode, bool excl)
 392{
 393	return bch2_mknod(idmap, vdir, dentry, mode|S_IFREG, 0);
 394}
 395
 396static int __bch2_link(struct bch_fs *c,
 397		       struct bch_inode_info *inode,
 398		       struct bch_inode_info *dir,
 399		       struct dentry *dentry)
 400{
 401	struct btree_trans *trans = bch2_trans_get(c);
 402	struct bch_inode_unpacked dir_u, inode_u;
 403	int ret;
 404
 405	mutex_lock(&inode->ei_update_lock);
 406
 407	ret = commit_do(trans, NULL, NULL, 0,
 408			bch2_link_trans(trans,
 409					inode_inum(dir),   &dir_u,
 410					inode_inum(inode), &inode_u,
 411					&dentry->d_name));
 412
 413	if (likely(!ret)) {
 414		bch2_inode_update_after_write(trans, dir, &dir_u,
 415					      ATTR_MTIME|ATTR_CTIME);
 416		bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME);
 417	}
 418
 419	bch2_trans_put(trans);
 420	mutex_unlock(&inode->ei_update_lock);
 421	return ret;
 422}
 423
 424static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
 425		     struct dentry *dentry)
 426{
 427	struct bch_fs *c = vdir->i_sb->s_fs_info;
 428	struct bch_inode_info *dir = to_bch_ei(vdir);
 429	struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode);
 430	int ret;
 431
 432	lockdep_assert_held(&inode->v.i_rwsem);
 433
 434	ret   = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
 435		bch2_subvol_is_ro(c, inode->ei_subvol) ?:
 436		__bch2_link(c, inode, dir, dentry);
 437	if (unlikely(ret))
 438		return bch2_err_class(ret);
 439
 440	ihold(&inode->v);
 441	d_instantiate(dentry, &inode->v);
 442	return 0;
 443}
 444
 445int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
 446		  bool deleting_snapshot)
 447{
 448	struct bch_fs *c = vdir->i_sb->s_fs_info;
 449	struct bch_inode_info *dir = to_bch_ei(vdir);
 450	struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
 451	struct bch_inode_unpacked dir_u, inode_u;
 452	struct btree_trans *trans = bch2_trans_get(c);
 453	int ret;
 454
 455	bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
 456
 457	ret = commit_do(trans, NULL, NULL,
 458			BCH_TRANS_COMMIT_no_enospc,
 459		bch2_unlink_trans(trans,
 460				  inode_inum(dir), &dir_u,
 461				  &inode_u, &dentry->d_name,
 462				  deleting_snapshot));
 463	if (unlikely(ret))
 464		goto err;
 465
 466	bch2_inode_update_after_write(trans, dir, &dir_u,
 467				      ATTR_MTIME|ATTR_CTIME);
 468	bch2_inode_update_after_write(trans, inode, &inode_u,
 469				      ATTR_MTIME);
 470
 471	if (inode_u.bi_subvol) {
 472		/*
 473		 * Subvolume deletion is asynchronous, but we still want to tell
 474		 * the VFS that it's been deleted here:
 475		 */
 476		set_nlink(&inode->v, 0);
 477	}
 478err:
 479	bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
 480	bch2_trans_put(trans);
 481
 482	return ret;
 483}
 484
 485static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
 486{
 487	struct bch_inode_info *dir= to_bch_ei(vdir);
 488	struct bch_fs *c = dir->v.i_sb->s_fs_info;
 489
 490	int ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
 491		__bch2_unlink(vdir, dentry, false);
 492	return bch2_err_class(ret);
 493}
 494
 495static int bch2_symlink(struct mnt_idmap *idmap,
 496			struct inode *vdir, struct dentry *dentry,
 497			const char *symname)
 498{
 499	struct bch_fs *c = vdir->i_sb->s_fs_info;
 500	struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
 501	int ret;
 502
 503	inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
 504			      (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
 505	if (IS_ERR(inode))
 506		return bch2_err_class(PTR_ERR(inode));
 507
 508	inode_lock(&inode->v);
 509	ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
 510	inode_unlock(&inode->v);
 511
 512	if (unlikely(ret))
 513		goto err;
 514
 515	ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX);
 516	if (unlikely(ret))
 517		goto err;
 518
 519	ret = __bch2_link(c, inode, dir, dentry);
 520	if (unlikely(ret))
 521		goto err;
 522
 523	d_instantiate(dentry, &inode->v);
 524	return 0;
 525err:
 526	iput(&inode->v);
 527	return bch2_err_class(ret);
 528}
 529
 530static int bch2_mkdir(struct mnt_idmap *idmap,
 531		      struct inode *vdir, struct dentry *dentry, umode_t mode)
 532{
 533	return bch2_mknod(idmap, vdir, dentry, mode|S_IFDIR, 0);
 534}
 535
 536static int bch2_rename2(struct mnt_idmap *idmap,
 537			struct inode *src_vdir, struct dentry *src_dentry,
 538			struct inode *dst_vdir, struct dentry *dst_dentry,
 539			unsigned flags)
 540{
 541	struct bch_fs *c = src_vdir->i_sb->s_fs_info;
 542	struct bch_inode_info *src_dir = to_bch_ei(src_vdir);
 543	struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir);
 544	struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode);
 545	struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode);
 546	struct bch_inode_unpacked dst_dir_u, src_dir_u;
 547	struct bch_inode_unpacked src_inode_u, dst_inode_u;
 548	struct btree_trans *trans;
 549	enum bch_rename_mode mode = flags & RENAME_EXCHANGE
 550		? BCH_RENAME_EXCHANGE
 551		: dst_dentry->d_inode
 552		? BCH_RENAME_OVERWRITE : BCH_RENAME;
 553	int ret;
 554
 555	if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE))
 556		return -EINVAL;
 557
 558	if (mode == BCH_RENAME_OVERWRITE) {
 559		ret = filemap_write_and_wait_range(src_inode->v.i_mapping,
 560						   0, LLONG_MAX);
 561		if (ret)
 562			return ret;
 563	}
 564
 565	trans = bch2_trans_get(c);
 566
 567	bch2_lock_inodes(INODE_UPDATE_LOCK,
 568			 src_dir,
 569			 dst_dir,
 570			 src_inode,
 571			 dst_inode);
 572
 573	ret   = bch2_subvol_is_ro_trans(trans, src_dir->ei_subvol) ?:
 574		bch2_subvol_is_ro_trans(trans, dst_dir->ei_subvol);
 575	if (ret)
 576		goto err;
 577
 578	if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) {
 579		ret = bch2_fs_quota_transfer(c, src_inode,
 580					     dst_dir->ei_qid,
 581					     1 << QTYP_PRJ,
 582					     KEY_TYPE_QUOTA_PREALLOC);
 583		if (ret)
 584			goto err;
 585	}
 586
 587	if (mode == BCH_RENAME_EXCHANGE &&
 588	    inode_attr_changing(src_dir, dst_inode, Inode_opt_project)) {
 589		ret = bch2_fs_quota_transfer(c, dst_inode,
 590					     src_dir->ei_qid,
 591					     1 << QTYP_PRJ,
 592					     KEY_TYPE_QUOTA_PREALLOC);
 593		if (ret)
 594			goto err;
 595	}
 596
 597	ret = commit_do(trans, NULL, NULL, 0,
 598			bch2_rename_trans(trans,
 599					  inode_inum(src_dir), &src_dir_u,
 600					  inode_inum(dst_dir), &dst_dir_u,
 601					  &src_inode_u,
 602					  &dst_inode_u,
 603					  &src_dentry->d_name,
 604					  &dst_dentry->d_name,
 605					  mode));
 606	if (unlikely(ret))
 607		goto err;
 608
 609	BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum);
 610	BUG_ON(dst_inode &&
 611	       dst_inode->v.i_ino != dst_inode_u.bi_inum);
 612
 613	bch2_inode_update_after_write(trans, src_dir, &src_dir_u,
 614				      ATTR_MTIME|ATTR_CTIME);
 615
 616	if (src_dir != dst_dir)
 617		bch2_inode_update_after_write(trans, dst_dir, &dst_dir_u,
 618					      ATTR_MTIME|ATTR_CTIME);
 619
 620	bch2_inode_update_after_write(trans, src_inode, &src_inode_u,
 621				      ATTR_CTIME);
 622
 623	if (dst_inode)
 624		bch2_inode_update_after_write(trans, dst_inode, &dst_inode_u,
 625					      ATTR_CTIME);
 626err:
 627	bch2_trans_put(trans);
 628
 629	bch2_fs_quota_transfer(c, src_inode,
 630			       bch_qid(&src_inode->ei_inode),
 631			       1 << QTYP_PRJ,
 632			       KEY_TYPE_QUOTA_NOCHECK);
 633	if (dst_inode)
 634		bch2_fs_quota_transfer(c, dst_inode,
 635				       bch_qid(&dst_inode->ei_inode),
 636				       1 << QTYP_PRJ,
 637				       KEY_TYPE_QUOTA_NOCHECK);
 638
 639	bch2_unlock_inodes(INODE_UPDATE_LOCK,
 640			   src_dir,
 641			   dst_dir,
 642			   src_inode,
 643			   dst_inode);
 644
 645	return bch2_err_class(ret);
 646}
 647
 648static void bch2_setattr_copy(struct mnt_idmap *idmap,
 649			      struct bch_inode_info *inode,
 650			      struct bch_inode_unpacked *bi,
 651			      struct iattr *attr)
 652{
 653	struct bch_fs *c = inode->v.i_sb->s_fs_info;
 654	unsigned int ia_valid = attr->ia_valid;
 655
 656	if (ia_valid & ATTR_UID)
 657		bi->bi_uid = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
 658	if (ia_valid & ATTR_GID)
 659		bi->bi_gid = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
 660
 661	if (ia_valid & ATTR_SIZE)
 662		bi->bi_size = attr->ia_size;
 663
 664	if (ia_valid & ATTR_ATIME)
 665		bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime);
 666	if (ia_valid & ATTR_MTIME)
 667		bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime);
 668	if (ia_valid & ATTR_CTIME)
 669		bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime);
 670
 671	if (ia_valid & ATTR_MODE) {
 672		umode_t mode = attr->ia_mode;
 673		kgid_t gid = ia_valid & ATTR_GID
 674			? attr->ia_gid
 675			: inode->v.i_gid;
 676
 677		if (!in_group_p(gid) &&
 678		    !capable_wrt_inode_uidgid(idmap, &inode->v, CAP_FSETID))
 679			mode &= ~S_ISGID;
 680		bi->bi_mode = mode;
 681	}
 682}
 683
 684int bch2_setattr_nonsize(struct mnt_idmap *idmap,
 685			 struct bch_inode_info *inode,
 686			 struct iattr *attr)
 687{
 688	struct bch_fs *c = inode->v.i_sb->s_fs_info;
 689	struct bch_qid qid;
 690	struct btree_trans *trans;
 691	struct btree_iter inode_iter = { NULL };
 692	struct bch_inode_unpacked inode_u;
 693	struct posix_acl *acl = NULL;
 694	int ret;
 695
 696	mutex_lock(&inode->ei_update_lock);
 697
 698	qid = inode->ei_qid;
 699
 700	if (attr->ia_valid & ATTR_UID)
 701		qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
 702
 703	if (attr->ia_valid & ATTR_GID)
 704		qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
 705
 706	ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
 707				     KEY_TYPE_QUOTA_PREALLOC);
 708	if (ret)
 709		goto err;
 710
 711	trans = bch2_trans_get(c);
 712retry:
 713	bch2_trans_begin(trans);
 714	kfree(acl);
 715	acl = NULL;
 716
 717	ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
 718			      BTREE_ITER_INTENT);
 719	if (ret)
 720		goto btree_err;
 721
 722	bch2_setattr_copy(idmap, inode, &inode_u, attr);
 723
 724	if (attr->ia_valid & ATTR_MODE) {
 725		ret = bch2_acl_chmod(trans, inode_inum(inode), &inode_u,
 726				     inode_u.bi_mode, &acl);
 727		if (ret)
 728			goto btree_err;
 729	}
 730
 731	ret =   bch2_inode_write(trans, &inode_iter, &inode_u) ?:
 732		bch2_trans_commit(trans, NULL, NULL,
 733				  BCH_TRANS_COMMIT_no_enospc);
 734btree_err:
 735	bch2_trans_iter_exit(trans, &inode_iter);
 736
 737	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
 738		goto retry;
 739	if (unlikely(ret))
 740		goto err_trans;
 741
 742	bch2_inode_update_after_write(trans, inode, &inode_u, attr->ia_valid);
 743
 744	if (acl)
 745		set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
 746err_trans:
 747	bch2_trans_put(trans);
 748err:
 749	mutex_unlock(&inode->ei_update_lock);
 750
 751	return bch2_err_class(ret);
 752}
 753
 754static int bch2_getattr(struct mnt_idmap *idmap,
 755			const struct path *path, struct kstat *stat,
 756			u32 request_mask, unsigned query_flags)
 757{
 758	struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry));
 759	struct bch_fs *c = inode->v.i_sb->s_fs_info;
 760
 761	stat->dev	= inode->v.i_sb->s_dev;
 762	stat->ino	= inode->v.i_ino;
 763	stat->mode	= inode->v.i_mode;
 764	stat->nlink	= inode->v.i_nlink;
 765	stat->uid	= inode->v.i_uid;
 766	stat->gid	= inode->v.i_gid;
 767	stat->rdev	= inode->v.i_rdev;
 768	stat->size	= i_size_read(&inode->v);
 769	stat->atime	= inode_get_atime(&inode->v);
 770	stat->mtime	= inode_get_mtime(&inode->v);
 771	stat->ctime	= inode_get_ctime(&inode->v);
 772	stat->blksize	= block_bytes(c);
 773	stat->blocks	= inode->v.i_blocks;
 774
 775	if (request_mask & STATX_BTIME) {
 776		stat->result_mask |= STATX_BTIME;
 777		stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
 778	}
 779
 780	if (inode->ei_inode.bi_flags & BCH_INODE_immutable)
 781		stat->attributes |= STATX_ATTR_IMMUTABLE;
 782	stat->attributes_mask	 |= STATX_ATTR_IMMUTABLE;
 783
 784	if (inode->ei_inode.bi_flags & BCH_INODE_append)
 785		stat->attributes |= STATX_ATTR_APPEND;
 786	stat->attributes_mask	 |= STATX_ATTR_APPEND;
 787
 788	if (inode->ei_inode.bi_flags & BCH_INODE_nodump)
 789		stat->attributes |= STATX_ATTR_NODUMP;
 790	stat->attributes_mask	 |= STATX_ATTR_NODUMP;
 791
 792	return 0;
 793}
 794
 795static int bch2_setattr(struct mnt_idmap *idmap,
 796			struct dentry *dentry, struct iattr *iattr)
 797{
 798	struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
 799	struct bch_fs *c = inode->v.i_sb->s_fs_info;
 800	int ret;
 801
 802	lockdep_assert_held(&inode->v.i_rwsem);
 803
 804	ret   = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
 805		setattr_prepare(idmap, dentry, iattr);
 806	if (ret)
 807		return ret;
 808
 809	return iattr->ia_valid & ATTR_SIZE
 810		? bchfs_truncate(idmap, inode, iattr)
 811		: bch2_setattr_nonsize(idmap, inode, iattr);
 812}
 813
 814static int bch2_tmpfile(struct mnt_idmap *idmap,
 815			struct inode *vdir, struct file *file, umode_t mode)
 816{
 817	struct bch_inode_info *inode =
 818		__bch2_create(idmap, to_bch_ei(vdir),
 819			      file->f_path.dentry, mode, 0,
 820			      (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
 821
 822	if (IS_ERR(inode))
 823		return bch2_err_class(PTR_ERR(inode));
 824
 825	d_mark_tmpfile(file, &inode->v);
 826	d_instantiate(file->f_path.dentry, &inode->v);
 827	return finish_open_simple(file, 0);
 828}
 829
 830static int bch2_fill_extent(struct bch_fs *c,
 831			    struct fiemap_extent_info *info,
 832			    struct bkey_s_c k, unsigned flags)
 833{
 834	if (bkey_extent_is_direct_data(k.k)) {
 835		struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 836		const union bch_extent_entry *entry;
 837		struct extent_ptr_decoded p;
 838		int ret;
 839
 840		if (k.k->type == KEY_TYPE_reflink_v)
 841			flags |= FIEMAP_EXTENT_SHARED;
 842
 843		bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
 844			int flags2 = 0;
 845			u64 offset = p.ptr.offset;
 846
 847			if (p.ptr.unwritten)
 848				flags2 |= FIEMAP_EXTENT_UNWRITTEN;
 849
 850			if (p.crc.compression_type)
 851				flags2 |= FIEMAP_EXTENT_ENCODED;
 852			else
 853				offset += p.crc.offset;
 854
 855			if ((offset & (block_sectors(c) - 1)) ||
 856			    (k.k->size & (block_sectors(c) - 1)))
 857				flags2 |= FIEMAP_EXTENT_NOT_ALIGNED;
 858
 859			ret = fiemap_fill_next_extent(info,
 860						bkey_start_offset(k.k) << 9,
 861						offset << 9,
 862						k.k->size << 9, flags|flags2);
 863			if (ret)
 864				return ret;
 865		}
 866
 867		return 0;
 868	} else if (bkey_extent_is_inline_data(k.k)) {
 869		return fiemap_fill_next_extent(info,
 870					       bkey_start_offset(k.k) << 9,
 871					       0, k.k->size << 9,
 872					       flags|
 873					       FIEMAP_EXTENT_DATA_INLINE);
 874	} else if (k.k->type == KEY_TYPE_reservation) {
 875		return fiemap_fill_next_extent(info,
 876					       bkey_start_offset(k.k) << 9,
 877					       0, k.k->size << 9,
 878					       flags|
 879					       FIEMAP_EXTENT_DELALLOC|
 880					       FIEMAP_EXTENT_UNWRITTEN);
 881	} else {
 882		BUG();
 883	}
 884}
 885
 886static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
 887		       u64 start, u64 len)
 888{
 889	struct bch_fs *c = vinode->i_sb->s_fs_info;
 890	struct bch_inode_info *ei = to_bch_ei(vinode);
 891	struct btree_trans *trans;
 892	struct btree_iter iter;
 893	struct bkey_s_c k;
 894	struct bkey_buf cur, prev;
 895	struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
 896	unsigned offset_into_extent, sectors;
 897	bool have_extent = false;
 898	u32 snapshot;
 899	int ret = 0;
 900
 901	ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
 902	if (ret)
 903		return ret;
 904
 905	if (start + len < start)
 906		return -EINVAL;
 907
 908	start >>= 9;
 909
 910	bch2_bkey_buf_init(&cur);
 911	bch2_bkey_buf_init(&prev);
 912	trans = bch2_trans_get(c);
 913retry:
 914	bch2_trans_begin(trans);
 915
 916	ret = bch2_subvolume_get_snapshot(trans, ei->ei_subvol, &snapshot);
 917	if (ret)
 918		goto err;
 919
 920	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
 921			     SPOS(ei->v.i_ino, start, snapshot), 0);
 922
 923	while (!(ret = btree_trans_too_many_iters(trans)) &&
 924	       (k = bch2_btree_iter_peek_upto(&iter, end)).k &&
 925	       !(ret = bkey_err(k))) {
 926		enum btree_id data_btree = BTREE_ID_extents;
 927
 928		if (!bkey_extent_is_data(k.k) &&
 929		    k.k->type != KEY_TYPE_reservation) {
 930			bch2_btree_iter_advance(&iter);
 931			continue;
 932		}
 933
 934		offset_into_extent	= iter.pos.offset -
 935			bkey_start_offset(k.k);
 936		sectors			= k.k->size - offset_into_extent;
 937
 938		bch2_bkey_buf_reassemble(&cur, c, k);
 939
 940		ret = bch2_read_indirect_extent(trans, &data_btree,
 941					&offset_into_extent, &cur);
 942		if (ret)
 943			break;
 944
 945		k = bkey_i_to_s_c(cur.k);
 946		bch2_bkey_buf_realloc(&prev, c, k.k->u64s);
 947
 948		sectors = min(sectors, k.k->size - offset_into_extent);
 949
 950		bch2_cut_front(POS(k.k->p.inode,
 951				   bkey_start_offset(k.k) +
 952				   offset_into_extent),
 953			       cur.k);
 954		bch2_key_resize(&cur.k->k, sectors);
 955		cur.k->k.p = iter.pos;
 956		cur.k->k.p.offset += cur.k->k.size;
 957
 958		if (have_extent) {
 959			bch2_trans_unlock(trans);
 960			ret = bch2_fill_extent(c, info,
 961					bkey_i_to_s_c(prev.k), 0);
 962			if (ret)
 963				break;
 964		}
 965
 966		bkey_copy(prev.k, cur.k);
 967		have_extent = true;
 968
 969		bch2_btree_iter_set_pos(&iter,
 970			POS(iter.pos.inode, iter.pos.offset + sectors));
 971	}
 972	start = iter.pos.offset;
 973	bch2_trans_iter_exit(trans, &iter);
 974err:
 975	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
 976		goto retry;
 977
 978	if (!ret && have_extent) {
 979		bch2_trans_unlock(trans);
 980		ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
 981				       FIEMAP_EXTENT_LAST);
 982	}
 983
 984	bch2_trans_put(trans);
 985	bch2_bkey_buf_exit(&cur, c);
 986	bch2_bkey_buf_exit(&prev, c);
 987	return ret < 0 ? ret : 0;
 988}
 989
 990static const struct vm_operations_struct bch_vm_ops = {
 991	.fault		= bch2_page_fault,
 992	.map_pages	= filemap_map_pages,
 993	.page_mkwrite   = bch2_page_mkwrite,
 994};
 995
 996static int bch2_mmap(struct file *file, struct vm_area_struct *vma)
 997{
 998	file_accessed(file);
 999
1000	vma->vm_ops = &bch_vm_ops;
1001	return 0;
1002}
1003
1004/* Directories: */
1005
1006static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence)
1007{
1008	return generic_file_llseek_size(file, offset, whence,
1009					S64_MAX, S64_MAX);
1010}
1011
1012static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
1013{
1014	struct bch_inode_info *inode = file_bch_inode(file);
1015	struct bch_fs *c = inode->v.i_sb->s_fs_info;
1016
1017	if (!dir_emit_dots(file, ctx))
1018		return 0;
1019
1020	int ret = bch2_readdir(c, inode_inum(inode), ctx);
1021
1022	bch_err_fn(c, ret);
1023	return bch2_err_class(ret);
1024}
1025
1026static int bch2_open(struct inode *vinode, struct file *file)
1027{
1028	if (file->f_flags & (O_WRONLY|O_RDWR)) {
1029		struct bch_inode_info *inode = to_bch_ei(vinode);
1030		struct bch_fs *c = inode->v.i_sb->s_fs_info;
1031
1032		int ret = bch2_subvol_is_ro(c, inode->ei_subvol);
1033		if (ret)
1034			return ret;
1035	}
1036
1037	return generic_file_open(vinode, file);
1038}
1039
1040static const struct file_operations bch_file_operations = {
1041	.open		= bch2_open,
1042	.llseek		= bch2_llseek,
1043	.read_iter	= bch2_read_iter,
1044	.write_iter	= bch2_write_iter,
1045	.mmap		= bch2_mmap,
1046	.fsync		= bch2_fsync,
1047	.splice_read	= filemap_splice_read,
1048	.splice_write	= iter_file_splice_write,
1049	.fallocate	= bch2_fallocate_dispatch,
1050	.unlocked_ioctl = bch2_fs_file_ioctl,
1051#ifdef CONFIG_COMPAT
1052	.compat_ioctl	= bch2_compat_fs_ioctl,
1053#endif
1054	.remap_file_range = bch2_remap_file_range,
1055};
1056
1057static const struct inode_operations bch_file_inode_operations = {
1058	.getattr	= bch2_getattr,
1059	.setattr	= bch2_setattr,
1060	.fiemap		= bch2_fiemap,
1061	.listxattr	= bch2_xattr_list,
1062#ifdef CONFIG_BCACHEFS_POSIX_ACL
1063	.get_acl	= bch2_get_acl,
1064	.set_acl	= bch2_set_acl,
1065#endif
1066};
1067
1068static const struct inode_operations bch_dir_inode_operations = {
1069	.lookup		= bch2_lookup,
1070	.create		= bch2_create,
1071	.link		= bch2_link,
1072	.unlink		= bch2_unlink,
1073	.symlink	= bch2_symlink,
1074	.mkdir		= bch2_mkdir,
1075	.rmdir		= bch2_unlink,
1076	.mknod		= bch2_mknod,
1077	.rename		= bch2_rename2,
1078	.getattr	= bch2_getattr,
1079	.setattr	= bch2_setattr,
1080	.tmpfile	= bch2_tmpfile,
1081	.listxattr	= bch2_xattr_list,
1082#ifdef CONFIG_BCACHEFS_POSIX_ACL
1083	.get_acl	= bch2_get_acl,
1084	.set_acl	= bch2_set_acl,
1085#endif
1086};
1087
1088static const struct file_operations bch_dir_file_operations = {
1089	.llseek		= bch2_dir_llseek,
1090	.read		= generic_read_dir,
1091	.iterate_shared	= bch2_vfs_readdir,
1092	.fsync		= bch2_fsync,
1093	.unlocked_ioctl = bch2_fs_file_ioctl,
1094#ifdef CONFIG_COMPAT
1095	.compat_ioctl	= bch2_compat_fs_ioctl,
1096#endif
1097};
1098
1099static const struct inode_operations bch_symlink_inode_operations = {
1100	.get_link	= page_get_link,
1101	.getattr	= bch2_getattr,
1102	.setattr	= bch2_setattr,
1103	.listxattr	= bch2_xattr_list,
1104#ifdef CONFIG_BCACHEFS_POSIX_ACL
1105	.get_acl	= bch2_get_acl,
1106	.set_acl	= bch2_set_acl,
1107#endif
1108};
1109
1110static const struct inode_operations bch_special_inode_operations = {
1111	.getattr	= bch2_getattr,
1112	.setattr	= bch2_setattr,
1113	.listxattr	= bch2_xattr_list,
1114#ifdef CONFIG_BCACHEFS_POSIX_ACL
1115	.get_acl	= bch2_get_acl,
1116	.set_acl	= bch2_set_acl,
1117#endif
1118};
1119
1120static const struct address_space_operations bch_address_space_operations = {
1121	.read_folio	= bch2_read_folio,
1122	.writepages	= bch2_writepages,
1123	.readahead	= bch2_readahead,
1124	.dirty_folio	= filemap_dirty_folio,
1125	.write_begin	= bch2_write_begin,
1126	.write_end	= bch2_write_end,
1127	.invalidate_folio = bch2_invalidate_folio,
1128	.release_folio	= bch2_release_folio,
1129	.direct_IO	= noop_direct_IO,
1130#ifdef CONFIG_MIGRATION
1131	.migrate_folio	= filemap_migrate_folio,
1132#endif
1133	.error_remove_folio = generic_error_remove_folio,
1134};
1135
/*
 * File handle payload identifying a single inode. Filled in by
 * bch2_inode_to_fid() and consumed by bch2_nfs_get_inode().
 *
 * The struct is packed and bcachefs_fid_valid() compares the handle length
 * against its size in u32 units, so the field order and sizes must not
 * change.
 */
struct bcachefs_fid {
	u64		inum;	/* inode number (bi_inum) */
	u32		subvol;	/* subvolume the inode was accessed through */
	u32		gen;	/* inode generation; a mismatch yields -ESTALE */
} __packed;
1141
/*
 * File handle payload carrying both an inode and the directory it was
 * encoded relative to. Because @fid is the first member, a handle of this
 * type can also be read as a plain struct bcachefs_fid (as
 * bch2_fh_to_dentry() does).
 */
struct bcachefs_fid_with_parent {
	struct bcachefs_fid	fid;	/* the inode itself */
	struct bcachefs_fid	dir;	/* the parent directory */
} __packed;
1146
1147static int bcachefs_fid_valid(int fh_len, int fh_type)
1148{
1149	switch (fh_type) {
1150	case FILEID_BCACHEFS_WITHOUT_PARENT:
1151		return fh_len == sizeof(struct bcachefs_fid) / sizeof(u32);
1152	case FILEID_BCACHEFS_WITH_PARENT:
1153		return fh_len == sizeof(struct bcachefs_fid_with_parent) / sizeof(u32);
1154	default:
1155		return false;
1156	}
1157}
1158
1159static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode)
1160{
1161	return (struct bcachefs_fid) {
1162		.inum	= inode->ei_inode.bi_inum,
1163		.subvol	= inode->ei_subvol,
1164		.gen	= inode->ei_inode.bi_generation,
1165	};
1166}
1167
1168static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len,
1169			  struct inode *vdir)
1170{
1171	struct bch_inode_info *inode	= to_bch_ei(vinode);
1172	struct bch_inode_info *dir	= to_bch_ei(vdir);
1173	int min_len;
1174
1175	if (!S_ISDIR(inode->v.i_mode) && dir) {
1176		struct bcachefs_fid_with_parent *fid = (void *) fh;
1177
1178		min_len = sizeof(*fid) / sizeof(u32);
1179		if (*len < min_len) {
1180			*len = min_len;
1181			return FILEID_INVALID;
1182		}
1183
1184		fid->fid = bch2_inode_to_fid(inode);
1185		fid->dir = bch2_inode_to_fid(dir);
1186
1187		*len = min_len;
1188		return FILEID_BCACHEFS_WITH_PARENT;
1189	} else {
1190		struct bcachefs_fid *fid = (void *) fh;
1191
1192		min_len = sizeof(*fid) / sizeof(u32);
1193		if (*len < min_len) {
1194			*len = min_len;
1195			return FILEID_INVALID;
1196		}
1197		*fid = bch2_inode_to_fid(inode);
1198
1199		*len = min_len;
1200		return FILEID_BCACHEFS_WITHOUT_PARENT;
1201	}
1202}
1203
1204static struct inode *bch2_nfs_get_inode(struct super_block *sb,
1205					struct bcachefs_fid fid)
1206{
1207	struct bch_fs *c = sb->s_fs_info;
1208	struct inode *vinode = bch2_vfs_inode_get(c, (subvol_inum) {
1209				    .subvol = fid.subvol,
1210				    .inum = fid.inum,
1211	});
1212	if (!IS_ERR(vinode) && vinode->i_generation != fid.gen) {
1213		iput(vinode);
1214		vinode = ERR_PTR(-ESTALE);
1215	}
1216	return vinode;
1217}
1218
1219static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *_fid,
1220		int fh_len, int fh_type)
1221{
1222	struct bcachefs_fid *fid = (void *) _fid;
1223
1224	if (!bcachefs_fid_valid(fh_len, fh_type))
1225		return NULL;
1226
1227	return d_obtain_alias(bch2_nfs_get_inode(sb, *fid));
1228}
1229
1230static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *_fid,
1231		int fh_len, int fh_type)
1232{
1233	struct bcachefs_fid_with_parent *fid = (void *) _fid;
1234
1235	if (!bcachefs_fid_valid(fh_len, fh_type) ||
1236	    fh_type != FILEID_BCACHEFS_WITH_PARENT)
1237		return NULL;
1238
1239	return d_obtain_alias(bch2_nfs_get_inode(sb, fid->dir));
1240}
1241
1242static struct dentry *bch2_get_parent(struct dentry *child)
1243{
1244	struct bch_inode_info *inode = to_bch_ei(child->d_inode);
1245	struct bch_fs *c = inode->v.i_sb->s_fs_info;
1246	subvol_inum parent_inum = {
1247		.subvol = inode->ei_inode.bi_parent_subvol ?:
1248			inode->ei_subvol,
1249		.inum = inode->ei_inode.bi_dir,
1250	};
1251
1252	return d_obtain_alias(bch2_vfs_inode_get(c, parent_inum));
1253}
1254
1255static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child)
1256{
1257	struct bch_inode_info *inode	= to_bch_ei(child->d_inode);
1258	struct bch_inode_info *dir	= to_bch_ei(parent->d_inode);
1259	struct bch_fs *c = inode->v.i_sb->s_fs_info;
1260	struct btree_trans *trans;
1261	struct btree_iter iter1;
1262	struct btree_iter iter2;
1263	struct bkey_s_c k;
1264	struct bkey_s_c_dirent d;
1265	struct bch_inode_unpacked inode_u;
1266	subvol_inum target;
1267	u32 snapshot;
1268	struct qstr dirent_name;
1269	unsigned name_len = 0;
1270	int ret;
1271
1272	if (!S_ISDIR(dir->v.i_mode))
1273		return -EINVAL;
1274
1275	trans = bch2_trans_get(c);
1276
1277	bch2_trans_iter_init(trans, &iter1, BTREE_ID_dirents,
1278			     POS(dir->ei_inode.bi_inum, 0), 0);
1279	bch2_trans_iter_init(trans, &iter2, BTREE_ID_dirents,
1280			     POS(dir->ei_inode.bi_inum, 0), 0);
1281retry:
1282	bch2_trans_begin(trans);
1283
1284	ret = bch2_subvolume_get_snapshot(trans, dir->ei_subvol, &snapshot);
1285	if (ret)
1286		goto err;
1287
1288	bch2_btree_iter_set_snapshot(&iter1, snapshot);
1289	bch2_btree_iter_set_snapshot(&iter2, snapshot);
1290
1291	ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u);
1292	if (ret)
1293		goto err;
1294
1295	if (inode_u.bi_dir == dir->ei_inode.bi_inum) {
1296		bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset));
1297
1298		k = bch2_btree_iter_peek_slot(&iter1);
1299		ret = bkey_err(k);
1300		if (ret)
1301			goto err;
1302
1303		if (k.k->type != KEY_TYPE_dirent) {
1304			ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
1305			goto err;
1306		}
1307
1308		d = bkey_s_c_to_dirent(k);
1309		ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
1310		if (ret > 0)
1311			ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
1312		if (ret)
1313			goto err;
1314
1315		if (target.subvol	== inode->ei_subvol &&
1316		    target.inum		== inode->ei_inode.bi_inum)
1317			goto found;
1318	} else {
1319		/*
1320		 * File with multiple hardlinks and our backref is to the wrong
1321		 * directory - linear search:
1322		 */
1323		for_each_btree_key_continue_norestart(iter2, 0, k, ret) {
1324			if (k.k->p.inode > dir->ei_inode.bi_inum)
1325				break;
1326
1327			if (k.k->type != KEY_TYPE_dirent)
1328				continue;
1329
1330			d = bkey_s_c_to_dirent(k);
1331			ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
1332			if (ret < 0)
1333				break;
1334			if (ret)
1335				continue;
1336
1337			if (target.subvol	== inode->ei_subvol &&
1338			    target.inum		== inode->ei_inode.bi_inum)
1339				goto found;
1340		}
1341	}
1342
1343	ret = -ENOENT;
1344	goto err;
1345found:
1346	dirent_name = bch2_dirent_get_name(d);
1347
1348	name_len = min_t(unsigned, dirent_name.len, NAME_MAX);
1349	memcpy(name, dirent_name.name, name_len);
1350	name[name_len] = '\0';
1351err:
1352	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
1353		goto retry;
1354
1355	bch2_trans_iter_exit(trans, &iter1);
1356	bch2_trans_iter_exit(trans, &iter2);
1357	bch2_trans_put(trans);
1358
1359	return ret;
1360}
1361
1362static const struct export_operations bch_export_ops = {
1363	.encode_fh	= bch2_encode_fh,
1364	.fh_to_dentry	= bch2_fh_to_dentry,
1365	.fh_to_parent	= bch2_fh_to_parent,
1366	.get_parent	= bch2_get_parent,
1367	.get_name	= bch2_get_name,
1368};
1369
1370static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
1371				struct bch_inode_info *inode,
1372				struct bch_inode_unpacked *bi,
1373				struct bch_subvolume *subvol)
1374{
1375	bch2_inode_update_after_write(trans, inode, bi, ~0);
1376
1377	if (BCH_SUBVOLUME_SNAP(subvol))
1378		set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
1379	else
1380		clear_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
1381
1382	inode->v.i_blocks	= bi->bi_sectors;
1383	inode->v.i_ino		= bi->bi_inum;
1384	inode->v.i_rdev		= bi->bi_dev;
1385	inode->v.i_generation	= bi->bi_generation;
1386	inode->v.i_size		= bi->bi_size;
1387
1388	inode->ei_flags		= 0;
1389	inode->ei_quota_reserved = 0;
1390	inode->ei_qid		= bch_qid(bi);
1391	inode->ei_subvol	= inum.subvol;
1392
1393	inode->v.i_mapping->a_ops = &bch_address_space_operations;
1394
1395	switch (inode->v.i_mode & S_IFMT) {
1396	case S_IFREG:
1397		inode->v.i_op	= &bch_file_inode_operations;
1398		inode->v.i_fop	= &bch_file_operations;
1399		break;
1400	case S_IFDIR:
1401		inode->v.i_op	= &bch_dir_inode_operations;
1402		inode->v.i_fop	= &bch_dir_file_operations;
1403		break;
1404	case S_IFLNK:
1405		inode_nohighmem(&inode->v);
1406		inode->v.i_op	= &bch_symlink_inode_operations;
1407		break;
1408	default:
1409		init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev);
1410		inode->v.i_op	= &bch_special_inode_operations;
1411		break;
1412	}
1413
1414	mapping_set_large_folios(inode->v.i_mapping);
1415}
1416
1417static struct inode *bch2_alloc_inode(struct super_block *sb)
1418{
1419	struct bch_inode_info *inode;
1420
1421	inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS);
1422	if (!inode)
1423		return NULL;
1424
1425	inode_init_once(&inode->v);
1426	mutex_init(&inode->ei_update_lock);
1427	two_state_lock_init(&inode->ei_pagecache_lock);
1428	INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
1429	mutex_init(&inode->ei_quota_lock);
1430
1431	return &inode->v;
1432}
1433
1434static void bch2_i_callback(struct rcu_head *head)
1435{
1436	struct inode *vinode = container_of(head, struct inode, i_rcu);
1437	struct bch_inode_info *inode = to_bch_ei(vinode);
1438
1439	kmem_cache_free(bch2_inode_cache, inode);
1440}
1441
1442static void bch2_destroy_inode(struct inode *vinode)
1443{
1444	call_rcu(&vinode->i_rcu, bch2_i_callback);
1445}
1446
1447static int inode_update_times_fn(struct btree_trans *trans,
1448				 struct bch_inode_info *inode,
1449				 struct bch_inode_unpacked *bi,
1450				 void *p)
1451{
1452	struct bch_fs *c = inode->v.i_sb->s_fs_info;
1453
1454	bi->bi_atime	= timespec_to_bch2_time(c, inode_get_atime(&inode->v));
1455	bi->bi_mtime	= timespec_to_bch2_time(c, inode_get_mtime(&inode->v));
1456	bi->bi_ctime	= timespec_to_bch2_time(c, inode_get_ctime(&inode->v));
1457
1458	return 0;
1459}
1460
1461static int bch2_vfs_write_inode(struct inode *vinode,
1462				struct writeback_control *wbc)
1463{
1464	struct bch_fs *c = vinode->i_sb->s_fs_info;
1465	struct bch_inode_info *inode = to_bch_ei(vinode);
1466	int ret;
1467
1468	mutex_lock(&inode->ei_update_lock);
1469	ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
1470			       ATTR_ATIME|ATTR_MTIME|ATTR_CTIME);
1471	mutex_unlock(&inode->ei_update_lock);
1472
1473	return bch2_err_class(ret);
1474}
1475
1476static void bch2_evict_inode(struct inode *vinode)
1477{
1478	struct bch_fs *c = vinode->i_sb->s_fs_info;
1479	struct bch_inode_info *inode = to_bch_ei(vinode);
1480
1481	truncate_inode_pages_final(&inode->v.i_data);
1482
1483	clear_inode(&inode->v);
1484
1485	BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved);
1486
1487	if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
1488		bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
1489				KEY_TYPE_QUOTA_WARN);
1490		bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
1491				KEY_TYPE_QUOTA_WARN);
1492		bch2_inode_rm(c, inode_inum(inode));
1493	}
1494
1495	mutex_lock(&c->vfs_inodes_lock);
1496	list_del_init(&inode->ei_vfs_inode_list);
1497	mutex_unlock(&c->vfs_inodes_lock);
1498}
1499
/*
 * Push every cached VFS inode whose subvolume is listed in @s out of the
 * inode cache.
 *
 * The filesystem's inode list is scanned repeatedly. Inodes that can be
 * grabbed are collected, then marked with d_mark_dontcache() and have their
 * dentry aliases pruned outside the list lock. The function returns only
 * after two consecutive passes find nothing left to grab.
 */
void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
{
	struct bch_inode_info *inode;
	DARRAY(struct bch_inode_info *) grabbed;
	bool clean_pass = false, this_pass_clean;

	/*
	 * Initially, we scan for inodes without I_DONTCACHE, then mark them to
	 * be pruned with d_mark_dontcache().
	 *
	 * Once we've had a clean pass where we didn't find any inodes without
	 * I_DONTCACHE, we wait for them to be freed:
	 */

	darray_init(&grabbed);
	/* Return value not checked: pushes below handle a full array themselves. */
	darray_make_room(&grabbed, 1024);
again:
	cond_resched();
	this_pass_clean = true;

	mutex_lock(&c->vfs_inodes_lock);
	list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
		if (!snapshot_list_has_id(s, inode->ei_subvol))
			continue;

		if (!(inode->v.i_state & I_DONTCACHE) &&
		    !(inode->v.i_state & I_FREEING) &&
		    igrab(&inode->v)) {
			this_pass_clean = false;

			/*
			 * Allocating under the mutex, hence GFP_ATOMIC. If the
			 * push fails, drop the reference just taken and stop
			 * this pass; what was collected so far is processed
			 * and the scan is repeated.
			 */
			if (darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) {
				iput(&inode->v);
				break;
			}
		} else if (clean_pass && this_pass_clean) {
			/*
			 * Previous pass was clean and so is this one so far,
			 * yet a matching inode is still on the list: sleep on
			 * its __I_NEW bit waitqueue (dropping the list lock
			 * first) and rescan once woken.
			 */
			wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
			DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);

			prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
			mutex_unlock(&c->vfs_inodes_lock);

			schedule();
			finish_wait(wq, &wait.wq_entry);
			goto again;
		}
	}
	mutex_unlock(&c->vfs_inodes_lock);

	/* Outside the lock: mark, prune and release everything grabbed. */
	darray_for_each(grabbed, i) {
		inode = *i;
		d_mark_dontcache(&inode->v);
		d_prune_aliases(&inode->v);
		iput(&inode->v);
	}
	/* Empty the array but keep its allocation for the next pass. */
	grabbed.nr = 0;

	/* Keep scanning until two passes in a row were clean. */
	if (!clean_pass || !this_pass_clean) {
		clean_pass = this_pass_clean;
		goto again;
	}

	darray_exit(&grabbed);
}
1563
1564static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
1565{
1566	struct super_block *sb = dentry->d_sb;
1567	struct bch_fs *c = sb->s_fs_info;
1568	struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
1569	unsigned shift = sb->s_blocksize_bits - 9;
1570	/*
1571	 * this assumes inodes take up 64 bytes, which is a decent average
1572	 * number:
1573	 */
1574	u64 avail_inodes = ((usage.capacity - usage.used) << 3);
1575	u64 fsid;
1576
1577	buf->f_type	= BCACHEFS_STATFS_MAGIC;
1578	buf->f_bsize	= sb->s_blocksize;
1579	buf->f_blocks	= usage.capacity >> shift;
1580	buf->f_bfree	= usage.free >> shift;
1581	buf->f_bavail	= avail_factor(usage.free) >> shift;
1582
1583	buf->f_files	= usage.nr_inodes + avail_inodes;
1584	buf->f_ffree	= avail_inodes;
1585
1586	fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^
1587	       le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64));
1588	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
1589	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
1590	buf->f_namelen	= BCH_NAME_MAX;
1591
1592	return 0;
1593}
1594
1595static int bch2_sync_fs(struct super_block *sb, int wait)
1596{
1597	struct bch_fs *c = sb->s_fs_info;
1598	int ret;
1599
1600	if (c->opts.journal_flush_disabled)
1601		return 0;
1602
1603	if (!wait) {
1604		bch2_journal_flush_async(&c->journal, NULL);
1605		return 0;
1606	}
1607
1608	ret = bch2_journal_flush(&c->journal);
1609	return bch2_err_class(ret);
1610}
1611
1612static struct bch_fs *bch2_path_to_fs(const char *path)
1613{
1614	struct bch_fs *c;
1615	dev_t dev;
1616	int ret;
1617
1618	ret = lookup_bdev(path, &dev);
1619	if (ret)
1620		return ERR_PTR(ret);
1621
1622	c = bch2_dev_to_fs(dev);
1623	if (c)
1624		closure_put(&c->cl);
1625	return c ?: ERR_PTR(-ENOENT);
1626}
1627
1628static int bch2_remount(struct super_block *sb, int *flags, char *data)
1629{
1630	struct bch_fs *c = sb->s_fs_info;
1631	struct bch_opts opts = bch2_opts_empty();
1632	int ret;
1633
1634	ret = bch2_parse_mount_opts(c, &opts, data);
1635	if (ret)
1636		goto err;
1637
1638	opt_set(opts, read_only, (*flags & SB_RDONLY) != 0);
1639
1640	if (opts.read_only != c->opts.read_only) {
1641		down_write(&c->state_lock);
1642
1643		if (opts.read_only) {
1644			bch2_fs_read_only(c);
1645
1646			sb->s_flags |= SB_RDONLY;
1647		} else {
1648			ret = bch2_fs_read_write(c);
1649			if (ret) {
1650				bch_err(c, "error going rw: %i", ret);
1651				up_write(&c->state_lock);
1652				ret = -EINVAL;
1653				goto err;
1654			}
1655
1656			sb->s_flags &= ~SB_RDONLY;
1657		}
1658
1659		c->opts.read_only = opts.read_only;
1660
1661		up_write(&c->state_lock);
1662	}
1663
1664	if (opt_defined(opts, errors))
1665		c->opts.errors = opts.errors;
1666err:
1667	return bch2_err_class(ret);
1668}
1669
1670static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
1671{
1672	struct bch_fs *c = root->d_sb->s_fs_info;
1673	bool first = true;
1674
1675	for_each_online_member(c, ca) {
1676		if (!first)
1677			seq_putc(seq, ':');
1678		first = false;
1679		seq_puts(seq, ca->disk_sb.sb_name);
1680	}
1681
1682	return 0;
1683}
1684
1685static int bch2_show_options(struct seq_file *seq, struct dentry *root)
1686{
1687	struct bch_fs *c = root->d_sb->s_fs_info;
1688	enum bch_opt_id i;
1689	struct printbuf buf = PRINTBUF;
1690	int ret = 0;
1691
1692	for (i = 0; i < bch2_opts_nr; i++) {
1693		const struct bch_option *opt = &bch2_opt_table[i];
1694		u64 v = bch2_opt_get_by_id(&c->opts, i);
1695
1696		if (!(opt->flags & OPT_MOUNT))
1697			continue;
1698
1699		if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
1700			continue;
1701
1702		printbuf_reset(&buf);
1703		bch2_opt_to_text(&buf, c, c->disk_sb.sb, opt, v,
1704				 OPT_SHOW_MOUNT_STYLE);
1705		seq_putc(seq, ',');
1706		seq_puts(seq, buf.buf);
1707	}
1708
1709	if (buf.allocation_failure)
1710		ret = -ENOMEM;
1711	printbuf_exit(&buf);
1712	return ret;
1713}
1714
1715static void bch2_put_super(struct super_block *sb)
1716{
1717	struct bch_fs *c = sb->s_fs_info;
1718
1719	__bch2_fs_stop(c);
1720}
1721
1722/*
1723 * bcachefs doesn't currently integrate intwrite freeze protection but the
1724 * internal write references serve the same purpose. Therefore reuse the
1725 * read-only transition code to perform the quiesce. The caveat is that we don't
1726 * currently have the ability to block tasks that want a write reference while
1727 * the superblock is frozen. This is fine for now, but we should either add
1728 * blocking support or find a way to integrate sb_start_intwrite() and friends.
1729 */
1730static int bch2_freeze(struct super_block *sb)
1731{
1732	struct bch_fs *c = sb->s_fs_info;
1733
1734	down_write(&c->state_lock);
1735	bch2_fs_read_only(c);
1736	up_write(&c->state_lock);
1737	return 0;
1738}
1739
1740static int bch2_unfreeze(struct super_block *sb)
1741{
1742	struct bch_fs *c = sb->s_fs_info;
1743	int ret;
1744
1745	if (test_bit(BCH_FS_emergency_ro, &c->flags))
1746		return 0;
1747
1748	down_write(&c->state_lock);
1749	ret = bch2_fs_read_write(c);
1750	up_write(&c->state_lock);
1751	return ret;
1752}
1753
1754static const struct super_operations bch_super_operations = {
1755	.alloc_inode	= bch2_alloc_inode,
1756	.destroy_inode	= bch2_destroy_inode,
1757	.write_inode	= bch2_vfs_write_inode,
1758	.evict_inode	= bch2_evict_inode,
1759	.sync_fs	= bch2_sync_fs,
1760	.statfs		= bch2_statfs,
1761	.show_devname	= bch2_show_devname,
1762	.show_options	= bch2_show_options,
1763	.remount_fs	= bch2_remount,
1764	.put_super	= bch2_put_super,
1765	.freeze_fs	= bch2_freeze,
1766	.unfreeze_fs	= bch2_unfreeze,
1767};
1768
1769static int bch2_set_super(struct super_block *s, void *data)
1770{
1771	s->s_fs_info = data;
1772	return 0;
1773}
1774
1775static int bch2_noset_super(struct super_block *s, void *data)
1776{
1777	return -EBUSY;
1778}
1779
/* Dynamic array of bch_fs pointers; used by bch2_mount() and bch2_test_super(). */
typedef DARRAY(struct bch_fs *) darray_fs;
1781
1782static int bch2_test_super(struct super_block *s, void *data)
1783{
1784	struct bch_fs *c = s->s_fs_info;
1785	darray_fs *d = data;
1786
1787	if (!c)
1788		return false;
1789
1790	darray_for_each(*d, i)
1791		if (c != *i)
1792			return false;
1793	return true;
1794}
1795
/*
 * ->mount for bcachefs.
 *
 * @fs_type:  filesystem type being mounted
 * @flags:    SB_* mount flags; SB_RDONLY seeds the read_only option
 * @dev_name: device specification, split into individual device paths by
 *            bch2_split_devs()
 * @data:     mount option string
 *
 * First looks for an already-mounted superblock whose filesystem matches
 * every listed device; if there is none, the filesystem is opened and a new
 * superblock is created and set up for it.
 *
 * Returns a referenced root dentry, or an ERR_PTR on failure.
 */
static struct dentry *bch2_mount(struct file_system_type *fs_type,
				 int flags, const char *dev_name, void *data)
{
	struct bch_fs *c;
	struct super_block *sb;
	struct inode *vinode;
	struct bch_opts opts = bch2_opts_empty();
	int ret;

	opt_set(opts, read_only, (flags & SB_RDONLY) != 0);

	/* First parse, without a filesystem; repeated below once it is open. */
	ret = bch2_parse_mount_opts(NULL, &opts, data);
	if (ret)
		return ERR_PTR(ret);

	if (!dev_name || strlen(dev_name) == 0)
		return ERR_PTR(-EINVAL);

	darray_str devs;
	ret = bch2_split_devs(dev_name, &devs);
	if (ret)
		return ERR_PTR(ret);

	/*
	 * Resolve each device path to the filesystem currently using it.
	 * Failed lookups are stored as ERR_PTR values, which never compare
	 * equal to a real filesystem in bch2_test_super().
	 */
	darray_fs devs_to_fs = {};
	darray_for_each(devs, i) {
		ret = darray_push(&devs_to_fs, bch2_path_to_fs(*i));
		if (ret) {
			sb = ERR_PTR(ret);
			goto got_sb;
		}
	}

	/*
	 * Lookup only: bch2_noset_super always returns -EBUSY, so this sget()
	 * call is expected to fail unless an existing superblock matches.
	 */
	sb = sget(fs_type, bch2_test_super, bch2_noset_super, flags|SB_NOSEC, &devs_to_fs);
	if (!IS_ERR(sb))
		goto got_sb;

	c = bch2_fs_open(devs.data, devs.nr, opts);
	if (IS_ERR(c)) {
		sb = ERR_CAST(c);
		goto got_sb;
	}

	/* Some options can't be parsed until after the fs is started: */
	ret = bch2_parse_mount_opts(c, &opts, data);
	if (ret) {
		bch2_fs_stop(c);
		sb = ERR_PTR(ret);
		goto got_sb;
	}

	bch2_opts_apply(&c->opts, opts);

	/* No test callback: always creates a new superblock owning c. */
	sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c);
	if (IS_ERR(sb))
		bch2_fs_stop(c);
got_sb:
	/* Reached on every path past bch2_split_devs(); sb may be an ERR_PTR. */
	darray_exit(&devs_to_fs);
	bch2_darray_str_exit(&devs);

	if (IS_ERR(sb)) {
		ret = PTR_ERR(sb);
		ret = bch2_err_class(ret);
		return ERR_PTR(ret);
	}

	c = sb->s_fs_info;

	/* Existing, fully set up superblock: only the ro/rw state must agree. */
	if (sb->s_root) {
		if ((flags ^ sb->s_flags) & SB_RDONLY) {
			ret = -EBUSY;
			goto err_put_super;
		}
		goto out;
	}

	sb->s_blocksize		= block_bytes(c);
	sb->s_blocksize_bits	= ilog2(block_bytes(c));
	sb->s_maxbytes		= MAX_LFS_FILESIZE;
	sb->s_op		= &bch_super_operations;
	sb->s_export_op		= &bch_export_ops;
#ifdef CONFIG_BCACHEFS_QUOTA
	sb->s_qcop		= &bch2_quotactl_operations;
	sb->s_quota_types	= QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ;
#endif
	sb->s_xattr		= bch2_xattr_handlers;
	sb->s_magic		= BCACHEFS_STATFS_MAGIC;
	sb->s_time_gran		= c->sb.nsec_per_time_unit;
	sb->s_time_min		= div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
	sb->s_time_max		= div_s64(S64_MAX, c->sb.time_units_per_sec);
	c->vfs_sb		= sb;
	strscpy(sb->s_id, c->name, sizeof(sb->s_id));

	ret = super_setup_bdi(sb);
	if (ret)
		goto err_put_super;

	sb->s_bdi->ra_pages		= VM_READAHEAD_PAGES;

	/*
	 * Only the first online member is used. The loop is left early, so the
	 * io_ref is dropped by hand here (presumably the iterator holds it for
	 * the current member; confirm against for_each_online_member()).
	 */
	for_each_online_member(c, ca) {
		struct block_device *bdev = ca->disk_sb.bdev;

		/* XXX: create an anonymous device for multi device filesystems */
		sb->s_bdev	= bdev;
		sb->s_dev	= bdev->bd_dev;
		percpu_ref_put(&ca->io_ref);
		break;
	}

	c->dev = sb->s_dev;

#ifdef CONFIG_BCACHEFS_POSIX_ACL
	if (c->opts.acl)
		sb->s_flags	|= SB_POSIXACL;
#endif

	sb->s_shrink->seeks = 0;

	vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
	ret = PTR_ERR_OR_ZERO(vinode);
	bch_err_msg(c, ret, "mounting: error getting root inode");
	if (ret)
		goto err_put_super;

	sb->s_root = d_make_root(vinode);
	if (!sb->s_root) {
		bch_err(c, "error mounting: error allocating root dentry");
		ret = -ENOMEM;
		goto err_put_super;
	}

	sb->s_flags |= SB_ACTIVE;
out:
	return dget(sb->s_root);

err_put_super:
	deactivate_locked_super(sb);
	return ERR_PTR(bch2_err_class(ret));
}
1934
1935static void bch2_kill_sb(struct super_block *sb)
1936{
1937	struct bch_fs *c = sb->s_fs_info;
1938
1939	generic_shutdown_super(sb);
1940	bch2_fs_free(c);
1941}
1942
1943static struct file_system_type bcache_fs_type = {
1944	.owner		= THIS_MODULE,
1945	.name		= "bcachefs",
1946	.mount		= bch2_mount,
1947	.kill_sb	= bch2_kill_sb,
1948	.fs_flags	= FS_REQUIRES_DEV,
1949};
1950
1951MODULE_ALIAS_FS("bcachefs");
1952
1953void bch2_vfs_exit(void)
1954{
1955	unregister_filesystem(&bcache_fs_type);
1956	kmem_cache_destroy(bch2_inode_cache);
1957}
1958
1959int __init bch2_vfs_init(void)
1960{
1961	int ret = -ENOMEM;
1962
1963	bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT);
1964	if (!bch2_inode_cache)
1965		goto err;
1966
1967	ret = register_filesystem(&bcache_fs_type);
1968	if (ret)
1969		goto err;
1970
1971	return 0;
1972err:
1973	bch2_vfs_exit();
1974	return ret;
1975}
1976
1977#endif /* NO_BCACHEFS_FS */