Linux Audio

Check our new training course

Loading...
v6.13.7
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *
   4 * Copyright (C) 2011 Novell Inc.
   5 */
   6
   7#include <uapi/linux/magic.h>
   8#include <linux/fs.h>
   9#include <linux/namei.h>
  10#include <linux/xattr.h>
  11#include <linux/mount.h>
  12#include <linux/parser.h>
  13#include <linux/module.h>
  14#include <linux/statfs.h>
  15#include <linux/seq_file.h>
  16#include <linux/posix_acl_xattr.h>
  17#include <linux/exportfs.h>
  18#include <linux/file.h>
  19#include <linux/fs_context.h>
  20#include <linux/fs_parser.h>
  21#include "overlayfs.h"
  22#include "params.h"
  23
  24MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
  25MODULE_DESCRIPTION("Overlay filesystem");
  26MODULE_LICENSE("GPL");
  27
  28
  29struct ovl_dir_cache;
  30
  31static struct dentry *ovl_d_real(struct dentry *dentry, enum d_real_type type)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  32{
  33	struct dentry *upper, *lower;
  34	int err;
  35
  36	switch (type) {
  37	case D_REAL_DATA:
  38	case D_REAL_METADATA:
  39		break;
  40	default:
  41		goto bug;
 
 
 
 
 
 
 
 
 
 
  42	}
 
  43
  44	if (!d_is_reg(dentry)) {
  45		/* d_real_inode() is only relevant for regular files */
 
 
 
 
 
  46		return dentry;
  47	}
  48
  49	upper = ovl_dentry_upper(dentry);
  50	if (upper && (type == D_REAL_METADATA ||
  51		      ovl_has_upperdata(d_inode(dentry))))
  52		return upper;
  53
  54	if (type == D_REAL_METADATA) {
  55		lower = ovl_dentry_lower(dentry);
  56		goto real_lower;
  57	}
  58
  59	/*
  60	 * Best effort lazy lookup of lowerdata for D_REAL_DATA case to return
  61	 * the real lowerdata dentry.  The only current caller of d_real() with
  62	 * D_REAL_DATA is d_real_inode() from trace_uprobe and this caller is
  63	 * likely going to be followed reading from the file, before placing
  64	 * uprobes on offset within the file, so lowerdata should be available
  65	 * when setting the uprobe.
  66	 */
  67	err = ovl_verify_lowerdata(dentry);
  68	if (err)
  69		goto bug;
  70	lower = ovl_dentry_lowerdata(dentry);
  71	if (!lower)
  72		goto bug;
 
  73
  74real_lower:
  75	/* Handle recursion into stacked lower fs */
  76	return d_real(lower, type);
  77
 
 
  78bug:
  79	WARN(1, "%s(%pd4, %d): real dentry not found\n", __func__, dentry, type);
 
 
 
  80	return dentry;
  81}
  82
  83static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak)
  84{
  85	int ret = 1;
  86
  87	if (!d)
  88		return 1;
  89
  90	if (weak) {
  91		if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE)
  92			ret =  d->d_op->d_weak_revalidate(d, flags);
  93	} else if (d->d_flags & DCACHE_OP_REVALIDATE) {
  94		ret = d->d_op->d_revalidate(d, flags);
  95		if (!ret) {
  96			if (!(flags & LOOKUP_RCU))
  97				d_invalidate(d);
  98			ret = -ESTALE;
  99		}
 100	}
 101	return ret;
 102}
 103
 104static int ovl_dentry_revalidate_common(struct dentry *dentry,
 105					unsigned int flags, bool weak)
 106{
 107	struct ovl_entry *oe;
 108	struct ovl_path *lowerstack;
 109	struct inode *inode = d_inode_rcu(dentry);
 110	struct dentry *upper;
 111	unsigned int i;
 112	int ret = 1;
 113
 114	/* Careful in RCU mode */
 115	if (!inode)
 116		return -ECHILD;
 117
 118	oe = OVL_I_E(inode);
 119	lowerstack = ovl_lowerstack(oe);
 120	upper = ovl_i_dentry_upper(inode);
 121	if (upper)
 122		ret = ovl_revalidate_real(upper, flags, weak);
 123
 124	for (i = 0; ret > 0 && i < ovl_numlower(oe); i++)
 125		ret = ovl_revalidate_real(lowerstack[i].dentry, flags, weak);
 126
 
 127	return ret;
 128}
 129
 130static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
 131{
 132	return ovl_dentry_revalidate_common(dentry, flags, false);
 133}
 134
 135static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
 136{
 137	return ovl_dentry_revalidate_common(dentry, flags, true);
 138}
 139
 140static const struct dentry_operations ovl_dentry_operations = {
 
 141	.d_real = ovl_d_real,
 142	.d_revalidate = ovl_dentry_revalidate,
 143	.d_weak_revalidate = ovl_dentry_weak_revalidate,
 144};
 145
 146static struct kmem_cache *ovl_inode_cachep;
 147
 148static struct inode *ovl_alloc_inode(struct super_block *sb)
 149{
 150	struct ovl_inode *oi = alloc_inode_sb(sb, ovl_inode_cachep, GFP_KERNEL);
 151
 152	if (!oi)
 153		return NULL;
 154
 155	oi->cache = NULL;
 156	oi->redirect = NULL;
 157	oi->version = 0;
 158	oi->flags = 0;
 159	oi->__upperdentry = NULL;
 160	oi->lowerdata_redirect = NULL;
 161	oi->oe = NULL;
 
 162	mutex_init(&oi->lock);
 163
 164	return &oi->vfs_inode;
 165}
 166
 167static void ovl_free_inode(struct inode *inode)
 168{
 169	struct ovl_inode *oi = OVL_I(inode);
 170
 171	kfree(oi->redirect);
 172	kfree(oi->oe);
 173	mutex_destroy(&oi->lock);
 174	kmem_cache_free(ovl_inode_cachep, oi);
 175}
 176
 177static void ovl_destroy_inode(struct inode *inode)
 178{
 179	struct ovl_inode *oi = OVL_I(inode);
 180
 181	dput(oi->__upperdentry);
 182	ovl_stack_put(ovl_lowerstack(oi->oe), ovl_numlower(oi->oe));
 183	if (S_ISDIR(inode->i_mode))
 184		ovl_dir_cache_free(inode);
 185	else
 186		kfree(oi->lowerdata_redirect);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 187}
 188
 189static void ovl_put_super(struct super_block *sb)
 190{
 191	struct ovl_fs *ofs = OVL_FS(sb);
 192
 193	if (ofs)
 194		ovl_free_fs(ofs);
 195}
 196
 197/* Sync real dirty inodes in upper filesystem (if it exists) */
 198static int ovl_sync_fs(struct super_block *sb, int wait)
 199{
 200	struct ovl_fs *ofs = OVL_FS(sb);
 201	struct super_block *upper_sb;
 202	int ret;
 203
 204	ret = ovl_sync_status(ofs);
 205
 206	if (ret < 0)
 
 
 
 
 
 207		return -EIO;
 
 208
 209	if (!ret)
 210		return ret;
 211
 212	/*
 213	 * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
 214	 * All the super blocks will be iterated, including upper_sb.
 215	 *
 216	 * If this is a syncfs(2) call, then we do need to call
 217	 * sync_filesystem() on upper_sb, but enough if we do it when being
 218	 * called with wait == 1.
 219	 */
 220	if (!wait)
 221		return 0;
 222
 223	upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
 224
 225	down_read(&upper_sb->s_umount);
 226	ret = sync_filesystem(upper_sb);
 227	up_read(&upper_sb->s_umount);
 228
 229	return ret;
 230}
 231
 232/**
 233 * ovl_statfs
 234 * @dentry: The dentry to query
 235 * @buf: The struct kstatfs to fill in with stats
 236 *
 237 * Get the filesystem statistics.  As writes always target the upper layer
 238 * filesystem pass the statfs to the upper filesystem (if it exists)
 239 */
 240static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
 241{
 242	struct super_block *sb = dentry->d_sb;
 243	struct ovl_fs *ofs = OVL_FS(sb);
 244	struct dentry *root_dentry = sb->s_root;
 245	struct path path;
 246	int err;
 247
 248	ovl_path_real(root_dentry, &path);
 249
 250	err = vfs_statfs(&path, buf);
 251	if (!err) {
 252		buf->f_namelen = ofs->namelen;
 253		buf->f_type = OVERLAYFS_SUPER_MAGIC;
 254		if (ovl_has_fsid(ofs))
 255			buf->f_fsid = uuid_to_fsid(sb->s_uuid.b);
 256	}
 257
 258	return err;
 259}
 260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 261static const struct super_operations ovl_super_operations = {
 262	.alloc_inode	= ovl_alloc_inode,
 263	.free_inode	= ovl_free_inode,
 264	.destroy_inode	= ovl_destroy_inode,
 265	.drop_inode	= generic_delete_inode,
 266	.put_super	= ovl_put_super,
 267	.sync_fs	= ovl_sync_fs,
 268	.statfs		= ovl_statfs,
 269	.show_options	= ovl_show_options,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 270};
 271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 272#define OVL_WORKDIR_NAME "work"
 273#define OVL_INDEXDIR_NAME "index"
 274
 275static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
 276					 const char *name, bool persist)
 277{
 278	struct inode *dir =  ofs->workbasedir->d_inode;
 279	struct vfsmount *mnt = ovl_upper_mnt(ofs);
 280	struct dentry *work;
 281	int err;
 282	bool retried = false;
 283
 284	inode_lock_nested(dir, I_MUTEX_PARENT);
 285retry:
 286	work = ovl_lookup_upper(ofs, name, ofs->workbasedir, strlen(name));
 287
 288	if (!IS_ERR(work)) {
 289		struct iattr attr = {
 290			.ia_valid = ATTR_MODE,
 291			.ia_mode = S_IFDIR | 0,
 292		};
 293
 294		if (work->d_inode) {
 295			err = -EEXIST;
 296			if (retried)
 297				goto out_dput;
 298
 299			if (persist)
 300				goto out_unlock;
 301
 302			retried = true;
 303			err = ovl_workdir_cleanup(ofs, dir, mnt, work, 0);
 304			dput(work);
 305			if (err == -EINVAL) {
 306				work = ERR_PTR(err);
 307				goto out_unlock;
 308			}
 309			goto retry;
 310		}
 311
 312		err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode);
 313		if (err)
 314			goto out_dput;
 315
 316		/* Weird filesystem returning with hashed negative (kernfs)? */
 317		err = -EINVAL;
 318		if (d_really_is_negative(work))
 319			goto out_dput;
 320
 321		/*
 322		 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
 323		 *
 324		 * a) success (there was a POSIX ACL xattr and was removed)
 325		 * b) -ENODATA (there was no POSIX ACL xattr)
 326		 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
 327		 *
 328		 * There are various other error values that could effectively
 329		 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
 330		 * if the xattr name is too long), but the set of filesystems
 331		 * allowed as upper are limited to "normal" ones, where checking
 332		 * for the above two errors is sufficient.
 333		 */
 334		err = ovl_do_remove_acl(ofs, work, XATTR_NAME_POSIX_ACL_DEFAULT);
 335		if (err && err != -ENODATA && err != -EOPNOTSUPP)
 336			goto out_dput;
 337
 338		err = ovl_do_remove_acl(ofs, work, XATTR_NAME_POSIX_ACL_ACCESS);
 339		if (err && err != -ENODATA && err != -EOPNOTSUPP)
 340			goto out_dput;
 341
 342		/* Clear any inherited mode bits */
 343		inode_lock(work->d_inode);
 344		err = ovl_do_notify_change(ofs, work, &attr);
 345		inode_unlock(work->d_inode);
 346		if (err)
 347			goto out_dput;
 348	} else {
 349		err = PTR_ERR(work);
 350		goto out_err;
 351	}
 352out_unlock:
 353	inode_unlock(dir);
 354	return work;
 355
 356out_dput:
 357	dput(work);
 358out_err:
 359	pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n",
 360		ofs->config.workdir, name, -err);
 361	work = NULL;
 362	goto out_unlock;
 363}
 364
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 365static int ovl_check_namelen(const struct path *path, struct ovl_fs *ofs,
 366			     const char *name)
 367{
 368	struct kstatfs statfs;
 369	int err = vfs_statfs(path, &statfs);
 370
 371	if (err)
 372		pr_err("statfs failed on '%s'\n", name);
 373	else
 374		ofs->namelen = max(ofs->namelen, statfs.f_namelen);
 375
 376	return err;
 377}
 378
 379static int ovl_lower_dir(const char *name, struct path *path,
 380			 struct ovl_fs *ofs, int *stack_depth)
 381{
 382	int fh_type;
 383	int err;
 384
 
 
 
 
 385	err = ovl_check_namelen(path, ofs, name);
 386	if (err)
 387		return err;
 388
 389	*stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
 390
 391	/*
 392	 * The inodes index feature and NFS export need to encode and decode
 393	 * file handles, so they require that all layers support them.
 394	 */
 395	fh_type = ovl_can_decode_fh(path->dentry->d_sb);
 396	if ((ofs->config.nfs_export ||
 397	     (ofs->config.index && ofs->config.upperdir)) && !fh_type) {
 398		ofs->config.index = false;
 399		ofs->config.nfs_export = false;
 400		pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
 401			name);
 402	}
 403	ofs->nofh |= !fh_type;
 404	/*
 405	 * Decoding origin file handle is required for persistent st_ino.
 406	 * Without persistent st_ino, xino=auto falls back to xino=off.
 407	 */
 408	if (ofs->config.xino == OVL_XINO_AUTO &&
 409	    ofs->config.upperdir && !fh_type) {
 410		ofs->config.xino = OVL_XINO_OFF;
 411		pr_warn("fs on '%s' does not support file handles, falling back to xino=off.\n",
 412			name);
 413	}
 414
 415	/* Check if lower fs has 32bit inode numbers */
 416	if (fh_type != FILEID_INO32_GEN)
 417		ofs->xino_mode = -1;
 418
 419	return 0;
 420}
 421
 422/* Workdir should not be subdir of upperdir and vice versa */
 423static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
 424{
 425	bool ok = false;
 426
 427	if (workdir != upperdir) {
 428		struct dentry *trap = lock_rename(workdir, upperdir);
 429		if (!IS_ERR(trap))
 430			unlock_rename(workdir, upperdir);
 431		ok = (trap == NULL);
 432	}
 433	return ok;
 434}
 435
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 436static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
 437			  struct inode **ptrap, const char *name)
 438{
 439	struct inode *trap;
 440	int err;
 441
 442	trap = ovl_get_trap_inode(sb, dir);
 443	err = PTR_ERR_OR_ZERO(trap);
 444	if (err) {
 445		if (err == -ELOOP)
 446			pr_err("conflicting %s path\n", name);
 447		return err;
 448	}
 449
 450	*ptrap = trap;
 451	return 0;
 452}
 453
 454/*
 455 * Determine how we treat concurrent use of upperdir/workdir based on the
 456 * index feature. This is papering over mount leaks of container runtimes,
 457 * for example, an old overlay mount is leaked and now its upperdir is
 458 * attempted to be used as a lower layer in a new overlay mount.
 459 */
 460static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
 461{
 462	if (ofs->config.index) {
 463		pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
 464		       name);
 465		return -EBUSY;
 466	} else {
 467		pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
 468			name);
 469		return 0;
 470	}
 471}
 472
 473static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
 474			 struct ovl_layer *upper_layer,
 475			 const struct path *upperpath)
 476{
 477	struct vfsmount *upper_mnt;
 478	int err;
 479
 
 
 
 
 480	/* Upperdir path should not be r/o */
 481	if (__mnt_is_readonly(upperpath->mnt)) {
 482		pr_err("upper fs is r/o, try multi-lower layers mount\n");
 483		err = -EINVAL;
 484		goto out;
 485	}
 486
 487	err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
 488	if (err)
 489		goto out;
 490
 491	err = ovl_setup_trap(sb, upperpath->dentry, &upper_layer->trap,
 492			     "upperdir");
 493	if (err)
 494		goto out;
 495
 496	upper_mnt = clone_private_mount(upperpath);
 497	err = PTR_ERR(upper_mnt);
 498	if (IS_ERR(upper_mnt)) {
 499		pr_err("failed to clone upperpath\n");
 500		goto out;
 501	}
 502
 503	/* Don't inherit atime flags */
 504	upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
 505	upper_layer->mnt = upper_mnt;
 506	upper_layer->idx = 0;
 507	upper_layer->fsid = 0;
 508
 509	/*
 510	 * Inherit SB_NOSEC flag from upperdir.
 511	 *
 512	 * This optimization changes behavior when a security related attribute
 513	 * (suid/sgid/security.*) is changed on an underlying layer.  This is
 514	 * okay because we don't yet have guarantees in that case, but it will
 515	 * need careful treatment once we want to honour changes to underlying
 516	 * filesystems.
 517	 */
 518	if (upper_mnt->mnt_sb->s_flags & SB_NOSEC)
 519		sb->s_flags |= SB_NOSEC;
 520
 521	if (ovl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) {
 522		ofs->upperdir_locked = true;
 523	} else {
 524		err = ovl_report_in_use(ofs, "upperdir");
 525		if (err)
 526			goto out;
 527	}
 528
 529	err = 0;
 530out:
 531	return err;
 532}
 533
 534/*
 535 * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and
 536 * negative values if error is encountered.
 537 */
 538static int ovl_check_rename_whiteout(struct ovl_fs *ofs)
 539{
 540	struct dentry *workdir = ofs->workdir;
 541	struct inode *dir = d_inode(workdir);
 542	struct dentry *temp;
 543	struct dentry *dest;
 544	struct dentry *whiteout;
 545	struct name_snapshot name;
 546	int err;
 547
 548	inode_lock_nested(dir, I_MUTEX_PARENT);
 549
 550	temp = ovl_create_temp(ofs, workdir, OVL_CATTR(S_IFREG | 0));
 551	err = PTR_ERR(temp);
 552	if (IS_ERR(temp))
 553		goto out_unlock;
 554
 555	dest = ovl_lookup_temp(ofs, workdir);
 556	err = PTR_ERR(dest);
 557	if (IS_ERR(dest)) {
 558		dput(temp);
 559		goto out_unlock;
 560	}
 561
 562	/* Name is inline and stable - using snapshot as a copy helper */
 563	take_dentry_name_snapshot(&name, temp);
 564	err = ovl_do_rename(ofs, dir, temp, dir, dest, RENAME_WHITEOUT);
 565	if (err) {
 566		if (err == -EINVAL)
 567			err = 0;
 568		goto cleanup_temp;
 569	}
 570
 571	whiteout = ovl_lookup_upper(ofs, name.name.name, workdir, name.name.len);
 572	err = PTR_ERR(whiteout);
 573	if (IS_ERR(whiteout))
 574		goto cleanup_temp;
 575
 576	err = ovl_upper_is_whiteout(ofs, whiteout);
 577
 578	/* Best effort cleanup of whiteout and temp file */
 579	if (err)
 580		ovl_cleanup(ofs, dir, whiteout);
 581	dput(whiteout);
 582
 583cleanup_temp:
 584	ovl_cleanup(ofs, dir, temp);
 585	release_dentry_name_snapshot(&name);
 586	dput(temp);
 587	dput(dest);
 588
 589out_unlock:
 590	inode_unlock(dir);
 591
 592	return err;
 593}
 594
 595static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs,
 596					   struct dentry *parent,
 597					   const char *name, umode_t mode)
 598{
 599	size_t len = strlen(name);
 600	struct dentry *child;
 601
 602	inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
 603	child = ovl_lookup_upper(ofs, name, parent, len);
 604	if (!IS_ERR(child) && !child->d_inode)
 605		child = ovl_create_real(ofs, parent->d_inode, child,
 606					OVL_CATTR(mode));
 607	inode_unlock(parent->d_inode);
 608	dput(parent);
 609
 610	return child;
 611}
 612
 613/*
 614 * Creates $workdir/work/incompat/volatile/dirty file if it is not already
 615 * present.
 616 */
 617static int ovl_create_volatile_dirty(struct ovl_fs *ofs)
 618{
 619	unsigned int ctr;
 620	struct dentry *d = dget(ofs->workbasedir);
 621	static const char *const volatile_path[] = {
 622		OVL_WORKDIR_NAME, "incompat", "volatile", "dirty"
 623	};
 624	const char *const *name = volatile_path;
 625
 626	for (ctr = ARRAY_SIZE(volatile_path); ctr; ctr--, name++) {
 627		d = ovl_lookup_or_create(ofs, d, *name, ctr > 1 ? S_IFDIR : S_IFREG);
 628		if (IS_ERR(d))
 629			return PTR_ERR(d);
 630	}
 631	dput(d);
 632	return 0;
 633}
 634
 635static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
 636			    const struct path *workpath)
 637{
 638	struct vfsmount *mnt = ovl_upper_mnt(ofs);
 639	struct dentry *workdir;
 640	struct file *tmpfile;
 641	bool rename_whiteout;
 642	bool d_type;
 643	int fh_type;
 644	int err;
 645
 646	err = mnt_want_write(mnt);
 647	if (err)
 648		return err;
 649
 650	workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
 651	err = PTR_ERR(workdir);
 652	if (IS_ERR_OR_NULL(workdir))
 653		goto out;
 654
 655	ofs->workdir = workdir;
 656
 657	err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
 658	if (err)
 659		goto out;
 660
 661	/*
 662	 * Upper should support d_type, else whiteouts are visible.  Given
 663	 * workdir and upper are on same fs, we can do iterate_dir() on
 664	 * workdir. This check requires successful creation of workdir in
 665	 * previous step.
 666	 */
 667	err = ovl_check_d_type_supported(workpath);
 668	if (err < 0)
 669		goto out;
 670
 671	d_type = err;
 672	if (!d_type)
 673		pr_warn("upper fs needs to support d_type.\n");
 674
 675	/* Check if upper/work fs supports O_TMPFILE */
 676	tmpfile = ovl_do_tmpfile(ofs, ofs->workdir, S_IFREG | 0);
 677	ofs->tmpfile = !IS_ERR(tmpfile);
 678	if (ofs->tmpfile)
 679		fput(tmpfile);
 680	else
 681		pr_warn("upper fs does not support tmpfile.\n");
 682
 683
 684	/* Check if upper/work fs supports RENAME_WHITEOUT */
 685	err = ovl_check_rename_whiteout(ofs);
 686	if (err < 0)
 687		goto out;
 688
 689	rename_whiteout = err;
 690	if (!rename_whiteout)
 691		pr_warn("upper fs does not support RENAME_WHITEOUT.\n");
 692
 693	/*
 694	 * Check if upper/work fs supports (trusted|user).overlay.* xattr
 695	 */
 696	err = ovl_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1);
 697	if (err) {
 698		pr_warn("failed to set xattr on upper\n");
 699		ofs->noxattr = true;
 700		if (ovl_redirect_follow(ofs)) {
 701			ofs->config.redirect_mode = OVL_REDIRECT_NOFOLLOW;
 702			pr_warn("...falling back to redirect_dir=nofollow.\n");
 703		}
 704		if (ofs->config.metacopy) {
 705			ofs->config.metacopy = false;
 706			pr_warn("...falling back to metacopy=off.\n");
 707		}
 708		if (ofs->config.index) {
 709			ofs->config.index = false;
 710			pr_warn("...falling back to index=off.\n");
 711		}
 712		if (ovl_has_fsid(ofs)) {
 713			ofs->config.uuid = OVL_UUID_NULL;
 714			pr_warn("...falling back to uuid=null.\n");
 715		}
 716		/*
 717		 * xattr support is required for persistent st_ino.
 718		 * Without persistent st_ino, xino=auto falls back to xino=off.
 719		 */
 720		if (ofs->config.xino == OVL_XINO_AUTO) {
 721			ofs->config.xino = OVL_XINO_OFF;
 722			pr_warn("...falling back to xino=off.\n");
 723		}
 724		if (err == -EPERM && !ofs->config.userxattr)
 725			pr_info("try mounting with 'userxattr' option\n");
 726		err = 0;
 727	} else {
 728		ovl_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE);
 729	}
 730
 731	/*
 732	 * We allowed sub-optimal upper fs configuration and don't want to break
 733	 * users over kernel upgrade, but we never allowed remote upper fs, so
 734	 * we can enforce strict requirements for remote upper fs.
 735	 */
 736	if (ovl_dentry_remote(ofs->workdir) &&
 737	    (!d_type || !rename_whiteout || ofs->noxattr)) {
 738		pr_err("upper fs missing required features.\n");
 739		err = -EINVAL;
 740		goto out;
 741	}
 742
 743	/*
 744	 * For volatile mount, create a incompat/volatile/dirty file to keep
 745	 * track of it.
 746	 */
 747	if (ofs->config.ovl_volatile) {
 748		err = ovl_create_volatile_dirty(ofs);
 749		if (err < 0) {
 750			pr_err("Failed to create volatile/dirty file.\n");
 751			goto out;
 752		}
 753	}
 754
 755	/* Check if upper/work fs supports file handles */
 756	fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
 757	if (ofs->config.index && !fh_type) {
 758		ofs->config.index = false;
 759		pr_warn("upper fs does not support file handles, falling back to index=off.\n");
 760	}
 761	ofs->nofh |= !fh_type;
 762
 763	/* Check if upper fs has 32bit inode numbers */
 764	if (fh_type != FILEID_INO32_GEN)
 765		ofs->xino_mode = -1;
 766
 767	/* NFS export of r/w mount depends on index */
 768	if (ofs->config.nfs_export && !ofs->config.index) {
 769		pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
 770		ofs->config.nfs_export = false;
 771	}
 772out:
 773	mnt_drop_write(mnt);
 774	return err;
 775}
 776
 777static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
 778			   const struct path *upperpath,
 779			   const struct path *workpath)
 780{
 781	int err;
 
 
 
 
 
 782
 783	err = -EINVAL;
 784	if (upperpath->mnt != workpath->mnt) {
 785		pr_err("workdir and upperdir must reside under the same mount\n");
 786		return err;
 787	}
 788	if (!ovl_workdir_ok(workpath->dentry, upperpath->dentry)) {
 789		pr_err("workdir and upperdir must be separate subtrees\n");
 790		return err;
 791	}
 792
 793	ofs->workbasedir = dget(workpath->dentry);
 794
 795	if (ovl_inuse_trylock(ofs->workbasedir)) {
 796		ofs->workdir_locked = true;
 797	} else {
 798		err = ovl_report_in_use(ofs, "workdir");
 799		if (err)
 800			return err;
 801	}
 802
 803	err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap,
 804			     "workdir");
 805	if (err)
 806		return err;
 807
 808	return ovl_make_workdir(sb, ofs, workpath);
 
 
 
 
 
 809}
 810
 811static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
 812			    struct ovl_entry *oe, const struct path *upperpath)
 813{
 814	struct vfsmount *mnt = ovl_upper_mnt(ofs);
 815	struct dentry *indexdir;
 816	struct dentry *origin = ovl_lowerstack(oe)->dentry;
 817	const struct ovl_fh *fh;
 818	int err;
 819
 820	fh = ovl_get_origin_fh(ofs, origin);
 821	if (IS_ERR(fh))
 822		return PTR_ERR(fh);
 823
 824	err = mnt_want_write(mnt);
 825	if (err)
 826		goto out_free_fh;
 827
 828	/* Verify lower root is upper root origin */
 829	err = ovl_verify_origin_fh(ofs, upperpath->dentry, fh, true);
 
 830	if (err) {
 831		pr_err("failed to verify upper root origin\n");
 832		goto out;
 833	}
 834
 835	/* index dir will act also as workdir */
 836	iput(ofs->workdir_trap);
 837	ofs->workdir_trap = NULL;
 838	dput(ofs->workdir);
 839	ofs->workdir = NULL;
 840	indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
 841	if (IS_ERR(indexdir)) {
 842		err = PTR_ERR(indexdir);
 843	} else if (indexdir) {
 844		ofs->workdir = indexdir;
 845		err = ovl_setup_trap(sb, indexdir, &ofs->workdir_trap,
 
 
 846				     "indexdir");
 847		if (err)
 848			goto out;
 849
 850		/*
 851		 * Verify upper root is exclusively associated with index dir.
 852		 * Older kernels stored upper fh in ".overlay.origin"
 853		 * xattr. If that xattr exists, verify that it is a match to
 854		 * upper dir file handle. In any case, verify or set xattr
 855		 * ".overlay.upper" to indicate that index may have
 856		 * directory entries.
 857		 */
 858		if (ovl_check_origin_xattr(ofs, indexdir)) {
 859			err = ovl_verify_origin_xattr(ofs, indexdir,
 860						      OVL_XATTR_ORIGIN,
 861						      upperpath->dentry, true,
 862						      false);
 863			if (err)
 864				pr_err("failed to verify index dir 'origin' xattr\n");
 865		}
 866		err = ovl_verify_upper(ofs, indexdir, upperpath->dentry, true);
 
 867		if (err)
 868			pr_err("failed to verify index dir 'upper' xattr\n");
 869
 870		/* Cleanup bad/stale/orphan index entries */
 871		if (!err)
 872			err = ovl_indexdir_cleanup(ofs);
 873	}
 874	if (err || !indexdir)
 875		pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
 876
 877out:
 878	mnt_drop_write(mnt);
 879out_free_fh:
 880	kfree(fh);
 881	return err;
 882}
 883
 884static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
 885{
 886	unsigned int i;
 887
 888	if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs))
 889		return true;
 890
 891	/*
 892	 * We allow using single lower with null uuid for index and nfs_export
 893	 * for example to support those features with single lower squashfs.
 894	 * To avoid regressions in setups of overlay with re-formatted lower
 895	 * squashfs, do not allow decoding origin with lower null uuid unless
 896	 * user opted-in to one of the new features that require following the
 897	 * lower inode of non-dir upper.
 898	 */
 899	if (ovl_allow_offline_changes(ofs) && uuid_is_null(uuid))
 900		return false;
 901
 902	for (i = 0; i < ofs->numfs; i++) {
 903		/*
 904		 * We use uuid to associate an overlay lower file handle with a
 905		 * lower layer, so we can accept lower fs with null uuid as long
 906		 * as all lower layers with null uuid are on the same fs.
 907		 * if we detect multiple lower fs with the same uuid, we
 908		 * disable lower file handle decoding on all of them.
 909		 */
 910		if (ofs->fs[i].is_lower &&
 911		    uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) {
 912			ofs->fs[i].bad_uuid = true;
 913			return false;
 914		}
 915	}
 916	return true;
 917}
 918
 919/* Get a unique fsid for the layer */
 920static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
 921{
 922	struct super_block *sb = path->mnt->mnt_sb;
 923	unsigned int i;
 924	dev_t dev;
 925	int err;
 926	bool bad_uuid = false;
 927	bool warn = false;
 928
 929	for (i = 0; i < ofs->numfs; i++) {
 930		if (ofs->fs[i].sb == sb)
 931			return i;
 932	}
 933
 934	if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
 935		bad_uuid = true;
 936		if (ofs->config.xino == OVL_XINO_AUTO) {
 937			ofs->config.xino = OVL_XINO_OFF;
 938			warn = true;
 939		}
 940		if (ofs->config.index || ofs->config.nfs_export) {
 941			ofs->config.index = false;
 942			ofs->config.nfs_export = false;
 943			warn = true;
 944		}
 945		if (warn) {
 946			pr_warn("%s uuid detected in lower fs '%pd2', falling back to xino=%s,index=off,nfs_export=off.\n",
 947				uuid_is_null(&sb->s_uuid) ? "null" :
 948							    "conflicting",
 949				path->dentry, ovl_xino_mode(&ofs->config));
 950		}
 951	}
 952
 953	err = get_anon_bdev(&dev);
 954	if (err) {
 955		pr_err("failed to get anonymous bdev for lowerpath\n");
 956		return err;
 957	}
 958
 959	ofs->fs[ofs->numfs].sb = sb;
 960	ofs->fs[ofs->numfs].pseudo_dev = dev;
 961	ofs->fs[ofs->numfs].bad_uuid = bad_uuid;
 962
 963	return ofs->numfs++;
 964}
 965
 966/*
 967 * The fsid after the last lower fsid is used for the data layers.
 968 * It is a "null fs" with a null sb, null uuid, and no pseudo dev.
 969 */
 970static int ovl_get_data_fsid(struct ovl_fs *ofs)
 971{
 972	return ofs->numfs;
 973}
 974
 975
 976static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
 977			  struct ovl_fs_context *ctx, struct ovl_layer *layers)
 
 978{
 979	int err;
 980	unsigned int i;
 981	size_t nr_merged_lower;
 982
 983	ofs->fs = kcalloc(ctx->nr + 2, sizeof(struct ovl_sb), GFP_KERNEL);
 
 984	if (ofs->fs == NULL)
 985		return -ENOMEM;
 986
 987	/*
 988	 * idx/fsid 0 are reserved for upper fs even with lower only overlay
 989	 * and the last fsid is reserved for "null fs" of the data layers.
 990	 */
 991	ofs->numfs++;
 992
 993	/*
 994	 * All lower layers that share the same fs as upper layer, use the same
 995	 * pseudo_dev as upper layer.  Allocate fs[0].pseudo_dev even for lower
 996	 * only overlay to simplify ovl_fs_free().
 997	 * is_lower will be set if upper fs is shared with a lower layer.
 998	 */
 999	err = get_anon_bdev(&ofs->fs[0].pseudo_dev);
1000	if (err) {
1001		pr_err("failed to get anonymous bdev for upper fs\n");
1002		return err;
1003	}
1004
1005	if (ovl_upper_mnt(ofs)) {
1006		ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb;
1007		ofs->fs[0].is_lower = false;
1008	}
1009
1010	nr_merged_lower = ctx->nr - ctx->nr_data;
1011	for (i = 0; i < ctx->nr; i++) {
1012		struct ovl_fs_context_layer *l = &ctx->lower[i];
1013		struct vfsmount *mnt;
1014		struct inode *trap;
1015		int fsid;
1016
1017		if (i < nr_merged_lower)
1018			fsid = ovl_get_fsid(ofs, &l->path);
1019		else
1020			fsid = ovl_get_data_fsid(ofs);
1021		if (fsid < 0)
1022			return fsid;
1023
1024		/*
1025		 * Check if lower root conflicts with this overlay layers before
1026		 * checking if it is in-use as upperdir/workdir of "another"
1027		 * mount, because we do not bother to check in ovl_is_inuse() if
1028		 * the upperdir/workdir is in fact in-use by our
1029		 * upperdir/workdir.
1030		 */
1031		err = ovl_setup_trap(sb, l->path.dentry, &trap, "lowerdir");
1032		if (err)
1033			return err;
1034
1035		if (ovl_is_inuse(l->path.dentry)) {
1036			err = ovl_report_in_use(ofs, "lowerdir");
1037			if (err) {
1038				iput(trap);
1039				return err;
1040			}
1041		}
1042
1043		mnt = clone_private_mount(&l->path);
1044		err = PTR_ERR(mnt);
1045		if (IS_ERR(mnt)) {
1046			pr_err("failed to clone lowerpath\n");
1047			iput(trap);
1048			return err;
1049		}
1050
1051		/*
1052		 * Make lower layers R/O.  That way fchmod/fchown on lower file
1053		 * will fail instead of modifying lower fs.
1054		 */
1055		mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
1056
1057		layers[ofs->numlayer].trap = trap;
1058		layers[ofs->numlayer].mnt = mnt;
1059		layers[ofs->numlayer].idx = ofs->numlayer;
1060		layers[ofs->numlayer].fsid = fsid;
1061		layers[ofs->numlayer].fs = &ofs->fs[fsid];
1062		/* Store for printing lowerdir=... in ovl_show_options() */
1063		ofs->config.lowerdirs[ofs->numlayer] = l->name;
1064		l->name = NULL;
1065		ofs->numlayer++;
1066		ofs->fs[fsid].is_lower = true;
1067	}
1068
1069	/*
1070	 * When all layers on same fs, overlay can use real inode numbers.
1071	 * With mount option "xino=<on|auto>", mounter declares that there are
1072	 * enough free high bits in underlying fs to hold the unique fsid.
1073	 * If overlayfs does encounter underlying inodes using the high xino
1074	 * bits reserved for fsid, it emits a warning and uses the original
1075	 * inode number or a non persistent inode number allocated from a
1076	 * dedicated range.
1077	 */
1078	if (ofs->numfs - !ovl_upper_mnt(ofs) == 1) {
1079		if (ofs->config.xino == OVL_XINO_ON)
1080			pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
1081		ofs->xino_mode = 0;
1082	} else if (ofs->config.xino == OVL_XINO_OFF) {
1083		ofs->xino_mode = -1;
1084	} else if (ofs->xino_mode < 0) {
1085		/*
1086		 * This is a roundup of number of bits needed for encoding
1087		 * fsid, where fsid 0 is reserved for upper fs (even with
1088		 * lower only overlay) +1 extra bit is reserved for the non
1089		 * persistent inode number range that is used for resolving
1090		 * xino lower bits overflow.
1091		 */
1092		BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30);
1093		ofs->xino_mode = ilog2(ofs->numfs - 1) + 2;
1094	}
1095
1096	if (ofs->xino_mode > 0) {
1097		pr_info("\"xino\" feature enabled using %d upper inode bits.\n",
1098			ofs->xino_mode);
1099	}
1100
1101	return 0;
 
 
1102}
1103
1104static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
1105					    struct ovl_fs_context *ctx,
1106					    struct ovl_fs *ofs,
1107					    struct ovl_layer *layers)
1108{
1109	int err;
 
1110	unsigned int i;
1111	size_t nr_merged_lower;
1112	struct ovl_entry *oe;
1113	struct ovl_path *lowerstack;
1114
1115	struct ovl_fs_context_layer *l;
1116
1117	if (!ofs->config.upperdir && ctx->nr == 1) {
1118		pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
1119		return ERR_PTR(-EINVAL);
1120	}
1121
1122	err = -EINVAL;
1123	for (i = 0; i < ctx->nr; i++) {
1124		l = &ctx->lower[i];
1125
1126		err = ovl_lower_dir(l->name, &l->path, ofs, &sb->s_stack_depth);
 
 
1127		if (err)
1128			return ERR_PTR(err);
 
 
1129	}
1130
1131	err = -EINVAL;
1132	sb->s_stack_depth++;
1133	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
1134		pr_err("maximum fs stacking depth exceeded\n");
1135		return ERR_PTR(err);
1136	}
1137
1138	err = ovl_get_layers(sb, ofs, ctx, layers);
1139	if (err)
1140		return ERR_PTR(err);
1141
1142	err = -ENOMEM;
1143	/* Data-only layers are not merged in root directory */
1144	nr_merged_lower = ctx->nr - ctx->nr_data;
1145	oe = ovl_alloc_entry(nr_merged_lower);
1146	if (!oe)
1147		return ERR_PTR(err);
1148
1149	lowerstack = ovl_lowerstack(oe);
1150	for (i = 0; i < nr_merged_lower; i++) {
1151		l = &ctx->lower[i];
1152		lowerstack[i].dentry = dget(l->path.dentry);
1153		lowerstack[i].layer = &ofs->layers[i + 1];
1154	}
1155	ofs->numdatalayer = ctx->nr_data;
 
 
 
 
1156
1157	return oe;
 
 
 
 
1158}
1159
1160/*
1161 * Check if this layer root is a descendant of:
1162 * - another layer of this overlayfs instance
1163 * - upper/work dir of any overlayfs instance
1164 */
1165static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
1166			   struct dentry *dentry, const char *name,
1167			   bool is_lower)
1168{
1169	struct dentry *next = dentry, *parent;
1170	int err = 0;
1171
1172	if (!dentry)
1173		return 0;
1174
1175	parent = dget_parent(next);
1176
1177	/* Walk back ancestors to root (inclusive) looking for traps */
1178	while (!err && parent != next) {
1179		if (is_lower && ovl_lookup_trap_inode(sb, parent)) {
1180			err = -ELOOP;
1181			pr_err("overlapping %s path\n", name);
1182		} else if (ovl_is_inuse(parent)) {
1183			err = ovl_report_in_use(ofs, name);
1184		}
1185		next = parent;
1186		parent = dget_parent(next);
1187		dput(next);
1188	}
1189
1190	dput(parent);
1191
1192	return err;
1193}
1194
1195/*
1196 * Check if any of the layers or work dirs overlap.
1197 */
1198static int ovl_check_overlapping_layers(struct super_block *sb,
1199					struct ovl_fs *ofs)
1200{
1201	int i, err;
1202
1203	if (ovl_upper_mnt(ofs)) {
1204		err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root,
1205				      "upperdir", false);
1206		if (err)
1207			return err;
1208
1209		/*
1210		 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of
1211		 * this instance and covers overlapping work and index dirs,
1212		 * unless work or index dir have been moved since created inside
1213		 * workbasedir.  In that case, we already have their traps in
1214		 * inode cache and we will catch that case on lookup.
1215		 */
1216		err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir",
1217				      false);
1218		if (err)
1219			return err;
1220	}
1221
1222	for (i = 1; i < ofs->numlayer; i++) {
1223		err = ovl_check_layer(sb, ofs,
1224				      ofs->layers[i].mnt->mnt_root,
1225				      "lowerdir", true);
1226		if (err)
1227			return err;
1228	}
1229
1230	return 0;
1231}
1232
1233static struct dentry *ovl_get_root(struct super_block *sb,
1234				   struct dentry *upperdentry,
1235				   struct ovl_entry *oe)
1236{
1237	struct dentry *root;
1238	struct ovl_fs *ofs = OVL_FS(sb);
1239	struct ovl_path *lowerpath = ovl_lowerstack(oe);
1240	unsigned long ino = d_inode(lowerpath->dentry)->i_ino;
1241	int fsid = lowerpath->layer->fsid;
1242	struct ovl_inode_params oip = {
1243		.upperdentry = upperdentry,
1244		.oe = oe,
1245	};
1246
1247	root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
1248	if (!root)
1249		return NULL;
1250
 
 
1251	if (upperdentry) {
1252		/* Root inode uses upper st_ino/i_ino */
1253		ino = d_inode(upperdentry)->i_ino;
1254		fsid = 0;
1255		ovl_dentry_set_upper_alias(root);
1256		if (ovl_is_impuredir(sb, upperdentry))
1257			ovl_set_flag(OVL_IMPURE, d_inode(root));
1258	}
1259
1260	/* Look for xwhiteouts marker except in the lowermost layer */
1261	for (int i = 0; i < ovl_numlower(oe) - 1; i++, lowerpath++) {
1262		struct path path = {
1263			.mnt = lowerpath->layer->mnt,
1264			.dentry = lowerpath->dentry,
1265		};
1266
1267		/* overlay.opaque=x means xwhiteouts directory */
1268		if (ovl_get_opaquedir_val(ofs, &path) == 'x') {
1269			ovl_layer_set_xwhiteouts(ofs, lowerpath->layer);
1270			ovl_dentry_set_xwhiteouts(root);
1271		}
1272	}
1273
1274	/* Root is always merge -> can have whiteouts */
1275	ovl_set_flag(OVL_WHITEOUTS, d_inode(root));
1276	ovl_dentry_set_flag(OVL_E_CONNECTED, root);
1277	ovl_set_upperdata(d_inode(root));
1278	ovl_inode_init(d_inode(root), &oip, ino, fsid);
1279	ovl_dentry_init_flags(root, upperdentry, oe, DCACHE_OP_WEAK_REVALIDATE);
1280	/* root keeps a reference of upperdentry */
1281	dget(upperdentry);
1282
1283	return root;
1284}
1285
/*
 * Fill in the overlay super block @sb from the mount context @fc.
 *
 * Verifies the parsed parameters, allocates the layer array, sets up the
 * optional upper layer and work dir, builds the lower stack, sets up the
 * optional index dir, checks for overlapping layers and finally creates
 * the root dentry.
 *
 * Returns 0 on success or a negative errno.  On failure the ovl_fs is
 * released with ovl_free_fs() and sb->s_fs_info is cleared.
 */
1286int ovl_fill_super(struct super_block *sb, struct fs_context *fc)
1287{
1288	struct ovl_fs *ofs = sb->s_fs_info;
1289	struct ovl_fs_context *ctx = fc->fs_private;
1290	struct dentry *root_dentry;
1291	struct ovl_entry *oe;

1292	struct ovl_layer *layers;
1293	struct cred *cred;


1294	int err;
1295
1296	err = -EIO;
1297	if (WARN_ON(fc->user_ns != current_user_ns()))
1298		goto out_err;
1299
1300	sb->s_d_op = &ovl_dentry_operations;
1301
1302	err = -ENOMEM;





	/* cred is kept in a local too: its capabilities are adjusted below */
1303	ofs->creator_cred = cred = prepare_creds();
1304	if (!cred)
1305		goto out_err;
1306
1307	err = ovl_fs_params_verify(ctx, &ofs->config);








1308	if (err)
1309		goto out_err;
1310
1311	err = -EINVAL;
1312	if (ctx->nr == 0) {
1313		if (!(fc->sb_flags & SB_SILENT))
1314			pr_err("missing 'lowerdir'\n");
1315		goto out_err;
1316	}
1317
1318	err = -ENOMEM;
	/* One slot per lower layer plus slot 0 for the upper layer */
1319	layers = kcalloc(ctx->nr + 1, sizeof(struct ovl_layer), GFP_KERNEL);
1320	if (!layers)
1321		goto out_err;
1322
1323	ofs->config.lowerdirs = kcalloc(ctx->nr + 1, sizeof(char *), GFP_KERNEL);
1324	if (!ofs->config.lowerdirs) {
		/* layers is not attached to ofs yet, so release it here */
1325		kfree(layers);


1326		goto out_err;
1327	}






1328	ofs->layers = layers;
1329	/*
1330	 * Layer 0 is reserved for upper even if there's no upper.
1331	 * config.lowerdirs[0] is used for storing the user provided colon
1332	 * separated lowerdir string.
1333	 */
1334	ofs->config.lowerdirs[0] = ctx->lowerdir_all;
1335	ctx->lowerdir_all = NULL;
1336	ofs->numlayer = 1;
1337
1338	sb->s_stack_depth = 0;
1339	sb->s_maxbytes = MAX_LFS_FILESIZE;
1340	atomic_long_set(&ofs->last_ino, 1);
1341	/* Assume underlying fs uses 32bit inodes unless proven otherwise */
1342	if (ofs->config.xino != OVL_XINO_OFF) {
1343		ofs->xino_mode = BITS_PER_LONG - 32;
1344		if (!ofs->xino_mode) {
1345			pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n");
1346			ofs->config.xino = OVL_XINO_OFF;
1347		}
1348	}
1349
1350	/* alloc/destroy_inode needed for setting up traps in inode cache */
1351	sb->s_op = &ovl_super_operations;
1352
1353	if (ofs->config.upperdir) {
1354		struct super_block *upper_sb;
1355
1356		err = -EINVAL;
1357		if (!ofs->config.workdir) {
1358			pr_err("missing 'workdir'\n");
1359			goto out_err;
1360		}
1361
1362		err = ovl_get_upper(sb, ofs, &layers[0], &ctx->upper);
1363		if (err)
1364			goto out_err;
1365
1366		upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
1367		if (!ovl_should_sync(ofs)) {
1368			ofs->errseq = errseq_sample(&upper_sb->s_wb_err);
1369			if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) {
1370				err = -EIO;
1371				pr_err("Cannot mount volatile when upperdir has an unseen error. Sync upperdir fs to clear state.\n");
1372				goto out_err;
1373			}
1374		}
1375
1376		err = ovl_get_workdir(sb, ofs, &ctx->upper, &ctx->work);
1377		if (err)
1378			goto out_err;
1379
		/* No usable work dir: the mount is forced read-only */
1380		if (!ofs->workdir)
1381			sb->s_flags |= SB_RDONLY;
1382
1383		sb->s_stack_depth = upper_sb->s_stack_depth;
1384		sb->s_time_gran = upper_sb->s_time_gran;
1385	}
1386	oe = ovl_get_lowerstack(sb, ctx, ofs, layers);
1387	err = PTR_ERR(oe);
1388	if (IS_ERR(oe))
1389		goto out_err;
1390
1391	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
1392	if (!ovl_upper_mnt(ofs))
1393		sb->s_flags |= SB_RDONLY;
1394
1395	if (!ovl_origin_uuid(ofs) && ofs->numfs > 1) {
1396		pr_warn("The uuid=off requires a single fs for lower and upper, falling back to uuid=null.\n");
1397		ofs->config.uuid = OVL_UUID_NULL;
1398	} else if (ovl_has_fsid(ofs) && ovl_upper_mnt(ofs)) {
1399		/* Use per instance persistent uuid/fsid */
1400		ovl_init_uuid_xattr(sb, ofs, &ctx->upper);
1401	}
1402
1403	if (!ovl_force_readonly(ofs) && ofs->config.index) {
1404		err = ovl_get_indexdir(sb, ofs, oe, &ctx->upper);
1405		if (err)
1406			goto out_free_oe;
1407
1408		/* Force r/o mount with no index dir */
1409		if (!ofs->workdir)
1410			sb->s_flags |= SB_RDONLY;
1411	}
1412
1413	err = ovl_check_overlapping_layers(sb, ofs);
1414	if (err)
1415		goto out_free_oe;
1416
1417	/* Show index=off in /proc/mounts for forced r/o mount */
1418	if (!ofs->workdir) {
1419		ofs->config.index = false;
1420		if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) {
1421			pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n");
1422			ofs->config.nfs_export = false;
1423		}
1424	}
1425
1426	if (ofs->config.metacopy && ofs->config.nfs_export) {
1427		pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
1428		ofs->config.nfs_export = false;
1429	}
1430
1431	/*
1432	 * Support encoding decodable file handles with nfs_export=on
1433	 * and encoding non-decodable file handles with nfs_export=off
1434	 * if all layers support file handles.
1435	 */
1436	if (ofs->config.nfs_export)
1437		sb->s_export_op = &ovl_export_operations;
1438	else if (!ofs->nofh)
1439		sb->s_export_op = &ovl_export_fid_operations;
1440
1441	/* Never override disk quota limits or use reserved space */
1442	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
1443
1444	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
1445	sb->s_xattr = ovl_xattr_handlers(ofs);

1446	sb->s_fs_info = ofs;
1447#ifdef CONFIG_FS_POSIX_ACL
1448	sb->s_flags |= SB_POSIXACL;
1449#endif
1450	sb->s_iflags |= SB_I_SKIP_SYNC;
1451	/*
1452	 * Ensure that umask handling is done by the filesystems used
1453	 * for the upper layer instead of overlayfs as that would
1454	 * lead to unexpected results.
1455	 */
1456	sb->s_iflags |= SB_I_NOUMASK;
1457	sb->s_iflags |= SB_I_EVM_HMAC_UNSUPPORTED;
1458
1459	err = -ENOMEM;
1460	root_dentry = ovl_get_root(sb, ctx->upper.dentry, oe);
1461	if (!root_dentry)
1462		goto out_free_oe;
1463



1464	sb->s_root = root_dentry;
1465
1466	return 0;
1467
	/* Failure after the root ovl_entry was built: release it first */
1468out_free_oe:
1469	ovl_free_entry(oe);

	/* Common failure exit: release ofs and detach it from the sb */
1470out_err:


1471	ovl_free_fs(ofs);
1472	sb->s_fs_info = NULL;
1473	return err;
1474}
1475
/*
 * Filesystem type registered as "overlay" by ovl_init() and unregistered
 * by ovl_exit().
 */
1476struct file_system_type ovl_fs_type = {
1477	.owner			= THIS_MODULE,
1478	.name			= "overlay",
1479	.init_fs_context	= ovl_init_fs_context,
1480	.parameters		= ovl_parameter_spec,
1481	.fs_flags		= FS_USERNS_MOUNT,
1482	.kill_sb		= kill_anon_super,





1483};
1484MODULE_ALIAS_FS("overlay");
1485
1486static void ovl_inode_init_once(void *foo)
1487{
1488	struct ovl_inode *oi = foo;
1489
1490	inode_init_once(&oi->vfs_inode);
1491}
1492
1493static int __init ovl_init(void)
1494{
1495	int err;
1496
1497	ovl_inode_cachep = kmem_cache_create("ovl_inode",
1498					     sizeof(struct ovl_inode), 0,
1499					     (SLAB_RECLAIM_ACCOUNT|
1500					      SLAB_ACCOUNT),
1501					     ovl_inode_init_once);
1502	if (ovl_inode_cachep == NULL)
1503		return -ENOMEM;
1504
1505	err = register_filesystem(&ovl_fs_type);
1506	if (!err)
1507		return 0;
 
 
1508
 
 
1509	kmem_cache_destroy(ovl_inode_cachep);
1510
1511	return err;
1512}
1513
1514static void __exit ovl_exit(void)
1515{
1516	unregister_filesystem(&ovl_fs_type);
1517
1518	/*
1519	 * Make sure all delayed rcu free inodes are flushed before we
1520	 * destroy cache.
1521	 */
1522	rcu_barrier();
1523	kmem_cache_destroy(ovl_inode_cachep);
 
1524}
1525
1526module_init(ovl_init);
1527module_exit(ovl_exit);
v6.2
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *
   4 * Copyright (C) 2011 Novell Inc.
   5 */
   6
   7#include <uapi/linux/magic.h>
   8#include <linux/fs.h>
   9#include <linux/namei.h>
  10#include <linux/xattr.h>
  11#include <linux/mount.h>
  12#include <linux/parser.h>
  13#include <linux/module.h>
  14#include <linux/statfs.h>
  15#include <linux/seq_file.h>
  16#include <linux/posix_acl_xattr.h>
  17#include <linux/exportfs.h>
  18#include <linux/file.h>
 
 
  19#include "overlayfs.h"
 
  20
  21MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
  22MODULE_DESCRIPTION("Overlay filesystem");
  23MODULE_LICENSE("GPL");
  24
  25
  26struct ovl_dir_cache;
  27
  28#define OVL_MAX_STACK 500
  29
  /*
   * Module parameters providing the defaults of the corresponding mount
   * options.  Each one is initialized from its Kconfig symbol and exposed
   * with mode 0644.
   */
  30static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
  31module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
  32MODULE_PARM_DESC(redirect_dir,
  33		 "Default to on or off for the redirect_dir feature");
  34
  /* Consulted by ovl_parse_redirect_mode() for redirect_dir=off */
  35static bool ovl_redirect_always_follow =
  36	IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
  37module_param_named(redirect_always_follow, ovl_redirect_always_follow,
  38		   bool, 0644);
  39MODULE_PARM_DESC(redirect_always_follow,
  40		 "Follow redirects even if redirect_dir feature is turned off");
  41
  42static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
  43module_param_named(index, ovl_index_def, bool, 0644);
  44MODULE_PARM_DESC(index,
  45		 "Default to on or off for the inodes index feature");
  46
  47static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
  48module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
  49MODULE_PARM_DESC(nfs_export,
  50		 "Default to on or off for the NFS export feature");
  51
  /* Mapped to OVL_XINO_AUTO or OVL_XINO_OFF by ovl_xino_def() */
  52static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
  53module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
  54MODULE_PARM_DESC(xino_auto,
  55		 "Auto enable xino feature");
  56
  57static void ovl_entry_stack_free(struct ovl_entry *oe)
  58{
  59	unsigned int i;
 
  60
  61	for (i = 0; i < oe->numlower; i++)
  62		dput(oe->lowerstack[i].dentry);
  63}
  64
  /* Default of the "metacopy" mount option, see ovl_show_options() */
  65static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
  66module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
  67MODULE_PARM_DESC(metacopy,
  68		 "Default to on or off for the metadata only copy up feature");
  69
  70static void ovl_dentry_release(struct dentry *dentry)
  71{
  72	struct ovl_entry *oe = dentry->d_fsdata;
  73
  74	if (oe) {
  75		ovl_entry_stack_free(oe);
  76		kfree_rcu(oe, rcu);
  77	}
  78}
  79
  80static struct dentry *ovl_d_real(struct dentry *dentry,
  81				 const struct inode *inode)
  82{
  83	struct dentry *real = NULL, *lower;
  84
  85	/* It's an overlay file */
  86	if (inode && d_inode(dentry) == inode)
  87		return dentry;
 
  88
  89	if (!d_is_reg(dentry)) {
  90		if (!inode || inode == d_inode(dentry))
  91			return dentry;
  92		goto bug;
 
 
 
 
  93	}
  94
  95	real = ovl_dentry_upper(dentry);
  96	if (real && (inode == d_inode(real)))
  97		return real;
  98
  99	if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
 100		return real;
 101
 
 
 
 
 102	lower = ovl_dentry_lowerdata(dentry);
 103	if (!lower)
 104		goto bug;
 105	real = lower;
 106
 107	/* Handle recursion */
 108	real = d_real(real, inode);
 
 109
 110	if (!inode || inode == d_inode(real))
 111		return real;
 112bug:
 113	WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n",
 114	     __func__, dentry, inode ? inode->i_sb->s_id : "NULL",
 115	     inode ? inode->i_ino : 0, real,
 116	     real && d_inode(real) ? d_inode(real)->i_ino : 0);
 117	return dentry;
 118}
 119
 120static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak)
 121{
 122	int ret = 1;
 123
 
 
 
 124	if (weak) {
 125		if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE)
 126			ret =  d->d_op->d_weak_revalidate(d, flags);
 127	} else if (d->d_flags & DCACHE_OP_REVALIDATE) {
 128		ret = d->d_op->d_revalidate(d, flags);
 129		if (!ret) {
 130			if (!(flags & LOOKUP_RCU))
 131				d_invalidate(d);
 132			ret = -ESTALE;
 133		}
 134	}
 135	return ret;
 136}
 137
 138static int ovl_dentry_revalidate_common(struct dentry *dentry,
 139					unsigned int flags, bool weak)
 140{
 141	struct ovl_entry *oe = dentry->d_fsdata;
 
 142	struct inode *inode = d_inode_rcu(dentry);
 143	struct dentry *upper;
 144	unsigned int i;
 145	int ret = 1;
 146
 147	/* Careful in RCU mode */
 148	if (!inode)
 149		return -ECHILD;
 150
 
 
 151	upper = ovl_i_dentry_upper(inode);
 152	if (upper)
 153		ret = ovl_revalidate_real(upper, flags, weak);
 154
 155	for (i = 0; ret > 0 && i < oe->numlower; i++) {
 156		ret = ovl_revalidate_real(oe->lowerstack[i].dentry, flags,
 157					  weak);
 158	}
 159	return ret;
 160}
 161
 162static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
 163{
 164	return ovl_dentry_revalidate_common(dentry, flags, false);
 165}
 166
 167static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
 168{
 169	return ovl_dentry_revalidate_common(dentry, flags, true);
 170}
 171
 /* Dentry operations for overlay dentries, wiring up the helpers above */
 172static const struct dentry_operations ovl_dentry_operations = {
 173	.d_release = ovl_dentry_release,
 174	.d_real = ovl_d_real,
 175	.d_revalidate = ovl_dentry_revalidate,
 176	.d_weak_revalidate = ovl_dentry_weak_revalidate,
 177};
 178
 /* Slab cache that struct ovl_inode objects are allocated from */
 179static struct kmem_cache *ovl_inode_cachep;
 180
 181static struct inode *ovl_alloc_inode(struct super_block *sb)
 182{
 183	struct ovl_inode *oi = alloc_inode_sb(sb, ovl_inode_cachep, GFP_KERNEL);
 184
 185	if (!oi)
 186		return NULL;
 187
 188	oi->cache = NULL;
 189	oi->redirect = NULL;
 190	oi->version = 0;
 191	oi->flags = 0;
 192	oi->__upperdentry = NULL;
 193	oi->lowerpath.dentry = NULL;
 194	oi->lowerpath.layer = NULL;
 195	oi->lowerdata = NULL;
 196	mutex_init(&oi->lock);
 197
 198	return &oi->vfs_inode;
 199}
 200
 201static void ovl_free_inode(struct inode *inode)
 202{
 203	struct ovl_inode *oi = OVL_I(inode);
 204
 205	kfree(oi->redirect);
 
 206	mutex_destroy(&oi->lock);
 207	kmem_cache_free(ovl_inode_cachep, oi);
 208}
 209
 210static void ovl_destroy_inode(struct inode *inode)
 211{
 212	struct ovl_inode *oi = OVL_I(inode);
 213
 214	dput(oi->__upperdentry);
 215	dput(oi->lowerpath.dentry);
 216	if (S_ISDIR(inode->i_mode))
 217		ovl_dir_cache_free(inode);
 218	else
 219		iput(oi->lowerdata);
 220}
 221
 222static void ovl_free_fs(struct ovl_fs *ofs)
 223{
 224	struct vfsmount **mounts;
 225	unsigned i;
 226
 227	iput(ofs->workbasedir_trap);
 228	iput(ofs->indexdir_trap);
 229	iput(ofs->workdir_trap);
 230	dput(ofs->whiteout);
 231	dput(ofs->indexdir);
 232	dput(ofs->workdir);
 233	if (ofs->workdir_locked)
 234		ovl_inuse_unlock(ofs->workbasedir);
 235	dput(ofs->workbasedir);
 236	if (ofs->upperdir_locked)
 237		ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);
 238
 239	/* Hack!  Reuse ofs->layers as a vfsmount array before freeing it */
 240	mounts = (struct vfsmount **) ofs->layers;
 241	for (i = 0; i < ofs->numlayer; i++) {
 242		iput(ofs->layers[i].trap);
 243		mounts[i] = ofs->layers[i].mnt;
 244	}
 245	kern_unmount_array(mounts, ofs->numlayer);
 246	kfree(ofs->layers);
 247	for (i = 0; i < ofs->numfs; i++)
 248		free_anon_bdev(ofs->fs[i].pseudo_dev);
 249	kfree(ofs->fs);
 250
 251	kfree(ofs->config.lowerdir);
 252	kfree(ofs->config.upperdir);
 253	kfree(ofs->config.workdir);
 254	kfree(ofs->config.redirect_mode);
 255	if (ofs->creator_cred)
 256		put_cred(ofs->creator_cred);
 257	kfree(ofs);
 258}
 259
 260static void ovl_put_super(struct super_block *sb)
 261{
 262	struct ovl_fs *ofs = sb->s_fs_info;
 263
 264	ovl_free_fs(ofs);
 
 265}
 266
 267/* Sync real dirty inodes in upper filesystem (if it exists) */
 268static int ovl_sync_fs(struct super_block *sb, int wait)
 269{
 270	struct ovl_fs *ofs = sb->s_fs_info;
 271	struct super_block *upper_sb;
 272	int ret;
 273
 274	ret = ovl_sync_status(ofs);
 275	/*
 276	 * We have to always set the err, because the return value isn't
 277	 * checked in syncfs, and instead indirectly return an error via
 278	 * the sb's writeback errseq, which VFS inspects after this call.
 279	 */
 280	if (ret < 0) {
 281		errseq_set(&sb->s_wb_err, -EIO);
 282		return -EIO;
 283	}
 284
 285	if (!ret)
 286		return ret;
 287
 288	/*
 289	 * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
 290	 * All the super blocks will be iterated, including upper_sb.
 291	 *
 292	 * If this is a syncfs(2) call, then we do need to call
 293	 * sync_filesystem() on upper_sb, but enough if we do it when being
 294	 * called with wait == 1.
 295	 */
 296	if (!wait)
 297		return 0;
 298
 299	upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
 300
 301	down_read(&upper_sb->s_umount);
 302	ret = sync_filesystem(upper_sb);
 303	up_read(&upper_sb->s_umount);
 304
 305	return ret;
 306}
 307
 308/**
 309 * ovl_statfs
 310 * @dentry: The dentry to query
 311 * @buf: The struct kstatfs to fill in with stats
 312 *
 313 * Get the filesystem statistics.  As writes always target the upper layer
 314 * filesystem pass the statfs to the upper filesystem (if it exists)
 315 */
 316static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
 317{
 318	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 319	struct dentry *root_dentry = dentry->d_sb->s_root;
 
 320	struct path path;
 321	int err;
 322
 323	ovl_path_real(root_dentry, &path);
 324
 325	err = vfs_statfs(&path, buf);
 326	if (!err) {
 327		buf->f_namelen = ofs->namelen;
 328		buf->f_type = OVERLAYFS_SUPER_MAGIC;
 
 
 329	}
 330
 331	return err;
 332}
 333
 334/* Will this overlay be forced to mount/remount ro? */
 335static bool ovl_force_readonly(struct ovl_fs *ofs)
 336{
 337	return (!ovl_upper_mnt(ofs) || !ofs->workdir);
 338}
 339
 340static const char *ovl_redirect_mode_def(void)
 341{
 342	return ovl_redirect_dir_def ? "on" : "off";
 343}
 344
 /*
  * Printable xino mode names, indexed by ofs->config.xino in
  * ovl_show_options().
  * NOTE(review): the order presumably follows the OVL_XINO_* values, which
  * are defined outside this file - confirm.
  */
 345static const char * const ovl_xino_str[] = {
 346	"off",
 347	"auto",
 348	"on",
 349};
 350
 351static inline int ovl_xino_def(void)
 352{
 353	return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
 354}
 355
 356/**
 357 * ovl_show_options
 358 * @m: the seq_file handle
 359 * @dentry: The dentry to query
 360 *
 361 * Prints the mount options for a given superblock.
 362 * Returns zero; does not fail.
 363 */
 364static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 365{
 366	struct super_block *sb = dentry->d_sb;
 367	struct ovl_fs *ofs = sb->s_fs_info;
 368
 369	seq_show_option(m, "lowerdir", ofs->config.lowerdir);
 370	if (ofs->config.upperdir) {
 371		seq_show_option(m, "upperdir", ofs->config.upperdir);
 372		seq_show_option(m, "workdir", ofs->config.workdir);
 373	}
 374	if (ofs->config.default_permissions)
 375		seq_puts(m, ",default_permissions");
 376	if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
 377		seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
 378	if (ofs->config.index != ovl_index_def)
 379		seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
 380	if (!ofs->config.uuid)
 381		seq_puts(m, ",uuid=off");
 382	if (ofs->config.nfs_export != ovl_nfs_export_def)
 383		seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
 384						"on" : "off");
 385	if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(sb))
 386		seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
 387	if (ofs->config.metacopy != ovl_metacopy_def)
 388		seq_printf(m, ",metacopy=%s",
 389			   ofs->config.metacopy ? "on" : "off");
 390	if (ofs->config.ovl_volatile)
 391		seq_puts(m, ",volatile");
 392	if (ofs->config.userxattr)
 393		seq_puts(m, ",userxattr");
 394	return 0;
 395}
 396
 397static int ovl_remount(struct super_block *sb, int *flags, char *data)
 398{
 399	struct ovl_fs *ofs = sb->s_fs_info;
 400	struct super_block *upper_sb;
 401	int ret = 0;
 402
 403	if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
 404		return -EROFS;
 405
 406	if (*flags & SB_RDONLY && !sb_rdonly(sb)) {
 407		upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
 408		if (ovl_should_sync(ofs)) {
 409			down_read(&upper_sb->s_umount);
 410			ret = sync_filesystem(upper_sb);
 411			up_read(&upper_sb->s_umount);
 412		}
 413	}
 414
 415	return ret;
 416}
 417
 /* Super block operations of overlayfs, wiring up the helpers above */
 418static const struct super_operations ovl_super_operations = {
 419	.alloc_inode	= ovl_alloc_inode,
 420	.free_inode	= ovl_free_inode,
 421	.destroy_inode	= ovl_destroy_inode,
 422	.drop_inode	= generic_delete_inode,
 423	.put_super	= ovl_put_super,
 424	.sync_fs	= ovl_sync_fs,
 425	.statfs		= ovl_statfs,
 426	.show_options	= ovl_show_options,
 427	.remount_fs	= ovl_remount,
 428};
 429
 /*
  * Mount option tokens.  Each one except OPT_ERR is bound to an option
  * pattern in ovl_tokens; OPT_ERR is paired with the NULL pattern that
  * terminates that table.
  */
 430enum {
 431	OPT_LOWERDIR,
 432	OPT_UPPERDIR,
 433	OPT_WORKDIR,
 434	OPT_DEFAULT_PERMISSIONS,
 435	OPT_REDIRECT_DIR,
 436	OPT_INDEX_ON,
 437	OPT_INDEX_OFF,
 438	OPT_UUID_ON,
 439	OPT_UUID_OFF,
 440	OPT_NFS_EXPORT_ON,
 441	OPT_USERXATTR,
 442	OPT_NFS_EXPORT_OFF,
 443	OPT_XINO_ON,
 444	OPT_XINO_OFF,
 445	OPT_XINO_AUTO,
 446	OPT_METACOPY_ON,
 447	OPT_METACOPY_OFF,
 448	OPT_VOLATILE,
 449	OPT_ERR,
 450};
 451
 /*
  * Pattern table mapping mount option strings to OPT_* tokens; passed to
  * match_token() by ovl_parse_opt().  Patterns with "%s" carry a string
  * argument that the parser duplicates with match_strdup().  The table ends
  * with the NULL pattern entry.
  */
 452static const match_table_t ovl_tokens = {
 453	{OPT_LOWERDIR,			"lowerdir=%s"},
 454	{OPT_UPPERDIR,			"upperdir=%s"},
 455	{OPT_WORKDIR,			"workdir=%s"},
 456	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
 457	{OPT_REDIRECT_DIR,		"redirect_dir=%s"},
 458	{OPT_INDEX_ON,			"index=on"},
 459	{OPT_INDEX_OFF,			"index=off"},
 460	{OPT_USERXATTR,			"userxattr"},
 461	{OPT_UUID_ON,			"uuid=on"},
 462	{OPT_UUID_OFF,			"uuid=off"},
 463	{OPT_NFS_EXPORT_ON,		"nfs_export=on"},
 464	{OPT_NFS_EXPORT_OFF,		"nfs_export=off"},
 465	{OPT_XINO_ON,			"xino=on"},
 466	{OPT_XINO_OFF,			"xino=off"},
 467	{OPT_XINO_AUTO,			"xino=auto"},
 468	{OPT_METACOPY_ON,		"metacopy=on"},
 469	{OPT_METACOPY_OFF,		"metacopy=off"},
 470	{OPT_VOLATILE,			"volatile"},
 471	{OPT_ERR,			NULL}
 472};
 473
 /*
  * Split the next comma separated option off the string at *@s.
  *
  * The separating comma is overwritten with a NUL and *@s is advanced past
  * it, or set to NULL when the end of the string was reached.  A character
  * following a backslash is skipped, so "\," does not end an option.
  * Returns the start of the option, or NULL when *@s is NULL.
  */
 static char *ovl_next_opt(char **s)
 {
 	char *start = *s;
 	char *cur;

 	if (!start)
 		return NULL;

 	cur = start;
 	while (*cur) {
 		if (*cur == ',') {
 			*cur = '\0';
 			*s = cur + 1;
 			return start;
 		}
 		if (*cur == '\\') {
 			/* Skip the escaped character, if there is one */
 			cur++;
 			if (!*cur)
 				break;
 		}
 		cur++;
 	}

 	*s = NULL;
 	return start;
 }
 496
 497static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
 498{
 499	if (strcmp(mode, "on") == 0) {
 500		config->redirect_dir = true;
 501		/*
 502		 * Does not make sense to have redirect creation without
 503		 * redirect following.
 504		 */
 505		config->redirect_follow = true;
 506	} else if (strcmp(mode, "follow") == 0) {
 507		config->redirect_follow = true;
 508	} else if (strcmp(mode, "off") == 0) {
 509		if (ovl_redirect_always_follow)
 510			config->redirect_follow = true;
 511	} else if (strcmp(mode, "nofollow") != 0) {
 512		pr_err("bad mount option \"redirect_dir=%s\"\n",
 513		       mode);
 514		return -EINVAL;
 515	}
 516
 517	return 0;
 518}
 519
 520static int ovl_parse_opt(char *opt, struct ovl_config *config)
 521{
 522	char *p;
 523	int err;
 524	bool metacopy_opt = false, redirect_opt = false;
 525	bool nfs_export_opt = false, index_opt = false;
 526
 527	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
 528	if (!config->redirect_mode)
 529		return -ENOMEM;
 530
 531	while ((p = ovl_next_opt(&opt)) != NULL) {
 532		int token;
 533		substring_t args[MAX_OPT_ARGS];
 534
 535		if (!*p)
 536			continue;
 537
 538		token = match_token(p, ovl_tokens, args);
 539		switch (token) {
 540		case OPT_UPPERDIR:
 541			kfree(config->upperdir);
 542			config->upperdir = match_strdup(&args[0]);
 543			if (!config->upperdir)
 544				return -ENOMEM;
 545			break;
 546
 547		case OPT_LOWERDIR:
 548			kfree(config->lowerdir);
 549			config->lowerdir = match_strdup(&args[0]);
 550			if (!config->lowerdir)
 551				return -ENOMEM;
 552			break;
 553
 554		case OPT_WORKDIR:
 555			kfree(config->workdir);
 556			config->workdir = match_strdup(&args[0]);
 557			if (!config->workdir)
 558				return -ENOMEM;
 559			break;
 560
 561		case OPT_DEFAULT_PERMISSIONS:
 562			config->default_permissions = true;
 563			break;
 564
 565		case OPT_REDIRECT_DIR:
 566			kfree(config->redirect_mode);
 567			config->redirect_mode = match_strdup(&args[0]);
 568			if (!config->redirect_mode)
 569				return -ENOMEM;
 570			redirect_opt = true;
 571			break;
 572
 573		case OPT_INDEX_ON:
 574			config->index = true;
 575			index_opt = true;
 576			break;
 577
 578		case OPT_INDEX_OFF:
 579			config->index = false;
 580			index_opt = true;
 581			break;
 582
 583		case OPT_UUID_ON:
 584			config->uuid = true;
 585			break;
 586
 587		case OPT_UUID_OFF:
 588			config->uuid = false;
 589			break;
 590
 591		case OPT_NFS_EXPORT_ON:
 592			config->nfs_export = true;
 593			nfs_export_opt = true;
 594			break;
 595
 596		case OPT_NFS_EXPORT_OFF:
 597			config->nfs_export = false;
 598			nfs_export_opt = true;
 599			break;
 600
 601		case OPT_XINO_ON:
 602			config->xino = OVL_XINO_ON;
 603			break;
 604
 605		case OPT_XINO_OFF:
 606			config->xino = OVL_XINO_OFF;
 607			break;
 608
 609		case OPT_XINO_AUTO:
 610			config->xino = OVL_XINO_AUTO;
 611			break;
 612
 613		case OPT_METACOPY_ON:
 614			config->metacopy = true;
 615			metacopy_opt = true;
 616			break;
 617
 618		case OPT_METACOPY_OFF:
 619			config->metacopy = false;
 620			metacopy_opt = true;
 621			break;
 622
 623		case OPT_VOLATILE:
 624			config->ovl_volatile = true;
 625			break;
 626
 627		case OPT_USERXATTR:
 628			config->userxattr = true;
 629			break;
 630
 631		default:
 632			pr_err("unrecognized mount option \"%s\" or missing value\n",
 633					p);
 634			return -EINVAL;
 635		}
 636	}
 637
 638	/* Workdir/index are useless in non-upper mount */
 639	if (!config->upperdir) {
 640		if (config->workdir) {
 641			pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
 642				config->workdir);
 643			kfree(config->workdir);
 644			config->workdir = NULL;
 645		}
 646		if (config->index && index_opt) {
 647			pr_info("option \"index=on\" is useless in a non-upper mount, ignore\n");
 648			index_opt = false;
 649		}
 650		config->index = false;
 651	}
 652
 653	if (!config->upperdir && config->ovl_volatile) {
 654		pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
 655		config->ovl_volatile = false;
 656	}
 657
 658	err = ovl_parse_redirect_mode(config, config->redirect_mode);
 659	if (err)
 660		return err;
 661
 662	/*
 663	 * This is to make the logic below simpler.  It doesn't make any other
 664	 * difference, since config->redirect_dir is only used for upper.
 665	 */
 666	if (!config->upperdir && config->redirect_follow)
 667		config->redirect_dir = true;
 668
 669	/* Resolve metacopy -> redirect_dir dependency */
 670	if (config->metacopy && !config->redirect_dir) {
 671		if (metacopy_opt && redirect_opt) {
 672			pr_err("conflicting options: metacopy=on,redirect_dir=%s\n",
 673			       config->redirect_mode);
 674			return -EINVAL;
 675		}
 676		if (redirect_opt) {
 677			/*
 678			 * There was an explicit redirect_dir=... that resulted
 679			 * in this conflict.
 680			 */
 681			pr_info("disabling metacopy due to redirect_dir=%s\n",
 682				config->redirect_mode);
 683			config->metacopy = false;
 684		} else {
 685			/* Automatically enable redirect otherwise. */
 686			config->redirect_follow = config->redirect_dir = true;
 687		}
 688	}
 689
 690	/* Resolve nfs_export -> index dependency */
 691	if (config->nfs_export && !config->index) {
 692		if (!config->upperdir && config->redirect_follow) {
 693			pr_info("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
 694			config->nfs_export = false;
 695		} else if (nfs_export_opt && index_opt) {
 696			pr_err("conflicting options: nfs_export=on,index=off\n");
 697			return -EINVAL;
 698		} else if (index_opt) {
 699			/*
 700			 * There was an explicit index=off that resulted
 701			 * in this conflict.
 702			 */
 703			pr_info("disabling nfs_export due to index=off\n");
 704			config->nfs_export = false;
 705		} else {
 706			/* Automatically enable index otherwise. */
 707			config->index = true;
 708		}
 709	}
 710
 711	/* Resolve nfs_export -> !metacopy dependency */
 712	if (config->nfs_export && config->metacopy) {
 713		if (nfs_export_opt && metacopy_opt) {
 714			pr_err("conflicting options: nfs_export=on,metacopy=on\n");
 715			return -EINVAL;
 716		}
 717		if (metacopy_opt) {
 718			/*
 719			 * There was an explicit metacopy=on that resulted
 720			 * in this conflict.
 721			 */
 722			pr_info("disabling nfs_export due to metacopy=on\n");
 723			config->nfs_export = false;
 724		} else {
 725			/*
 726			 * There was an explicit nfs_export=on that resulted
 727			 * in this conflict.
 728			 */
 729			pr_info("disabling metacopy due to nfs_export=on\n");
 730			config->metacopy = false;
 731		}
 732	}
 733
 734
 735	/* Resolve userxattr -> !redirect && !metacopy dependency */
 736	if (config->userxattr) {
 737		if (config->redirect_follow && redirect_opt) {
 738			pr_err("conflicting options: userxattr,redirect_dir=%s\n",
 739			       config->redirect_mode);
 740			return -EINVAL;
 741		}
 742		if (config->metacopy && metacopy_opt) {
 743			pr_err("conflicting options: userxattr,metacopy=on\n");
 744			return -EINVAL;
 745		}
 746		/*
 747		 * Silently disable default setting of redirect and metacopy.
 748		 * This shall be the default in the future as well: these
 749		 * options must be explicitly enabled if used together with
 750		 * userxattr.
 751		 */
 752		config->redirect_dir = config->redirect_follow = false;
 753		config->metacopy = false;
 754	}
 755
 756	return 0;
 757}
 758
 759#define OVL_WORKDIR_NAME "work"
 760#define OVL_INDEXDIR_NAME "index"
 761
 762static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
 763					 const char *name, bool persist)
 764{
 765	struct inode *dir =  ofs->workbasedir->d_inode;
 766	struct vfsmount *mnt = ovl_upper_mnt(ofs);
 767	struct dentry *work;
 768	int err;
 769	bool retried = false;
 770
 771	inode_lock_nested(dir, I_MUTEX_PARENT);
 772retry:
 773	work = ovl_lookup_upper(ofs, name, ofs->workbasedir, strlen(name));
 774
 775	if (!IS_ERR(work)) {
 776		struct iattr attr = {
 777			.ia_valid = ATTR_MODE,
 778			.ia_mode = S_IFDIR | 0,
 779		};
 780
 781		if (work->d_inode) {
 782			err = -EEXIST;
 783			if (retried)
 784				goto out_dput;
 785
 786			if (persist)
 787				goto out_unlock;
 788
 789			retried = true;
 790			err = ovl_workdir_cleanup(ofs, dir, mnt, work, 0);
 791			dput(work);
 792			if (err == -EINVAL) {
 793				work = ERR_PTR(err);
 794				goto out_unlock;
 795			}
 796			goto retry;
 797		}
 798
 799		err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode);
 800		if (err)
 801			goto out_dput;
 802
 803		/* Weird filesystem returning with hashed negative (kernfs)? */
 804		err = -EINVAL;
 805		if (d_really_is_negative(work))
 806			goto out_dput;
 807
 808		/*
 809		 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
 810		 *
 811		 * a) success (there was a POSIX ACL xattr and was removed)
 812		 * b) -ENODATA (there was no POSIX ACL xattr)
 813		 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
 814		 *
 815		 * There are various other error values that could effectively
 816		 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
 817		 * if the xattr name is too long), but the set of filesystems
 818		 * allowed as upper are limited to "normal" ones, where checking
 819		 * for the above two errors is sufficient.
 820		 */
 821		err = ovl_do_remove_acl(ofs, work, XATTR_NAME_POSIX_ACL_DEFAULT);
 822		if (err && err != -ENODATA && err != -EOPNOTSUPP)
 823			goto out_dput;
 824
 825		err = ovl_do_remove_acl(ofs, work, XATTR_NAME_POSIX_ACL_ACCESS);
 826		if (err && err != -ENODATA && err != -EOPNOTSUPP)
 827			goto out_dput;
 828
 829		/* Clear any inherited mode bits */
 830		inode_lock(work->d_inode);
 831		err = ovl_do_notify_change(ofs, work, &attr);
 832		inode_unlock(work->d_inode);
 833		if (err)
 834			goto out_dput;
 835	} else {
 836		err = PTR_ERR(work);
 837		goto out_err;
 838	}
 839out_unlock:
 840	inode_unlock(dir);
 841	return work;
 842
 843out_dput:
 844	dput(work);
 845out_err:
 846	pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n",
 847		ofs->config.workdir, name, -err);
 848	work = NULL;
 849	goto out_unlock;
 850}
 851
 852static void ovl_unescape(char *s)
 853{
 854	char *d = s;
 855
 856	for (;; s++, d++) {
 857		if (*s == '\\')
 858			s++;
 859		*d = *s;
 860		if (!*s)
 861			break;
 862	}
 863}
 864
 865static int ovl_mount_dir_noesc(const char *name, struct path *path)
 866{
 867	int err = -EINVAL;
 868
 869	if (!*name) {
 870		pr_err("empty lowerdir\n");
 871		goto out;
 872	}
 873	err = kern_path(name, LOOKUP_FOLLOW, path);
 874	if (err) {
 875		pr_err("failed to resolve '%s': %i\n", name, err);
 876		goto out;
 877	}
 878	err = -EINVAL;
 879	if (ovl_dentry_weird(path->dentry)) {
 880		pr_err("filesystem on '%s' not supported\n", name);
 881		goto out_put;
 882	}
 883	if (!d_is_dir(path->dentry)) {
 884		pr_err("'%s' not a directory\n", name);
 885		goto out_put;
 886	}
 887	return 0;
 888
 889out_put:
 890	path_put_init(path);
 891out:
 892	return err;
 893}
 894
 895static int ovl_mount_dir(const char *name, struct path *path)
 896{
 897	int err = -ENOMEM;
 898	char *tmp = kstrdup(name, GFP_KERNEL);
 899
 900	if (tmp) {
 901		ovl_unescape(tmp);
 902		err = ovl_mount_dir_noesc(tmp, path);
 903
 904		if (!err && path->dentry->d_flags & DCACHE_OP_REAL) {
 905			pr_err("filesystem on '%s' not supported as upperdir\n",
 906			       tmp);
 907			path_put_init(path);
 908			err = -EINVAL;
 909		}
 910		kfree(tmp);
 911	}
 912	return err;
 913}
 914
 915static int ovl_check_namelen(const struct path *path, struct ovl_fs *ofs,
 916			     const char *name)
 917{
 918	struct kstatfs statfs;
 919	int err = vfs_statfs(path, &statfs);
 920
 921	if (err)
 922		pr_err("statfs failed on '%s'\n", name);
 923	else
 924		ofs->namelen = max(ofs->namelen, statfs.f_namelen);
 925
 926	return err;
 927}
 928
 929static int ovl_lower_dir(const char *name, struct path *path,
 930			 struct ovl_fs *ofs, int *stack_depth)
 931{
 932	int fh_type;
 933	int err;
 934
 935	err = ovl_mount_dir_noesc(name, path);
 936	if (err)
 937		return err;
 938
 939	err = ovl_check_namelen(path, ofs, name);
 940	if (err)
 941		return err;
 942
 943	*stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
 944
 945	/*
 946	 * The inodes index feature and NFS export need to encode and decode
 947	 * file handles, so they require that all layers support them.
 948	 */
 949	fh_type = ovl_can_decode_fh(path->dentry->d_sb);
 950	if ((ofs->config.nfs_export ||
 951	     (ofs->config.index && ofs->config.upperdir)) && !fh_type) {
 952		ofs->config.index = false;
 953		ofs->config.nfs_export = false;
 954		pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
 955			name);
 956	}
 
 957	/*
 958	 * Decoding origin file handle is required for persistent st_ino.
 959	 * Without persistent st_ino, xino=auto falls back to xino=off.
 960	 */
 961	if (ofs->config.xino == OVL_XINO_AUTO &&
 962	    ofs->config.upperdir && !fh_type) {
 963		ofs->config.xino = OVL_XINO_OFF;
 964		pr_warn("fs on '%s' does not support file handles, falling back to xino=off.\n",
 965			name);
 966	}
 967
 968	/* Check if lower fs has 32bit inode numbers */
 969	if (fh_type != FILEID_INO32_GEN)
 970		ofs->xino_mode = -1;
 971
 972	return 0;
 973}
 974
 975/* Workdir should not be subdir of upperdir and vice versa */
 976static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
 977{
 978	bool ok = false;
 979
 980	if (workdir != upperdir) {
 981		ok = (lock_rename(workdir, upperdir) == NULL);
 982		unlock_rename(workdir, upperdir);
 
 
 983	}
 984	return ok;
 985}
 986
 987static unsigned int ovl_split_lowerdirs(char *str)
 988{
 989	unsigned int ctr = 1;
 990	char *s, *d;
 991
 992	for (s = d = str;; s++, d++) {
 993		if (*s == '\\') {
 994			s++;
 995		} else if (*s == ':') {
 996			*d = '\0';
 997			ctr++;
 998			continue;
 999		}
1000		*d = *s;
1001		if (!*s)
1002			break;
1003	}
1004	return ctr;
1005}
1006
/*
 * ->get callback used by the handlers for overlayfs' own xattr prefixes:
 * unconditionally refuses the request with -EOPNOTSUPP.
 */
static int ovl_own_xattr_get(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, void *buffer, size_t size)
{
	/* No argument is consulted; reads are never served here */
	return -EOPNOTSUPP;
}
1013
/*
 * ->set callback used by the handlers for overlayfs' own xattr prefixes:
 * unconditionally refuses the request with -EOPNOTSUPP.
 */
static int ovl_own_xattr_set(const struct xattr_handler *handler,
			     struct user_namespace *mnt_userns,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, const void *value,
			     size_t size, int flags)
{
	/* No argument is consulted; writes are never accepted here */
	return -EOPNOTSUPP;
}
1022
1023static int ovl_other_xattr_get(const struct xattr_handler *handler,
1024			       struct dentry *dentry, struct inode *inode,
1025			       const char *name, void *buffer, size_t size)
1026{
1027	return ovl_xattr_get(dentry, inode, name, buffer, size);
1028}
1029
1030static int ovl_other_xattr_set(const struct xattr_handler *handler,
1031			       struct user_namespace *mnt_userns,
1032			       struct dentry *dentry, struct inode *inode,
1033			       const char *name, const void *value,
1034			       size_t size, int flags)
1035{
1036	return ovl_xattr_set(dentry, inode, name, value, size, flags);
1037}
1038
1039static const struct xattr_handler ovl_own_trusted_xattr_handler = {
1040	.prefix	= OVL_XATTR_TRUSTED_PREFIX,
1041	.get = ovl_own_xattr_get,
1042	.set = ovl_own_xattr_set,
1043};
1044
1045static const struct xattr_handler ovl_own_user_xattr_handler = {
1046	.prefix	= OVL_XATTR_USER_PREFIX,
1047	.get = ovl_own_xattr_get,
1048	.set = ovl_own_xattr_set,
1049};
1050
1051static const struct xattr_handler ovl_other_xattr_handler = {
1052	.prefix	= "", /* catch all */
1053	.get = ovl_other_xattr_get,
1054	.set = ovl_other_xattr_set,
1055};
1056
1057static const struct xattr_handler *ovl_trusted_xattr_handlers[] = {
1058#ifdef CONFIG_FS_POSIX_ACL
1059	&posix_acl_access_xattr_handler,
1060	&posix_acl_default_xattr_handler,
1061#endif
1062	&ovl_own_trusted_xattr_handler,
1063	&ovl_other_xattr_handler,
1064	NULL
1065};
1066
1067static const struct xattr_handler *ovl_user_xattr_handlers[] = {
1068#ifdef CONFIG_FS_POSIX_ACL
1069	&posix_acl_access_xattr_handler,
1070	&posix_acl_default_xattr_handler,
1071#endif
1072	&ovl_own_user_xattr_handler,
1073	&ovl_other_xattr_handler,
1074	NULL
1075};
1076
1077static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
1078			  struct inode **ptrap, const char *name)
1079{
1080	struct inode *trap;
1081	int err;
1082
1083	trap = ovl_get_trap_inode(sb, dir);
1084	err = PTR_ERR_OR_ZERO(trap);
1085	if (err) {
1086		if (err == -ELOOP)
1087			pr_err("conflicting %s path\n", name);
1088		return err;
1089	}
1090
1091	*ptrap = trap;
1092	return 0;
1093}
1094
1095/*
1096 * Determine how we treat concurrent use of upperdir/workdir based on the
1097 * index feature. This is papering over mount leaks of container runtimes,
1098 * for example, an old overlay mount is leaked and now its upperdir is
1099 * attempted to be used as a lower layer in a new overlay mount.
1100 */
1101static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
1102{
1103	if (ofs->config.index) {
1104		pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
1105		       name);
1106		return -EBUSY;
1107	} else {
1108		pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
1109			name);
1110		return 0;
1111	}
1112}
1113
1114static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
1115			 struct ovl_layer *upper_layer, struct path *upperpath)
 
1116{
1117	struct vfsmount *upper_mnt;
1118	int err;
1119
1120	err = ovl_mount_dir(ofs->config.upperdir, upperpath);
1121	if (err)
1122		goto out;
1123
1124	/* Upperdir path should not be r/o */
1125	if (__mnt_is_readonly(upperpath->mnt)) {
1126		pr_err("upper fs is r/o, try multi-lower layers mount\n");
1127		err = -EINVAL;
1128		goto out;
1129	}
1130
1131	err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
1132	if (err)
1133		goto out;
1134
1135	err = ovl_setup_trap(sb, upperpath->dentry, &upper_layer->trap,
1136			     "upperdir");
1137	if (err)
1138		goto out;
1139
1140	upper_mnt = clone_private_mount(upperpath);
1141	err = PTR_ERR(upper_mnt);
1142	if (IS_ERR(upper_mnt)) {
1143		pr_err("failed to clone upperpath\n");
1144		goto out;
1145	}
1146
1147	/* Don't inherit atime flags */
1148	upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
1149	upper_layer->mnt = upper_mnt;
1150	upper_layer->idx = 0;
1151	upper_layer->fsid = 0;
1152
1153	/*
1154	 * Inherit SB_NOSEC flag from upperdir.
1155	 *
1156	 * This optimization changes behavior when a security related attribute
1157	 * (suid/sgid/security.*) is changed on an underlying layer.  This is
1158	 * okay because we don't yet have guarantees in that case, but it will
1159	 * need careful treatment once we want to honour changes to underlying
1160	 * filesystems.
1161	 */
1162	if (upper_mnt->mnt_sb->s_flags & SB_NOSEC)
1163		sb->s_flags |= SB_NOSEC;
1164
1165	if (ovl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) {
1166		ofs->upperdir_locked = true;
1167	} else {
1168		err = ovl_report_in_use(ofs, "upperdir");
1169		if (err)
1170			goto out;
1171	}
1172
1173	err = 0;
1174out:
1175	return err;
1176}
1177
1178/*
1179 * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and
1180 * negative values if error is encountered.
1181 */
1182static int ovl_check_rename_whiteout(struct ovl_fs *ofs)
1183{
1184	struct dentry *workdir = ofs->workdir;
1185	struct inode *dir = d_inode(workdir);
1186	struct dentry *temp;
1187	struct dentry *dest;
1188	struct dentry *whiteout;
1189	struct name_snapshot name;
1190	int err;
1191
1192	inode_lock_nested(dir, I_MUTEX_PARENT);
1193
1194	temp = ovl_create_temp(ofs, workdir, OVL_CATTR(S_IFREG | 0));
1195	err = PTR_ERR(temp);
1196	if (IS_ERR(temp))
1197		goto out_unlock;
1198
1199	dest = ovl_lookup_temp(ofs, workdir);
1200	err = PTR_ERR(dest);
1201	if (IS_ERR(dest)) {
1202		dput(temp);
1203		goto out_unlock;
1204	}
1205
1206	/* Name is inline and stable - using snapshot as a copy helper */
1207	take_dentry_name_snapshot(&name, temp);
1208	err = ovl_do_rename(ofs, dir, temp, dir, dest, RENAME_WHITEOUT);
1209	if (err) {
1210		if (err == -EINVAL)
1211			err = 0;
1212		goto cleanup_temp;
1213	}
1214
1215	whiteout = ovl_lookup_upper(ofs, name.name.name, workdir, name.name.len);
1216	err = PTR_ERR(whiteout);
1217	if (IS_ERR(whiteout))
1218		goto cleanup_temp;
1219
1220	err = ovl_is_whiteout(whiteout);
1221
1222	/* Best effort cleanup of whiteout and temp file */
1223	if (err)
1224		ovl_cleanup(ofs, dir, whiteout);
1225	dput(whiteout);
1226
1227cleanup_temp:
1228	ovl_cleanup(ofs, dir, temp);
1229	release_dentry_name_snapshot(&name);
1230	dput(temp);
1231	dput(dest);
1232
1233out_unlock:
1234	inode_unlock(dir);
1235
1236	return err;
1237}
1238
1239static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs,
1240					   struct dentry *parent,
1241					   const char *name, umode_t mode)
1242{
1243	size_t len = strlen(name);
1244	struct dentry *child;
1245
1246	inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
1247	child = ovl_lookup_upper(ofs, name, parent, len);
1248	if (!IS_ERR(child) && !child->d_inode)
1249		child = ovl_create_real(ofs, parent->d_inode, child,
1250					OVL_CATTR(mode));
1251	inode_unlock(parent->d_inode);
1252	dput(parent);
1253
1254	return child;
1255}
1256
1257/*
1258 * Creates $workdir/work/incompat/volatile/dirty file if it is not already
1259 * present.
1260 */
1261static int ovl_create_volatile_dirty(struct ovl_fs *ofs)
1262{
1263	unsigned int ctr;
1264	struct dentry *d = dget(ofs->workbasedir);
1265	static const char *const volatile_path[] = {
1266		OVL_WORKDIR_NAME, "incompat", "volatile", "dirty"
1267	};
1268	const char *const *name = volatile_path;
1269
1270	for (ctr = ARRAY_SIZE(volatile_path); ctr; ctr--, name++) {
1271		d = ovl_lookup_or_create(ofs, d, *name, ctr > 1 ? S_IFDIR : S_IFREG);
1272		if (IS_ERR(d))
1273			return PTR_ERR(d);
1274	}
1275	dput(d);
1276	return 0;
1277}
1278
1279static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
1280			    const struct path *workpath)
1281{
1282	struct vfsmount *mnt = ovl_upper_mnt(ofs);
1283	struct dentry *workdir;
1284	struct file *tmpfile;
1285	bool rename_whiteout;
1286	bool d_type;
1287	int fh_type;
1288	int err;
1289
1290	err = mnt_want_write(mnt);
1291	if (err)
1292		return err;
1293
1294	workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
1295	err = PTR_ERR(workdir);
1296	if (IS_ERR_OR_NULL(workdir))
1297		goto out;
1298
1299	ofs->workdir = workdir;
1300
1301	err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
1302	if (err)
1303		goto out;
1304
1305	/*
1306	 * Upper should support d_type, else whiteouts are visible.  Given
1307	 * workdir and upper are on same fs, we can do iterate_dir() on
1308	 * workdir. This check requires successful creation of workdir in
1309	 * previous step.
1310	 */
1311	err = ovl_check_d_type_supported(workpath);
1312	if (err < 0)
1313		goto out;
1314
1315	d_type = err;
1316	if (!d_type)
1317		pr_warn("upper fs needs to support d_type.\n");
1318
1319	/* Check if upper/work fs supports O_TMPFILE */
1320	tmpfile = ovl_do_tmpfile(ofs, ofs->workdir, S_IFREG | 0);
1321	ofs->tmpfile = !IS_ERR(tmpfile);
1322	if (ofs->tmpfile)
1323		fput(tmpfile);
1324	else
1325		pr_warn("upper fs does not support tmpfile.\n");
1326
1327
1328	/* Check if upper/work fs supports RENAME_WHITEOUT */
1329	err = ovl_check_rename_whiteout(ofs);
1330	if (err < 0)
1331		goto out;
1332
1333	rename_whiteout = err;
1334	if (!rename_whiteout)
1335		pr_warn("upper fs does not support RENAME_WHITEOUT.\n");
1336
1337	/*
1338	 * Check if upper/work fs supports (trusted|user).overlay.* xattr
1339	 */
1340	err = ovl_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1);
1341	if (err) {
1342		pr_warn("failed to set xattr on upper\n");
1343		ofs->noxattr = true;
1344		if (ofs->config.index || ofs->config.metacopy) {
 
 
 
 
 
 
 
 
1345			ofs->config.index = false;
1346			ofs->config.metacopy = false;
1347			pr_warn("...falling back to index=off,metacopy=off.\n");
 
 
 
1348		}
1349		/*
1350		 * xattr support is required for persistent st_ino.
1351		 * Without persistent st_ino, xino=auto falls back to xino=off.
1352		 */
1353		if (ofs->config.xino == OVL_XINO_AUTO) {
1354			ofs->config.xino = OVL_XINO_OFF;
1355			pr_warn("...falling back to xino=off.\n");
1356		}
1357		if (err == -EPERM && !ofs->config.userxattr)
1358			pr_info("try mounting with 'userxattr' option\n");
1359		err = 0;
1360	} else {
1361		ovl_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE);
1362	}
1363
1364	/*
1365	 * We allowed sub-optimal upper fs configuration and don't want to break
1366	 * users over kernel upgrade, but we never allowed remote upper fs, so
1367	 * we can enforce strict requirements for remote upper fs.
1368	 */
1369	if (ovl_dentry_remote(ofs->workdir) &&
1370	    (!d_type || !rename_whiteout || ofs->noxattr)) {
1371		pr_err("upper fs missing required features.\n");
1372		err = -EINVAL;
1373		goto out;
1374	}
1375
1376	/*
1377	 * For volatile mount, create a incompat/volatile/dirty file to keep
1378	 * track of it.
1379	 */
1380	if (ofs->config.ovl_volatile) {
1381		err = ovl_create_volatile_dirty(ofs);
1382		if (err < 0) {
1383			pr_err("Failed to create volatile/dirty file.\n");
1384			goto out;
1385		}
1386	}
1387
1388	/* Check if upper/work fs supports file handles */
1389	fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
1390	if (ofs->config.index && !fh_type) {
1391		ofs->config.index = false;
1392		pr_warn("upper fs does not support file handles, falling back to index=off.\n");
1393	}
 
1394
1395	/* Check if upper fs has 32bit inode numbers */
1396	if (fh_type != FILEID_INO32_GEN)
1397		ofs->xino_mode = -1;
1398
1399	/* NFS export of r/w mount depends on index */
1400	if (ofs->config.nfs_export && !ofs->config.index) {
1401		pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
1402		ofs->config.nfs_export = false;
1403	}
1404out:
1405	mnt_drop_write(mnt);
1406	return err;
1407}
1408
1409static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
1410			   const struct path *upperpath)
 
1411{
1412	int err;
1413	struct path workpath = { };
1414
1415	err = ovl_mount_dir(ofs->config.workdir, &workpath);
1416	if (err)
1417		goto out;
1418
1419	err = -EINVAL;
1420	if (upperpath->mnt != workpath.mnt) {
1421		pr_err("workdir and upperdir must reside under the same mount\n");
1422		goto out;
1423	}
1424	if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
1425		pr_err("workdir and upperdir must be separate subtrees\n");
1426		goto out;
1427	}
1428
1429	ofs->workbasedir = dget(workpath.dentry);
1430
1431	if (ovl_inuse_trylock(ofs->workbasedir)) {
1432		ofs->workdir_locked = true;
1433	} else {
1434		err = ovl_report_in_use(ofs, "workdir");
1435		if (err)
1436			goto out;
1437	}
1438
1439	err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap,
1440			     "workdir");
1441	if (err)
1442		goto out;
1443
1444	err = ovl_make_workdir(sb, ofs, &workpath);
1445
1446out:
1447	path_put(&workpath);
1448
1449	return err;
1450}
1451
1452static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
1453			    struct ovl_entry *oe, const struct path *upperpath)
1454{
1455	struct vfsmount *mnt = ovl_upper_mnt(ofs);
1456	struct dentry *indexdir;
 
 
1457	int err;
1458
 
 
 
 
1459	err = mnt_want_write(mnt);
1460	if (err)
1461		return err;
1462
1463	/* Verify lower root is upper root origin */
1464	err = ovl_verify_origin(ofs, upperpath->dentry,
1465				oe->lowerstack[0].dentry, true);
1466	if (err) {
1467		pr_err("failed to verify upper root origin\n");
1468		goto out;
1469	}
1470
1471	/* index dir will act also as workdir */
1472	iput(ofs->workdir_trap);
1473	ofs->workdir_trap = NULL;
1474	dput(ofs->workdir);
1475	ofs->workdir = NULL;
1476	indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
1477	if (IS_ERR(indexdir)) {
1478		err = PTR_ERR(indexdir);
1479	} else if (indexdir) {
1480		ofs->indexdir = indexdir;
1481		ofs->workdir = dget(indexdir);
1482
1483		err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
1484				     "indexdir");
1485		if (err)
1486			goto out;
1487
1488		/*
1489		 * Verify upper root is exclusively associated with index dir.
1490		 * Older kernels stored upper fh in ".overlay.origin"
1491		 * xattr. If that xattr exists, verify that it is a match to
1492		 * upper dir file handle. In any case, verify or set xattr
1493		 * ".overlay.upper" to indicate that index may have
1494		 * directory entries.
1495		 */
1496		if (ovl_check_origin_xattr(ofs, ofs->indexdir)) {
1497			err = ovl_verify_set_fh(ofs, ofs->indexdir,
1498						OVL_XATTR_ORIGIN,
1499						upperpath->dentry, true, false);
 
1500			if (err)
1501				pr_err("failed to verify index dir 'origin' xattr\n");
1502		}
1503		err = ovl_verify_upper(ofs, ofs->indexdir, upperpath->dentry,
1504				       true);
1505		if (err)
1506			pr_err("failed to verify index dir 'upper' xattr\n");
1507
1508		/* Cleanup bad/stale/orphan index entries */
1509		if (!err)
1510			err = ovl_indexdir_cleanup(ofs);
1511	}
1512	if (err || !ofs->indexdir)
1513		pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
1514
1515out:
1516	mnt_drop_write(mnt);
 
 
1517	return err;
1518}
1519
1520static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
1521{
1522	unsigned int i;
1523
1524	if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs))
1525		return true;
1526
1527	/*
1528	 * We allow using single lower with null uuid for index and nfs_export
1529	 * for example to support those features with single lower squashfs.
1530	 * To avoid regressions in setups of overlay with re-formatted lower
1531	 * squashfs, do not allow decoding origin with lower null uuid unless
1532	 * user opted-in to one of the new features that require following the
1533	 * lower inode of non-dir upper.
1534	 */
1535	if (ovl_allow_offline_changes(ofs) && uuid_is_null(uuid))
1536		return false;
1537
1538	for (i = 0; i < ofs->numfs; i++) {
1539		/*
1540		 * We use uuid to associate an overlay lower file handle with a
1541		 * lower layer, so we can accept lower fs with null uuid as long
1542		 * as all lower layers with null uuid are on the same fs.
1543		 * if we detect multiple lower fs with the same uuid, we
1544		 * disable lower file handle decoding on all of them.
1545		 */
1546		if (ofs->fs[i].is_lower &&
1547		    uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) {
1548			ofs->fs[i].bad_uuid = true;
1549			return false;
1550		}
1551	}
1552	return true;
1553}
1554
1555/* Get a unique fsid for the layer */
1556static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
1557{
1558	struct super_block *sb = path->mnt->mnt_sb;
1559	unsigned int i;
1560	dev_t dev;
1561	int err;
1562	bool bad_uuid = false;
1563	bool warn = false;
1564
1565	for (i = 0; i < ofs->numfs; i++) {
1566		if (ofs->fs[i].sb == sb)
1567			return i;
1568	}
1569
1570	if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
1571		bad_uuid = true;
1572		if (ofs->config.xino == OVL_XINO_AUTO) {
1573			ofs->config.xino = OVL_XINO_OFF;
1574			warn = true;
1575		}
1576		if (ofs->config.index || ofs->config.nfs_export) {
1577			ofs->config.index = false;
1578			ofs->config.nfs_export = false;
1579			warn = true;
1580		}
1581		if (warn) {
1582			pr_warn("%s uuid detected in lower fs '%pd2', falling back to xino=%s,index=off,nfs_export=off.\n",
1583				uuid_is_null(&sb->s_uuid) ? "null" :
1584							    "conflicting",
1585				path->dentry, ovl_xino_str[ofs->config.xino]);
1586		}
1587	}
1588
1589	err = get_anon_bdev(&dev);
1590	if (err) {
1591		pr_err("failed to get anonymous bdev for lowerpath\n");
1592		return err;
1593	}
1594
1595	ofs->fs[ofs->numfs].sb = sb;
1596	ofs->fs[ofs->numfs].pseudo_dev = dev;
1597	ofs->fs[ofs->numfs].bad_uuid = bad_uuid;
1598
1599	return ofs->numfs++;
1600}
1601
 
 
 
 
 
 
 
 
 
 
1602static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
1603			  struct path *stack, unsigned int numlower,
1604			  struct ovl_layer *layers)
1605{
1606	int err;
1607	unsigned int i;
 
1608
1609	err = -ENOMEM;
1610	ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL);
1611	if (ofs->fs == NULL)
1612		goto out;
1613
1614	/* idx/fsid 0 are reserved for upper fs even with lower only overlay */
 
 
 
1615	ofs->numfs++;
1616
1617	/*
1618	 * All lower layers that share the same fs as upper layer, use the same
1619	 * pseudo_dev as upper layer.  Allocate fs[0].pseudo_dev even for lower
1620	 * only overlay to simplify ovl_fs_free().
1621	 * is_lower will be set if upper fs is shared with a lower layer.
1622	 */
1623	err = get_anon_bdev(&ofs->fs[0].pseudo_dev);
1624	if (err) {
1625		pr_err("failed to get anonymous bdev for upper fs\n");
1626		goto out;
1627	}
1628
1629	if (ovl_upper_mnt(ofs)) {
1630		ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb;
1631		ofs->fs[0].is_lower = false;
1632	}
1633
1634	for (i = 0; i < numlower; i++) {
 
 
1635		struct vfsmount *mnt;
1636		struct inode *trap;
1637		int fsid;
1638
1639		err = fsid = ovl_get_fsid(ofs, &stack[i]);
1640		if (err < 0)
1641			goto out;
 
 
 
1642
1643		/*
1644		 * Check if lower root conflicts with this overlay layers before
1645		 * checking if it is in-use as upperdir/workdir of "another"
1646		 * mount, because we do not bother to check in ovl_is_inuse() if
1647		 * the upperdir/workdir is in fact in-use by our
1648		 * upperdir/workdir.
1649		 */
1650		err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
1651		if (err)
1652			goto out;
1653
1654		if (ovl_is_inuse(stack[i].dentry)) {
1655			err = ovl_report_in_use(ofs, "lowerdir");
1656			if (err) {
1657				iput(trap);
1658				goto out;
1659			}
1660		}
1661
1662		mnt = clone_private_mount(&stack[i]);
1663		err = PTR_ERR(mnt);
1664		if (IS_ERR(mnt)) {
1665			pr_err("failed to clone lowerpath\n");
1666			iput(trap);
1667			goto out;
1668		}
1669
1670		/*
1671		 * Make lower layers R/O.  That way fchmod/fchown on lower file
1672		 * will fail instead of modifying lower fs.
1673		 */
1674		mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
1675
1676		layers[ofs->numlayer].trap = trap;
1677		layers[ofs->numlayer].mnt = mnt;
1678		layers[ofs->numlayer].idx = ofs->numlayer;
1679		layers[ofs->numlayer].fsid = fsid;
1680		layers[ofs->numlayer].fs = &ofs->fs[fsid];
 
 
 
1681		ofs->numlayer++;
1682		ofs->fs[fsid].is_lower = true;
1683	}
1684
1685	/*
1686	 * When all layers on same fs, overlay can use real inode numbers.
1687	 * With mount option "xino=<on|auto>", mounter declares that there are
1688	 * enough free high bits in underlying fs to hold the unique fsid.
1689	 * If overlayfs does encounter underlying inodes using the high xino
1690	 * bits reserved for fsid, it emits a warning and uses the original
1691	 * inode number or a non persistent inode number allocated from a
1692	 * dedicated range.
1693	 */
1694	if (ofs->numfs - !ovl_upper_mnt(ofs) == 1) {
1695		if (ofs->config.xino == OVL_XINO_ON)
1696			pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
1697		ofs->xino_mode = 0;
1698	} else if (ofs->config.xino == OVL_XINO_OFF) {
1699		ofs->xino_mode = -1;
1700	} else if (ofs->xino_mode < 0) {
1701		/*
1702		 * This is a roundup of number of bits needed for encoding
1703		 * fsid, where fsid 0 is reserved for upper fs (even with
1704		 * lower only overlay) +1 extra bit is reserved for the non
1705		 * persistent inode number range that is used for resolving
1706		 * xino lower bits overflow.
1707		 */
1708		BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30);
1709		ofs->xino_mode = ilog2(ofs->numfs - 1) + 2;
1710	}
1711
1712	if (ofs->xino_mode > 0) {
1713		pr_info("\"xino\" feature enabled using %d upper inode bits.\n",
1714			ofs->xino_mode);
1715	}
1716
1717	err = 0;
1718out:
1719	return err;
1720}
1721
1722static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
1723				const char *lower, unsigned int numlower,
1724				struct ovl_fs *ofs, struct ovl_layer *layers)
 
1725{
1726	int err;
1727	struct path *stack = NULL;
1728	unsigned int i;
 
1729	struct ovl_entry *oe;
 
 
 
1730
1731	if (!ofs->config.upperdir && numlower == 1) {
1732		pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
1733		return ERR_PTR(-EINVAL);
1734	}
1735
1736	stack = kcalloc(numlower, sizeof(struct path), GFP_KERNEL);
1737	if (!stack)
1738		return ERR_PTR(-ENOMEM);
1739
1740	err = -EINVAL;
1741	for (i = 0; i < numlower; i++) {
1742		err = ovl_lower_dir(lower, &stack[i], ofs, &sb->s_stack_depth);
1743		if (err)
1744			goto out_err;
1745
1746		lower = strchr(lower, '\0') + 1;
1747	}
1748
1749	err = -EINVAL;
1750	sb->s_stack_depth++;
1751	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
1752		pr_err("maximum fs stacking depth exceeded\n");
1753		goto out_err;
1754	}
1755
1756	err = ovl_get_layers(sb, ofs, stack, numlower, layers);
1757	if (err)
1758		goto out_err;
1759
1760	err = -ENOMEM;
1761	oe = ovl_alloc_entry(numlower);
 
 
1762	if (!oe)
1763		goto out_err;
1764
1765	for (i = 0; i < numlower; i++) {
1766		oe->lowerstack[i].dentry = dget(stack[i].dentry);
1767		oe->lowerstack[i].layer = &ofs->layers[i+1];
 
 
1768	}
1769
1770out:
1771	for (i = 0; i < numlower; i++)
1772		path_put(&stack[i]);
1773	kfree(stack);
1774
1775	return oe;
1776
1777out_err:
1778	oe = ERR_PTR(err);
1779	goto out;
1780}
1781
1782/*
1783 * Check if this layer root is a descendant of:
1784 * - another layer of this overlayfs instance
1785 * - upper/work dir of any overlayfs instance
1786 */
1787static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
1788			   struct dentry *dentry, const char *name,
1789			   bool is_lower)
1790{
1791	struct dentry *next = dentry, *parent;
1792	int err = 0;
1793
1794	if (!dentry)
1795		return 0;
1796
1797	parent = dget_parent(next);
1798
1799	/* Walk back ancestors to root (inclusive) looking for traps */
1800	while (!err && parent != next) {
1801		if (is_lower && ovl_lookup_trap_inode(sb, parent)) {
1802			err = -ELOOP;
1803			pr_err("overlapping %s path\n", name);
1804		} else if (ovl_is_inuse(parent)) {
1805			err = ovl_report_in_use(ofs, name);
1806		}
1807		next = parent;
1808		parent = dget_parent(next);
1809		dput(next);
1810	}
1811
1812	dput(parent);
1813
1814	return err;
1815}
1816
1817/*
1818 * Check if any of the layers or work dirs overlap.
1819 */
1820static int ovl_check_overlapping_layers(struct super_block *sb,
1821					struct ovl_fs *ofs)
1822{
1823	int i, err;
1824
1825	if (ovl_upper_mnt(ofs)) {
1826		err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root,
1827				      "upperdir", false);
1828		if (err)
1829			return err;
1830
1831		/*
1832		 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of
1833		 * this instance and covers overlapping work and index dirs,
1834		 * unless work or index dir have been moved since created inside
1835		 * workbasedir.  In that case, we already have their traps in
1836		 * inode cache and we will catch that case on lookup.
1837		 */
1838		err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir",
1839				      false);
1840		if (err)
1841			return err;
1842	}
1843
1844	for (i = 1; i < ofs->numlayer; i++) {
1845		err = ovl_check_layer(sb, ofs,
1846				      ofs->layers[i].mnt->mnt_root,
1847				      "lowerdir", true);
1848		if (err)
1849			return err;
1850	}
1851
1852	return 0;
1853}
1854
1855static struct dentry *ovl_get_root(struct super_block *sb,
1856				   struct dentry *upperdentry,
1857				   struct ovl_entry *oe)
1858{
1859	struct dentry *root;
1860	struct ovl_path *lowerpath = &oe->lowerstack[0];
 
1861	unsigned long ino = d_inode(lowerpath->dentry)->i_ino;
1862	int fsid = lowerpath->layer->fsid;
1863	struct ovl_inode_params oip = {
1864		.upperdentry = upperdentry,
1865		.lowerpath = lowerpath,
1866	};
1867
1868	root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
1869	if (!root)
1870		return NULL;
1871
1872	root->d_fsdata = oe;
1873
1874	if (upperdentry) {
1875		/* Root inode uses upper st_ino/i_ino */
1876		ino = d_inode(upperdentry)->i_ino;
1877		fsid = 0;
1878		ovl_dentry_set_upper_alias(root);
1879		if (ovl_is_impuredir(sb, upperdentry))
1880			ovl_set_flag(OVL_IMPURE, d_inode(root));
1881	}
1882
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1883	/* Root is always merge -> can have whiteouts */
1884	ovl_set_flag(OVL_WHITEOUTS, d_inode(root));
1885	ovl_dentry_set_flag(OVL_E_CONNECTED, root);
1886	ovl_set_upperdata(d_inode(root));
1887	ovl_inode_init(d_inode(root), &oip, ino, fsid);
1888	ovl_dentry_update_reval(root, upperdentry, DCACHE_OP_WEAK_REVALIDATE);
 
 
1889
1890	return root;
1891}
1892
1893static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1894{
1895	struct path upperpath = { };
 
1896	struct dentry *root_dentry;
1897	struct ovl_entry *oe;
1898	struct ovl_fs *ofs;
1899	struct ovl_layer *layers;
1900	struct cred *cred;
1901	char *splitlower = NULL;
1902	unsigned int numlower;
1903	int err;
1904
1905	err = -EIO;
1906	if (WARN_ON(sb->s_user_ns != current_user_ns()))
1907		goto out;
1908
1909	sb->s_d_op = &ovl_dentry_operations;
1910
1911	err = -ENOMEM;
1912	ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
1913	if (!ofs)
1914		goto out;
1915
1916	err = -ENOMEM;
1917	ofs->creator_cred = cred = prepare_creds();
1918	if (!cred)
1919		goto out_err;
1920
1921	/* Is there a reason anyone would want not to share whiteouts? */
1922	ofs->share_whiteout = true;
1923
1924	ofs->config.index = ovl_index_def;
1925	ofs->config.uuid = true;
1926	ofs->config.nfs_export = ovl_nfs_export_def;
1927	ofs->config.xino = ovl_xino_def();
1928	ofs->config.metacopy = ovl_metacopy_def;
1929	err = ovl_parse_opt((char *) data, &ofs->config);
1930	if (err)
1931		goto out_err;
1932
1933	err = -EINVAL;
1934	if (!ofs->config.lowerdir) {
1935		if (!silent)
1936			pr_err("missing 'lowerdir'\n");
1937		goto out_err;
1938	}
1939
1940	err = -ENOMEM;
1941	splitlower = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
1942	if (!splitlower)
1943		goto out_err;
1944
1945	err = -EINVAL;
1946	numlower = ovl_split_lowerdirs(splitlower);
1947	if (numlower > OVL_MAX_STACK) {
1948		pr_err("too many lower directories, limit is %d\n",
1949		       OVL_MAX_STACK);
1950		goto out_err;
1951	}
1952
1953	err = -ENOMEM;
1954	layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL);
1955	if (!layers)
1956		goto out_err;
1957
1958	ofs->layers = layers;
1959	/* Layer 0 is reserved for upper even if there's no upper */
 
 
 
 
 
 
1960	ofs->numlayer = 1;
1961
1962	sb->s_stack_depth = 0;
1963	sb->s_maxbytes = MAX_LFS_FILESIZE;
1964	atomic_long_set(&ofs->last_ino, 1);
1965	/* Assume underlying fs uses 32bit inodes unless proven otherwise */
1966	if (ofs->config.xino != OVL_XINO_OFF) {
1967		ofs->xino_mode = BITS_PER_LONG - 32;
1968		if (!ofs->xino_mode) {
1969			pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n");
1970			ofs->config.xino = OVL_XINO_OFF;
1971		}
1972	}
1973
1974	/* alloc/destroy_inode needed for setting up traps in inode cache */
1975	sb->s_op = &ovl_super_operations;
1976
1977	if (ofs->config.upperdir) {
1978		struct super_block *upper_sb;
1979
1980		err = -EINVAL;
1981		if (!ofs->config.workdir) {
1982			pr_err("missing 'workdir'\n");
1983			goto out_err;
1984		}
1985
1986		err = ovl_get_upper(sb, ofs, &layers[0], &upperpath);
1987		if (err)
1988			goto out_err;
1989
1990		upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
1991		if (!ovl_should_sync(ofs)) {
1992			ofs->errseq = errseq_sample(&upper_sb->s_wb_err);
1993			if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) {
1994				err = -EIO;
1995				pr_err("Cannot mount volatile when upperdir has an unseen error. Sync upperdir fs to clear state.\n");
1996				goto out_err;
1997			}
1998		}
1999
2000		err = ovl_get_workdir(sb, ofs, &upperpath);
2001		if (err)
2002			goto out_err;
2003
2004		if (!ofs->workdir)
2005			sb->s_flags |= SB_RDONLY;
2006
2007		sb->s_stack_depth = upper_sb->s_stack_depth;
2008		sb->s_time_gran = upper_sb->s_time_gran;
2009	}
2010	oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
2011	err = PTR_ERR(oe);
2012	if (IS_ERR(oe))
2013		goto out_err;
2014
2015	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
2016	if (!ovl_upper_mnt(ofs))
2017		sb->s_flags |= SB_RDONLY;
2018
2019	if (!ofs->config.uuid && ofs->numfs > 1) {
2020		pr_warn("The uuid=off requires a single fs for lower and upper, falling back to uuid=on.\n");
2021		ofs->config.uuid = true;
 
 
 
2022	}
2023
2024	if (!ovl_force_readonly(ofs) && ofs->config.index) {
2025		err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
2026		if (err)
2027			goto out_free_oe;
2028
2029		/* Force r/o mount with no index dir */
2030		if (!ofs->indexdir)
2031			sb->s_flags |= SB_RDONLY;
2032	}
2033
2034	err = ovl_check_overlapping_layers(sb, ofs);
2035	if (err)
2036		goto out_free_oe;
2037
2038	/* Show index=off in /proc/mounts for forced r/o mount */
2039	if (!ofs->indexdir) {
2040		ofs->config.index = false;
2041		if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) {
2042			pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n");
2043			ofs->config.nfs_export = false;
2044		}
2045	}
2046
2047	if (ofs->config.metacopy && ofs->config.nfs_export) {
2048		pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
2049		ofs->config.nfs_export = false;
2050	}
2051
 
 
 
 
 
2052	if (ofs->config.nfs_export)
2053		sb->s_export_op = &ovl_export_operations;
 
 
2054
2055	/* Never override disk quota limits or use reserved space */
2056	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
2057
2058	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
2059	sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers :
2060		ovl_trusted_xattr_handlers;
2061	sb->s_fs_info = ofs;
 
2062	sb->s_flags |= SB_POSIXACL;
 
2063	sb->s_iflags |= SB_I_SKIP_SYNC;
 
 
 
 
 
 
 
2064
2065	err = -ENOMEM;
2066	root_dentry = ovl_get_root(sb, upperpath.dentry, oe);
2067	if (!root_dentry)
2068		goto out_free_oe;
2069
2070	mntput(upperpath.mnt);
2071	kfree(splitlower);
2072
2073	sb->s_root = root_dentry;
2074
2075	return 0;
2076
2077out_free_oe:
2078	ovl_entry_stack_free(oe);
2079	kfree(oe);
2080out_err:
2081	kfree(splitlower);
2082	path_put(&upperpath);
2083	ovl_free_fs(ofs);
2084out:
2085	return err;
2086}
2087
2088static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
2089				const char *dev_name, void *raw_data)
2090{
2091	return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
2092}
2093
2094static struct file_system_type ovl_fs_type = {
2095	.owner		= THIS_MODULE,
2096	.name		= "overlay",
2097	.fs_flags	= FS_USERNS_MOUNT,
2098	.mount		= ovl_mount,
2099	.kill_sb	= kill_anon_super,
2100};
2101MODULE_ALIAS_FS("overlay");
2102
2103static void ovl_inode_init_once(void *foo)
2104{
2105	struct ovl_inode *oi = foo;
2106
2107	inode_init_once(&oi->vfs_inode);
2108}
2109
2110static int __init ovl_init(void)
2111{
2112	int err;
2113
2114	ovl_inode_cachep = kmem_cache_create("ovl_inode",
2115					     sizeof(struct ovl_inode), 0,
2116					     (SLAB_RECLAIM_ACCOUNT|
2117					      SLAB_MEM_SPREAD|SLAB_ACCOUNT),
2118					     ovl_inode_init_once);
2119	if (ovl_inode_cachep == NULL)
2120		return -ENOMEM;
2121
2122	err = ovl_aio_request_cache_init();
2123	if (!err) {
2124		err = register_filesystem(&ovl_fs_type);
2125		if (!err)
2126			return 0;
2127
2128		ovl_aio_request_cache_destroy();
2129	}
2130	kmem_cache_destroy(ovl_inode_cachep);
2131
2132	return err;
2133}
2134
2135static void __exit ovl_exit(void)
2136{
2137	unregister_filesystem(&ovl_fs_type);
2138
2139	/*
2140	 * Make sure all delayed rcu free inodes are flushed before we
2141	 * destroy cache.
2142	 */
2143	rcu_barrier();
2144	kmem_cache_destroy(ovl_inode_cachep);
2145	ovl_aio_request_cache_destroy();
2146}
2147
2148module_init(ovl_init);
2149module_exit(ovl_exit);