Linux Audio

Check our new training course

Loading...
v6.13.7
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2011 Novell Inc.
   4 * Copyright (C) 2016 Red Hat, Inc.
   5 */
   6
   7#include <linux/fs.h>
   8#include <linux/mount.h>
   9#include <linux/slab.h>
  10#include <linux/cred.h>
  11#include <linux/xattr.h>
  12#include <linux/exportfs.h>
  13#include <linux/file.h>
  14#include <linux/fileattr.h>
  15#include <linux/uuid.h>
  16#include <linux/namei.h>
  17#include <linux/ratelimit.h>
  18#include "overlayfs.h"
  19
  20/* Get write access to upper mnt - may fail if upper sb was remounted ro */
  21int ovl_get_write_access(struct dentry *dentry)
  22{
  23	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
  24	return mnt_get_write_access(ovl_upper_mnt(ofs));
  25}
  26
  27/* Get write access to upper sb - may block if upper sb is frozen */
  28void ovl_start_write(struct dentry *dentry)
  29{
  30	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
  31	sb_start_write(ovl_upper_mnt(ofs)->mnt_sb);
  32}
  33
  34int ovl_want_write(struct dentry *dentry)
  35{
  36	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
  37	return mnt_want_write(ovl_upper_mnt(ofs));
  38}
  39
  40void ovl_put_write_access(struct dentry *dentry)
  41{
  42	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
  43	mnt_put_write_access(ovl_upper_mnt(ofs));
  44}
  45
  46void ovl_end_write(struct dentry *dentry)
  47{
  48	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
  49	sb_end_write(ovl_upper_mnt(ofs)->mnt_sb);
  50}
  51
  52void ovl_drop_write(struct dentry *dentry)
  53{
  54	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
  55	mnt_drop_write(ovl_upper_mnt(ofs));
  56}
  57
  58struct dentry *ovl_workdir(struct dentry *dentry)
  59{
  60	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
  61	return ofs->workdir;
  62}
  63
  64const struct cred *ovl_override_creds(struct super_block *sb)
  65{
  66	struct ovl_fs *ofs = OVL_FS(sb);
  67
  68	return override_creds_light(ofs->creator_cred);
  69}
  70
  71void ovl_revert_creds(const struct cred *old_cred)
  72{
  73	revert_creds_light(old_cred);
  74}
  75
  76/*
  77 * Check if underlying fs supports file handles and try to determine encoding
  78 * type, in order to deduce maximum inode number used by fs.
  79 *
  80 * Return 0 if file handles are not supported.
  81 * Return 1 (FILEID_INO32_GEN) if fs uses the default 32bit inode encoding.
  82 * Return -1 if fs uses a non default encoding with unknown inode size.
  83 */
  84int ovl_can_decode_fh(struct super_block *sb)
  85{
  86	if (!capable(CAP_DAC_READ_SEARCH))
  87		return 0;
  88
  89	if (!exportfs_can_decode_fh(sb->s_export_op))
  90		return 0;
  91
  92	return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN;
  93}
  94
  95struct dentry *ovl_indexdir(struct super_block *sb)
  96{
  97	struct ovl_fs *ofs = OVL_FS(sb);
  98
  99	return ofs->config.index ? ofs->workdir : NULL;
 100}
 101
 102/* Index all files on copy up. For now only enabled for NFS export */
 103bool ovl_index_all(struct super_block *sb)
 104{
 105	struct ovl_fs *ofs = OVL_FS(sb);
 106
 107	return ofs->config.nfs_export && ofs->config.index;
 108}
 109
 110/* Verify lower origin on lookup. For now only enabled for NFS export */
 111bool ovl_verify_lower(struct super_block *sb)
 112{
 113	struct ovl_fs *ofs = OVL_FS(sb);
 114
 115	return ofs->config.nfs_export && ofs->config.index;
 116}
 117
 118struct ovl_path *ovl_stack_alloc(unsigned int n)
 119{
 120	return kcalloc(n, sizeof(struct ovl_path), GFP_KERNEL);
 121}
 122
 123void ovl_stack_cpy(struct ovl_path *dst, struct ovl_path *src, unsigned int n)
 124{
 125	unsigned int i;
 126
 127	memcpy(dst, src, sizeof(struct ovl_path) * n);
 128	for (i = 0; i < n; i++)
 129		dget(src[i].dentry);
 130}
 131
 132void ovl_stack_put(struct ovl_path *stack, unsigned int n)
 133{
 134	unsigned int i;
 135
 136	for (i = 0; stack && i < n; i++)
 137		dput(stack[i].dentry);
 138}
 139
 140void ovl_stack_free(struct ovl_path *stack, unsigned int n)
 141{
 142	ovl_stack_put(stack, n);
 143	kfree(stack);
 144}
 145
 146struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
 147{
 148	size_t size = offsetof(struct ovl_entry, __lowerstack[numlower]);
 149	struct ovl_entry *oe = kzalloc(size, GFP_KERNEL);
 150
 151	if (oe)
 152		oe->__numlower = numlower;
 153
 154	return oe;
 155}
 156
 157void ovl_free_entry(struct ovl_entry *oe)
 158{
 159	ovl_stack_put(ovl_lowerstack(oe), ovl_numlower(oe));
 160	kfree(oe);
 161}
 162
 163#define OVL_D_REVALIDATE (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE)
 164
 165bool ovl_dentry_remote(struct dentry *dentry)
 166{
 167	return dentry->d_flags & OVL_D_REVALIDATE;
 168}
 169
 170void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *realdentry)
 171{
 172	if (!ovl_dentry_remote(realdentry))
 173		return;
 174
 175	spin_lock(&dentry->d_lock);
 176	dentry->d_flags |= realdentry->d_flags & OVL_D_REVALIDATE;
 177	spin_unlock(&dentry->d_lock);
 178}
 179
 180void ovl_dentry_init_reval(struct dentry *dentry, struct dentry *upperdentry,
 181			   struct ovl_entry *oe)
 182{
 183	return ovl_dentry_init_flags(dentry, upperdentry, oe, OVL_D_REVALIDATE);
 184}
 185
 186void ovl_dentry_init_flags(struct dentry *dentry, struct dentry *upperdentry,
 187			   struct ovl_entry *oe, unsigned int mask)
 188{
 189	struct ovl_path *lowerstack = ovl_lowerstack(oe);
 190	unsigned int i, flags = 0;
 191
 192	if (upperdentry)
 193		flags |= upperdentry->d_flags;
 194	for (i = 0; i < ovl_numlower(oe) && lowerstack[i].dentry; i++)
 195		flags |= lowerstack[i].dentry->d_flags;
 196
 197	spin_lock(&dentry->d_lock);
 198	dentry->d_flags &= ~mask;
 199	dentry->d_flags |= flags & mask;
 200	spin_unlock(&dentry->d_lock);
 201}
 202
 203bool ovl_dentry_weird(struct dentry *dentry)
 204{
 205	if (!d_can_lookup(dentry) && !d_is_file(dentry) && !d_is_symlink(dentry))
 206		return true;
 207
 208	return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT |
 209				  DCACHE_MANAGE_TRANSIT |
 210				  DCACHE_OP_HASH |
 211				  DCACHE_OP_COMPARE);
 212}
 213
 214enum ovl_path_type ovl_path_type(struct dentry *dentry)
 215{
 216	struct ovl_entry *oe = OVL_E(dentry);
 217	enum ovl_path_type type = 0;
 218
 219	if (ovl_dentry_upper(dentry)) {
 220		type = __OVL_PATH_UPPER;
 221
 222		/*
 223		 * Non-dir dentry can hold lower dentry of its copy up origin.
 224		 */
 225		if (ovl_numlower(oe)) {
 226			if (ovl_test_flag(OVL_CONST_INO, d_inode(dentry)))
 227				type |= __OVL_PATH_ORIGIN;
 228			if (d_is_dir(dentry) ||
 229			    !ovl_has_upperdata(d_inode(dentry)))
 230				type |= __OVL_PATH_MERGE;
 231		}
 232	} else {
 233		if (ovl_numlower(oe) > 1)
 234			type |= __OVL_PATH_MERGE;
 235	}
 236	return type;
 237}
 238
 239void ovl_path_upper(struct dentry *dentry, struct path *path)
 240{
 241	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 242
 243	path->mnt = ovl_upper_mnt(ofs);
 244	path->dentry = ovl_dentry_upper(dentry);
 245}
 246
 247void ovl_path_lower(struct dentry *dentry, struct path *path)
 248{
 249	struct ovl_entry *oe = OVL_E(dentry);
 250	struct ovl_path *lowerpath = ovl_lowerstack(oe);
 251
 252	if (ovl_numlower(oe)) {
 253		path->mnt = lowerpath->layer->mnt;
 254		path->dentry = lowerpath->dentry;
 255	} else {
 256		*path = (struct path) { };
 257	}
 258}
 259
 260void ovl_path_lowerdata(struct dentry *dentry, struct path *path)
 261{
 262	struct ovl_entry *oe = OVL_E(dentry);
 263	struct ovl_path *lowerdata = ovl_lowerdata(oe);
 264	struct dentry *lowerdata_dentry = ovl_lowerdata_dentry(oe);
 265
 266	if (lowerdata_dentry) {
 267		path->dentry = lowerdata_dentry;
 268		/*
 269		 * Pairs with smp_wmb() in ovl_dentry_set_lowerdata().
 270		 * Make sure that if lowerdata->dentry is visible, then
 271		 * datapath->layer is visible as well.
 272		 */
 273		smp_rmb();
 274		path->mnt = READ_ONCE(lowerdata->layer)->mnt;
 275	} else {
 276		*path = (struct path) { };
 277	}
 278}
 279
 280enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
 281{
 282	enum ovl_path_type type = ovl_path_type(dentry);
 283
 284	if (!OVL_TYPE_UPPER(type))
 285		ovl_path_lower(dentry, path);
 286	else
 287		ovl_path_upper(dentry, path);
 288
 289	return type;
 290}
 291
 292enum ovl_path_type ovl_path_realdata(struct dentry *dentry, struct path *path)
 293{
 294	enum ovl_path_type type = ovl_path_type(dentry);
 295
 296	WARN_ON_ONCE(d_is_dir(dentry));
 297
 298	if (!OVL_TYPE_UPPER(type) || OVL_TYPE_MERGE(type))
 299		ovl_path_lowerdata(dentry, path);
 300	else
 301		ovl_path_upper(dentry, path);
 302
 303	return type;
 304}
 305
 306struct dentry *ovl_dentry_upper(struct dentry *dentry)
 307{
 308	return ovl_upperdentry_dereference(OVL_I(d_inode(dentry)));
 309}
 310
 311struct dentry *ovl_dentry_lower(struct dentry *dentry)
 312{
 313	struct ovl_entry *oe = OVL_E(dentry);
 314
 315	return ovl_numlower(oe) ? ovl_lowerstack(oe)->dentry : NULL;
 316}
 317
 318const struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
 319{
 320	struct ovl_entry *oe = OVL_E(dentry);
 321
 322	return ovl_numlower(oe) ? ovl_lowerstack(oe)->layer : NULL;
 323}
 324
 325/*
 326 * ovl_dentry_lower() could return either a data dentry or metacopy dentry
 327 * depending on what is stored in lowerstack[0]. At times we need to find
 328 * lower dentry which has data (and not metacopy dentry). This helper
 329 * returns the lower data dentry.
 330 */
 331struct dentry *ovl_dentry_lowerdata(struct dentry *dentry)
 332{
 333	return ovl_lowerdata_dentry(OVL_E(dentry));
 334}
 335
 336int ovl_dentry_set_lowerdata(struct dentry *dentry, struct ovl_path *datapath)
 337{
 338	struct ovl_entry *oe = OVL_E(dentry);
 339	struct ovl_path *lowerdata = ovl_lowerdata(oe);
 340	struct dentry *datadentry = datapath->dentry;
 341
 342	if (WARN_ON_ONCE(ovl_numlower(oe) <= 1))
 343		return -EIO;
 344
 345	WRITE_ONCE(lowerdata->layer, datapath->layer);
 346	/*
 347	 * Pairs with smp_rmb() in ovl_path_lowerdata().
 348	 * Make sure that if lowerdata->dentry is visible, then
 349	 * lowerdata->layer is visible as well.
 350	 */
 351	smp_wmb();
 352	WRITE_ONCE(lowerdata->dentry, dget(datadentry));
 353
 354	ovl_dentry_update_reval(dentry, datadentry);
 355
 356	return 0;
 357}
 358
 359struct dentry *ovl_dentry_real(struct dentry *dentry)
 360{
 361	return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry);
 362}
 363
 364struct dentry *ovl_i_dentry_upper(struct inode *inode)
 365{
 366	return ovl_upperdentry_dereference(OVL_I(inode));
 367}
 368
 369struct inode *ovl_i_path_real(struct inode *inode, struct path *path)
 370{
 371	struct ovl_path *lowerpath = ovl_lowerpath(OVL_I_E(inode));
 372
 373	path->dentry = ovl_i_dentry_upper(inode);
 374	if (!path->dentry) {
 375		path->dentry = lowerpath->dentry;
 376		path->mnt = lowerpath->layer->mnt;
 377	} else {
 378		path->mnt = ovl_upper_mnt(OVL_FS(inode->i_sb));
 379	}
 380
 381	return path->dentry ? d_inode_rcu(path->dentry) : NULL;
 382}
 383
 384struct inode *ovl_inode_upper(struct inode *inode)
 385{
 386	struct dentry *upperdentry = ovl_i_dentry_upper(inode);
 387
 388	return upperdentry ? d_inode(upperdentry) : NULL;
 389}
 390
 391struct inode *ovl_inode_lower(struct inode *inode)
 392{
 393	struct ovl_path *lowerpath = ovl_lowerpath(OVL_I_E(inode));
 394
 395	return lowerpath ? d_inode(lowerpath->dentry) : NULL;
 396}
 397
 398struct inode *ovl_inode_real(struct inode *inode)
 399{
 400	return ovl_inode_upper(inode) ?: ovl_inode_lower(inode);
 401}
 402
 403/* Return inode which contains lower data. Do not return metacopy */
 404struct inode *ovl_inode_lowerdata(struct inode *inode)
 405{
 406	struct dentry *lowerdata = ovl_lowerdata_dentry(OVL_I_E(inode));
 407
 408	if (WARN_ON(!S_ISREG(inode->i_mode)))
 409		return NULL;
 410
 411	return lowerdata ? d_inode(lowerdata) : NULL;
 412}
 413
 414/* Return real inode which contains data. Does not return metacopy inode */
 415struct inode *ovl_inode_realdata(struct inode *inode)
 416{
 417	struct inode *upperinode;
 418
 419	upperinode = ovl_inode_upper(inode);
 420	if (upperinode && ovl_has_upperdata(inode))
 421		return upperinode;
 422
 423	return ovl_inode_lowerdata(inode);
 424}
 425
 426const char *ovl_lowerdata_redirect(struct inode *inode)
 427{
 428	return inode && S_ISREG(inode->i_mode) ?
 429		OVL_I(inode)->lowerdata_redirect : NULL;
 430}
 431
 432struct ovl_dir_cache *ovl_dir_cache(struct inode *inode)
 433{
 434	return inode && S_ISDIR(inode->i_mode) ? OVL_I(inode)->cache : NULL;
 435}
 436
 437void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache)
 438{
 439	OVL_I(inode)->cache = cache;
 440}
 441
 442void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry)
 443{
 444	set_bit(flag, OVL_E_FLAGS(dentry));
 445}
 446
 447void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry)
 448{
 449	clear_bit(flag, OVL_E_FLAGS(dentry));
 450}
 451
 452bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry)
 453{
 454	return test_bit(flag, OVL_E_FLAGS(dentry));
 455}
 456
 457bool ovl_dentry_is_opaque(struct dentry *dentry)
 458{
 459	return ovl_dentry_test_flag(OVL_E_OPAQUE, dentry);
 460}
 461
 462bool ovl_dentry_is_whiteout(struct dentry *dentry)
 463{
 464	return !dentry->d_inode && ovl_dentry_is_opaque(dentry);
 465}
 466
 467void ovl_dentry_set_opaque(struct dentry *dentry)
 468{
 469	ovl_dentry_set_flag(OVL_E_OPAQUE, dentry);
 470}
 471
 472bool ovl_dentry_has_xwhiteouts(struct dentry *dentry)
 473{
 474	return ovl_dentry_test_flag(OVL_E_XWHITEOUTS, dentry);
 475}
 476
 477void ovl_dentry_set_xwhiteouts(struct dentry *dentry)
 478{
 479	ovl_dentry_set_flag(OVL_E_XWHITEOUTS, dentry);
 480}
 481
 482/*
 483 * ovl_layer_set_xwhiteouts() is called before adding the overlay dir
 484 * dentry to dcache, while readdir of that same directory happens after
 485 * the overlay dir dentry is in dcache, so if some cpu observes that
 486 * ovl_dentry_is_xwhiteouts(), it will also observe layer->has_xwhiteouts
 487 * for the layers where xwhiteouts marker was found in that merge dir.
 488 */
 489void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs,
 490			      const struct ovl_layer *layer)
 491{
 492	if (layer->has_xwhiteouts)
 493		return;
 494
 495	/* Write once to read-mostly layer properties */
 496	ofs->layers[layer->idx].has_xwhiteouts = true;
 497}
 498
 499/*
 500 * For hard links and decoded file handles, it's possible for ovl_dentry_upper()
 501 * to return positive, while there's no actual upper alias for the inode.
 502 * Copy up code needs to know about the existence of the upper alias, so it
 503 * can't use ovl_dentry_upper().
 504 */
 505bool ovl_dentry_has_upper_alias(struct dentry *dentry)
 506{
 507	return ovl_dentry_test_flag(OVL_E_UPPER_ALIAS, dentry);
 508}
 509
 510void ovl_dentry_set_upper_alias(struct dentry *dentry)
 511{
 512	ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry);
 513}
 514
 515static bool ovl_should_check_upperdata(struct inode *inode)
 516{
 517	if (!S_ISREG(inode->i_mode))
 518		return false;
 519
 520	if (!ovl_inode_lower(inode))
 521		return false;
 522
 523	return true;
 524}
 525
 526bool ovl_has_upperdata(struct inode *inode)
 527{
 528	if (!ovl_should_check_upperdata(inode))
 529		return true;
 530
 531	if (!ovl_test_flag(OVL_UPPERDATA, inode))
 532		return false;
 533	/*
 534	 * Pairs with smp_wmb() in ovl_set_upperdata(). Main user of
 535	 * ovl_has_upperdata() is ovl_copy_up_meta_inode_data(). Make sure
 536	 * if setting of OVL_UPPERDATA is visible, then effects of writes
 537	 * before that are visible too.
 538	 */
 539	smp_rmb();
 540	return true;
 541}
 542
 543void ovl_set_upperdata(struct inode *inode)
 544{
 545	/*
 546	 * Pairs with smp_rmb() in ovl_has_upperdata(). Make sure
 547	 * if OVL_UPPERDATA flag is visible, then effects of write operations
 548	 * before it are visible as well.
 549	 */
 550	smp_wmb();
 551	ovl_set_flag(OVL_UPPERDATA, inode);
 552}
 553
 554/* Caller should hold ovl_inode->lock */
 555bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags)
 556{
 557	if (!ovl_open_flags_need_copy_up(flags))
 558		return false;
 559
 560	return !ovl_test_flag(OVL_UPPERDATA, d_inode(dentry));
 561}
 562
 563bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags)
 564{
 565	if (!ovl_open_flags_need_copy_up(flags))
 566		return false;
 567
 568	return !ovl_has_upperdata(d_inode(dentry));
 569}
 570
 571const char *ovl_dentry_get_redirect(struct dentry *dentry)
 572{
 573	return OVL_I(d_inode(dentry))->redirect;
 574}
 575
 576void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
 577{
 578	struct ovl_inode *oi = OVL_I(d_inode(dentry));
 579
 580	kfree(oi->redirect);
 581	oi->redirect = redirect;
 582}
 583
 584void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
 585{
 586	struct inode *upperinode = d_inode(upperdentry);
 587
 588	WARN_ON(OVL_I(inode)->__upperdentry);
 589
 590	/*
 591	 * Make sure upperdentry is consistent before making it visible
 592	 */
 593	smp_wmb();
 594	OVL_I(inode)->__upperdentry = upperdentry;
 595	if (inode_unhashed(inode)) {
 596		inode->i_private = upperinode;
 597		__insert_inode_hash(inode, (unsigned long) upperinode);
 598	}
 599}
 600
 601static void ovl_dir_version_inc(struct dentry *dentry, bool impurity)
 602{
 603	struct inode *inode = d_inode(dentry);
 604
 605	WARN_ON(!inode_is_locked(inode));
 606	WARN_ON(!d_is_dir(dentry));
 607	/*
 608	 * Version is used by readdir code to keep cache consistent.
 609	 * For merge dirs (or dirs with origin) all changes need to be noted.
 610	 * For non-merge dirs, cache contains only impure entries (i.e. ones
 611	 * which have been copied up and have origins), so only need to note
 612	 * changes to impure entries.
 613	 */
 614	if (!ovl_dir_is_real(inode) || impurity)
 615		OVL_I(inode)->version++;
 616}
 617
 618void ovl_dir_modified(struct dentry *dentry, bool impurity)
 619{
 620	/* Copy mtime/ctime */
 621	ovl_copyattr(d_inode(dentry));
 622
 623	ovl_dir_version_inc(dentry, impurity);
 624}
 625
 626u64 ovl_inode_version_get(struct inode *inode)
 627{
 628	WARN_ON(!inode_is_locked(inode));
 629	return OVL_I(inode)->version;
 630}
 631
 632bool ovl_is_whiteout(struct dentry *dentry)
 633{
 634	struct inode *inode = dentry->d_inode;
 635
 636	return inode && IS_WHITEOUT(inode);
 637}
 638
 639/*
 640 * Use this over ovl_is_whiteout for upper and lower files, as it also
 641 * handles overlay.whiteout xattr whiteout files.
 642 */
 643bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path)
 644{
 645	return ovl_is_whiteout(path->dentry) ||
 646		ovl_path_check_xwhiteout_xattr(ofs, path);
 647}
 648
 649struct file *ovl_path_open(const struct path *path, int flags)
 650{
 651	struct inode *inode = d_inode(path->dentry);
 652	struct mnt_idmap *real_idmap = mnt_idmap(path->mnt);
 653	int err, acc_mode;
 654
 655	if (flags & ~(O_ACCMODE | O_LARGEFILE))
 656		BUG();
 657
 658	switch (flags & O_ACCMODE) {
 659	case O_RDONLY:
 660		acc_mode = MAY_READ;
 661		break;
 662	case O_WRONLY:
 663		acc_mode = MAY_WRITE;
 664		break;
 665	default:
 666		BUG();
 667	}
 668
 669	err = inode_permission(real_idmap, inode, acc_mode | MAY_OPEN);
 670	if (err)
 671		return ERR_PTR(err);
 672
 673	/* O_NOATIME is an optimization, don't fail if not permitted */
 674	if (inode_owner_or_capable(real_idmap, inode))
 675		flags |= O_NOATIME;
 676
 677	return dentry_open(path, flags, current_cred());
 678}
 679
 680/* Caller should hold ovl_inode->lock */
 681static bool ovl_already_copied_up_locked(struct dentry *dentry, int flags)
 682{
 683	bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
 684
 685	if (ovl_dentry_upper(dentry) &&
 686	    (ovl_dentry_has_upper_alias(dentry) || disconnected) &&
 687	    !ovl_dentry_needs_data_copy_up_locked(dentry, flags))
 688		return true;
 689
 690	return false;
 691}
 692
 693bool ovl_already_copied_up(struct dentry *dentry, int flags)
 694{
 695	bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
 696
 697	/*
 698	 * Check if copy-up has happened as well as for upper alias (in
 699	 * case of hard links) is there.
 700	 *
 701	 * Both checks are lockless:
 702	 *  - false negatives: will recheck under oi->lock
 703	 *  - false positives:
 704	 *    + ovl_dentry_upper() uses memory barriers to ensure the
 705	 *      upper dentry is up-to-date
 706	 *    + ovl_dentry_has_upper_alias() relies on locking of
 707	 *      upper parent i_rwsem to prevent reordering copy-up
 708	 *      with rename.
 709	 */
 710	if (ovl_dentry_upper(dentry) &&
 711	    (ovl_dentry_has_upper_alias(dentry) || disconnected) &&
 712	    !ovl_dentry_needs_data_copy_up(dentry, flags))
 713		return true;
 714
 715	return false;
 716}
 717
 718/*
 719 * The copy up "transaction" keeps an elevated mnt write count on upper mnt,
 720 * but leaves taking freeze protection on upper sb to lower level helpers.
 721 */
 722int ovl_copy_up_start(struct dentry *dentry, int flags)
 723{
 724	struct inode *inode = d_inode(dentry);
 725	int err;
 726
 727	err = ovl_inode_lock_interruptible(inode);
 728	if (err)
 729		return err;
 730
 731	if (ovl_already_copied_up_locked(dentry, flags))
 732		err = 1; /* Already copied up */
 733	else
 734		err = ovl_get_write_access(dentry);
 735	if (err)
 736		goto out_unlock;
 737
 738	return 0;
 739
 740out_unlock:
 741	ovl_inode_unlock(inode);
 742	return err;
 743}
 744
 745void ovl_copy_up_end(struct dentry *dentry)
 746{
 747	ovl_put_write_access(dentry);
 748	ovl_inode_unlock(d_inode(dentry));
 749}
 750
 751bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path)
 752{
 753	int res;
 754
 755	res = ovl_path_getxattr(ofs, path, OVL_XATTR_ORIGIN, NULL, 0);
 756
 757	/* Zero size value means "copied up but origin unknown" */
 758	if (res >= 0)
 759		return true;
 760
 761	return false;
 762}
 763
 764bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path)
 765{
 766	struct dentry *dentry = path->dentry;
 767	int res;
 768
 769	/* xattr.whiteout must be a zero size regular file */
 770	if (!d_is_reg(dentry) || i_size_read(d_inode(dentry)) != 0)
 771		return false;
 772
 773	res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUT, NULL, 0);
 774	return res >= 0;
 775}
 776
 777/*
 778 * Load persistent uuid from xattr into s_uuid if found, or store a new
 779 * random generated value in s_uuid and in xattr.
 780 */
 781bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs,
 782			 const struct path *upperpath)
 783{
 784	bool set = false;
 785	uuid_t uuid;
 786	int res;
 787
 788	/* Try to load existing persistent uuid */
 789	res = ovl_path_getxattr(ofs, upperpath, OVL_XATTR_UUID, uuid.b,
 790				UUID_SIZE);
 791	if (res == UUID_SIZE)
 792		goto set_uuid;
 793
 794	if (res != -ENODATA)
 795		goto fail;
 796
 797	/*
 798	 * With uuid=auto, if uuid xattr is found, it will be used.
 799	 * If uuid xattrs is not found, generate a persistent uuid only on mount
 800	 * of new overlays where upper root dir is not yet marked as impure.
 801	 * An upper dir is marked as impure on copy up or lookup of its subdirs.
 802	 */
 803	if (ofs->config.uuid == OVL_UUID_AUTO) {
 804		res = ovl_path_getxattr(ofs, upperpath, OVL_XATTR_IMPURE, NULL,
 805					0);
 806		if (res > 0) {
 807			/* Any mount of old overlay - downgrade to uuid=null */
 808			ofs->config.uuid = OVL_UUID_NULL;
 809			return true;
 810		} else if (res == -ENODATA) {
 811			/* First mount of new overlay - upgrade to uuid=on */
 812			ofs->config.uuid = OVL_UUID_ON;
 813		} else if (res < 0) {
 814			goto fail;
 815		}
 816
 817	}
 818
 819	/* Generate overlay instance uuid */
 820	uuid_gen(&uuid);
 821
 822	/* Try to store persistent uuid */
 823	set = true;
 824	res = ovl_setxattr(ofs, upperpath->dentry, OVL_XATTR_UUID, uuid.b,
 825			   UUID_SIZE);
 826	if (res)
 827		goto fail;
 828
 829set_uuid:
 830	super_set_uuid(sb, uuid.b, sizeof(uuid));
 831	return true;
 832
 833fail:
 834	ofs->config.uuid = OVL_UUID_NULL;
 835	pr_warn("failed to %s uuid (%pd2, err=%i); falling back to uuid=null.\n",
 836		set ? "set" : "get", upperpath->dentry, res);
 837	return false;
 838}
 839
 840char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path,
 841			   enum ovl_xattr ox)
 842{
 843	int res;
 844	char val;
 845
 846	if (!d_is_dir(path->dentry))
 847		return 0;
 848
 849	res = ovl_path_getxattr(ofs, path, ox, &val, 1);
 850	return res == 1 ? val : 0;
 851}
 852
 853#define OVL_XATTR_OPAQUE_POSTFIX	"opaque"
 854#define OVL_XATTR_REDIRECT_POSTFIX	"redirect"
 855#define OVL_XATTR_ORIGIN_POSTFIX	"origin"
 856#define OVL_XATTR_IMPURE_POSTFIX	"impure"
 857#define OVL_XATTR_NLINK_POSTFIX		"nlink"
 858#define OVL_XATTR_UPPER_POSTFIX		"upper"
 859#define OVL_XATTR_UUID_POSTFIX		"uuid"
 860#define OVL_XATTR_METACOPY_POSTFIX	"metacopy"
 861#define OVL_XATTR_PROTATTR_POSTFIX	"protattr"
 862#define OVL_XATTR_XWHITEOUT_POSTFIX	"whiteout"
 863
 864#define OVL_XATTR_TAB_ENTRY(x) \
 865	[x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \
 866		[true] = OVL_XATTR_USER_PREFIX x ## _POSTFIX }
 867
 868const char *const ovl_xattr_table[][2] = {
 869	OVL_XATTR_TAB_ENTRY(OVL_XATTR_OPAQUE),
 870	OVL_XATTR_TAB_ENTRY(OVL_XATTR_REDIRECT),
 871	OVL_XATTR_TAB_ENTRY(OVL_XATTR_ORIGIN),
 872	OVL_XATTR_TAB_ENTRY(OVL_XATTR_IMPURE),
 873	OVL_XATTR_TAB_ENTRY(OVL_XATTR_NLINK),
 874	OVL_XATTR_TAB_ENTRY(OVL_XATTR_UPPER),
 875	OVL_XATTR_TAB_ENTRY(OVL_XATTR_UUID),
 876	OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY),
 877	OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR),
 878	OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUT),
 879};
 880
 881int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,
 882		       enum ovl_xattr ox, const void *value, size_t size,
 883		       int xerr)
 884{
 885	int err;
 886
 887	if (ofs->noxattr)
 888		return xerr;
 889
 890	err = ovl_setxattr(ofs, upperdentry, ox, value, size);
 891
 892	if (err == -EOPNOTSUPP) {
 893		pr_warn("cannot set %s xattr on upper\n", ovl_xattr(ofs, ox));
 894		ofs->noxattr = true;
 895		return xerr;
 896	}
 897
 898	return err;
 899}
 900
 901int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
 902{
 903	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 904	int err;
 905
 906	if (ovl_test_flag(OVL_IMPURE, d_inode(dentry)))
 907		return 0;
 908
 909	/*
 910	 * Do not fail when upper doesn't support xattrs.
 911	 * Upper inodes won't have origin nor redirect xattr anyway.
 912	 */
 913	err = ovl_check_setxattr(ofs, upperdentry, OVL_XATTR_IMPURE, "y", 1, 0);
 914	if (!err)
 915		ovl_set_flag(OVL_IMPURE, d_inode(dentry));
 916
 917	return err;
 918}
 919
 920
 921#define OVL_PROTATTR_MAX 32 /* Reserved for future flags */
 922
 923void ovl_check_protattr(struct inode *inode, struct dentry *upper)
 924{
 925	struct ovl_fs *ofs = OVL_FS(inode->i_sb);
 926	u32 iflags = inode->i_flags & OVL_PROT_I_FLAGS_MASK;
 927	char buf[OVL_PROTATTR_MAX+1];
 928	int res, n;
 929
 930	res = ovl_getxattr_upper(ofs, upper, OVL_XATTR_PROTATTR, buf,
 931				 OVL_PROTATTR_MAX);
 932	if (res < 0)
 933		return;
 934
 935	/*
 936	 * Initialize inode flags from overlay.protattr xattr and upper inode
 937	 * flags.  If upper inode has those fileattr flags set (i.e. from old
 938	 * kernel), we do not clear them on ovl_get_inode(), but we will clear
 939	 * them on next fileattr_set().
 940	 */
 941	for (n = 0; n < res; n++) {
 942		if (buf[n] == 'a')
 943			iflags |= S_APPEND;
 944		else if (buf[n] == 'i')
 945			iflags |= S_IMMUTABLE;
 946		else
 947			break;
 948	}
 949
 950	if (!res || n < res) {
 951		pr_warn_ratelimited("incompatible overlay.protattr format (%pd2, len=%d)\n",
 952				    upper, res);
 953	} else {
 954		inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK);
 955	}
 956}
 957
 958int ovl_set_protattr(struct inode *inode, struct dentry *upper,
 959		      struct fileattr *fa)
 960{
 961	struct ovl_fs *ofs = OVL_FS(inode->i_sb);
 962	char buf[OVL_PROTATTR_MAX];
 963	int len = 0, err = 0;
 964	u32 iflags = 0;
 965
 966	BUILD_BUG_ON(HWEIGHT32(OVL_PROT_FS_FLAGS_MASK) > OVL_PROTATTR_MAX);
 967
 968	if (fa->flags & FS_APPEND_FL) {
 969		buf[len++] = 'a';
 970		iflags |= S_APPEND;
 971	}
 972	if (fa->flags & FS_IMMUTABLE_FL) {
 973		buf[len++] = 'i';
 974		iflags |= S_IMMUTABLE;
 975	}
 976
 977	/*
 978	 * Do not allow to set protection flags when upper doesn't support
 979	 * xattrs, because we do not set those fileattr flags on upper inode.
 980	 * Remove xattr if it exist and all protection flags are cleared.
 981	 */
 982	if (len) {
 983		err = ovl_check_setxattr(ofs, upper, OVL_XATTR_PROTATTR,
 984					 buf, len, -EPERM);
 985	} else if (inode->i_flags & OVL_PROT_I_FLAGS_MASK) {
 986		err = ovl_removexattr(ofs, upper, OVL_XATTR_PROTATTR);
 987		if (err == -EOPNOTSUPP || err == -ENODATA)
 988			err = 0;
 989	}
 990	if (err)
 991		return err;
 992
 993	inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK);
 994
 995	/* Mask out the fileattr flags that should not be set in upper inode */
 996	fa->flags &= ~OVL_PROT_FS_FLAGS_MASK;
 997	fa->fsx_xflags &= ~OVL_PROT_FSX_FLAGS_MASK;
 998
 999	return 0;
1000}
1001
1002/*
1003 * Caller must hold a reference to inode to prevent it from being freed while
1004 * it is marked inuse.
1005 */
1006bool ovl_inuse_trylock(struct dentry *dentry)
1007{
1008	struct inode *inode = d_inode(dentry);
1009	bool locked = false;
1010
1011	spin_lock(&inode->i_lock);
1012	if (!(inode->i_state & I_OVL_INUSE)) {
1013		inode->i_state |= I_OVL_INUSE;
1014		locked = true;
1015	}
1016	spin_unlock(&inode->i_lock);
1017
1018	return locked;
1019}
1020
1021void ovl_inuse_unlock(struct dentry *dentry)
1022{
1023	if (dentry) {
1024		struct inode *inode = d_inode(dentry);
1025
1026		spin_lock(&inode->i_lock);
1027		WARN_ON(!(inode->i_state & I_OVL_INUSE));
1028		inode->i_state &= ~I_OVL_INUSE;
1029		spin_unlock(&inode->i_lock);
1030	}
1031}
1032
1033bool ovl_is_inuse(struct dentry *dentry)
1034{
1035	struct inode *inode = d_inode(dentry);
1036	bool inuse;
1037
1038	spin_lock(&inode->i_lock);
1039	inuse = (inode->i_state & I_OVL_INUSE);
1040	spin_unlock(&inode->i_lock);
1041
1042	return inuse;
1043}
1044
1045/*
1046 * Does this overlay dentry need to be indexed on copy up?
1047 */
1048bool ovl_need_index(struct dentry *dentry)
1049{
1050	struct dentry *lower = ovl_dentry_lower(dentry);
1051
1052	if (!lower || !ovl_indexdir(dentry->d_sb))
1053		return false;
1054
1055	/* Index all files for NFS export and consistency verification */
1056	if (ovl_index_all(dentry->d_sb))
1057		return true;
1058
1059	/* Index only lower hardlinks on copy up */
1060	if (!d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
1061		return true;
1062
1063	return false;
1064}
1065
1066/* Caller must hold OVL_I(inode)->lock */
1067static void ovl_cleanup_index(struct dentry *dentry)
1068{
1069	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
1070	struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
1071	struct inode *dir = indexdir->d_inode;
1072	struct dentry *lowerdentry = ovl_dentry_lower(dentry);
1073	struct dentry *upperdentry = ovl_dentry_upper(dentry);
1074	struct dentry *index = NULL;
1075	struct inode *inode;
1076	struct qstr name = { };
1077	bool got_write = false;
1078	int err;
1079
1080	err = ovl_get_index_name(ofs, lowerdentry, &name);
1081	if (err)
1082		goto fail;
1083
1084	err = ovl_want_write(dentry);
1085	if (err)
1086		goto fail;
1087
1088	got_write = true;
1089	inode = d_inode(upperdentry);
1090	if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) {
1091		pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
1092				    upperdentry, inode->i_ino, inode->i_nlink);
1093		/*
1094		 * We either have a bug with persistent union nlink or a lower
1095		 * hardlink was added while overlay is mounted. Adding a lower
1096		 * hardlink and then unlinking all overlay hardlinks would drop
1097		 * overlay nlink to zero before all upper inodes are unlinked.
1098		 * As a safety measure, when that situation is detected, set
1099		 * the overlay nlink to the index inode nlink minus one for the
1100		 * index entry itself.
1101		 */
1102		set_nlink(d_inode(dentry), inode->i_nlink - 1);
1103		ovl_set_nlink_upper(dentry);
1104		goto out;
1105	}
1106
1107	inode_lock_nested(dir, I_MUTEX_PARENT);
1108	index = ovl_lookup_upper(ofs, name.name, indexdir, name.len);
1109	err = PTR_ERR(index);
1110	if (IS_ERR(index)) {
1111		index = NULL;
1112	} else if (ovl_index_all(dentry->d_sb)) {
1113		/* Whiteout orphan index to block future open by handle */
1114		err = ovl_cleanup_and_whiteout(OVL_FS(dentry->d_sb),
1115					       dir, index);
1116	} else {
1117		/* Cleanup orphan index entries */
1118		err = ovl_cleanup(ofs, dir, index);
1119	}
1120
1121	inode_unlock(dir);
1122	if (err)
1123		goto fail;
1124
1125out:
1126	if (got_write)
1127		ovl_drop_write(dentry);
1128	kfree(name.name);
1129	dput(index);
1130	return;
1131
1132fail:
1133	pr_err("cleanup index of '%pd2' failed (%i)\n", dentry, err);
1134	goto out;
1135}
1136
1137/*
1138 * Operations that change overlay inode and upper inode nlink need to be
1139 * synchronized with copy up for persistent nlink accounting.
1140 */
1141int ovl_nlink_start(struct dentry *dentry)
1142{
1143	struct inode *inode = d_inode(dentry);
1144	const struct cred *old_cred;
1145	int err;
1146
1147	if (WARN_ON(!inode))
1148		return -ENOENT;
1149
1150	/*
1151	 * With inodes index is enabled, we store the union overlay nlink
1152	 * in an xattr on the index inode. When whiting out an indexed lower,
1153	 * we need to decrement the overlay persistent nlink, but before the
1154	 * first copy up, we have no upper index inode to store the xattr.
1155	 *
1156	 * As a workaround, before whiteout/rename over an indexed lower,
1157	 * copy up to create the upper index. Creating the upper index will
1158	 * initialize the overlay nlink, so it could be dropped if unlink
1159	 * or rename succeeds.
1160	 *
1161	 * TODO: implement metadata only index copy up when called with
1162	 *       ovl_copy_up_flags(dentry, O_PATH).
1163	 */
1164	if (ovl_need_index(dentry) && !ovl_dentry_has_upper_alias(dentry)) {
1165		err = ovl_copy_up(dentry);
1166		if (err)
1167			return err;
1168	}
1169
1170	err = ovl_inode_lock_interruptible(inode);
1171	if (err)
1172		return err;
1173
1174	err = ovl_want_write(dentry);
1175	if (err)
1176		goto out_unlock;
1177
1178	if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, inode))
1179		return 0;
1180
1181	old_cred = ovl_override_creds(dentry->d_sb);
1182	/*
1183	 * The overlay inode nlink should be incremented/decremented IFF the
1184	 * upper operation succeeds, along with nlink change of upper inode.
1185	 * Therefore, before link/unlink/rename, we store the union nlink
1186	 * value relative to the upper inode nlink in an upper inode xattr.
1187	 */
1188	err = ovl_set_nlink_upper(dentry);
1189	ovl_revert_creds(old_cred);
1190	if (err)
1191		goto out_drop_write;
1192
1193	return 0;
1194
1195out_drop_write:
1196	ovl_drop_write(dentry);
1197out_unlock:
1198	ovl_inode_unlock(inode);
1199
1200	return err;
1201}
1202
1203void ovl_nlink_end(struct dentry *dentry)
1204{
1205	struct inode *inode = d_inode(dentry);
1206
1207	ovl_drop_write(dentry);
1208
1209	if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) {
1210		const struct cred *old_cred;
1211
1212		old_cred = ovl_override_creds(dentry->d_sb);
1213		ovl_cleanup_index(dentry);
1214		ovl_revert_creds(old_cred);
1215	}
1216
1217	ovl_inode_unlock(inode);
1218}
1219
1220int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
1221{
1222	struct dentry *trap;
1223
1224	/* Workdir should not be the same as upperdir */
1225	if (workdir == upperdir)
1226		goto err;
1227
1228	/* Workdir should not be subdir of upperdir and vice versa */
1229	trap = lock_rename(workdir, upperdir);
1230	if (IS_ERR(trap))
1231		goto err;
1232	if (trap)
1233		goto err_unlock;
1234
1235	return 0;
1236
1237err_unlock:
1238	unlock_rename(workdir, upperdir);
1239err:
1240	pr_err("failed to lock workdir+upperdir\n");
1241	return -EIO;
1242}
1243
1244/*
1245 * err < 0, 0 if no metacopy xattr, metacopy data size if xattr found.
1246 * an empty xattr returns OVL_METACOPY_MIN_SIZE to distinguish from no xattr value.
1247 */
1248int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path,
1249			     struct ovl_metacopy *data)
1250{
1251	int res;
1252
1253	/* Only regular files can have metacopy xattr */
1254	if (!S_ISREG(d_inode(path->dentry)->i_mode))
1255		return 0;
1256
1257	res = ovl_path_getxattr(ofs, path, OVL_XATTR_METACOPY,
1258				data, data ? OVL_METACOPY_MAX_SIZE : 0);
1259	if (res < 0) {
1260		if (res == -ENODATA || res == -EOPNOTSUPP)
1261			return 0;
1262		/*
1263		 * getxattr on user.* may fail with EACCES in case there's no
1264		 * read permission on the inode.  Not much we can do, other than
1265		 * tell the caller that this is not a metacopy inode.
1266		 */
1267		if (ofs->config.userxattr && res == -EACCES)
1268			return 0;
1269		goto out;
1270	}
1271
1272	if (res == 0) {
1273		/* Emulate empty data for zero size metacopy xattr */
1274		res = OVL_METACOPY_MIN_SIZE;
1275		if (data) {
1276			memset(data, 0, res);
1277			data->len = res;
1278		}
1279	} else if (res < OVL_METACOPY_MIN_SIZE) {
1280		pr_warn_ratelimited("metacopy file '%pd' has too small xattr\n",
1281				    path->dentry);
1282		return -EIO;
1283	} else if (data) {
1284		if (data->version != 0) {
1285			pr_warn_ratelimited("metacopy file '%pd' has unsupported version\n",
1286					    path->dentry);
1287			return -EIO;
1288		}
1289		if (res != data->len) {
1290			pr_warn_ratelimited("metacopy file '%pd' has invalid xattr size\n",
1291					    path->dentry);
1292			return -EIO;
1293		}
1294	}
1295
1296	return res;
1297out:
1298	pr_warn_ratelimited("failed to get metacopy (%i)\n", res);
1299	return res;
1300}
1301
1302int ovl_set_metacopy_xattr(struct ovl_fs *ofs, struct dentry *d, struct ovl_metacopy *metacopy)
1303{
1304	size_t len = metacopy->len;
1305
1306	/* If no flags or digest fall back to empty metacopy file */
1307	if (metacopy->version == 0 && metacopy->flags == 0 && metacopy->digest_algo == 0)
1308		len = 0;
1309
1310	return ovl_check_setxattr(ofs, d, OVL_XATTR_METACOPY,
1311				  metacopy, len, -EOPNOTSUPP);
1312}
1313
1314bool ovl_is_metacopy_dentry(struct dentry *dentry)
1315{
1316	struct ovl_entry *oe = OVL_E(dentry);
1317
1318	if (!d_is_reg(dentry))
1319		return false;
1320
1321	if (ovl_dentry_upper(dentry)) {
1322		if (!ovl_has_upperdata(d_inode(dentry)))
1323			return true;
1324		return false;
1325	}
1326
1327	return (ovl_numlower(oe) > 1);
1328}
1329
1330char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding)
1331{
1332	int res;
1333	char *s, *next, *buf = NULL;
1334
1335	res = ovl_path_getxattr(ofs, path, OVL_XATTR_REDIRECT, NULL, 0);
1336	if (res == -ENODATA || res == -EOPNOTSUPP)
1337		return NULL;
1338	if (res < 0)
1339		goto fail;
1340	if (res == 0)
1341		goto invalid;
1342
1343	buf = kzalloc(res + padding + 1, GFP_KERNEL);
1344	if (!buf)
1345		return ERR_PTR(-ENOMEM);
1346
1347	res = ovl_path_getxattr(ofs, path, OVL_XATTR_REDIRECT, buf, res);
1348	if (res < 0)
1349		goto fail;
1350	if (res == 0)
1351		goto invalid;
1352
1353	if (buf[0] == '/') {
1354		for (s = buf; *s++ == '/'; s = next) {
1355			next = strchrnul(s, '/');
1356			if (s == next)
1357				goto invalid;
1358		}
1359	} else {
1360		if (strchr(buf, '/') != NULL)
1361			goto invalid;
1362	}
1363
1364	return buf;
1365invalid:
1366	pr_warn_ratelimited("invalid redirect (%s)\n", buf);
1367	res = -EINVAL;
1368	goto err_free;
1369fail:
1370	pr_warn_ratelimited("failed to get redirect (%i)\n", res);
1371err_free:
1372	kfree(buf);
1373	return ERR_PTR(res);
1374}
1375
1376/* Call with mounter creds as it may open the file */
1377int ovl_ensure_verity_loaded(struct path *datapath)
1378{
1379	struct inode *inode = d_inode(datapath->dentry);
1380	struct file *filp;
1381
1382	if (!fsverity_active(inode) && IS_VERITY(inode)) {
1383		/*
1384		 * If this inode was not yet opened, the verity info hasn't been
1385		 * loaded yet, so we need to do that here to force it into memory.
1386		 */
1387		filp = kernel_file_open(datapath, O_RDONLY, current_cred());
1388		if (IS_ERR(filp))
1389			return PTR_ERR(filp);
1390		fput(filp);
1391	}
1392
1393	return 0;
1394}
1395
1396int ovl_validate_verity(struct ovl_fs *ofs,
1397			struct path *metapath,
1398			struct path *datapath)
1399{
1400	struct ovl_metacopy metacopy_data;
1401	u8 actual_digest[FS_VERITY_MAX_DIGEST_SIZE];
1402	int xattr_digest_size, digest_size;
1403	int xattr_size, err;
1404	u8 verity_algo;
1405
1406	if (!ofs->config.verity_mode ||
1407	    /* Verity only works on regular files */
1408	    !S_ISREG(d_inode(metapath->dentry)->i_mode))
1409		return 0;
1410
1411	xattr_size = ovl_check_metacopy_xattr(ofs, metapath, &metacopy_data);
1412	if (xattr_size < 0)
1413		return xattr_size;
1414
1415	if (!xattr_size || !metacopy_data.digest_algo) {
1416		if (ofs->config.verity_mode == OVL_VERITY_REQUIRE) {
1417			pr_warn_ratelimited("metacopy file '%pd' has no digest specified\n",
1418					    metapath->dentry);
1419			return -EIO;
1420		}
1421		return 0;
1422	}
1423
1424	xattr_digest_size = ovl_metadata_digest_size(&metacopy_data);
1425
1426	err = ovl_ensure_verity_loaded(datapath);
1427	if (err < 0) {
1428		pr_warn_ratelimited("lower file '%pd' failed to load fs-verity info\n",
1429				    datapath->dentry);
1430		return -EIO;
1431	}
1432
1433	digest_size = fsverity_get_digest(d_inode(datapath->dentry), actual_digest,
1434					  &verity_algo, NULL);
1435	if (digest_size == 0) {
1436		pr_warn_ratelimited("lower file '%pd' has no fs-verity digest\n", datapath->dentry);
1437		return -EIO;
1438	}
1439
1440	if (xattr_digest_size != digest_size ||
1441	    metacopy_data.digest_algo != verity_algo ||
1442	    memcmp(metacopy_data.digest, actual_digest, xattr_digest_size) != 0) {
1443		pr_warn_ratelimited("lower file '%pd' has the wrong fs-verity digest\n",
1444				    datapath->dentry);
1445		return -EIO;
1446	}
1447
1448	return 0;
1449}
1450
1451int ovl_get_verity_digest(struct ovl_fs *ofs, struct path *src,
1452			  struct ovl_metacopy *metacopy)
1453{
1454	int err, digest_size;
1455
1456	if (!ofs->config.verity_mode || !S_ISREG(d_inode(src->dentry)->i_mode))
1457		return 0;
1458
1459	err = ovl_ensure_verity_loaded(src);
1460	if (err < 0) {
1461		pr_warn_ratelimited("lower file '%pd' failed to load fs-verity info\n",
1462				    src->dentry);
1463		return -EIO;
1464	}
1465
1466	digest_size = fsverity_get_digest(d_inode(src->dentry),
1467					  metacopy->digest, &metacopy->digest_algo, NULL);
1468	if (digest_size == 0 ||
1469	    WARN_ON_ONCE(digest_size > FS_VERITY_MAX_DIGEST_SIZE)) {
1470		if (ofs->config.verity_mode == OVL_VERITY_REQUIRE) {
1471			pr_warn_ratelimited("lower file '%pd' has no fs-verity digest\n",
1472					    src->dentry);
1473			return -EIO;
1474		}
1475		return 0;
1476	}
1477
1478	metacopy->len += digest_size;
1479	return 0;
1480}
1481
1482/*
1483 * ovl_sync_status() - Check fs sync status for volatile mounts
1484 *
1485 * Returns 1 if this is not a volatile mount and a real sync is required.
1486 *
1487 * Returns 0 if syncing can be skipped because mount is volatile, and no errors
1488 * have occurred on the upperdir since the mount.
1489 *
1490 * Returns -errno if it is a volatile mount, and the error that occurred since
1491 * the last mount. If the error code changes, it'll return the latest error
1492 * code.
1493 */
1494
1495int ovl_sync_status(struct ovl_fs *ofs)
1496{
1497	struct vfsmount *mnt;
1498
1499	if (ovl_should_sync(ofs))
1500		return 1;
1501
1502	mnt = ovl_upper_mnt(ofs);
1503	if (!mnt)
1504		return 0;
1505
1506	return errseq_check(&mnt->mnt_sb->s_wb_err, ofs->errseq);
1507}
1508
1509/*
1510 * ovl_copyattr() - copy inode attributes from layer to ovl inode
1511 *
1512 * When overlay copies inode information from an upper or lower layer to the
1513 * relevant overlay inode it will apply the idmapping of the upper or lower
1514 * layer when doing so ensuring that the ovl inode ownership will correctly
1515 * reflect the ownership of the idmapped upper or lower layer. For example, an
1516 * idmapped upper or lower layer mapping id 1001 to id 1000 will take care to
1517 * map any lower or upper inode owned by id 1001 to id 1000. These mapping
1518 * helpers are nops when the relevant layer isn't idmapped.
1519 */
1520void ovl_copyattr(struct inode *inode)
1521{
1522	struct path realpath;
1523	struct inode *realinode;
1524	struct mnt_idmap *real_idmap;
1525	vfsuid_t vfsuid;
1526	vfsgid_t vfsgid;
1527
1528	realinode = ovl_i_path_real(inode, &realpath);
1529	real_idmap = mnt_idmap(realpath.mnt);
1530
1531	spin_lock(&inode->i_lock);
1532	vfsuid = i_uid_into_vfsuid(real_idmap, realinode);
1533	vfsgid = i_gid_into_vfsgid(real_idmap, realinode);
1534
1535	inode->i_uid = vfsuid_into_kuid(vfsuid);
1536	inode->i_gid = vfsgid_into_kgid(vfsgid);
1537	inode->i_mode = realinode->i_mode;
1538	inode_set_atime_to_ts(inode, inode_get_atime(realinode));
1539	inode_set_mtime_to_ts(inode, inode_get_mtime(realinode));
1540	inode_set_ctime_to_ts(inode, inode_get_ctime(realinode));
1541	i_size_write(inode, i_size_read(realinode));
1542	spin_unlock(&inode->i_lock);
1543}
v6.9.4
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2011 Novell Inc.
   4 * Copyright (C) 2016 Red Hat, Inc.
   5 */
   6
   7#include <linux/fs.h>
   8#include <linux/mount.h>
   9#include <linux/slab.h>
  10#include <linux/cred.h>
  11#include <linux/xattr.h>
  12#include <linux/exportfs.h>
  13#include <linux/file.h>
  14#include <linux/fileattr.h>
  15#include <linux/uuid.h>
  16#include <linux/namei.h>
  17#include <linux/ratelimit.h>
  18#include "overlayfs.h"
  19
  20/* Get write access to upper mnt - may fail if upper sb was remounted ro */
  21int ovl_get_write_access(struct dentry *dentry)
  22{
  23	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
  24	return mnt_get_write_access(ovl_upper_mnt(ofs));
  25}
  26
  27/* Get write access to upper sb - may block if upper sb is frozen */
  28void ovl_start_write(struct dentry *dentry)
  29{
  30	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
  31	sb_start_write(ovl_upper_mnt(ofs)->mnt_sb);
  32}
  33
  34int ovl_want_write(struct dentry *dentry)
  35{
  36	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
  37	return mnt_want_write(ovl_upper_mnt(ofs));
  38}
  39
  40void ovl_put_write_access(struct dentry *dentry)
  41{
  42	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
  43	mnt_put_write_access(ovl_upper_mnt(ofs));
  44}
  45
  46void ovl_end_write(struct dentry *dentry)
  47{
  48	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
  49	sb_end_write(ovl_upper_mnt(ofs)->mnt_sb);
  50}
  51
  52void ovl_drop_write(struct dentry *dentry)
  53{
  54	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
  55	mnt_drop_write(ovl_upper_mnt(ofs));
  56}
  57
  58struct dentry *ovl_workdir(struct dentry *dentry)
  59{
  60	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
  61	return ofs->workdir;
  62}
  63
  64const struct cred *ovl_override_creds(struct super_block *sb)
  65{
  66	struct ovl_fs *ofs = OVL_FS(sb);
  67
  68	return override_creds(ofs->creator_cred);
 
 
 
 
 
  69}
  70
  71/*
  72 * Check if underlying fs supports file handles and try to determine encoding
  73 * type, in order to deduce maximum inode number used by fs.
  74 *
  75 * Return 0 if file handles are not supported.
  76 * Return 1 (FILEID_INO32_GEN) if fs uses the default 32bit inode encoding.
  77 * Return -1 if fs uses a non default encoding with unknown inode size.
  78 */
  79int ovl_can_decode_fh(struct super_block *sb)
  80{
  81	if (!capable(CAP_DAC_READ_SEARCH))
  82		return 0;
  83
  84	if (!exportfs_can_decode_fh(sb->s_export_op))
  85		return 0;
  86
  87	return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN;
  88}
  89
  90struct dentry *ovl_indexdir(struct super_block *sb)
  91{
  92	struct ovl_fs *ofs = OVL_FS(sb);
  93
  94	return ofs->config.index ? ofs->workdir : NULL;
  95}
  96
  97/* Index all files on copy up. For now only enabled for NFS export */
  98bool ovl_index_all(struct super_block *sb)
  99{
 100	struct ovl_fs *ofs = OVL_FS(sb);
 101
 102	return ofs->config.nfs_export && ofs->config.index;
 103}
 104
 105/* Verify lower origin on lookup. For now only enabled for NFS export */
 106bool ovl_verify_lower(struct super_block *sb)
 107{
 108	struct ovl_fs *ofs = OVL_FS(sb);
 109
 110	return ofs->config.nfs_export && ofs->config.index;
 111}
 112
 113struct ovl_path *ovl_stack_alloc(unsigned int n)
 114{
 115	return kcalloc(n, sizeof(struct ovl_path), GFP_KERNEL);
 116}
 117
 118void ovl_stack_cpy(struct ovl_path *dst, struct ovl_path *src, unsigned int n)
 119{
 120	unsigned int i;
 121
 122	memcpy(dst, src, sizeof(struct ovl_path) * n);
 123	for (i = 0; i < n; i++)
 124		dget(src[i].dentry);
 125}
 126
 127void ovl_stack_put(struct ovl_path *stack, unsigned int n)
 128{
 129	unsigned int i;
 130
 131	for (i = 0; stack && i < n; i++)
 132		dput(stack[i].dentry);
 133}
 134
 135void ovl_stack_free(struct ovl_path *stack, unsigned int n)
 136{
 137	ovl_stack_put(stack, n);
 138	kfree(stack);
 139}
 140
 141struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
 142{
 143	size_t size = offsetof(struct ovl_entry, __lowerstack[numlower]);
 144	struct ovl_entry *oe = kzalloc(size, GFP_KERNEL);
 145
 146	if (oe)
 147		oe->__numlower = numlower;
 148
 149	return oe;
 150}
 151
 152void ovl_free_entry(struct ovl_entry *oe)
 153{
 154	ovl_stack_put(ovl_lowerstack(oe), ovl_numlower(oe));
 155	kfree(oe);
 156}
 157
 158#define OVL_D_REVALIDATE (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE)
 159
 160bool ovl_dentry_remote(struct dentry *dentry)
 161{
 162	return dentry->d_flags & OVL_D_REVALIDATE;
 163}
 164
 165void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *realdentry)
 166{
 167	if (!ovl_dentry_remote(realdentry))
 168		return;
 169
 170	spin_lock(&dentry->d_lock);
 171	dentry->d_flags |= realdentry->d_flags & OVL_D_REVALIDATE;
 172	spin_unlock(&dentry->d_lock);
 173}
 174
 175void ovl_dentry_init_reval(struct dentry *dentry, struct dentry *upperdentry,
 176			   struct ovl_entry *oe)
 177{
 178	return ovl_dentry_init_flags(dentry, upperdentry, oe, OVL_D_REVALIDATE);
 179}
 180
 181void ovl_dentry_init_flags(struct dentry *dentry, struct dentry *upperdentry,
 182			   struct ovl_entry *oe, unsigned int mask)
 183{
 184	struct ovl_path *lowerstack = ovl_lowerstack(oe);
 185	unsigned int i, flags = 0;
 186
 187	if (upperdentry)
 188		flags |= upperdentry->d_flags;
 189	for (i = 0; i < ovl_numlower(oe) && lowerstack[i].dentry; i++)
 190		flags |= lowerstack[i].dentry->d_flags;
 191
 192	spin_lock(&dentry->d_lock);
 193	dentry->d_flags &= ~mask;
 194	dentry->d_flags |= flags & mask;
 195	spin_unlock(&dentry->d_lock);
 196}
 197
 198bool ovl_dentry_weird(struct dentry *dentry)
 199{
 
 
 
 200	return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT |
 201				  DCACHE_MANAGE_TRANSIT |
 202				  DCACHE_OP_HASH |
 203				  DCACHE_OP_COMPARE);
 204}
 205
 206enum ovl_path_type ovl_path_type(struct dentry *dentry)
 207{
 208	struct ovl_entry *oe = OVL_E(dentry);
 209	enum ovl_path_type type = 0;
 210
 211	if (ovl_dentry_upper(dentry)) {
 212		type = __OVL_PATH_UPPER;
 213
 214		/*
 215		 * Non-dir dentry can hold lower dentry of its copy up origin.
 216		 */
 217		if (ovl_numlower(oe)) {
 218			if (ovl_test_flag(OVL_CONST_INO, d_inode(dentry)))
 219				type |= __OVL_PATH_ORIGIN;
 220			if (d_is_dir(dentry) ||
 221			    !ovl_has_upperdata(d_inode(dentry)))
 222				type |= __OVL_PATH_MERGE;
 223		}
 224	} else {
 225		if (ovl_numlower(oe) > 1)
 226			type |= __OVL_PATH_MERGE;
 227	}
 228	return type;
 229}
 230
 231void ovl_path_upper(struct dentry *dentry, struct path *path)
 232{
 233	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 234
 235	path->mnt = ovl_upper_mnt(ofs);
 236	path->dentry = ovl_dentry_upper(dentry);
 237}
 238
 239void ovl_path_lower(struct dentry *dentry, struct path *path)
 240{
 241	struct ovl_entry *oe = OVL_E(dentry);
 242	struct ovl_path *lowerpath = ovl_lowerstack(oe);
 243
 244	if (ovl_numlower(oe)) {
 245		path->mnt = lowerpath->layer->mnt;
 246		path->dentry = lowerpath->dentry;
 247	} else {
 248		*path = (struct path) { };
 249	}
 250}
 251
 252void ovl_path_lowerdata(struct dentry *dentry, struct path *path)
 253{
 254	struct ovl_entry *oe = OVL_E(dentry);
 255	struct ovl_path *lowerdata = ovl_lowerdata(oe);
 256	struct dentry *lowerdata_dentry = ovl_lowerdata_dentry(oe);
 257
 258	if (lowerdata_dentry) {
 259		path->dentry = lowerdata_dentry;
 260		/*
 261		 * Pairs with smp_wmb() in ovl_dentry_set_lowerdata().
 262		 * Make sure that if lowerdata->dentry is visible, then
 263		 * datapath->layer is visible as well.
 264		 */
 265		smp_rmb();
 266		path->mnt = READ_ONCE(lowerdata->layer)->mnt;
 267	} else {
 268		*path = (struct path) { };
 269	}
 270}
 271
 272enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
 273{
 274	enum ovl_path_type type = ovl_path_type(dentry);
 275
 276	if (!OVL_TYPE_UPPER(type))
 277		ovl_path_lower(dentry, path);
 278	else
 279		ovl_path_upper(dentry, path);
 280
 281	return type;
 282}
 283
 284enum ovl_path_type ovl_path_realdata(struct dentry *dentry, struct path *path)
 285{
 286	enum ovl_path_type type = ovl_path_type(dentry);
 287
 288	WARN_ON_ONCE(d_is_dir(dentry));
 289
 290	if (!OVL_TYPE_UPPER(type) || OVL_TYPE_MERGE(type))
 291		ovl_path_lowerdata(dentry, path);
 292	else
 293		ovl_path_upper(dentry, path);
 294
 295	return type;
 296}
 297
 298struct dentry *ovl_dentry_upper(struct dentry *dentry)
 299{
 300	return ovl_upperdentry_dereference(OVL_I(d_inode(dentry)));
 301}
 302
 303struct dentry *ovl_dentry_lower(struct dentry *dentry)
 304{
 305	struct ovl_entry *oe = OVL_E(dentry);
 306
 307	return ovl_numlower(oe) ? ovl_lowerstack(oe)->dentry : NULL;
 308}
 309
 310const struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
 311{
 312	struct ovl_entry *oe = OVL_E(dentry);
 313
 314	return ovl_numlower(oe) ? ovl_lowerstack(oe)->layer : NULL;
 315}
 316
 317/*
 318 * ovl_dentry_lower() could return either a data dentry or metacopy dentry
 319 * depending on what is stored in lowerstack[0]. At times we need to find
 320 * lower dentry which has data (and not metacopy dentry). This helper
 321 * returns the lower data dentry.
 322 */
 323struct dentry *ovl_dentry_lowerdata(struct dentry *dentry)
 324{
 325	return ovl_lowerdata_dentry(OVL_E(dentry));
 326}
 327
 328int ovl_dentry_set_lowerdata(struct dentry *dentry, struct ovl_path *datapath)
 329{
 330	struct ovl_entry *oe = OVL_E(dentry);
 331	struct ovl_path *lowerdata = ovl_lowerdata(oe);
 332	struct dentry *datadentry = datapath->dentry;
 333
 334	if (WARN_ON_ONCE(ovl_numlower(oe) <= 1))
 335		return -EIO;
 336
 337	WRITE_ONCE(lowerdata->layer, datapath->layer);
 338	/*
 339	 * Pairs with smp_rmb() in ovl_path_lowerdata().
 340	 * Make sure that if lowerdata->dentry is visible, then
 341	 * lowerdata->layer is visible as well.
 342	 */
 343	smp_wmb();
 344	WRITE_ONCE(lowerdata->dentry, dget(datadentry));
 345
 346	ovl_dentry_update_reval(dentry, datadentry);
 347
 348	return 0;
 349}
 350
 351struct dentry *ovl_dentry_real(struct dentry *dentry)
 352{
 353	return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry);
 354}
 355
 356struct dentry *ovl_i_dentry_upper(struct inode *inode)
 357{
 358	return ovl_upperdentry_dereference(OVL_I(inode));
 359}
 360
 361struct inode *ovl_i_path_real(struct inode *inode, struct path *path)
 362{
 363	struct ovl_path *lowerpath = ovl_lowerpath(OVL_I_E(inode));
 364
 365	path->dentry = ovl_i_dentry_upper(inode);
 366	if (!path->dentry) {
 367		path->dentry = lowerpath->dentry;
 368		path->mnt = lowerpath->layer->mnt;
 369	} else {
 370		path->mnt = ovl_upper_mnt(OVL_FS(inode->i_sb));
 371	}
 372
 373	return path->dentry ? d_inode_rcu(path->dentry) : NULL;
 374}
 375
 376struct inode *ovl_inode_upper(struct inode *inode)
 377{
 378	struct dentry *upperdentry = ovl_i_dentry_upper(inode);
 379
 380	return upperdentry ? d_inode(upperdentry) : NULL;
 381}
 382
 383struct inode *ovl_inode_lower(struct inode *inode)
 384{
 385	struct ovl_path *lowerpath = ovl_lowerpath(OVL_I_E(inode));
 386
 387	return lowerpath ? d_inode(lowerpath->dentry) : NULL;
 388}
 389
 390struct inode *ovl_inode_real(struct inode *inode)
 391{
 392	return ovl_inode_upper(inode) ?: ovl_inode_lower(inode);
 393}
 394
 395/* Return inode which contains lower data. Do not return metacopy */
 396struct inode *ovl_inode_lowerdata(struct inode *inode)
 397{
 398	struct dentry *lowerdata = ovl_lowerdata_dentry(OVL_I_E(inode));
 399
 400	if (WARN_ON(!S_ISREG(inode->i_mode)))
 401		return NULL;
 402
 403	return lowerdata ? d_inode(lowerdata) : NULL;
 404}
 405
 406/* Return real inode which contains data. Does not return metacopy inode */
 407struct inode *ovl_inode_realdata(struct inode *inode)
 408{
 409	struct inode *upperinode;
 410
 411	upperinode = ovl_inode_upper(inode);
 412	if (upperinode && ovl_has_upperdata(inode))
 413		return upperinode;
 414
 415	return ovl_inode_lowerdata(inode);
 416}
 417
 418const char *ovl_lowerdata_redirect(struct inode *inode)
 419{
 420	return inode && S_ISREG(inode->i_mode) ?
 421		OVL_I(inode)->lowerdata_redirect : NULL;
 422}
 423
 424struct ovl_dir_cache *ovl_dir_cache(struct inode *inode)
 425{
 426	return inode && S_ISDIR(inode->i_mode) ? OVL_I(inode)->cache : NULL;
 427}
 428
 429void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache)
 430{
 431	OVL_I(inode)->cache = cache;
 432}
 433
 434void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry)
 435{
 436	set_bit(flag, OVL_E_FLAGS(dentry));
 437}
 438
 439void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry)
 440{
 441	clear_bit(flag, OVL_E_FLAGS(dentry));
 442}
 443
 444bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry)
 445{
 446	return test_bit(flag, OVL_E_FLAGS(dentry));
 447}
 448
 449bool ovl_dentry_is_opaque(struct dentry *dentry)
 450{
 451	return ovl_dentry_test_flag(OVL_E_OPAQUE, dentry);
 452}
 453
 454bool ovl_dentry_is_whiteout(struct dentry *dentry)
 455{
 456	return !dentry->d_inode && ovl_dentry_is_opaque(dentry);
 457}
 458
 459void ovl_dentry_set_opaque(struct dentry *dentry)
 460{
 461	ovl_dentry_set_flag(OVL_E_OPAQUE, dentry);
 462}
 463
 464bool ovl_dentry_has_xwhiteouts(struct dentry *dentry)
 465{
 466	return ovl_dentry_test_flag(OVL_E_XWHITEOUTS, dentry);
 467}
 468
 469void ovl_dentry_set_xwhiteouts(struct dentry *dentry)
 470{
 471	ovl_dentry_set_flag(OVL_E_XWHITEOUTS, dentry);
 472}
 473
 474/*
 475 * ovl_layer_set_xwhiteouts() is called before adding the overlay dir
 476 * dentry to dcache, while readdir of that same directory happens after
 477 * the overlay dir dentry is in dcache, so if some cpu observes that
 478 * ovl_dentry_is_xwhiteouts(), it will also observe layer->has_xwhiteouts
 479 * for the layers where xwhiteouts marker was found in that merge dir.
 480 */
 481void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs,
 482			      const struct ovl_layer *layer)
 483{
 484	if (layer->has_xwhiteouts)
 485		return;
 486
 487	/* Write once to read-mostly layer properties */
 488	ofs->layers[layer->idx].has_xwhiteouts = true;
 489}
 490
 491/*
 492 * For hard links and decoded file handles, it's possible for ovl_dentry_upper()
 493 * to return positive, while there's no actual upper alias for the inode.
 494 * Copy up code needs to know about the existence of the upper alias, so it
 495 * can't use ovl_dentry_upper().
 496 */
 497bool ovl_dentry_has_upper_alias(struct dentry *dentry)
 498{
 499	return ovl_dentry_test_flag(OVL_E_UPPER_ALIAS, dentry);
 500}
 501
 502void ovl_dentry_set_upper_alias(struct dentry *dentry)
 503{
 504	ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry);
 505}
 506
 507static bool ovl_should_check_upperdata(struct inode *inode)
 508{
 509	if (!S_ISREG(inode->i_mode))
 510		return false;
 511
 512	if (!ovl_inode_lower(inode))
 513		return false;
 514
 515	return true;
 516}
 517
 518bool ovl_has_upperdata(struct inode *inode)
 519{
 520	if (!ovl_should_check_upperdata(inode))
 521		return true;
 522
 523	if (!ovl_test_flag(OVL_UPPERDATA, inode))
 524		return false;
 525	/*
 526	 * Pairs with smp_wmb() in ovl_set_upperdata(). Main user of
 527	 * ovl_has_upperdata() is ovl_copy_up_meta_inode_data(). Make sure
 528	 * if setting of OVL_UPPERDATA is visible, then effects of writes
 529	 * before that are visible too.
 530	 */
 531	smp_rmb();
 532	return true;
 533}
 534
 535void ovl_set_upperdata(struct inode *inode)
 536{
 537	/*
 538	 * Pairs with smp_rmb() in ovl_has_upperdata(). Make sure
 539	 * if OVL_UPPERDATA flag is visible, then effects of write operations
 540	 * before it are visible as well.
 541	 */
 542	smp_wmb();
 543	ovl_set_flag(OVL_UPPERDATA, inode);
 544}
 545
 546/* Caller should hold ovl_inode->lock */
 547bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags)
 548{
 549	if (!ovl_open_flags_need_copy_up(flags))
 550		return false;
 551
 552	return !ovl_test_flag(OVL_UPPERDATA, d_inode(dentry));
 553}
 554
 555bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags)
 556{
 557	if (!ovl_open_flags_need_copy_up(flags))
 558		return false;
 559
 560	return !ovl_has_upperdata(d_inode(dentry));
 561}
 562
 563const char *ovl_dentry_get_redirect(struct dentry *dentry)
 564{
 565	return OVL_I(d_inode(dentry))->redirect;
 566}
 567
 568void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
 569{
 570	struct ovl_inode *oi = OVL_I(d_inode(dentry));
 571
 572	kfree(oi->redirect);
 573	oi->redirect = redirect;
 574}
 575
 576void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
 577{
 578	struct inode *upperinode = d_inode(upperdentry);
 579
 580	WARN_ON(OVL_I(inode)->__upperdentry);
 581
 582	/*
 583	 * Make sure upperdentry is consistent before making it visible
 584	 */
 585	smp_wmb();
 586	OVL_I(inode)->__upperdentry = upperdentry;
 587	if (inode_unhashed(inode)) {
 588		inode->i_private = upperinode;
 589		__insert_inode_hash(inode, (unsigned long) upperinode);
 590	}
 591}
 592
 593static void ovl_dir_version_inc(struct dentry *dentry, bool impurity)
 594{
 595	struct inode *inode = d_inode(dentry);
 596
 597	WARN_ON(!inode_is_locked(inode));
 598	WARN_ON(!d_is_dir(dentry));
 599	/*
 600	 * Version is used by readdir code to keep cache consistent.
 601	 * For merge dirs (or dirs with origin) all changes need to be noted.
 602	 * For non-merge dirs, cache contains only impure entries (i.e. ones
 603	 * which have been copied up and have origins), so only need to note
 604	 * changes to impure entries.
 605	 */
 606	if (!ovl_dir_is_real(inode) || impurity)
 607		OVL_I(inode)->version++;
 608}
 609
 610void ovl_dir_modified(struct dentry *dentry, bool impurity)
 611{
 612	/* Copy mtime/ctime */
 613	ovl_copyattr(d_inode(dentry));
 614
 615	ovl_dir_version_inc(dentry, impurity);
 616}
 617
 618u64 ovl_inode_version_get(struct inode *inode)
 619{
 620	WARN_ON(!inode_is_locked(inode));
 621	return OVL_I(inode)->version;
 622}
 623
 624bool ovl_is_whiteout(struct dentry *dentry)
 625{
 626	struct inode *inode = dentry->d_inode;
 627
 628	return inode && IS_WHITEOUT(inode);
 629}
 630
 631/*
 632 * Use this over ovl_is_whiteout for upper and lower files, as it also
 633 * handles overlay.whiteout xattr whiteout files.
 634 */
 635bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path)
 636{
 637	return ovl_is_whiteout(path->dentry) ||
 638		ovl_path_check_xwhiteout_xattr(ofs, path);
 639}
 640
 641struct file *ovl_path_open(const struct path *path, int flags)
 642{
 643	struct inode *inode = d_inode(path->dentry);
 644	struct mnt_idmap *real_idmap = mnt_idmap(path->mnt);
 645	int err, acc_mode;
 646
 647	if (flags & ~(O_ACCMODE | O_LARGEFILE))
 648		BUG();
 649
 650	switch (flags & O_ACCMODE) {
 651	case O_RDONLY:
 652		acc_mode = MAY_READ;
 653		break;
 654	case O_WRONLY:
 655		acc_mode = MAY_WRITE;
 656		break;
 657	default:
 658		BUG();
 659	}
 660
 661	err = inode_permission(real_idmap, inode, acc_mode | MAY_OPEN);
 662	if (err)
 663		return ERR_PTR(err);
 664
 665	/* O_NOATIME is an optimization, don't fail if not permitted */
 666	if (inode_owner_or_capable(real_idmap, inode))
 667		flags |= O_NOATIME;
 668
 669	return dentry_open(path, flags, current_cred());
 670}
 671
 672/* Caller should hold ovl_inode->lock */
 673static bool ovl_already_copied_up_locked(struct dentry *dentry, int flags)
 674{
 675	bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
 676
 677	if (ovl_dentry_upper(dentry) &&
 678	    (ovl_dentry_has_upper_alias(dentry) || disconnected) &&
 679	    !ovl_dentry_needs_data_copy_up_locked(dentry, flags))
 680		return true;
 681
 682	return false;
 683}
 684
 685bool ovl_already_copied_up(struct dentry *dentry, int flags)
 686{
 687	bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
 688
 689	/*
 690	 * Check if copy-up has happened as well as for upper alias (in
 691	 * case of hard links) is there.
 692	 *
 693	 * Both checks are lockless:
 694	 *  - false negatives: will recheck under oi->lock
 695	 *  - false positives:
 696	 *    + ovl_dentry_upper() uses memory barriers to ensure the
 697	 *      upper dentry is up-to-date
 698	 *    + ovl_dentry_has_upper_alias() relies on locking of
 699	 *      upper parent i_rwsem to prevent reordering copy-up
 700	 *      with rename.
 701	 */
 702	if (ovl_dentry_upper(dentry) &&
 703	    (ovl_dentry_has_upper_alias(dentry) || disconnected) &&
 704	    !ovl_dentry_needs_data_copy_up(dentry, flags))
 705		return true;
 706
 707	return false;
 708}
 709
 710/*
 711 * The copy up "transaction" keeps an elevated mnt write count on upper mnt,
 712 * but leaves taking freeze protection on upper sb to lower level helpers.
 713 */
 714int ovl_copy_up_start(struct dentry *dentry, int flags)
 715{
 716	struct inode *inode = d_inode(dentry);
 717	int err;
 718
 719	err = ovl_inode_lock_interruptible(inode);
 720	if (err)
 721		return err;
 722
 723	if (ovl_already_copied_up_locked(dentry, flags))
 724		err = 1; /* Already copied up */
 725	else
 726		err = ovl_get_write_access(dentry);
 727	if (err)
 728		goto out_unlock;
 729
 730	return 0;
 731
 732out_unlock:
 733	ovl_inode_unlock(inode);
 734	return err;
 735}
 736
 737void ovl_copy_up_end(struct dentry *dentry)
 738{
 739	ovl_put_write_access(dentry);
 740	ovl_inode_unlock(d_inode(dentry));
 741}
 742
 743bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path)
 744{
 745	int res;
 746
 747	res = ovl_path_getxattr(ofs, path, OVL_XATTR_ORIGIN, NULL, 0);
 748
 749	/* Zero size value means "copied up but origin unknown" */
 750	if (res >= 0)
 751		return true;
 752
 753	return false;
 754}
 755
 756bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path)
 757{
 758	struct dentry *dentry = path->dentry;
 759	int res;
 760
 761	/* xattr.whiteout must be a zero size regular file */
 762	if (!d_is_reg(dentry) || i_size_read(d_inode(dentry)) != 0)
 763		return false;
 764
 765	res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUT, NULL, 0);
 766	return res >= 0;
 767}
 768
 769/*
 770 * Load persistent uuid from xattr into s_uuid if found, or store a new
 771 * random generated value in s_uuid and in xattr.
 772 */
 773bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs,
 774			 const struct path *upperpath)
 775{
 776	bool set = false;
 777	uuid_t uuid;
 778	int res;
 779
 780	/* Try to load existing persistent uuid */
 781	res = ovl_path_getxattr(ofs, upperpath, OVL_XATTR_UUID, uuid.b,
 782				UUID_SIZE);
 783	if (res == UUID_SIZE)
 784		goto set_uuid;
 785
 786	if (res != -ENODATA)
 787		goto fail;
 788
 789	/*
 790	 * With uuid=auto, if uuid xattr is found, it will be used.
 791	 * If uuid xattrs is not found, generate a persistent uuid only on mount
 792	 * of new overlays where upper root dir is not yet marked as impure.
 793	 * An upper dir is marked as impure on copy up or lookup of its subdirs.
 794	 */
 795	if (ofs->config.uuid == OVL_UUID_AUTO) {
 796		res = ovl_path_getxattr(ofs, upperpath, OVL_XATTR_IMPURE, NULL,
 797					0);
 798		if (res > 0) {
 799			/* Any mount of old overlay - downgrade to uuid=null */
 800			ofs->config.uuid = OVL_UUID_NULL;
 801			return true;
 802		} else if (res == -ENODATA) {
 803			/* First mount of new overlay - upgrade to uuid=on */
 804			ofs->config.uuid = OVL_UUID_ON;
 805		} else if (res < 0) {
 806			goto fail;
 807		}
 808
 809	}
 810
 811	/* Generate overlay instance uuid */
 812	uuid_gen(&uuid);
 813
 814	/* Try to store persistent uuid */
 815	set = true;
 816	res = ovl_setxattr(ofs, upperpath->dentry, OVL_XATTR_UUID, uuid.b,
 817			   UUID_SIZE);
 818	if (res)
 819		goto fail;
 820
 821set_uuid:
 822	super_set_uuid(sb, uuid.b, sizeof(uuid));
 823	return true;
 824
 825fail:
 826	ofs->config.uuid = OVL_UUID_NULL;
 827	pr_warn("failed to %s uuid (%pd2, err=%i); falling back to uuid=null.\n",
 828		set ? "set" : "get", upperpath->dentry, res);
 829	return false;
 830}
 831
 832char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path,
 833			   enum ovl_xattr ox)
 834{
 835	int res;
 836	char val;
 837
 838	if (!d_is_dir(path->dentry))
 839		return 0;
 840
 841	res = ovl_path_getxattr(ofs, path, ox, &val, 1);
 842	return res == 1 ? val : 0;
 843}
 844
 845#define OVL_XATTR_OPAQUE_POSTFIX	"opaque"
 846#define OVL_XATTR_REDIRECT_POSTFIX	"redirect"
 847#define OVL_XATTR_ORIGIN_POSTFIX	"origin"
 848#define OVL_XATTR_IMPURE_POSTFIX	"impure"
 849#define OVL_XATTR_NLINK_POSTFIX		"nlink"
 850#define OVL_XATTR_UPPER_POSTFIX		"upper"
 851#define OVL_XATTR_UUID_POSTFIX		"uuid"
 852#define OVL_XATTR_METACOPY_POSTFIX	"metacopy"
 853#define OVL_XATTR_PROTATTR_POSTFIX	"protattr"
 854#define OVL_XATTR_XWHITEOUT_POSTFIX	"whiteout"
 855
 856#define OVL_XATTR_TAB_ENTRY(x) \
 857	[x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \
 858		[true] = OVL_XATTR_USER_PREFIX x ## _POSTFIX }
 859
 860const char *const ovl_xattr_table[][2] = {
 861	OVL_XATTR_TAB_ENTRY(OVL_XATTR_OPAQUE),
 862	OVL_XATTR_TAB_ENTRY(OVL_XATTR_REDIRECT),
 863	OVL_XATTR_TAB_ENTRY(OVL_XATTR_ORIGIN),
 864	OVL_XATTR_TAB_ENTRY(OVL_XATTR_IMPURE),
 865	OVL_XATTR_TAB_ENTRY(OVL_XATTR_NLINK),
 866	OVL_XATTR_TAB_ENTRY(OVL_XATTR_UPPER),
 867	OVL_XATTR_TAB_ENTRY(OVL_XATTR_UUID),
 868	OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY),
 869	OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR),
 870	OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUT),
 871};
 872
 873int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,
 874		       enum ovl_xattr ox, const void *value, size_t size,
 875		       int xerr)
 876{
 877	int err;
 878
 879	if (ofs->noxattr)
 880		return xerr;
 881
 882	err = ovl_setxattr(ofs, upperdentry, ox, value, size);
 883
 884	if (err == -EOPNOTSUPP) {
 885		pr_warn("cannot set %s xattr on upper\n", ovl_xattr(ofs, ox));
 886		ofs->noxattr = true;
 887		return xerr;
 888	}
 889
 890	return err;
 891}
 892
 893int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
 894{
 895	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 896	int err;
 897
 898	if (ovl_test_flag(OVL_IMPURE, d_inode(dentry)))
 899		return 0;
 900
 901	/*
 902	 * Do not fail when upper doesn't support xattrs.
 903	 * Upper inodes won't have origin nor redirect xattr anyway.
 904	 */
 905	err = ovl_check_setxattr(ofs, upperdentry, OVL_XATTR_IMPURE, "y", 1, 0);
 906	if (!err)
 907		ovl_set_flag(OVL_IMPURE, d_inode(dentry));
 908
 909	return err;
 910}
 911
 912
 913#define OVL_PROTATTR_MAX 32 /* Reserved for future flags */
 914
 915void ovl_check_protattr(struct inode *inode, struct dentry *upper)
 916{
 917	struct ovl_fs *ofs = OVL_FS(inode->i_sb);
 918	u32 iflags = inode->i_flags & OVL_PROT_I_FLAGS_MASK;
 919	char buf[OVL_PROTATTR_MAX+1];
 920	int res, n;
 921
 922	res = ovl_getxattr_upper(ofs, upper, OVL_XATTR_PROTATTR, buf,
 923				 OVL_PROTATTR_MAX);
 924	if (res < 0)
 925		return;
 926
 927	/*
 928	 * Initialize inode flags from overlay.protattr xattr and upper inode
 929	 * flags.  If upper inode has those fileattr flags set (i.e. from old
 930	 * kernel), we do not clear them on ovl_get_inode(), but we will clear
 931	 * them on next fileattr_set().
 932	 */
 933	for (n = 0; n < res; n++) {
 934		if (buf[n] == 'a')
 935			iflags |= S_APPEND;
 936		else if (buf[n] == 'i')
 937			iflags |= S_IMMUTABLE;
 938		else
 939			break;
 940	}
 941
 942	if (!res || n < res) {
 943		pr_warn_ratelimited("incompatible overlay.protattr format (%pd2, len=%d)\n",
 944				    upper, res);
 945	} else {
 946		inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK);
 947	}
 948}
 949
 950int ovl_set_protattr(struct inode *inode, struct dentry *upper,
 951		      struct fileattr *fa)
 952{
 953	struct ovl_fs *ofs = OVL_FS(inode->i_sb);
 954	char buf[OVL_PROTATTR_MAX];
 955	int len = 0, err = 0;
 956	u32 iflags = 0;
 957
 958	BUILD_BUG_ON(HWEIGHT32(OVL_PROT_FS_FLAGS_MASK) > OVL_PROTATTR_MAX);
 959
 960	if (fa->flags & FS_APPEND_FL) {
 961		buf[len++] = 'a';
 962		iflags |= S_APPEND;
 963	}
 964	if (fa->flags & FS_IMMUTABLE_FL) {
 965		buf[len++] = 'i';
 966		iflags |= S_IMMUTABLE;
 967	}
 968
 969	/*
 970	 * Do not allow to set protection flags when upper doesn't support
 971	 * xattrs, because we do not set those fileattr flags on upper inode.
 972	 * Remove xattr if it exist and all protection flags are cleared.
 973	 */
 974	if (len) {
 975		err = ovl_check_setxattr(ofs, upper, OVL_XATTR_PROTATTR,
 976					 buf, len, -EPERM);
 977	} else if (inode->i_flags & OVL_PROT_I_FLAGS_MASK) {
 978		err = ovl_removexattr(ofs, upper, OVL_XATTR_PROTATTR);
 979		if (err == -EOPNOTSUPP || err == -ENODATA)
 980			err = 0;
 981	}
 982	if (err)
 983		return err;
 984
 985	inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK);
 986
 987	/* Mask out the fileattr flags that should not be set in upper inode */
 988	fa->flags &= ~OVL_PROT_FS_FLAGS_MASK;
 989	fa->fsx_xflags &= ~OVL_PROT_FSX_FLAGS_MASK;
 990
 991	return 0;
 992}
 993
 994/*
 995 * Caller must hold a reference to inode to prevent it from being freed while
 996 * it is marked inuse.
 997 */
 998bool ovl_inuse_trylock(struct dentry *dentry)
 999{
1000	struct inode *inode = d_inode(dentry);
1001	bool locked = false;
1002
1003	spin_lock(&inode->i_lock);
1004	if (!(inode->i_state & I_OVL_INUSE)) {
1005		inode->i_state |= I_OVL_INUSE;
1006		locked = true;
1007	}
1008	spin_unlock(&inode->i_lock);
1009
1010	return locked;
1011}
1012
1013void ovl_inuse_unlock(struct dentry *dentry)
1014{
1015	if (dentry) {
1016		struct inode *inode = d_inode(dentry);
1017
1018		spin_lock(&inode->i_lock);
1019		WARN_ON(!(inode->i_state & I_OVL_INUSE));
1020		inode->i_state &= ~I_OVL_INUSE;
1021		spin_unlock(&inode->i_lock);
1022	}
1023}
1024
1025bool ovl_is_inuse(struct dentry *dentry)
1026{
1027	struct inode *inode = d_inode(dentry);
1028	bool inuse;
1029
1030	spin_lock(&inode->i_lock);
1031	inuse = (inode->i_state & I_OVL_INUSE);
1032	spin_unlock(&inode->i_lock);
1033
1034	return inuse;
1035}
1036
1037/*
1038 * Does this overlay dentry need to be indexed on copy up?
1039 */
1040bool ovl_need_index(struct dentry *dentry)
1041{
1042	struct dentry *lower = ovl_dentry_lower(dentry);
1043
1044	if (!lower || !ovl_indexdir(dentry->d_sb))
1045		return false;
1046
1047	/* Index all files for NFS export and consistency verification */
1048	if (ovl_index_all(dentry->d_sb))
1049		return true;
1050
1051	/* Index only lower hardlinks on copy up */
1052	if (!d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
1053		return true;
1054
1055	return false;
1056}
1057
1058/* Caller must hold OVL_I(inode)->lock */
1059static void ovl_cleanup_index(struct dentry *dentry)
1060{
1061	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
1062	struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
1063	struct inode *dir = indexdir->d_inode;
1064	struct dentry *lowerdentry = ovl_dentry_lower(dentry);
1065	struct dentry *upperdentry = ovl_dentry_upper(dentry);
1066	struct dentry *index = NULL;
1067	struct inode *inode;
1068	struct qstr name = { };
1069	bool got_write = false;
1070	int err;
1071
1072	err = ovl_get_index_name(ofs, lowerdentry, &name);
1073	if (err)
1074		goto fail;
1075
1076	err = ovl_want_write(dentry);
1077	if (err)
1078		goto fail;
1079
1080	got_write = true;
1081	inode = d_inode(upperdentry);
1082	if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) {
1083		pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
1084				    upperdentry, inode->i_ino, inode->i_nlink);
1085		/*
1086		 * We either have a bug with persistent union nlink or a lower
1087		 * hardlink was added while overlay is mounted. Adding a lower
1088		 * hardlink and then unlinking all overlay hardlinks would drop
1089		 * overlay nlink to zero before all upper inodes are unlinked.
1090		 * As a safety measure, when that situation is detected, set
1091		 * the overlay nlink to the index inode nlink minus one for the
1092		 * index entry itself.
1093		 */
1094		set_nlink(d_inode(dentry), inode->i_nlink - 1);
1095		ovl_set_nlink_upper(dentry);
1096		goto out;
1097	}
1098
1099	inode_lock_nested(dir, I_MUTEX_PARENT);
1100	index = ovl_lookup_upper(ofs, name.name, indexdir, name.len);
1101	err = PTR_ERR(index);
1102	if (IS_ERR(index)) {
1103		index = NULL;
1104	} else if (ovl_index_all(dentry->d_sb)) {
1105		/* Whiteout orphan index to block future open by handle */
1106		err = ovl_cleanup_and_whiteout(OVL_FS(dentry->d_sb),
1107					       dir, index);
1108	} else {
1109		/* Cleanup orphan index entries */
1110		err = ovl_cleanup(ofs, dir, index);
1111	}
1112
1113	inode_unlock(dir);
1114	if (err)
1115		goto fail;
1116
1117out:
1118	if (got_write)
1119		ovl_drop_write(dentry);
1120	kfree(name.name);
1121	dput(index);
1122	return;
1123
1124fail:
1125	pr_err("cleanup index of '%pd2' failed (%i)\n", dentry, err);
1126	goto out;
1127}
1128
1129/*
1130 * Operations that change overlay inode and upper inode nlink need to be
1131 * synchronized with copy up for persistent nlink accounting.
1132 */
1133int ovl_nlink_start(struct dentry *dentry)
1134{
1135	struct inode *inode = d_inode(dentry);
1136	const struct cred *old_cred;
1137	int err;
1138
1139	if (WARN_ON(!inode))
1140		return -ENOENT;
1141
1142	/*
1143	 * With inodes index is enabled, we store the union overlay nlink
1144	 * in an xattr on the index inode. When whiting out an indexed lower,
1145	 * we need to decrement the overlay persistent nlink, but before the
1146	 * first copy up, we have no upper index inode to store the xattr.
1147	 *
1148	 * As a workaround, before whiteout/rename over an indexed lower,
1149	 * copy up to create the upper index. Creating the upper index will
1150	 * initialize the overlay nlink, so it could be dropped if unlink
1151	 * or rename succeeds.
1152	 *
1153	 * TODO: implement metadata only index copy up when called with
1154	 *       ovl_copy_up_flags(dentry, O_PATH).
1155	 */
1156	if (ovl_need_index(dentry) && !ovl_dentry_has_upper_alias(dentry)) {
1157		err = ovl_copy_up(dentry);
1158		if (err)
1159			return err;
1160	}
1161
1162	err = ovl_inode_lock_interruptible(inode);
1163	if (err)
1164		return err;
1165
1166	err = ovl_want_write(dentry);
1167	if (err)
1168		goto out_unlock;
1169
1170	if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, inode))
1171		return 0;
1172
1173	old_cred = ovl_override_creds(dentry->d_sb);
1174	/*
1175	 * The overlay inode nlink should be incremented/decremented IFF the
1176	 * upper operation succeeds, along with nlink change of upper inode.
1177	 * Therefore, before link/unlink/rename, we store the union nlink
1178	 * value relative to the upper inode nlink in an upper inode xattr.
1179	 */
1180	err = ovl_set_nlink_upper(dentry);
1181	revert_creds(old_cred);
1182	if (err)
1183		goto out_drop_write;
1184
1185	return 0;
1186
1187out_drop_write:
1188	ovl_drop_write(dentry);
1189out_unlock:
1190	ovl_inode_unlock(inode);
1191
1192	return err;
1193}
1194
1195void ovl_nlink_end(struct dentry *dentry)
1196{
1197	struct inode *inode = d_inode(dentry);
1198
1199	ovl_drop_write(dentry);
1200
1201	if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) {
1202		const struct cred *old_cred;
1203
1204		old_cred = ovl_override_creds(dentry->d_sb);
1205		ovl_cleanup_index(dentry);
1206		revert_creds(old_cred);
1207	}
1208
1209	ovl_inode_unlock(inode);
1210}
1211
1212int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
1213{
1214	struct dentry *trap;
1215
1216	/* Workdir should not be the same as upperdir */
1217	if (workdir == upperdir)
1218		goto err;
1219
1220	/* Workdir should not be subdir of upperdir and vice versa */
1221	trap = lock_rename(workdir, upperdir);
1222	if (IS_ERR(trap))
1223		goto err;
1224	if (trap)
1225		goto err_unlock;
1226
1227	return 0;
1228
1229err_unlock:
1230	unlock_rename(workdir, upperdir);
1231err:
1232	pr_err("failed to lock workdir+upperdir\n");
1233	return -EIO;
1234}
1235
1236/*
1237 * err < 0, 0 if no metacopy xattr, metacopy data size if xattr found.
1238 * an empty xattr returns OVL_METACOPY_MIN_SIZE to distinguish from no xattr value.
1239 */
1240int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path,
1241			     struct ovl_metacopy *data)
1242{
1243	int res;
1244
1245	/* Only regular files can have metacopy xattr */
1246	if (!S_ISREG(d_inode(path->dentry)->i_mode))
1247		return 0;
1248
1249	res = ovl_path_getxattr(ofs, path, OVL_XATTR_METACOPY,
1250				data, data ? OVL_METACOPY_MAX_SIZE : 0);
1251	if (res < 0) {
1252		if (res == -ENODATA || res == -EOPNOTSUPP)
1253			return 0;
1254		/*
1255		 * getxattr on user.* may fail with EACCES in case there's no
1256		 * read permission on the inode.  Not much we can do, other than
1257		 * tell the caller that this is not a metacopy inode.
1258		 */
1259		if (ofs->config.userxattr && res == -EACCES)
1260			return 0;
1261		goto out;
1262	}
1263
1264	if (res == 0) {
1265		/* Emulate empty data for zero size metacopy xattr */
1266		res = OVL_METACOPY_MIN_SIZE;
1267		if (data) {
1268			memset(data, 0, res);
1269			data->len = res;
1270		}
1271	} else if (res < OVL_METACOPY_MIN_SIZE) {
1272		pr_warn_ratelimited("metacopy file '%pd' has too small xattr\n",
1273				    path->dentry);
1274		return -EIO;
1275	} else if (data) {
1276		if (data->version != 0) {
1277			pr_warn_ratelimited("metacopy file '%pd' has unsupported version\n",
1278					    path->dentry);
1279			return -EIO;
1280		}
1281		if (res != data->len) {
1282			pr_warn_ratelimited("metacopy file '%pd' has invalid xattr size\n",
1283					    path->dentry);
1284			return -EIO;
1285		}
1286	}
1287
1288	return res;
1289out:
1290	pr_warn_ratelimited("failed to get metacopy (%i)\n", res);
1291	return res;
1292}
1293
1294int ovl_set_metacopy_xattr(struct ovl_fs *ofs, struct dentry *d, struct ovl_metacopy *metacopy)
1295{
1296	size_t len = metacopy->len;
1297
1298	/* If no flags or digest fall back to empty metacopy file */
1299	if (metacopy->version == 0 && metacopy->flags == 0 && metacopy->digest_algo == 0)
1300		len = 0;
1301
1302	return ovl_check_setxattr(ofs, d, OVL_XATTR_METACOPY,
1303				  metacopy, len, -EOPNOTSUPP);
1304}
1305
1306bool ovl_is_metacopy_dentry(struct dentry *dentry)
1307{
1308	struct ovl_entry *oe = OVL_E(dentry);
1309
1310	if (!d_is_reg(dentry))
1311		return false;
1312
1313	if (ovl_dentry_upper(dentry)) {
1314		if (!ovl_has_upperdata(d_inode(dentry)))
1315			return true;
1316		return false;
1317	}
1318
1319	return (ovl_numlower(oe) > 1);
1320}
1321
1322char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding)
1323{
1324	int res;
1325	char *s, *next, *buf = NULL;
1326
1327	res = ovl_path_getxattr(ofs, path, OVL_XATTR_REDIRECT, NULL, 0);
1328	if (res == -ENODATA || res == -EOPNOTSUPP)
1329		return NULL;
1330	if (res < 0)
1331		goto fail;
1332	if (res == 0)
1333		goto invalid;
1334
1335	buf = kzalloc(res + padding + 1, GFP_KERNEL);
1336	if (!buf)
1337		return ERR_PTR(-ENOMEM);
1338
1339	res = ovl_path_getxattr(ofs, path, OVL_XATTR_REDIRECT, buf, res);
1340	if (res < 0)
1341		goto fail;
1342	if (res == 0)
1343		goto invalid;
1344
1345	if (buf[0] == '/') {
1346		for (s = buf; *s++ == '/'; s = next) {
1347			next = strchrnul(s, '/');
1348			if (s == next)
1349				goto invalid;
1350		}
1351	} else {
1352		if (strchr(buf, '/') != NULL)
1353			goto invalid;
1354	}
1355
1356	return buf;
1357invalid:
1358	pr_warn_ratelimited("invalid redirect (%s)\n", buf);
1359	res = -EINVAL;
1360	goto err_free;
1361fail:
1362	pr_warn_ratelimited("failed to get redirect (%i)\n", res);
1363err_free:
1364	kfree(buf);
1365	return ERR_PTR(res);
1366}
1367
1368/* Call with mounter creds as it may open the file */
1369int ovl_ensure_verity_loaded(struct path *datapath)
1370{
1371	struct inode *inode = d_inode(datapath->dentry);
1372	struct file *filp;
1373
1374	if (!fsverity_active(inode) && IS_VERITY(inode)) {
1375		/*
1376		 * If this inode was not yet opened, the verity info hasn't been
1377		 * loaded yet, so we need to do that here to force it into memory.
1378		 */
1379		filp = kernel_file_open(datapath, O_RDONLY, inode, current_cred());
1380		if (IS_ERR(filp))
1381			return PTR_ERR(filp);
1382		fput(filp);
1383	}
1384
1385	return 0;
1386}
1387
1388int ovl_validate_verity(struct ovl_fs *ofs,
1389			struct path *metapath,
1390			struct path *datapath)
1391{
1392	struct ovl_metacopy metacopy_data;
1393	u8 actual_digest[FS_VERITY_MAX_DIGEST_SIZE];
1394	int xattr_digest_size, digest_size;
1395	int xattr_size, err;
1396	u8 verity_algo;
1397
1398	if (!ofs->config.verity_mode ||
1399	    /* Verity only works on regular files */
1400	    !S_ISREG(d_inode(metapath->dentry)->i_mode))
1401		return 0;
1402
1403	xattr_size = ovl_check_metacopy_xattr(ofs, metapath, &metacopy_data);
1404	if (xattr_size < 0)
1405		return xattr_size;
1406
1407	if (!xattr_size || !metacopy_data.digest_algo) {
1408		if (ofs->config.verity_mode == OVL_VERITY_REQUIRE) {
1409			pr_warn_ratelimited("metacopy file '%pd' has no digest specified\n",
1410					    metapath->dentry);
1411			return -EIO;
1412		}
1413		return 0;
1414	}
1415
1416	xattr_digest_size = ovl_metadata_digest_size(&metacopy_data);
1417
1418	err = ovl_ensure_verity_loaded(datapath);
1419	if (err < 0) {
1420		pr_warn_ratelimited("lower file '%pd' failed to load fs-verity info\n",
1421				    datapath->dentry);
1422		return -EIO;
1423	}
1424
1425	digest_size = fsverity_get_digest(d_inode(datapath->dentry), actual_digest,
1426					  &verity_algo, NULL);
1427	if (digest_size == 0) {
1428		pr_warn_ratelimited("lower file '%pd' has no fs-verity digest\n", datapath->dentry);
1429		return -EIO;
1430	}
1431
1432	if (xattr_digest_size != digest_size ||
1433	    metacopy_data.digest_algo != verity_algo ||
1434	    memcmp(metacopy_data.digest, actual_digest, xattr_digest_size) != 0) {
1435		pr_warn_ratelimited("lower file '%pd' has the wrong fs-verity digest\n",
1436				    datapath->dentry);
1437		return -EIO;
1438	}
1439
1440	return 0;
1441}
1442
1443int ovl_get_verity_digest(struct ovl_fs *ofs, struct path *src,
1444			  struct ovl_metacopy *metacopy)
1445{
1446	int err, digest_size;
1447
1448	if (!ofs->config.verity_mode || !S_ISREG(d_inode(src->dentry)->i_mode))
1449		return 0;
1450
1451	err = ovl_ensure_verity_loaded(src);
1452	if (err < 0) {
1453		pr_warn_ratelimited("lower file '%pd' failed to load fs-verity info\n",
1454				    src->dentry);
1455		return -EIO;
1456	}
1457
1458	digest_size = fsverity_get_digest(d_inode(src->dentry),
1459					  metacopy->digest, &metacopy->digest_algo, NULL);
1460	if (digest_size == 0 ||
1461	    WARN_ON_ONCE(digest_size > FS_VERITY_MAX_DIGEST_SIZE)) {
1462		if (ofs->config.verity_mode == OVL_VERITY_REQUIRE) {
1463			pr_warn_ratelimited("lower file '%pd' has no fs-verity digest\n",
1464					    src->dentry);
1465			return -EIO;
1466		}
1467		return 0;
1468	}
1469
1470	metacopy->len += digest_size;
1471	return 0;
1472}
1473
1474/*
1475 * ovl_sync_status() - Check fs sync status for volatile mounts
1476 *
1477 * Returns 1 if this is not a volatile mount and a real sync is required.
1478 *
1479 * Returns 0 if syncing can be skipped because mount is volatile, and no errors
1480 * have occurred on the upperdir since the mount.
1481 *
1482 * Returns -errno if it is a volatile mount, and the error that occurred since
1483 * the last mount. If the error code changes, it'll return the latest error
1484 * code.
1485 */
1486
1487int ovl_sync_status(struct ovl_fs *ofs)
1488{
1489	struct vfsmount *mnt;
1490
1491	if (ovl_should_sync(ofs))
1492		return 1;
1493
1494	mnt = ovl_upper_mnt(ofs);
1495	if (!mnt)
1496		return 0;
1497
1498	return errseq_check(&mnt->mnt_sb->s_wb_err, ofs->errseq);
1499}
1500
1501/*
1502 * ovl_copyattr() - copy inode attributes from layer to ovl inode
1503 *
1504 * When overlay copies inode information from an upper or lower layer to the
1505 * relevant overlay inode it will apply the idmapping of the upper or lower
1506 * layer when doing so ensuring that the ovl inode ownership will correctly
1507 * reflect the ownership of the idmapped upper or lower layer. For example, an
1508 * idmapped upper or lower layer mapping id 1001 to id 1000 will take care to
1509 * map any lower or upper inode owned by id 1001 to id 1000. These mapping
1510 * helpers are nops when the relevant layer isn't idmapped.
1511 */
1512void ovl_copyattr(struct inode *inode)
1513{
1514	struct path realpath;
1515	struct inode *realinode;
1516	struct mnt_idmap *real_idmap;
1517	vfsuid_t vfsuid;
1518	vfsgid_t vfsgid;
1519
1520	realinode = ovl_i_path_real(inode, &realpath);
1521	real_idmap = mnt_idmap(realpath.mnt);
1522
1523	spin_lock(&inode->i_lock);
1524	vfsuid = i_uid_into_vfsuid(real_idmap, realinode);
1525	vfsgid = i_gid_into_vfsgid(real_idmap, realinode);
1526
1527	inode->i_uid = vfsuid_into_kuid(vfsuid);
1528	inode->i_gid = vfsgid_into_kgid(vfsgid);
1529	inode->i_mode = realinode->i_mode;
1530	inode_set_atime_to_ts(inode, inode_get_atime(realinode));
1531	inode_set_mtime_to_ts(inode, inode_get_mtime(realinode));
1532	inode_set_ctime_to_ts(inode, inode_get_ctime(realinode));
1533	i_size_write(inode, i_size_read(realinode));
1534	spin_unlock(&inode->i_lock);
1535}