   1// SPDX-License-Identifier: LGPL-2.1
   2/*
   3 *
   4 *   vfs operations that deal with files
   5 *
   6 *   Copyright (C) International Business Machines  Corp., 2002,2010
   7 *   Author(s): Steve French (sfrench@us.ibm.com)
   8 *              Jeremy Allison (jra@samba.org)
   9 *
  10 */
  11#include <linux/fs.h>
  12#include <linux/filelock.h>
  13#include <linux/backing-dev.h>
  14#include <linux/stat.h>
  15#include <linux/fcntl.h>
  16#include <linux/pagemap.h>
  17#include <linux/pagevec.h>
  18#include <linux/writeback.h>
  19#include <linux/task_io_accounting_ops.h>
  20#include <linux/delay.h>
  21#include <linux/mount.h>
  22#include <linux/slab.h>
  23#include <linux/swap.h>
  24#include <linux/mm.h>
  25#include <asm/div64.h>
  26#include "cifsfs.h"
  27#include "cifspdu.h"
  28#include "cifsglob.h"
  29#include "cifsproto.h"
  30#include "smb2proto.h"
  31#include "cifs_unicode.h"
  32#include "cifs_debug.h"
  33#include "cifs_fs_sb.h"
  34#include "fscache.h"
  35#include "smbdirect.h"
  36#include "fs_context.h"
  37#include "cifs_ioctl.h"
  38#include "cached_dir.h"
  39
  40/*
  41 * Remove the dirty flags from a span of pages.
  42 */
  43static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
  44{
  45	struct address_space *mapping = inode->i_mapping;
  46	struct folio *folio;
  47	pgoff_t end;
  48
  49	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
  50
  51	rcu_read_lock();
  52
  53	end = (start + len - 1) / PAGE_SIZE;
  54	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
  55		if (xas_retry(&xas, folio))
  56			continue;
  57		xas_pause(&xas);
  58		rcu_read_unlock();
  59		folio_lock(folio);
  60		folio_clear_dirty_for_io(folio);
  61		folio_unlock(folio);
  62		rcu_read_lock();
  63	}
  64
  65	rcu_read_unlock();
  66}
  67
  68/*
  69 * Completion of write to server.
  70 */
  71void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
  72{
  73	struct address_space *mapping = inode->i_mapping;
  74	struct folio *folio;
  75	pgoff_t end;
  76
  77	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
  78
  79	if (!len)
  80		return;
  81
  82	rcu_read_lock();
  83
  84	end = (start + len - 1) / PAGE_SIZE;
  85	xas_for_each(&xas, folio, end) {
  86		if (xas_retry(&xas, folio))
  87			continue;
  88		if (!folio_test_writeback(folio)) {
  89			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
  90				  len, start, folio->index, end);
  91			continue;
  92		}
  93
  94		folio_detach_private(folio);
  95		folio_end_writeback(folio);
  96	}
  97
  98	rcu_read_unlock();
  99}
 100
 101/*
 102 * Failure of write to server.
 103 */
 104void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
 105{
 106	struct address_space *mapping = inode->i_mapping;
 107	struct folio *folio;
 108	pgoff_t end;
 109
 110	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
 111
 112	if (!len)
 113		return;
 114
 115	rcu_read_lock();
 116
 117	end = (start + len - 1) / PAGE_SIZE;
 118	xas_for_each(&xas, folio, end) {
 119		if (xas_retry(&xas, folio))
 120			continue;
 121		if (!folio_test_writeback(folio)) {
 122			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
 123				  len, start, folio->index, end);
 124			continue;
 125		}
 126
 127		folio_set_error(folio);
 128		folio_end_writeback(folio);
 129	}
 130
 131	rcu_read_unlock();
 132}
 133
 134/*
 135 * Redirty pages after a temporary failure.
 136 */
 137void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
 138{
 139	struct address_space *mapping = inode->i_mapping;
 140	struct folio *folio;
 141	pgoff_t end;
 142
 143	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
 144
 145	if (!len)
 146		return;
 147
 148	rcu_read_lock();
 149
 150	end = (start + len - 1) / PAGE_SIZE;
 151	xas_for_each(&xas, folio, end) {
 152		if (!folio_test_writeback(folio)) {
 153			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
 154				  len, start, folio->index, end);
 155			continue;
 156		}
 157
 158		filemap_dirty_folio(folio->mapping, folio);
 159		folio_end_writeback(folio);
 160	}
 161
 162	rcu_read_unlock();
 163}
 164
 165/*
 166 * Mark all open files on tree connections as invalid, since they
 167 * were closed when the session to the server was lost.
 168 */
 169void
 170cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
 171{
 172	struct cifsFileInfo *open_file = NULL;
 173	struct list_head *tmp;
 174	struct list_head *tmp1;
 175
 176	/* only send once per connect */
 177	spin_lock(&tcon->tc_lock);
 178	if (tcon->need_reconnect)
 179		tcon->status = TID_NEED_RECON;
 180
 181	if (tcon->status != TID_NEED_RECON) {
 182		spin_unlock(&tcon->tc_lock);
 183		return;
 184	}
 185	tcon->status = TID_IN_FILES_INVALIDATE;
 186	spin_unlock(&tcon->tc_lock);
 187
 188	/* list all files open on tree connection and mark them invalid */
 189	spin_lock(&tcon->open_file_lock);
 190	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
 191		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
 192		open_file->invalidHandle = true;
 193		open_file->oplock_break_cancelled = true;
 194	}
 195	spin_unlock(&tcon->open_file_lock);
 196
 197	invalidate_all_cached_dirs(tcon);
 198	spin_lock(&tcon->tc_lock);
 199	if (tcon->status == TID_IN_FILES_INVALIDATE)
 200		tcon->status = TID_NEED_TCON;
 201	spin_unlock(&tcon->tc_lock);
 202
 203	/*
 204	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
 205	 * to this tcon.
 206	 */
 207}
 208
 209static inline int cifs_convert_flags(unsigned int flags)
 210{
 211	if ((flags & O_ACCMODE) == O_RDONLY)
 212		return GENERIC_READ;
 213	else if ((flags & O_ACCMODE) == O_WRONLY)
 214		return GENERIC_WRITE;
 215	else if ((flags & O_ACCMODE) == O_RDWR) {
 216		/* GENERIC_ALL is too much permission to request
 217		   can cause unnecessary access denied on create */
 218		/* return GENERIC_ALL; */
 219		return (GENERIC_READ | GENERIC_WRITE);
 220	}
 221
 222	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
 223		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
 224		FILE_READ_DATA);
 225}
 226
 227#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
 228static u32 cifs_posix_convert_flags(unsigned int flags)
 229{
 230	u32 posix_flags = 0;
 231
 232	if ((flags & O_ACCMODE) == O_RDONLY)
 233		posix_flags = SMB_O_RDONLY;
 234	else if ((flags & O_ACCMODE) == O_WRONLY)
 235		posix_flags = SMB_O_WRONLY;
 236	else if ((flags & O_ACCMODE) == O_RDWR)
 237		posix_flags = SMB_O_RDWR;
 238
 239	if (flags & O_CREAT) {
 240		posix_flags |= SMB_O_CREAT;
 241		if (flags & O_EXCL)
 242			posix_flags |= SMB_O_EXCL;
 243	} else if (flags & O_EXCL)
 244		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
 245			 current->comm, current->tgid);
 246
 247	if (flags & O_TRUNC)
 248		posix_flags |= SMB_O_TRUNC;
 249	/* be safe and imply O_SYNC for O_DSYNC */
 250	if (flags & O_DSYNC)
 251		posix_flags |= SMB_O_SYNC;
 252	if (flags & O_DIRECTORY)
 253		posix_flags |= SMB_O_DIRECTORY;
 254	if (flags & O_NOFOLLOW)
 255		posix_flags |= SMB_O_NOFOLLOW;
 256	if (flags & O_DIRECT)
 257		posix_flags |= SMB_O_DIRECT;
 258
 259	return posix_flags;
 260}
 261#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
 262
 263static inline int cifs_get_disposition(unsigned int flags)
 264{
 265	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
 266		return FILE_CREATE;
 267	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
 268		return FILE_OVERWRITE_IF;
 269	else if ((flags & O_CREAT) == O_CREAT)
 270		return FILE_OPEN_IF;
 271	else if ((flags & O_TRUNC) == O_TRUNC)
 272		return FILE_OVERWRITE;
 273	else
 274		return FILE_OPEN;
 275}
 276
 277#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
 278int cifs_posix_open(const char *full_path, struct inode **pinode,
 279			struct super_block *sb, int mode, unsigned int f_flags,
 280			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
 281{
 282	int rc;
 283	FILE_UNIX_BASIC_INFO *presp_data;
 284	__u32 posix_flags = 0;
 285	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 286	struct cifs_fattr fattr;
 287	struct tcon_link *tlink;
 288	struct cifs_tcon *tcon;
 289
 290	cifs_dbg(FYI, "posix open %s\n", full_path);
 291
 292	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
 293	if (presp_data == NULL)
 294		return -ENOMEM;
 295
 296	tlink = cifs_sb_tlink(cifs_sb);
 297	if (IS_ERR(tlink)) {
 298		rc = PTR_ERR(tlink);
 299		goto posix_open_ret;
 300	}
 301
 302	tcon = tlink_tcon(tlink);
 303	mode &= ~current_umask();
 304
 305	posix_flags = cifs_posix_convert_flags(f_flags);
 306	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
 307			     poplock, full_path, cifs_sb->local_nls,
 308			     cifs_remap(cifs_sb));
 309	cifs_put_tlink(tlink);
 310
 311	if (rc)
 312		goto posix_open_ret;
 313
 314	if (presp_data->Type == cpu_to_le32(-1))
 315		goto posix_open_ret; /* open ok, caller does qpathinfo */
 316
 317	if (!pinode)
 318		goto posix_open_ret; /* caller does not need info */
 319
 320	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
 321
 322	/* get new inode and set it up */
 323	if (*pinode == NULL) {
 324		cifs_fill_uniqueid(sb, &fattr);
 325		*pinode = cifs_iget(sb, &fattr);
 326		if (!*pinode) {
 327			rc = -ENOMEM;
 328			goto posix_open_ret;
 329		}
 330	} else {
 331		cifs_revalidate_mapping(*pinode);
 332		rc = cifs_fattr_to_inode(*pinode, &fattr);
 333	}
 334
 335posix_open_ret:
 336	kfree(presp_data);
 337	return rc;
 338}
 339#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
 340
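/*
 * Open a file via the NT/SMB2+ create path: translate the POSIX open flags
 * into desired access, disposition and create options, issue the open through
 * server->ops->open(), then query inode metadata for the new handle.
 */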
 341static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
 342			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
 343			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
 344{
 345	int rc;
 346	int desired_access;
 347	int disposition;
 348	int create_options = CREATE_NOT_DIR;
 349	struct TCP_Server_Info *server = tcon->ses->server;
 350	struct cifs_open_parms oparms;
 351
 352	if (!server->ops->open)
 353		return -ENOSYS;
 354
 355	desired_access = cifs_convert_flags(f_flags);
 356
 357/*********************************************************************
 358 *  open flag mapping table:
 359 *
 360 *	POSIX Flag            CIFS Disposition
 361 *	----------            ----------------
 362 *	O_CREAT               FILE_OPEN_IF
 363 *	O_CREAT | O_EXCL      FILE_CREATE
 364 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 365 *	O_TRUNC               FILE_OVERWRITE
 366 *	none of the above     FILE_OPEN
 367 *
 368 *	Note that there is no direct match for the disposition
 369 *	FILE_SUPERSEDE (i.e. create whether or not the file exists);
 370 *	O_CREAT | O_TRUNC is similar, but it truncates an existing
 371 *	file rather than creating a new one as FILE_SUPERSEDE does
 372 *	(which uses the attributes / metadata passed in on the open call).
 373 *?
 374 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 375 *?  and the read write flags match reasonably.  O_LARGEFILE
 376 *?  is irrelevant because largefile support is always used
 377 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 378 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 379 *********************************************************************/
 380
 381	disposition = cifs_get_disposition(f_flags);
 382
 383	/* BB pass O_SYNC flag through on file attributes .. BB */
 384
 385	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
 386	if (f_flags & O_SYNC)
 387		create_options |= CREATE_WRITE_THROUGH;
 388
 389	if (f_flags & O_DIRECT)
 390		create_options |= CREATE_NO_BUFFER;
 391
 392	oparms = (struct cifs_open_parms) {
 393		.tcon = tcon,
 394		.cifs_sb = cifs_sb,
 395		.desired_access = desired_access,
 396		.create_options = cifs_create_options(cifs_sb, create_options),
 397		.disposition = disposition,
 398		.path = full_path,
 399		.fid = fid,
 400	};
 401
 402	rc = server->ops->open(xid, &oparms, oplock, buf);
 403	if (rc)
 404		return rc;
 405
 406	/* TODO: Add support for calling posix query info but with passing in fid */
 407	if (tcon->unix_ext)
 408		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
 409					      xid);
 410	else
 411		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
 412					 xid, fid);
 413
 414	if (rc) {
 415		server->ops->close(xid, tcon, fid);
 416		if (rc == -ESTALE)
 417			rc = -EOPENSTALE;
 418	}
 419
 420	return rc;
 421}
 422
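/*
 * Return true if any byte-range locks are currently cached for this inode
 * (checked across all of its open file handles).
 */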
 423static bool
 424cifs_has_mand_locks(struct cifsInodeInfo *cinode)
 425{
 426	struct cifs_fid_locks *cur;
 427	bool has_locks = false;
 428
 429	down_read(&cinode->lock_sem);
 430	list_for_each_entry(cur, &cinode->llist, llist) {
 431		if (!list_empty(&cur->locks)) {
 432			has_locks = true;
 433			break;
 434		}
 435	}
 436	up_read(&cinode->lock_sem);
 437	return has_locks;
 438}
 439
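/*
 * Take @sem for writing by polling down_write_trylock() with short sleeps
 * instead of blocking in down_write().
 */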
 440void
 441cifs_down_write(struct rw_semaphore *sem)
 442{
 443	while (!down_write_trylock(sem))
 444		msleep(10);
 445}
 446
 447static void cifsFileInfo_put_work(struct work_struct *work);
 448
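/*
 * Allocate and initialize the cifsFileInfo for a newly opened handle, link it
 * into the tcon's and inode's open-file lists, and record the granted oplock
 * or lease state via server->ops->set_fid().
 */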
 449struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 450				       struct tcon_link *tlink, __u32 oplock,
 451				       const char *symlink_target)
 452{
 453	struct dentry *dentry = file_dentry(file);
 454	struct inode *inode = d_inode(dentry);
 455	struct cifsInodeInfo *cinode = CIFS_I(inode);
 456	struct cifsFileInfo *cfile;
 457	struct cifs_fid_locks *fdlocks;
 458	struct cifs_tcon *tcon = tlink_tcon(tlink);
 459	struct TCP_Server_Info *server = tcon->ses->server;
 460
 461	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
 462	if (cfile == NULL)
 463		return cfile;
 464
 465	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
 466	if (!fdlocks) {
 467		kfree(cfile);
 468		return NULL;
 469	}
 470
 471	if (symlink_target) {
 472		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
 473		if (!cfile->symlink_target) {
 474			kfree(fdlocks);
 475			kfree(cfile);
 476			return NULL;
 477		}
 478	}
 479
 480	INIT_LIST_HEAD(&fdlocks->locks);
 481	fdlocks->cfile = cfile;
 482	cfile->llist = fdlocks;
 483
 484	cfile->count = 1;
 485	cfile->pid = current->tgid;
 486	cfile->uid = current_fsuid();
 487	cfile->dentry = dget(dentry);
 488	cfile->f_flags = file->f_flags;
 489	cfile->invalidHandle = false;
 490	cfile->deferred_close_scheduled = false;
 491	cfile->tlink = cifs_get_tlink(tlink);
 492	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
 493	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
 494	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
 495	mutex_init(&cfile->fh_mutex);
 496	spin_lock_init(&cfile->file_info_lock);
 497
 498	cifs_sb_active(inode->i_sb);
 499
 500	/*
 501	 * If the server returned a read oplock and we have mandatory brlocks,
 502	 * set oplock level to None.
 503	 */
 504	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
 505		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
 506		oplock = 0;
 507	}
 508
 509	cifs_down_write(&cinode->lock_sem);
 510	list_add(&fdlocks->llist, &cinode->llist);
 511	up_write(&cinode->lock_sem);
 512
 513	spin_lock(&tcon->open_file_lock);
 514	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
 515		oplock = fid->pending_open->oplock;
 516	list_del(&fid->pending_open->olist);
 517
 518	fid->purge_cache = false;
 519	server->ops->set_fid(cfile, fid, oplock);
 520
 521	list_add(&cfile->tlist, &tcon->openFileList);
 522	atomic_inc(&tcon->num_local_opens);
 523
 524	/* if this is a readable file instance, put it first in the list */
 525	spin_lock(&cinode->open_file_lock);
 526	if (file->f_mode & FMODE_READ)
 527		list_add(&cfile->flist, &cinode->openFileList);
 528	else
 529		list_add_tail(&cfile->flist, &cinode->openFileList);
 530	spin_unlock(&cinode->open_file_lock);
 531	spin_unlock(&tcon->open_file_lock);
 532
 533	if (fid->purge_cache)
 534		cifs_zap_mapping(inode);
 535
 536	file->private_data = cfile;
 537	return cfile;
 538}
 539
 540struct cifsFileInfo *
 541cifsFileInfo_get(struct cifsFileInfo *cifs_file)
 542{
 543	spin_lock(&cifs_file->file_info_lock);
 544	cifsFileInfo_get_locked(cifs_file);
 545	spin_unlock(&cifs_file->file_info_lock);
 546	return cifs_file;
 547}
 548
 549static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
 550{
 551	struct inode *inode = d_inode(cifs_file->dentry);
 552	struct cifsInodeInfo *cifsi = CIFS_I(inode);
 553	struct cifsLockInfo *li, *tmp;
 554	struct super_block *sb = inode->i_sb;
 555
 556	/*
 557	 * Delete any outstanding lock records. We'll lose them when the file
 558	 * is closed anyway.
 559	 */
 560	cifs_down_write(&cifsi->lock_sem);
 561	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
 562		list_del(&li->llist);
 563		cifs_del_lock_waiters(li);
 564		kfree(li);
 565	}
 566	list_del(&cifs_file->llist->llist);
 567	kfree(cifs_file->llist);
 568	up_write(&cifsi->lock_sem);
 569
 570	cifs_put_tlink(cifs_file->tlink);
 571	dput(cifs_file->dentry);
 572	cifs_sb_deactive(sb);
 573	kfree(cifs_file->symlink_target);
 574	kfree(cifs_file);
 575}
 576
 577static void cifsFileInfo_put_work(struct work_struct *work)
 578{
 579	struct cifsFileInfo *cifs_file = container_of(work,
 580			struct cifsFileInfo, put);
 581
 582	cifsFileInfo_put_final(cifs_file);
 583}
 584
 585/**
 586 * cifsFileInfo_put - release a reference of file priv data
 587 *
 588 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 589 *
 590 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 591 */
 592void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 593{
 594	_cifsFileInfo_put(cifs_file, true, true);
 595}
 596
 597/**
 598 * _cifsFileInfo_put - release a reference of file priv data
 599 *
 600 * This may involve closing the filehandle @cifs_file out on the
 601 * server. Must be called without holding tcon->open_file_lock,
 602 * cinode->open_file_lock and cifs_file->file_info_lock.
 603 *
 604 * If @wait_oplock_handler is true and we are releasing the last
 605 * reference, wait for any running oplock break handler of the file
 606 * and cancel any pending one.
 607 *
 608 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 609 * @wait_oplock_handler: must be false if called from oplock_break_handler
 610 * @offload:	not offloaded on close and oplock breaks
 611 *
 612 */
 613void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
 614		       bool wait_oplock_handler, bool offload)
 615{
 616	struct inode *inode = d_inode(cifs_file->dentry);
 617	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
 618	struct TCP_Server_Info *server = tcon->ses->server;
 619	struct cifsInodeInfo *cifsi = CIFS_I(inode);
 620	struct super_block *sb = inode->i_sb;
 621	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 622	struct cifs_fid fid = {};
 623	struct cifs_pending_open open;
 624	bool oplock_break_cancelled;
 625
 626	spin_lock(&tcon->open_file_lock);
 627	spin_lock(&cifsi->open_file_lock);
 628	spin_lock(&cifs_file->file_info_lock);
 629	if (--cifs_file->count > 0) {
 630		spin_unlock(&cifs_file->file_info_lock);
 631		spin_unlock(&cifsi->open_file_lock);
 632		spin_unlock(&tcon->open_file_lock);
 633		return;
 634	}
 635	spin_unlock(&cifs_file->file_info_lock);
 636
 637	if (server->ops->get_lease_key)
 638		server->ops->get_lease_key(inode, &fid);
 639
 640	/* store open in pending opens to make sure we don't miss lease break */
 641	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
 642
 643	/* remove it from the lists */
 644	list_del(&cifs_file->flist);
 645	list_del(&cifs_file->tlist);
 646	atomic_dec(&tcon->num_local_opens);
 647
 648	if (list_empty(&cifsi->openFileList)) {
 649		cifs_dbg(FYI, "closing last open instance for inode %p\n",
 650			 d_inode(cifs_file->dentry));
 651		/*
 652		 * In strict cache mode we need to invalidate the mapping on the last
 653		 * close because it may cause an error when we open this file
 654		 * again and get at least level II oplock.
 655		 */
 656		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
 657			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
 658		cifs_set_oplock_level(cifsi, 0);
 659	}
 660
 661	spin_unlock(&cifsi->open_file_lock);
 662	spin_unlock(&tcon->open_file_lock);
 663
 664	oplock_break_cancelled = wait_oplock_handler ?
 665		cancel_work_sync(&cifs_file->oplock_break) : false;
 666
 667	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
 668		struct TCP_Server_Info *server = tcon->ses->server;
 669		unsigned int xid;
 670
 671		xid = get_xid();
 672		if (server->ops->close_getattr)
 673			server->ops->close_getattr(xid, tcon, cifs_file);
 674		else if (server->ops->close)
 675			server->ops->close(xid, tcon, &cifs_file->fid);
 676		_free_xid(xid);
 677	}
 678
 679	if (oplock_break_cancelled)
 680		cifs_done_oplock_break(cifsi);
 681
 682	cifs_del_pending_open(&open);
 683
 684	if (offload)
 685		queue_work(fileinfo_put_wq, &cifs_file->put);
 686	else
 687		cifsFileInfo_put_final(cifs_file);
 688}
 689
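/*
 * VFS ->open() for regular files: reuse a cached handle whose close was
 * deferred when possible, otherwise try a POSIX open if the server supports
 * the legacy UNIX extensions, and finally fall back to cifs_nt_open().
 */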
 690int cifs_open(struct inode *inode, struct file *file)
 691
 692{
 693	int rc = -EACCES;
 694	unsigned int xid;
 695	__u32 oplock;
 696	struct cifs_sb_info *cifs_sb;
 697	struct TCP_Server_Info *server;
 698	struct cifs_tcon *tcon;
 699	struct tcon_link *tlink;
 700	struct cifsFileInfo *cfile = NULL;
 701	void *page;
 702	const char *full_path;
 703	bool posix_open_ok = false;
 704	struct cifs_fid fid = {};
 705	struct cifs_pending_open open;
 706	struct cifs_open_info_data data = {};
 707
 708	xid = get_xid();
 709
 710	cifs_sb = CIFS_SB(inode->i_sb);
 711	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
 712		free_xid(xid);
 713		return -EIO;
 714	}
 715
 716	tlink = cifs_sb_tlink(cifs_sb);
 717	if (IS_ERR(tlink)) {
 718		free_xid(xid);
 719		return PTR_ERR(tlink);
 720	}
 721	tcon = tlink_tcon(tlink);
 722	server = tcon->ses->server;
 723
 724	page = alloc_dentry_path();
 725	full_path = build_path_from_dentry(file_dentry(file), page);
 726	if (IS_ERR(full_path)) {
 727		rc = PTR_ERR(full_path);
 728		goto out;
 729	}
 730
 731	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
 732		 inode, file->f_flags, full_path);
 733
 734	if (file->f_flags & O_DIRECT &&
 735	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
 736		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
 737			file->f_op = &cifs_file_direct_nobrl_ops;
 738		else
 739			file->f_op = &cifs_file_direct_ops;
 740	}
 741
 742	/* Get the cached handle as SMB2 close is deferred */
 743	rc = cifs_get_readable_path(tcon, full_path, &cfile);
 744	if (rc == 0) {
 745		if (file->f_flags == cfile->f_flags) {
 746			file->private_data = cfile;
 747			spin_lock(&CIFS_I(inode)->deferred_lock);
 748			cifs_del_deferred_close(cfile);
 749			spin_unlock(&CIFS_I(inode)->deferred_lock);
 750			goto use_cache;
 751		} else {
 752			_cifsFileInfo_put(cfile, true, false);
 753		}
 754	}
 755
 756	if (server->oplocks)
 757		oplock = REQ_OPLOCK;
 758	else
 759		oplock = 0;
 760
 761#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
 762	if (!tcon->broken_posix_open && tcon->unix_ext &&
 763	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 764				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 765		/* can not refresh inode info since size could be stale */
 766		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
 767				cifs_sb->ctx->file_mode /* ignored */,
 768				file->f_flags, &oplock, &fid.netfid, xid);
 769		if (rc == 0) {
 770			cifs_dbg(FYI, "posix open succeeded\n");
 771			posix_open_ok = true;
 772		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
 773			if (tcon->ses->serverNOS)
 774				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
 775					 tcon->ses->ip_addr,
 776					 tcon->ses->serverNOS);
 777			tcon->broken_posix_open = true;
 778		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
 779			 (rc != -EOPNOTSUPP)) /* path not found or net err */
 780			goto out;
 781		/*
 782		 * Else fallthrough to retry open the old way on network i/o
 783		 * or DFS errors.
 784		 */
 785	}
 786#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
 787
 788	if (server->ops->get_lease_key)
 789		server->ops->get_lease_key(inode, &fid);
 790
 791	cifs_add_pending_open(&fid, tlink, &open);
 792
 793	if (!posix_open_ok) {
 794		if (server->ops->get_lease_key)
 795			server->ops->get_lease_key(inode, &fid);
 796
 797		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
 798				  xid, &data);
 799		if (rc) {
 800			cifs_del_pending_open(&open);
 801			goto out;
 802		}
 803	}
 804
 805	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
 806	if (cfile == NULL) {
 807		if (server->ops->close)
 808			server->ops->close(xid, tcon, &fid);
 809		cifs_del_pending_open(&open);
 810		rc = -ENOMEM;
 811		goto out;
 812	}
 813
 814#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
 815	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
 816		/*
 817		 * Time to set mode which we can not set earlier due to
 818		 * problems creating new read-only files.
 819		 */
 820		struct cifs_unix_set_info_args args = {
 821			.mode	= inode->i_mode,
 822			.uid	= INVALID_UID, /* no change */
 823			.gid	= INVALID_GID, /* no change */
 824			.ctime	= NO_CHANGE_64,
 825			.atime	= NO_CHANGE_64,
 826			.mtime	= NO_CHANGE_64,
 827			.device	= 0,
 828		};
 829		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
 830				       cfile->pid);
 831	}
 832#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
 833
 834use_cache:
 835	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
 836			   file->f_mode & FMODE_WRITE);
 837	if (file->f_flags & O_DIRECT &&
 838	    (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
 839	     file->f_flags & O_APPEND))
 840		cifs_invalidate_cache(file_inode(file),
 841				      FSCACHE_INVAL_DIO_WRITE);
 842
 843out:
 844	free_dentry_path(page);
 845	free_xid(xid);
 846	cifs_put_tlink(tlink);
 847	cifs_free_open_info(&data);
 848	return rc;
 849}
 850
 851#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
 852static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
 853#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
 854
 855/*
 856 * Try to reacquire byte range locks that were released when session
 857 * to server was lost.
 858 */
 859static int
 860cifs_relock_file(struct cifsFileInfo *cfile)
 861{
 862	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 863	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 864	int rc = 0;
 865#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
 866	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
 867#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
 868
 869	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
 870	if (cinode->can_cache_brlcks) {
 871		/* can cache locks - no need to relock */
 872		up_read(&cinode->lock_sem);
 873		return rc;
 874	}
 875
 876#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
 877	if (cap_unix(tcon->ses) &&
 878	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
 879	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
 880		rc = cifs_push_posix_locks(cfile);
 881	else
 882#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
 883		rc = tcon->ses->server->ops->push_mand_locks(cfile);
 884
 885	up_read(&cinode->lock_sem);
 886	return rc;
 887}
 888
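/*
 * Re-open a file whose server handle has been invalidated (e.g. after a
 * reconnect). If @can_flush is set, write back cached data and refresh the
 * inode metadata once the new handle is obtained.
 */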
 889static int
 890cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 891{
 892	int rc = -EACCES;
 893	unsigned int xid;
 894	__u32 oplock;
 895	struct cifs_sb_info *cifs_sb;
 896	struct cifs_tcon *tcon;
 897	struct TCP_Server_Info *server;
 898	struct cifsInodeInfo *cinode;
 899	struct inode *inode;
 900	void *page;
 901	const char *full_path;
 902	int desired_access;
 903	int disposition = FILE_OPEN;
 904	int create_options = CREATE_NOT_DIR;
 905	struct cifs_open_parms oparms;
 906
 907	xid = get_xid();
 908	mutex_lock(&cfile->fh_mutex);
 909	if (!cfile->invalidHandle) {
 910		mutex_unlock(&cfile->fh_mutex);
 911		free_xid(xid);
 912		return 0;
 913	}
 914
 915	inode = d_inode(cfile->dentry);
 916	cifs_sb = CIFS_SB(inode->i_sb);
 917	tcon = tlink_tcon(cfile->tlink);
 918	server = tcon->ses->server;
 919
 920	/*
 921	 * Cannot grab the rename sem here: various ops, including ones that
 922	 * already hold the rename sem, can end up causing writepage to be
 923	 * called, and if the server was down that means we end up here, so we
 924	 * can never tell whether the caller already holds the rename_sem.
 925	 */
 926	page = alloc_dentry_path();
 927	full_path = build_path_from_dentry(cfile->dentry, page);
 928	if (IS_ERR(full_path)) {
 929		mutex_unlock(&cfile->fh_mutex);
 930		free_dentry_path(page);
 931		free_xid(xid);
 932		return PTR_ERR(full_path);
 933	}
 934
 935	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
 936		 inode, cfile->f_flags, full_path);
 937
 938	if (tcon->ses->server->oplocks)
 939		oplock = REQ_OPLOCK;
 940	else
 941		oplock = 0;
 942
 943#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
 944	if (tcon->unix_ext && cap_unix(tcon->ses) &&
 945	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 946				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 947		/*
 948		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
 949		 * original open. Must mask them off for a reopen.
 950		 */
 951		unsigned int oflags = cfile->f_flags &
 952						~(O_CREAT | O_EXCL | O_TRUNC);
 953
 954		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
 955				     cifs_sb->ctx->file_mode /* ignored */,
 956				     oflags, &oplock, &cfile->fid.netfid, xid);
 957		if (rc == 0) {
 958			cifs_dbg(FYI, "posix reopen succeeded\n");
 959			oparms.reconnect = true;
 960			goto reopen_success;
 961		}
 962		/*
 963		 * fallthrough to retry open the old way on errors, especially
 964		 * in the reconnect path it is important to retry hard
 965		 */
 966	}
 967#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
 968
 969	desired_access = cifs_convert_flags(cfile->f_flags);
 970
 971	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
 972	if (cfile->f_flags & O_SYNC)
 973		create_options |= CREATE_WRITE_THROUGH;
 974
 975	if (cfile->f_flags & O_DIRECT)
 976		create_options |= CREATE_NO_BUFFER;
 977
 978	if (server->ops->get_lease_key)
 979		server->ops->get_lease_key(inode, &cfile->fid);
 980
 981	oparms = (struct cifs_open_parms) {
 982		.tcon = tcon,
 983		.cifs_sb = cifs_sb,
 984		.desired_access = desired_access,
 985		.create_options = cifs_create_options(cifs_sb, create_options),
 986		.disposition = disposition,
 987		.path = full_path,
 988		.fid = &cfile->fid,
 989		.reconnect = true,
 990	};
 991
 992	/*
 993	 * Can not refresh inode by passing in file_info buf to be returned by
 994	 * ops->open and then calling get_inode_info with returned buf since
 995	 * file might have write behind data that needs to be flushed and server
 996	 * version of file size can be stale. If we knew for sure that inode was
 997	 * not dirty locally we could do this.
 998	 */
 999	rc = server->ops->open(xid, &oparms, &oplock, NULL);
1000	if (rc == -ENOENT && oparms.reconnect == false) {
1001		/* durable handle timeout is expired - open the file again */
1002		rc = server->ops->open(xid, &oparms, &oplock, NULL);
1003		/* indicate that we need to relock the file */
1004		oparms.reconnect = true;
1005	}
1006
1007	if (rc) {
1008		mutex_unlock(&cfile->fh_mutex);
1009		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
1010		cifs_dbg(FYI, "oplock: %d\n", oplock);
1011		goto reopen_error_exit;
1012	}
1013
1014#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1015reopen_success:
1016#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1017	cfile->invalidHandle = false;
1018	mutex_unlock(&cfile->fh_mutex);
1019	cinode = CIFS_I(inode);
1020
1021	if (can_flush) {
1022		rc = filemap_write_and_wait(inode->i_mapping);
1023		if (!is_interrupt_error(rc))
1024			mapping_set_error(inode->i_mapping, rc);
1025
1026		if (tcon->posix_extensions) {
1027			rc = smb311_posix_get_inode_info(&inode, full_path,
1028							 NULL, inode->i_sb, xid);
1029		} else if (tcon->unix_ext) {
1030			rc = cifs_get_inode_info_unix(&inode, full_path,
1031						      inode->i_sb, xid);
1032		} else {
1033			rc = cifs_get_inode_info(&inode, full_path, NULL,
1034						 inode->i_sb, xid, NULL);
1035		}
1036	}
1037	/*
1038	 * Else we are writing out data to server already and could deadlock if
1039	 * we tried to flush data, and since we do not know if we have data that
1040	 * would invalidate the current end of file on the server we can not go
1041	 * to the server to get the new inode info.
1042	 */
1043
1044	/*
1045	 * If the server returned a read oplock and we have mandatory brlocks,
1046	 * set oplock level to None.
1047	 */
1048	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
1049		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
1050		oplock = 0;
1051	}
1052
1053	server->ops->set_fid(cfile, &cfile->fid, oplock);
1054	if (oparms.reconnect)
1055		cifs_relock_file(cfile);
1056
1057reopen_error_exit:
1058	free_dentry_path(page);
1059	free_xid(xid);
1060	return rc;
1061}
1062
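/*
 * Delayed-work handler for a deferred close: remove the deferred-close entry
 * and drop the reference that was kept open, allowing the handle to be
 * closed on the server.
 */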
1063void smb2_deferred_work_close(struct work_struct *work)
1064{
1065	struct cifsFileInfo *cfile = container_of(work,
1066			struct cifsFileInfo, deferred.work);
1067
1068	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1069	cifs_del_deferred_close(cfile);
1070	cfile->deferred_close_scheduled = false;
1071	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1072	_cifsFileInfo_put(cfile, true, false);
1073}
1074
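/*
 * VFS ->release() for regular files. When the handle holds a read/write
 * caching lease and closetimeo is set, the actual close is deferred so the
 * handle can be reused by a quick reopen; otherwise the reference is dropped
 * immediately.
 */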
1075int cifs_close(struct inode *inode, struct file *file)
1076{
1077	struct cifsFileInfo *cfile;
1078	struct cifsInodeInfo *cinode = CIFS_I(inode);
1079	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1080	struct cifs_deferred_close *dclose;
1081
1082	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
1083
1084	if (file->private_data != NULL) {
1085		cfile = file->private_data;
1086		file->private_data = NULL;
1087		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
1088		if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
1089		    && cinode->lease_granted &&
1090		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
1091		    dclose) {
1092			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
1093				inode_set_mtime_to_ts(inode,
1094						      inode_set_ctime_current(inode));
1095			}
1096			spin_lock(&cinode->deferred_lock);
1097			cifs_add_deferred_close(cfile, dclose);
1098			if (cfile->deferred_close_scheduled &&
1099			    delayed_work_pending(&cfile->deferred)) {
1100				/*
1101				 * If there is no pending work, mod_delayed_work queues new work.
 1102				 * So, increase the ref count to avoid use-after-free.
1103				 */
1104				if (!mod_delayed_work(deferredclose_wq,
1105						&cfile->deferred, cifs_sb->ctx->closetimeo))
1106					cifsFileInfo_get(cfile);
1107			} else {
1108				/* Deferred close for files */
1109				queue_delayed_work(deferredclose_wq,
1110						&cfile->deferred, cifs_sb->ctx->closetimeo);
1111				cfile->deferred_close_scheduled = true;
1112				spin_unlock(&cinode->deferred_lock);
1113				return 0;
1114			}
1115			spin_unlock(&cinode->deferred_lock);
1116			_cifsFileInfo_put(cfile, true, false);
1117		} else {
1118			_cifsFileInfo_put(cfile, true, false);
1119			kfree(dclose);
1120		}
1121	}
1122
1123	/* return code from the ->release op is always ignored */
1124	return 0;
1125}
1126
1127void
1128cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1129{
1130	struct cifsFileInfo *open_file, *tmp;
1131	struct list_head tmp_list;
1132
1133	if (!tcon->use_persistent || !tcon->need_reopen_files)
1134		return;
1135
1136	tcon->need_reopen_files = false;
1137
1138	cifs_dbg(FYI, "Reopen persistent handles\n");
1139	INIT_LIST_HEAD(&tmp_list);
1140
1141	/* list all files open on tree connection, reopen resilient handles  */
1142	spin_lock(&tcon->open_file_lock);
1143	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1144		if (!open_file->invalidHandle)
1145			continue;
1146		cifsFileInfo_get(open_file);
1147		list_add_tail(&open_file->rlist, &tmp_list);
1148	}
1149	spin_unlock(&tcon->open_file_lock);
1150
1151	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1152		if (cifs_reopen_file(open_file, false /* do not flush */))
1153			tcon->need_reopen_files = true;
1154		list_del_init(&open_file->rlist);
1155		cifsFileInfo_put(open_file);
1156	}
1157}
1158
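/*
 * VFS ->release() for directories: close the search handle on the server if
 * it is still open and free the cached search buffer.
 */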
1159int cifs_closedir(struct inode *inode, struct file *file)
1160{
1161	int rc = 0;
1162	unsigned int xid;
1163	struct cifsFileInfo *cfile = file->private_data;
1164	struct cifs_tcon *tcon;
1165	struct TCP_Server_Info *server;
1166	char *buf;
1167
1168	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1169
1170	if (cfile == NULL)
1171		return rc;
1172
1173	xid = get_xid();
1174	tcon = tlink_tcon(cfile->tlink);
1175	server = tcon->ses->server;
1176
1177	cifs_dbg(FYI, "Freeing private data in close dir\n");
1178	spin_lock(&cfile->file_info_lock);
1179	if (server->ops->dir_needs_close(cfile)) {
1180		cfile->invalidHandle = true;
1181		spin_unlock(&cfile->file_info_lock);
1182		if (server->ops->close_dir)
1183			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1184		else
1185			rc = -ENOSYS;
1186		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1187		/* not much we can do if it fails anyway, ignore rc */
1188		rc = 0;
1189	} else
1190		spin_unlock(&cfile->file_info_lock);
1191
1192	buf = cfile->srch_inf.ntwrk_buf_start;
1193	if (buf) {
1194		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1195		cfile->srch_inf.ntwrk_buf_start = NULL;
1196		if (cfile->srch_inf.smallBuf)
1197			cifs_small_buf_release(buf);
1198		else
1199			cifs_buf_release(buf);
1200	}
1201
1202	cifs_put_tlink(cfile->tlink);
1203	kfree(file->private_data);
1204	file->private_data = NULL;
1205	/* BB can we lock the filestruct while this is going on? */
1206	free_xid(xid);
1207	return rc;
1208}
1209
1210static struct cifsLockInfo *
1211cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1212{
1213	struct cifsLockInfo *lock =
1214		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1215	if (!lock)
1216		return lock;
1217	lock->offset = offset;
1218	lock->length = length;
1219	lock->type = type;
1220	lock->pid = current->tgid;
1221	lock->flags = flags;
1222	INIT_LIST_HEAD(&lock->blist);
1223	init_waitqueue_head(&lock->block_q);
1224	return lock;
1225}
1226
1227void
1228cifs_del_lock_waiters(struct cifsLockInfo *lock)
1229{
1230	struct cifsLockInfo *li, *tmp;
1231	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1232		list_del_init(&li->blist);
1233		wake_up(&li->block_q);
1234	}
1235}
1236
1237#define CIFS_LOCK_OP	0
1238#define CIFS_READ_OP	1
1239#define CIFS_WRITE_OP	2
1240
1241/* @rw_check : 0 - no op, 1 - read, 2 - write */
1242static bool
1243cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1244			    __u64 length, __u8 type, __u16 flags,
1245			    struct cifsFileInfo *cfile,
1246			    struct cifsLockInfo **conf_lock, int rw_check)
1247{
1248	struct cifsLockInfo *li;
1249	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1250	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1251
1252	list_for_each_entry(li, &fdlocks->locks, llist) {
1253		if (offset + length <= li->offset ||
1254		    offset >= li->offset + li->length)
1255			continue;
1256		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1257		    server->ops->compare_fids(cfile, cur_cfile)) {
1258			/* shared lock prevents write op through the same fid */
1259			if (!(li->type & server->vals->shared_lock_type) ||
1260			    rw_check != CIFS_WRITE_OP)
1261				continue;
1262		}
1263		if ((type & server->vals->shared_lock_type) &&
1264		    ((server->ops->compare_fids(cfile, cur_cfile) &&
1265		     current->tgid == li->pid) || type == li->type))
1266			continue;
1267		if (rw_check == CIFS_LOCK_OP &&
1268		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1269		    server->ops->compare_fids(cfile, cur_cfile))
1270			continue;
1271		if (conf_lock)
1272			*conf_lock = li;
1273		return true;
1274	}
1275	return false;
1276}
1277
1278bool
1279cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1280			__u8 type, __u16 flags,
1281			struct cifsLockInfo **conf_lock, int rw_check)
1282{
1283	bool rc = false;
1284	struct cifs_fid_locks *cur;
1285	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1286
1287	list_for_each_entry(cur, &cinode->llist, llist) {
1288		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1289						 flags, cfile, conf_lock,
1290						 rw_check);
1291		if (rc)
1292			break;
1293	}
1294
1295	return rc;
1296}
1297
1298/*
 1299 * Check if there is another lock that prevents us from setting the lock
 1300 * (mandatory style). If such a lock exists, update the flock structure with
 1301 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 1302 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 1303 * send a request to the server, or 1 otherwise.
1304 */
1305static int
1306cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1307	       __u8 type, struct file_lock *flock)
1308{
1309	int rc = 0;
1310	struct cifsLockInfo *conf_lock;
1311	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1312	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1313	bool exist;
1314
1315	down_read(&cinode->lock_sem);
1316
1317	exist = cifs_find_lock_conflict(cfile, offset, length, type,
1318					flock->fl_flags, &conf_lock,
1319					CIFS_LOCK_OP);
1320	if (exist) {
1321		flock->fl_start = conf_lock->offset;
1322		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1323		flock->fl_pid = conf_lock->pid;
1324		if (conf_lock->type & server->vals->shared_lock_type)
1325			flock->fl_type = F_RDLCK;
1326		else
1327			flock->fl_type = F_WRLCK;
1328	} else if (!cinode->can_cache_brlcks)
1329		rc = 1;
1330	else
1331		flock->fl_type = F_UNLCK;
1332
1333	up_read(&cinode->lock_sem);
1334	return rc;
1335}
1336
1337static void
1338cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1339{
1340	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1341	cifs_down_write(&cinode->lock_sem);
1342	list_add_tail(&lock->llist, &cfile->llist->locks);
1343	up_write(&cinode->lock_sem);
1344}
1345
1346/*
1347 * Set the byte-range lock (mandatory style). Returns:
1348 * 1) 0, if we set the lock and don't need to request to the server;
1349 * 2) 1, if no locks prevent us but we need to request to the server;
1350 * 3) -EACCES, if there is a lock that prevents us and wait is false.
1351 */
1352static int
1353cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1354		 bool wait)
1355{
1356	struct cifsLockInfo *conf_lock;
1357	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1358	bool exist;
1359	int rc = 0;
1360
1361try_again:
1362	exist = false;
1363	cifs_down_write(&cinode->lock_sem);
1364
1365	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1366					lock->type, lock->flags, &conf_lock,
1367					CIFS_LOCK_OP);
1368	if (!exist && cinode->can_cache_brlcks) {
1369		list_add_tail(&lock->llist, &cfile->llist->locks);
1370		up_write(&cinode->lock_sem);
1371		return rc;
1372	}
1373
1374	if (!exist)
1375		rc = 1;
1376	else if (!wait)
1377		rc = -EACCES;
1378	else {
1379		list_add_tail(&lock->blist, &conf_lock->blist);
1380		up_write(&cinode->lock_sem);
1381		rc = wait_event_interruptible(lock->block_q,
1382					(lock->blist.prev == &lock->blist) &&
1383					(lock->blist.next == &lock->blist));
1384		if (!rc)
1385			goto try_again;
1386		cifs_down_write(&cinode->lock_sem);
1387		list_del_init(&lock->blist);
1388	}
1389
1390	up_write(&cinode->lock_sem);
1391	return rc;
1392}
1393
1394#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1395/*
 1396 * Check if there is another lock that prevents us from setting the lock
 1397 * (posix style). If such a lock exists, update the flock structure with its
 1398 * properties. Otherwise, set the flock type to F_UNLCK if we can cache
 1399 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 1400 * send a request to the server, or 1 otherwise.
1401 */
1402static int
1403cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1404{
1405	int rc = 0;
1406	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1407	unsigned char saved_type = flock->fl_type;
1408
1409	if ((flock->fl_flags & FL_POSIX) == 0)
1410		return 1;
1411
1412	down_read(&cinode->lock_sem);
1413	posix_test_lock(file, flock);
1414
1415	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1416		flock->fl_type = saved_type;
1417		rc = 1;
1418	}
1419
1420	up_read(&cinode->lock_sem);
1421	return rc;
1422}
1423
1424/*
1425 * Set the byte-range lock (posix style). Returns:
1426 * 1) <0, if the error occurs while setting the lock;
1427 * 2) 0, if we set the lock and don't need to request to the server;
1428 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1429 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1430 */
1431static int
1432cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1433{
1434	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1435	int rc = FILE_LOCK_DEFERRED + 1;
1436
1437	if ((flock->fl_flags & FL_POSIX) == 0)
1438		return rc;
1439
1440	cifs_down_write(&cinode->lock_sem);
1441	if (!cinode->can_cache_brlcks) {
1442		up_write(&cinode->lock_sem);
1443		return rc;
1444	}
1445
1446	rc = posix_lock_file(file, flock, NULL);
1447	up_write(&cinode->lock_sem);
1448	return rc;
1449}
1450
1451int
1452cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1453{
1454	unsigned int xid;
1455	int rc = 0, stored_rc;
1456	struct cifsLockInfo *li, *tmp;
1457	struct cifs_tcon *tcon;
1458	unsigned int num, max_num, max_buf;
1459	LOCKING_ANDX_RANGE *buf, *cur;
1460	static const int types[] = {
1461		LOCKING_ANDX_LARGE_FILES,
1462		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1463	};
1464	int i;
1465
1466	xid = get_xid();
1467	tcon = tlink_tcon(cfile->tlink);
1468
1469	/*
1470	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1471	 * and check it before using.
1472	 */
1473	max_buf = tcon->ses->server->maxBuf;
1474	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1475		free_xid(xid);
1476		return -EINVAL;
1477	}
1478
1479	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1480		     PAGE_SIZE);
1481	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1482			PAGE_SIZE);
1483	max_num = (max_buf - sizeof(struct smb_hdr)) /
1484						sizeof(LOCKING_ANDX_RANGE);
1485	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1486	if (!buf) {
1487		free_xid(xid);
1488		return -ENOMEM;
1489	}
1490
1491	for (i = 0; i < 2; i++) {
1492		cur = buf;
1493		num = 0;
1494		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1495			if (li->type != types[i])
1496				continue;
1497			cur->Pid = cpu_to_le16(li->pid);
1498			cur->LengthLow = cpu_to_le32((u32)li->length);
1499			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1500			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1501			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1502			if (++num == max_num) {
1503				stored_rc = cifs_lockv(xid, tcon,
1504						       cfile->fid.netfid,
1505						       (__u8)li->type, 0, num,
1506						       buf);
1507				if (stored_rc)
1508					rc = stored_rc;
1509				cur = buf;
1510				num = 0;
1511			} else
1512				cur++;
1513		}
1514
1515		if (num) {
1516			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1517					       (__u8)types[i], 0, num, buf);
1518			if (stored_rc)
1519				rc = stored_rc;
1520		}
1521	}
1522
1523	kfree(buf);
1524	free_xid(xid);
1525	return rc;
1526}
1527
1528static __u32
1529hash_lockowner(fl_owner_t owner)
1530{
1531	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1532}
1533#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1534
1535struct lock_to_push {
1536	struct list_head llist;
1537	__u64 offset;
1538	__u64 length;
1539	__u32 pid;
1540	__u16 netfid;
1541	__u8 type;
1542};
1543
1544#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1545static int
1546cifs_push_posix_locks(struct cifsFileInfo *cfile)
1547{
1548	struct inode *inode = d_inode(cfile->dentry);
1549	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1550	struct file_lock *flock;
1551	struct file_lock_context *flctx = locks_inode_context(inode);
1552	unsigned int count = 0, i;
1553	int rc = 0, xid, type;
1554	struct list_head locks_to_send, *el;
1555	struct lock_to_push *lck, *tmp;
1556	__u64 length;
1557
1558	xid = get_xid();
1559
1560	if (!flctx)
1561		goto out;
1562
1563	spin_lock(&flctx->flc_lock);
1564	list_for_each(el, &flctx->flc_posix) {
1565		count++;
1566	}
1567	spin_unlock(&flctx->flc_lock);
1568
1569	INIT_LIST_HEAD(&locks_to_send);
1570
1571	/*
1572	 * Allocating count locks is enough because no FL_POSIX locks can be
1573	 * added to the list while we are holding cinode->lock_sem that
1574	 * protects locking operations of this inode.
1575	 */
1576	for (i = 0; i < count; i++) {
1577		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1578		if (!lck) {
1579			rc = -ENOMEM;
1580			goto err_out;
1581		}
1582		list_add_tail(&lck->llist, &locks_to_send);
1583	}
1584
1585	el = locks_to_send.next;
1586	spin_lock(&flctx->flc_lock);
1587	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1588		if (el == &locks_to_send) {
1589			/*
1590			 * The list ended. We don't have enough allocated
1591			 * structures - something is really wrong.
1592			 */
1593			cifs_dbg(VFS, "Can't push all brlocks!\n");
1594			break;
1595		}
1596		length = cifs_flock_len(flock);
1597		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1598			type = CIFS_RDLCK;
1599		else
1600			type = CIFS_WRLCK;
1601		lck = list_entry(el, struct lock_to_push, llist);
1602		lck->pid = hash_lockowner(flock->fl_owner);
1603		lck->netfid = cfile->fid.netfid;
1604		lck->length = length;
1605		lck->type = type;
1606		lck->offset = flock->fl_start;
1607	}
1608	spin_unlock(&flctx->flc_lock);
1609
1610	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1611		int stored_rc;
1612
1613		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1614					     lck->offset, lck->length, NULL,
1615					     lck->type, 0);
1616		if (stored_rc)
1617			rc = stored_rc;
1618		list_del(&lck->llist);
1619		kfree(lck);
1620	}
1621
1622out:
1623	free_xid(xid);
1624	return rc;
1625err_out:
1626	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1627		list_del(&lck->llist);
1628		kfree(lck);
1629	}
1630	goto out;
1631}
1632#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1633
1634static int
1635cifs_push_locks(struct cifsFileInfo *cfile)
1636{
1637	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1638	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1639	int rc = 0;
1640#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1641	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1642#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1643
1644	/* we are going to update can_cache_brlcks here - need a write access */
1645	cifs_down_write(&cinode->lock_sem);
1646	if (!cinode->can_cache_brlcks) {
1647		up_write(&cinode->lock_sem);
1648		return rc;
1649	}
1650
1651#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1652	if (cap_unix(tcon->ses) &&
1653	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1654	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1655		rc = cifs_push_posix_locks(cfile);
1656	else
1657#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1658		rc = tcon->ses->server->ops->push_mand_locks(cfile);
1659
1660	cinode->can_cache_brlcks = false;
1661	up_write(&cinode->lock_sem);
1662	return rc;
1663}
1664
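/*
 * Decode the VFS file_lock into the protocol lock type and the
 * lock/unlock/wait flags used by the SMB lock calls.
 */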
1665static void
1666cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1667		bool *wait_flag, struct TCP_Server_Info *server)
1668{
1669	if (flock->fl_flags & FL_POSIX)
1670		cifs_dbg(FYI, "Posix\n");
1671	if (flock->fl_flags & FL_FLOCK)
1672		cifs_dbg(FYI, "Flock\n");
1673	if (flock->fl_flags & FL_SLEEP) {
1674		cifs_dbg(FYI, "Blocking lock\n");
1675		*wait_flag = true;
1676	}
1677	if (flock->fl_flags & FL_ACCESS)
1678		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1679	if (flock->fl_flags & FL_LEASE)
1680		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1681	if (flock->fl_flags &
1682	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1683	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1684		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1685
1686	*type = server->vals->large_lock_type;
1687	if (flock->fl_type == F_WRLCK) {
1688		cifs_dbg(FYI, "F_WRLCK\n");
1689		*type |= server->vals->exclusive_lock_type;
1690		*lock = 1;
1691	} else if (flock->fl_type == F_UNLCK) {
1692		cifs_dbg(FYI, "F_UNLCK\n");
1693		*type |= server->vals->unlock_lock_type;
1694		*unlock = 1;
1695		/* Check if unlock includes more than one lock range */
1696	} else if (flock->fl_type == F_RDLCK) {
1697		cifs_dbg(FYI, "F_RDLCK\n");
1698		*type |= server->vals->shared_lock_type;
1699		*lock = 1;
1700	} else if (flock->fl_type == F_EXLCK) {
1701		cifs_dbg(FYI, "F_EXLCK\n");
1702		*type |= server->vals->exclusive_lock_type;
1703		*lock = 1;
1704	} else if (flock->fl_type == F_SHLCK) {
1705		cifs_dbg(FYI, "F_SHLCK\n");
1706		*type |= server->vals->shared_lock_type;
1707		*lock = 1;
1708	} else
1709		cifs_dbg(FYI, "Unknown type of lock\n");
1710}
1711
1712static int
1713cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1714	   bool wait_flag, bool posix_lck, unsigned int xid)
1715{
1716	int rc = 0;
1717	__u64 length = cifs_flock_len(flock);
1718	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1719	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1720	struct TCP_Server_Info *server = tcon->ses->server;
1721#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1722	__u16 netfid = cfile->fid.netfid;
1723
1724	if (posix_lck) {
1725		int posix_lock_type;
1726
1727		rc = cifs_posix_lock_test(file, flock);
1728		if (!rc)
1729			return rc;
1730
1731		if (type & server->vals->shared_lock_type)
1732			posix_lock_type = CIFS_RDLCK;
1733		else
1734			posix_lock_type = CIFS_WRLCK;
1735		rc = CIFSSMBPosixLock(xid, tcon, netfid,
1736				      hash_lockowner(flock->fl_owner),
1737				      flock->fl_start, length, flock,
1738				      posix_lock_type, wait_flag);
1739		return rc;
1740	}
1741#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1742
1743	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1744	if (!rc)
1745		return rc;
1746
1747	/* BB we could chain these into one lock request BB */
1748	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1749				    1, 0, false);
1750	if (rc == 0) {
1751		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1752					    type, 0, 1, false);
1753		flock->fl_type = F_UNLCK;
1754		if (rc != 0)
1755			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1756				 rc);
1757		return 0;
1758	}
1759
1760	if (type & server->vals->shared_lock_type) {
1761		flock->fl_type = F_WRLCK;
1762		return 0;
1763	}
1764
1765	type &= ~server->vals->exclusive_lock_type;
1766
1767	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1768				    type | server->vals->shared_lock_type,
1769				    1, 0, false);
1770	if (rc == 0) {
1771		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1772			type | server->vals->shared_lock_type, 0, 1, false);
1773		flock->fl_type = F_RDLCK;
1774		if (rc != 0)
1775			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1776				 rc);
1777	} else
1778		flock->fl_type = F_WRLCK;
1779
1780	return 0;
1781}
1782
1783void
1784cifs_move_llist(struct list_head *source, struct list_head *dest)
1785{
1786	struct list_head *li, *tmp;
1787	list_for_each_safe(li, tmp, source)
1788		list_move(li, dest);
1789}
1790
1791void
1792cifs_free_llist(struct list_head *llist)
1793{
1794	struct cifsLockInfo *li, *tmp;
1795	list_for_each_entry_safe(li, tmp, llist, llist) {
1796		cifs_del_lock_waiters(li);
1797		list_del(&li->llist);
1798		kfree(li);
1799	}
1800}
1801
1802#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1803int
1804cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1805		  unsigned int xid)
1806{
1807	int rc = 0, stored_rc;
1808	static const int types[] = {
1809		LOCKING_ANDX_LARGE_FILES,
1810		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1811	};
1812	unsigned int i;
1813	unsigned int max_num, num, max_buf;
1814	LOCKING_ANDX_RANGE *buf, *cur;
1815	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1816	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1817	struct cifsLockInfo *li, *tmp;
1818	__u64 length = cifs_flock_len(flock);
1819	struct list_head tmp_llist;
1820
1821	INIT_LIST_HEAD(&tmp_llist);
1822
1823	/*
1824	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1825	 * and check it before using.
1826	 */
1827	max_buf = tcon->ses->server->maxBuf;
1828	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1829		return -EINVAL;
1830
1831	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1832		     PAGE_SIZE);
1833	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1834			PAGE_SIZE);
1835	max_num = (max_buf - sizeof(struct smb_hdr)) /
1836						sizeof(LOCKING_ANDX_RANGE);
1837	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1838	if (!buf)
1839		return -ENOMEM;
1840
1841	cifs_down_write(&cinode->lock_sem);
1842	for (i = 0; i < 2; i++) {
1843		cur = buf;
1844		num = 0;
1845		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1846			if (flock->fl_start > li->offset ||
1847			    (flock->fl_start + length) <
1848			    (li->offset + li->length))
1849				continue;
1850			if (current->tgid != li->pid)
1851				continue;
1852			if (types[i] != li->type)
1853				continue;
1854			if (cinode->can_cache_brlcks) {
1855				/*
1856				 * We can cache brlock requests - simply remove
1857				 * a lock from the file's list.
1858				 */
1859				list_del(&li->llist);
1860				cifs_del_lock_waiters(li);
1861				kfree(li);
1862				continue;
1863			}
1864			cur->Pid = cpu_to_le16(li->pid);
1865			cur->LengthLow = cpu_to_le32((u32)li->length);
1866			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1867			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1868			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1869			/*
1870			 * We need to save a lock here to let us add it again to
1871			 * the file's list if the unlock range request fails on
1872			 * the server.
1873			 */
1874			list_move(&li->llist, &tmp_llist);
1875			if (++num == max_num) {
1876				stored_rc = cifs_lockv(xid, tcon,
1877						       cfile->fid.netfid,
1878						       li->type, num, 0, buf);
1879				if (stored_rc) {
1880					/*
1881					 * We failed on the unlock range
1882					 * request - add all locks from the tmp
1883					 * list to the head of the file's list.
1884					 */
1885					cifs_move_llist(&tmp_llist,
1886							&cfile->llist->locks);
1887					rc = stored_rc;
1888				} else
1889					/*
1890					 * The unlock range request succeeded -
1891					 * free the tmp list.
1892					 */
1893					cifs_free_llist(&tmp_llist);
1894				cur = buf;
1895				num = 0;
1896			} else
1897				cur++;
1898		}
1899		if (num) {
1900			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1901					       types[i], num, 0, buf);
1902			if (stored_rc) {
1903				cifs_move_llist(&tmp_llist,
1904						&cfile->llist->locks);
1905				rc = stored_rc;
1906			} else
1907				cifs_free_llist(&tmp_llist);
1908		}
1909	}
1910
1911	up_write(&cinode->lock_sem);
1912	kfree(buf);
1913	return rc;
1914}
1915#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1916
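/*
 * Handle a lock or unlock request (F_SETLK/F_SETLKW and flock): send the
 * byte-range lock to the server - via the POSIX lock call where the unix
 * extensions allow it, otherwise via the mandatory lock operations - and
 * keep the local lock list and the VFS lock state in sync with the result.
 */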
1917static int
1918cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1919	   bool wait_flag, bool posix_lck, int lock, int unlock,
1920	   unsigned int xid)
1921{
1922	int rc = 0;
1923	__u64 length = cifs_flock_len(flock);
1924	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1925	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1926	struct TCP_Server_Info *server = tcon->ses->server;
1927	struct inode *inode = d_inode(cfile->dentry);
1928
1929#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1930	if (posix_lck) {
1931		int posix_lock_type;
1932
1933		rc = cifs_posix_lock_set(file, flock);
1934		if (rc <= FILE_LOCK_DEFERRED)
1935			return rc;
1936
1937		if (type & server->vals->shared_lock_type)
1938			posix_lock_type = CIFS_RDLCK;
1939		else
1940			posix_lock_type = CIFS_WRLCK;
1941
1942		if (unlock == 1)
1943			posix_lock_type = CIFS_UNLCK;
1944
1945		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1946				      hash_lockowner(flock->fl_owner),
1947				      flock->fl_start, length,
1948				      NULL, posix_lock_type, wait_flag);
1949		goto out;
1950	}
1951#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1952	if (lock) {
1953		struct cifsLockInfo *lock;
1954
1955		lock = cifs_lock_init(flock->fl_start, length, type,
1956				      flock->fl_flags);
1957		if (!lock)
1958			return -ENOMEM;
1959
1960		rc = cifs_lock_add_if(cfile, lock, wait_flag);
1961		if (rc < 0) {
1962			kfree(lock);
1963			return rc;
1964		}
1965		if (!rc)
1966			goto out;
1967
1968		/*
1969		 * A Windows 7 server can delay breaking a lease from read to None
1970		 * if we set a byte-range lock on a file - break it explicitly
1971		 * before sending the lock to the server to be sure the next
1972		 * read won't conflict with non-overlapping locks due to
1973		 * page reading.
1974		 */
1975		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1976					CIFS_CACHE_READ(CIFS_I(inode))) {
1977			cifs_zap_mapping(inode);
1978			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1979				 inode);
1980			CIFS_I(inode)->oplock = 0;
1981		}
1982
1983		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1984					    type, 1, 0, wait_flag);
1985		if (rc) {
1986			kfree(lock);
1987			return rc;
1988		}
1989
1990		cifs_lock_add(cfile, lock);
1991	} else if (unlock)
1992		rc = server->ops->mand_unlock_range(cfile, flock, xid);
1993
1994out:
1995	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1996		/*
1997		 * If this is a request to remove all locks because we
1998		 * are closing the file, it doesn't matter if the
1999		 * unlocking failed as both cifs.ko and the SMB server
2000		 * remove the lock on file close
2001		 */
2002		if (rc) {
2003			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2004			if (!(flock->fl_flags & FL_CLOSE))
2005				return rc;
2006		}
2007		rc = locks_lock_file_wait(file, flock);
2008	}
2009	return rc;
2010}
2011
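/*
 * The ->flock() handler: translate the flock(2) request into a lock type
 * and hand it to cifs_setlk().
 */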
2012int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2013{
2014	int rc, xid;
2015	int lock = 0, unlock = 0;
2016	bool wait_flag = false;
2017	bool posix_lck = false;
2018	struct cifs_sb_info *cifs_sb;
2019	struct cifs_tcon *tcon;
2020	struct cifsFileInfo *cfile;
2021	__u32 type;
2022
2023	xid = get_xid();
2024
2025	if (!(fl->fl_flags & FL_FLOCK)) {
2026		rc = -ENOLCK;
2027		free_xid(xid);
2028		return rc;
2029	}
2030
2031	cfile = (struct cifsFileInfo *)file->private_data;
2032	tcon = tlink_tcon(cfile->tlink);
2033
2034	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2035			tcon->ses->server);
2036	cifs_sb = CIFS_FILE_SB(file);
2037
2038	if (cap_unix(tcon->ses) &&
2039	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2040	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2041		posix_lck = true;
2042
2043	if (!lock && !unlock) {
2044		/*
2045		 * if this is neither a lock nor an unlock request then there is
2046		 * nothing to do since we do not know what it is
2047		 */
2048		rc = -EOPNOTSUPP;
2049		free_xid(xid);
2050		return rc;
2051	}
2052
2053	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2054			xid);
2055	free_xid(xid);
2056	return rc;
2057
2058
2059}
2060
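/*
 * The ->lock() handler for fcntl(2) byte-range locks: F_GETLK requests go
 * to cifs_getlk() and lock/unlock requests go to cifs_setlk().
 */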
2061int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2062{
2063	int rc, xid;
2064	int lock = 0, unlock = 0;
2065	bool wait_flag = false;
2066	bool posix_lck = false;
2067	struct cifs_sb_info *cifs_sb;
2068	struct cifs_tcon *tcon;
2069	struct cifsFileInfo *cfile;
2070	__u32 type;
2071
2072	rc = -EACCES;
2073	xid = get_xid();
2074
2075	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x flags=0x%x type=0x%x r=%lld:%lld\n", __func__, file, cmd,
2076		 flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
2077		 (long long)flock->fl_end);
2078
2079	cfile = (struct cifsFileInfo *)file->private_data;
2080	tcon = tlink_tcon(cfile->tlink);
2081
2082	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2083			tcon->ses->server);
2084	cifs_sb = CIFS_FILE_SB(file);
2085	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2086
2087	if (cap_unix(tcon->ses) &&
2088	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2089	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2090		posix_lck = true;
2091	/*
2092	 * BB add code here to normalize offset and length to account for
2093	 * negative length which we cannot accept over the wire.
2094	 */
2095	if (IS_GETLK(cmd)) {
2096		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2097		free_xid(xid);
2098		return rc;
2099	}
2100
2101	if (!lock && !unlock) {
2102		/*
2103		 * if this is neither a lock nor an unlock request then there is
2104		 * nothing to do since we do not know what it is
2105		 */
2106		free_xid(xid);
2107		return -EOPNOTSUPP;
2108	}
2109
2110	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2111			xid);
2112	free_xid(xid);
2113	return rc;
2114}
2115
2116/*
2117 * update the file size (if needed) after a write. Should be called with
2118 * the inode->i_lock held
2119 */
2120void
2121cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2122		      unsigned int bytes_written)
2123{
2124	loff_t end_of_write = offset + bytes_written;
2125
2126	if (end_of_write > cifsi->netfs.remote_i_size)
2127		netfs_resize_file(&cifsi->netfs, end_of_write, true);
2128}
2129
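/*
 * Synchronously write data from a kernel buffer to the server, splitting
 * the request into chunks no larger than the server's retry size and
 * reopening the handle if it has been invalidated.  On success *offset
 * and the cached file size are advanced past the bytes written.
 */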
2130static ssize_t
2131cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2132	   size_t write_size, loff_t *offset)
2133{
2134	int rc = 0;
2135	unsigned int bytes_written = 0;
2136	unsigned int total_written;
2137	struct cifs_tcon *tcon;
2138	struct TCP_Server_Info *server;
2139	unsigned int xid;
2140	struct dentry *dentry = open_file->dentry;
2141	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2142	struct cifs_io_parms io_parms = {0};
2143
2144	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2145		 write_size, *offset, dentry);
2146
2147	tcon = tlink_tcon(open_file->tlink);
2148	server = tcon->ses->server;
2149
2150	if (!server->ops->sync_write)
2151		return -ENOSYS;
2152
2153	xid = get_xid();
2154
2155	for (total_written = 0; write_size > total_written;
2156	     total_written += bytes_written) {
2157		rc = -EAGAIN;
2158		while (rc == -EAGAIN) {
2159			struct kvec iov[2];
2160			unsigned int len;
2161
2162			if (open_file->invalidHandle) {
2163				/* we could deadlock if we called
2164				   filemap_fdatawait from here so tell
2165				   reopen_file not to flush data to
2166				   server now */
2167				rc = cifs_reopen_file(open_file, false);
2168				if (rc != 0)
2169					break;
2170			}
2171
2172			len = min(server->ops->wp_retry_size(d_inode(dentry)),
2173				  (unsigned int)write_size - total_written);
2174			/* iov[0] is reserved for smb header */
2175			iov[1].iov_base = (char *)write_data + total_written;
2176			iov[1].iov_len = len;
2177			io_parms.pid = pid;
2178			io_parms.tcon = tcon;
2179			io_parms.offset = *offset;
2180			io_parms.length = len;
2181			rc = server->ops->sync_write(xid, &open_file->fid,
2182					&io_parms, &bytes_written, iov, 1);
2183		}
2184		if (rc || (bytes_written == 0)) {
2185			if (total_written)
2186				break;
2187			else {
2188				free_xid(xid);
2189				return rc;
2190			}
2191		} else {
2192			spin_lock(&d_inode(dentry)->i_lock);
2193			cifs_update_eof(cifsi, *offset, bytes_written);
2194			spin_unlock(&d_inode(dentry)->i_lock);
2195			*offset += bytes_written;
2196		}
2197	}
2198
2199	cifs_stats_bytes_written(tcon, total_written);
2200
2201	if (total_written > 0) {
2202		spin_lock(&d_inode(dentry)->i_lock);
2203		if (*offset > d_inode(dentry)->i_size) {
2204			i_size_write(d_inode(dentry), *offset);
2205			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2206		}
2207		spin_unlock(&d_inode(dentry)->i_lock);
2208	}
2209	mark_inode_dirty_sync(d_inode(dentry));
2210	free_xid(xid);
2211	return total_written;
2212}
2213
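/*
 * Find an open handle on this inode with read access, optionally limited
 * to the current fsuid on multiuser mounts, and take a reference on it.
 * Returns NULL if no suitable handle exists.
 */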
2214struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2215					bool fsuid_only)
2216{
2217	struct cifsFileInfo *open_file = NULL;
2218	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2219
2220	/* only filter by fsuid on multiuser mounts */
2221	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2222		fsuid_only = false;
2223
2224	spin_lock(&cifs_inode->open_file_lock);
2225	/* we could simply get the first_list_entry since write-only entries
2226	   are always at the end of the list but since the first entry might
2227	   have a close pending, we go through the whole list */
2228	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2229		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2230			continue;
2231		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2232			if ((!open_file->invalidHandle)) {
2233				/* found a good file */
2234				/* lock it so it will not be closed on us */
2235				cifsFileInfo_get(open_file);
2236				spin_unlock(&cifs_inode->open_file_lock);
2237				return open_file;
2238			} /* else might as well continue, and look for
2239			     another, or simply have the caller reopen it
2240			     again rather than trying to fix this handle */
2241		} else /* write only file */
2242			break; /* write only files are last so must be done */
2243	}
2244	spin_unlock(&cifs_inode->open_file_lock);
2245	return NULL;
2246}
2247
2248/* Return -EBADF if no handle is found and general rc otherwise */
2249int
2250cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2251		       struct cifsFileInfo **ret_file)
2252{
2253	struct cifsFileInfo *open_file, *inv_file = NULL;
2254	struct cifs_sb_info *cifs_sb;
2255	bool any_available = false;
2256	int rc = -EBADF;
2257	unsigned int refind = 0;
2258	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2259	bool with_delete = flags & FIND_WR_WITH_DELETE;
2260	*ret_file = NULL;
2261
2262	/*
2263	 * Having a null inode here (because mapping->host was set to zero by
2264	 * the VFS or MM) should not happen but we had reports of an oops (due
2265	 * to it being zero) during stress test cases so we need to check for it
2266	 */
2267
2268	if (cifs_inode == NULL) {
2269		cifs_dbg(VFS, "Null inode passed to %s\n", __func__);
2270		dump_stack();
2271		return rc;
2272	}
2273
2274	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2275
2276	/* only filter by fsuid on multiuser mounts */
2277	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2278		fsuid_only = false;
2279
2280	spin_lock(&cifs_inode->open_file_lock);
2281refind_writable:
2282	if (refind > MAX_REOPEN_ATT) {
2283		spin_unlock(&cifs_inode->open_file_lock);
2284		return rc;
2285	}
2286	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2287		if (!any_available && open_file->pid != current->tgid)
2288			continue;
2289		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2290			continue;
2291		if (with_delete && !(open_file->fid.access & DELETE))
2292			continue;
2293		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2294			if (!open_file->invalidHandle) {
2295				/* found a good writable file */
2296				cifsFileInfo_get(open_file);
2297				spin_unlock(&cifs_inode->open_file_lock);
2298				*ret_file = open_file;
2299				return 0;
2300			} else {
2301				if (!inv_file)
2302					inv_file = open_file;
2303			}
2304		}
2305	}
2306	/* couldn't find usable FH with same pid, try any available */
2307	if (!any_available) {
2308		any_available = true;
2309		goto refind_writable;
2310	}
2311
2312	if (inv_file) {
2313		any_available = false;
2314		cifsFileInfo_get(inv_file);
2315	}
2316
2317	spin_unlock(&cifs_inode->open_file_lock);
2318
2319	if (inv_file) {
2320		rc = cifs_reopen_file(inv_file, false);
2321		if (!rc) {
2322			*ret_file = inv_file;
2323			return 0;
2324		}
2325
2326		spin_lock(&cifs_inode->open_file_lock);
2327		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2328		spin_unlock(&cifs_inode->open_file_lock);
2329		cifsFileInfo_put(inv_file);
2330		++refind;
2331		inv_file = NULL;
2332		spin_lock(&cifs_inode->open_file_lock);
2333		goto refind_writable;
2334	}
2335
2336	return rc;
2337}
2338
2339struct cifsFileInfo *
2340find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2341{
2342	struct cifsFileInfo *cfile;
2343	int rc;
2344
2345	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2346	if (rc)
2347		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2348
2349	return cfile;
2350}
2351
2352int
2353cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2354		       int flags,
2355		       struct cifsFileInfo **ret_file)
2356{
2357	struct cifsFileInfo *cfile;
2358	void *page = alloc_dentry_path();
2359
2360	*ret_file = NULL;
2361
2362	spin_lock(&tcon->open_file_lock);
2363	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2364		struct cifsInodeInfo *cinode;
2365		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2366		if (IS_ERR(full_path)) {
2367			spin_unlock(&tcon->open_file_lock);
2368			free_dentry_path(page);
2369			return PTR_ERR(full_path);
2370		}
2371		if (strcmp(full_path, name))
2372			continue;
2373
2374		cinode = CIFS_I(d_inode(cfile->dentry));
2375		spin_unlock(&tcon->open_file_lock);
2376		free_dentry_path(page);
2377		return cifs_get_writable_file(cinode, flags, ret_file);
2378	}
2379
2380	spin_unlock(&tcon->open_file_lock);
2381	free_dentry_path(page);
2382	return -ENOENT;
2383}
2384
2385int
2386cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2387		       struct cifsFileInfo **ret_file)
2388{
2389	struct cifsFileInfo *cfile;
2390	void *page = alloc_dentry_path();
2391
2392	*ret_file = NULL;
2393
2394	spin_lock(&tcon->open_file_lock);
2395	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2396		struct cifsInodeInfo *cinode;
2397		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2398		if (IS_ERR(full_path)) {
2399			spin_unlock(&tcon->open_file_lock);
2400			free_dentry_path(page);
2401			return PTR_ERR(full_path);
2402		}
2403		if (strcmp(full_path, name))
2404			continue;
2405
2406		cinode = CIFS_I(d_inode(cfile->dentry));
2407		spin_unlock(&tcon->open_file_lock);
2408		free_dentry_path(page);
2409		*ret_file = find_readable_file(cinode, 0);
2410		return *ret_file ? 0 : -ENOENT;
2411	}
2412
2413	spin_unlock(&tcon->open_file_lock);
2414	free_dentry_path(page);
2415	return -ENOENT;
2416}
2417
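/*
 * Final put of a cifs_writedata: release the smbdirect memory
 * registration (if any), drop the file handle reference and free it.
 */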
2418void
2419cifs_writedata_release(struct kref *refcount)
2420{
2421	struct cifs_writedata *wdata = container_of(refcount,
2422					struct cifs_writedata, refcount);
2423#ifdef CONFIG_CIFS_SMB_DIRECT
2424	if (wdata->mr) {
2425		smbd_deregister_mr(wdata->mr);
2426		wdata->mr = NULL;
2427	}
2428#endif
2429
2430	if (wdata->cfile)
2431		cifsFileInfo_put(wdata->cfile);
2432
2433	kfree(wdata);
2434}
2435
2436/*
2437 * Write failed with a retryable error. Resend the write request. It's also
2438 * possible that the page was redirtied so re-clean the page.
2439 */
2440static void
2441cifs_writev_requeue(struct cifs_writedata *wdata)
2442{
2443	int rc = 0;
2444	struct inode *inode = d_inode(wdata->cfile->dentry);
2445	struct TCP_Server_Info *server;
2446	unsigned int rest_len = wdata->bytes;
2447	loff_t fpos = wdata->offset;
2448
2449	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2450	do {
2451		struct cifs_writedata *wdata2;
2452		unsigned int wsize, cur_len;
2453
2454		wsize = server->ops->wp_retry_size(inode);
2455		if (wsize < rest_len) {
2456			if (wsize < PAGE_SIZE) {
2457				rc = -EOPNOTSUPP;
2458				break;
2459			}
2460			cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2461		} else {
2462			cur_len = rest_len;
2463		}
2464
2465		wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2466		if (!wdata2) {
2467			rc = -ENOMEM;
2468			break;
2469		}
2470
2471		wdata2->sync_mode = wdata->sync_mode;
2472		wdata2->offset	= fpos;
2473		wdata2->bytes	= cur_len;
2474		wdata2->iter	= wdata->iter;
2475
2476		iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2477		iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2478
2479		if (iov_iter_is_xarray(&wdata2->iter))
2480			/* Check for pages having been redirtied and clean
2481			 * them.  We can do this by walking the xarray.  If
2482			 * it's not an xarray, then it's a DIO and we shouldn't
2483			 * be mucking around with the page bits.
2484			 */
2485			cifs_undirty_folios(inode, fpos, cur_len);
2486
2487		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2488					    &wdata2->cfile);
2489		if (!wdata2->cfile) {
2490			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2491				 rc);
2492			if (!is_retryable_error(rc))
2493				rc = -EBADF;
2494		} else {
2495			wdata2->pid = wdata2->cfile->pid;
2496			rc = server->ops->async_writev(wdata2,
2497						       cifs_writedata_release);
2498		}
2499
2500		kref_put(&wdata2->refcount, cifs_writedata_release);
2501		if (rc) {
2502			if (is_retryable_error(rc))
2503				continue;
2504			fpos += cur_len;
2505			rest_len -= cur_len;
2506			break;
2507		}
2508
2509		fpos += cur_len;
2510		rest_len -= cur_len;
2511	} while (rest_len > 0);
2512
2513	/* Clean up remaining pages from the original wdata */
2514	if (iov_iter_is_xarray(&wdata->iter))
2515		cifs_pages_write_failed(inode, fpos, rest_len);
2516
2517	if (rc != 0 && !is_retryable_error(rc))
2518		mapping_set_error(inode->i_mapping, rc);
2519	kref_put(&wdata->refcount, cifs_writedata_release);
2520}
2521
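/*
 * Completion work for an asynchronous writeback write: update the file
 * size and stats on success, requeue the write if a data-integrity sync
 * got -EAGAIN, and otherwise mark the affected pages written back,
 * redirtied or failed according to the result.
 */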
2522void
2523cifs_writev_complete(struct work_struct *work)
2524{
2525	struct cifs_writedata *wdata = container_of(work,
2526						struct cifs_writedata, work);
2527	struct inode *inode = d_inode(wdata->cfile->dentry);
2528
2529	if (wdata->result == 0) {
2530		spin_lock(&inode->i_lock);
2531		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2532		spin_unlock(&inode->i_lock);
2533		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2534					 wdata->bytes);
2535	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2536		return cifs_writev_requeue(wdata);
2537
2538	if (wdata->result == -EAGAIN)
2539		cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2540	else if (wdata->result < 0)
2541		cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2542	else
2543		cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2544
2545	if (wdata->result != -EAGAIN)
2546		mapping_set_error(inode->i_mapping, wdata->result);
2547	kref_put(&wdata->refcount, cifs_writedata_release);
2548}
2549
2550struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2551{
2552	struct cifs_writedata *wdata;
2553
2554	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2555	if (wdata != NULL) {
2556		kref_init(&wdata->refcount);
2557		INIT_LIST_HEAD(&wdata->list);
2558		init_completion(&wdata->done);
2559		INIT_WORK(&wdata->work, complete);
2560	}
2561	return wdata;
2562}
2563
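/*
 * Synchronously write the byte range [from, to) of a page to the server
 * through any available writable handle, trimming the range so that the
 * write does not extend the file.  Used by the writepage path.
 */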
2564static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2565{
2566	struct address_space *mapping = page->mapping;
2567	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2568	char *write_data;
2569	int rc = -EFAULT;
2570	int bytes_written = 0;
2571	struct inode *inode;
2572	struct cifsFileInfo *open_file;
2573
2574	if (!mapping || !mapping->host)
2575		return -EFAULT;
2576
2577	inode = page->mapping->host;
2578
2579	offset += (loff_t)from;
2580	write_data = kmap(page);
2581	write_data += from;
2582
2583	if ((to > PAGE_SIZE) || (from > to)) {
2584		kunmap(page);
2585		return -EIO;
2586	}
2587
2588	/* racing with truncate? */
2589	if (offset > mapping->host->i_size) {
2590		kunmap(page);
2591		return 0; /* don't care */
2592	}
2593
2594	/* check to make sure that we are not extending the file */
2595	if (mapping->host->i_size - offset < (loff_t)to)
2596		to = (unsigned)(mapping->host->i_size - offset);
2597
2598	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2599				    &open_file);
2600	if (!rc) {
2601		bytes_written = cifs_write(open_file, open_file->pid,
2602					   write_data, to - from, &offset);
2603		cifsFileInfo_put(open_file);
2604		/* Does mm or vfs already set times? */
2605		simple_inode_init_ts(inode);
2606		if ((bytes_written > 0) && (offset))
2607			rc = 0;
2608		else if (bytes_written < 0)
2609			rc = bytes_written;
2610		else
2611			rc = -EFAULT;
2612	} else {
2613		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2614		if (!is_retryable_error(rc))
2615			rc = -EIO;
2616	}
2617
2618	kunmap(page);
2619	return rc;
2620}
2621
2622/*
2623 * Extend the region to be written back to include subsequent contiguously
2624 * dirty pages if possible, but don't sleep while doing so.
2625 */
2626static void cifs_extend_writeback(struct address_space *mapping,
2627				  long *_count,
2628				  loff_t start,
2629				  int max_pages,
2630				  size_t max_len,
2631				  unsigned int *_len)
2632{
2633	struct folio_batch batch;
2634	struct folio *folio;
2635	unsigned int psize, nr_pages;
2636	size_t len = *_len;
2637	pgoff_t index = (start + len) / PAGE_SIZE;
2638	bool stop = true;
2639	unsigned int i;
2640	XA_STATE(xas, &mapping->i_pages, index);
2641
2642	folio_batch_init(&batch);
2643
2644	do {
2645		/* Firstly, we gather up a batch of contiguous dirty pages
2646		 * under the RCU read lock - but we can't clear the dirty flags
2647		 * there if any of those pages are mapped.
2648		 */
2649		rcu_read_lock();
2650
2651		xas_for_each(&xas, folio, ULONG_MAX) {
2652			stop = true;
2653			if (xas_retry(&xas, folio))
2654				continue;
2655			if (xa_is_value(folio))
2656				break;
2657			if (folio->index != index)
2658				break;
2659			if (!folio_try_get_rcu(folio)) {
2660				xas_reset(&xas);
2661				continue;
2662			}
2663			nr_pages = folio_nr_pages(folio);
2664			if (nr_pages > max_pages)
2665				break;
2666
2667			/* Has the page moved or been split? */
2668			if (unlikely(folio != xas_reload(&xas))) {
2669				folio_put(folio);
2670				break;
2671			}
2672
2673			if (!folio_trylock(folio)) {
2674				folio_put(folio);
2675				break;
2676			}
2677			if (!folio_test_dirty(folio) || folio_test_writeback(folio)) {
2678				folio_unlock(folio);
2679				folio_put(folio);
2680				break;
2681			}
2682
2683			max_pages -= nr_pages;
2684			psize = folio_size(folio);
2685			len += psize;
2686			stop = false;
2687			if (max_pages <= 0 || len >= max_len || *_count <= 0)
2688				stop = true;
2689
2690			index += nr_pages;
2691			if (!folio_batch_add(&batch, folio))
2692				break;
2693			if (stop)
2694				break;
2695		}
2696
2697		if (!stop)
2698			xas_pause(&xas);
2699		rcu_read_unlock();
2700
2701		/* Now, if we obtained any pages, we can shift them to being
2702		 * writable and mark them for caching.
2703		 */
2704		if (!folio_batch_count(&batch))
2705			break;
2706
2707		for (i = 0; i < folio_batch_count(&batch); i++) {
2708			folio = batch.folios[i];
2709			/* The folio should be locked, dirty and not undergoing
2710			 * writeback from the loop above.
2711			 */
2712			if (!folio_clear_dirty_for_io(folio))
2713				WARN_ON(1);
2714			folio_start_writeback(folio);
2715
2716			*_count -= folio_nr_pages(folio);
2717			folio_unlock(folio);
2718		}
2719
2720		folio_batch_release(&batch);
2721		cond_resched();
2722	} while (!stop);
2723
2724	*_len = len;
2725}
2726
2727/*
2728 * Write back the locked page and any subsequent non-locked dirty pages.
2729 */
2730static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2731						 struct writeback_control *wbc,
2732						 struct folio *folio,
2733						 loff_t start, loff_t end)
2734{
2735	struct inode *inode = mapping->host;
2736	struct TCP_Server_Info *server;
2737	struct cifs_writedata *wdata;
2738	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2739	struct cifs_credits credits_on_stack;
2740	struct cifs_credits *credits = &credits_on_stack;
2741	struct cifsFileInfo *cfile = NULL;
2742	unsigned int xid, wsize, len;
2743	loff_t i_size = i_size_read(inode);
2744	size_t max_len;
2745	long count = wbc->nr_to_write;
2746	int rc;
2747
2748	/* The folio should be locked, dirty and not undergoing writeback. */
2749	folio_start_writeback(folio);
2750
2751	count -= folio_nr_pages(folio);
2752	len = folio_size(folio);
2753
2754	xid = get_xid();
2755	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2756
2757	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2758	if (rc) {
2759		cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2760		goto err_xid;
2761	}
2762
2763	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2764					   &wsize, credits);
2765	if (rc != 0)
2766		goto err_close;
2767
2768	wdata = cifs_writedata_alloc(cifs_writev_complete);
2769	if (!wdata) {
2770		rc = -ENOMEM;
2771		goto err_uncredit;
2772	}
2773
2774	wdata->sync_mode = wbc->sync_mode;
2775	wdata->offset = folio_pos(folio);
2776	wdata->pid = cfile->pid;
2777	wdata->credits = credits_on_stack;
2778	wdata->cfile = cfile;
2779	wdata->server = server;
2780	cfile = NULL;
2781
2782	/* Find all consecutive lockable dirty pages, stopping when we find a
2783	 * page that is not immediately lockable, is not dirty or is missing,
2784	 * or we reach the end of the range.
2785	 */
2786	if (start < i_size) {
2787		/* Trim the write to the EOF; the extra data is ignored.  Also
2788		 * put an upper limit on the size of a single write op.
2789		 */
2790		max_len = wsize;
2791		max_len = min_t(unsigned long long, max_len, end - start + 1);
2792		max_len = min_t(unsigned long long, max_len, i_size - start);
2793
2794		if (len < max_len) {
2795			int max_pages = INT_MAX;
2796
2797#ifdef CONFIG_CIFS_SMB_DIRECT
2798			if (server->smbd_conn)
2799				max_pages = server->smbd_conn->max_frmr_depth;
2800#endif
2801			max_pages -= folio_nr_pages(folio);
2802
2803			if (max_pages > 0)
2804				cifs_extend_writeback(mapping, &count, start,
2805						      max_pages, max_len, &len);
2806		}
2807		len = min_t(loff_t, len, max_len);
2808	}
2809
2810	wdata->bytes = len;
2811
2812	/* We now have a contiguous set of dirty pages, each with writeback
2813	 * set; the first page is still locked at this point, but all the rest
2814	 * have been unlocked.
2815	 */
2816	folio_unlock(folio);
2817
2818	if (start < i_size) {
2819		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2820				start, len);
2821
2822		rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2823		if (rc)
2824			goto err_wdata;
2825
2826		if (wdata->cfile->invalidHandle)
2827			rc = -EAGAIN;
2828		else
2829			rc = wdata->server->ops->async_writev(wdata,
2830							      cifs_writedata_release);
2831		if (rc >= 0) {
2832			kref_put(&wdata->refcount, cifs_writedata_release);
2833			goto err_close;
2834		}
2835	} else {
2836		/* The dirty region was entirely beyond the EOF. */
2837		cifs_pages_written_back(inode, start, len);
2838		rc = 0;
2839	}
2840
2841err_wdata:
2842	kref_put(&wdata->refcount, cifs_writedata_release);
2843err_uncredit:
2844	add_credits_and_wake_if(server, credits, 0);
2845err_close:
2846	if (cfile)
2847		cifsFileInfo_put(cfile);
2848err_xid:
2849	free_xid(xid);
2850	if (rc == 0) {
2851		wbc->nr_to_write = count;
2852		rc = len;
2853	} else if (is_retryable_error(rc)) {
2854		cifs_pages_write_redirty(inode, start, len);
2855	} else {
2856		cifs_pages_write_failed(inode, start, len);
2857		mapping_set_error(mapping, rc);
2858	}
2859	/* Indication to update ctime and mtime as close is deferred */
2860	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2861	return rc;
2862}
2863
2864/*
2865 * write a region of pages back to the server
2866 */
2867static int cifs_writepages_region(struct address_space *mapping,
2868				  struct writeback_control *wbc,
2869				  loff_t start, loff_t end, loff_t *_next)
2870{
2871	struct folio_batch fbatch;
2872	int skips = 0;
2873
2874	folio_batch_init(&fbatch);
2875	do {
2876		int nr;
2877		pgoff_t index = start / PAGE_SIZE;
2878
2879		nr = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
2880					    PAGECACHE_TAG_DIRTY, &fbatch);
2881		if (!nr)
2882			break;
2883
2884		for (int i = 0; i < nr; i++) {
2885			ssize_t ret;
2886			struct folio *folio = fbatch.folios[i];
2887
2888redo_folio:
2889			start = folio_pos(folio); /* May regress with THPs */
2890
2891			/* At this point we hold neither the i_pages lock nor the
2892			 * page lock: the page may be truncated or invalidated
2893			 * (changing page->mapping to NULL), or even swizzled
2894			 * back from swapper_space to tmpfs file mapping
2895			 */
2896			if (wbc->sync_mode != WB_SYNC_NONE) {
2897				ret = folio_lock_killable(folio);
2898				if (ret < 0)
2899					goto write_error;
2900			} else {
2901				if (!folio_trylock(folio))
2902					goto skip_write;
2903			}
2904
2905			if (folio->mapping != mapping ||
2906			    !folio_test_dirty(folio)) {
2907				start += folio_size(folio);
2908				folio_unlock(folio);
2909				continue;
2910			}
2911
2912			if (folio_test_writeback(folio) ||
2913			    folio_test_fscache(folio)) {
2914				folio_unlock(folio);
2915				if (wbc->sync_mode == WB_SYNC_NONE)
2916					goto skip_write;
2917
2918				folio_wait_writeback(folio);
2919#ifdef CONFIG_CIFS_FSCACHE
2920				folio_wait_fscache(folio);
2921#endif
2922				goto redo_folio;
2923			}
2924
2925			if (!folio_clear_dirty_for_io(folio))
2926				/* We hold the page lock - it should've been dirty. */
2927				WARN_ON(1);
2928
2929			ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
2930			if (ret < 0)
2931				goto write_error;
2932
2933			start += ret;
2934			continue;
2935
2936write_error:
2937			folio_batch_release(&fbatch);
2938			*_next = start;
2939			return ret;
2940
2941skip_write:
2942			/*
2943			 * Too many skipped writes, or need to reschedule?
2944			 * Treat it as a write error without an error code.
2945			 */
2946			if (skips >= 5 || need_resched()) {
2947				ret = 0;
2948				goto write_error;
2949			}
2950
2951			/* Otherwise, just skip that folio and go on to the next */
2952			skips++;
2953			start += folio_size(folio);
2954			continue;
2955		}
2956
2957		folio_batch_release(&fbatch);
2958		cond_resched();
2959	} while (wbc->nr_to_write > 0);
2960
2961	*_next = start;
2962	return 0;
2963}
2964
2965/*
2966 * Write some of the pending data back to the server
2967 */
2968static int cifs_writepages(struct address_space *mapping,
2969			   struct writeback_control *wbc)
2970{
2971	loff_t start, next;
2972	int ret;
2973
2974	/* We have to be careful as we can end up racing with setattr()
2975	 * truncating the pagecache since the caller doesn't take a lock here
2976	 * to prevent it.
2977	 */
2978
2979	if (wbc->range_cyclic) {
2980		start = mapping->writeback_index * PAGE_SIZE;
2981		ret = cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
2982		if (ret == 0) {
2983			mapping->writeback_index = next / PAGE_SIZE;
2984			if (start > 0 && wbc->nr_to_write > 0) {
2985				ret = cifs_writepages_region(mapping, wbc, 0,
2986							     start, &next);
2987				if (ret == 0)
2988					mapping->writeback_index =
2989						next / PAGE_SIZE;
2990			}
2991		}
2992	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
2993		ret = cifs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
2994		if (wbc->nr_to_write > 0 && ret == 0)
2995			mapping->writeback_index = next / PAGE_SIZE;
2996	} else {
2997		ret = cifs_writepages_region(mapping, wbc,
2998					     wbc->range_start, wbc->range_end, &next);
2999	}
3000
3001	return ret;
3002}
3003
3004static int
3005cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3006{
3007	int rc;
3008	unsigned int xid;
3009
3010	xid = get_xid();
3011/* BB add check for wbc flags */
3012	get_page(page);
3013	if (!PageUptodate(page))
3014		cifs_dbg(FYI, "ppw - page not up to date\n");
3015
3016	/*
3017	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
3018	 *
3019	 * A writepage() implementation always needs to do either this,
3020	 * or re-dirty the page with "redirty_page_for_writepage()" in
3021	 * the case of a failure.
3022	 *
3023	 * Just unlocking the page will cause the radix tree tag-bits
3024	 * to fail to update with the state of the page correctly.
3025	 */
3026	set_page_writeback(page);
3027retry_write:
3028	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3029	if (is_retryable_error(rc)) {
3030		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3031			goto retry_write;
3032		redirty_page_for_writepage(wbc, page);
3033	} else if (rc != 0) {
3034		SetPageError(page);
3035		mapping_set_error(page->mapping, rc);
3036	} else {
3037		SetPageUptodate(page);
3038	}
3039	end_page_writeback(page);
3040	put_page(page);
3041	free_xid(xid);
3042	return rc;
3043}
3044
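/*
 * The ->write_end() handler: if the folio is (or can now be marked)
 * uptodate, simply dirty it; otherwise push the copied bytes to the
 * server synchronously rather than dirtying a folio that is not fully
 * uptodate.  Extend i_size if the write went past the old end of file.
 */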
3045static int cifs_write_end(struct file *file, struct address_space *mapping,
3046			loff_t pos, unsigned len, unsigned copied,
3047			struct page *page, void *fsdata)
3048{
3049	int rc;
3050	struct inode *inode = mapping->host;
3051	struct cifsFileInfo *cfile = file->private_data;
3052	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3053	struct folio *folio = page_folio(page);
3054	__u32 pid;
3055
3056	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3057		pid = cfile->pid;
3058	else
3059		pid = current->tgid;
3060
3061	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3062		 page, pos, copied);
3063
3064	if (folio_test_checked(folio)) {
3065		if (copied == len)
3066			folio_mark_uptodate(folio);
3067		folio_clear_checked(folio);
3068	} else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3069		folio_mark_uptodate(folio);
3070
3071	if (!folio_test_uptodate(folio)) {
3072		char *page_data;
3073		unsigned offset = pos & (PAGE_SIZE - 1);
3074		unsigned int xid;
3075
3076		xid = get_xid();
3077		/* this is probably better than directly calling
3078		   cifs_partialpagewrite since in this function the file handle
3079		   is known, which we might as well leverage */
3080		/* BB check if anything else missing out of ppw
3081		   such as updating last write time */
3082		page_data = kmap(page);
3083		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3084		/* if (rc < 0) should we set writebehind rc? */
3085		kunmap(page);
3086
3087		free_xid(xid);
3088	} else {
3089		rc = copied;
3090		pos += copied;
3091		set_page_dirty(page);
3092	}
3093
3094	if (rc > 0) {
3095		spin_lock(&inode->i_lock);
3096		if (pos > inode->i_size) {
3097			i_size_write(inode, pos);
3098			inode->i_blocks = (512 - 1 + pos) >> 9;
3099		}
3100		spin_unlock(&inode->i_lock);
3101	}
3102
3103	unlock_page(page);
3104	put_page(page);
3105	/* Indication to update ctime and mtime as close is deferred */
3106	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3107
3108	return rc;
3109}
3110
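/*
 * fsync for strict cache mode: write out and wait for dirty pagecache
 * data, invalidate the mapping if we do not hold a read lease/oplock,
 * and then ask the server to flush the file unless strict sync was
 * disabled on the mount (CIFS_MOUNT_NOSSYNC).
 */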
3111int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3112		      int datasync)
3113{
3114	unsigned int xid;
3115	int rc = 0;
3116	struct cifs_tcon *tcon;
3117	struct TCP_Server_Info *server;
3118	struct cifsFileInfo *smbfile = file->private_data;
3119	struct inode *inode = file_inode(file);
3120	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3121
3122	rc = file_write_and_wait_range(file, start, end);
3123	if (rc) {
3124		trace_cifs_fsync_err(inode->i_ino, rc);
3125		return rc;
3126	}
3127
3128	xid = get_xid();
3129
3130	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3131		 file, datasync);
3132
3133	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3134		rc = cifs_zap_mapping(inode);
3135		if (rc) {
3136			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3137			rc = 0; /* don't care about it in fsync */
3138		}
3139	}
3140
3141	tcon = tlink_tcon(smbfile->tlink);
3142	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3143		server = tcon->ses->server;
3144		if (server->ops->flush == NULL) {
3145			rc = -ENOSYS;
3146			goto strict_fsync_exit;
3147		}
3148
3149		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3150			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3151			if (smbfile) {
3152				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3153				cifsFileInfo_put(smbfile);
3154			} else
3155				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3156		} else
3157			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3158	}
3159
3160strict_fsync_exit:
3161	free_xid(xid);
3162	return rc;
3163}
3164
3165int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3166{
3167	unsigned int xid;
3168	int rc = 0;
3169	struct cifs_tcon *tcon;
3170	struct TCP_Server_Info *server;
3171	struct cifsFileInfo *smbfile = file->private_data;
3172	struct inode *inode = file_inode(file);
3173	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3174
3175	rc = file_write_and_wait_range(file, start, end);
3176	if (rc) {
3177		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3178		return rc;
3179	}
3180
3181	xid = get_xid();
3182
3183	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3184		 file, datasync);
3185
3186	tcon = tlink_tcon(smbfile->tlink);
3187	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3188		server = tcon->ses->server;
3189		if (server->ops->flush == NULL) {
3190			rc = -ENOSYS;
3191			goto fsync_exit;
3192		}
3193
3194		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3195			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3196			if (smbfile) {
3197				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3198				cifsFileInfo_put(smbfile);
3199			} else
3200				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3201		} else
3202			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3203	}
3204
3205fsync_exit:
3206	free_xid(xid);
3207	return rc;
3208}
3209
3210/*
3211 * As file closes, flush all cached write data for this inode checking
3212 * for write behind errors.
3213 */
3214int cifs_flush(struct file *file, fl_owner_t id)
3215{
3216	struct inode *inode = file_inode(file);
3217	int rc = 0;
3218
3219	if (file->f_mode & FMODE_WRITE)
3220		rc = filemap_write_and_wait(inode->i_mapping);
3221
3222	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3223	if (rc) {
3224		/* get more nuanced writeback errors */
3225		rc = filemap_check_wb_err(file->f_mapping, 0);
3226		trace_cifs_flush_err(inode->i_ino, rc);
3227	}
3228	return rc;
3229}
3230
3231static void
3232cifs_uncached_writedata_release(struct kref *refcount)
3233{
3234	struct cifs_writedata *wdata = container_of(refcount,
3235					struct cifs_writedata, refcount);
3236
3237	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3238	cifs_writedata_release(refcount);
3239}
3240
3241static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3242
3243static void
3244cifs_uncached_writev_complete(struct work_struct *work)
3245{
3246	struct cifs_writedata *wdata = container_of(work,
3247					struct cifs_writedata, work);
3248	struct inode *inode = d_inode(wdata->cfile->dentry);
3249	struct cifsInodeInfo *cifsi = CIFS_I(inode);
3250
3251	spin_lock(&inode->i_lock);
3252	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3253	if (cifsi->netfs.remote_i_size > inode->i_size)
3254		i_size_write(inode, cifsi->netfs.remote_i_size);
3255	spin_unlock(&inode->i_lock);
3256
3257	complete(&wdata->done);
3258	collect_uncached_write_data(wdata->ctx);
3259	/* the below call can possibly free the last ref to aio ctx */
3260	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3261}
3262
3263static int
3264cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3265	struct cifs_aio_ctx *ctx)
3266{
3267	unsigned int wsize;
3268	struct cifs_credits credits;
3269	int rc;
3270	struct TCP_Server_Info *server = wdata->server;
3271
3272	do {
3273		if (wdata->cfile->invalidHandle) {
3274			rc = cifs_reopen_file(wdata->cfile, false);
3275			if (rc == -EAGAIN)
3276				continue;
3277			else if (rc)
3278				break;
3279		}
3280
3281
3282		/*
3283		 * Wait for credits to resend this wdata.
3284		 * Note: we are attempting to resend the whole wdata, not in
3285		 * segments.
3286		 */
3287		do {
3288			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3289						&wsize, &credits);
3290			if (rc)
3291				goto fail;
3292
3293			if (wsize < wdata->bytes) {
3294				add_credits_and_wake_if(server, &credits, 0);
3295				msleep(1000);
3296			}
3297		} while (wsize < wdata->bytes);
3298		wdata->credits = credits;
3299
3300		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3301
3302		if (!rc) {
3303			if (wdata->cfile->invalidHandle)
3304				rc = -EAGAIN;
3305			else {
3306				wdata->replay = true;
3307#ifdef CONFIG_CIFS_SMB_DIRECT
3308				if (wdata->mr) {
3309					wdata->mr->need_invalidate = true;
3310					smbd_deregister_mr(wdata->mr);
3311					wdata->mr = NULL;
3312				}
3313#endif
3314				rc = server->ops->async_writev(wdata,
3315					cifs_uncached_writedata_release);
3316			}
3317		}
3318
3319		/* If the write was successfully sent, we are done */
3320		if (!rc) {
3321			list_add_tail(&wdata->list, wdata_list);
3322			return 0;
3323		}
3324
3325		/* Roll back credits and retry if needed */
3326		add_credits_and_wake_if(server, &wdata->credits, 0);
3327	} while (rc == -EAGAIN);
3328
3329fail:
3330	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3331	return rc;
3332}
3333
3334/*
3335 * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3336 * size and maximum number of segments.
3337 */
3338static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3339				     size_t max_segs, unsigned int *_nsegs)
3340{
3341	const struct bio_vec *bvecs = iter->bvec;
3342	unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3343	size_t len, span = 0, n = iter->count;
3344	size_t skip = iter->iov_offset;
3345
3346	if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3347		return 0;
3348
3349	while (n && ix < nbv && skip) {
3350		len = bvecs[ix].bv_len;
3351		if (skip < len)
3352			break;
3353		skip -= len;
3354		n -= len;
3355		ix++;
3356	}
3357
3358	while (n && ix < nbv) {
3359		len = min3(n, bvecs[ix].bv_len - skip, max_size);
3360		span += len;
3361		max_size -= len;
3362		nsegs++;
3363		ix++;
3364		if (max_size == 0 || nsegs >= max_segs)
3365			break;
3366		skip = 0;
3367		n -= len;
3368	}
3369
3370	*_nsegs = nsegs;
3371	return span;
3372}
3373
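/*
 * Carve the source iterator into chunks no larger than the negotiated
 * write size (and, for smbdirect, the maximum number of segments),
 * allocate a cifs_writedata for each chunk and issue asynchronous
 * writes, queuing the outstanding requests on wdata_list.
 */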
3374static int
3375cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3376		     struct cifsFileInfo *open_file,
3377		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3378		     struct cifs_aio_ctx *ctx)
3379{
3380	int rc = 0;
3381	size_t cur_len, max_len;
3382	struct cifs_writedata *wdata;
3383	pid_t pid;
3384	struct TCP_Server_Info *server;
3385	unsigned int xid, max_segs = INT_MAX;
3386
3387	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3388		pid = open_file->pid;
3389	else
3390		pid = current->tgid;
3391
3392	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3393	xid = get_xid();
3394
3395#ifdef CONFIG_CIFS_SMB_DIRECT
3396	if (server->smbd_conn)
3397		max_segs = server->smbd_conn->max_frmr_depth;
3398#endif
3399
3400	do {
3401		struct cifs_credits credits_on_stack;
3402		struct cifs_credits *credits = &credits_on_stack;
3403		unsigned int wsize, nsegs = 0;
3404
3405		if (signal_pending(current)) {
3406			rc = -EINTR;
3407			break;
3408		}
3409
3410		if (open_file->invalidHandle) {
3411			rc = cifs_reopen_file(open_file, false);
3412			if (rc == -EAGAIN)
3413				continue;
3414			else if (rc)
3415				break;
3416		}
3417
3418		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3419						   &wsize, credits);
3420		if (rc)
3421			break;
3422
3423		max_len = min_t(const size_t, len, wsize);
3424		if (!max_len) {
3425			rc = -EAGAIN;
3426			add_credits_and_wake_if(server, credits, 0);
3427			break;
3428		}
3429
3430		cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3431		cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3432			 cur_len, max_len, nsegs, from->nr_segs, max_segs);
3433		if (cur_len == 0) {
3434			rc = -EIO;
3435			add_credits_and_wake_if(server, credits, 0);
3436			break;
3437		}
3438
3439		wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3440		if (!wdata) {
3441			rc = -ENOMEM;
3442			add_credits_and_wake_if(server, credits, 0);
3443			break;
3444		}
3445
3446		wdata->sync_mode = WB_SYNC_ALL;
3447		wdata->offset	= (__u64)fpos;
3448		wdata->cfile	= cifsFileInfo_get(open_file);
3449		wdata->server	= server;
3450		wdata->pid	= pid;
3451		wdata->bytes	= cur_len;
3452		wdata->credits	= credits_on_stack;
3453		wdata->iter	= *from;
3454		wdata->ctx	= ctx;
3455		kref_get(&ctx->refcount);
3456
3457		iov_iter_truncate(&wdata->iter, cur_len);
3458
3459		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3460
3461		if (!rc) {
3462			if (wdata->cfile->invalidHandle)
3463				rc = -EAGAIN;
3464			else
3465				rc = server->ops->async_writev(wdata,
3466					cifs_uncached_writedata_release);
3467		}
3468
3469		if (rc) {
3470			add_credits_and_wake_if(server, &wdata->credits, 0);
3471			kref_put(&wdata->refcount,
3472				 cifs_uncached_writedata_release);
3473			if (rc == -EAGAIN)
3474				continue;
3475			break;
3476		}
3477
3478		list_add_tail(&wdata->list, wdata_list);
3479		iov_iter_advance(from, cur_len);
3480		fpos += cur_len;
3481		len -= cur_len;
3482	} while (len > 0);
3483
3484	free_xid(xid);
3485	return rc;
3486}
3487
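/*
 * Gather the results of the outstanding uncached writes for this aio
 * context, resending any request that failed with a retryable error,
 * then report the total byte count or the error to the caller
 * (completing the iocb for asynchronous requests).
 */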
3488static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3489{
3490	struct cifs_writedata *wdata, *tmp;
3491	struct cifs_tcon *tcon;
3492	struct cifs_sb_info *cifs_sb;
3493	struct dentry *dentry = ctx->cfile->dentry;
3494	ssize_t rc;
3495
3496	tcon = tlink_tcon(ctx->cfile->tlink);
3497	cifs_sb = CIFS_SB(dentry->d_sb);
3498
3499	mutex_lock(&ctx->aio_mutex);
3500
3501	if (list_empty(&ctx->list)) {
3502		mutex_unlock(&ctx->aio_mutex);
3503		return;
3504	}
3505
3506	rc = ctx->rc;
3507	/*
3508	 * Wait for and collect replies for any successful sends in order of
3509	 * increasing offset. Once an error is hit, then return without waiting
3510	 * for any more replies.
3511	 */
3512restart_loop:
3513	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3514		if (!rc) {
3515			if (!try_wait_for_completion(&wdata->done)) {
3516				mutex_unlock(&ctx->aio_mutex);
3517				return;
3518			}
3519
3520			if (wdata->result)
3521				rc = wdata->result;
3522			else
3523				ctx->total_len += wdata->bytes;
3524
3525			/* resend call if it's a retryable error */
3526			if (rc == -EAGAIN) {
3527				struct list_head tmp_list;
3528				struct iov_iter tmp_from = ctx->iter;
3529
3530				INIT_LIST_HEAD(&tmp_list);
3531				list_del_init(&wdata->list);
3532
3533				if (ctx->direct_io)
3534					rc = cifs_resend_wdata(
3535						wdata, &tmp_list, ctx);
3536				else {
3537					iov_iter_advance(&tmp_from,
3538						 wdata->offset - ctx->pos);
3539
3540					rc = cifs_write_from_iter(wdata->offset,
3541						wdata->bytes, &tmp_from,
3542						ctx->cfile, cifs_sb, &tmp_list,
3543						ctx);
3544
3545					kref_put(&wdata->refcount,
3546						cifs_uncached_writedata_release);
3547				}
3548
3549				list_splice(&tmp_list, &ctx->list);
3550				goto restart_loop;
3551			}
3552		}
3553		list_del_init(&wdata->list);
3554		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3555	}
3556
3557	cifs_stats_bytes_written(tcon, ctx->total_len);
3558	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3559
3560	ctx->rc = (rc == 0) ? ctx->total_len : rc;
3561
3562	mutex_unlock(&ctx->aio_mutex);
3563
3564	if (ctx->iocb && ctx->iocb->ki_complete)
3565		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3566	else
3567		complete(&ctx->done);
3568}
3569
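/*
 * Common implementation of the uncached and direct write paths: pin or
 * copy the source iterator so it can be used from worker threads, issue
 * the asynchronous writes, and then either return -EIOCBQUEUED (async
 * iocb) or wait for all of them to complete.
 */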
3570static ssize_t __cifs_writev(
3571	struct kiocb *iocb, struct iov_iter *from, bool direct)
3572{
3573	struct file *file = iocb->ki_filp;
3574	ssize_t total_written = 0;
3575	struct cifsFileInfo *cfile;
3576	struct cifs_tcon *tcon;
3577	struct cifs_sb_info *cifs_sb;
3578	struct cifs_aio_ctx *ctx;
3579	int rc;
3580
3581	rc = generic_write_checks(iocb, from);
3582	if (rc <= 0)
3583		return rc;
3584
3585	cifs_sb = CIFS_FILE_SB(file);
3586	cfile = file->private_data;
3587	tcon = tlink_tcon(cfile->tlink);
3588
3589	if (!tcon->ses->server->ops->async_writev)
3590		return -ENOSYS;
3591
3592	ctx = cifs_aio_ctx_alloc();
3593	if (!ctx)
3594		return -ENOMEM;
3595
3596	ctx->cfile = cifsFileInfo_get(cfile);
3597
3598	if (!is_sync_kiocb(iocb))
3599		ctx->iocb = iocb;
3600
3601	ctx->pos = iocb->ki_pos;
3602	ctx->direct_io = direct;
3603	ctx->nr_pinned_pages = 0;
3604
3605	if (user_backed_iter(from)) {
3606		/*
3607		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3608		 * they contain references to the calling process's virtual
3609		 * memory layout which won't be available in an async worker
3610		 * thread.  This also takes a pin on every folio involved.
3611		 */
3612		rc = netfs_extract_user_iter(from, iov_iter_count(from),
3613					     &ctx->iter, 0);
3614		if (rc < 0) {
3615			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3616			return rc;
3617		}
3618
3619		ctx->nr_pinned_pages = rc;
3620		ctx->bv = (void *)ctx->iter.bvec;
3621		ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3622	} else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3623		   !is_sync_kiocb(iocb)) {
3624		/*
3625		 * If the op is asynchronous, we need to copy the list attached
3626		 * to a BVEC/KVEC-type iterator, but we assume that the storage
3627		 * will be pinned by the caller; in any case, we may or may not
3628		 * be able to pin the pages, so we don't try.
3629		 */
3630		ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3631		if (!ctx->bv) {
3632			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3633			return -ENOMEM;
3634		}
3635	} else {
3636		/*
3637		 * Otherwise, we just pass the iterator down as-is and rely on
3638		 * the caller to make sure the pages referred to by the
3639		 * iterator don't evaporate.
3640		 */
3641		ctx->iter = *from;
3642	}
3643
3644	ctx->len = iov_iter_count(&ctx->iter);
3645
3646	/* grab a lock here because the write completion handlers can access ctx */
3647	mutex_lock(&ctx->aio_mutex);
3648
3649	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3650				  cfile, cifs_sb, &ctx->list, ctx);
3651
3652	/*
3653	 * If at least one write was successfully sent, then discard any rc
3654	 * value from the later writes. If the other writes succeed, then
3655	 * we'll end up returning whatever was written. If they fail, then
3656	 * we'll get a new rc value from that.
3657	 */
3658	if (!list_empty(&ctx->list))
3659		rc = 0;
3660
3661	mutex_unlock(&ctx->aio_mutex);
3662
3663	if (rc) {
3664		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3665		return rc;
3666	}
3667
3668	if (!is_sync_kiocb(iocb)) {
3669		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3670		return -EIOCBQUEUED;
3671	}
3672
3673	rc = wait_for_completion_killable(&ctx->done);
3674	if (rc) {
3675		mutex_lock(&ctx->aio_mutex);
3676		ctx->rc = rc = -EINTR;
3677		total_written = ctx->total_len;
3678		mutex_unlock(&ctx->aio_mutex);
3679	} else {
3680		rc = ctx->rc;
3681		total_written = ctx->total_len;
3682	}
3683
3684	kref_put(&ctx->refcount, cifs_aio_ctx_release);
3685
3686	if (unlikely(!total_written))
3687		return rc;
3688
3689	iocb->ki_pos += total_written;
3690	return total_written;
3691}
3692
3693ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3694{
3695	struct file *file = iocb->ki_filp;
3696
3697	cifs_revalidate_mapping(file->f_inode);
3698	return __cifs_writev(iocb, from, true);
3699}
3700
3701ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3702{
3703	return __cifs_writev(iocb, from, false);
3704}
3705
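/*
 * Cached write path, used while we hold a write lease/oplock: check for
 * conflicting byte-range locks under lock_sem and then write through the
 * pagecache with the generic write path.
 */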
3706static ssize_t
3707cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3708{
3709	struct file *file = iocb->ki_filp;
3710	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3711	struct inode *inode = file->f_mapping->host;
3712	struct cifsInodeInfo *cinode = CIFS_I(inode);
3713	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3714	ssize_t rc;
3715
3716	inode_lock(inode);
3717	/*
3718	 * We need to hold the sem to be sure nobody modifies the lock list
3719	 * with a brlock that prevents writing.
3720	 */
3721	down_read(&cinode->lock_sem);
3722
3723	rc = generic_write_checks(iocb, from);
3724	if (rc <= 0)
3725		goto out;
3726
3727	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3728				     server->vals->exclusive_lock_type, 0,
3729				     NULL, CIFS_WRITE_OP))
3730		rc = __generic_file_write_iter(iocb, from);
3731	else
3732		rc = -EACCES;
3733out:
3734	up_read(&cinode->lock_sem);
3735	inode_unlock(inode);
3736
3737	if (rc > 0)
3738		rc = generic_write_sync(iocb, rc);
3739	return rc;
3740}
3741
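/*
 * Write entry point for strict cache mode: use the cached write path if
 * we hold a write lease/oplock, otherwise send the data straight to the
 * server and zap any read-cached pages so stale data is not served from
 * the pagecache afterwards.
 */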
3742ssize_t
3743cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3744{
3745	struct inode *inode = file_inode(iocb->ki_filp);
3746	struct cifsInodeInfo *cinode = CIFS_I(inode);
3747	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3748	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3749						iocb->ki_filp->private_data;
3750	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3751	ssize_t written;
3752
3753	written = cifs_get_writer(cinode);
3754	if (written)
3755		return written;
3756
3757	if (CIFS_CACHE_WRITE(cinode)) {
3758		if (cap_unix(tcon->ses) &&
3759		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3760		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3761			written = generic_file_write_iter(iocb, from);
3762			goto out;
3763		}
3764		written = cifs_writev(iocb, from);
3765		goto out;
3766	}
3767	/*
3768	 * For non-oplocked files in strict cache mode we need to write the data
3769	 * to the server exactly from the pos to pos+len-1 rather than flush all
3770	 * affected pages because it may cause an error with mandatory locks on
3771	 * these pages but not on the region from pos to pos+len-1.
3772	 */
3773	written = cifs_user_writev(iocb, from);
3774	if (CIFS_CACHE_READ(cinode)) {
3775		/*
3776		 * We have read level caching and we have just sent a write
3777		 * request to the server thus making data in the cache stale.
3778		 * Zap the cache and set oplock/lease level to NONE to avoid
3779		 * reading stale data from the cache. All subsequent read
3780		 * operations will read new data from the server.
3781		 */
3782		cifs_zap_mapping(inode);
3783		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3784			 inode);
3785		cinode->oplock = 0;
3786	}
3787out:
3788	cifs_put_writer(cinode);
3789	return written;
3790}
3791
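/*
 * Allocate a cifs_readdata and initialise its refcount, list linkage,
 * completion and work item; @complete is used as the read completion work
 * function.
 */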
3792static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3793{
3794	struct cifs_readdata *rdata;
3795
3796	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3797	if (rdata) {
3798		kref_init(&rdata->refcount);
3799		INIT_LIST_HEAD(&rdata->list);
3800		init_completion(&rdata->done);
3801		INIT_WORK(&rdata->work, complete);
3802	}
3803
3804	return rdata;
3805}
3806
3807void
3808cifs_readdata_release(struct kref *refcount)
3809{
3810	struct cifs_readdata *rdata = container_of(refcount,
3811					struct cifs_readdata, refcount);
3812
3813	if (rdata->ctx)
3814		kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3815#ifdef CONFIG_CIFS_SMB_DIRECT
3816	if (rdata->mr) {
3817		smbd_deregister_mr(rdata->mr);
3818		rdata->mr = NULL;
3819	}
3820#endif
3821	if (rdata->cfile)
3822		cifsFileInfo_put(rdata->cfile);
3823
3824	kfree(rdata);
3825}
3826
3827static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3828
3829static void
3830cifs_uncached_readv_complete(struct work_struct *work)
3831{
3832	struct cifs_readdata *rdata = container_of(work,
3833						struct cifs_readdata, work);
3834
3835	complete(&rdata->done);
3836	collect_uncached_read_data(rdata->ctx);
3837	/* the below call can possibly free the last ref to aio ctx */
3838	kref_put(&rdata->refcount, cifs_readdata_release);
3839}
3840
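/*
 * Resend a whole rdata (direct I/O case): reopen the file handle if it has
 * been invalidated, wait until the server grants enough credits to cover the
 * entire request, then reissue the async read and requeue the rdata on the
 * aio pending list.
 */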
3841static int cifs_resend_rdata(struct cifs_readdata *rdata,
3842			struct list_head *rdata_list,
3843			struct cifs_aio_ctx *ctx)
3844{
3845	unsigned int rsize;
3846	struct cifs_credits credits;
3847	int rc;
3848	struct TCP_Server_Info *server;
3849
3850	/* XXX: should we pick a new channel here? */
3851	server = rdata->server;
3852
3853	do {
3854		if (rdata->cfile->invalidHandle) {
3855			rc = cifs_reopen_file(rdata->cfile, true);
3856			if (rc == -EAGAIN)
3857				continue;
3858			else if (rc)
3859				break;
3860		}
3861
3862		/*
3863		 * Wait for credits to resend this rdata.
3864		 * Note: we are attempting to resend the whole rdata, not in
3865		 * segments.
3866		 */
3867		do {
3868			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3869						&rsize, &credits);
3870
3871			if (rc)
3872				goto fail;
3873
3874			if (rsize < rdata->bytes) {
3875				add_credits_and_wake_if(server, &credits, 0);
3876				msleep(1000);
3877			}
3878		} while (rsize < rdata->bytes);
3879		rdata->credits = credits;
3880
3881		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3882		if (!rc) {
3883			if (rdata->cfile->invalidHandle)
3884				rc = -EAGAIN;
3885			else {
3886#ifdef CONFIG_CIFS_SMB_DIRECT
3887				if (rdata->mr) {
3888					rdata->mr->need_invalidate = true;
3889					smbd_deregister_mr(rdata->mr);
3890					rdata->mr = NULL;
3891				}
3892#endif
3893				rc = server->ops->async_readv(rdata);
3894			}
3895		}
3896
3897		/* If the read was successfully sent, we are done */
3898		if (!rc) {
3899			/* Add to aio pending list */
3900			list_add_tail(&rdata->list, rdata_list);
3901			return 0;
3902		}
3903
3904		/* Roll back credits and retry if needed */
3905		add_credits_and_wake_if(server, &rdata->credits, 0);
3906	} while (rc == -EAGAIN);
3907
3908fail:
3909	kref_put(&rdata->refcount, cifs_readdata_release);
3910	return rc;
3911}
3912
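/*
 * Split an uncached or direct read into rsize-sized (and, over SMB-direct,
 * segment-limited) chunks, allocate an rdata for each chunk and issue it as
 * an async read, queueing the rdata on @rdata_list for later collection.
 */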
3913static int
3914cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
3915		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3916		     struct cifs_aio_ctx *ctx)
3917{
3918	struct cifs_readdata *rdata;
3919	unsigned int rsize, nsegs, max_segs = INT_MAX;
3920	struct cifs_credits credits_on_stack;
3921	struct cifs_credits *credits = &credits_on_stack;
3922	size_t cur_len, max_len;
3923	int rc;
3924	pid_t pid;
3925	struct TCP_Server_Info *server;
3926
3927	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3928
3929#ifdef CONFIG_CIFS_SMB_DIRECT
3930	if (server->smbd_conn)
3931		max_segs = server->smbd_conn->max_frmr_depth;
3932#endif
3933
3934	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3935		pid = open_file->pid;
3936	else
3937		pid = current->tgid;
3938
3939	do {
3940		if (open_file->invalidHandle) {
3941			rc = cifs_reopen_file(open_file, true);
3942			if (rc == -EAGAIN)
3943				continue;
3944			else if (rc)
3945				break;
3946		}
3947
3948		if (cifs_sb->ctx->rsize == 0)
3949			cifs_sb->ctx->rsize =
3950				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
3951							     cifs_sb->ctx);
3952
3953		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3954						   &rsize, credits);
3955		if (rc)
3956			break;
3957
3958		max_len = min_t(size_t, len, rsize);
3959
3960		cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
3961						 max_segs, &nsegs);
3962		cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3963			 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
3964		if (cur_len == 0) {
3965			rc = -EIO;
3966			add_credits_and_wake_if(server, credits, 0);
3967			break;
3968		}
3969
3970		rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
3971		if (!rdata) {
3972			add_credits_and_wake_if(server, credits, 0);
3973			rc = -ENOMEM;
3974			break;
3975		}
3976
3977		rdata->server	= server;
3978		rdata->cfile	= cifsFileInfo_get(open_file);
3979		rdata->offset	= fpos;
3980		rdata->bytes	= cur_len;
3981		rdata->pid	= pid;
3982		rdata->credits	= credits_on_stack;
3983		rdata->ctx	= ctx;
3984		kref_get(&ctx->refcount);
3985
3986		rdata->iter	= ctx->iter;
3987		iov_iter_truncate(&rdata->iter, cur_len);
3988
3989		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3990
3991		if (!rc) {
3992			if (rdata->cfile->invalidHandle)
3993				rc = -EAGAIN;
3994			else
3995				rc = server->ops->async_readv(rdata);
3996		}
3997
3998		if (rc) {
3999			add_credits_and_wake_if(server, &rdata->credits, 0);
4000			kref_put(&rdata->refcount, cifs_readdata_release);
4001			if (rc == -EAGAIN)
4002				continue;
4003			break;
4004		}
4005
4006		list_add_tail(&rdata->list, rdata_list);
4007		iov_iter_advance(&ctx->iter, cur_len);
4008		fpos += cur_len;
4009		len -= cur_len;
4010	} while (len > 0);
4011
4012	return rc;
4013}
4014
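/*
 * Gather the results of the async reads attached to an aio context: resend
 * any chunk that came back with -EAGAIN, accumulate the bytes read and, once
 * all rdatas have completed, report the result via the iocb's completion or
 * wake the synchronous waiter.
 */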
4015static void
4016collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4017{
4018	struct cifs_readdata *rdata, *tmp;
4019	struct cifs_sb_info *cifs_sb;
4020	int rc;
4021
4022	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4023
4024	mutex_lock(&ctx->aio_mutex);
4025
4026	if (list_empty(&ctx->list)) {
4027		mutex_unlock(&ctx->aio_mutex);
4028		return;
4029	}
4030
4031	rc = ctx->rc;
4032	/* the loop below should proceed in the order of increasing offsets */
4033again:
4034	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4035		if (!rc) {
4036			if (!try_wait_for_completion(&rdata->done)) {
4037				mutex_unlock(&ctx->aio_mutex);
4038				return;
4039			}
4040
4041			if (rdata->result == -EAGAIN) {
4042				/* resend call if it's a retryable error */
4043				struct list_head tmp_list;
4044				unsigned int got_bytes = rdata->got_bytes;
4045
4046				list_del_init(&rdata->list);
4047				INIT_LIST_HEAD(&tmp_list);
4048
4049				if (ctx->direct_io) {
4050					/*
4051					 * Re-use rdata as this is a
4052					 * direct I/O
4053					 */
4054					rc = cifs_resend_rdata(
4055						rdata,
4056						&tmp_list, ctx);
4057				} else {
4058					rc = cifs_send_async_read(
4059						rdata->offset + got_bytes,
4060						rdata->bytes - got_bytes,
4061						rdata->cfile, cifs_sb,
4062						&tmp_list, ctx);
4063
4064					kref_put(&rdata->refcount,
4065						cifs_readdata_release);
4066				}
4067
4068				list_splice(&tmp_list, &ctx->list);
4069
4070				goto again;
4071			} else if (rdata->result)
4072				rc = rdata->result;
4073
4074			/* if there was a short read -- discard anything left */
4075			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4076				rc = -ENODATA;
4077
4078			ctx->total_len += rdata->got_bytes;
4079		}
4080		list_del_init(&rdata->list);
4081		kref_put(&rdata->refcount, cifs_readdata_release);
4082	}
4083
4084	/* mask nodata case */
4085	if (rc == -ENODATA)
4086		rc = 0;
4087
4088	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4089
4090	mutex_unlock(&ctx->aio_mutex);
4091
4092	if (ctx->iocb && ctx->iocb->ki_complete)
4093		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4094	else
4095		complete(&ctx->done);
4096}
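/*
 * Common uncached/direct read path: pin or copy the caller's iterator as
 * needed for async processing, send the read to the server in chunks via
 * cifs_send_async_read() and, for synchronous iocbs, wait for the results
 * to be collected.
 */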
4097
4098static ssize_t __cifs_readv(
4099	struct kiocb *iocb, struct iov_iter *to, bool direct)
4100{
4101	size_t len;
4102	struct file *file = iocb->ki_filp;
4103	struct cifs_sb_info *cifs_sb;
4104	struct cifsFileInfo *cfile;
4105	struct cifs_tcon *tcon;
4106	ssize_t rc, total_read = 0;
4107	loff_t offset = iocb->ki_pos;
4108	struct cifs_aio_ctx *ctx;
4109
4110	len = iov_iter_count(to);
4111	if (!len)
4112		return 0;
4113
4114	cifs_sb = CIFS_FILE_SB(file);
4115	cfile = file->private_data;
4116	tcon = tlink_tcon(cfile->tlink);
4117
4118	if (!tcon->ses->server->ops->async_readv)
4119		return -ENOSYS;
4120
4121	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4122		cifs_dbg(FYI, "attempting read on write only file instance\n");
4123
4124	ctx = cifs_aio_ctx_alloc();
4125	if (!ctx)
4126		return -ENOMEM;
4127
4128	ctx->pos	= offset;
4129	ctx->direct_io	= direct;
4130	ctx->len	= len;
4131	ctx->cfile	= cifsFileInfo_get(cfile);
4132	ctx->nr_pinned_pages = 0;
4133
4134	if (!is_sync_kiocb(iocb))
4135		ctx->iocb = iocb;
4136
4137	if (user_backed_iter(to)) {
4138		/*
4139		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4140		 * they contain references to the calling process's virtual
4141		 * memory layout which won't be available in an async worker
4142		 * thread.  This also takes a pin on every folio involved.
4143		 */
4144		rc = netfs_extract_user_iter(to, iov_iter_count(to),
4145					     &ctx->iter, 0);
4146		if (rc < 0) {
4147			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4148			return rc;
4149		}
4150
4151		ctx->nr_pinned_pages = rc;
4152		ctx->bv = (void *)ctx->iter.bvec;
4153		ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4154		ctx->should_dirty = true;
4155	} else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4156		   !is_sync_kiocb(iocb)) {
4157		/*
4158		 * If the op is asynchronous, we need to copy the list attached
4159		 * to a BVEC/KVEC-type iterator, but we assume that the storage
4160		 * will be retained by the caller; in any case, we may or may
4161		 * not be able to pin the pages, so we don't try.
4162		 */
4163		ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4164		if (!ctx->bv) {
4165			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4166			return -ENOMEM;
4167		}
4168	} else {
4169		/*
4170		 * Otherwise, we just pass the iterator down as-is and rely on
4171		 * the caller to make sure the pages referred to by the
4172		 * iterator don't evaporate.
4173		 */
4174		ctx->iter = *to;
4175	}
4176
4177	if (direct) {
4178		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4179						  offset, offset + len - 1);
4180		if (rc) {
4181			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4182			return -EAGAIN;
4183		}
4184	}
4185
4186	/* grab a lock here because read response handlers can access ctx */
4187	mutex_lock(&ctx->aio_mutex);
4188
4189	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4190
4191	/* if at least one read request send succeeded, then reset rc */
4192	if (!list_empty(&ctx->list))
4193		rc = 0;
4194
4195	mutex_unlock(&ctx->aio_mutex);
4196
4197	if (rc) {
4198		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4199		return rc;
4200	}
4201
4202	if (!is_sync_kiocb(iocb)) {
4203		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4204		return -EIOCBQUEUED;
4205	}
4206
4207	rc = wait_for_completion_killable(&ctx->done);
4208	if (rc) {
4209		mutex_lock(&ctx->aio_mutex);
4210		ctx->rc = rc = -EINTR;
4211		total_read = ctx->total_len;
4212		mutex_unlock(&ctx->aio_mutex);
4213	} else {
4214		rc = ctx->rc;
4215		total_read = ctx->total_len;
4216	}
4217
4218	kref_put(&ctx->refcount, cifs_aio_ctx_release);
4219
4220	if (total_read) {
4221		iocb->ki_pos += total_read;
4222		return total_read;
4223	}
4224	return rc;
4225}
4226
4227ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4228{
4229	return __cifs_readv(iocb, to, true);
4230}
4231
4232ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4233{
4234	return __cifs_readv(iocb, to, false);
4235}
4236
4237ssize_t
4238cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4239{
4240	struct inode *inode = file_inode(iocb->ki_filp);
4241	struct cifsInodeInfo *cinode = CIFS_I(inode);
4242	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4243	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4244						iocb->ki_filp->private_data;
4245	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4246	int rc = -EACCES;
4247
4248	/*
4249	 * In strict cache mode we need to read from the server all the time
4250	 * if we don't have level II oplock because the server can delay mtime
4251	 * change - so we can't make a decision about invalidating the inode.
4252	 * We can also fail reading pages if there are mandatory locks
4253	 * on pages affected by this read but not on the region from pos to
4254	 * pos+len-1.
4255	 */
4256	if (!CIFS_CACHE_READ(cinode))
4257		return cifs_user_readv(iocb, to);
4258
4259	if (cap_unix(tcon->ses) &&
4260	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4261	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4262		return generic_file_read_iter(iocb, to);
4263
4264	/*
4265	 * We need to hold the sem to be sure nobody modifies lock list
4266	 * with a brlock that prevents reading.
4267	 */
4268	down_read(&cinode->lock_sem);
4269	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4270				     tcon->ses->server->vals->shared_lock_type,
4271				     0, NULL, CIFS_READ_OP))
4272		rc = generic_file_read_iter(iocb, to);
4273	up_read(&cinode->lock_sem);
4274	return rc;
4275}
4276
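/*
 * Synchronous read path used by cifs_readpage_worker(): read up to
 * @read_size bytes at *@offset into @read_data in rsize-sized chunks using
 * the server's sync_read op, advancing *@offset by the number of bytes read.
 */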
4277static ssize_t
4278cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4279{
4280	int rc = -EACCES;
4281	unsigned int bytes_read = 0;
4282	unsigned int total_read;
4283	unsigned int current_read_size;
4284	unsigned int rsize;
4285	struct cifs_sb_info *cifs_sb;
4286	struct cifs_tcon *tcon;
4287	struct TCP_Server_Info *server;
4288	unsigned int xid;
4289	char *cur_offset;
4290	struct cifsFileInfo *open_file;
4291	struct cifs_io_parms io_parms = {0};
4292	int buf_type = CIFS_NO_BUFFER;
4293	__u32 pid;
4294
4295	xid = get_xid();
4296	cifs_sb = CIFS_FILE_SB(file);
4297
4298	/* FIXME: set up handlers for larger reads and/or convert to async */
4299	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4300
4301	if (file->private_data == NULL) {
4302		rc = -EBADF;
4303		free_xid(xid);
4304		return rc;
4305	}
4306	open_file = file->private_data;
4307	tcon = tlink_tcon(open_file->tlink);
4308	server = cifs_pick_channel(tcon->ses);
4309
4310	if (!server->ops->sync_read) {
4311		free_xid(xid);
4312		return -ENOSYS;
4313	}
4314
4315	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4316		pid = open_file->pid;
4317	else
4318		pid = current->tgid;
4319
4320	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4321		cifs_dbg(FYI, "attempting read on write only file instance\n");
4322
4323	for (total_read = 0, cur_offset = read_data; read_size > total_read;
4324	     total_read += bytes_read, cur_offset += bytes_read) {
4325		do {
4326			current_read_size = min_t(uint, read_size - total_read,
4327						  rsize);
4328			/*
4329			 * For Windows ME and 9x we do not want to request more
4330			 * than they negotiated since they will refuse the read
4331			 * otherwise.
4332			 */
4333			if (!(tcon->ses->capabilities &
4334				tcon->ses->server->vals->cap_large_files)) {
4335				current_read_size = min_t(uint,
4336					current_read_size, CIFSMaxBufSize);
4337			}
4338			if (open_file->invalidHandle) {
4339				rc = cifs_reopen_file(open_file, true);
4340				if (rc != 0)
4341					break;
4342			}
4343			io_parms.pid = pid;
4344			io_parms.tcon = tcon;
4345			io_parms.offset = *offset;
4346			io_parms.length = current_read_size;
4347			io_parms.server = server;
4348			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4349						    &bytes_read, &cur_offset,
4350						    &buf_type);
4351		} while (rc == -EAGAIN);
4352
4353		if (rc || (bytes_read == 0)) {
4354			if (total_read) {
4355				break;
4356			} else {
4357				free_xid(xid);
4358				return rc;
4359			}
4360		} else {
4361			cifs_stats_bytes_read(tcon, total_read);
4362			*offset += bytes_read;
4363		}
4364	}
4365	free_xid(xid);
4366	return total_read;
4367}
4368
4369/*
4370 * If the page is mmap'ed into a process' page tables, then we need to make
4371 * sure that it doesn't change while being written back.
4372 */
4373static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4374{
4375	struct folio *folio = page_folio(vmf->page);
4376
4377	/* Wait for the folio to be written to the cache before we allow it to
4378	 * be modified.  We then assume the entire folio will need writing back.
4379	 */
4380#ifdef CONFIG_CIFS_FSCACHE
4381	if (folio_test_fscache(folio) &&
4382	    folio_wait_fscache_killable(folio) < 0)
4383		return VM_FAULT_RETRY;
4384#endif
4385
4386	folio_wait_writeback(folio);
4387
4388	if (folio_lock_killable(folio) < 0)
4389		return VM_FAULT_RETRY;
4390	return VM_FAULT_LOCKED;
4391}
4392
4393static const struct vm_operations_struct cifs_file_vm_ops = {
4394	.fault = filemap_fault,
4395	.map_pages = filemap_map_pages,
4396	.page_mkwrite = cifs_page_mkwrite,
4397};
4398
4399int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4400{
4401	int xid, rc = 0;
4402	struct inode *inode = file_inode(file);
4403
4404	xid = get_xid();
4405
4406	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4407		rc = cifs_zap_mapping(inode);
4408	if (!rc)
4409		rc = generic_file_mmap(file, vma);
4410	if (!rc)
4411		vma->vm_ops = &cifs_file_vm_ops;
4412
4413	free_xid(xid);
4414	return rc;
4415}
4416
4417int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4418{
4419	int rc, xid;
4420
4421	xid = get_xid();
4422
4423	rc = cifs_revalidate_file(file);
4424	if (rc)
4425		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4426			 rc);
4427	if (!rc)
4428		rc = generic_file_mmap(file, vma);
4429	if (!rc)
4430		vma->vm_ops = &cifs_file_vm_ops;
4431
4432	free_xid(xid);
4433	return rc;
4434}
4435
4436/*
4437 * Unlock a bunch of folios in the pagecache.
4438 */
4439static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4440{
4441	struct folio *folio;
4442	XA_STATE(xas, &mapping->i_pages, first);
4443
4444	rcu_read_lock();
4445	xas_for_each(&xas, folio, last) {
4446		folio_unlock(folio);
4447	}
4448	rcu_read_unlock();
4449}
4450
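/*
 * Completion work for a readahead request: copy the data to fscache if the
 * read succeeded (or returned partial data before -EAGAIN), zero any
 * shortfall in the iterator, then mark the folios uptodate on success and
 * unlock them.
 */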
4451static void cifs_readahead_complete(struct work_struct *work)
4452{
4453	struct cifs_readdata *rdata = container_of(work,
4454						   struct cifs_readdata, work);
4455	struct folio *folio;
4456	pgoff_t last;
4457	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4458
4459	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4460
4461	if (good)
4462		cifs_readahead_to_fscache(rdata->mapping->host,
4463					  rdata->offset, rdata->bytes);
4464
4465	if (iov_iter_count(&rdata->iter) > 0)
4466		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4467
4468	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4469
4470	rcu_read_lock();
4471	xas_for_each(&xas, folio, last) {
4472		if (good) {
4473			flush_dcache_folio(folio);
4474			folio_mark_uptodate(folio);
4475		}
4476		folio_unlock(folio);
4477	}
4478	rcu_read_unlock();
4479
4480	kref_put(&rdata->refcount, cifs_readdata_release);
4481}
4482
4483static void cifs_readahead(struct readahead_control *ractl)
4484{
4485	struct cifsFileInfo *open_file = ractl->file->private_data;
4486	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4487	struct TCP_Server_Info *server;
4488	unsigned int xid, nr_pages, cache_nr_pages = 0;
4489	unsigned int ra_pages;
4490	pgoff_t next_cached = ULONG_MAX, ra_index;
4491	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4492		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4493	bool check_cache = caching;
4494	pid_t pid;
4495	int rc = 0;
4496
4497	/* Note that readahead_count() lags behind our dequeuing of pages from
4498	 * the ractl, so we have to keep track for ourselves.
4499	 */
4500	ra_pages = readahead_count(ractl);
4501	ra_index = readahead_index(ractl);
4502
4503	xid = get_xid();
4504
4505	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4506		pid = open_file->pid;
4507	else
4508		pid = current->tgid;
4509
4510	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4511
4512	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4513		 __func__, ractl->file, ractl->mapping, ra_pages);
4514
4515	/*
4516	 * Chop the readahead request up into rsize-sized read requests.
4517	 */
4518	while ((nr_pages = ra_pages)) {
4519		unsigned int i, rsize;
4520		struct cifs_readdata *rdata;
4521		struct cifs_credits credits_on_stack;
4522		struct cifs_credits *credits = &credits_on_stack;
4523		struct folio *folio;
4524		pgoff_t fsize;
4525
4526		/*
4527		 * Find out if we have anything cached in the range of
4528		 * interest, and if so, where the next chunk of cached data is.
4529		 */
4530		if (caching) {
4531			if (check_cache) {
4532				rc = cifs_fscache_query_occupancy(
4533					ractl->mapping->host, ra_index, nr_pages,
4534					&next_cached, &cache_nr_pages);
4535				if (rc < 0)
4536					caching = false;
4537				check_cache = false;
4538			}
4539
4540			if (ra_index == next_cached) {
4541				/*
4542				 * TODO: Send a whole batch of pages to be read
4543				 * by the cache.
4544				 */
4545				folio = readahead_folio(ractl);
4546				fsize = folio_nr_pages(folio);
4547				ra_pages -= fsize;
4548				ra_index += fsize;
4549				if (cifs_readpage_from_fscache(ractl->mapping->host,
4550							       &folio->page) < 0) {
4551					/*
4552					 * TODO: Deal with cache read failure
4553					 * here, but for the moment, delegate
4554					 * that to readpage.
4555					 */
4556					caching = false;
4557				}
4558				folio_unlock(folio);
4559				next_cached += fsize;
4560				cache_nr_pages -= fsize;
4561				if (cache_nr_pages == 0)
4562					check_cache = true;
4563				continue;
4564			}
4565		}
4566
4567		if (open_file->invalidHandle) {
4568			rc = cifs_reopen_file(open_file, true);
4569			if (rc) {
4570				if (rc == -EAGAIN)
4571					continue;
4572				break;
4573			}
4574		}
4575
4576		if (cifs_sb->ctx->rsize == 0)
4577			cifs_sb->ctx->rsize =
4578				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4579							     cifs_sb->ctx);
4580
4581		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4582						   &rsize, credits);
4583		if (rc)
4584			break;
4585		nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4586		if (next_cached != ULONG_MAX)
4587			nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4588
4589		/*
4590		 * Give up immediately if rsize is too small to read an entire
4591		 * page. The VFS will fall back to readpage. We should never
4592		 * reach this point however since we set ra_pages to 0 when the
4593		 * rsize is smaller than a cache page.
4594		 */
4595		if (unlikely(!nr_pages)) {
4596			add_credits_and_wake_if(server, credits, 0);
4597			break;
4598		}
4599
4600		rdata = cifs_readdata_alloc(cifs_readahead_complete);
4601		if (!rdata) {
4602			/* best to give up if we're out of mem */
4603			add_credits_and_wake_if(server, credits, 0);
4604			break;
4605		}
4606
4607		rdata->offset	= ra_index * PAGE_SIZE;
4608		rdata->bytes	= nr_pages * PAGE_SIZE;
4609		rdata->cfile	= cifsFileInfo_get(open_file);
4610		rdata->server	= server;
4611		rdata->mapping	= ractl->mapping;
4612		rdata->pid	= pid;
4613		rdata->credits	= credits_on_stack;
4614
4615		for (i = 0; i < nr_pages; i++) {
4616			if (!readahead_folio(ractl))
4617				WARN_ON(1);
4618		}
4619		ra_pages -= nr_pages;
4620		ra_index += nr_pages;
4621
4622		iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4623				rdata->offset, rdata->bytes);
4624
4625		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4626		if (!rc) {
4627			if (rdata->cfile->invalidHandle)
4628				rc = -EAGAIN;
4629			else
4630				rc = server->ops->async_readv(rdata);
4631		}
4632
4633		if (rc) {
4634			add_credits_and_wake_if(server, &rdata->credits, 0);
4635			cifs_unlock_folios(rdata->mapping,
4636					   rdata->offset / PAGE_SIZE,
4637					   (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4638			/* Fallback to the readpage in error/reconnect cases */
4639			kref_put(&rdata->refcount, cifs_readdata_release);
4640			break;
4641		}
4642
4643		kref_put(&rdata->refcount, cifs_readdata_release);
4644	}
4645
4646	free_xid(xid);
4647}
4648
4649/*
4650 * cifs_readpage_worker must be called with the page pinned
4651 */
4652static int cifs_readpage_worker(struct file *file, struct page *page,
4653	loff_t *poffset)
4654{
4655	struct inode *inode = file_inode(file);
4656	struct timespec64 atime, mtime;
4657	char *read_data;
4658	int rc;
4659
4660	/* Is the page cached? */
4661	rc = cifs_readpage_from_fscache(inode, page);
4662	if (rc == 0)
4663		goto read_complete;
4664
4665	read_data = kmap(page);
4666	/* for reads over a certain size we could initiate async readahead */
4667
4668	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4669
4670	if (rc < 0)
4671		goto io_error;
4672	else
4673		cifs_dbg(FYI, "Bytes read %d\n", rc);
4674
4675	/* we do not want atime to be less than mtime, it broke some apps */
4676	atime = inode_set_atime_to_ts(inode, current_time(inode));
4677	mtime = inode_get_mtime(inode);
4678	if (timespec64_compare(&atime, &mtime) < 0)
4679		inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4680
4681	if (PAGE_SIZE > rc)
4682		memset(read_data + rc, 0, PAGE_SIZE - rc);
4683
4684	flush_dcache_page(page);
4685	SetPageUptodate(page);
4686	rc = 0;
4687
4688io_error:
4689	kunmap(page);
4690
4691read_complete:
4692	unlock_page(page);
4693	return rc;
4694}
4695
4696static int cifs_read_folio(struct file *file, struct folio *folio)
4697{
4698	struct page *page = &folio->page;
4699	loff_t offset = page_file_offset(page);
4700	int rc = -EACCES;
4701	unsigned int xid;
4702
4703	xid = get_xid();
4704
4705	if (file->private_data == NULL) {
4706		rc = -EBADF;
4707		free_xid(xid);
4708		return rc;
4709	}
4710
4711	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4712		 page, (int)offset, (int)offset);
4713
4714	rc = cifs_readpage_worker(file, page, &offset);
4715
4716	free_xid(xid);
4717	return rc;
4718}
4719
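/* Return 1 if the inode has at least one open file handle with write access. */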
4720static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4721{
4722	struct cifsFileInfo *open_file;
4723
4724	spin_lock(&cifs_inode->open_file_lock);
4725	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4726		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4727			spin_unlock(&cifs_inode->open_file_lock);
4728			return 1;
4729		}
4730	}
4731	spin_unlock(&cifs_inode->open_file_lock);
4732	return 0;
4733}
4734
4735/* We do not want to update the file size from the server for inodes
4736   open for write - to avoid races with writepage extending
4737   the file. In the future we could consider allowing
4738   refreshing the inode only on increases in the file size,
4739   but this is tricky to do without racing with writebehind
4740   page caching in the current Linux kernel design. */
4741bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4742{
4743	if (!cifsInode)
4744		return true;
4745
4746	if (is_inode_writable(cifsInode)) {
4747		/* This inode is open for write at least once */
4748		struct cifs_sb_info *cifs_sb;
4749
4750		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4751		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4752			/* since there is no page cache to corrupt on direct I/O
4753			   we can change the size safely */
4754			return true;
4755		}
4756
4757		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4758			return true;
4759
4760		return false;
4761	} else
4762		return true;
4763}
4764
4765static int cifs_write_begin(struct file *file, struct address_space *mapping,
4766			loff_t pos, unsigned len,
4767			struct page **pagep, void **fsdata)
4768{
4769	int oncethru = 0;
4770	pgoff_t index = pos >> PAGE_SHIFT;
4771	loff_t offset = pos & (PAGE_SIZE - 1);
4772	loff_t page_start = pos & PAGE_MASK;
4773	loff_t i_size;
4774	struct page *page;
4775	int rc = 0;
4776
4777	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4778
4779start:
4780	page = grab_cache_page_write_begin(mapping, index);
4781	if (!page) {
4782		rc = -ENOMEM;
4783		goto out;
4784	}
4785
4786	if (PageUptodate(page))
4787		goto out;
4788
4789	/*
4790	 * If we write a full page it will be up to date, no need to read from
4791	 * the server. If the write is short, we'll end up doing a sync write
4792	 * instead.
4793	 */
4794	if (len == PAGE_SIZE)
4795		goto out;
4796
4797	/*
4798	 * optimize away the read when we have an oplock, and we're not
4799	 * expecting to use any of the data we'd be reading in. That
4800	 * is, when the page lies beyond the EOF, or straddles the EOF
4801	 * and the write will cover all of the existing data.
4802	 */
4803	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4804		i_size = i_size_read(mapping->host);
4805		if (page_start >= i_size ||
4806		    (offset == 0 && (pos + len) >= i_size)) {
4807			zero_user_segments(page, 0, offset,
4808					   offset + len,
4809					   PAGE_SIZE);
4810			/*
4811			 * PageChecked means that the parts of the page
4812			 * to which we're not writing are considered up
4813			 * to date. Once the data is copied to the
4814			 * page, it can be set uptodate.
4815			 */
4816			SetPageChecked(page);
4817			goto out;
4818		}
4819	}
4820
4821	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4822		/*
4823		 * might as well read a page, it is fast enough. If we get
4824		 * an error, we don't need to return it. cifs_write_end will
4825		 * do a sync write instead since PG_uptodate isn't set.
4826		 */
4827		cifs_readpage_worker(file, page, &page_start);
4828		put_page(page);
4829		oncethru = 1;
4830		goto start;
4831	} else {
4832		/* we could try using another file handle if there is one -
4833		   but how would we lock it to prevent close of that handle
4834		   racing with this read? In any case
4835		   this will be written out by write_end so is fine */
4836	}
4837out:
4838	*pagep = page;
4839	return rc;
4840}
4841
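/*
 * Release a folio back to the VM: refuse if it still carries private data or
 * if it is under fscache I/O and we cannot wait for it; otherwise wait for
 * fscache, note the release to the cache and let the VM have the folio.
 */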
4842static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4843{
4844	if (folio_test_private(folio))
4845		return false;
4846	if (folio_test_fscache(folio)) {
4847		if (current_is_kswapd() || !(gfp & __GFP_FS))
4848			return false;
4849		folio_wait_fscache(folio);
4850	}
4851	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4852	return true;
4853}
4854
4855static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4856				 size_t length)
4857{
4858	folio_wait_fscache(folio);
4859}
4860
4861static int cifs_launder_folio(struct folio *folio)
4862{
4863	int rc = 0;
4864	loff_t range_start = folio_pos(folio);
4865	loff_t range_end = range_start + folio_size(folio);
4866	struct writeback_control wbc = {
4867		.sync_mode = WB_SYNC_ALL,
4868		.nr_to_write = 0,
4869		.range_start = range_start,
4870		.range_end = range_end,
4871	};
4872
4873	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4874
4875	if (folio_clear_dirty_for_io(folio))
4876		rc = cifs_writepage_locked(&folio->page, &wbc);
4877
4878	folio_wait_fscache(folio);
4879	return rc;
4880}
4881
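/*
 * Worker run when the server breaks our oplock or lease: downgrade the cached
 * state, flush (and if necessary invalidate) the pagecache, push any cached
 * byte-range locks back to the server and, unless the break was cancelled or
 * the file has since been closed, send the oplock break acknowledgment.
 */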
4882void cifs_oplock_break(struct work_struct *work)
4883{
4884	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4885						  oplock_break);
4886	struct inode *inode = d_inode(cfile->dentry);
4887	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4888	struct cifsInodeInfo *cinode = CIFS_I(inode);
4889	struct cifs_tcon *tcon;
4890	struct TCP_Server_Info *server;
4891	struct tcon_link *tlink;
4892	int rc = 0;
4893	bool purge_cache = false, oplock_break_cancelled;
4894	__u64 persistent_fid, volatile_fid;
4895	__u16 net_fid;
4896
4897	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4898			TASK_UNINTERRUPTIBLE);
4899
4900	tlink = cifs_sb_tlink(cifs_sb);
4901	if (IS_ERR(tlink))
4902		goto out;
4903	tcon = tlink_tcon(tlink);
4904	server = tcon->ses->server;
4905
4906	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4907				      cfile->oplock_epoch, &purge_cache);
4908
4909	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4910						cifs_has_mand_locks(cinode)) {
4911		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4912			 inode);
4913		cinode->oplock = 0;
4914	}
4915
4916	if (inode && S_ISREG(inode->i_mode)) {
4917		if (CIFS_CACHE_READ(cinode))
4918			break_lease(inode, O_RDONLY);
4919		else
4920			break_lease(inode, O_WRONLY);
4921		rc = filemap_fdatawrite(inode->i_mapping);
4922		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4923			rc = filemap_fdatawait(inode->i_mapping);
4924			mapping_set_error(inode->i_mapping, rc);
4925			cifs_zap_mapping(inode);
4926		}
4927		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4928		if (CIFS_CACHE_WRITE(cinode))
4929			goto oplock_break_ack;
4930	}
4931
4932	rc = cifs_push_locks(cfile);
4933	if (rc)
4934		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4935
4936oplock_break_ack:
4937	/*
4938	 * When an oplock break is received and there are no active file
4939	 * handles, only cached ones, schedule the deferred close immediately
4940	 * so that a new open will not use the cached handle.
4941	 */
4942
4943	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
4944		cifs_close_deferred_file(cinode);
4945
4946	persistent_fid = cfile->fid.persistent_fid;
4947	volatile_fid = cfile->fid.volatile_fid;
4948	net_fid = cfile->fid.netfid;
4949	oplock_break_cancelled = cfile->oplock_break_cancelled;
4950
4951	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4952	/*
4953	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
4954	 * an acknowledgment to be sent when the file has already been closed.
4955	 */
4956	spin_lock(&cinode->open_file_lock);
4957	/* check list empty since can race with kill_sb calling tree disconnect */
4958	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
4959		spin_unlock(&cinode->open_file_lock);
4960		rc = server->ops->oplock_response(tcon, persistent_fid,
4961						  volatile_fid, net_fid, cinode);
4962		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4963	} else
4964		spin_unlock(&cinode->open_file_lock);
4965
4966	cifs_put_tlink(tlink);
4967out:
4968	cifs_done_oplock_break(cinode);
4969}
4970
4971/*
4972 * The presence of cifs_direct_io() in the address space ops vector
4973 * allows open() O_DIRECT flags which would have failed otherwise.
4974 *
4975 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4976 * so this method should never be called.
4977 *
4978 * Direct IO is not yet supported in the cached mode.
4979 */
4980static ssize_t
4981cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4982{
4983	/*
4984	 * FIXME
4985	 * Eventually need to support direct IO for non forcedirectio mounts
4986	 */
4987	return -EINVAL;
4988}
4989
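/*
 * Activate a swapfile on a CIFS mount: refuse files with holes, mark the open
 * file as a swapfile and register a single swap extent covering the file.
 */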
4990static int cifs_swap_activate(struct swap_info_struct *sis,
4991			      struct file *swap_file, sector_t *span)
4992{
4993	struct cifsFileInfo *cfile = swap_file->private_data;
4994	struct inode *inode = swap_file->f_mapping->host;
4995	unsigned long blocks;
4996	long long isize;
4997
4998	cifs_dbg(FYI, "swap activate\n");
4999
5000	if (!swap_file->f_mapping->a_ops->swap_rw)
5001		/* Cannot support swap */
5002		return -EINVAL;
5003
5004	spin_lock(&inode->i_lock);
5005	blocks = inode->i_blocks;
5006	isize = inode->i_size;
5007	spin_unlock(&inode->i_lock);
5008	if (blocks*512 < isize) {
5009		pr_warn("swap activate: swapfile has holes\n");
5010		return -EINVAL;
5011	}
5012	*span = sis->pages;
5013
5014	pr_warn_once("Swap support over SMB3 is experimental\n");
5015
5016	/*
5017	 * TODO: consider adding ACL (or documenting how) to prevent other
5018	 * users (on this or other systems) from reading it
5019	 */
5020
5021
5022	/* TODO: add sk_set_memalloc(inet) or similar */
5023
5024	if (cfile)
5025		cfile->swapfile = true;
5026	/*
5027	 * TODO: Since file already open, we can't open with DENY_ALL here
5028	 * but we could add call to grab a byte range lock to prevent others
5029	 * from reading or writing the file
5030	 */
5031
5032	sis->flags |= SWP_FS_OPS;
5033	return add_swap_extent(sis, 0, sis->max, 0);
5034}
5035
5036static void cifs_swap_deactivate(struct file *file)
5037{
5038	struct cifsFileInfo *cfile = file->private_data;
5039
5040	cifs_dbg(FYI, "swap deactivate\n");
5041
5042	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5043
5044	if (cfile)
5045		cfile->swapfile = false;
5046
5047	/* do we need to unpin (or unlock) the file */
5048}
5049
5050const struct address_space_operations cifs_addr_ops = {
5051	.read_folio = cifs_read_folio,
5052	.readahead = cifs_readahead,
5053	.writepages = cifs_writepages,
5054	.write_begin = cifs_write_begin,
5055	.write_end = cifs_write_end,
5056	.dirty_folio = netfs_dirty_folio,
5057	.release_folio = cifs_release_folio,
5058	.direct_IO = cifs_direct_io,
5059	.invalidate_folio = cifs_invalidate_folio,
5060	.launder_folio = cifs_launder_folio,
5061	.migrate_folio = filemap_migrate_folio,
5062	/*
5063	 * TODO: investigate and if useful we could add an is_dirty_writeback
5064	 * helper if needed
5065	 */
5066	.swap_activate = cifs_swap_activate,
5067	.swap_deactivate = cifs_swap_deactivate,
5068};
5069
5070/*
5071 * cifs_readahead requires the server to support a buffer large enough to
5072 * contain the header plus one complete page of data.  Otherwise, we need
5073 * to leave cifs_readahead out of the address space operations.
5074 */
5075const struct address_space_operations cifs_addr_ops_smallbuf = {
5076	.read_folio = cifs_read_folio,
5077	.writepages = cifs_writepages,
5078	.write_begin = cifs_write_begin,
5079	.write_end = cifs_write_end,
5080	.dirty_folio = netfs_dirty_folio,
5081	.release_folio = cifs_release_folio,
5082	.invalidate_folio = cifs_invalidate_folio,
5083	.launder_folio = cifs_launder_folio,
5084	.migrate_folio = filemap_migrate_folio,
5085};