/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"

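/*
 * Map the O_ACCMODE bits of the VFS open flags onto the NT desired
 * access mask sent on the wire, e.g. O_RDWR becomes
 * GENERIC_READ | GENERIC_WRITE.  GENERIC_ALL is deliberately not
 * requested since asking for more permission than needed can cause
 * unnecessary access-denied errors on create.
 */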
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request; it can
		   cause unnecessary access-denied errors on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}

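/*
 * Map the O_CREAT/O_EXCL/O_TRUNC open flags onto an NT create
 * disposition; see the open flag mapping table in cifs_nt_open()
 * below for the full correspondence.
 */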
static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}

static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is no direct POSIX match for the disposition
 *	FILE_SUPERSEDE (i.e. create whether or not the file exists).
 *	O_CREAT | O_TRUNC is similar, but it truncates the existing
 *	file rather than creating a new one as FILE_SUPERSEDE does
 *	(FILE_SUPERSEDE uses the attributes / metadata passed in on
 *	the open call).
 *
 *	O_SYNC is a reasonable match to the CIFS writethrough flag,
 *	and the read/write flags match reasonably.  O_LARGEFILE is
 *	irrelevant because large file support is always used by this
 *	client.  The flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *	O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

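/*
 * Write-lock a lock_sem style rw_semaphore without sleeping inside the
 * rwsem slow path: poll with down_write_trylock() and msleep() between
 * attempts until the lock is acquired.
 */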
void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

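/*
 * Allocate the per-open-handle private data (refcount, byte-range lock
 * list, oplock break work) and publish it on the tcon and inode open
 * file lists, resolving any oplock state left by a pending open.
 * Returns NULL on allocation failure.
 */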
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if this is a readable file instance, put it first in the list */
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file private data
 * @cifs_file: the file info to release
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference to file private data
 * @cifs_file: the file info to release
 * @wait_oplock_handler: must be false if called from the oplock break handler
 * @offload: if true, defer the final release to a workqueue
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid;
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need to invalidate the mapping on
		 * the last close because it may cause an error when we open
		 * this file again and get at least a level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close_getattr)
			server->ops->close_getattr(xid, tcon, cifs_file);
		else if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	if (offload)
		queue_work(fileinfo_put_wq, &cifs_file->put);
	else
		cifsFileInfo_put_final(cifs_file);
}

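/*
 * ->open() for regular files: try an SMB1 POSIX open when the server
 * advertises CIFS_UNIX_POSIX_PATH_OPS_CAP and fall back to a regular
 * NT open otherwise, then attach a cifsFileInfo to
 * file->private_data.  A pending open is registered before the open
 * goes on the wire so a lease break arriving mid-open is not missed.
 */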
int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

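/*
 * Reopen a file handle that was invalidated by a reconnect.  When
 * @can_flush is set, dirty pages are written back and the inode is
 * refreshed before the handle is reused; callers in the writeback
 * path pass false to avoid deadlocking against a flush that is
 * already in progress.
 */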
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Cannot grab the rename sem here, because various ops, including
	 * some that already hold it, can end up causing writepage to get
	 * called, and if the server was down that means we end up here, and
	 * we can never tell if the caller already holds the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * Fall through to retry the open the old way on errors;
		 * especially in the reconnect path it is important to retry
		 * hard.
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Cannot refresh the inode by passing in a file_info buf to be
	 * returned by ops->open and then calling get_inode_info with the
	 * returned buf, since the file might have write-behind data that
	 * needs to be flushed and the server's version of the file size can
	 * be stale. If we knew for sure that the inode was not dirty locally
	 * we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions)
			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
		else if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
	if (file->private_data != NULL) {
		_cifsFileInfo_put(file->private_data, true, false);
		file->private_data = NULL;
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

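/*
 * Walk the tcon's open file list and reopen every handle that was
 * invalidated by a reconnect.  Any handle that fails to reopen leaves
 * need_reopen_files set so that the scan is retried later.
 */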
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file;
	struct list_head *tmp;
	struct list_head *tmp1;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_safe(tmp, tmp1, &tmp_list) {
		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}

int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

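/*
 * Allocate a cifsLockInfo describing a byte range locked by the
 * current thread group; the caller is responsible for linking it into
 * a fid's lock list.
 */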
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check: CIFS_LOCK_OP (no op), CIFS_READ_OP (read) or CIFS_WRITE_OP (write) */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server, or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server, or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if the error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = FILE_LOCK_DEFERRED + 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	return rc;
}

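/*
 * Push cached byte-range locks out to the server in batched
 * LOCKING_ANDX requests.  The batch buffer is sized from the server's
 * maxBuf (capped at PAGE_SIZE), and exclusive and shared locks are
 * sent in separate passes because each request carries a single lock
 * type.
 */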
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}

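/*
 * Fold the opaque lock owner pointer into the 32-bit PID field sent on
 * the wire, mixed with cifs_lock_secret so the raw kernel pointer is
 * not exposed to the server.
 */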
static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we hold cinode->lock_sem, which protects
	 * the locking operations on this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		/* advance to the next preallocated element for the next lock */
		el = el->next;
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}

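/*
 * Decode a VFS file_lock into this server dialect's wire lock type and
 * report whether the request is a lock or an unlock and whether the
 * caller is prepared to wait (FL_SLEEP).
 */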
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}

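/*
 * Handle an unlock request that may span several cached lock records:
 * matching records are staged on a temporary list so they can be
 * restored to the file's list if the batched LOCKING_ANDX unlock
 * fails on the server.
 */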
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cifs_down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}

static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (rc <= FILE_LOCK_DEFERRED)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 servers can delay breaking a lease from read to
		 * None if we set a byte-range lock on a file - break it
		 * explicitly before sending the lock to the server, to be
		 * sure the next read won't conflict with non-overlapping
		 * locks due to page reading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}

int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	if (!(fl->fl_flags & FL_FLOCK)) {
		free_xid(xid);
		return -ENOLCK;
	}

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}
1771
1772int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1773{
1774	int rc, xid;
1775	int lock = 0, unlock = 0;
1776	bool wait_flag = false;
1777	bool posix_lck = false;
1778	struct cifs_sb_info *cifs_sb;
1779	struct cifs_tcon *tcon;
1780	struct cifsFileInfo *cfile;
1781	__u32 type;
1782
1783	rc = -EACCES;
1784	xid = get_xid();
1785
1786	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1787		 cmd, flock->fl_flags, flock->fl_type,
1788		 flock->fl_start, flock->fl_end);
1789
1790	cfile = (struct cifsFileInfo *)file->private_data;
1791	tcon = tlink_tcon(cfile->tlink);
1792
1793	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1794			tcon->ses->server);
1795	cifs_sb = CIFS_FILE_SB(file);
1796
1797	if (cap_unix(tcon->ses) &&
1798	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1799	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1800		posix_lck = true;
1801	/*
1802	 * BB add code here to normalize offset and length to account for
1803	 * negative length which we can not accept over the wire.
1804	 */
1805	if (IS_GETLK(cmd)) {
1806		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1807		free_xid(xid);
1808		return rc;
1809	}
1810
1811	if (!lock && !unlock) {
1812		/*
1813		 * if no lock or unlock then nothing to do since we do not
1814		 * know what it is
1815		 */
1816		free_xid(xid);
1817		return -EOPNOTSUPP;
1818	}
1819
1820	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1821			xid);
1822	free_xid(xid);
1823	return rc;
1824}
1825
1826/*
1827 * update the file size (if needed) after a write. Should be called with
1828 * the inode->i_lock held
1829 */
1830void
1831cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1832		      unsigned int bytes_written)
1833{
1834	loff_t end_of_write = offset + bytes_written;
1835
1836	if (end_of_write > cifsi->server_eof)
1837		cifsi->server_eof = end_of_write;
1838}
1839
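    /*
     * Synchronously write data from a kernel buffer at *offset, looping
     * until the whole request has been sent. A stale handle is reopened
     * (without flushing cached data, to avoid deadlock) and the send is
     * retried; server_eof and i_size are updated as bytes are accepted.
     */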
1840static ssize_t
1841cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1842	   size_t write_size, loff_t *offset)
1843{
1844	int rc = 0;
1845	unsigned int bytes_written = 0;
1846	unsigned int total_written;
1847	struct cifs_tcon *tcon;
1848	struct TCP_Server_Info *server;
1849	unsigned int xid;
1850	struct dentry *dentry = open_file->dentry;
1851	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1852	struct cifs_io_parms io_parms = {0};
1853
1854	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1855		 write_size, *offset, dentry);
1856
1857	tcon = tlink_tcon(open_file->tlink);
1858	server = tcon->ses->server;
1859
1860	if (!server->ops->sync_write)
1861		return -ENOSYS;
1862
1863	xid = get_xid();
1864
1865	for (total_written = 0; write_size > total_written;
1866	     total_written += bytes_written) {
1867		rc = -EAGAIN;
1868		while (rc == -EAGAIN) {
1869			struct kvec iov[2];
1870			unsigned int len;
1871
1872			if (open_file->invalidHandle) {
1873				/* we could deadlock if we called
1874				   filemap_fdatawait from here so tell
1875				   reopen_file not to flush data to
1876				   server now */
1877				rc = cifs_reopen_file(open_file, false);
1878				if (rc != 0)
1879					break;
1880			}
1881
1882			len = min(server->ops->wp_retry_size(d_inode(dentry)),
1883				  (unsigned int)write_size - total_written);
1884			/* iov[0] is reserved for smb header */
1885			iov[1].iov_base = (char *)write_data + total_written;
1886			iov[1].iov_len = len;
1887			io_parms.pid = pid;
1888			io_parms.tcon = tcon;
1889			io_parms.offset = *offset;
1890			io_parms.length = len;
1891			rc = server->ops->sync_write(xid, &open_file->fid,
1892					&io_parms, &bytes_written, iov, 1);
1893		}
1894		if (rc || (bytes_written == 0)) {
1895			if (total_written)
1896				break;
1897			else {
1898				free_xid(xid);
1899				return rc;
1900			}
1901		} else {
1902			spin_lock(&d_inode(dentry)->i_lock);
1903			cifs_update_eof(cifsi, *offset, bytes_written);
1904			spin_unlock(&d_inode(dentry)->i_lock);
1905			*offset += bytes_written;
1906		}
1907	}
1908
1909	cifs_stats_bytes_written(tcon, total_written);
1910
1911	if (total_written > 0) {
1912		spin_lock(&d_inode(dentry)->i_lock);
1913		if (*offset > d_inode(dentry)->i_size)
1914			i_size_write(d_inode(dentry), *offset);
1915		spin_unlock(&d_inode(dentry)->i_lock);
1916	}
1917	mark_inode_dirty_sync(d_inode(dentry));
1918	free_xid(xid);
1919	return total_written;
1920}
1921
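    /*
     * Find an open handle on this inode that permits reading. On success
     * the handle is returned with an extra reference (the caller must
     * drop it with cifsFileInfo_put); returns NULL if none is usable.
     */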
1922struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1923					bool fsuid_only)
1924{
1925	struct cifsFileInfo *open_file = NULL;
1926	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1927
1928	/* only filter by fsuid on multiuser mounts */
1929	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1930		fsuid_only = false;
1931
1932	spin_lock(&cifs_inode->open_file_lock);
1933	/* we could simply take the first list entry since write-only entries
1934	   are always at the end of the list, but the first entry might have
1935	   a close pending, so we walk the whole list */
1936	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1937		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1938			continue;
1939		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1940			if (!open_file->invalidHandle) {
1941				/* found a good file */
1942				/* lock it so it will not be closed on us */
1943				cifsFileInfo_get(open_file);
1944				spin_unlock(&cifs_inode->open_file_lock);
1945				return open_file;
1946			} /* else might as well continue, and look for
1947			     another, or simply have the caller reopen it
1948			     again rather than trying to fix this handle */
1949		} else /* write only file */
1950			break; /* write only files are last so must be done */
1951	}
1952	spin_unlock(&cifs_inode->open_file_lock);
1953	return NULL;
1954}
1955
1956/* Return -EBADF if no handle is found and general rc otherwise */
1957int
1958cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
1959		       struct cifsFileInfo **ret_file)
1960{
1961	struct cifsFileInfo *open_file, *inv_file = NULL;
1962	struct cifs_sb_info *cifs_sb;
1963	bool any_available = false;
1964	int rc = -EBADF;
1965	unsigned int refind = 0;
1966	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
1967	bool with_delete = flags & FIND_WR_WITH_DELETE;
1968	*ret_file = NULL;
1969
1970	/*
1971	 * Having a null inode here (because mapping->host was set to zero by
1972	 * the VFS or MM) should not happen, but we had reports of an oops (due
1973	 * to it being zero) during stress test cases, so we need to check for it
1974	 */
1975
1976	if (cifs_inode == NULL) {
1977		cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
1978		dump_stack();
1979		return rc;
1980	}
1981
1982	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1983
1984	/* only filter by fsuid on multiuser mounts */
1985	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1986		fsuid_only = false;
1987
1988	spin_lock(&cifs_inode->open_file_lock);
1989refind_writable:
1990	if (refind > MAX_REOPEN_ATT) {
1991		spin_unlock(&cifs_inode->open_file_lock);
1992		return rc;
1993	}
1994	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1995		if (!any_available && open_file->pid != current->tgid)
1996			continue;
1997		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1998			continue;
1999		if (with_delete && !(open_file->fid.access & DELETE))
2000			continue;
2001		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2002			if (!open_file->invalidHandle) {
2003				/* found a good writable file */
2004				cifsFileInfo_get(open_file);
2005				spin_unlock(&cifs_inode->open_file_lock);
2006				*ret_file = open_file;
2007				return 0;
2008			} else {
2009				if (!inv_file)
2010					inv_file = open_file;
2011			}
2012		}
2013	}
2014	/* couldn't find a usable FH with the same pid, try any available */
2015	if (!any_available) {
2016		any_available = true;
2017		goto refind_writable;
2018	}
2019
2020	if (inv_file) {
2021		any_available = false;
2022		cifsFileInfo_get(inv_file);
2023	}
2024
2025	spin_unlock(&cifs_inode->open_file_lock);
2026
2027	if (inv_file) {
2028		rc = cifs_reopen_file(inv_file, false);
2029		if (!rc) {
2030			*ret_file = inv_file;
2031			return 0;
2032		}
2033
2034		spin_lock(&cifs_inode->open_file_lock);
2035		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2036		spin_unlock(&cifs_inode->open_file_lock);
2037		cifsFileInfo_put(inv_file);
2038		++refind;
2039		inv_file = NULL;
2040		spin_lock(&cifs_inode->open_file_lock);
2041		goto refind_writable;
2042	}
2043
2044	return rc;
2045}
2046
2047struct cifsFileInfo *
2048find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2049{
2050	struct cifsFileInfo *cfile;
2051	int rc;
2052
2053	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2054	if (rc)
2055		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2056
2057	return cfile;
2058}
2059
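    /*
     * Look up a writable handle by pathname: walk the tcon's list of open
     * files, compare each handle's full path against @name, and hand the
     * matching inode to cifs_get_writable_file().
     */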
2060int
2061cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2062		       int flags,
2063		       struct cifsFileInfo **ret_file)
2064{
2065	struct list_head *tmp;
2066	struct cifsFileInfo *cfile;
2067	struct cifsInodeInfo *cinode;
2068	char *full_path;
2069
2070	*ret_file = NULL;
2071
2072	spin_lock(&tcon->open_file_lock);
2073	list_for_each(tmp, &tcon->openFileList) {
2074		cfile = list_entry(tmp, struct cifsFileInfo,
2075			     tlist);
2076		full_path = build_path_from_dentry(cfile->dentry);
2077		if (full_path == NULL) {
2078			spin_unlock(&tcon->open_file_lock);
2079			return -ENOMEM;
2080		}
2081		if (strcmp(full_path, name)) {
2082			kfree(full_path);
2083			continue;
2084		}
2085
2086		kfree(full_path);
2087		cinode = CIFS_I(d_inode(cfile->dentry));
2088		spin_unlock(&tcon->open_file_lock);
2089		return cifs_get_writable_file(cinode, flags, ret_file);
2090	}
2091
2092	spin_unlock(&tcon->open_file_lock);
2093	return -ENOENT;
2094}
2095
2096int
2097cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2098		       struct cifsFileInfo **ret_file)
2099{
2100	struct list_head *tmp;
2101	struct cifsFileInfo *cfile;
2102	struct cifsInodeInfo *cinode;
2103	char *full_path;
2104
2105	*ret_file = NULL;
2106
2107	spin_lock(&tcon->open_file_lock);
2108	list_for_each(tmp, &tcon->openFileList) {
2109		cfile = list_entry(tmp, struct cifsFileInfo,
2110			     tlist);
2111		full_path = build_path_from_dentry(cfile->dentry);
2112		if (full_path == NULL) {
2113			spin_unlock(&tcon->open_file_lock);
2114			return -ENOMEM;
2115		}
2116		if (strcmp(full_path, name)) {
2117			kfree(full_path);
2118			continue;
2119		}
2120
2121		kfree(full_path);
2122		cinode = CIFS_I(d_inode(cfile->dentry));
2123		spin_unlock(&tcon->open_file_lock);
2124		*ret_file = find_readable_file(cinode, 0);
2125		return *ret_file ? 0 : -ENOENT;
2126	}
2127
2128	spin_unlock(&tcon->open_file_lock);
2129	return -ENOENT;
2130}
2131
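    /*
     * Write the byte range [from, to) of a cached page back to the server
     * through any writable handle on the inode. The range is clamped to
     * the current file size, so this path never extends the file.
     */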
2132static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2133{
2134	struct address_space *mapping = page->mapping;
2135	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2136	char *write_data;
2137	int rc = -EFAULT;
2138	int bytes_written = 0;
2139	struct inode *inode;
2140	struct cifsFileInfo *open_file;
2141
2142	if (!mapping || !mapping->host)
2143		return -EFAULT;
2144
2145	inode = page->mapping->host;
2146
2147	offset += (loff_t)from;
2148	write_data = kmap(page);
2149	write_data += from;
2150
2151	if ((to > PAGE_SIZE) || (from > to)) {
2152		kunmap(page);
2153		return -EIO;
2154	}
2155
2156	/* racing with truncate? */
2157	if (offset > mapping->host->i_size) {
2158		kunmap(page);
2159		return 0; /* don't care */
2160	}
2161
2162	/* check to make sure that we are not extending the file */
2163	if (mapping->host->i_size - offset < (loff_t)to)
2164		to = (unsigned)(mapping->host->i_size - offset);
2165
2166	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2167				    &open_file);
2168	if (!rc) {
2169		bytes_written = cifs_write(open_file, open_file->pid,
2170					   write_data, to - from, &offset);
2171		cifsFileInfo_put(open_file);
2172		/* Does mm or vfs already set times? */
2173		inode->i_atime = inode->i_mtime = current_time(inode);
2174		if ((bytes_written > 0) && (offset))
2175			rc = 0;
2176		else if (bytes_written < 0)
2177			rc = bytes_written;
2178		else
2179			rc = -EFAULT;
2180	} else {
2181		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2182		if (!is_retryable_error(rc))
2183			rc = -EIO;
2184	}
2185
2186	kunmap(page);
2187	return rc;
2188}
2189
2190static struct cifs_writedata *
2191wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2192			  pgoff_t end, pgoff_t *index,
2193			  unsigned int *found_pages)
2194{
2195	struct cifs_writedata *wdata;
2196
2197	wdata = cifs_writedata_alloc((unsigned int)tofind,
2198				     cifs_writev_complete);
2199	if (!wdata)
2200		return NULL;
2201
2202	*found_pages = find_get_pages_range_tag(mapping, index, end,
2203				PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2204	return wdata;
2205}
2206
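    /*
     * Lock and tag for writeback a contiguous run of the pages found by
     * wdata_alloc_and_fillpages(), stopping as soon as a page turns out
     * to be truncated, remapped, already under writeback, or beyond the
     * range. Returns the number of pages kept; the rest are released.
     */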
2207static unsigned int
2208wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2209		    struct address_space *mapping,
2210		    struct writeback_control *wbc,
2211		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2212{
2213	unsigned int nr_pages = 0, i;
2214	struct page *page;
2215
2216	for (i = 0; i < found_pages; i++) {
2217		page = wdata->pages[i];
2218		/*
2219		 * At this point we hold neither the i_pages lock nor the
2220		 * page lock: the page may be truncated or invalidated
2221		 * (changing page->mapping to NULL), or even swizzled
2222		 * back from swapper_space to tmpfs file mapping
2223		 */
2224
2225		if (nr_pages == 0)
2226			lock_page(page);
2227		else if (!trylock_page(page))
2228			break;
2229
2230		if (unlikely(page->mapping != mapping)) {
2231			unlock_page(page);
2232			break;
2233		}
2234
2235		if (!wbc->range_cyclic && page->index > end) {
2236			*done = true;
2237			unlock_page(page);
2238			break;
2239		}
2240
2241		if (*next && (page->index != *next)) {
2242			/* Not next consecutive page */
2243			unlock_page(page);
2244			break;
2245		}
2246
2247		if (wbc->sync_mode != WB_SYNC_NONE)
2248			wait_on_page_writeback(page);
2249
2250		if (PageWriteback(page) ||
2251				!clear_page_dirty_for_io(page)) {
2252			unlock_page(page);
2253			break;
2254		}
2255
2256		/*
2257		 * This actually clears the dirty bit in the radix tree.
2258		 * See cifs_writepage() for more commentary.
2259		 */
2260		set_page_writeback(page);
2261		if (page_offset(page) >= i_size_read(mapping->host)) {
2262			*done = true;
2263			unlock_page(page);
2264			end_page_writeback(page);
2265			break;
2266		}
2267
2268		wdata->pages[i] = page;
2269		*next = page->index + 1;
2270		++nr_pages;
2271	}
2272
2273	/* reset index to refind any pages skipped */
2274	if (nr_pages == 0)
2275		*index = wdata->pages[0]->index + 1;
2276
2277	/* put any pages we aren't going to use */
2278	for (i = nr_pages; i < found_pages; i++) {
2279		put_page(wdata->pages[i]);
2280		wdata->pages[i] = NULL;
2281	}
2282
2283	return nr_pages;
2284}
2285
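    /*
     * Fill in the remaining wdata fields (offset, page/tail sizes, pid)
     * from the prepared page run, trim the reserved credits to the byte
     * count actually being sent, and issue the asynchronous write.
     */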
2286static int
2287wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2288		 struct address_space *mapping, struct writeback_control *wbc)
2289{
2290	int rc;
2291
2292	wdata->sync_mode = wbc->sync_mode;
2293	wdata->nr_pages = nr_pages;
2294	wdata->offset = page_offset(wdata->pages[0]);
2295	wdata->pagesz = PAGE_SIZE;
2296	wdata->tailsz = min(i_size_read(mapping->host) -
2297			page_offset(wdata->pages[nr_pages - 1]),
2298			(loff_t)PAGE_SIZE);
2299	wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2300	wdata->pid = wdata->cfile->pid;
2301
2302	rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2303	if (rc)
2304		return rc;
2305
2306	if (wdata->cfile->invalidHandle)
2307		rc = -EAGAIN;
2308	else
2309		rc = wdata->server->ops->async_writev(wdata,
2310						      cifs_writedata_release);
2311
2312	return rc;
2313}
2314
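    /*
     * Address-space writepages implementation: repeatedly gather runs of
     * dirty pages, reserve credits against the negotiated wsize, and send
     * them as asynchronous writes, falling back to generic_writepages()
     * when wsize is smaller than a page.
     */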
2315static int cifs_writepages(struct address_space *mapping,
2316			   struct writeback_control *wbc)
2317{
2318	struct inode *inode = mapping->host;
2319	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2320	struct TCP_Server_Info *server;
2321	bool done = false, scanned = false, range_whole = false;
2322	pgoff_t end, index;
2323	struct cifs_writedata *wdata;
2324	struct cifsFileInfo *cfile = NULL;
2325	int rc = 0;
2326	int saved_rc = 0;
2327	unsigned int xid;
2328
2329	/*
2330	 * If wsize is smaller than the page cache size, default to writing
2331	 * one page at a time via cifs_writepage
2332	 */
2333	if (cifs_sb->wsize < PAGE_SIZE)
2334		return generic_writepages(mapping, wbc);
2335
2336	xid = get_xid();
2337	if (wbc->range_cyclic) {
2338		index = mapping->writeback_index; /* Start from prev offset */
2339		end = -1;
2340	} else {
2341		index = wbc->range_start >> PAGE_SHIFT;
2342		end = wbc->range_end >> PAGE_SHIFT;
2343		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2344			range_whole = true;
2345		scanned = true;
2346	}
2347	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2348
2349retry:
2350	while (!done && index <= end) {
2351		unsigned int i, nr_pages, found_pages, wsize;
2352		pgoff_t next = 0, tofind, saved_index = index;
2353		struct cifs_credits credits_on_stack;
2354		struct cifs_credits *credits = &credits_on_stack;
2355		int get_file_rc = 0;
2356
2357		if (cfile)
2358			cifsFileInfo_put(cfile);
2359
2360		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2361
2362		/* in case of an error store it to return later */
2363		if (rc)
2364			get_file_rc = rc;
2365
2366		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2367						   &wsize, credits);
2368		if (rc != 0) {
2369			done = true;
2370			break;
2371		}
2372
2373		tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2374
2375		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2376						  &found_pages);
2377		if (!wdata) {
2378			rc = -ENOMEM;
2379			done = true;
2380			add_credits_and_wake_if(server, credits, 0);
2381			break;
2382		}
2383
2384		if (found_pages == 0) {
2385			kref_put(&wdata->refcount, cifs_writedata_release);
2386			add_credits_and_wake_if(server, credits, 0);
2387			break;
2388		}
2389
2390		nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2391					       end, &index, &next, &done);
2392
2393		/* nothing to write? */
2394		if (nr_pages == 0) {
2395			kref_put(&wdata->refcount, cifs_writedata_release);
2396			add_credits_and_wake_if(server, credits, 0);
2397			continue;
2398		}
2399
2400		wdata->credits = credits_on_stack;
2401		wdata->cfile = cfile;
2402		wdata->server = server;
2403		cfile = NULL;
2404
2405		if (!wdata->cfile) {
2406			cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2407				 get_file_rc);
2408			if (is_retryable_error(get_file_rc))
2409				rc = get_file_rc;
2410			else
2411				rc = -EBADF;
2412		} else
2413			rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2414
2415		for (i = 0; i < nr_pages; ++i)
2416			unlock_page(wdata->pages[i]);
2417
2418		/* send failure -- clean up the mess */
2419		if (rc != 0) {
2420			add_credits_and_wake_if(server, &wdata->credits, 0);
2421			for (i = 0; i < nr_pages; ++i) {
2422				if (is_retryable_error(rc))
2423					redirty_page_for_writepage(wbc,
2424							   wdata->pages[i]);
2425				else
2426					SetPageError(wdata->pages[i]);
2427				end_page_writeback(wdata->pages[i]);
2428				put_page(wdata->pages[i]);
2429			}
2430			if (!is_retryable_error(rc))
2431				mapping_set_error(mapping, rc);
2432		}
2433		kref_put(&wdata->refcount, cifs_writedata_release);
2434
2435		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2436			index = saved_index;
2437			continue;
2438		}
2439
2440		/* Return immediately if we received a signal during writing */
2441		if (is_interrupt_error(rc)) {
2442			done = true;
2443			break;
2444		}
2445
2446		if (rc != 0 && saved_rc == 0)
2447			saved_rc = rc;
2448
2449		wbc->nr_to_write -= nr_pages;
2450		if (wbc->nr_to_write <= 0)
2451			done = true;
2452
2453		index = next;
2454	}
2455
2456	if (!scanned && !done) {
2457		/*
2458		 * We hit the last page and there is more work to be done: wrap
2459		 * back to the start of the file
2460		 */
2461		scanned = true;
2462		index = 0;
2463		goto retry;
2464	}
2465
2466	if (saved_rc != 0)
2467		rc = saved_rc;
2468
2469	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2470		mapping->writeback_index = index;
2471
2472	if (cfile)
2473		cifsFileInfo_put(cfile);
2474	free_xid(xid);
2475	return rc;
2476}
2477
2478static int
2479cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2480{
2481	int rc;
2482	unsigned int xid;
2483
2484	xid = get_xid();
2485/* BB add check for wbc flags */
2486	get_page(page);
2487	if (!PageUptodate(page))
2488		cifs_dbg(FYI, "ppw - page not up to date\n");
2489
2490	/*
2491	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2492	 *
2493	 * A writepage() implementation always needs to do either this,
2494	 * or re-dirty the page with "redirty_page_for_writepage()" in
2495	 * the case of a failure.
2496	 *
2497	 * Just unlocking the page will cause the radix tree tag-bits
2498	 * to fail to update with the state of the page correctly.
2499	 */
2500	set_page_writeback(page);
2501retry_write:
2502	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2503	if (is_retryable_error(rc)) {
2504		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2505			goto retry_write;
2506		redirty_page_for_writepage(wbc, page);
2507	} else if (rc != 0) {
2508		SetPageError(page);
2509		mapping_set_error(page->mapping, rc);
2510	} else {
2511		SetPageUptodate(page);
2512	}
2513	end_page_writeback(page);
2514	put_page(page);
2515	free_xid(xid);
2516	return rc;
2517}
2518
2519static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2520{
2521	int rc = cifs_writepage_locked(page, wbc);
2522	unlock_page(page);
2523	return rc;
2524}
2525
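    /*
     * write_end for CIFS: if the page never became uptodate we must not
     * mark it dirty (background writeback would push stale data), so the
     * copied range is instead written through to the server synchronously.
     */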
2526static int cifs_write_end(struct file *file, struct address_space *mapping,
2527			loff_t pos, unsigned len, unsigned copied,
2528			struct page *page, void *fsdata)
2529{
2530	int rc;
2531	struct inode *inode = mapping->host;
2532	struct cifsFileInfo *cfile = file->private_data;
2533	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2534	__u32 pid;
2535
2536	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2537		pid = cfile->pid;
2538	else
2539		pid = current->tgid;
2540
2541	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2542		 page, pos, copied);
2543
2544	if (PageChecked(page)) {
2545		if (copied == len)
2546			SetPageUptodate(page);
2547		ClearPageChecked(page);
2548	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
2549		SetPageUptodate(page);
2550
2551	if (!PageUptodate(page)) {
2552		char *page_data;
2553		unsigned offset = pos & (PAGE_SIZE - 1);
2554		unsigned int xid;
2555
2556		xid = get_xid();
2557		/* this is probably better than calling
2558		   cifs_partialpagewrite() directly, since here the file
2559		   handle is known and we might as well leverage it */
2560		/* BB check if anything else missing out of ppw
2561		   such as updating last write time */
2562		page_data = kmap(page);
2563		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2564		/* if (rc < 0) should we set writebehind rc? */
2565		kunmap(page);
2566
2567		free_xid(xid);
2568	} else {
2569		rc = copied;
2570		pos += copied;
2571		set_page_dirty(page);
2572	}
2573
2574	if (rc > 0) {
2575		spin_lock(&inode->i_lock);
2576		if (pos > inode->i_size)
2577			i_size_write(inode, pos);
2578		spin_unlock(&inode->i_lock);
2579	}
2580
2581	unlock_page(page);
2582	put_page(page);
2583
2584	return rc;
2585}
2586
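    /*
     * Strict-cache fsync: flush dirty pages, zap the page cache if we no
     * longer hold read caching (so later reads refetch from the server),
     * then ask the server to flush the handle unless the mount set
     * CIFS_MOUNT_NOSSYNC.
     */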
2587int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2588		      int datasync)
2589{
2590	unsigned int xid;
2591	int rc = 0;
2592	struct cifs_tcon *tcon;
2593	struct TCP_Server_Info *server;
2594	struct cifsFileInfo *smbfile = file->private_data;
2595	struct inode *inode = file_inode(file);
2596	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2597
2598	rc = file_write_and_wait_range(file, start, end);
2599	if (rc) {
2600		trace_cifs_fsync_err(inode->i_ino, rc);
2601		return rc;
2602	}
2603
2604	xid = get_xid();
2605
2606	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2607		 file, datasync);
2608
2609	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2610		rc = cifs_zap_mapping(inode);
2611		if (rc) {
2612			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2613			rc = 0; /* don't care about it in fsync */
2614		}
2615	}
2616
2617	tcon = tlink_tcon(smbfile->tlink);
2618	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2619		server = tcon->ses->server;
2620		if (server->ops->flush)
2621			rc = server->ops->flush(xid, tcon, &smbfile->fid);
2622		else
2623			rc = -ENOSYS;
2624	}
2625
2626	free_xid(xid);
2627	return rc;
2628}
2629
2630int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2631{
2632	unsigned int xid;
2633	int rc = 0;
2634	struct cifs_tcon *tcon;
2635	struct TCP_Server_Info *server;
2636	struct cifsFileInfo *smbfile = file->private_data;
2637	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2638
2639	rc = file_write_and_wait_range(file, start, end);
2640	if (rc) {
2641		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2642		return rc;
2643	}
2644
2645	xid = get_xid();
2646
2647	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2648		 file, datasync);
2649
2650	tcon = tlink_tcon(smbfile->tlink);
2651	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2652		server = tcon->ses->server;
2653		if (server->ops->flush)
2654			rc = server->ops->flush(xid, tcon, &smbfile->fid);
2655		else
2656			rc = -ENOSYS;
2657	}
2658
2659	free_xid(xid);
2660	return rc;
2661}
2662
2663/*
2664 * As file closes, flush all cached write data for this inode checking
2665 * for write behind errors.
2666 */
2667int cifs_flush(struct file *file, fl_owner_t id)
2668{
2669	struct inode *inode = file_inode(file);
2670	int rc = 0;
2671
2672	if (file->f_mode & FMODE_WRITE)
2673		rc = filemap_write_and_wait(inode->i_mapping);
2674
2675	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2676	if (rc)
2677		trace_cifs_flush_err(inode->i_ino, rc);
2678	return rc;
2679}
2680
2681static int
2682cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2683{
2684	int rc = 0;
2685	unsigned long i;
2686
2687	for (i = 0; i < num_pages; i++) {
2688		pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2689		if (!pages[i]) {
2690			/*
2691			 * save number of pages we have already allocated and
2692			 * return with ENOMEM error
2693			 */
2694			num_pages = i;
2695			rc = -ENOMEM;
2696			break;
2697		}
2698	}
2699
2700	if (rc) {
2701		for (i = 0; i < num_pages; i++)
2702			put_page(pages[i]);
2703	}
2704	return rc;
2705}
2706
2707static inline
2708size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2709{
2710	size_t num_pages;
2711	size_t clen;
2712
2713	clen = min_t(const size_t, len, wsize);
2714	num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2715
2716	if (cur_len)
2717		*cur_len = clen;
2718
2719	return num_pages;
2720}
2721
2722static void
2723cifs_uncached_writedata_release(struct kref *refcount)
2724{
2725	int i;
2726	struct cifs_writedata *wdata = container_of(refcount,
2727					struct cifs_writedata, refcount);
2728
2729	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2730	for (i = 0; i < wdata->nr_pages; i++)
2731		put_page(wdata->pages[i]);
2732	cifs_writedata_release(refcount);
2733}
2734
2735static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2736
2737static void
2738cifs_uncached_writev_complete(struct work_struct *work)
2739{
2740	struct cifs_writedata *wdata = container_of(work,
2741					struct cifs_writedata, work);
2742	struct inode *inode = d_inode(wdata->cfile->dentry);
2743	struct cifsInodeInfo *cifsi = CIFS_I(inode);
2744
2745	spin_lock(&inode->i_lock);
2746	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2747	if (cifsi->server_eof > inode->i_size)
2748		i_size_write(inode, cifsi->server_eof);
2749	spin_unlock(&inode->i_lock);
2750
2751	complete(&wdata->done);
2752	collect_uncached_write_data(wdata->ctx);
2753	/* the below call can possibly free the last ref to aio ctx */
2754	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2755}
2756
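    /*
     * Copy up to *len bytes from the source iterator into the wdata's
     * preallocated pages, stopping early on a short copy (likely an
     * unmapped user address). *len and *num_pages are updated to what
     * was actually copied; returns -EFAULT if nothing could be copied.
     */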
2757static int
2758wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2759		      size_t *len, unsigned long *num_pages)
2760{
2761	size_t save_len, copied, bytes, cur_len = *len;
2762	unsigned long i, nr_pages = *num_pages;
2763
2764	save_len = cur_len;
2765	for (i = 0; i < nr_pages; i++) {
2766		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2767		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2768		cur_len -= copied;
2769		/*
2770		 * If we didn't copy as much as we expected, then that
2771		 * may mean we trod into an unmapped area. Stop copying
2772		 * at that point. On the next pass through the big
2773		 * loop, we'll likely end up getting a zero-length
2774		 * write and bailing out of it.
2775		 */
2776		if (copied < bytes)
2777			break;
2778	}
2779	cur_len = save_len - cur_len;
2780	*len = cur_len;
2781
2782	/*
2783	 * If we have no data to send, then that probably means that
2784	 * the copy above failed altogether. That's most likely because
2785	 * the address in the iovec was bogus. Return -EFAULT and let
2786	 * the caller free anything we allocated and bail out.
2787	 */
2788	if (!cur_len)
2789		return -EFAULT;
2790
2791	/*
2792	 * i + 1 now represents the number of pages we actually used in
2793	 * the copy phase above.
2794	 */
2795	*num_pages = i + 1;
2796	return 0;
2797}
2798
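    /*
     * Resend a previously built wdata after a reconnect. The wdata is
     * resent whole, so we sleep and retry until the server grants enough
     * credits to cover wdata->bytes; any SMB Direct memory registration
     * from the first attempt is deregistered before the resend.
     */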
2799static int
2800cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2801	struct cifs_aio_ctx *ctx)
2802{
2803	unsigned int wsize;
2804	struct cifs_credits credits;
2805	int rc;
2806	struct TCP_Server_Info *server = wdata->server;
2807
2808	do {
2809		if (wdata->cfile->invalidHandle) {
2810			rc = cifs_reopen_file(wdata->cfile, false);
2811			if (rc == -EAGAIN)
2812				continue;
2813			else if (rc)
2814				break;
2815		}
2816
2818		/*
2819		 * Wait for credits to resend this wdata.
2820		 * Note: we attempt to resend the whole wdata rather than
2821		 * in segments
2822		 */
2823		do {
2824			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2825						&wsize, &credits);
2826			if (rc)
2827				goto fail;
2828
2829			if (wsize < wdata->bytes) {
2830				add_credits_and_wake_if(server, &credits, 0);
2831				msleep(1000);
2832			}
2833		} while (wsize < wdata->bytes);
2834		wdata->credits = credits;
2835
2836		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2837
2838		if (!rc) {
2839			if (wdata->cfile->invalidHandle)
2840				rc = -EAGAIN;
2841			else {
2842#ifdef CONFIG_CIFS_SMB_DIRECT
2843				if (wdata->mr) {
2844					wdata->mr->need_invalidate = true;
2845					smbd_deregister_mr(wdata->mr);
2846					wdata->mr = NULL;
2847				}
2848#endif
2849				rc = server->ops->async_writev(wdata,
2850					cifs_uncached_writedata_release);
2851			}
2852		}
2853
2854		/* If the write was successfully sent, we are done */
2855		if (!rc) {
2856			list_add_tail(&wdata->list, wdata_list);
2857			return 0;
2858		}
2859
2860		/* Roll back credits and retry if needed */
2861		add_credits_and_wake_if(server, &wdata->credits, 0);
2862	} while (rc == -EAGAIN);
2863
2864fail:
2865	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2866	return rc;
2867}
2868
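    /*
     * Split an uncached or direct write into wsize-limited chunks and
     * issue each as an asynchronous write. For direct I/O the user pages
     * are pinned in place; otherwise the data is copied into freshly
     * allocated pages. Each sent wdata is queued on wdata_list for
     * collect_uncached_write_data() to reap.
     */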
2869static int
2870cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2871		     struct cifsFileInfo *open_file,
2872		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2873		     struct cifs_aio_ctx *ctx)
2874{
2875	int rc = 0;
2876	size_t cur_len;
2877	unsigned long nr_pages, num_pages, i;
2878	struct cifs_writedata *wdata;
2879	struct iov_iter saved_from = *from;
2880	loff_t saved_offset = offset;
2881	pid_t pid;
2882	struct TCP_Server_Info *server;
2883	struct page **pagevec;
2884	size_t start;
2885	unsigned int xid;
2886
2887	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2888		pid = open_file->pid;
2889	else
2890		pid = current->tgid;
2891
2892	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2893	xid = get_xid();
2894
2895	do {
2896		unsigned int wsize;
2897		struct cifs_credits credits_on_stack;
2898		struct cifs_credits *credits = &credits_on_stack;
2899
2900		if (open_file->invalidHandle) {
2901			rc = cifs_reopen_file(open_file, false);
2902			if (rc == -EAGAIN)
2903				continue;
2904			else if (rc)
2905				break;
2906		}
2907
2908		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2909						   &wsize, credits);
2910		if (rc)
2911			break;
2912
2913		cur_len = min_t(const size_t, len, wsize);
2914
2915		if (ctx->direct_io) {
2916			ssize_t result;
2917
2918			result = iov_iter_get_pages_alloc(
2919				from, &pagevec, cur_len, &start);
2920			if (result < 0) {
2921				cifs_dbg(VFS,
2922					 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2923					 result, iov_iter_type(from),
2924					 from->iov_offset, from->count);
2925				dump_stack();
2926
2927				rc = result;
2928				add_credits_and_wake_if(server, credits, 0);
2929				break;
2930			}
2931			cur_len = (size_t)result;
2932			iov_iter_advance(from, cur_len);
2933
2934			nr_pages =
2935				(cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2936
2937			wdata = cifs_writedata_direct_alloc(pagevec,
2938					     cifs_uncached_writev_complete);
2939			if (!wdata) {
2940				rc = -ENOMEM;
2941				add_credits_and_wake_if(server, credits, 0);
2942				break;
2943			}
2944
2946			wdata->page_offset = start;
2947			wdata->tailsz =
2948				nr_pages > 1 ?
2949					cur_len - (PAGE_SIZE - start) -
2950					(nr_pages - 2) * PAGE_SIZE :
2951					cur_len;
2952		} else {
2953			nr_pages = get_numpages(wsize, len, &cur_len);
2954			wdata = cifs_writedata_alloc(nr_pages,
2955					     cifs_uncached_writev_complete);
2956			if (!wdata) {
2957				rc = -ENOMEM;
2958				add_credits_and_wake_if(server, credits, 0);
2959				break;
2960			}
2961
2962			rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2963			if (rc) {
2964				kvfree(wdata->pages);
2965				kfree(wdata);
2966				add_credits_and_wake_if(server, credits, 0);
2967				break;
2968			}
2969
2970			num_pages = nr_pages;
2971			rc = wdata_fill_from_iovec(
2972				wdata, from, &cur_len, &num_pages);
2973			if (rc) {
2974				for (i = 0; i < nr_pages; i++)
2975					put_page(wdata->pages[i]);
2976				kvfree(wdata->pages);
2977				kfree(wdata);
2978				add_credits_and_wake_if(server, credits, 0);
2979				break;
2980			}
2981
2982			/*
2983			 * Bring nr_pages down to the number of pages we
2984			 * actually used, and free any pages that we didn't use.
2985			 */
2986			for ( ; nr_pages > num_pages; nr_pages--)
2987				put_page(wdata->pages[nr_pages - 1]);
2988
2989			wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2990		}
2991
2992		wdata->sync_mode = WB_SYNC_ALL;
2993		wdata->nr_pages = nr_pages;
2994		wdata->offset = (__u64)offset;
2995		wdata->cfile = cifsFileInfo_get(open_file);
2996		wdata->server = server;
2997		wdata->pid = pid;
2998		wdata->bytes = cur_len;
2999		wdata->pagesz = PAGE_SIZE;
3000		wdata->credits = credits_on_stack;
3001		wdata->ctx = ctx;
3002		kref_get(&ctx->refcount);
3003
3004		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3005
3006		if (!rc) {
3007			if (wdata->cfile->invalidHandle)
3008				rc = -EAGAIN;
3009			else
3010				rc = server->ops->async_writev(wdata,
3011					cifs_uncached_writedata_release);
3012		}
3013
3014		if (rc) {
3015			add_credits_and_wake_if(server, &wdata->credits, 0);
3016			kref_put(&wdata->refcount,
3017				 cifs_uncached_writedata_release);
3018			if (rc == -EAGAIN) {
3019				*from = saved_from;
3020				iov_iter_advance(from, offset - saved_offset);
3021				continue;
3022			}
3023			break;
3024		}
3025
3026		list_add_tail(&wdata->list, wdata_list);
3027		offset += cur_len;
3028		len -= cur_len;
3029	} while (len > 0);
3030
3031	free_xid(xid);
3032	return rc;
3033}
3034
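    /*
     * Reap completed uncached writes for an aio context in order of
     * increasing offset, resending any chunk that failed with -EAGAIN,
     * and report the total bytes written (or the first hard error)
     * through the iocb or the context's completion.
     */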
3035static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3036{
3037	struct cifs_writedata *wdata, *tmp;
3038	struct cifs_tcon *tcon;
3039	struct cifs_sb_info *cifs_sb;
3040	struct dentry *dentry = ctx->cfile->dentry;
3041	int rc;
3042
3043	tcon = tlink_tcon(ctx->cfile->tlink);
3044	cifs_sb = CIFS_SB(dentry->d_sb);
3045
3046	mutex_lock(&ctx->aio_mutex);
3047
3048	if (list_empty(&ctx->list)) {
3049		mutex_unlock(&ctx->aio_mutex);
3050		return;
3051	}
3052
3053	rc = ctx->rc;
3054	/*
3055	 * Wait for and collect replies for any successful sends in order of
3056	 * increasing offset. Once an error is hit, then return without waiting
3057	 * for any more replies.
3058	 */
3059restart_loop:
3060	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3061		if (!rc) {
3062			if (!try_wait_for_completion(&wdata->done)) {
3063				mutex_unlock(&ctx->aio_mutex);
3064				return;
3065			}
3066
3067			if (wdata->result)
3068				rc = wdata->result;
3069			else
3070				ctx->total_len += wdata->bytes;
3071
3072			/* resend call if it's a retryable error */
3073			if (rc == -EAGAIN) {
3074				struct list_head tmp_list;
3075				struct iov_iter tmp_from = ctx->iter;
3076
3077				INIT_LIST_HEAD(&tmp_list);
3078				list_del_init(&wdata->list);
3079
3080				if (ctx->direct_io)
3081					rc = cifs_resend_wdata(
3082						wdata, &tmp_list, ctx);
3083				else {
3084					iov_iter_advance(&tmp_from,
3085						 wdata->offset - ctx->pos);
3086
3087					rc = cifs_write_from_iter(wdata->offset,
3088						wdata->bytes, &tmp_from,
3089						ctx->cfile, cifs_sb, &tmp_list,
3090						ctx);
3091
3092					kref_put(&wdata->refcount,
3093						cifs_uncached_writedata_release);
3094				}
3095
3096				list_splice(&tmp_list, &ctx->list);
3097				goto restart_loop;
3098			}
3099		}
3100		list_del_init(&wdata->list);
3101		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3102	}
3103
3104	cifs_stats_bytes_written(tcon, ctx->total_len);
3105	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3106
3107	ctx->rc = (rc == 0) ? ctx->total_len : rc;
3108
3109	mutex_unlock(&ctx->aio_mutex);
3110
3111	if (ctx->iocb && ctx->iocb->ki_complete)
3112		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3113	else
3114		complete(&ctx->done);
3115}
3116
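    /*
     * Common back end for cifs_user_writev() and cifs_direct_writev():
     * set up an aio context, fan the iovec out via cifs_write_from_iter(),
     * then either return -EIOCBQUEUED for an async iocb or wait for all
     * of the chunks to complete.
     */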
3117static ssize_t __cifs_writev(
3118	struct kiocb *iocb, struct iov_iter *from, bool direct)
3119{
3120	struct file *file = iocb->ki_filp;
3121	ssize_t total_written = 0;
3122	struct cifsFileInfo *cfile;
3123	struct cifs_tcon *tcon;
3124	struct cifs_sb_info *cifs_sb;
3125	struct cifs_aio_ctx *ctx;
3126	struct iov_iter saved_from = *from;
3127	size_t len = iov_iter_count(from);
3128	int rc;
3129
3130	/*
3131	 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3132	 * In this case, fall back to the non-direct write function.
3133	 * This could be improved by getting the pages directly for ITER_KVEC.
3134	 */
3135	if (direct && iov_iter_is_kvec(from)) {
3136		cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3137		direct = false;
3138	}
3139
3140	rc = generic_write_checks(iocb, from);
3141	if (rc <= 0)
3142		return rc;
3143
3144	cifs_sb = CIFS_FILE_SB(file);
3145	cfile = file->private_data;
3146	tcon = tlink_tcon(cfile->tlink);
3147
3148	if (!tcon->ses->server->ops->async_writev)
3149		return -ENOSYS;
3150
3151	ctx = cifs_aio_ctx_alloc();
3152	if (!ctx)
3153		return -ENOMEM;
3154
3155	ctx->cfile = cifsFileInfo_get(cfile);
3156
3157	if (!is_sync_kiocb(iocb))
3158		ctx->iocb = iocb;
3159
3160	ctx->pos = iocb->ki_pos;
3161
3162	if (direct) {
3163		ctx->direct_io = true;
3164		ctx->iter = *from;
3165		ctx->len = len;
3166	} else {
3167		rc = setup_aio_ctx_iter(ctx, from, WRITE);
3168		if (rc) {
3169			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3170			return rc;
3171		}
3172	}
3173
3174	/* grab a lock here because the write response handlers can access ctx */
3175	mutex_lock(&ctx->aio_mutex);
3176
3177	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3178				  cfile, cifs_sb, &ctx->list, ctx);
3179
3180	/*
3181	 * If at least one write was successfully sent, then discard any rc
3182	 * value from the later writes. If the other write succeeds, then
3183	 * we'll end up returning whatever was written. If it fails, then
3184	 * we'll get a new rc value from that.
3185	 */
3186	if (!list_empty(&ctx->list))
3187		rc = 0;
3188
3189	mutex_unlock(&ctx->aio_mutex);
3190
3191	if (rc) {
3192		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3193		return rc;
3194	}
3195
3196	if (!is_sync_kiocb(iocb)) {
3197		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3198		return -EIOCBQUEUED;
3199	}
3200
3201	rc = wait_for_completion_killable(&ctx->done);
3202	if (rc) {
3203		mutex_lock(&ctx->aio_mutex);
3204		ctx->rc = rc = -EINTR;
3205		total_written = ctx->total_len;
3206		mutex_unlock(&ctx->aio_mutex);
3207	} else {
3208		rc = ctx->rc;
3209		total_written = ctx->total_len;
3210	}
3211
3212	kref_put(&ctx->refcount, cifs_aio_ctx_release);
3213
3214	if (unlikely(!total_written))
3215		return rc;
3216
3217	iocb->ki_pos += total_written;
3218	return total_written;
3219}
3220
3221ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3222{
3223	return __cifs_writev(iocb, from, true);
3224}
3225
3226ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3227{
3228	return __cifs_writev(iocb, from, false);
3229}
3230
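    /*
     * Write path for files cached for writing when POSIX brlocks are
     * unavailable: with lock_sem held for read, refuse the write if it
     * conflicts with a mandatory byte-range lock, otherwise use the
     * generic page cache write path and sync as required.
     */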
3231static ssize_t
3232cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3233{
3234	struct file *file = iocb->ki_filp;
3235	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3236	struct inode *inode = file->f_mapping->host;
3237	struct cifsInodeInfo *cinode = CIFS_I(inode);
3238	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3239	ssize_t rc;
3240
3241	inode_lock(inode);
3242	/*
3243	 * We need to hold the sem to be sure nobody modifies lock list
3244	 * with a brlock that prevents writing.
3245	 */
3246	down_read(&cinode->lock_sem);
3247
3248	rc = generic_write_checks(iocb, from);
3249	if (rc <= 0)
3250		goto out;
3251
3252	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3253				     server->vals->exclusive_lock_type, 0,
3254				     NULL, CIFS_WRITE_OP))
3255		rc = __generic_file_write_iter(iocb, from);
3256	else
3257		rc = -EACCES;
3258out:
3259	up_read(&cinode->lock_sem);
3260	inode_unlock(inode);
3261
3262	if (rc > 0)
3263		rc = generic_write_sync(iocb, rc);
3264	return rc;
3265}
3266
3267ssize_t
3268cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3269{
3270	struct inode *inode = file_inode(iocb->ki_filp);
3271	struct cifsInodeInfo *cinode = CIFS_I(inode);
3272	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3273	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3274						iocb->ki_filp->private_data;
3275	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3276	ssize_t written;
3277
3278	written = cifs_get_writer(cinode);
3279	if (written)
3280		return written;
3281
3282	if (CIFS_CACHE_WRITE(cinode)) {
3283		if (cap_unix(tcon->ses) &&
3284		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3285		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3286			written = generic_file_write_iter(iocb, from);
3287			goto out;
3288		}
3289		written = cifs_writev(iocb, from);
3290		goto out;
3291	}
3292	/*
3293	 * For non-oplocked files in strict cache mode we need to write the data
3294	 * to the server exactly from the pos to pos+len-1 rather than flush all
3295	 * affected pages because it may cause an error with mandatory locks on
3296	 * these pages but not on the region from pos to pos+len-1.
3297	 */
3298	written = cifs_user_writev(iocb, from);
3299	if (CIFS_CACHE_READ(cinode)) {
3300		/*
3301		 * We have read level caching and we have just sent a write
3302		 * request to the server thus making data in the cache stale.
3303		 * Zap the cache and set oplock/lease level to NONE to avoid
3304		 * reading stale data from the cache. All subsequent read
3305		 * operations will read new data from the server.
3306		 */
3307		cifs_zap_mapping(inode);
3308		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3309			 inode);
3310		cinode->oplock = 0;
3311	}
3312out:
3313	cifs_put_writer(cinode);
3314	return written;
3315}
3316
3317static struct cifs_readdata *
3318cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3319{
3320	struct cifs_readdata *rdata;
3321
3322	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3323	if (rdata != NULL) {
3324		rdata->pages = pages;
3325		kref_init(&rdata->refcount);
3326		INIT_LIST_HEAD(&rdata->list);
3327		init_completion(&rdata->done);
3328		INIT_WORK(&rdata->work, complete);
3329	}
3330
3331	return rdata;
3332}
3333
3334static struct cifs_readdata *
3335cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3336{
3337	struct page **pages =
3338		kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3339	struct cifs_readdata *ret = NULL;
3340
3341	if (pages) {
3342		ret = cifs_readdata_direct_alloc(pages, complete);
3343		if (!ret)
3344			kfree(pages);
3345	}
3346
3347	return ret;
3348}
3349
3350void
3351cifs_readdata_release(struct kref *refcount)
3352{
3353	struct cifs_readdata *rdata = container_of(refcount,
3354					struct cifs_readdata, refcount);
3355#ifdef CONFIG_CIFS_SMB_DIRECT
3356	if (rdata->mr) {
3357		smbd_deregister_mr(rdata->mr);
3358		rdata->mr = NULL;
3359	}
3360#endif
3361	if (rdata->cfile)
3362		cifsFileInfo_put(rdata->cfile);
3363
3364	kvfree(rdata->pages);
3365	kfree(rdata);
3366}
3367
3368static int
3369cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3370{
3371	int rc = 0;
3372	struct page *page;
3373	unsigned int i;
3374
3375	for (i = 0; i < nr_pages; i++) {
3376		page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3377		if (!page) {
3378			rc = -ENOMEM;
3379			break;
3380		}
3381		rdata->pages[i] = page;
3382	}
3383
3384	if (rc) {
3385		unsigned int nr_page_failed = i;
3386
3387		for (i = 0; i < nr_page_failed; i++) {
3388			put_page(rdata->pages[i]);
3389			rdata->pages[i] = NULL;
3390		}
3391	}
3392	return rc;
3393}
3394
3395static void
3396cifs_uncached_readdata_release(struct kref *refcount)
3397{
3398	struct cifs_readdata *rdata = container_of(refcount,
3399					struct cifs_readdata, refcount);
3400	unsigned int i;
3401
3402	kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3403	for (i = 0; i < rdata->nr_pages; i++) {
3404		put_page(rdata->pages[i]);
3405	}
3406	cifs_readdata_release(refcount);
3407}
3408
3409/**
3410 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3411 * @rdata:	the readdata response with list of pages holding data
3412 * @iter:	destination for our data
3413 *
3414 * This function copies data from a list of pages in a readdata response into
3415 * an array of iovecs. It will first calculate where the data should go
3416 * based on the info in the readdata and then copy the data into that spot.
3417 */
3418static int
3419cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3420{
3421	size_t remaining = rdata->got_bytes;
3422	unsigned int i;
3423
3424	for (i = 0; i < rdata->nr_pages; i++) {
3425		struct page *page = rdata->pages[i];
3426		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3427		size_t written;
3428
3429		if (unlikely(iov_iter_is_pipe(iter))) {
3430			void *addr = kmap_atomic(page);
3431
3432			written = copy_to_iter(addr, copy, iter);
3433			kunmap_atomic(addr);
3434		} else
3435			written = copy_page_to_iter(page, 0, copy, iter);
3436		remaining -= written;
3437		if (written < copy && iov_iter_count(iter) > 0)
3438			break;
3439	}
3440	return remaining ? -EFAULT : 0;
3441}
3442
3443static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3444
3445static void
3446cifs_uncached_readv_complete(struct work_struct *work)
3447{
3448	struct cifs_readdata *rdata = container_of(work,
3449						struct cifs_readdata, work);
3450
3451	complete(&rdata->done);
3452	collect_uncached_read_data(rdata->ctx);
3453	/* the below call can possibly free the last ref to aio ctx */
3454	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3455}
3456
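    /*
     * Fill the rdata pages with up to len bytes of a read response,
     * copied from an iterator (data already received) or read straight
     * from the socket. Pages beyond the received length are released and
     * tailsz is trimmed to the bytes placed in the final page.
     */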
3457static int
3458uncached_fill_pages(struct TCP_Server_Info *server,
3459		    struct cifs_readdata *rdata, struct iov_iter *iter,
3460		    unsigned int len)
3461{
3462	int result = 0;
3463	unsigned int i;
3464	unsigned int nr_pages = rdata->nr_pages;
3465	unsigned int page_offset = rdata->page_offset;
3466
3467	rdata->got_bytes = 0;
3468	rdata->tailsz = PAGE_SIZE;
3469	for (i = 0; i < nr_pages; i++) {
3470		struct page *page = rdata->pages[i];
3471		size_t n;
3472		unsigned int segment_size = rdata->pagesz;
3473
3474		if (i == 0)
3475			segment_size -= page_offset;
3476		else
3477			page_offset = 0;
3478
3480		if (len <= 0) {
3481			/* no need to hold page hostage */
3482			rdata->pages[i] = NULL;
3483			rdata->nr_pages--;
3484			put_page(page);
3485			continue;
3486		}
3487
3488		n = len;
3489		if (len >= segment_size)
3490			/* enough data to fill the page */
3491			n = segment_size;
3492		else
3493			rdata->tailsz = len;
3494		len -= n;
3495
3496		if (iter)
3497			result = copy_page_from_iter(
3498					page, page_offset, n, iter);
3499#ifdef CONFIG_CIFS_SMB_DIRECT
3500		else if (rdata->mr)
3501			result = n;
3502#endif
3503		else
3504			result = cifs_read_page_from_socket(
3505					server, page, page_offset, n);
3506		if (result < 0)
3507			break;
3508
3509		rdata->got_bytes += result;
3510	}
3511
3512	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3513						rdata->got_bytes : result;
3514}
3515
3516static int
3517cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3518			      struct cifs_readdata *rdata, unsigned int len)
3519{
3520	return uncached_fill_pages(server, rdata, NULL, len);
3521}
3522
3523static int
3524cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3525			      struct cifs_readdata *rdata,
3526			      struct iov_iter *iter)
3527{
3528	return uncached_fill_pages(server, rdata, iter, iter->count);
3529}
3530
3531static int cifs_resend_rdata(struct cifs_readdata *rdata,
3532			struct list_head *rdata_list,
3533			struct cifs_aio_ctx *ctx)
3534{
3535	unsigned int rsize;
3536	struct cifs_credits credits;
3537	int rc;
3538	struct TCP_Server_Info *server;
3539
3540	/* XXX: should we pick a new channel here? */
3541	server = rdata->server;
3542
3543	do {
3544		if (rdata->cfile->invalidHandle) {
3545			rc = cifs_reopen_file(rdata->cfile, true);
3546			if (rc == -EAGAIN)
3547				continue;
3548			else if (rc)
3549				break;
3550		}
3551
3552		/*
3553		 * Wait for credits to resend this rdata.
3554		 * Note: we attempt to resend the whole rdata rather than
3555		 * in segments
3556		 */
3557		do {
3558			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3559						&rsize, &credits);
3560
3561			if (rc)
3562				goto fail;
3563
3564			if (rsize < rdata->bytes) {
3565				add_credits_and_wake_if(server, &credits, 0);
3566				msleep(1000);
3567			}
3568		} while (rsize < rdata->bytes);
3569		rdata->credits = credits;
3570
3571		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3572		if (!rc) {
3573			if (rdata->cfile->invalidHandle)
3574				rc = -EAGAIN;
3575			else {
3576#ifdef CONFIG_CIFS_SMB_DIRECT
3577				if (rdata->mr) {
3578					rdata->mr->need_invalidate = true;
3579					smbd_deregister_mr(rdata->mr);
3580					rdata->mr = NULL;
3581				}
3582#endif
3583				rc = server->ops->async_readv(rdata);
3584			}
3585		}
3586
3587		/* If the read was successfully sent, we are done */
3588		if (!rc) {
3589			/* Add to aio pending list */
3590			list_add_tail(&rdata->list, rdata_list);
3591			return 0;
3592		}
3593
3594		/* Roll back credits and retry if needed */
3595		add_credits_and_wake_if(server, &rdata->credits, 0);
3596	} while (rc == -EAGAIN);
3597
3598fail:
3599	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3600	return rc;
3601}
3602
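    /*
     * Split an uncached or direct read into rsize-limited chunks and
     * issue each as an asynchronous read, queuing the rdata structures
     * on rdata_list for collect_uncached_read_data() to reap; the read
     * side counterpart of cifs_write_from_iter().
     */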
3603static int
3604cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3605		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3606		     struct cifs_aio_ctx *ctx)
3607{
3608	struct cifs_readdata *rdata;
3609	unsigned int npages, rsize;
3610	struct cifs_credits credits_on_stack;
3611	struct cifs_credits *credits = &credits_on_stack;
3612	size_t cur_len;
3613	int rc;
3614	pid_t pid;
3615	struct TCP_Server_Info *server;
3616	struct page **pagevec;
3617	size_t start;
3618	struct iov_iter direct_iov = ctx->iter;
3619
3620	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3621
3622	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3623		pid = open_file->pid;
3624	else
3625		pid = current->tgid;
3626
3627	if (ctx->direct_io)
3628		iov_iter_advance(&direct_iov, offset - ctx->pos);
3629
3630	do {
3631		if (open_file->invalidHandle) {
3632			rc = cifs_reopen_file(open_file, true);
3633			if (rc == -EAGAIN)
3634				continue;
3635			else if (rc)
3636				break;
3637		}
3638
3639		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3640						   &rsize, credits);
3641		if (rc)
3642			break;
3643
3644		cur_len = min_t(const size_t, len, rsize);
3645
3646		if (ctx->direct_io) {
3647			ssize_t result;
3648
3649			result = iov_iter_get_pages_alloc(
3650					&direct_iov, &pagevec,
3651					cur_len, &start);
3652			if (result < 0) {
3653				cifs_dbg(VFS,
3654					 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3655					 result, iov_iter_type(&direct_iov),
3656					 direct_iov.iov_offset,
3657					 direct_iov.count);
3658				dump_stack();
3659
3660				rc = result;
3661				add_credits_and_wake_if(server, credits, 0);
3662				break;
3663			}
3664			cur_len = (size_t)result;
3665			iov_iter_advance(&direct_iov, cur_len);
3666
3667			rdata = cifs_readdata_direct_alloc(
3668					pagevec, cifs_uncached_readv_complete);
3669			if (!rdata) {
3670				add_credits_and_wake_if(server, credits, 0);
3671				rc = -ENOMEM;
3672				break;
3673			}
3674
3675			npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3676			rdata->page_offset = start;
3677			rdata->tailsz = npages > 1 ?
3678				cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3679				cur_len;
3680
3681		} else {
3682
3683			npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3684			/* allocate a readdata struct */
3685			rdata = cifs_readdata_alloc(npages,
3686					    cifs_uncached_readv_complete);
3687			if (!rdata) {
3688				add_credits_and_wake_if(server, credits, 0);
3689				rc = -ENOMEM;
3690				break;
3691			}
3692
3693			rc = cifs_read_allocate_pages(rdata, npages);
3694			if (rc) {
3695				kvfree(rdata->pages);
3696				kfree(rdata);
3697				add_credits_and_wake_if(server, credits, 0);
3698				break;
3699			}
3700
3701			rdata->tailsz = PAGE_SIZE;
3702		}
3703
3704		rdata->server = server;
3705		rdata->cfile = cifsFileInfo_get(open_file);
3706		rdata->nr_pages = npages;
3707		rdata->offset = offset;
3708		rdata->bytes = cur_len;
3709		rdata->pid = pid;
3710		rdata->pagesz = PAGE_SIZE;
3711		rdata->read_into_pages = cifs_uncached_read_into_pages;
3712		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3713		rdata->credits = credits_on_stack;
3714		rdata->ctx = ctx;
3715		kref_get(&ctx->refcount);
3716
3717		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3718
3719		if (!rc) {
3720			if (rdata->cfile->invalidHandle)
3721				rc = -EAGAIN;
3722			else
3723				rc = server->ops->async_readv(rdata);
3724		}
3725
3726		if (rc) {
3727			add_credits_and_wake_if(server, &rdata->credits, 0);
3728			kref_put(&rdata->refcount,
3729				cifs_uncached_readdata_release);
3730			if (rc == -EAGAIN) {
3731				iov_iter_revert(&direct_iov, cur_len);
3732				continue;
3733			}
3734			break;
3735		}
3736
3737		list_add_tail(&rdata->list, rdata_list);
3738		offset += cur_len;
3739		len -= cur_len;
3740	} while (len > 0);
3741
3742	return rc;
3743}
3744
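    /*
     * Reap completed uncached reads for an aio context in order of
     * increasing offset, copying data out to the caller's iovec and
     * resending any chunk (or the remainder of a short read) that failed
     * with -EAGAIN, then report the result through the iocb or the
     * context's completion.
     */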
3745static void
3746collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3747{
3748	struct cifs_readdata *rdata, *tmp;
3749	struct iov_iter *to = &ctx->iter;
3750	struct cifs_sb_info *cifs_sb;
3751	int rc;
3752
3753	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3754
3755	mutex_lock(&ctx->aio_mutex);
3756
3757	if (list_empty(&ctx->list)) {
3758		mutex_unlock(&ctx->aio_mutex);
3759		return;
3760	}
3761
3762	rc = ctx->rc;
3763	/* the loop below should proceed in the order of increasing offsets */
3764again:
3765	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3766		if (!rc) {
3767			if (!try_wait_for_completion(&rdata->done)) {
3768				mutex_unlock(&ctx->aio_mutex);
3769				return;
3770			}
3771
3772			if (rdata->result == -EAGAIN) {
3773				/* resend call if it's a retryable error */
3774				struct list_head tmp_list;
3775				unsigned int got_bytes = rdata->got_bytes;
3776
3777				list_del_init(&rdata->list);
3778				INIT_LIST_HEAD(&tmp_list);
3779
3780				/*
3781				 * Got a part of data and then reconnect has
3782				 * happened -- fill the buffer and continue
3783				 * reading.
3784				 */
3785				if (got_bytes && got_bytes < rdata->bytes) {
3786					rc = 0;
3787					if (!ctx->direct_io)
3788						rc = cifs_readdata_to_iov(rdata, to);
3789					if (rc) {
3790						kref_put(&rdata->refcount,
3791							cifs_uncached_readdata_release);
3792						continue;
3793					}
3794				}
3795
3796				if (ctx->direct_io) {
3797					/*
3798					 * Re-use rdata as this is a
3799					 * direct I/O
3800					 */
3801					rc = cifs_resend_rdata(
3802						rdata,
3803						&tmp_list, ctx);
3804				} else {
3805					rc = cifs_send_async_read(
3806						rdata->offset + got_bytes,
3807						rdata->bytes - got_bytes,
3808						rdata->cfile, cifs_sb,
3809						&tmp_list, ctx);
3810
3811					kref_put(&rdata->refcount,
3812						cifs_uncached_readdata_release);
3813				}
3814
3815				list_splice(&tmp_list, &ctx->list);
3816
3817				goto again;
3818			} else if (rdata->result)
3819				rc = rdata->result;
3820			else if (!ctx->direct_io)
3821				rc = cifs_readdata_to_iov(rdata, to);
3822
3823			/* if there was a short read -- discard anything left */
3824			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3825				rc = -ENODATA;
3826
3827			ctx->total_len += rdata->got_bytes;
3828		}
3829		list_del_init(&rdata->list);
3830		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3831	}
3832
3833	if (!ctx->direct_io)
3834		ctx->total_len = ctx->len - iov_iter_count(to);
3835
3836	/* mask nodata case */
3837	if (rc == -ENODATA)
3838		rc = 0;
3839
3840	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3841
3842	mutex_unlock(&ctx->aio_mutex);
3843
3844	if (ctx->iocb && ctx->iocb->ki_complete)
3845		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3846	else
3847		complete(&ctx->done);
3848}
3849
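/*
 * Common implementation behind cifs_user_readv() and cifs_direct_readv():
 * split the request into rsize-sized async reads, then either wait for
 * collect_uncached_read_data() to gather the result (sync kiocb) or
 * return -EIOCBQUEUED and let it complete the iocb asynchronously.
 */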
3850static ssize_t __cifs_readv(
3851	struct kiocb *iocb, struct iov_iter *to, bool direct)
3852{
3853	size_t len;
3854	struct file *file = iocb->ki_filp;
3855	struct cifs_sb_info *cifs_sb;
3856	struct cifsFileInfo *cfile;
3857	struct cifs_tcon *tcon;
3858	ssize_t rc, total_read = 0;
3859	loff_t offset = iocb->ki_pos;
3860	struct cifs_aio_ctx *ctx;
3861
3862	/*
3863	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, so fall
3864	 * back to the data copy read path. This could be improved by
3865	 * getting the pages directly for ITER_KVEC.
3866	 */
3867	if (direct && iov_iter_is_kvec(to)) {
3868		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3869		direct = false;
3870	}
3871
3872	len = iov_iter_count(to);
3873	if (!len)
3874		return 0;
3875
3876	cifs_sb = CIFS_FILE_SB(file);
3877	cfile = file->private_data;
3878	tcon = tlink_tcon(cfile->tlink);
3879
3880	if (!tcon->ses->server->ops->async_readv)
3881		return -ENOSYS;
3882
3883	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3884		cifs_dbg(FYI, "attempting read on write only file instance\n");
3885
3886	ctx = cifs_aio_ctx_alloc();
3887	if (!ctx)
3888		return -ENOMEM;
3889
3890	ctx->cfile = cifsFileInfo_get(cfile);
3891
3892	if (!is_sync_kiocb(iocb))
3893		ctx->iocb = iocb;
3894
3895	if (iter_is_iovec(to))
3896		ctx->should_dirty = true;
3897
3898	if (direct) {
3899		ctx->pos = offset;
3900		ctx->direct_io = true;
3901		ctx->iter = *to;
3902		ctx->len = len;
3903	} else {
3904		rc = setup_aio_ctx_iter(ctx, to, READ);
3905		if (rc) {
3906			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3907			return rc;
3908		}
3909		len = ctx->len;
3910	}
3911
3912	/* grab the mutex here because read response handlers can access ctx */
3913	mutex_lock(&ctx->aio_mutex);
3914
3915	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3916
3917	/* if sending at least one read request succeeded, reset rc */
3918	if (!list_empty(&ctx->list))
3919		rc = 0;
3920
3921	mutex_unlock(&ctx->aio_mutex);
3922
3923	if (rc) {
3924		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3925		return rc;
3926	}
3927
3928	if (!is_sync_kiocb(iocb)) {
3929		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3930		return -EIOCBQUEUED;
3931	}
3932
3933	rc = wait_for_completion_killable(&ctx->done);
3934	if (rc) {
3935		mutex_lock(&ctx->aio_mutex);
3936		ctx->rc = rc = -EINTR;
3937		total_read = ctx->total_len;
3938		mutex_unlock(&ctx->aio_mutex);
3939	} else {
3940		rc = ctx->rc;
3941		total_read = ctx->total_len;
3942	}
3943
3944	kref_put(&ctx->refcount, cifs_aio_ctx_release);
3945
3946	if (total_read) {
3947		iocb->ki_pos += total_read;
3948		return total_read;
3949	}
3950	return rc;
3951}
3952
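/*
 * Thin wrappers around __cifs_readv(); the "direct" variant avoids the
 * intermediate copy by reading straight into the caller's pages.
 */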
3953ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3954{
3955	return __cifs_readv(iocb, to, true);
3956}
3957
3958ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3959{
3960	return __cifs_readv(iocb, to, false);
3961}
3962
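/*
 * Read entry point for cache=strict mounts: serve the read through the
 * page cache only when we hold a read oplock and no mandatory brlock
 * conflicts with the range; otherwise go to the server directly.
 */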
3963ssize_t
3964cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3965{
3966	struct inode *inode = file_inode(iocb->ki_filp);
3967	struct cifsInodeInfo *cinode = CIFS_I(inode);
3968	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3969	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3970						iocb->ki_filp->private_data;
3971	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3972	int rc = -EACCES;
3973
3974	/*
3975	 * In strict cache mode we need to read from the server every time
3976	 * if we don't have a level II oplock, because the server can delay
3977	 * mtime changes - so we can't decide whether to invalidate the
3978	 * inode. Reading from the page cache can also fail if there are
3979	 * mandatory locks on pages affected by this read but not on the
3980	 * region from pos to pos+len-1.
3981	 */
3982	if (!CIFS_CACHE_READ(cinode))
3983		return cifs_user_readv(iocb, to);
3984
3985	if (cap_unix(tcon->ses) &&
3986	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3987	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3988		return generic_file_read_iter(iocb, to);
3989
3990	/*
3991	 * We need to hold the sem to be sure nobody modifies lock list
3992	 * with a brlock that prevents reading.
3993	 */
3994	down_read(&cinode->lock_sem);
3995	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3996				     tcon->ses->server->vals->shared_lock_type,
3997				     0, NULL, CIFS_READ_OP))
3998		rc = generic_file_read_iter(iocb, to);
3999	up_read(&cinode->lock_sem);
4000	return rc;
4001}
4002
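/*
 * Legacy synchronous read helper: issue sync_read calls of at most rsize
 * bytes each, reopening invalid handles and retrying on -EAGAIN, until
 * read_size bytes have been read or the server reports an error or EOF.
 */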
4003static ssize_t
4004cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4005{
4006	int rc = -EACCES;
4007	unsigned int bytes_read = 0;
4008	unsigned int total_read;
4009	unsigned int current_read_size;
4010	unsigned int rsize;
4011	struct cifs_sb_info *cifs_sb;
4012	struct cifs_tcon *tcon;
4013	struct TCP_Server_Info *server;
4014	unsigned int xid;
4015	char *cur_offset;
4016	struct cifsFileInfo *open_file;
4017	struct cifs_io_parms io_parms = {0};
4018	int buf_type = CIFS_NO_BUFFER;
4019	__u32 pid;
4020
4021	xid = get_xid();
4022	cifs_sb = CIFS_FILE_SB(file);
4023
4024	/* FIXME: set up handlers for larger reads and/or convert to async */
4025	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
4026
4027	if (file->private_data == NULL) {
4028		rc = -EBADF;
4029		free_xid(xid);
4030		return rc;
4031	}
4032	open_file = file->private_data;
4033	tcon = tlink_tcon(open_file->tlink);
4034	server = cifs_pick_channel(tcon->ses);
4035
4036	if (!server->ops->sync_read) {
4037		free_xid(xid);
4038		return -ENOSYS;
4039	}
4040
4041	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4042		pid = open_file->pid;
4043	else
4044		pid = current->tgid;
4045
4046	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4047		cifs_dbg(FYI, "attempting read on write only file instance\n");
4048
4049	for (total_read = 0, cur_offset = read_data; read_size > total_read;
4050	     total_read += bytes_read, cur_offset += bytes_read) {
4051		do {
4052			current_read_size = min_t(uint, read_size - total_read,
4053						  rsize);
4054			/*
4055			 * For Windows ME and 9x we do not want to request more
4056			 * than the server negotiated, since it will refuse the
4057			 * read otherwise.
4058			 */
4059			if (!(tcon->ses->capabilities &
4060				tcon->ses->server->vals->cap_large_files)) {
4061				current_read_size = min_t(uint,
4062					current_read_size, CIFSMaxBufSize);
4063			}
4064			if (open_file->invalidHandle) {
4065				rc = cifs_reopen_file(open_file, true);
4066				if (rc != 0)
4067					break;
4068			}
4069			io_parms.pid = pid;
4070			io_parms.tcon = tcon;
4071			io_parms.offset = *offset;
4072			io_parms.length = current_read_size;
4073			io_parms.server = server;
4074			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4075						    &bytes_read, &cur_offset,
4076						    &buf_type);
4077		} while (rc == -EAGAIN);
4078
4079		if (rc || (bytes_read == 0)) {
4080			if (total_read) {
4081				break;
4082			} else {
4083				free_xid(xid);
4084				return rc;
4085			}
4086		} else {
4087			cifs_stats_bytes_read(tcon, total_read);
4088			*offset += bytes_read;
4089		}
4090	}
4091	free_xid(xid);
4092	return total_read;
4093}
4094
4095/*
4096 * If the page is mmap'ed into a process' page tables, then we need to make
4097 * sure that it doesn't change while being written back.
4098 */
4099static vm_fault_t
4100cifs_page_mkwrite(struct vm_fault *vmf)
4101{
4102	struct page *page = vmf->page;
4103
4104	lock_page(page);
4105	return VM_FAULT_LOCKED;
4106}
4107
4108static const struct vm_operations_struct cifs_file_vm_ops = {
4109	.fault = filemap_fault,
4110	.map_pages = filemap_map_pages,
4111	.page_mkwrite = cifs_page_mkwrite,
4112};
4113
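/*
 * mmap entry point for cache=strict mounts: if we do not hold a read
 * oplock, zap the current page cache contents first so that the new
 * mapping does not see stale data.
 */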
4114int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4115{
4116	int xid, rc = 0;
4117	struct inode *inode = file_inode(file);
4118
4119	xid = get_xid();
4120
4121	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4122		rc = cifs_zap_mapping(inode);
4123	if (!rc)
4124		rc = generic_file_mmap(file, vma);
4125	if (!rc)
4126		vma->vm_ops = &cifs_file_vm_ops;
4127
4128	free_xid(xid);
4129	return rc;
4130}
4131
4132int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4133{
4134	int rc, xid;
4135
4136	xid = get_xid();
4137
4138	rc = cifs_revalidate_file(file);
4139	if (rc)
4140		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4141			 rc);
4142	if (!rc)
4143		rc = generic_file_mmap(file, vma);
4144	if (!rc)
4145		vma->vm_ops = &cifs_file_vm_ops;
4146
4147	free_xid(xid);
4148	return rc;
4149}
4150
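/*
 * Completion work for an async readpages request: mark the pages that
 * were filled up to date, hand them to fscache, and drop the readdata
 * reference taken for the read.
 */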
4151static void
4152cifs_readv_complete(struct work_struct *work)
4153{
4154	unsigned int i, got_bytes;
4155	struct cifs_readdata *rdata = container_of(work,
4156						struct cifs_readdata, work);
4157
4158	got_bytes = rdata->got_bytes;
4159	for (i = 0; i < rdata->nr_pages; i++) {
4160		struct page *page = rdata->pages[i];
4161
4162		lru_cache_add(page);
4163
4164		if (rdata->result == 0 ||
4165		    (rdata->result == -EAGAIN && got_bytes)) {
4166			flush_dcache_page(page);
4167			SetPageUptodate(page);
4168		}
4169
4170		unlock_page(page);
4171
4172		if (rdata->result == 0 ||
4173		    (rdata->result == -EAGAIN && got_bytes))
4174			cifs_readpage_to_fscache(rdata->mapping->host, page);
4175
4176		got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4177
4178		put_page(page);
4179		rdata->pages[i] = NULL;
4180	}
4181	kref_put(&rdata->refcount, cifs_readdata_release);
4182}
4183
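/*
 * Fill the pages attached to this rdata either from the transport socket
 * or, when an iov_iter is supplied, by copying out of that iter. Pages
 * wholly beyond the server's (probable) EOF are zeroed, marked up to
 * date, and released early.
 */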
4184static int
4185readpages_fill_pages(struct TCP_Server_Info *server,
4186		     struct cifs_readdata *rdata, struct iov_iter *iter,
4187		     unsigned int len)
4188{
4189	int result = 0;
4190	unsigned int i;
4191	u64 eof;
4192	pgoff_t eof_index;
4193	unsigned int nr_pages = rdata->nr_pages;
4194	unsigned int page_offset = rdata->page_offset;
4195
4196	/* determine the eof that the server (probably) has */
4197	eof = CIFS_I(rdata->mapping->host)->server_eof;
4198	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4199	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4200
4201	rdata->got_bytes = 0;
4202	rdata->tailsz = PAGE_SIZE;
4203	for (i = 0; i < nr_pages; i++) {
4204		struct page *page = rdata->pages[i];
4205		unsigned int to_read = rdata->pagesz;
4206		size_t n;
4207
4208		if (i == 0)
4209			to_read -= page_offset;
4210		else
4211			page_offset = 0;
4212
4213		n = to_read;
4214
4215		if (len >= to_read) {
4216			len -= to_read;
4217		} else if (len > 0) {
4218			/* enough for partial page, fill and zero the rest */
4219			zero_user(page, len + page_offset, to_read - len);
4220			n = rdata->tailsz = len;
4221			len = 0;
4222		} else if (page->index > eof_index) {
4223			/*
4224			 * The VFS will not try to do readahead past the
4225			 * i_size, but it's possible that we have outstanding
4226			 * writes with gaps in the middle and the i_size hasn't
4227			 * caught up yet. Populate those with zeroed out pages
4228			 * to prevent the VFS from repeatedly attempting to
4229			 * fill them until the writes are flushed.
4230			 */
4231			zero_user(page, 0, PAGE_SIZE);
4232			lru_cache_add(page);
4233			flush_dcache_page(page);
4234			SetPageUptodate(page);
4235			unlock_page(page);
4236			put_page(page);
4237			rdata->pages[i] = NULL;
4238			rdata->nr_pages--;
4239			continue;
4240		} else {
4241			/* no need to hold page hostage */
4242			lru_cache_add(page);
4243			unlock_page(page);
4244			put_page(page);
4245			rdata->pages[i] = NULL;
4246			rdata->nr_pages--;
4247			continue;
4248		}
4249
4250		if (iter)
4251			result = copy_page_from_iter(
4252					page, page_offset, n, iter);
4253#ifdef CONFIG_CIFS_SMB_DIRECT
4254		else if (rdata->mr)
4255			result = n;
4256#endif
4257		else
4258			result = cifs_read_page_from_socket(
4259					server, page, page_offset, n);
4260		if (result < 0)
4261			break;
4262
4263		rdata->got_bytes += result;
4264	}
4265
4266	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4267						rdata->got_bytes : result;
4268}
4269
4270static int
4271cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4272			       struct cifs_readdata *rdata, unsigned int len)
4273{
4274	return readpages_fill_pages(server, rdata, NULL, len);
4275}
4276
4277static int
4278cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4279			       struct cifs_readdata *rdata,
4280			       struct iov_iter *iter)
4281{
4282	return readpages_fill_pages(server, rdata, iter, iter->count);
4283}
4284
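/*
 * Peel a run of index-contiguous pages (at most rsize bytes) off the
 * tail of page_list, add them to the page cache, and return them on
 * tmplist together with the starting file offset and byte count.
 */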
4285static int
4286readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4287		    unsigned int rsize, struct list_head *tmplist,
4288		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4289{
4290	struct page *page, *tpage;
4291	unsigned int expected_index;
4292	int rc;
4293	gfp_t gfp = readahead_gfp_mask(mapping);
4294
4295	INIT_LIST_HEAD(tmplist);
4296
4297	page = lru_to_page(page_list);
4298
4299	/*
4300	 * Lock the page and put it in the cache. Since no one else
4301	 * should have access to this page, we're safe to simply set
4302	 * PG_locked without checking it first.
4303	 */
4304	__SetPageLocked(page);
4305	rc = add_to_page_cache_locked(page, mapping,
4306				      page->index, gfp);
4307
4308	/* give up if we can't stick it in the cache */
4309	if (rc) {
4310		__ClearPageLocked(page);
4311		return rc;
4312	}
4313
4314	/* move first page to the tmplist */
4315	*offset = (loff_t)page->index << PAGE_SHIFT;
4316	*bytes = PAGE_SIZE;
4317	*nr_pages = 1;
4318	list_move_tail(&page->lru, tmplist);
4319
4320	/* now try and add more pages onto the request */
4321	expected_index = page->index + 1;
4322	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4323		/* discontinuity? */
4324		if (page->index != expected_index)
4325			break;
4326
4327		/* would this page push the read over the rsize? */
4328		if (*bytes + PAGE_SIZE > rsize)
4329			break;
4330
4331		__SetPageLocked(page);
4332		rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4333		if (rc) {
4334			__ClearPageLocked(page);
4335			break;
4336		}
4337		list_move_tail(&page->lru, tmplist);
4338		(*bytes) += PAGE_SIZE;
4339		expected_index++;
4340		(*nr_pages)++;
4341	}
4342	return rc;
4343}
4344
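/*
 * ->readpages() for cifs: try fscache first, then batch the remaining
 * pages into rsize-sized async read requests.
 */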
4345static int cifs_readpages(struct file *file, struct address_space *mapping,
4346	struct list_head *page_list, unsigned num_pages)
4347{
4348	int rc;
4349	int err = 0;
4350	struct list_head tmplist;
4351	struct cifsFileInfo *open_file = file->private_data;
4352	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4353	struct TCP_Server_Info *server;
4354	pid_t pid;
4355	unsigned int xid;
4356
4357	xid = get_xid();
4358	/*
4359	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
4360	 * immediately if the cookie is negative.
4361	 *
4362	 * After this point, every page in the list might have PG_fscache set,
4363	 * so we will need to clean that up off of every page we don't use.
4364	 */
4365	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4366					 &num_pages);
4367	if (rc == 0) {
4368		free_xid(xid);
4369		return rc;
4370	}
4371
4372	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4373		pid = open_file->pid;
4374	else
4375		pid = current->tgid;
4376
4377	rc = 0;
4378	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4379
4380	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4381		 __func__, file, mapping, num_pages);
4382
4383	/*
4384	 * Start with the page at end of list and move it to private
4385	 * list. Do the same with any following pages until we hit
4386	 * the rsize limit, hit an index discontinuity, or run out of
4387	 * pages. Issue the async read and then start the loop again
4388	 * until the list is empty.
4389	 *
4390	 * Note that list order is important. The page_list is in
4391	 * the order of declining indexes. When we put the pages in
4392	 * the rdata->pages, then we want them in increasing order.
4393	 */
4394	while (!list_empty(page_list) && !err) {
4395		unsigned int i, nr_pages, bytes, rsize;
4396		loff_t offset;
4397		struct page *page, *tpage;
4398		struct cifs_readdata *rdata;
4399		struct cifs_credits credits_on_stack;
4400		struct cifs_credits *credits = &credits_on_stack;
4401
4402		if (open_file->invalidHandle) {
4403			rc = cifs_reopen_file(open_file, true);
4404			if (rc == -EAGAIN)
4405				continue;
4406			else if (rc)
4407				break;
4408		}
4409
4410		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
4411						   &rsize, credits);
4412		if (rc)
4413			break;
4414
4415		/*
4416		 * Give up immediately if rsize is too small to read an entire
4417		 * page. The VFS will fall back to readpage. However, we should
4418		 * never reach this point, since we set ra_pages to 0 when the
4419		 * rsize is smaller than a cache page.
4420		 */
4421		if (unlikely(rsize < PAGE_SIZE)) {
4422			add_credits_and_wake_if(server, credits, 0);
4423			free_xid(xid);
4424			return 0;
4425		}
4426
4427		nr_pages = 0;
4428		err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4429					 &nr_pages, &offset, &bytes);
4430		if (!nr_pages) {
4431			add_credits_and_wake_if(server, credits, 0);
4432			break;
4433		}
4434
4435		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4436		if (!rdata) {
4437			/* best to give up if we're out of mem */
4438			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4439				list_del(&page->lru);
4440				lru_cache_add(page);
4441				unlock_page(page);
4442				put_page(page);
4443			}
4444			rc = -ENOMEM;
4445			add_credits_and_wake_if(server, credits, 0);
4446			break;
4447		}
4448
4449		rdata->cfile = cifsFileInfo_get(open_file);
4450		rdata->server = server;
4451		rdata->mapping = mapping;
4452		rdata->offset = offset;
4453		rdata->bytes = bytes;
4454		rdata->pid = pid;
4455		rdata->pagesz = PAGE_SIZE;
4456		rdata->tailsz = PAGE_SIZE;
4457		rdata->read_into_pages = cifs_readpages_read_into_pages;
4458		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4459		rdata->credits = credits_on_stack;
4460
4461		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4462			list_del(&page->lru);
4463			rdata->pages[rdata->nr_pages++] = page;
4464		}
4465
4466		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4467
4468		if (!rc) {
4469			if (rdata->cfile->invalidHandle)
4470				rc = -EAGAIN;
4471			else
4472				rc = server->ops->async_readv(rdata);
4473		}
4474
4475		if (rc) {
4476			add_credits_and_wake_if(server, &rdata->credits, 0);
4477			for (i = 0; i < rdata->nr_pages; i++) {
4478				page = rdata->pages[i];
4479				lru_cache_add(page);
4480				unlock_page(page);
4481				put_page(page);
4482			}
4483			/* Fall back to readpage in error/reconnect cases */
4484			kref_put(&rdata->refcount, cifs_readdata_release);
4485			break;
4486		}
4487
4488		kref_put(&rdata->refcount, cifs_readdata_release);
4489	}
4490
4491	/* Any pages that have been shown to fscache but didn't get added to
4492	 * the pagecache must be uncached before they get returned to the
4493	 * allocator.
4494	 */
4495	cifs_fscache_readpages_cancel(mapping->host, page_list);
4496	free_xid(xid);
4497	return rc;
4498}
4499
4500/*
4501 * cifs_readpage_worker must be called with the page pinned
4502 */
4503static int cifs_readpage_worker(struct file *file, struct page *page,
4504	loff_t *poffset)
4505{
4506	char *read_data;
4507	int rc;
4508
4509	/* Is the page cached? */
4510	rc = cifs_readpage_from_fscache(file_inode(file), page);
4511	if (rc == 0)
4512		goto read_complete;
4513
4514	read_data = kmap(page);
4515	/* for reads over a certain size we could initiate async read-ahead */
4516
4517	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4518
4519	if (rc < 0)
4520		goto io_error;
4521	else
4522		cifs_dbg(FYI, "Bytes read %d\n", rc);
4523
4524	/* we do not want atime to be less than mtime, it broke some apps */
4525	file_inode(file)->i_atime = current_time(file_inode(file));
4526	if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)) < 0)
4527		file_inode(file)->i_atime = file_inode(file)->i_mtime;
4528	else
4529		file_inode(file)->i_atime = current_time(file_inode(file));
4530
4531	if (PAGE_SIZE > rc)
4532		memset(read_data + rc, 0, PAGE_SIZE - rc);
4533
4534	flush_dcache_page(page);
4535	SetPageUptodate(page);
4536
4537	/* send this page to the cache */
4538	cifs_readpage_to_fscache(file_inode(file), page);
4539
4540	rc = 0;
4541
4542io_error:
4543	kunmap(page);
4544	unlock_page(page);
4545
4546read_complete:
4547	return rc;
4548}
4549
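/* ->readpage() for cifs: read a single page, trying fscache first */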
4550static int cifs_readpage(struct file *file, struct page *page)
4551{
4552	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4553	int rc = -EACCES;
4554	unsigned int xid;
4555
4556	xid = get_xid();
4557
4558	if (file->private_data == NULL) {
4559		rc = -EBADF;
4560		free_xid(xid);
4561		return rc;
4562	}
4563
4564	cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4565		 page, (int)offset, (int)offset);
4566
4567	rc = cifs_readpage_worker(file, page, &offset);
4568
4569	free_xid(xid);
4570	return rc;
4571}
4572
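/* return 1 if at least one open file on the inode was opened for writing */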
4573static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4574{
4575	struct cifsFileInfo *open_file;
4576
4577	spin_lock(&cifs_inode->open_file_lock);
4578	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4579		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4580			spin_unlock(&cifs_inode->open_file_lock);
4581			return 1;
4582		}
4583	}
4584	spin_unlock(&cifs_inode->open_file_lock);
4585	return 0;
4586}
4587
4588/* We do not want to update the file size from the server for inodes
4589   open for write - to avoid races with writepage extending the file.
4590   In the future we could consider allowing refreshing the inode only
4591   on increases in the file size, but this is tricky to do without
4592   racing with writebehind page caching in the current Linux kernel
4593   design. */
4594bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4595{
4596	if (!cifsInode)
4597		return true;
4598
4599	if (is_inode_writable(cifsInode)) {
4600		/* This inode is open for write at least once */
4601		struct cifs_sb_info *cifs_sb;
4602
4603		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4604		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4605			/* since there is no page cache to corrupt on
4606			   directio we can change size safely */
4607			return true;
4608		}
4609
4610		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4611			return true;
4612
4613		return false;
4614	} else
4615		return true;
4616}
4617
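/*
 * ->write_begin() for cifs: lock the target page and, when the write
 * does not cover the whole page, bring the rest of it up to date by
 * zeroing (beyond EOF with an oplock) or by reading it in.
 */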
4618static int cifs_write_begin(struct file *file, struct address_space *mapping,
4619			loff_t pos, unsigned len, unsigned flags,
4620			struct page **pagep, void **fsdata)
4621{
4622	int oncethru = 0;
4623	pgoff_t index = pos >> PAGE_SHIFT;
4624	loff_t offset = pos & (PAGE_SIZE - 1);
4625	loff_t page_start = pos & PAGE_MASK;
4626	loff_t i_size;
4627	struct page *page;
4628	int rc = 0;
4629
4630	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4631
4632start:
4633	page = grab_cache_page_write_begin(mapping, index, flags);
4634	if (!page) {
4635		rc = -ENOMEM;
4636		goto out;
4637	}
4638
4639	if (PageUptodate(page))
4640		goto out;
4641
4642	/*
4643	 * If we write a full page it will be up to date, no need to read from
4644	 * the server. If the write is short, we'll end up doing a sync write
4645	 * instead.
4646	 */
4647	if (len == PAGE_SIZE)
4648		goto out;
4649
4650	/*
4651	 * optimize away the read when we have an oplock, and we're not
4652	 * expecting to use any of the data we'd be reading in. That
4653	 * is, when the page lies beyond the EOF, or straddles the EOF
4654	 * and the write will cover all of the existing data.
4655	 */
4656	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4657		i_size = i_size_read(mapping->host);
4658		if (page_start >= i_size ||
4659		    (offset == 0 && (pos + len) >= i_size)) {
4660			zero_user_segments(page, 0, offset,
4661					   offset + len,
4662					   PAGE_SIZE);
4663			/*
4664			 * PageChecked means that the parts of the page
4665			 * to which we're not writing are considered up
4666			 * to date. Once the data is copied to the
4667			 * page, it can be set uptodate.
4668			 */
4669			SetPageChecked(page);
4670			goto out;
4671		}
4672	}
4673
4674	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4675		/*
4676		 * might as well read a page, it is fast enough. If we get
4677		 * an error, we don't need to return it. cifs_write_end will
4678		 * do a sync write instead since PG_uptodate isn't set.
4679		 */
4680		cifs_readpage_worker(file, page, &page_start);
4681		put_page(page);
4682		oncethru = 1;
4683		goto start;
4684	} else {
4685		/* we could try using another file handle if there is one -
4686		   but how would we lock it to prevent a close of that handle
4687		   racing with this read? In any case this page will be
4688		   written out by write_end so it is fine */
4689	}
4690out:
4691	*pagep = page;
4692	return rc;
4693}
4694
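/* ->releasepage(): refuse for private pages, else let fscache decide */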
4695static int cifs_release_page(struct page *page, gfp_t gfp)
4696{
4697	if (PagePrivate(page))
4698		return 0;
4699
4700	return cifs_fscache_release_page(page, gfp);
4701}
4702
4703static void cifs_invalidate_page(struct page *page, unsigned int offset,
4704				 unsigned int length)
4705{
4706	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4707
4708	if (offset == 0 && length == PAGE_SIZE)
4709		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4710}
4711
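/*
 * Write a dirty page back synchronously (WB_SYNC_ALL) and invalidate
 * its fscache copy before the VM throws the page away.
 */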
4712static int cifs_launder_page(struct page *page)
4713{
4714	int rc = 0;
4715	loff_t range_start = page_offset(page);
4716	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4717	struct writeback_control wbc = {
4718		.sync_mode = WB_SYNC_ALL,
4719		.nr_to_write = 0,
4720		.range_start = range_start,
4721		.range_end = range_end,
4722	};
4723
4724	cifs_dbg(FYI, "Launder page: %p\n", page);
4725
4726	if (clear_page_dirty_for_io(page))
4727		rc = cifs_writepage_locked(page, &wbc);
4728
4729	cifs_fscache_invalidate_page(page, page->mapping->host);
4730	return rc;
4731}
4732
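/*
 * Worker run when the server breaks our oplock or lease: downgrade the
 * cached oplock state, flush (and if necessary zap) the page cache,
 * push cached byte-range locks to the server, and acknowledge the break.
 */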
4733void cifs_oplock_break(struct work_struct *work)
4734{
4735	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4736						  oplock_break);
4737	struct inode *inode = d_inode(cfile->dentry);
4738	struct cifsInodeInfo *cinode = CIFS_I(inode);
4739	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4740	struct TCP_Server_Info *server = tcon->ses->server;
4741	int rc = 0;
4742	bool purge_cache = false;
4743
4744	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4745			TASK_UNINTERRUPTIBLE);
4746
4747	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4748				      cfile->oplock_epoch, &purge_cache);
4749
4750	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4751						cifs_has_mand_locks(cinode)) {
4752		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4753			 inode);
4754		cinode->oplock = 0;
4755	}
4756
4757	if (inode && S_ISREG(inode->i_mode)) {
4758		if (CIFS_CACHE_READ(cinode))
4759			break_lease(inode, O_RDONLY);
4760		else
4761			break_lease(inode, O_WRONLY);
4762		rc = filemap_fdatawrite(inode->i_mapping);
4763		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4764			rc = filemap_fdatawait(inode->i_mapping);
4765			mapping_set_error(inode->i_mapping, rc);
4766			cifs_zap_mapping(inode);
4767		}
4768		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4769		if (CIFS_CACHE_WRITE(cinode))
4770			goto oplock_break_ack;
4771	}
4772
4773	rc = cifs_push_locks(cfile);
4774	if (rc)
4775		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4776
4777oplock_break_ack:
4778	/*
4779	 * Releasing a stale oplock after a recent reconnect of the smb
4780	 * session using a now incorrect file handle is not a data integrity
4781	 * issue, but do not bother sending an oplock release if the session
4782	 * is still disconnected, as the server has already released the oplock
4783	 */
4784	if (!cfile->oplock_break_cancelled) {
4785		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4786							     cinode);
4787		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4788	}
4789	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4790	cifs_done_oplock_break(cinode);
4791}
4792
4793/*
4794 * The presence of cifs_direct_io() in the address space ops vector
4795 * allows open() O_DIRECT flags which would have failed otherwise.
4796 *
4797 * In the non-cached mode (mount with cache=none), we shunt off direct
4798 * read and write requests, so this method should never be called.
4799 *
4800 * Direct IO is not yet supported in the cached mode.
4801 */
4802static ssize_t
4803cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4804{
4805	/*
4806	 * FIXME
4807	 * Eventually need to support direct IO for non forcedirectio mounts
4808	 */
4809	return -EINVAL;
4810}
4811
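/*
 * Allow a file on a cifs mount to be used as swap space (experimental):
 * refuse swapfiles with holes and record the swap state on the open file.
 */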
4812static int cifs_swap_activate(struct swap_info_struct *sis,
4813			      struct file *swap_file, sector_t *span)
4814{
4815	struct cifsFileInfo *cfile = swap_file->private_data;
4816	struct inode *inode = swap_file->f_mapping->host;
4817	unsigned long blocks;
4818	long long isize;
4819
4820	cifs_dbg(FYI, "swap activate\n");
4821
4822	spin_lock(&inode->i_lock);
4823	blocks = inode->i_blocks;
4824	isize = inode->i_size;
4825	spin_unlock(&inode->i_lock);
4826	if (blocks * 512 < isize) {
4827		pr_warn("swap activate: swapfile has holes\n");
4828		return -EINVAL;
4829	}
4830	*span = sis->pages;
4831
4832	pr_warn_once("Swap support over SMB3 is experimental\n");
4833
4834	/*
4835	 * TODO: consider adding ACL (or documenting how) to prevent other
4836	 * users (on this or other systems) from reading it
4837	 */
4838
4839
4840	/* TODO: add sk_set_memalloc(inet) or similar */
4841
4842	if (cfile)
4843		cfile->swapfile = true;
4844	/*
4845	 * TODO: Since file already open, we can't open with DENY_ALL here
4846	 * but we could add call to grab a byte range lock to prevent others
4847	 * from reading or writing the file
4848	 */
4849
4850	return 0;
4851}
4852
4853static void cifs_swap_deactivate(struct file *file)
4854{
4855	struct cifsFileInfo *cfile = file->private_data;
4856
4857	cifs_dbg(FYI, "swap deactivate\n");
4858
4859	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4860
4861	if (cfile)
4862		cfile->swapfile = false;
4863
4864	/* do we need to unpin (or unlock) the file? */
4865}
4866
4867const struct address_space_operations cifs_addr_ops = {
4868	.readpage = cifs_readpage,
4869	.readpages = cifs_readpages,
4870	.writepage = cifs_writepage,
4871	.writepages = cifs_writepages,
4872	.write_begin = cifs_write_begin,
4873	.write_end = cifs_write_end,
4874	.set_page_dirty = __set_page_dirty_nobuffers,
4875	.releasepage = cifs_release_page,
4876	.direct_IO = cifs_direct_io,
4877	.invalidatepage = cifs_invalidate_page,
4878	.launder_page = cifs_launder_page,
4879	/*
4880	 * TODO: investigate and if useful we could add a cifs_migratePage
4881	 * helper (under a CONFIG_MIGRATION option) in the future, and also
4882	 * investigate and add an is_dirty_writeback helper if needed
4883	 */
4884	.swap_activate = cifs_swap_activate,
4885	.swap_deactivate = cifs_swap_deactivate,
4886};
4887
4888/*
4889 * cifs_readpages requires the server to support a buffer large enough to
4890 * contain the header plus one complete page of data.  Otherwise, we need
4891 * to leave cifs_readpages out of the address space operations.
4892 */
4893const struct address_space_operations cifs_addr_ops_smallbuf = {
4894	.readpage = cifs_readpage,
4895	.writepage = cifs_writepage,
4896	.writepages = cifs_writepages,
4897	.write_begin = cifs_write_begin,
4898	.write_end = cifs_write_end,
4899	.set_page_dirty = __set_page_dirty_nobuffers,
4900	.releasepage = cifs_release_page,
4901	.invalidatepage = cifs_invalidate_page,
4902	.launder_page = cifs_launder_page,
4903};