v4.6
   1/*
   2 *   fs/cifs/file.c
   3 *
   4 *   vfs operations that deal with files
   5 *
   6 *   Copyright (C) International Business Machines  Corp., 2002,2010
   7 *   Author(s): Steve French (sfrench@us.ibm.com)
   8 *              Jeremy Allison (jra@samba.org)
   9 *
  10 *   This library is free software; you can redistribute it and/or modify
  11 *   it under the terms of the GNU Lesser General Public License as published
  12 *   by the Free Software Foundation; either version 2.1 of the License, or
  13 *   (at your option) any later version.
  14 *
  15 *   This library is distributed in the hope that it will be useful,
  16 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  18 *   the GNU Lesser General Public License for more details.
  19 *
  20 *   You should have received a copy of the GNU Lesser General Public License
  21 *   along with this library; if not, write to the Free Software
  22 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  23 */
  24#include <linux/fs.h>
  25#include <linux/backing-dev.h>
  26#include <linux/stat.h>
  27#include <linux/fcntl.h>
  28#include <linux/pagemap.h>
  29#include <linux/pagevec.h>
  30#include <linux/writeback.h>
  31#include <linux/task_io_accounting_ops.h>
  32#include <linux/delay.h>
  33#include <linux/mount.h>
  34#include <linux/slab.h>
  35#include <linux/swap.h>
  36#include <asm/div64.h>
  37#include "cifsfs.h"
  38#include "cifspdu.h"
  39#include "cifsglob.h"
  40#include "cifsproto.h"
  41#include "cifs_unicode.h"
  42#include "cifs_debug.h"
  43#include "cifs_fs_sb.h"
  44#include "fscache.h"
  45
  46
  47static inline int cifs_convert_flags(unsigned int flags)
  48{
  49	if ((flags & O_ACCMODE) == O_RDONLY)
  50		return GENERIC_READ;
  51	else if ((flags & O_ACCMODE) == O_WRONLY)
  52		return GENERIC_WRITE;
  53	else if ((flags & O_ACCMODE) == O_RDWR) {
   54		/* GENERIC_ALL is too much permission to request; it
   55		   can cause an unnecessary access denied on create */
  56		/* return GENERIC_ALL; */
  57		return (GENERIC_READ | GENERIC_WRITE);
  58	}
  59
  60	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
  61		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
  62		FILE_READ_DATA);
  63}
  64
  65static u32 cifs_posix_convert_flags(unsigned int flags)
  66{
  67	u32 posix_flags = 0;
  68
  69	if ((flags & O_ACCMODE) == O_RDONLY)
  70		posix_flags = SMB_O_RDONLY;
  71	else if ((flags & O_ACCMODE) == O_WRONLY)
  72		posix_flags = SMB_O_WRONLY;
  73	else if ((flags & O_ACCMODE) == O_RDWR)
  74		posix_flags = SMB_O_RDWR;
  75
  76	if (flags & O_CREAT) {
  77		posix_flags |= SMB_O_CREAT;
  78		if (flags & O_EXCL)
  79			posix_flags |= SMB_O_EXCL;
  80	} else if (flags & O_EXCL)
  81		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
  82			 current->comm, current->tgid);
  83
  84	if (flags & O_TRUNC)
  85		posix_flags |= SMB_O_TRUNC;
  86	/* be safe and imply O_SYNC for O_DSYNC */
  87	if (flags & O_DSYNC)
  88		posix_flags |= SMB_O_SYNC;
  89	if (flags & O_DIRECTORY)
  90		posix_flags |= SMB_O_DIRECTORY;
  91	if (flags & O_NOFOLLOW)
  92		posix_flags |= SMB_O_NOFOLLOW;
  93	if (flags & O_DIRECT)
  94		posix_flags |= SMB_O_DIRECT;
  95
  96	return posix_flags;
  97}
  98
  99static inline int cifs_get_disposition(unsigned int flags)
 100{
 101	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
 102		return FILE_CREATE;
 103	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
 104		return FILE_OVERWRITE_IF;
 105	else if ((flags & O_CREAT) == O_CREAT)
 106		return FILE_OPEN_IF;
 107	else if ((flags & O_TRUNC) == O_TRUNC)
 108		return FILE_OVERWRITE;
 109	else
 110		return FILE_OPEN;
 111}
 112
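
As a quick cross-check of the mapping implemented by cifs_get_disposition() above (and of the O_EXCL note in cifs_posix_convert_flags()), the following is a small, illustrative user-space sketch of which open(2) flag combinations select each disposition on a CIFS mount. The path is a hypothetical example; the snippet is not part of the driver.

	/* Illustrative user-space sketch only; path is hypothetical. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* O_CREAT | O_EXCL   -> FILE_CREATE       (fail if it already exists) */
		/* O_CREAT | O_TRUNC  -> FILE_OVERWRITE_IF (create, or truncate)       */
		/* O_CREAT            -> FILE_OPEN_IF      (open, or create)           */
		/* O_TRUNC            -> FILE_OVERWRITE    (must exist; truncate)      */
		/* none of the above  -> FILE_OPEN         (plain open)                */
		int fd = open("/mnt/cifs/example.txt",
			      O_CREAT | O_EXCL | O_WRONLY, 0644);
		if (fd < 0) {
			perror("open");
			return 1;
		}
		close(fd);
		return 0;
	}
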
 113int cifs_posix_open(char *full_path, struct inode **pinode,
 114			struct super_block *sb, int mode, unsigned int f_flags,
 115			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
 116{
 117	int rc;
 118	FILE_UNIX_BASIC_INFO *presp_data;
 119	__u32 posix_flags = 0;
 120	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 121	struct cifs_fattr fattr;
 122	struct tcon_link *tlink;
 123	struct cifs_tcon *tcon;
 124
 125	cifs_dbg(FYI, "posix open %s\n", full_path);
 126
 127	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
 128	if (presp_data == NULL)
 129		return -ENOMEM;
 130
 131	tlink = cifs_sb_tlink(cifs_sb);
 132	if (IS_ERR(tlink)) {
 133		rc = PTR_ERR(tlink);
 134		goto posix_open_ret;
 135	}
 136
 137	tcon = tlink_tcon(tlink);
 138	mode &= ~current_umask();
 139
 140	posix_flags = cifs_posix_convert_flags(f_flags);
 141	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
 142			     poplock, full_path, cifs_sb->local_nls,
 143			     cifs_remap(cifs_sb));
 144	cifs_put_tlink(tlink);
 145
 146	if (rc)
 147		goto posix_open_ret;
 148
 149	if (presp_data->Type == cpu_to_le32(-1))
 150		goto posix_open_ret; /* open ok, caller does qpathinfo */
 151
 152	if (!pinode)
 153		goto posix_open_ret; /* caller does not need info */
 154
 155	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
 156
 157	/* get new inode and set it up */
 158	if (*pinode == NULL) {
 159		cifs_fill_uniqueid(sb, &fattr);
 160		*pinode = cifs_iget(sb, &fattr);
 161		if (!*pinode) {
 162			rc = -ENOMEM;
 163			goto posix_open_ret;
 164		}
 165	} else {
 166		cifs_fattr_to_inode(*pinode, &fattr);
 167	}
 168
 169posix_open_ret:
 170	kfree(presp_data);
 171	return rc;
 172}
 173
 174static int
 175cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
 176	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
 177	     struct cifs_fid *fid, unsigned int xid)
 178{
 179	int rc;
 180	int desired_access;
 181	int disposition;
 182	int create_options = CREATE_NOT_DIR;
 183	FILE_ALL_INFO *buf;
 184	struct TCP_Server_Info *server = tcon->ses->server;
 185	struct cifs_open_parms oparms;
 186
 187	if (!server->ops->open)
 188		return -ENOSYS;
 189
 190	desired_access = cifs_convert_flags(f_flags);
 191
 192/*********************************************************************
 193 *  open flag mapping table:
 194 *
 195 *	POSIX Flag            CIFS Disposition
 196 *	----------            ----------------
 197 *	O_CREAT               FILE_OPEN_IF
 198 *	O_CREAT | O_EXCL      FILE_CREATE
 199 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 200 *	O_TRUNC               FILE_OVERWRITE
 201 *	none of the above     FILE_OPEN
 202 *
  203 *	Note that there is no direct POSIX-flag match for the disposition
  204 *	FILE_SUPERSEDE (i.e. create whether or not the file exists);
  205 *	O_CREAT | O_TRUNC is similar but truncates the existing
  206 *	file rather than creating a new file as FILE_SUPERSEDE does
  207 *	(which uses the attributes / metadata passed in on the open call).
 208 *?
 209 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 210 *?  and the read write flags match reasonably.  O_LARGEFILE
 211 *?  is irrelevant because largefile support is always used
 212 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 213 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 214 *********************************************************************/
 215
 216	disposition = cifs_get_disposition(f_flags);
 217
 218	/* BB pass O_SYNC flag through on file attributes .. BB */
 219
 220	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
 221	if (!buf)
 222		return -ENOMEM;
 223
 224	if (backup_cred(cifs_sb))
 225		create_options |= CREATE_OPEN_BACKUP_INTENT;
 226
 227	oparms.tcon = tcon;
 228	oparms.cifs_sb = cifs_sb;
 229	oparms.desired_access = desired_access;
 230	oparms.create_options = create_options;
 231	oparms.disposition = disposition;
 232	oparms.path = full_path;
 233	oparms.fid = fid;
 234	oparms.reconnect = false;
 235
 236	rc = server->ops->open(xid, &oparms, oplock, buf);
 237
 238	if (rc)
 239		goto out;
 240
 241	if (tcon->unix_ext)
 242		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
 243					      xid);
 244	else
 245		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
 246					 xid, fid);
 247
 248out:
 249	kfree(buf);
 250	return rc;
 251}
 252
 253static bool
 254cifs_has_mand_locks(struct cifsInodeInfo *cinode)
 255{
 256	struct cifs_fid_locks *cur;
 257	bool has_locks = false;
 258
 259	down_read(&cinode->lock_sem);
 260	list_for_each_entry(cur, &cinode->llist, llist) {
 261		if (!list_empty(&cur->locks)) {
 262			has_locks = true;
 263			break;
 264		}
 265	}
 266	up_read(&cinode->lock_sem);
 267	return has_locks;
 268}
 269
 270struct cifsFileInfo *
 271cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 272		  struct tcon_link *tlink, __u32 oplock)
 273{
 274	struct dentry *dentry = file->f_path.dentry;
 275	struct inode *inode = d_inode(dentry);
 276	struct cifsInodeInfo *cinode = CIFS_I(inode);
 277	struct cifsFileInfo *cfile;
 278	struct cifs_fid_locks *fdlocks;
 279	struct cifs_tcon *tcon = tlink_tcon(tlink);
 280	struct TCP_Server_Info *server = tcon->ses->server;
 281
 282	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
 283	if (cfile == NULL)
 284		return cfile;
 285
 286	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
 287	if (!fdlocks) {
 288		kfree(cfile);
 289		return NULL;
 290	}
 291
 292	INIT_LIST_HEAD(&fdlocks->locks);
 293	fdlocks->cfile = cfile;
 294	cfile->llist = fdlocks;
 295	down_write(&cinode->lock_sem);
 296	list_add(&fdlocks->llist, &cinode->llist);
 297	up_write(&cinode->lock_sem);
 298
 299	cfile->count = 1;
 300	cfile->pid = current->tgid;
 301	cfile->uid = current_fsuid();
 302	cfile->dentry = dget(dentry);
 303	cfile->f_flags = file->f_flags;
 304	cfile->invalidHandle = false;
 305	cfile->tlink = cifs_get_tlink(tlink);
 306	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
 307	mutex_init(&cfile->fh_mutex);
 308
 309	cifs_sb_active(inode->i_sb);
 310
 311	/*
 312	 * If the server returned a read oplock and we have mandatory brlocks,
 313	 * set oplock level to None.
 314	 */
 315	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
 316		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
 317		oplock = 0;
 318	}
 319
 320	spin_lock(&cifs_file_list_lock);
 321	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
 322		oplock = fid->pending_open->oplock;
 323	list_del(&fid->pending_open->olist);
 324
 325	fid->purge_cache = false;
 326	server->ops->set_fid(cfile, fid, oplock);
 327
 328	list_add(&cfile->tlist, &tcon->openFileList);
 329	/* if readable file instance put first in list*/
 330	if (file->f_mode & FMODE_READ)
 331		list_add(&cfile->flist, &cinode->openFileList);
 332	else
 333		list_add_tail(&cfile->flist, &cinode->openFileList);
 334	spin_unlock(&cifs_file_list_lock);
 335
 336	if (fid->purge_cache)
 337		cifs_zap_mapping(inode);
 338
 339	file->private_data = cfile;
 340	return cfile;
 341}
 342
 343struct cifsFileInfo *
 344cifsFileInfo_get(struct cifsFileInfo *cifs_file)
 345{
 346	spin_lock(&cifs_file_list_lock);
 347	cifsFileInfo_get_locked(cifs_file);
 348	spin_unlock(&cifs_file_list_lock);
 349	return cifs_file;
 350}
 351
 352/*
 353 * Release a reference on the file private data. This may involve closing
 354 * the filehandle out on the server. Must be called without holding
 355 * cifs_file_list_lock.
 356 */
 357void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 358{
 359	struct inode *inode = d_inode(cifs_file->dentry);
 360	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
 361	struct TCP_Server_Info *server = tcon->ses->server;
 362	struct cifsInodeInfo *cifsi = CIFS_I(inode);
 363	struct super_block *sb = inode->i_sb;
 364	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 365	struct cifsLockInfo *li, *tmp;
 366	struct cifs_fid fid;
 367	struct cifs_pending_open open;
 368	bool oplock_break_cancelled;
 369
 370	spin_lock(&cifs_file_list_lock);
 371	if (--cifs_file->count > 0) {
 372		spin_unlock(&cifs_file_list_lock);
 373		return;
 374	}
 375
 376	if (server->ops->get_lease_key)
 377		server->ops->get_lease_key(inode, &fid);
 378
 379	/* store open in pending opens to make sure we don't miss lease break */
 380	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
 381
 382	/* remove it from the lists */
 383	list_del(&cifs_file->flist);
 384	list_del(&cifs_file->tlist);
 385
 386	if (list_empty(&cifsi->openFileList)) {
 387		cifs_dbg(FYI, "closing last open instance for inode %p\n",
 388			 d_inode(cifs_file->dentry));
 389		/*
  390		 * In strict cache mode we need to invalidate the mapping on the
  391		 * last close because it may cause an error when we open this
  392		 * file again and get at least a level II oplock.
 393		 */
 394		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
 395			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
 396		cifs_set_oplock_level(cifsi, 0);
 397	}
 398	spin_unlock(&cifs_file_list_lock);
 399
 400	oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
 401
 402	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
 403		struct TCP_Server_Info *server = tcon->ses->server;
 404		unsigned int xid;
 405
 406		xid = get_xid();
 407		if (server->ops->close)
 408			server->ops->close(xid, tcon, &cifs_file->fid);
 409		_free_xid(xid);
 410	}
 411
 412	if (oplock_break_cancelled)
 413		cifs_done_oplock_break(cifsi);
 414
 415	cifs_del_pending_open(&open);
 416
 417	/*
 418	 * Delete any outstanding lock records. We'll lose them when the file
 419	 * is closed anyway.
 420	 */
 421	down_write(&cifsi->lock_sem);
 422	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
 423		list_del(&li->llist);
 424		cifs_del_lock_waiters(li);
 425		kfree(li);
 426	}
 427	list_del(&cifs_file->llist->llist);
 428	kfree(cifs_file->llist);
 429	up_write(&cifsi->lock_sem);
 430
 431	cifs_put_tlink(cifs_file->tlink);
 432	dput(cifs_file->dentry);
 433	cifs_sb_deactive(sb);
 434	kfree(cifs_file);
 435}
 436
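
cifsFileInfo_get() and cifsFileInfo_put() above implement a plain reference count protected by cifs_file_list_lock: the final put closes the handle on the server, drops the outstanding lock records, and frees the structure. A minimal user-space sketch of the same get/put pattern follows; every name in it is hypothetical, and it mirrors only the counting idea, not the CIFS teardown work.

	/* Hypothetical user-space sketch of the get/put refcount pattern. */
	#include <pthread.h>
	#include <stdlib.h>

	struct handle {
		int count;                   /* like cifs_file->count */
		pthread_mutex_t *list_lock;  /* like cifs_file_list_lock */
	};

	static void handle_get(struct handle *h)
	{
		pthread_mutex_lock(h->list_lock);
		h->count++;
		pthread_mutex_unlock(h->list_lock);
	}

	static void handle_put(struct handle *h)
	{
		pthread_mutex_lock(h->list_lock);
		if (--h->count > 0) {
			pthread_mutex_unlock(h->list_lock);
			return;              /* other users remain */
		}
		pthread_mutex_unlock(h->list_lock);
		free(h);                     /* last reference: tear down */
	}
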
 437int cifs_open(struct inode *inode, struct file *file)
 438
 439{
 440	int rc = -EACCES;
 441	unsigned int xid;
 442	__u32 oplock;
 443	struct cifs_sb_info *cifs_sb;
 444	struct TCP_Server_Info *server;
 445	struct cifs_tcon *tcon;
 446	struct tcon_link *tlink;
 447	struct cifsFileInfo *cfile = NULL;
 448	char *full_path = NULL;
 449	bool posix_open_ok = false;
 450	struct cifs_fid fid;
 451	struct cifs_pending_open open;
 452
 453	xid = get_xid();
 454
 455	cifs_sb = CIFS_SB(inode->i_sb);
 456	tlink = cifs_sb_tlink(cifs_sb);
 457	if (IS_ERR(tlink)) {
 458		free_xid(xid);
 459		return PTR_ERR(tlink);
 460	}
 461	tcon = tlink_tcon(tlink);
 462	server = tcon->ses->server;
 463
 464	full_path = build_path_from_dentry(file->f_path.dentry);
 465	if (full_path == NULL) {
 466		rc = -ENOMEM;
 467		goto out;
 468	}
 469
 470	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
 471		 inode, file->f_flags, full_path);
 472
 473	if (file->f_flags & O_DIRECT &&
 474	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
 475		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
 476			file->f_op = &cifs_file_direct_nobrl_ops;
 477		else
 478			file->f_op = &cifs_file_direct_ops;
 479	}
 480
 481	if (server->oplocks)
 482		oplock = REQ_OPLOCK;
 483	else
 484		oplock = 0;
 485
 486	if (!tcon->broken_posix_open && tcon->unix_ext &&
 487	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 488				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 489		/* can not refresh inode info since size could be stale */
 490		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
 491				cifs_sb->mnt_file_mode /* ignored */,
 492				file->f_flags, &oplock, &fid.netfid, xid);
 493		if (rc == 0) {
 494			cifs_dbg(FYI, "posix open succeeded\n");
 495			posix_open_ok = true;
 496		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
 497			if (tcon->ses->serverNOS)
 498				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
 499					 tcon->ses->serverName,
 500					 tcon->ses->serverNOS);
 501			tcon->broken_posix_open = true;
 502		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
 503			 (rc != -EOPNOTSUPP)) /* path not found or net err */
 504			goto out;
 505		/*
 506		 * Else fallthrough to retry open the old way on network i/o
 507		 * or DFS errors.
 508		 */
 509	}
 510
 511	if (server->ops->get_lease_key)
 512		server->ops->get_lease_key(inode, &fid);
 513
 514	cifs_add_pending_open(&fid, tlink, &open);
 515
 516	if (!posix_open_ok) {
 517		if (server->ops->get_lease_key)
 518			server->ops->get_lease_key(inode, &fid);
 519
 520		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
 521				  file->f_flags, &oplock, &fid, xid);
 522		if (rc) {
 523			cifs_del_pending_open(&open);
 524			goto out;
 525		}
 526	}
 527
 528	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
 529	if (cfile == NULL) {
 530		if (server->ops->close)
 531			server->ops->close(xid, tcon, &fid);
 532		cifs_del_pending_open(&open);
 533		rc = -ENOMEM;
 534		goto out;
 535	}
 536
 537	cifs_fscache_set_inode_cookie(inode, file);
 538
 539	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
 540		/*
 541		 * Time to set mode which we can not set earlier due to
 542		 * problems creating new read-only files.
 543		 */
 544		struct cifs_unix_set_info_args args = {
 545			.mode	= inode->i_mode,
 546			.uid	= INVALID_UID, /* no change */
 547			.gid	= INVALID_GID, /* no change */
 548			.ctime	= NO_CHANGE_64,
 549			.atime	= NO_CHANGE_64,
 550			.mtime	= NO_CHANGE_64,
 551			.device	= 0,
 552		};
 553		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
 554				       cfile->pid);
 555	}
 556
 557out:
 558	kfree(full_path);
 559	free_xid(xid);
 560	cifs_put_tlink(tlink);
 561	return rc;
 562}
 563
 564static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
 565
 566/*
 567 * Try to reacquire byte range locks that were released when session
 568 * to server was lost.
 569 */
 570static int
 571cifs_relock_file(struct cifsFileInfo *cfile)
 572{
 573	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
 574	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 575	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 576	int rc = 0;
 577
 578	down_read(&cinode->lock_sem);
 579	if (cinode->can_cache_brlcks) {
 580		/* can cache locks - no need to relock */
 581		up_read(&cinode->lock_sem);
 582		return rc;
 583	}
 584
 585	if (cap_unix(tcon->ses) &&
 586	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
 587	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
 588		rc = cifs_push_posix_locks(cfile);
 589	else
 590		rc = tcon->ses->server->ops->push_mand_locks(cfile);
 591
 592	up_read(&cinode->lock_sem);
 593	return rc;
 594}
 595
 596static int
 597cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 598{
 599	int rc = -EACCES;
 600	unsigned int xid;
 601	__u32 oplock;
 602	struct cifs_sb_info *cifs_sb;
 603	struct cifs_tcon *tcon;
 604	struct TCP_Server_Info *server;
 605	struct cifsInodeInfo *cinode;
 606	struct inode *inode;
 607	char *full_path = NULL;
 608	int desired_access;
 609	int disposition = FILE_OPEN;
 610	int create_options = CREATE_NOT_DIR;
 611	struct cifs_open_parms oparms;
 612
 613	xid = get_xid();
 614	mutex_lock(&cfile->fh_mutex);
 615	if (!cfile->invalidHandle) {
 616		mutex_unlock(&cfile->fh_mutex);
 617		rc = 0;
 618		free_xid(xid);
 619		return rc;
 620	}
 621
 622	inode = d_inode(cfile->dentry);
 623	cifs_sb = CIFS_SB(inode->i_sb);
 624	tcon = tlink_tcon(cfile->tlink);
 625	server = tcon->ses->server;
 626
 627	/*
 628	 * Can not grab rename sem here because various ops, including those
 629	 * that already have the rename sem can end up causing writepage to get
 630	 * called and if the server was down that means we end up here, and we
 631	 * can never tell if the caller already has the rename_sem.
 632	 */
 633	full_path = build_path_from_dentry(cfile->dentry);
 634	if (full_path == NULL) {
 635		rc = -ENOMEM;
 636		mutex_unlock(&cfile->fh_mutex);
 637		free_xid(xid);
 638		return rc;
 639	}
 640
 641	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
 642		 inode, cfile->f_flags, full_path);
 643
 644	if (tcon->ses->server->oplocks)
 645		oplock = REQ_OPLOCK;
 646	else
 647		oplock = 0;
 648
 649	if (tcon->unix_ext && cap_unix(tcon->ses) &&
 650	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 651				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 652		/*
 653		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
 654		 * original open. Must mask them off for a reopen.
 655		 */
 656		unsigned int oflags = cfile->f_flags &
 657						~(O_CREAT | O_EXCL | O_TRUNC);
 658
 659		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
 660				     cifs_sb->mnt_file_mode /* ignored */,
 661				     oflags, &oplock, &cfile->fid.netfid, xid);
 662		if (rc == 0) {
 663			cifs_dbg(FYI, "posix reopen succeeded\n");
 664			oparms.reconnect = true;
 665			goto reopen_success;
 666		}
 667		/*
 668		 * fallthrough to retry open the old way on errors, especially
 669		 * in the reconnect path it is important to retry hard
 670		 */
 671	}
 672
 673	desired_access = cifs_convert_flags(cfile->f_flags);
 674
 675	if (backup_cred(cifs_sb))
 676		create_options |= CREATE_OPEN_BACKUP_INTENT;
 677
 678	if (server->ops->get_lease_key)
 679		server->ops->get_lease_key(inode, &cfile->fid);
 680
 681	oparms.tcon = tcon;
 682	oparms.cifs_sb = cifs_sb;
 683	oparms.desired_access = desired_access;
 684	oparms.create_options = create_options;
 685	oparms.disposition = disposition;
 686	oparms.path = full_path;
 687	oparms.fid = &cfile->fid;
 688	oparms.reconnect = true;
 689
 690	/*
 691	 * Can not refresh inode by passing in file_info buf to be returned by
 692	 * ops->open and then calling get_inode_info with returned buf since
 693	 * file might have write behind data that needs to be flushed and server
 694	 * version of file size can be stale. If we knew for sure that inode was
 695	 * not dirty locally we could do this.
 696	 */
 697	rc = server->ops->open(xid, &oparms, &oplock, NULL);
 698	if (rc == -ENOENT && oparms.reconnect == false) {
 699		/* durable handle timeout is expired - open the file again */
 700		rc = server->ops->open(xid, &oparms, &oplock, NULL);
 701		/* indicate that we need to relock the file */
 702		oparms.reconnect = true;
 703	}
 704
 705	if (rc) {
 706		mutex_unlock(&cfile->fh_mutex);
 707		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
 708		cifs_dbg(FYI, "oplock: %d\n", oplock);
 709		goto reopen_error_exit;
 710	}
 711
 712reopen_success:
 713	cfile->invalidHandle = false;
 714	mutex_unlock(&cfile->fh_mutex);
 715	cinode = CIFS_I(inode);
 716
 717	if (can_flush) {
 718		rc = filemap_write_and_wait(inode->i_mapping);
 719		mapping_set_error(inode->i_mapping, rc);
 720
 721		if (tcon->unix_ext)
 722			rc = cifs_get_inode_info_unix(&inode, full_path,
 723						      inode->i_sb, xid);
 724		else
 725			rc = cifs_get_inode_info(&inode, full_path, NULL,
 726						 inode->i_sb, xid, NULL);
 727	}
 728	/*
 729	 * Else we are writing out data to server already and could deadlock if
 730	 * we tried to flush data, and since we do not know if we have data that
 731	 * would invalidate the current end of file on the server we can not go
 732	 * to the server to get the new inode info.
 733	 */
 734
 735	server->ops->set_fid(cfile, &cfile->fid, oplock);
 736	if (oparms.reconnect)
 737		cifs_relock_file(cfile);
 738
 739reopen_error_exit:
 740	kfree(full_path);
 741	free_xid(xid);
 742	return rc;
 743}
 744
 745int cifs_close(struct inode *inode, struct file *file)
 746{
 747	if (file->private_data != NULL) {
 748		cifsFileInfo_put(file->private_data);
 749		file->private_data = NULL;
 750	}
 751
 752	/* return code from the ->release op is always ignored */
 753	return 0;
 754}
 755
 756int cifs_closedir(struct inode *inode, struct file *file)
 757{
 758	int rc = 0;
 759	unsigned int xid;
 760	struct cifsFileInfo *cfile = file->private_data;
 761	struct cifs_tcon *tcon;
 762	struct TCP_Server_Info *server;
 763	char *buf;
 764
 765	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
 766
 767	if (cfile == NULL)
 768		return rc;
 769
 770	xid = get_xid();
 771	tcon = tlink_tcon(cfile->tlink);
 772	server = tcon->ses->server;
 773
 774	cifs_dbg(FYI, "Freeing private data in close dir\n");
 775	spin_lock(&cifs_file_list_lock);
 776	if (server->ops->dir_needs_close(cfile)) {
 777		cfile->invalidHandle = true;
 778		spin_unlock(&cifs_file_list_lock);
 779		if (server->ops->close_dir)
 780			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
 781		else
 782			rc = -ENOSYS;
 783		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
 784		/* not much we can do if it fails anyway, ignore rc */
 785		rc = 0;
 786	} else
 787		spin_unlock(&cifs_file_list_lock);
 788
 789	buf = cfile->srch_inf.ntwrk_buf_start;
 790	if (buf) {
 791		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
 792		cfile->srch_inf.ntwrk_buf_start = NULL;
 793		if (cfile->srch_inf.smallBuf)
 794			cifs_small_buf_release(buf);
 795		else
 796			cifs_buf_release(buf);
 797	}
 798
 799	cifs_put_tlink(cfile->tlink);
 800	kfree(file->private_data);
 801	file->private_data = NULL;
 802	/* BB can we lock the filestruct while this is going on? */
 803	free_xid(xid);
 804	return rc;
 805}
 806
 807static struct cifsLockInfo *
 808cifs_lock_init(__u64 offset, __u64 length, __u8 type)
 809{
 810	struct cifsLockInfo *lock =
 811		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
 812	if (!lock)
 813		return lock;
 814	lock->offset = offset;
 815	lock->length = length;
 816	lock->type = type;
 817	lock->pid = current->tgid;
 818	INIT_LIST_HEAD(&lock->blist);
 819	init_waitqueue_head(&lock->block_q);
 820	return lock;
 821}
 822
 823void
 824cifs_del_lock_waiters(struct cifsLockInfo *lock)
 825{
 826	struct cifsLockInfo *li, *tmp;
 827	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
 828		list_del_init(&li->blist);
 829		wake_up(&li->block_q);
 830	}
 831}
 832
 833#define CIFS_LOCK_OP	0
 834#define CIFS_READ_OP	1
 835#define CIFS_WRITE_OP	2
 836
 837/* @rw_check : 0 - no op, 1 - read, 2 - write */
 838static bool
 839cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
 840			    __u64 length, __u8 type, struct cifsFileInfo *cfile,
 841			    struct cifsLockInfo **conf_lock, int rw_check)
 842{
 843	struct cifsLockInfo *li;
 844	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
 845	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
 846
 847	list_for_each_entry(li, &fdlocks->locks, llist) {
 848		if (offset + length <= li->offset ||
 849		    offset >= li->offset + li->length)
 850			continue;
 851		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
 852		    server->ops->compare_fids(cfile, cur_cfile)) {
 853			/* shared lock prevents write op through the same fid */
 854			if (!(li->type & server->vals->shared_lock_type) ||
 855			    rw_check != CIFS_WRITE_OP)
 856				continue;
 857		}
 858		if ((type & server->vals->shared_lock_type) &&
 859		    ((server->ops->compare_fids(cfile, cur_cfile) &&
 860		     current->tgid == li->pid) || type == li->type))
 861			continue;
 862		if (conf_lock)
 863			*conf_lock = li;
 864		return true;
 865	}
 866	return false;
 867}
 868
 869bool
 870cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
 871			__u8 type, struct cifsLockInfo **conf_lock,
 872			int rw_check)
 873{
 874	bool rc = false;
 875	struct cifs_fid_locks *cur;
 876	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 877
 878	list_for_each_entry(cur, &cinode->llist, llist) {
 879		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
 880						 cfile, conf_lock, rw_check);
 881		if (rc)
 882			break;
 883	}
 884
 885	return rc;
 886}
 887
 888/*
  889 * Check if there is another lock that prevents us from setting the lock
  890 * (mandatory style). If such a lock exists, update the flock structure with
  891 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
  892 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
  893 * request the lock from the server or 1 otherwise.
 894 */
 895static int
 896cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
 897	       __u8 type, struct file_lock *flock)
 898{
 899	int rc = 0;
 900	struct cifsLockInfo *conf_lock;
 901	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 902	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
 903	bool exist;
 904
 905	down_read(&cinode->lock_sem);
 906
 907	exist = cifs_find_lock_conflict(cfile, offset, length, type,
 908					&conf_lock, CIFS_LOCK_OP);
 909	if (exist) {
 910		flock->fl_start = conf_lock->offset;
 911		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
 912		flock->fl_pid = conf_lock->pid;
 913		if (conf_lock->type & server->vals->shared_lock_type)
 914			flock->fl_type = F_RDLCK;
 915		else
 916			flock->fl_type = F_WRLCK;
 917	} else if (!cinode->can_cache_brlcks)
 918		rc = 1;
 919	else
 920		flock->fl_type = F_UNLCK;
 921
 922	up_read(&cinode->lock_sem);
 923	return rc;
 924}
 925
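
The test above is what ultimately services a user-space F_GETLK query: either the range is reported free (F_UNLCK) or the flock structure is filled with the conflicting lock's range, pid and type. An illustrative user-space sketch, with a hypothetical path:

	/* Illustrative user-space sketch of an F_GETLK query; path is hypothetical. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		struct flock fl = {
			.l_type   = F_WRLCK,    /* the lock we would like to take */
			.l_whence = SEEK_SET,
			.l_start  = 0,
			.l_len    = 100,
		};
		int fd = open("/mnt/cifs/shared.dat", O_RDWR);

		if (fd < 0 || fcntl(fd, F_GETLK, &fl) < 0) {
			perror("F_GETLK");
			return 1;
		}
		if (fl.l_type == F_UNLCK)
			printf("range is free\n");
		else
			printf("conflict: pid %d holds a %s lock at %lld\n",
			       (int)fl.l_pid,
			       fl.l_type == F_RDLCK ? "read" : "write",
			       (long long)fl.l_start);
		close(fd);
		return 0;
	}
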
 926static void
 927cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
 928{
 929	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 930	down_write(&cinode->lock_sem);
 931	list_add_tail(&lock->llist, &cfile->llist->locks);
 932	up_write(&cinode->lock_sem);
 933}
 934
 935/*
 936 * Set the byte-range lock (mandatory style). Returns:
 937 * 1) 0, if we set the lock and don't need to request to the server;
 938 * 2) 1, if no locks prevent us but we need to request to the server;
  939 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 940 */
 941static int
 942cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
 943		 bool wait)
 944{
 945	struct cifsLockInfo *conf_lock;
 946	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 947	bool exist;
 948	int rc = 0;
 949
 950try_again:
 951	exist = false;
 952	down_write(&cinode->lock_sem);
 953
 954	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
 955					lock->type, &conf_lock, CIFS_LOCK_OP);
 956	if (!exist && cinode->can_cache_brlcks) {
 957		list_add_tail(&lock->llist, &cfile->llist->locks);
 958		up_write(&cinode->lock_sem);
 959		return rc;
 960	}
 961
 962	if (!exist)
 963		rc = 1;
 964	else if (!wait)
 965		rc = -EACCES;
 966	else {
 967		list_add_tail(&lock->blist, &conf_lock->blist);
 968		up_write(&cinode->lock_sem);
 969		rc = wait_event_interruptible(lock->block_q,
 970					(lock->blist.prev == &lock->blist) &&
 971					(lock->blist.next == &lock->blist));
 972		if (!rc)
 973			goto try_again;
 974		down_write(&cinode->lock_sem);
 975		list_del_init(&lock->blist);
 976	}
 977
 978	up_write(&cinode->lock_sem);
 979	return rc;
 980}
 981
 982/*
  983 * Check if there is another lock that prevents us from setting the lock (posix
  984 * style). If such a lock exists, update the flock structure with its
  985 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
  986 * or leave it the same if we can't. Returns 0 if we don't need to request
  987 * the lock from the server or 1 otherwise.
 988 */
 989static int
 990cifs_posix_lock_test(struct file *file, struct file_lock *flock)
 991{
 992	int rc = 0;
 993	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
 994	unsigned char saved_type = flock->fl_type;
 995
 996	if ((flock->fl_flags & FL_POSIX) == 0)
 997		return 1;
 998
 999	down_read(&cinode->lock_sem);
1000	posix_test_lock(file, flock);
1001
1002	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1003		flock->fl_type = saved_type;
1004		rc = 1;
1005	}
1006
1007	up_read(&cinode->lock_sem);
1008	return rc;
1009}
1010
1011/*
1012 * Set the byte-range lock (posix style). Returns:
1013 * 1) 0, if we set the lock and don't need to request to the server;
1014 * 2) 1, if we need to request to the server;
1015 * 3) <0, if the error occurs while setting the lock.
1016 */
1017static int
1018cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1019{
1020	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1021	int rc = 1;
1022
1023	if ((flock->fl_flags & FL_POSIX) == 0)
1024		return rc;
1025
1026try_again:
1027	down_write(&cinode->lock_sem);
1028	if (!cinode->can_cache_brlcks) {
1029		up_write(&cinode->lock_sem);
1030		return rc;
1031	}
1032
1033	rc = posix_lock_file(file, flock, NULL);
1034	up_write(&cinode->lock_sem);
1035	if (rc == FILE_LOCK_DEFERRED) {
1036		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1037		if (!rc)
1038			goto try_again;
1039		posix_unblock_lock(flock);
1040	}
1041	return rc;
1042}
1043
1044int
1045cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1046{
1047	unsigned int xid;
1048	int rc = 0, stored_rc;
1049	struct cifsLockInfo *li, *tmp;
1050	struct cifs_tcon *tcon;
1051	unsigned int num, max_num, max_buf;
1052	LOCKING_ANDX_RANGE *buf, *cur;
1053	int types[] = {LOCKING_ANDX_LARGE_FILES,
1054		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1055	int i;
1056
1057	xid = get_xid();
1058	tcon = tlink_tcon(cfile->tlink);
1059
1060	/*
1061	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1062	 * and check it for zero before using.
1063	 */
1064	max_buf = tcon->ses->server->maxBuf;
1065	if (!max_buf) {
1066		free_xid(xid);
1067		return -EINVAL;
1068	}
1069
1070	max_num = (max_buf - sizeof(struct smb_hdr)) /
1071						sizeof(LOCKING_ANDX_RANGE);
1072	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1073	if (!buf) {
1074		free_xid(xid);
1075		return -ENOMEM;
1076	}
1077
1078	for (i = 0; i < 2; i++) {
1079		cur = buf;
1080		num = 0;
1081		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1082			if (li->type != types[i])
1083				continue;
1084			cur->Pid = cpu_to_le16(li->pid);
1085			cur->LengthLow = cpu_to_le32((u32)li->length);
1086			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1087			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1088			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1089			if (++num == max_num) {
1090				stored_rc = cifs_lockv(xid, tcon,
1091						       cfile->fid.netfid,
1092						       (__u8)li->type, 0, num,
1093						       buf);
1094				if (stored_rc)
1095					rc = stored_rc;
1096				cur = buf;
1097				num = 0;
1098			} else
1099				cur++;
1100		}
1101
1102		if (num) {
1103			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1104					       (__u8)types[i], 0, num, buf);
1105			if (stored_rc)
1106				rc = stored_rc;
1107		}
1108	}
1109
1110	kfree(buf);
1111	free_xid(xid);
1112	return rc;
1113}
1114
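
The max_num calculation above bounds how many LOCKING_ANDX_RANGE entries fit in one request, and the inner loop sends a batch whenever that bound is reached before refilling the buffer. A small user-space sketch of the same chunking arithmetic; all sizes and names here are stand-ins, not CIFS values.

	/* Hypothetical sketch of the fill-then-flush batching used above. */
	#include <stdio.h>

	#define MAX_BUF     4096    /* stand-in for server->maxBuf             */
	#define HDR_SIZE      64    /* stand-in for sizeof(struct smb_hdr)     */
	#define RECORD_SIZE   24    /* stand-in for sizeof(LOCKING_ANDX_RANGE) */

	static void flush_batch(unsigned int num)
	{
		printf("sending batch of %u records\n", num);
	}

	int main(void)
	{
		unsigned int max_num = (MAX_BUF - HDR_SIZE) / RECORD_SIZE;
		unsigned int num = 0, total = 1000;

		for (unsigned int i = 0; i < total; i++) {
			if (++num == max_num) { /* buffer full: send it */
				flush_batch(num);
				num = 0;
			}
		}
		if (num)                        /* send the partial tail */
			flush_batch(num);
		return 0;
	}
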
1115struct lock_to_push {
1116	struct list_head llist;
1117	__u64 offset;
1118	__u64 length;
1119	__u32 pid;
1120	__u16 netfid;
1121	__u8 type;
1122};
1123
1124static int
1125cifs_push_posix_locks(struct cifsFileInfo *cfile)
1126{
1127	struct inode *inode = d_inode(cfile->dentry);
1128	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1129	struct file_lock *flock;
1130	struct file_lock_context *flctx = inode->i_flctx;
1131	unsigned int count = 0, i;
1132	int rc = 0, xid, type;
1133	struct list_head locks_to_send, *el;
1134	struct lock_to_push *lck, *tmp;
1135	__u64 length;
1136
1137	xid = get_xid();
1138
1139	if (!flctx)
1140		goto out;
1141
1142	spin_lock(&flctx->flc_lock);
1143	list_for_each(el, &flctx->flc_posix) {
1144		count++;
1145	}
1146	spin_unlock(&flctx->flc_lock);
1147
1148	INIT_LIST_HEAD(&locks_to_send);
1149
1150	/*
1151	 * Allocating count locks is enough because no FL_POSIX locks can be
1152	 * added to the list while we are holding cinode->lock_sem that
1153	 * protects locking operations of this inode.
1154	 */
1155	for (i = 0; i < count; i++) {
1156		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1157		if (!lck) {
1158			rc = -ENOMEM;
1159			goto err_out;
1160		}
1161		list_add_tail(&lck->llist, &locks_to_send);
1162	}
1163
1164	el = locks_to_send.next;
1165	spin_lock(&flctx->flc_lock);
1166	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1167		if (el == &locks_to_send) {
1168			/*
1169			 * The list ended. We don't have enough allocated
1170			 * structures - something is really wrong.
1171			 */
1172			cifs_dbg(VFS, "Can't push all brlocks!\n");
1173			break;
1174		}
1175		length = 1 + flock->fl_end - flock->fl_start;
1176		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1177			type = CIFS_RDLCK;
1178		else
1179			type = CIFS_WRLCK;
1180		lck = list_entry(el, struct lock_to_push, llist);
1181		lck->pid = flock->fl_pid;
1182		lck->netfid = cfile->fid.netfid;
1183		lck->length = length;
1184		lck->type = type;
1185		lck->offset = flock->fl_start;
1186	}
1187	spin_unlock(&flctx->flc_lock);
1188
1189	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1190		int stored_rc;
1191
1192		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1193					     lck->offset, lck->length, NULL,
1194					     lck->type, 0);
1195		if (stored_rc)
1196			rc = stored_rc;
1197		list_del(&lck->llist);
1198		kfree(lck);
1199	}
1200
1201out:
1202	free_xid(xid);
1203	return rc;
1204err_out:
1205	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1206		list_del(&lck->llist);
1207		kfree(lck);
1208	}
1209	goto out;
1210}
1211
1212static int
1213cifs_push_locks(struct cifsFileInfo *cfile)
1214{
1215	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1216	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1217	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1218	int rc = 0;
1219
1220	/* we are going to update can_cache_brlcks here - need a write access */
1221	down_write(&cinode->lock_sem);
1222	if (!cinode->can_cache_brlcks) {
1223		up_write(&cinode->lock_sem);
1224		return rc;
1225	}
1226
1227	if (cap_unix(tcon->ses) &&
1228	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1229	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1230		rc = cifs_push_posix_locks(cfile);
1231	else
1232		rc = tcon->ses->server->ops->push_mand_locks(cfile);
1233
1234	cinode->can_cache_brlcks = false;
1235	up_write(&cinode->lock_sem);
1236	return rc;
1237}
1238
1239static void
1240cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1241		bool *wait_flag, struct TCP_Server_Info *server)
1242{
1243	if (flock->fl_flags & FL_POSIX)
1244		cifs_dbg(FYI, "Posix\n");
1245	if (flock->fl_flags & FL_FLOCK)
1246		cifs_dbg(FYI, "Flock\n");
1247	if (flock->fl_flags & FL_SLEEP) {
1248		cifs_dbg(FYI, "Blocking lock\n");
1249		*wait_flag = true;
1250	}
1251	if (flock->fl_flags & FL_ACCESS)
1252		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1253	if (flock->fl_flags & FL_LEASE)
1254		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1255	if (flock->fl_flags &
1256	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1257	       FL_ACCESS | FL_LEASE | FL_CLOSE)))
1258		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1259
1260	*type = server->vals->large_lock_type;
1261	if (flock->fl_type == F_WRLCK) {
1262		cifs_dbg(FYI, "F_WRLCK\n");
1263		*type |= server->vals->exclusive_lock_type;
1264		*lock = 1;
1265	} else if (flock->fl_type == F_UNLCK) {
1266		cifs_dbg(FYI, "F_UNLCK\n");
1267		*type |= server->vals->unlock_lock_type;
1268		*unlock = 1;
1269		/* Check if unlock includes more than one lock range */
1270	} else if (flock->fl_type == F_RDLCK) {
1271		cifs_dbg(FYI, "F_RDLCK\n");
1272		*type |= server->vals->shared_lock_type;
1273		*lock = 1;
1274	} else if (flock->fl_type == F_EXLCK) {
1275		cifs_dbg(FYI, "F_EXLCK\n");
1276		*type |= server->vals->exclusive_lock_type;
1277		*lock = 1;
1278	} else if (flock->fl_type == F_SHLCK) {
1279		cifs_dbg(FYI, "F_SHLCK\n");
1280		*type |= server->vals->shared_lock_type;
1281		*lock = 1;
1282	} else
1283		cifs_dbg(FYI, "Unknown type of lock\n");
1284}
1285
1286static int
1287cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1288	   bool wait_flag, bool posix_lck, unsigned int xid)
1289{
1290	int rc = 0;
1291	__u64 length = 1 + flock->fl_end - flock->fl_start;
1292	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1293	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1294	struct TCP_Server_Info *server = tcon->ses->server;
1295	__u16 netfid = cfile->fid.netfid;
1296
1297	if (posix_lck) {
1298		int posix_lock_type;
1299
1300		rc = cifs_posix_lock_test(file, flock);
1301		if (!rc)
1302			return rc;
1303
1304		if (type & server->vals->shared_lock_type)
1305			posix_lock_type = CIFS_RDLCK;
1306		else
1307			posix_lock_type = CIFS_WRLCK;
1308		rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1309				      flock->fl_start, length, flock,
1310				      posix_lock_type, wait_flag);
1311		return rc;
1312	}
1313
1314	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1315	if (!rc)
1316		return rc;
1317
1318	/* BB we could chain these into one lock request BB */
1319	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1320				    1, 0, false);
1321	if (rc == 0) {
1322		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1323					    type, 0, 1, false);
1324		flock->fl_type = F_UNLCK;
1325		if (rc != 0)
1326			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1327				 rc);
1328		return 0;
1329	}
1330
1331	if (type & server->vals->shared_lock_type) {
1332		flock->fl_type = F_WRLCK;
1333		return 0;
1334	}
1335
1336	type &= ~server->vals->exclusive_lock_type;
1337
1338	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1339				    type | server->vals->shared_lock_type,
1340				    1, 0, false);
1341	if (rc == 0) {
1342		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1343			type | server->vals->shared_lock_type, 0, 1, false);
1344		flock->fl_type = F_RDLCK;
1345		if (rc != 0)
1346			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1347				 rc);
1348	} else
1349		flock->fl_type = F_WRLCK;
1350
1351	return 0;
1352}
1353
1354void
1355cifs_move_llist(struct list_head *source, struct list_head *dest)
1356{
1357	struct list_head *li, *tmp;
1358	list_for_each_safe(li, tmp, source)
1359		list_move(li, dest);
1360}
1361
1362void
1363cifs_free_llist(struct list_head *llist)
1364{
1365	struct cifsLockInfo *li, *tmp;
1366	list_for_each_entry_safe(li, tmp, llist, llist) {
1367		cifs_del_lock_waiters(li);
1368		list_del(&li->llist);
1369		kfree(li);
1370	}
1371}
1372
1373int
1374cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1375		  unsigned int xid)
1376{
1377	int rc = 0, stored_rc;
1378	int types[] = {LOCKING_ANDX_LARGE_FILES,
1379		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1380	unsigned int i;
1381	unsigned int max_num, num, max_buf;
1382	LOCKING_ANDX_RANGE *buf, *cur;
1383	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1384	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1385	struct cifsLockInfo *li, *tmp;
1386	__u64 length = 1 + flock->fl_end - flock->fl_start;
1387	struct list_head tmp_llist;
1388
1389	INIT_LIST_HEAD(&tmp_llist);
1390
1391	/*
1392	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1393	 * and check it for zero before using.
1394	 */
1395	max_buf = tcon->ses->server->maxBuf;
1396	if (!max_buf)
1397		return -EINVAL;
1398
1399	max_num = (max_buf - sizeof(struct smb_hdr)) /
1400						sizeof(LOCKING_ANDX_RANGE);
1401	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1402	if (!buf)
1403		return -ENOMEM;
1404
1405	down_write(&cinode->lock_sem);
1406	for (i = 0; i < 2; i++) {
1407		cur = buf;
1408		num = 0;
1409		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1410			if (flock->fl_start > li->offset ||
1411			    (flock->fl_start + length) <
1412			    (li->offset + li->length))
1413				continue;
1414			if (current->tgid != li->pid)
1415				continue;
1416			if (types[i] != li->type)
1417				continue;
1418			if (cinode->can_cache_brlcks) {
1419				/*
1420				 * We can cache brlock requests - simply remove
1421				 * a lock from the file's list.
1422				 */
1423				list_del(&li->llist);
1424				cifs_del_lock_waiters(li);
1425				kfree(li);
1426				continue;
1427			}
1428			cur->Pid = cpu_to_le16(li->pid);
1429			cur->LengthLow = cpu_to_le32((u32)li->length);
1430			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1431			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1432			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1433			/*
1434			 * We need to save a lock here to let us add it again to
1435			 * the file's list if the unlock range request fails on
1436			 * the server.
1437			 */
1438			list_move(&li->llist, &tmp_llist);
1439			if (++num == max_num) {
1440				stored_rc = cifs_lockv(xid, tcon,
1441						       cfile->fid.netfid,
1442						       li->type, num, 0, buf);
1443				if (stored_rc) {
1444					/*
1445					 * We failed on the unlock range
1446					 * request - add all locks from the tmp
1447					 * list to the head of the file's list.
1448					 */
1449					cifs_move_llist(&tmp_llist,
1450							&cfile->llist->locks);
1451					rc = stored_rc;
1452				} else
1453					/*
 1454					 * The unlock range request succeeded -
1455					 * free the tmp list.
1456					 */
1457					cifs_free_llist(&tmp_llist);
1458				cur = buf;
1459				num = 0;
1460			} else
1461				cur++;
1462		}
1463		if (num) {
1464			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1465					       types[i], num, 0, buf);
1466			if (stored_rc) {
1467				cifs_move_llist(&tmp_llist,
1468						&cfile->llist->locks);
1469				rc = stored_rc;
1470			} else
1471				cifs_free_llist(&tmp_llist);
1472		}
1473	}
1474
1475	up_write(&cinode->lock_sem);
1476	kfree(buf);
1477	return rc;
1478}
1479
1480static int
1481cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1482	   bool wait_flag, bool posix_lck, int lock, int unlock,
1483	   unsigned int xid)
1484{
1485	int rc = 0;
1486	__u64 length = 1 + flock->fl_end - flock->fl_start;
1487	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1488	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1489	struct TCP_Server_Info *server = tcon->ses->server;
1490	struct inode *inode = d_inode(cfile->dentry);
1491
1492	if (posix_lck) {
1493		int posix_lock_type;
1494
1495		rc = cifs_posix_lock_set(file, flock);
1496		if (!rc || rc < 0)
1497			return rc;
1498
1499		if (type & server->vals->shared_lock_type)
1500			posix_lock_type = CIFS_RDLCK;
1501		else
1502			posix_lock_type = CIFS_WRLCK;
1503
1504		if (unlock == 1)
1505			posix_lock_type = CIFS_UNLCK;
1506
1507		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1508				      current->tgid, flock->fl_start, length,
1509				      NULL, posix_lock_type, wait_flag);
1510		goto out;
1511	}
1512
1513	if (lock) {
1514		struct cifsLockInfo *lock;
1515
1516		lock = cifs_lock_init(flock->fl_start, length, type);
1517		if (!lock)
1518			return -ENOMEM;
1519
1520		rc = cifs_lock_add_if(cfile, lock, wait_flag);
1521		if (rc < 0) {
1522			kfree(lock);
1523			return rc;
1524		}
1525		if (!rc)
1526			goto out;
1527
1528		/*
1529		 * Windows 7 server can delay breaking lease from read to None
1530		 * if we set a byte-range lock on a file - break it explicitly
1531		 * before sending the lock to the server to be sure the next
 1532		 * read won't conflict with non-overlapping locks due to
 1533		 * page reads.
1534		 */
1535		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1536					CIFS_CACHE_READ(CIFS_I(inode))) {
1537			cifs_zap_mapping(inode);
1538			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1539				 inode);
1540			CIFS_I(inode)->oplock = 0;
1541		}
1542
1543		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1544					    type, 1, 0, wait_flag);
1545		if (rc) {
1546			kfree(lock);
1547			return rc;
1548		}
1549
1550		cifs_lock_add(cfile, lock);
1551	} else if (unlock)
1552		rc = server->ops->mand_unlock_range(cfile, flock, xid);
1553
1554out:
1555	if (flock->fl_flags & FL_POSIX && !rc)
1556		rc = locks_lock_file_wait(file, flock);
1557	return rc;
1558}
1559
1560int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1561{
1562	int rc, xid;
1563	int lock = 0, unlock = 0;
1564	bool wait_flag = false;
1565	bool posix_lck = false;
1566	struct cifs_sb_info *cifs_sb;
1567	struct cifs_tcon *tcon;
1568	struct cifsInodeInfo *cinode;
1569	struct cifsFileInfo *cfile;
1570	__u16 netfid;
1571	__u32 type;
1572
1573	rc = -EACCES;
1574	xid = get_xid();
1575
1576	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1577		 cmd, flock->fl_flags, flock->fl_type,
1578		 flock->fl_start, flock->fl_end);
1579
1580	cfile = (struct cifsFileInfo *)file->private_data;
1581	tcon = tlink_tcon(cfile->tlink);
1582
1583	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1584			tcon->ses->server);
1585
1586	cifs_sb = CIFS_FILE_SB(file);
1587	netfid = cfile->fid.netfid;
1588	cinode = CIFS_I(file_inode(file));
1589
1590	if (cap_unix(tcon->ses) &&
1591	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1592	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1593		posix_lck = true;
1594	/*
1595	 * BB add code here to normalize offset and length to account for
1596	 * negative length which we can not accept over the wire.
1597	 */
1598	if (IS_GETLK(cmd)) {
1599		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1600		free_xid(xid);
1601		return rc;
1602	}
1603
1604	if (!lock && !unlock) {
1605		/*
1606		 * if no lock or unlock then nothing to do since we do not
1607		 * know what it is
1608		 */
1609		free_xid(xid);
1610		return -EOPNOTSUPP;
1611	}
1612
1613	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1614			xid);
1615	free_xid(xid);
1616	return rc;
1617}
1618
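
From user space, cifs_lock() is reached through fcntl(2): F_SETLK is a non-blocking request, while F_SETLKW additionally sets FL_SLEEP and therefore wait_flag above. An illustrative sketch with a hypothetical path:

	/* Illustrative user-space sketch of taking and releasing a byte-range lock. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		struct flock fl = {
			.l_type   = F_WRLCK,
			.l_whence = SEEK_SET,
			.l_start  = 0,
			.l_len    = 0,          /* 0 means "to end of file" */
		};
		int fd = open("/mnt/cifs/shared.dat", O_RDWR);

		if (fd < 0)
			return 1;
		if (fcntl(fd, F_SETLKW, &fl) < 0) { /* blocking: waits on conflicts */
			perror("F_SETLKW");
			return 1;
		}
		/* ... work on the locked range ... */
		fl.l_type = F_UNLCK;
		fcntl(fd, F_SETLK, &fl);            /* release */
		close(fd);
		return 0;
	}
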
1619/*
1620 * update the file size (if needed) after a write. Should be called with
1621 * the inode->i_lock held
1622 */
1623void
1624cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1625		      unsigned int bytes_written)
1626{
1627	loff_t end_of_write = offset + bytes_written;
1628
1629	if (end_of_write > cifsi->server_eof)
1630		cifsi->server_eof = end_of_write;
1631}
1632
1633static ssize_t
1634cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1635	   size_t write_size, loff_t *offset)
1636{
1637	int rc = 0;
1638	unsigned int bytes_written = 0;
1639	unsigned int total_written;
1640	struct cifs_sb_info *cifs_sb;
1641	struct cifs_tcon *tcon;
1642	struct TCP_Server_Info *server;
1643	unsigned int xid;
1644	struct dentry *dentry = open_file->dentry;
1645	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1646	struct cifs_io_parms io_parms;
1647
1648	cifs_sb = CIFS_SB(dentry->d_sb);
1649
1650	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1651		 write_size, *offset, dentry);
1652
1653	tcon = tlink_tcon(open_file->tlink);
1654	server = tcon->ses->server;
1655
1656	if (!server->ops->sync_write)
1657		return -ENOSYS;
1658
1659	xid = get_xid();
1660
1661	for (total_written = 0; write_size > total_written;
1662	     total_written += bytes_written) {
1663		rc = -EAGAIN;
1664		while (rc == -EAGAIN) {
1665			struct kvec iov[2];
1666			unsigned int len;
1667
1668			if (open_file->invalidHandle) {
1669				/* we could deadlock if we called
1670				   filemap_fdatawait from here so tell
1671				   reopen_file not to flush data to
1672				   server now */
1673				rc = cifs_reopen_file(open_file, false);
1674				if (rc != 0)
1675					break;
1676			}
1677
1678			len = min(server->ops->wp_retry_size(d_inode(dentry)),
1679				  (unsigned int)write_size - total_written);
1680			/* iov[0] is reserved for smb header */
1681			iov[1].iov_base = (char *)write_data + total_written;
1682			iov[1].iov_len = len;
1683			io_parms.pid = pid;
1684			io_parms.tcon = tcon;
1685			io_parms.offset = *offset;
1686			io_parms.length = len;
1687			rc = server->ops->sync_write(xid, &open_file->fid,
1688					&io_parms, &bytes_written, iov, 1);
1689		}
1690		if (rc || (bytes_written == 0)) {
1691			if (total_written)
1692				break;
1693			else {
1694				free_xid(xid);
1695				return rc;
1696			}
1697		} else {
1698			spin_lock(&d_inode(dentry)->i_lock);
1699			cifs_update_eof(cifsi, *offset, bytes_written);
1700			spin_unlock(&d_inode(dentry)->i_lock);
1701			*offset += bytes_written;
1702		}
1703	}
1704
1705	cifs_stats_bytes_written(tcon, total_written);
1706
1707	if (total_written > 0) {
1708		spin_lock(&d_inode(dentry)->i_lock);
1709		if (*offset > d_inode(dentry)->i_size)
1710			i_size_write(d_inode(dentry), *offset);
1711		spin_unlock(&d_inode(dentry)->i_lock);
1712	}
1713	mark_inode_dirty_sync(d_inode(dentry));
1714	free_xid(xid);
1715	return total_written;
1716}
1717
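
cifs_write() above keeps issuing writes until write_size bytes have gone out, retrying -EAGAIN after reopening an invalidated handle and advancing *offset as data lands. The familiar user-space analogue of that accumulation loop is sketched below; write_all is a hypothetical helper name and the retry policy is simplified.

	/* Hypothetical user-space sketch of a "write until done" loop. */
	#include <errno.h>
	#include <unistd.h>

	ssize_t write_all(int fd, const char *buf, size_t len)
	{
		size_t total = 0;

		while (total < len) {
			ssize_t n = write(fd, buf + total, len - total);

			if (n < 0) {
				if (errno == EINTR || errno == EAGAIN)
					continue;   /* retry, as cifs_write retries -EAGAIN */
				return -1;
			}
			if (n == 0)
				break;              /* nothing written: stop */
			total += n;
		}
		return (ssize_t)total;
	}
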
1718struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1719					bool fsuid_only)
1720{
1721	struct cifsFileInfo *open_file = NULL;
1722	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1723
1724	/* only filter by fsuid on multiuser mounts */
1725	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1726		fsuid_only = false;
1727
1728	spin_lock(&cifs_file_list_lock);
1729	/* we could simply get the first_list_entry since write-only entries
1730	   are always at the end of the list but since the first entry might
1731	   have a close pending, we go through the whole list */
1732	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1733		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1734			continue;
1735		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1736			if (!open_file->invalidHandle) {
1737				/* found a good file */
1738				/* lock it so it will not be closed on us */
1739				cifsFileInfo_get_locked(open_file);
1740				spin_unlock(&cifs_file_list_lock);
1741				return open_file;
1742			} /* else might as well continue, and look for
1743			     another, or simply have the caller reopen it
1744			     again rather than trying to fix this handle */
1745		} else /* write only file */
1746			break; /* write only files are last so must be done */
1747	}
1748	spin_unlock(&cifs_file_list_lock);
1749	return NULL;
1750}
1751
1752struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1753					bool fsuid_only)
1754{
1755	struct cifsFileInfo *open_file, *inv_file = NULL;
1756	struct cifs_sb_info *cifs_sb;
1757	bool any_available = false;
1758	int rc;
1759	unsigned int refind = 0;
1760
1761	/* Having a null inode here (because mapping->host was set to zero by
 1762	the VFS or MM) should not happen but we had reports of an oops (due to
1763	it being zero) during stress testcases so we need to check for it */
1764
1765	if (cifs_inode == NULL) {
1766		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1767		dump_stack();
1768		return NULL;
1769	}
1770
1771	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1772
1773	/* only filter by fsuid on multiuser mounts */
1774	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1775		fsuid_only = false;
1776
1777	spin_lock(&cifs_file_list_lock);
1778refind_writable:
1779	if (refind > MAX_REOPEN_ATT) {
1780		spin_unlock(&cifs_file_list_lock);
1781		return NULL;
1782	}
1783	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1784		if (!any_available && open_file->pid != current->tgid)
1785			continue;
1786		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1787			continue;
1788		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1789			if (!open_file->invalidHandle) {
1790				/* found a good writable file */
1791				cifsFileInfo_get_locked(open_file);
1792				spin_unlock(&cifs_file_list_lock);
1793				return open_file;
1794			} else {
1795				if (!inv_file)
1796					inv_file = open_file;
1797			}
1798		}
1799	}
1800	/* couldn't find useable FH with same pid, try any available */
1801	if (!any_available) {
1802		any_available = true;
1803		goto refind_writable;
1804	}
1805
1806	if (inv_file) {
1807		any_available = false;
1808		cifsFileInfo_get_locked(inv_file);
1809	}
1810
1811	spin_unlock(&cifs_file_list_lock);
1812
1813	if (inv_file) {
1814		rc = cifs_reopen_file(inv_file, false);
1815		if (!rc)
1816			return inv_file;
1817		else {
1818			spin_lock(&cifs_file_list_lock);
1819			list_move_tail(&inv_file->flist,
1820					&cifs_inode->openFileList);
1821			spin_unlock(&cifs_file_list_lock);
1822			cifsFileInfo_put(inv_file);
1823			spin_lock(&cifs_file_list_lock);
1824			++refind;
1825			inv_file = NULL;
1826			goto refind_writable;
1827		}
1828	}
1829
1830	return NULL;
1831}
1832
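/*
 * Write the byte range [from, to) of a page back to the server using any
 * writable handle for the inode.  The range is clamped to i_size so the
 * write never extends the file, and a page that now lies entirely beyond
 * EOF (e.g. after a racing truncate) is silently skipped.
 */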
1833static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1834{
1835	struct address_space *mapping = page->mapping;
1836	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1837	char *write_data;
1838	int rc = -EFAULT;
1839	int bytes_written = 0;
1840	struct inode *inode;
1841	struct cifsFileInfo *open_file;
1842
1843	if (!mapping || !mapping->host)
1844		return -EFAULT;
1845
1846	inode = page->mapping->host;
1847
1848	offset += (loff_t)from;
1849	write_data = kmap(page);
1850	write_data += from;
1851
1852	if ((to > PAGE_SIZE) || (from > to)) {
1853		kunmap(page);
1854		return -EIO;
1855	}
1856
1857	/* racing with truncate? */
1858	if (offset > mapping->host->i_size) {
1859		kunmap(page);
1860		return 0; /* don't care */
1861	}
1862
1863	/* check to make sure that we are not extending the file */
1864	if (mapping->host->i_size - offset < (loff_t)to)
1865		to = (unsigned)(mapping->host->i_size - offset);
1866
1867	open_file = find_writable_file(CIFS_I(mapping->host), false);
1868	if (open_file) {
1869		bytes_written = cifs_write(open_file, open_file->pid,
1870					   write_data, to - from, &offset);
1871		cifsFileInfo_put(open_file);
1872		/* Does mm or vfs already set times? */
1873		inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1874		if ((bytes_written > 0) && (offset))
1875			rc = 0;
1876		else if (bytes_written < 0)
1877			rc = bytes_written;
1878	} else {
1879		cifs_dbg(FYI, "No writeable filehandles for inode\n");
1880		rc = -EIO;
1881	}
1882
1883	kunmap(page);
1884	return rc;
1885}
1886
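/*
 * Allocate a cifs_writedata big enough for @tofind pages and fill its page
 * array with dirty pages from @mapping starting at *@index.  The number of
 * pages actually found is returned in *@found_pages.
 */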
1887static struct cifs_writedata *
1888wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1889			  pgoff_t end, pgoff_t *index,
1890			  unsigned int *found_pages)
1891{
1892	unsigned int nr_pages;
1893	struct page **pages;
1894	struct cifs_writedata *wdata;
1895
1896	wdata = cifs_writedata_alloc((unsigned int)tofind,
1897				     cifs_writev_complete);
1898	if (!wdata)
1899		return NULL;
1900
1901	/*
1902	 * find_get_pages_tag seems to return a max of 256 on each
1903	 * iteration, so we must call it several times in order to
1904	 * fill the array or the wsize is effectively limited to
1905	 * 256 * PAGE_SIZE.
1906	 */
1907	*found_pages = 0;
1908	pages = wdata->pages;
1909	do {
1910		nr_pages = find_get_pages_tag(mapping, index,
1911					      PAGECACHE_TAG_DIRTY, tofind,
1912					      pages);
1913		*found_pages += nr_pages;
1914		tofind -= nr_pages;
1915		pages += nr_pages;
1916	} while (nr_pages && tofind && *index <= end);
1917
1918	return wdata;
1919}
1920
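/*
 * Lock a consecutive run of the pages found above, clear their dirty bits
 * and mark them for writeback.  Stops at the first page that is no longer
 * eligible (truncated, non-contiguous, past the requested range or EOF)
 * and releases any pages that will not be sent.  Returns the number of
 * pages ready for the write request.
 */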
1921static unsigned int
1922wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1923		    struct address_space *mapping,
1924		    struct writeback_control *wbc,
1925		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1926{
1927	unsigned int nr_pages = 0, i;
1928	struct page *page;
1929
1930	for (i = 0; i < found_pages; i++) {
1931		page = wdata->pages[i];
1932		/*
1933		 * At this point we hold neither mapping->tree_lock nor
1934		 * lock on the page itself: the page may be truncated or
1935		 * invalidated (changing page->mapping to NULL), or even
1936		 * swizzled back from swapper_space to tmpfs file
1937		 * mapping
1938		 */
1939
1940		if (nr_pages == 0)
1941			lock_page(page);
1942		else if (!trylock_page(page))
1943			break;
1944
1945		if (unlikely(page->mapping != mapping)) {
1946			unlock_page(page);
1947			break;
1948		}
1949
1950		if (!wbc->range_cyclic && page->index > end) {
1951			*done = true;
1952			unlock_page(page);
1953			break;
1954		}
1955
1956		if (*next && (page->index != *next)) {
1957			/* Not next consecutive page */
1958			unlock_page(page);
1959			break;
1960		}
1961
1962		if (wbc->sync_mode != WB_SYNC_NONE)
1963			wait_on_page_writeback(page);
1964
1965		if (PageWriteback(page) ||
1966				!clear_page_dirty_for_io(page)) {
1967			unlock_page(page);
1968			break;
1969		}
1970
1971		/*
1972		 * This actually clears the dirty bit in the radix tree.
1973		 * See cifs_writepage() for more commentary.
1974		 */
1975		set_page_writeback(page);
1976		if (page_offset(page) >= i_size_read(mapping->host)) {
1977			*done = true;
1978			unlock_page(page);
1979			end_page_writeback(page);
1980			break;
1981		}
1982
1983		wdata->pages[i] = page;
1984		*next = page->index + 1;
1985		++nr_pages;
1986	}
1987
1988	/* reset index to refind any pages skipped */
1989	if (nr_pages == 0)
1990		*index = wdata->pages[0]->index + 1;
1991
1992	/* put any pages we aren't going to use */
1993	for (i = nr_pages; i < found_pages; i++) {
1994		put_page(wdata->pages[i]);
1995		wdata->pages[i] = NULL;
1996	}
1997
1998	return nr_pages;
1999}
2000
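/*
 * Fill in the remaining cifs_writedata fields (offset, length, tail size),
 * pick a writable handle for the inode and issue the asynchronous write.
 * All pages are unlocked before returning, whether or not the send
 * succeeded.
 */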
2001static int
2002wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2003		 struct address_space *mapping, struct writeback_control *wbc)
2004{
2005	int rc = 0;
2006	struct TCP_Server_Info *server;
2007	unsigned int i;
2008
2009	wdata->sync_mode = wbc->sync_mode;
2010	wdata->nr_pages = nr_pages;
2011	wdata->offset = page_offset(wdata->pages[0]);
2012	wdata->pagesz = PAGE_SIZE;
2013	wdata->tailsz = min(i_size_read(mapping->host) -
2014			page_offset(wdata->pages[nr_pages - 1]),
2015			(loff_t)PAGE_SIZE);
2016	wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2017
2018	if (wdata->cfile != NULL)
2019		cifsFileInfo_put(wdata->cfile);
2020	wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2021	if (!wdata->cfile) {
2022		cifs_dbg(VFS, "No writable handles for inode\n");
2023		rc = -EBADF;
2024	} else {
2025		wdata->pid = wdata->cfile->pid;
2026		server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2027		rc = server->ops->async_writev(wdata, cifs_writedata_release);
2028	}
2029
2030	for (i = 0; i < nr_pages; ++i)
2031		unlock_page(wdata->pages[i]);
2032
2033	return rc;
2034}
2035
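/*
 * Write out dirty pages of the mapping in wsize-sized batches, obtaining
 * send credits from the server before each batch.  Falls back to
 * generic_writepages() (one page at a time) when wsize is smaller than a
 * page.
 */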
2036static int cifs_writepages(struct address_space *mapping,
2037			   struct writeback_control *wbc)
2038{
2039	struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2040	struct TCP_Server_Info *server;
2041	bool done = false, scanned = false, range_whole = false;
2042	pgoff_t end, index;
2043	struct cifs_writedata *wdata;
2044	int rc = 0;
2045
2046	/*
2047	 * If wsize is smaller than the page cache size, default to writing
2048	 * one page at a time via cifs_writepage
2049	 */
2050	if (cifs_sb->wsize < PAGE_SIZE)
2051		return generic_writepages(mapping, wbc);
2052
2053	if (wbc->range_cyclic) {
2054		index = mapping->writeback_index; /* Start from prev offset */
2055		end = -1;
2056	} else {
2057		index = wbc->range_start >> PAGE_SHIFT;
2058		end = wbc->range_end >> PAGE_SHIFT;
2059		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2060			range_whole = true;
2061		scanned = true;
2062	}
2063	server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2064retry:
2065	while (!done && index <= end) {
2066		unsigned int i, nr_pages, found_pages, wsize, credits;
2067		pgoff_t next = 0, tofind, saved_index = index;
2068
2069		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2070						   &wsize, &credits);
2071		if (rc)
2072			break;
2073
2074		tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2075
2076		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2077						  &found_pages);
2078		if (!wdata) {
2079			rc = -ENOMEM;
2080			add_credits_and_wake_if(server, credits, 0);
2081			break;
2082		}
2083
2084		if (found_pages == 0) {
2085			kref_put(&wdata->refcount, cifs_writedata_release);
2086			add_credits_and_wake_if(server, credits, 0);
2087			break;
2088		}
2089
2090		nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2091					       end, &index, &next, &done);
2092
2093		/* nothing to write? */
2094		if (nr_pages == 0) {
2095			kref_put(&wdata->refcount, cifs_writedata_release);
2096			add_credits_and_wake_if(server, credits, 0);
2097			continue;
2098		}
2099
2100		wdata->credits = credits;
2101
2102		rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2103
2104		/* send failure -- clean up the mess */
2105		if (rc != 0) {
2106			add_credits_and_wake_if(server, wdata->credits, 0);
2107			for (i = 0; i < nr_pages; ++i) {
2108				if (rc == -EAGAIN)
2109					redirty_page_for_writepage(wbc,
2110							   wdata->pages[i]);
2111				else
2112					SetPageError(wdata->pages[i]);
2113				end_page_writeback(wdata->pages[i]);
2114				put_page(wdata->pages[i]);
2115			}
2116			if (rc != -EAGAIN)
2117				mapping_set_error(mapping, rc);
2118		}
2119		kref_put(&wdata->refcount, cifs_writedata_release);
2120
2121		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2122			index = saved_index;
2123			continue;
2124		}
2125
2126		wbc->nr_to_write -= nr_pages;
2127		if (wbc->nr_to_write <= 0)
2128			done = true;
2129
2130		index = next;
2131	}
2132
2133	if (!scanned && !done) {
2134		/*
2135		 * We hit the last page and there is more work to be done: wrap
2136		 * back to the start of the file
2137		 */
2138		scanned = true;
2139		index = 0;
2140		goto retry;
2141	}
2142
2143	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2144		mapping->writeback_index = index;
2145
2146	return rc;
2147}
2148
2149static int
2150cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2151{
2152	int rc;
2153	unsigned int xid;
2154
2155	xid = get_xid();
2156/* BB add check for wbc flags */
2157	get_page(page);
2158	if (!PageUptodate(page))
2159		cifs_dbg(FYI, "ppw - page not up to date\n");
2160
2161	/*
2162	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2163	 *
2164	 * A writepage() implementation always needs to do either this,
2165	 * or re-dirty the page with "redirty_page_for_writepage()" in
2166	 * the case of a failure.
2167	 *
2168	 * Just unlocking the page will cause the radix tree tag-bits
2169	 * to fail to update with the state of the page correctly.
2170	 */
2171	set_page_writeback(page);
2172retry_write:
2173	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2174	if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2175		goto retry_write;
2176	else if (rc == -EAGAIN)
2177		redirty_page_for_writepage(wbc, page);
2178	else if (rc != 0)
2179		SetPageError(page);
2180	else
2181		SetPageUptodate(page);
2182	end_page_writeback(page);
2183	put_page(page);
2184	free_xid(xid);
2185	return rc;
2186}
2187
2188static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2189{
2190	int rc = cifs_writepage_locked(page, wbc);
2191	unlock_page(page);
2192	return rc;
2193}
2194
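/*
 * ->write_end for the cifs address space.  If the page is up to date the
 * copied data is simply left dirty in the page cache; otherwise it is
 * written to the server synchronously with cifs_write().  The in-core
 * inode size is extended if the write went past the old EOF.
 */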
2195static int cifs_write_end(struct file *file, struct address_space *mapping,
2196			loff_t pos, unsigned len, unsigned copied,
2197			struct page *page, void *fsdata)
2198{
2199	int rc;
2200	struct inode *inode = mapping->host;
2201	struct cifsFileInfo *cfile = file->private_data;
2202	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2203	__u32 pid;
2204
2205	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2206		pid = cfile->pid;
2207	else
2208		pid = current->tgid;
2209
2210	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2211		 page, pos, copied);
2212
2213	if (PageChecked(page)) {
2214		if (copied == len)
2215			SetPageUptodate(page);
2216		ClearPageChecked(page);
2217	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
2218		SetPageUptodate(page);
2219
2220	if (!PageUptodate(page)) {
2221		char *page_data;
2222		unsigned offset = pos & (PAGE_SIZE - 1);
2223		unsigned int xid;
2224
2225		xid = get_xid();
2226		/* this is probably better than directly calling
2227		   cifs_partialpagewrite since in this function the file
2228		   handle is known, which we might as well leverage */
2229		/* BB check if anything else missing out of ppw
2230		   such as updating last write time */
2231		page_data = kmap(page);
2232		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2233		/* if (rc < 0) should we set writebehind rc? */
2234		kunmap(page);
2235
2236		free_xid(xid);
2237	} else {
2238		rc = copied;
2239		pos += copied;
2240		set_page_dirty(page);
2241	}
2242
2243	if (rc > 0) {
2244		spin_lock(&inode->i_lock);
2245		if (pos > inode->i_size)
2246			i_size_write(inode, pos);
2247		spin_unlock(&inode->i_lock);
2248	}
2249
2250	unlock_page(page);
2251	put_page(page);
2252
2253	return rc;
2254}
2255
2256int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2257		      int datasync)
2258{
2259	unsigned int xid;
2260	int rc = 0;
2261	struct cifs_tcon *tcon;
2262	struct TCP_Server_Info *server;
2263	struct cifsFileInfo *smbfile = file->private_data;
2264	struct inode *inode = file_inode(file);
2265	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2266
2267	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2268	if (rc)
2269		return rc;
2270	inode_lock(inode);
2271
2272	xid = get_xid();
2273
2274	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2275		 file, datasync);
2276
2277	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2278		rc = cifs_zap_mapping(inode);
2279		if (rc) {
2280			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2281			rc = 0; /* don't care about it in fsync */
2282		}
2283	}
2284
2285	tcon = tlink_tcon(smbfile->tlink);
2286	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2287		server = tcon->ses->server;
2288		if (server->ops->flush)
2289			rc = server->ops->flush(xid, tcon, &smbfile->fid);
2290		else
2291			rc = -ENOSYS;
2292	}
2293
2294	free_xid(xid);
2295	inode_unlock(inode);
2296	return rc;
2297}
2298
2299int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2300{
2301	unsigned int xid;
2302	int rc = 0;
2303	struct cifs_tcon *tcon;
2304	struct TCP_Server_Info *server;
2305	struct cifsFileInfo *smbfile = file->private_data;
2306	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2307	struct inode *inode = file->f_mapping->host;
2308
2309	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2310	if (rc)
2311		return rc;
2312	inode_lock(inode);
2313
2314	xid = get_xid();
2315
2316	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2317		 file, datasync);
2318
2319	tcon = tlink_tcon(smbfile->tlink);
2320	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2321		server = tcon->ses->server;
2322		if (server->ops->flush)
2323			rc = server->ops->flush(xid, tcon, &smbfile->fid);
2324		else
2325			rc = -ENOSYS;
2326	}
2327
2328	free_xid(xid);
2329	inode_unlock(inode);
2330	return rc;
2331}
2332
2333/*
2334 * As the file closes, flush all cached write data for this inode, checking
2335 * for write behind errors.
2336 */
2337int cifs_flush(struct file *file, fl_owner_t id)
2338{
2339	struct inode *inode = file_inode(file);
2340	int rc = 0;
2341
2342	if (file->f_mode & FMODE_WRITE)
2343		rc = filemap_write_and_wait(inode->i_mapping);
2344
2345	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2346
2347	return rc;
2348}
2349
2350static int
2351cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2352{
2353	int rc = 0;
2354	unsigned long i;
2355
2356	for (i = 0; i < num_pages; i++) {
2357		pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2358		if (!pages[i]) {
2359			/*
2360			 * save number of pages we have already allocated and
2361			 * return with ENOMEM error
2362			 */
2363			num_pages = i;
2364			rc = -ENOMEM;
2365			break;
2366		}
2367	}
2368
2369	if (rc) {
2370		for (i = 0; i < num_pages; i++)
2371			put_page(pages[i]);
2372	}
2373	return rc;
2374}
2375
2376static inline
2377size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2378{
2379	size_t num_pages;
2380	size_t clen;
2381
2382	clen = min_t(const size_t, len, wsize);
2383	num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2384
2385	if (cur_len)
2386		*cur_len = clen;
2387
2388	return num_pages;
2389}
2390
2391static void
2392cifs_uncached_writedata_release(struct kref *refcount)
2393{
2394	int i;
2395	struct cifs_writedata *wdata = container_of(refcount,
2396					struct cifs_writedata, refcount);
2397
2398	for (i = 0; i < wdata->nr_pages; i++)
2399		put_page(wdata->pages[i]);
2400	cifs_writedata_release(refcount);
2401}
2402
2403static void
2404cifs_uncached_writev_complete(struct work_struct *work)
2405{
2406	struct cifs_writedata *wdata = container_of(work,
2407					struct cifs_writedata, work);
2408	struct inode *inode = d_inode(wdata->cfile->dentry);
2409	struct cifsInodeInfo *cifsi = CIFS_I(inode);
2410
2411	spin_lock(&inode->i_lock);
2412	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2413	if (cifsi->server_eof > inode->i_size)
2414		i_size_write(inode, cifsi->server_eof);
2415	spin_unlock(&inode->i_lock);
2416
2417	complete(&wdata->done);
2418
2419	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2420}
2421
2422static int
2423wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2424		      size_t *len, unsigned long *num_pages)
2425{
2426	size_t save_len, copied, bytes, cur_len = *len;
2427	unsigned long i, nr_pages = *num_pages;
2428
2429	save_len = cur_len;
2430	for (i = 0; i < nr_pages; i++) {
2431		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2432		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2433		cur_len -= copied;
2434		/*
2435		 * If we didn't copy as much as we expected, then that
2436		 * may mean we trod into an unmapped area. Stop copying
2437		 * at that point. On the next pass through the big
2438		 * loop, we'll likely end up getting a zero-length
2439		 * write and bailing out of it.
2440		 */
2441		if (copied < bytes)
2442			break;
2443	}
2444	cur_len = save_len - cur_len;
2445	*len = cur_len;
2446
2447	/*
2448	 * If we have no data to send, then that probably means that
2449	 * the copy above failed altogether. That's most likely because
2450	 * the address in the iovec was bogus. Return -EFAULT and let
2451	 * the caller free anything we allocated and bail out.
2452	 */
2453	if (!cur_len)
2454		return -EFAULT;
2455
2456	/*
2457	 * i + 1 now represents the number of pages we actually used in
2458	 * the copy phase above.
2459	 */
2460	*num_pages = i + 1;
2461	return 0;
2462}
2463
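/*
 * Slice an uncached write into wsize-sized cifs_writedata requests, copy
 * the user data into freshly allocated pages and issue the asynchronous
 * writes.  Each request that was sent successfully is added to @wdata_list
 * so the caller can wait for the replies; -EAGAIN from the send path
 * rewinds the iterator and retries that slice.
 */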
2464static int
2465cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2466		     struct cifsFileInfo *open_file,
2467		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2468{
2469	int rc = 0;
2470	size_t cur_len;
2471	unsigned long nr_pages, num_pages, i;
2472	struct cifs_writedata *wdata;
2473	struct iov_iter saved_from;
2474	loff_t saved_offset = offset;
2475	pid_t pid;
2476	struct TCP_Server_Info *server;
2477
2478	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2479		pid = open_file->pid;
2480	else
2481		pid = current->tgid;
2482
2483	server = tlink_tcon(open_file->tlink)->ses->server;
2484	memcpy(&saved_from, from, sizeof(struct iov_iter));
2485
2486	do {
2487		unsigned int wsize, credits;
2488
2489		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2490						   &wsize, &credits);
2491		if (rc)
2492			break;
2493
2494		nr_pages = get_numpages(wsize, len, &cur_len);
2495		wdata = cifs_writedata_alloc(nr_pages,
2496					     cifs_uncached_writev_complete);
2497		if (!wdata) {
2498			rc = -ENOMEM;
2499			add_credits_and_wake_if(server, credits, 0);
2500			break;
2501		}
2502
2503		rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2504		if (rc) {
2505			kfree(wdata);
2506			add_credits_and_wake_if(server, credits, 0);
2507			break;
2508		}
2509
2510		num_pages = nr_pages;
2511		rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2512		if (rc) {
2513			for (i = 0; i < nr_pages; i++)
2514				put_page(wdata->pages[i]);
2515			kfree(wdata);
2516			add_credits_and_wake_if(server, credits, 0);
2517			break;
2518		}
2519
2520		/*
2521		 * Bring nr_pages down to the number of pages we actually used,
2522		 * and free any pages that we didn't use.
2523		 */
2524		for ( ; nr_pages > num_pages; nr_pages--)
2525			put_page(wdata->pages[nr_pages - 1]);
2526
2527		wdata->sync_mode = WB_SYNC_ALL;
2528		wdata->nr_pages = nr_pages;
2529		wdata->offset = (__u64)offset;
2530		wdata->cfile = cifsFileInfo_get(open_file);
2531		wdata->pid = pid;
2532		wdata->bytes = cur_len;
2533		wdata->pagesz = PAGE_SIZE;
2534		wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2535		wdata->credits = credits;
2536
2537		if (!wdata->cfile->invalidHandle ||
2538		    !cifs_reopen_file(wdata->cfile, false))
2539			rc = server->ops->async_writev(wdata,
2540					cifs_uncached_writedata_release);
2541		if (rc) {
2542			add_credits_and_wake_if(server, wdata->credits, 0);
2543			kref_put(&wdata->refcount,
2544				 cifs_uncached_writedata_release);
2545			if (rc == -EAGAIN) {
2546				memcpy(from, &saved_from,
2547				       sizeof(struct iov_iter));
2548				iov_iter_advance(from, offset - saved_offset);
2549				continue;
2550			}
2551			break;
2552		}
2553
2554		list_add_tail(&wdata->list, wdata_list);
2555		offset += cur_len;
2556		len -= cur_len;
2557	} while (len > 0);
2558
2559	return rc;
2560}
2561
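/*
 * Uncached write path.  Sends the data with cifs_write_from_iter(), then
 * collects the replies in order of increasing offset, resending any slice
 * that failed with -EAGAIN.  On success the cached mapping is marked
 * invalid since it may now be stale.
 */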
2562ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2563{
2564	struct file *file = iocb->ki_filp;
2565	ssize_t total_written = 0;
2566	struct cifsFileInfo *open_file;
2567	struct cifs_tcon *tcon;
2568	struct cifs_sb_info *cifs_sb;
2569	struct cifs_writedata *wdata, *tmp;
2570	struct list_head wdata_list;
2571	struct iov_iter saved_from;
2572	int rc;
2573
2574	/*
2575	 * BB - optimize for the case when signing is disabled. We can drop this
2576	 * extra memory-to-memory copying and use iovec buffers for constructing
2577	 * the write request.
2578	 */
2579
2580	rc = generic_write_checks(iocb, from);
2581	if (rc <= 0)
2582		return rc;
2583
2584	INIT_LIST_HEAD(&wdata_list);
2585	cifs_sb = CIFS_FILE_SB(file);
2586	open_file = file->private_data;
2587	tcon = tlink_tcon(open_file->tlink);
2588
2589	if (!tcon->ses->server->ops->async_writev)
2590		return -ENOSYS;
2591
2592	memcpy(&saved_from, from, sizeof(struct iov_iter));
2593
2594	rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
2595				  open_file, cifs_sb, &wdata_list);
2596
2597	/*
2598	 * If at least one write was successfully sent, then discard any rc
2599	 * value from the later writes. If the other write succeeds, then
2600	 * we'll end up returning whatever was written. If it fails, then
2601	 * we'll get a new rc value from that.
2602	 */
2603	if (!list_empty(&wdata_list))
2604		rc = 0;
2605
2606	/*
2607	 * Wait for and collect replies for any successful sends in order of
2608	 * increasing offset. Once an error is hit or we get a fatal signal
2609	 * while waiting, then return without waiting for any more replies.
2610	 */
2611restart_loop:
2612	list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2613		if (!rc) {
2614			/* FIXME: freezable too? */
2615			rc = wait_for_completion_killable(&wdata->done);
2616			if (rc)
2617				rc = -EINTR;
2618			else if (wdata->result)
2619				rc = wdata->result;
2620			else
2621				total_written += wdata->bytes;
2622
2623			/* resend call if it's a retryable error */
2624			if (rc == -EAGAIN) {
2625				struct list_head tmp_list;
2626				struct iov_iter tmp_from;
2627
2628				INIT_LIST_HEAD(&tmp_list);
2629				list_del_init(&wdata->list);
2630
2631				memcpy(&tmp_from, &saved_from,
2632				       sizeof(struct iov_iter));
2633				iov_iter_advance(&tmp_from,
2634						 wdata->offset - iocb->ki_pos);
2635
2636				rc = cifs_write_from_iter(wdata->offset,
2637						wdata->bytes, &tmp_from,
2638						open_file, cifs_sb, &tmp_list);
2639
2640				list_splice(&tmp_list, &wdata_list);
2641
2642				kref_put(&wdata->refcount,
2643					 cifs_uncached_writedata_release);
2644				goto restart_loop;
2645			}
2646		}
2647		list_del_init(&wdata->list);
2648		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2649	}
2650
2651	if (unlikely(!total_written))
2652		return rc;
2653
2654	iocb->ki_pos += total_written;
2655	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
2656	cifs_stats_bytes_written(tcon, total_written);
2657	return total_written;
2658}
2659
2660static ssize_t
2661cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2662{
2663	struct file *file = iocb->ki_filp;
2664	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2665	struct inode *inode = file->f_mapping->host;
2666	struct cifsInodeInfo *cinode = CIFS_I(inode);
2667	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2668	ssize_t rc;
2669
2670	/*
2671	 * We need to hold the sem to be sure nobody modifies lock list
2672	 * with a brlock that prevents writing.
2673	 */
2674	down_read(&cinode->lock_sem);
2675	inode_lock(inode);
2676
2677	rc = generic_write_checks(iocb, from);
2678	if (rc <= 0)
2679		goto out;
2680
2681	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2682				     server->vals->exclusive_lock_type, NULL,
2683				     CIFS_WRITE_OP))
2684		rc = __generic_file_write_iter(iocb, from);
2685	else
2686		rc = -EACCES;
2687out:
2688	inode_unlock(inode);
2689
2690	if (rc > 0) {
2691		ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc);
2692		if (err < 0)
2693			rc = err;
2694	}
2695	up_read(&cinode->lock_sem);
2696	return rc;
2697}
2698
2699ssize_t
2700cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2701{
2702	struct inode *inode = file_inode(iocb->ki_filp);
2703	struct cifsInodeInfo *cinode = CIFS_I(inode);
2704	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2705	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2706						iocb->ki_filp->private_data;
2707	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2708	ssize_t written;
2709
2710	written = cifs_get_writer(cinode);
2711	if (written)
2712		return written;
2713
2714	if (CIFS_CACHE_WRITE(cinode)) {
2715		if (cap_unix(tcon->ses) &&
2716		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2717		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2718			written = generic_file_write_iter(iocb, from);
2719			goto out;
2720		}
2721		written = cifs_writev(iocb, from);
2722		goto out;
2723	}
2724	/*
2725	 * For non-oplocked files in strict cache mode we need to write the data
2726	 * to the server exactly from pos to pos+len-1 rather than flush all
2727	 * affected pages because it may cause an error with mandatory locks on
2728	 * these pages but not on the region from pos to pos+len-1.
2729	 */
2730	written = cifs_user_writev(iocb, from);
2731	if (written > 0 && CIFS_CACHE_READ(cinode)) {
2732		/*
2733		 * Windows 7 server can delay breaking level2 oplock if a write
2734		 * request comes - break it on the client to prevent reading
2735		 * stale data.
2736		 */
2737		cifs_zap_mapping(inode);
2738		cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2739			 inode);
2740		cinode->oplock = 0;
2741	}
2742out:
2743	cifs_put_writer(cinode);
2744	return written;
2745}
2746
2747static struct cifs_readdata *
2748cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2749{
2750	struct cifs_readdata *rdata;
2751
2752	rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2753			GFP_KERNEL);
2754	if (rdata != NULL) {
2755		kref_init(&rdata->refcount);
2756		INIT_LIST_HEAD(&rdata->list);
2757		init_completion(&rdata->done);
2758		INIT_WORK(&rdata->work, complete);
2759	}
2760
2761	return rdata;
2762}
2763
2764void
2765cifs_readdata_release(struct kref *refcount)
2766{
2767	struct cifs_readdata *rdata = container_of(refcount,
2768					struct cifs_readdata, refcount);
2769
2770	if (rdata->cfile)
2771		cifsFileInfo_put(rdata->cfile);
2772
2773	kfree(rdata);
2774}
2775
2776static int
2777cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2778{
2779	int rc = 0;
2780	struct page *page;
2781	unsigned int i;
2782
2783	for (i = 0; i < nr_pages; i++) {
2784		page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2785		if (!page) {
2786			rc = -ENOMEM;
2787			break;
2788		}
2789		rdata->pages[i] = page;
2790	}
2791
2792	if (rc) {
2793		for (i = 0; i < nr_pages; i++) {
2794			put_page(rdata->pages[i]);
2795			rdata->pages[i] = NULL;
2796		}
2797	}
2798	return rc;
2799}
2800
2801static void
2802cifs_uncached_readdata_release(struct kref *refcount)
2803{
2804	struct cifs_readdata *rdata = container_of(refcount,
2805					struct cifs_readdata, refcount);
2806	unsigned int i;
2807
2808	for (i = 0; i < rdata->nr_pages; i++) {
2809		put_page(rdata->pages[i]);
2810		rdata->pages[i] = NULL;
2811	}
2812	cifs_readdata_release(refcount);
2813}
2814
2815/**
2816 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2817 * @rdata:	the readdata response with list of pages holding data
2818 * @iter:	destination for our data
2819 *
2820 * This function copies data from a list of pages in a readdata response into
2821 * an array of iovecs. It will first calculate where the data should go
2822 * based on the info in the readdata and then copy the data into that spot.
2823 */
2824static int
2825cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2826{
2827	size_t remaining = rdata->got_bytes;
2828	unsigned int i;
2829
2830	for (i = 0; i < rdata->nr_pages; i++) {
2831		struct page *page = rdata->pages[i];
2832		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2833		size_t written = copy_page_to_iter(page, 0, copy, iter);
2834		remaining -= written;
2835		if (written < copy && iov_iter_count(iter) > 0)
2836			break;
2837	}
2838	return remaining ? -EFAULT : 0;
2839}
2840
2841static void
2842cifs_uncached_readv_complete(struct work_struct *work)
2843{
2844	struct cifs_readdata *rdata = container_of(work,
2845						struct cifs_readdata, work);
2846
2847	complete(&rdata->done);
2848	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2849}
2850
2851static int
2852cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2853			struct cifs_readdata *rdata, unsigned int len)
2854{
2855	int result = 0;
2856	unsigned int i;
2857	unsigned int nr_pages = rdata->nr_pages;
2858	struct kvec iov;
2859
2860	rdata->got_bytes = 0;
2861	rdata->tailsz = PAGE_SIZE;
2862	for (i = 0; i < nr_pages; i++) {
2863		struct page *page = rdata->pages[i];
2864
2865		if (len >= PAGE_SIZE) {
2866			/* enough data to fill the page */
2867			iov.iov_base = kmap(page);
2868			iov.iov_len = PAGE_SIZE;
2869			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2870				 i, iov.iov_base, iov.iov_len);
2871			len -= PAGE_SIZE;
2872		} else if (len > 0) {
2873			/* enough for partial page, fill and zero the rest */
2874			iov.iov_base = kmap(page);
2875			iov.iov_len = len;
2876			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2877				 i, iov.iov_base, iov.iov_len);
2878			memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2879			rdata->tailsz = len;
2880			len = 0;
2881		} else {
2882			/* no need to hold page hostage */
2883			rdata->pages[i] = NULL;
2884			rdata->nr_pages--;
2885			put_page(page);
2886			continue;
2887		}
2888
2889		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2890		kunmap(page);
2891		if (result < 0)
2892			break;
2893
2894		rdata->got_bytes += result;
2895	}
2896
2897	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2898						rdata->got_bytes : result;
2899}
2900
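/*
 * Split an uncached read into rsize-sized cifs_readdata requests and send
 * them asynchronously.  Each request that was sent successfully is added
 * to @rdata_list so the caller can wait for completion and copy the data
 * out to the user's iovec.
 */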
2901static int
2902cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2903		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2904{
2905	struct cifs_readdata *rdata;
2906	unsigned int npages, rsize, credits;
2907	size_t cur_len;
2908	int rc;
2909	pid_t pid;
2910	struct TCP_Server_Info *server;
2911
2912	server = tlink_tcon(open_file->tlink)->ses->server;
2913
2914	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2915		pid = open_file->pid;
2916	else
2917		pid = current->tgid;
2918
2919	do {
2920		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2921						   &rsize, &credits);
2922		if (rc)
2923			break;
2924
2925		cur_len = min_t(const size_t, len, rsize);
2926		npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2927
2928		/* allocate a readdata struct */
2929		rdata = cifs_readdata_alloc(npages,
2930					    cifs_uncached_readv_complete);
2931		if (!rdata) {
2932			add_credits_and_wake_if(server, credits, 0);
2933			rc = -ENOMEM;
2934			break;
2935		}
2936
2937		rc = cifs_read_allocate_pages(rdata, npages);
2938		if (rc)
2939			goto error;
2940
2941		rdata->cfile = cifsFileInfo_get(open_file);
2942		rdata->nr_pages = npages;
2943		rdata->offset = offset;
2944		rdata->bytes = cur_len;
2945		rdata->pid = pid;
2946		rdata->pagesz = PAGE_SIZE;
2947		rdata->read_into_pages = cifs_uncached_read_into_pages;
2948		rdata->credits = credits;
2949
2950		if (!rdata->cfile->invalidHandle ||
2951		    !cifs_reopen_file(rdata->cfile, true))
2952			rc = server->ops->async_readv(rdata);
2953error:
2954		if (rc) {
2955			add_credits_and_wake_if(server, rdata->credits, 0);
2956			kref_put(&rdata->refcount,
2957				 cifs_uncached_readdata_release);
2958			if (rc == -EAGAIN)
2959				continue;
2960			break;
2961		}
2962
2963		list_add_tail(&rdata->list, rdata_list);
2964		offset += cur_len;
2965		len -= cur_len;
2966	} while (len > 0);
2967
2968	return rc;
2969}
2970
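/*
 * Uncached read path.  Sends the requests with cifs_send_async_read(),
 * then collects the replies in order of increasing offset, copying the
 * data into the caller's iovec and resending any request that failed
 * with -EAGAIN.
 */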
2971ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
2972{
2973	struct file *file = iocb->ki_filp;
2974	ssize_t rc;
2975	size_t len;
2976	ssize_t total_read = 0;
2977	loff_t offset = iocb->ki_pos;
2978	struct cifs_sb_info *cifs_sb;
2979	struct cifs_tcon *tcon;
2980	struct cifsFileInfo *open_file;
2981	struct cifs_readdata *rdata, *tmp;
2982	struct list_head rdata_list;
2983
2984	len = iov_iter_count(to);
2985	if (!len)
2986		return 0;
2987
2988	INIT_LIST_HEAD(&rdata_list);
2989	cifs_sb = CIFS_FILE_SB(file);
2990	open_file = file->private_data;
2991	tcon = tlink_tcon(open_file->tlink);
2992
2993	if (!tcon->ses->server->ops->async_readv)
2994		return -ENOSYS;
2995
2996	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2997		cifs_dbg(FYI, "attempting read on write only file instance\n");
2998
2999	rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
3000
3001	/* if at least one read request was sent successfully, then reset rc */
3002	if (!list_empty(&rdata_list))
3003		rc = 0;
3004
3005	len = iov_iter_count(to);
3006	/* the loop below should proceed in the order of increasing offsets */
3007again:
3008	list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3009		if (!rc) {
3010			/* FIXME: freezable sleep too? */
3011			rc = wait_for_completion_killable(&rdata->done);
3012			if (rc)
3013				rc = -EINTR;
3014			else if (rdata->result == -EAGAIN) {
3015				/* resend call if it's a retryable error */
3016				struct list_head tmp_list;
3017				unsigned int got_bytes = rdata->got_bytes;
3018
3019				list_del_init(&rdata->list);
3020				INIT_LIST_HEAD(&tmp_list);
3021
3022				/*
3023				 * Got a part of data and then reconnect has
3024				 * happened -- fill the buffer and continue
3025				 * reading.
3026				 */
3027				if (got_bytes && got_bytes < rdata->bytes) {
3028					rc = cifs_readdata_to_iov(rdata, to);
3029					if (rc) {
3030						kref_put(&rdata->refcount,
3031						cifs_uncached_readdata_release);
3032						continue;
3033					}
3034				}
3035
3036				rc = cifs_send_async_read(
3037						rdata->offset + got_bytes,
3038						rdata->bytes - got_bytes,
3039						rdata->cfile, cifs_sb,
3040						&tmp_list);
3041
3042				list_splice(&tmp_list, &rdata_list);
3043
3044				kref_put(&rdata->refcount,
3045					 cifs_uncached_readdata_release);
3046				goto again;
3047			} else if (rdata->result)
3048				rc = rdata->result;
3049			else
3050				rc = cifs_readdata_to_iov(rdata, to);
3051
3052			/* if there was a short read -- discard anything left */
3053			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3054				rc = -ENODATA;
3055		}
3056		list_del_init(&rdata->list);
3057		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3058	}
3059
3060	total_read = len - iov_iter_count(to);
3061
3062	cifs_stats_bytes_read(tcon, total_read);
3063
3064	/* mask nodata case */
3065	if (rc == -ENODATA)
3066		rc = 0;
3067
3068	if (total_read) {
3069		iocb->ki_pos += total_read;
3070		return total_read;
3071	}
3072	return rc;
3073}
3074
3075ssize_t
3076cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3077{
3078	struct inode *inode = file_inode(iocb->ki_filp);
3079	struct cifsInodeInfo *cinode = CIFS_I(inode);
3080	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3081	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3082						iocb->ki_filp->private_data;
3083	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3084	int rc = -EACCES;
3085
3086	/*
3087	 * In strict cache mode we need to read from the server all the time
3088	 * if we don't have level II oplock because the server can delay mtime
3089	 * change - so we can't make a decision about invalidating the inode's
3090	 * page cache. We can also fail reading pages if there are mandatory locks
3091	 * on pages affected by this read but not on the region from pos to
3092	 * pos+len-1.
3093	 */
3094	if (!CIFS_CACHE_READ(cinode))
3095		return cifs_user_readv(iocb, to);
3096
3097	if (cap_unix(tcon->ses) &&
3098	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3099	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3100		return generic_file_read_iter(iocb, to);
3101
3102	/*
3103	 * We need to hold the sem to be sure nobody modifies lock list
3104	 * with a brlock that prevents reading.
3105	 */
3106	down_read(&cinode->lock_sem);
3107	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3108				     tcon->ses->server->vals->shared_lock_type,
3109				     NULL, CIFS_READ_OP))
3110		rc = generic_file_read_iter(iocb, to);
3111	up_read(&cinode->lock_sem);
3112	return rc;
3113}
3114
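/*
 * Synchronous read used when the page cache is bypassed.  Issues
 * rsize-sized reads via the server's sync_read op, reopening the file
 * handle if it has been invalidated, until the request is satisfied or
 * an error or short read occurs.
 */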
3115static ssize_t
3116cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3117{
3118	int rc = -EACCES;
3119	unsigned int bytes_read = 0;
3120	unsigned int total_read;
3121	unsigned int current_read_size;
3122	unsigned int rsize;
3123	struct cifs_sb_info *cifs_sb;
3124	struct cifs_tcon *tcon;
3125	struct TCP_Server_Info *server;
3126	unsigned int xid;
3127	char *cur_offset;
3128	struct cifsFileInfo *open_file;
3129	struct cifs_io_parms io_parms;
3130	int buf_type = CIFS_NO_BUFFER;
3131	__u32 pid;
3132
3133	xid = get_xid();
3134	cifs_sb = CIFS_FILE_SB(file);
3135
3136	/* FIXME: set up handlers for larger reads and/or convert to async */
3137	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3138
3139	if (file->private_data == NULL) {
3140		rc = -EBADF;
3141		free_xid(xid);
3142		return rc;
3143	}
3144	open_file = file->private_data;
3145	tcon = tlink_tcon(open_file->tlink);
3146	server = tcon->ses->server;
3147
3148	if (!server->ops->sync_read) {
3149		free_xid(xid);
3150		return -ENOSYS;
3151	}
3152
3153	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3154		pid = open_file->pid;
3155	else
3156		pid = current->tgid;
3157
3158	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3159		cifs_dbg(FYI, "attempting read on write only file instance\n");
3160
3161	for (total_read = 0, cur_offset = read_data; read_size > total_read;
3162	     total_read += bytes_read, cur_offset += bytes_read) {
3163		do {
3164			current_read_size = min_t(uint, read_size - total_read,
3165						  rsize);
3166			/*
3167			 * For Windows ME and 9x we do not want to request more
3168			 * than was negotiated since the server will then refuse
3169			 * the read.
3170			 */
3171			if ((tcon->ses) && !(tcon->ses->capabilities &
3172				tcon->ses->server->vals->cap_large_files)) {
3173				current_read_size = min_t(uint,
3174					current_read_size, CIFSMaxBufSize);
3175			}
3176			if (open_file->invalidHandle) {
3177				rc = cifs_reopen_file(open_file, true);
3178				if (rc != 0)
3179					break;
3180			}
3181			io_parms.pid = pid;
3182			io_parms.tcon = tcon;
3183			io_parms.offset = *offset;
3184			io_parms.length = current_read_size;
3185			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3186						    &bytes_read, &cur_offset,
3187						    &buf_type);
3188		} while (rc == -EAGAIN);
3189
3190		if (rc || (bytes_read == 0)) {
3191			if (total_read) {
3192				break;
3193			} else {
3194				free_xid(xid);
3195				return rc;
3196			}
3197		} else {
3198			cifs_stats_bytes_read(tcon, total_read);
3199			*offset += bytes_read;
3200		}
3201	}
3202	free_xid(xid);
3203	return total_read;
3204}
3205
3206/*
3207 * If the page is mmap'ed into a process' page tables, then we need to make
3208 * sure that it doesn't change while being written back.
3209 */
3210static int
3211cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3212{
3213	struct page *page = vmf->page;
3214
3215	lock_page(page);
3216	return VM_FAULT_LOCKED;
3217}
3218
3219static const struct vm_operations_struct cifs_file_vm_ops = {
3220	.fault = filemap_fault,
3221	.map_pages = filemap_map_pages,
3222	.page_mkwrite = cifs_page_mkwrite,
3223};
3224
3225int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3226{
3227	int rc, xid;
3228	struct inode *inode = file_inode(file);
3229
3230	xid = get_xid();
3231
3232	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3233		rc = cifs_zap_mapping(inode);
3234		if (rc) {
			free_xid(xid);
3235			return rc;
		}
3236	}
3237
3238	rc = generic_file_mmap(file, vma);
3239	if (rc == 0)
3240		vma->vm_ops = &cifs_file_vm_ops;
3241	free_xid(xid);
3242	return rc;
3243}
3244
3245int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3246{
3247	int rc, xid;
3248
3249	xid = get_xid();
3250	rc = cifs_revalidate_file(file);
3251	if (rc) {
3252		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3253			 rc);
3254		free_xid(xid);
3255		return rc;
3256	}
3257	rc = generic_file_mmap(file, vma);
3258	if (rc == 0)
3259		vma->vm_ops = &cifs_file_vm_ops;
3260	free_xid(xid);
3261	return rc;
3262}
3263
3264static void
3265cifs_readv_complete(struct work_struct *work)
3266{
3267	unsigned int i, got_bytes;
3268	struct cifs_readdata *rdata = container_of(work,
3269						struct cifs_readdata, work);
3270
3271	got_bytes = rdata->got_bytes;
3272	for (i = 0; i < rdata->nr_pages; i++) {
3273		struct page *page = rdata->pages[i];
3274
3275		lru_cache_add_file(page);
3276
3277		if (rdata->result == 0 ||
3278		    (rdata->result == -EAGAIN && got_bytes)) {
3279			flush_dcache_page(page);
3280			SetPageUptodate(page);
3281		}
3282
3283		unlock_page(page);
3284
3285		if (rdata->result == 0 ||
3286		    (rdata->result == -EAGAIN && got_bytes))
3287			cifs_readpage_to_fscache(rdata->mapping->host, page);
3288
3289		got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3290
3291		put_page(page);
3292		rdata->pages[i] = NULL;
3293	}
3294	kref_put(&rdata->refcount, cifs_readdata_release);
3295}
3296
3297static int
3298cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3299			struct cifs_readdata *rdata, unsigned int len)
3300{
3301	int result = 0;
3302	unsigned int i;
3303	u64 eof;
3304	pgoff_t eof_index;
3305	unsigned int nr_pages = rdata->nr_pages;
3306	struct kvec iov;
3307
3308	/* determine the eof that the server (probably) has */
3309	eof = CIFS_I(rdata->mapping->host)->server_eof;
3310	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3311	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3312
3313	rdata->got_bytes = 0;
3314	rdata->tailsz = PAGE_SIZE;
3315	for (i = 0; i < nr_pages; i++) {
3316		struct page *page = rdata->pages[i];
3317
3318		if (len >= PAGE_SIZE) {
3319			/* enough data to fill the page */
3320			iov.iov_base = kmap(page);
3321			iov.iov_len = PAGE_SIZE;
3322			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3323				 i, page->index, iov.iov_base, iov.iov_len);
3324			len -= PAGE_SIZE;
3325		} else if (len > 0) {
3326			/* enough for partial page, fill and zero the rest */
3327			iov.iov_base = kmap(page);
3328			iov.iov_len = len;
3329			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3330				 i, page->index, iov.iov_base, iov.iov_len);
3331			memset(iov.iov_base + len,
3332				'\0', PAGE_SIZE - len);
3333			rdata->tailsz = len;
3334			len = 0;
3335		} else if (page->index > eof_index) {
3336			/*
3337			 * The VFS will not try to do readahead past the
3338			 * i_size, but it's possible that we have outstanding
3339			 * writes with gaps in the middle and the i_size hasn't
3340			 * caught up yet. Populate those with zeroed out pages
3341			 * to prevent the VFS from repeatedly attempting to
3342			 * fill them until the writes are flushed.
3343			 */
3344			zero_user(page, 0, PAGE_SIZE);
3345			lru_cache_add_file(page);
3346			flush_dcache_page(page);
3347			SetPageUptodate(page);
3348			unlock_page(page);
3349			put_page(page);
3350			rdata->pages[i] = NULL;
3351			rdata->nr_pages--;
3352			continue;
3353		} else {
3354			/* no need to hold page hostage */
3355			lru_cache_add_file(page);
3356			unlock_page(page);
3357			put_page(page);
3358			rdata->pages[i] = NULL;
3359			rdata->nr_pages--;
3360			continue;
3361		}
3362
3363		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3364		kunmap(page);
3365		if (result < 0)
3366			break;
3367
3368		rdata->got_bytes += result;
3369	}
3370
3371	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3372						rdata->got_bytes : result;
3373}
3374
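/*
 * Peel a run of consecutive pages off the tail of @page_list, add them to
 * the page cache and move them onto @tmplist until the read would exceed
 * @rsize or an index discontinuity is hit.  The resulting offset, byte
 * count and page count for the read request are returned through the
 * remaining parameters.
 */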
3375static int
3376readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3377		    unsigned int rsize, struct list_head *tmplist,
3378		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3379{
3380	struct page *page, *tpage;
3381	unsigned int expected_index;
3382	int rc;
3383	gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
3384
3385	INIT_LIST_HEAD(tmplist);
3386
3387	page = list_entry(page_list->prev, struct page, lru);
3388
3389	/*
3390	 * Lock the page and put it in the cache. Since no one else
3391	 * should have access to this page, we're safe to simply set
3392	 * PG_locked without checking it first.
3393	 */
3394	__SetPageLocked(page);
3395	rc = add_to_page_cache_locked(page, mapping,
3396				      page->index, gfp);
3397
3398	/* give up if we can't stick it in the cache */
3399	if (rc) {
3400		__ClearPageLocked(page);
3401		return rc;
3402	}
3403
3404	/* move first page to the tmplist */
3405	*offset = (loff_t)page->index << PAGE_SHIFT;
3406	*bytes = PAGE_SIZE;
3407	*nr_pages = 1;
3408	list_move_tail(&page->lru, tmplist);
3409
3410	/* now try and add more pages onto the request */
3411	expected_index = page->index + 1;
3412	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3413		/* discontinuity ? */
3414		if (page->index != expected_index)
3415			break;
3416
3417		/* would this page push the read over the rsize? */
3418		if (*bytes + PAGE_SIZE > rsize)
3419			break;
3420
3421		__SetPageLocked(page);
3422		if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3423			__ClearPageLocked(page);
3424			break;
3425		}
3426		list_move_tail(&page->lru, tmplist);
3427		(*bytes) += PAGE_SIZE;
3428		expected_index++;
3429		(*nr_pages)++;
3430	}
3431	return rc;
3432}
3433
3434static int cifs_readpages(struct file *file, struct address_space *mapping,
3435	struct list_head *page_list, unsigned num_pages)
3436{
3437	int rc;
3438	struct list_head tmplist;
3439	struct cifsFileInfo *open_file = file->private_data;
3440	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3441	struct TCP_Server_Info *server;
3442	pid_t pid;
3443
3444	/*
3445	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3446	 * immediately if the cookie is negative
3447	 *
3448	 * After this point, every page in the list might have PG_fscache set,
3449	 * so we will need to clean that up off of every page we don't use.
3450	 */
3451	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3452					 &num_pages);
3453	if (rc == 0)
3454		return rc;
3455
3456	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3457		pid = open_file->pid;
3458	else
3459		pid = current->tgid;
3460
3461	rc = 0;
3462	server = tlink_tcon(open_file->tlink)->ses->server;
3463
3464	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3465		 __func__, file, mapping, num_pages);
3466
3467	/*
3468	 * Start with the page at end of list and move it to private
3469	 * list. Do the same with any following pages until we hit
3470	 * the rsize limit, hit an index discontinuity, or run out of
3471	 * pages. Issue the async read and then start the loop again
3472	 * until the list is empty.
3473	 *
3474	 * Note that list order is important. The page_list is in
3475	 * the order of declining indexes. When we put the pages in
3476	 * the rdata->pages, then we want them in increasing order.
3477	 */
3478	while (!list_empty(page_list)) {
3479		unsigned int i, nr_pages, bytes, rsize;
3480		loff_t offset;
3481		struct page *page, *tpage;
3482		struct cifs_readdata *rdata;
3483		unsigned credits;
3484
3485		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3486						   &rsize, &credits);
3487		if (rc)
3488			break;
3489
3490		/*
3491		 * Give up immediately if rsize is too small to read an entire
3492		 * page. The VFS will fall back to readpage. We should never
3493		 * reach this point however since we set ra_pages to 0 when the
3494		 * rsize is smaller than a cache page.
3495		 */
3496		if (unlikely(rsize < PAGE_SIZE)) {
3497			add_credits_and_wake_if(server, credits, 0);
3498			return 0;
3499		}
3500
3501		rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3502					 &nr_pages, &offset, &bytes);
3503		if (rc) {
3504			add_credits_and_wake_if(server, credits, 0);
3505			break;
3506		}
3507
3508		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3509		if (!rdata) {
3510			/* best to give up if we're out of mem */
3511			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3512				list_del(&page->lru);
3513				lru_cache_add_file(page);
3514				unlock_page(page);
3515				put_page(page);
3516			}
3517			rc = -ENOMEM;
3518			add_credits_and_wake_if(server, credits, 0);
3519			break;
3520		}
3521
3522		rdata->cfile = cifsFileInfo_get(open_file);
3523		rdata->mapping = mapping;
3524		rdata->offset = offset;
3525		rdata->bytes = bytes;
3526		rdata->pid = pid;
3527		rdata->pagesz = PAGE_SIZE;
3528		rdata->read_into_pages = cifs_readpages_read_into_pages;
3529		rdata->credits = credits;
3530
3531		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3532			list_del(&page->lru);
3533			rdata->pages[rdata->nr_pages++] = page;
3534		}
3535
3536		if (!rdata->cfile->invalidHandle ||
3537		    !cifs_reopen_file(rdata->cfile, true))
3538			rc = server->ops->async_readv(rdata);
3539		if (rc) {
3540			add_credits_and_wake_if(server, rdata->credits, 0);
3541			for (i = 0; i < rdata->nr_pages; i++) {
3542				page = rdata->pages[i];
3543				lru_cache_add_file(page);
3544				unlock_page(page);
3545				put_page(page);
3546			}
3547			/* Fallback to the readpage in error/reconnect cases */
3548			kref_put(&rdata->refcount, cifs_readdata_release);
3549			break;
3550		}
3551
3552		kref_put(&rdata->refcount, cifs_readdata_release);
3553	}
3554
3555	/* Any pages that have been shown to fscache but didn't get added to
3556	 * the pagecache must be uncached before they get returned to the
3557	 * allocator.
3558	 */
3559	cifs_fscache_readpages_cancel(mapping->host, page_list);
3560	return rc;
3561}
3562
3563/*
3564 * cifs_readpage_worker must be called with the page pinned
3565 */
3566static int cifs_readpage_worker(struct file *file, struct page *page,
3567	loff_t *poffset)
3568{
3569	char *read_data;
3570	int rc;
3571
3572	/* Is the page cached? */
3573	rc = cifs_readpage_from_fscache(file_inode(file), page);
3574	if (rc == 0)
3575		goto read_complete;
3576
3577	read_data = kmap(page);
3578	/* for reads over a certain size we could initiate async read ahead */
3579
3580	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
3581
3582	if (rc < 0)
3583		goto io_error;
3584	else
3585		cifs_dbg(FYI, "Bytes read %d\n", rc);
3586
3587	file_inode(file)->i_atime =
3588		current_fs_time(file_inode(file)->i_sb);
3589
3590	if (PAGE_SIZE > rc)
3591		memset(read_data + rc, 0, PAGE_SIZE - rc);
3592
3593	flush_dcache_page(page);
3594	SetPageUptodate(page);
3595
3596	/* send this page to the cache */
3597	cifs_readpage_to_fscache(file_inode(file), page);
3598
3599	rc = 0;
3600
3601io_error:
3602	kunmap(page);
3603	unlock_page(page);
3604
3605read_complete:
3606	return rc;
3607}
3608
3609static int cifs_readpage(struct file *file, struct page *page)
3610{
3611	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3612	int rc = -EACCES;
3613	unsigned int xid;
3614
3615	xid = get_xid();
3616
3617	if (file->private_data == NULL) {
3618		rc = -EBADF;
3619		free_xid(xid);
3620		return rc;
3621	}
3622
3623	cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3624		 page, (int)offset, (int)offset);
3625
3626	rc = cifs_readpage_worker(file, page, &offset);
3627
3628	free_xid(xid);
3629	return rc;
3630}
3631
3632static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3633{
3634	struct cifsFileInfo *open_file;
3635
3636	spin_lock(&cifs_file_list_lock);
3637	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3638		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3639			spin_unlock(&cifs_file_list_lock);
3640			return 1;
3641		}
3642	}
3643	spin_unlock(&cifs_file_list_lock);
3644	return 0;
3645}
3646
3647/* We do not want to update the file size from the server for inodes
3648   open for write, to avoid races with writepage extending the file.
3649   In the future we could consider allowing the inode to be refreshed
3650   only on increases in the file size, but this is tricky to do
3651   without racing with writebehind page caching in the current
3652   Linux kernel design */
3653bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3654{
3655	if (!cifsInode)
3656		return true;
3657
3658	if (is_inode_writable(cifsInode)) {
3659		/* This inode is open for write at least once */
3660		struct cifs_sb_info *cifs_sb;
3661
3662		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3663		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3664			/* since there is no page cache to corrupt on directio
3665			   we can change size safely */
3666			return true;
3667		}
3668
3669		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3670			return true;
3671
3672		return false;
3673	} else
3674		return true;
3675}
3676
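/*
 * ->write_begin for the cifs address space.  Grabs the page to be written.
 * The read from the server is skipped when the write covers the whole
 * page, or when we hold a read oplock and none of the existing data in
 * the page will be needed; otherwise a single attempt is made to bring
 * the page up to date so that cifs_write_end() can simply dirty it (if
 * the page still isn't up to date, cifs_write_end() falls back to a
 * synchronous write).
 */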
3677static int cifs_write_begin(struct file *file, struct address_space *mapping,
3678			loff_t pos, unsigned len, unsigned flags,
3679			struct page **pagep, void **fsdata)
3680{
3681	int oncethru = 0;
3682	pgoff_t index = pos >> PAGE_SHIFT;
3683	loff_t offset = pos & (PAGE_SIZE - 1);
3684	loff_t page_start = pos & PAGE_MASK;
3685	loff_t i_size;
3686	struct page *page;
3687	int rc = 0;
3688
3689	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3690
3691start:
3692	page = grab_cache_page_write_begin(mapping, index, flags);
3693	if (!page) {
3694		rc = -ENOMEM;
3695		goto out;
3696	}
3697
3698	if (PageUptodate(page))
3699		goto out;
3700
3701	/*
3702	 * If we write a full page it will be up to date, no need to read from
3703	 * the server. If the write is short, we'll end up doing a sync write
3704	 * instead.
3705	 */
3706	if (len == PAGE_SIZE)
3707		goto out;
3708
3709	/*
3710	 * optimize away the read when we have an oplock, and we're not
3711	 * expecting to use any of the data we'd be reading in. That
3712	 * is, when the page lies beyond the EOF, or straddles the EOF
3713	 * and the write will cover all of the existing data.
3714	 */
3715	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3716		i_size = i_size_read(mapping->host);
3717		if (page_start >= i_size ||
3718		    (offset == 0 && (pos + len) >= i_size)) {
3719			zero_user_segments(page, 0, offset,
3720					   offset + len,
3721					   PAGE_SIZE);
3722			/*
3723			 * PageChecked means that the parts of the page
3724			 * to which we're not writing are considered up
3725			 * to date. Once the data is copied to the
3726			 * page, it can be set uptodate.
3727			 */
3728			SetPageChecked(page);
3729			goto out;
3730		}
3731	}
3732
3733	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3734		/*
3735		 * might as well read a page, it is fast enough. If we get
3736		 * an error, we don't need to return it. cifs_write_end will
3737		 * do a sync write instead since PG_uptodate isn't set.
3738		 */
3739		cifs_readpage_worker(file, page, &page_start);
3740		put_page(page);
3741		oncethru = 1;
3742		goto start;
3743	} else {
3744		/* we could try using another file handle if there is one -
3745		   but how would we lock it to prevent close of that handle
3746		   racing with this read? In any case
3747		   this will be written out by write_end so is fine */
3748	}
3749out:
3750	*pagep = page;
3751	return rc;
3752}
3753
3754static int cifs_release_page(struct page *page, gfp_t gfp)
3755{
3756	if (PagePrivate(page))
3757		return 0;
3758
3759	return cifs_fscache_release_page(page, gfp);
3760}
3761
3762static void cifs_invalidate_page(struct page *page, unsigned int offset,
3763				 unsigned int length)
3764{
3765	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3766
3767	if (offset == 0 && length == PAGE_SIZE)
3768		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3769}
3770
3771static int cifs_launder_page(struct page *page)
3772{
3773	int rc = 0;
3774	loff_t range_start = page_offset(page);
3775	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
3776	struct writeback_control wbc = {
3777		.sync_mode = WB_SYNC_ALL,
3778		.nr_to_write = 0,
3779		.range_start = range_start,
3780		.range_end = range_end,
3781	};
3782
3783	cifs_dbg(FYI, "Launder page: %p\n", page);
3784
3785	if (clear_page_dirty_for_io(page))
3786		rc = cifs_writepage_locked(page, &wbc);
3787
3788	cifs_fscache_invalidate_page(page, page->mapping->host);
3789	return rc;
3790}
3791
3792void cifs_oplock_break(struct work_struct *work)
3793{
3794	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3795						  oplock_break);
3796	struct inode *inode = d_inode(cfile->dentry);
3797	struct cifsInodeInfo *cinode = CIFS_I(inode);
3798	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3799	struct TCP_Server_Info *server = tcon->ses->server;
3800	int rc = 0;
3801
3802	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3803			TASK_UNINTERRUPTIBLE);
3804
3805	server->ops->downgrade_oplock(server, cinode,
3806		test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3807
3808	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3809						cifs_has_mand_locks(cinode)) {
3810		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3811			 inode);
3812		cinode->oplock = 0;
3813	}
3814
3815	if (inode && S_ISREG(inode->i_mode)) {
3816		if (CIFS_CACHE_READ(cinode))
3817			break_lease(inode, O_RDONLY);
3818		else
3819			break_lease(inode, O_WRONLY);
3820		rc = filemap_fdatawrite(inode->i_mapping);
3821		if (!CIFS_CACHE_READ(cinode)) {
3822			rc = filemap_fdatawait(inode->i_mapping);
3823			mapping_set_error(inode->i_mapping, rc);
3824			cifs_zap_mapping(inode);
3825		}
3826		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3827	}
3828
3829	rc = cifs_push_locks(cfile);
3830	if (rc)
3831		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3832
3833	/*
3834 * Releasing a stale oplock after a recent reconnect of the smb session
3835 * using a now incorrect file handle is not a data integrity issue, but
3836 * do not bother sending an oplock release if the session to the server
3837 * is still disconnected, since the oplock was already released by the server.
3838	 */
3839	if (!cfile->oplock_break_cancelled) {
3840		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3841							     cinode);
3842		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3843	}
3844	cifs_done_oplock_break(cinode);
3845}
3846
3847/*
3848 * The presence of cifs_direct_io() in the address space ops vector
3849 * allows open() O_DIRECT flags which would have failed otherwise.
3850 *
3851 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
3852 * so this method should never be called.
3853 *
3854 * Direct IO is not yet supported in the cached mode. 
3855 */
3856static ssize_t
3857cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
3858{
3859	/*
3860	 * FIXME
3861	 * Eventually need to support direct IO for non forcedirectio mounts
3862	 */
3863	return -EINVAL;
3864}
3865
3866
3867const struct address_space_operations cifs_addr_ops = {
3868	.readpage = cifs_readpage,
3869	.readpages = cifs_readpages,
3870	.writepage = cifs_writepage,
3871	.writepages = cifs_writepages,
3872	.write_begin = cifs_write_begin,
3873	.write_end = cifs_write_end,
3874	.set_page_dirty = __set_page_dirty_nobuffers,
3875	.releasepage = cifs_release_page,
3876	.direct_IO = cifs_direct_io,
3877	.invalidatepage = cifs_invalidate_page,
3878	.launder_page = cifs_launder_page,
3879};
3880
3881/*
3882 * cifs_readpages requires the server to support a buffer large enough to
3883 * contain the header plus one complete page of data.  Otherwise, we need
3884 * to leave cifs_readpages out of the address space operations.
3885 */
3886const struct address_space_operations cifs_addr_ops_smallbuf = {
3887	.readpage = cifs_readpage,
3888	.writepage = cifs_writepage,
3889	.writepages = cifs_writepages,
3890	.write_begin = cifs_write_begin,
3891	.write_end = cifs_write_end,
3892	.set_page_dirty = __set_page_dirty_nobuffers,
3893	.releasepage = cifs_release_page,
3894	.invalidatepage = cifs_invalidate_page,
3895	.launder_page = cifs_launder_page,
3896};
v4.10.11
   1/*
   2 *   fs/cifs/file.c
   3 *
   4 *   vfs operations that deal with files
   5 *
   6 *   Copyright (C) International Business Machines  Corp., 2002,2010
   7 *   Author(s): Steve French (sfrench@us.ibm.com)
   8 *              Jeremy Allison (jra@samba.org)
   9 *
  10 *   This library is free software; you can redistribute it and/or modify
  11 *   it under the terms of the GNU Lesser General Public License as published
  12 *   by the Free Software Foundation; either version 2.1 of the License, or
  13 *   (at your option) any later version.
  14 *
  15 *   This library is distributed in the hope that it will be useful,
  16 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  18 *   the GNU Lesser General Public License for more details.
  19 *
  20 *   You should have received a copy of the GNU Lesser General Public License
  21 *   along with this library; if not, write to the Free Software
  22 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  23 */
  24#include <linux/fs.h>
  25#include <linux/backing-dev.h>
  26#include <linux/stat.h>
  27#include <linux/fcntl.h>
  28#include <linux/pagemap.h>
  29#include <linux/pagevec.h>
  30#include <linux/writeback.h>
  31#include <linux/task_io_accounting_ops.h>
  32#include <linux/delay.h>
  33#include <linux/mount.h>
  34#include <linux/slab.h>
  35#include <linux/swap.h>
  36#include <asm/div64.h>
  37#include "cifsfs.h"
  38#include "cifspdu.h"
  39#include "cifsglob.h"
  40#include "cifsproto.h"
  41#include "cifs_unicode.h"
  42#include "cifs_debug.h"
  43#include "cifs_fs_sb.h"
  44#include "fscache.h"
  45
  46
  47static inline int cifs_convert_flags(unsigned int flags)
  48{
  49	if ((flags & O_ACCMODE) == O_RDONLY)
  50		return GENERIC_READ;
  51	else if ((flags & O_ACCMODE) == O_WRONLY)
  52		return GENERIC_WRITE;
  53	else if ((flags & O_ACCMODE) == O_RDWR) {
  54		/* GENERIC_ALL is too much permission to request
  55		   can cause unnecessary access denied on create */
  56		/* return GENERIC_ALL; */
  57		return (GENERIC_READ | GENERIC_WRITE);
  58	}
  59
  60	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
  61		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
  62		FILE_READ_DATA);
  63}
  64
  65static u32 cifs_posix_convert_flags(unsigned int flags)
  66{
  67	u32 posix_flags = 0;
  68
  69	if ((flags & O_ACCMODE) == O_RDONLY)
  70		posix_flags = SMB_O_RDONLY;
  71	else if ((flags & O_ACCMODE) == O_WRONLY)
  72		posix_flags = SMB_O_WRONLY;
  73	else if ((flags & O_ACCMODE) == O_RDWR)
  74		posix_flags = SMB_O_RDWR;
  75
  76	if (flags & O_CREAT) {
  77		posix_flags |= SMB_O_CREAT;
  78		if (flags & O_EXCL)
  79			posix_flags |= SMB_O_EXCL;
  80	} else if (flags & O_EXCL)
  81		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
  82			 current->comm, current->tgid);
  83
  84	if (flags & O_TRUNC)
  85		posix_flags |= SMB_O_TRUNC;
  86	/* be safe and imply O_SYNC for O_DSYNC */
  87	if (flags & O_DSYNC)
  88		posix_flags |= SMB_O_SYNC;
  89	if (flags & O_DIRECTORY)
  90		posix_flags |= SMB_O_DIRECTORY;
  91	if (flags & O_NOFOLLOW)
  92		posix_flags |= SMB_O_NOFOLLOW;
  93	if (flags & O_DIRECT)
  94		posix_flags |= SMB_O_DIRECT;
  95
  96	return posix_flags;
  97}
  98
  99static inline int cifs_get_disposition(unsigned int flags)
 100{
 101	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
 102		return FILE_CREATE;
 103	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
 104		return FILE_OVERWRITE_IF;
 105	else if ((flags & O_CREAT) == O_CREAT)
 106		return FILE_OPEN_IF;
 107	else if ((flags & O_TRUNC) == O_TRUNC)
 108		return FILE_OVERWRITE;
 109	else
 110		return FILE_OPEN;
 111}
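/*
 * For example, a typical open(path, O_WRONLY | O_CREAT | O_TRUNC) maps to
 * FILE_OVERWRITE_IF above, while a plain O_RDONLY open maps to FILE_OPEN.
 */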
 112
 113int cifs_posix_open(char *full_path, struct inode **pinode,
 114			struct super_block *sb, int mode, unsigned int f_flags,
 115			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
 116{
 117	int rc;
 118	FILE_UNIX_BASIC_INFO *presp_data;
 119	__u32 posix_flags = 0;
 120	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 121	struct cifs_fattr fattr;
 122	struct tcon_link *tlink;
 123	struct cifs_tcon *tcon;
 124
 125	cifs_dbg(FYI, "posix open %s\n", full_path);
 126
 127	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
 128	if (presp_data == NULL)
 129		return -ENOMEM;
 130
 131	tlink = cifs_sb_tlink(cifs_sb);
 132	if (IS_ERR(tlink)) {
 133		rc = PTR_ERR(tlink);
 134		goto posix_open_ret;
 135	}
 136
 137	tcon = tlink_tcon(tlink);
 138	mode &= ~current_umask();
 139
 140	posix_flags = cifs_posix_convert_flags(f_flags);
 141	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
 142			     poplock, full_path, cifs_sb->local_nls,
 143			     cifs_remap(cifs_sb));
 144	cifs_put_tlink(tlink);
 145
 146	if (rc)
 147		goto posix_open_ret;
 148
 149	if (presp_data->Type == cpu_to_le32(-1))
 150		goto posix_open_ret; /* open ok, caller does qpathinfo */
 151
 152	if (!pinode)
 153		goto posix_open_ret; /* caller does not need info */
 154
 155	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
 156
 157	/* get new inode and set it up */
 158	if (*pinode == NULL) {
 159		cifs_fill_uniqueid(sb, &fattr);
 160		*pinode = cifs_iget(sb, &fattr);
 161		if (!*pinode) {
 162			rc = -ENOMEM;
 163			goto posix_open_ret;
 164		}
 165	} else {
 166		cifs_fattr_to_inode(*pinode, &fattr);
 167	}
 168
 169posix_open_ret:
 170	kfree(presp_data);
 171	return rc;
 172}
 173
 174static int
 175cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
 176	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
 177	     struct cifs_fid *fid, unsigned int xid)
 178{
 179	int rc;
 180	int desired_access;
 181	int disposition;
 182	int create_options = CREATE_NOT_DIR;
 183	FILE_ALL_INFO *buf;
 184	struct TCP_Server_Info *server = tcon->ses->server;
 185	struct cifs_open_parms oparms;
 186
 187	if (!server->ops->open)
 188		return -ENOSYS;
 189
 190	desired_access = cifs_convert_flags(f_flags);
 191
 192/*********************************************************************
 193 *  open flag mapping table:
 194 *
 195 *	POSIX Flag            CIFS Disposition
 196 *	----------            ----------------
 197 *	O_CREAT               FILE_OPEN_IF
 198 *	O_CREAT | O_EXCL      FILE_CREATE
 199 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 200 *	O_TRUNC               FILE_OVERWRITE
 201 *	none of the above     FILE_OPEN
 202 *
 203 *	Note that no POSIX open flag maps directly to the disposition
 204 *	FILE_SUPERSEDE (ie create whether or not the file exists).
 205 *	O_CREAT | O_TRUNC is similar, but it truncates the existing
 206 *	file rather than creating a new file as FILE_SUPERSEDE does
 207 *	(which uses the attributes / metadata passed in on the open call).
 208 *
 209 *	O_SYNC is a reasonable match to the CIFS writethrough flag
 210 *	and the read/write flags match reasonably.  O_LARGEFILE
 211 *	is irrelevant because largefile support is always used
 212 *	by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 213 *	O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 214 *********************************************************************/
 215
 216	disposition = cifs_get_disposition(f_flags);
 217
 218	/* BB pass O_SYNC flag through on file attributes .. BB */
 219
 220	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
 221	if (!buf)
 222		return -ENOMEM;
 223
 224	if (backup_cred(cifs_sb))
 225		create_options |= CREATE_OPEN_BACKUP_INTENT;
 226
 227	oparms.tcon = tcon;
 228	oparms.cifs_sb = cifs_sb;
 229	oparms.desired_access = desired_access;
 230	oparms.create_options = create_options;
 231	oparms.disposition = disposition;
 232	oparms.path = full_path;
 233	oparms.fid = fid;
 234	oparms.reconnect = false;
 235
 236	rc = server->ops->open(xid, &oparms, oplock, buf);
 237
 238	if (rc)
 239		goto out;
 240
 241	if (tcon->unix_ext)
 242		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
 243					      xid);
 244	else
 245		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
 246					 xid, fid);
 247
 248out:
 249	kfree(buf);
 250	return rc;
 251}
 252
 253static bool
 254cifs_has_mand_locks(struct cifsInodeInfo *cinode)
 255{
 256	struct cifs_fid_locks *cur;
 257	bool has_locks = false;
 258
 259	down_read(&cinode->lock_sem);
 260	list_for_each_entry(cur, &cinode->llist, llist) {
 261		if (!list_empty(&cur->locks)) {
 262			has_locks = true;
 263			break;
 264		}
 265	}
 266	up_read(&cinode->lock_sem);
 267	return has_locks;
 268}
 269
 270struct cifsFileInfo *
 271cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 272		  struct tcon_link *tlink, __u32 oplock)
 273{
 274	struct dentry *dentry = file_dentry(file);
 275	struct inode *inode = d_inode(dentry);
 276	struct cifsInodeInfo *cinode = CIFS_I(inode);
 277	struct cifsFileInfo *cfile;
 278	struct cifs_fid_locks *fdlocks;
 279	struct cifs_tcon *tcon = tlink_tcon(tlink);
 280	struct TCP_Server_Info *server = tcon->ses->server;
 281
 282	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
 283	if (cfile == NULL)
 284		return cfile;
 285
 286	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
 287	if (!fdlocks) {
 288		kfree(cfile);
 289		return NULL;
 290	}
 291
 292	INIT_LIST_HEAD(&fdlocks->locks);
 293	fdlocks->cfile = cfile;
 294	cfile->llist = fdlocks;
 295	down_write(&cinode->lock_sem);
 296	list_add(&fdlocks->llist, &cinode->llist);
 297	up_write(&cinode->lock_sem);
 298
 299	cfile->count = 1;
 300	cfile->pid = current->tgid;
 301	cfile->uid = current_fsuid();
 302	cfile->dentry = dget(dentry);
 303	cfile->f_flags = file->f_flags;
 304	cfile->invalidHandle = false;
 305	cfile->tlink = cifs_get_tlink(tlink);
 306	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
 307	mutex_init(&cfile->fh_mutex);
 308	spin_lock_init(&cfile->file_info_lock);
 309
 310	cifs_sb_active(inode->i_sb);
 311
 312	/*
 313	 * If the server returned a read oplock and we have mandatory brlocks,
 314	 * set oplock level to None.
 315	 */
 316	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
 317		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
 318		oplock = 0;
 319	}
 320
 321	spin_lock(&tcon->open_file_lock);
 322	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
 323		oplock = fid->pending_open->oplock;
 324	list_del(&fid->pending_open->olist);
 325
 326	fid->purge_cache = false;
 327	server->ops->set_fid(cfile, fid, oplock);
 328
 329	list_add(&cfile->tlist, &tcon->openFileList);
 330
 331	/* if readable file instance put first in list */
 332	if (file->f_mode & FMODE_READ)
 333		list_add(&cfile->flist, &cinode->openFileList);
 334	else
 335		list_add_tail(&cfile->flist, &cinode->openFileList);
 336	spin_unlock(&tcon->open_file_lock);
 337
 338	if (fid->purge_cache)
 339		cifs_zap_mapping(inode);
 340
 341	file->private_data = cfile;
 342	return cfile;
 343}
 344
 345struct cifsFileInfo *
 346cifsFileInfo_get(struct cifsFileInfo *cifs_file)
 347{
 348	spin_lock(&cifs_file->file_info_lock);
 349	cifsFileInfo_get_locked(cifs_file);
 350	spin_unlock(&cifs_file->file_info_lock);
 351	return cifs_file;
 352}
 353
 354/*
 355 * Release a reference on the file private data. This may involve closing
 356 * the filehandle out on the server. Must be called without holding
 357 * tcon->open_file_lock and cifs_file->file_info_lock.
 358 */
 359void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 360{
 361	struct inode *inode = d_inode(cifs_file->dentry);
 362	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
 363	struct TCP_Server_Info *server = tcon->ses->server;
 364	struct cifsInodeInfo *cifsi = CIFS_I(inode);
 365	struct super_block *sb = inode->i_sb;
 366	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 367	struct cifsLockInfo *li, *tmp;
 368	struct cifs_fid fid;
 369	struct cifs_pending_open open;
 370	bool oplock_break_cancelled;
 371
 372	spin_lock(&tcon->open_file_lock);
 373
 374	spin_lock(&cifs_file->file_info_lock);
 375	if (--cifs_file->count > 0) {
 376		spin_unlock(&cifs_file->file_info_lock);
 377		spin_unlock(&tcon->open_file_lock);
 378		return;
 379	}
 380	spin_unlock(&cifs_file->file_info_lock);
 381
 382	if (server->ops->get_lease_key)
 383		server->ops->get_lease_key(inode, &fid);
 384
 385	/* store open in pending opens to make sure we don't miss lease break */
 386	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
 387
 388	/* remove it from the lists */
 389	list_del(&cifs_file->flist);
 390	list_del(&cifs_file->tlist);
 391
 392	if (list_empty(&cifsi->openFileList)) {
 393		cifs_dbg(FYI, "closing last open instance for inode %p\n",
 394			 d_inode(cifs_file->dentry));
 395		/*
 396		 * In strict cache mode we need to invalidate the mapping on the
 397		 * last close because it may cause an error when we open this
 398		 * file again and get at least a level II oplock.
 399		 */
 400		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
 401			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
 402		cifs_set_oplock_level(cifsi, 0);
 403	}
 404
 405	spin_unlock(&tcon->open_file_lock);
 406
 407	oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
 408
 409	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
 410		struct TCP_Server_Info *server = tcon->ses->server;
 411		unsigned int xid;
 412
 413		xid = get_xid();
 414		if (server->ops->close)
 415			server->ops->close(xid, tcon, &cifs_file->fid);
 416		_free_xid(xid);
 417	}
 418
 419	if (oplock_break_cancelled)
 420		cifs_done_oplock_break(cifsi);
 421
 422	cifs_del_pending_open(&open);
 423
 424	/*
 425	 * Delete any outstanding lock records. We'll lose them when the file
 426	 * is closed anyway.
 427	 */
 428	down_write(&cifsi->lock_sem);
 429	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
 430		list_del(&li->llist);
 431		cifs_del_lock_waiters(li);
 432		kfree(li);
 433	}
 434	list_del(&cifs_file->llist->llist);
 435	kfree(cifs_file->llist);
 436	up_write(&cifsi->lock_sem);
 437
 438	cifs_put_tlink(cifs_file->tlink);
 439	dput(cifs_file->dentry);
 440	cifs_sb_deactive(sb);
 441	kfree(cifs_file);
 442}
 443
 444int cifs_open(struct inode *inode, struct file *file)
 445
 446{
 447	int rc = -EACCES;
 448	unsigned int xid;
 449	__u32 oplock;
 450	struct cifs_sb_info *cifs_sb;
 451	struct TCP_Server_Info *server;
 452	struct cifs_tcon *tcon;
 453	struct tcon_link *tlink;
 454	struct cifsFileInfo *cfile = NULL;
 455	char *full_path = NULL;
 456	bool posix_open_ok = false;
 457	struct cifs_fid fid;
 458	struct cifs_pending_open open;
 459
 460	xid = get_xid();
 461
 462	cifs_sb = CIFS_SB(inode->i_sb);
 463	tlink = cifs_sb_tlink(cifs_sb);
 464	if (IS_ERR(tlink)) {
 465		free_xid(xid);
 466		return PTR_ERR(tlink);
 467	}
 468	tcon = tlink_tcon(tlink);
 469	server = tcon->ses->server;
 470
 471	full_path = build_path_from_dentry(file_dentry(file));
 472	if (full_path == NULL) {
 473		rc = -ENOMEM;
 474		goto out;
 475	}
 476
 477	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
 478		 inode, file->f_flags, full_path);
 479
 480	if (file->f_flags & O_DIRECT &&
 481	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
 482		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
 483			file->f_op = &cifs_file_direct_nobrl_ops;
 484		else
 485			file->f_op = &cifs_file_direct_ops;
 486	}
 487
 488	if (server->oplocks)
 489		oplock = REQ_OPLOCK;
 490	else
 491		oplock = 0;
 492
 493	if (!tcon->broken_posix_open && tcon->unix_ext &&
 494	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 495				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 496		/* can not refresh inode info since size could be stale */
 497		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
 498				cifs_sb->mnt_file_mode /* ignored */,
 499				file->f_flags, &oplock, &fid.netfid, xid);
 500		if (rc == 0) {
 501			cifs_dbg(FYI, "posix open succeeded\n");
 502			posix_open_ok = true;
 503		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
 504			if (tcon->ses->serverNOS)
 505				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
 506					 tcon->ses->serverName,
 507					 tcon->ses->serverNOS);
 508			tcon->broken_posix_open = true;
 509		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
 510			 (rc != -EOPNOTSUPP)) /* path not found or net err */
 511			goto out;
 512		/*
 513		 * Else fallthrough to retry open the old way on network i/o
 514		 * or DFS errors.
 515		 */
 516	}
 517
 518	if (server->ops->get_lease_key)
 519		server->ops->get_lease_key(inode, &fid);
 520
 521	cifs_add_pending_open(&fid, tlink, &open);
 522
 523	if (!posix_open_ok) {
 524		if (server->ops->get_lease_key)
 525			server->ops->get_lease_key(inode, &fid);
 526
 527		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
 528				  file->f_flags, &oplock, &fid, xid);
 529		if (rc) {
 530			cifs_del_pending_open(&open);
 531			goto out;
 532		}
 533	}
 534
 535	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
 536	if (cfile == NULL) {
 537		if (server->ops->close)
 538			server->ops->close(xid, tcon, &fid);
 539		cifs_del_pending_open(&open);
 540		rc = -ENOMEM;
 541		goto out;
 542	}
 543
 544	cifs_fscache_set_inode_cookie(inode, file);
 545
 546	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
 547		/*
 548		 * Time to set mode which we can not set earlier due to
 549		 * problems creating new read-only files.
 550		 */
 551		struct cifs_unix_set_info_args args = {
 552			.mode	= inode->i_mode,
 553			.uid	= INVALID_UID, /* no change */
 554			.gid	= INVALID_GID, /* no change */
 555			.ctime	= NO_CHANGE_64,
 556			.atime	= NO_CHANGE_64,
 557			.mtime	= NO_CHANGE_64,
 558			.device	= 0,
 559		};
 560		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
 561				       cfile->pid);
 562	}
 563
 564out:
 565	kfree(full_path);
 566	free_xid(xid);
 567	cifs_put_tlink(tlink);
 568	return rc;
 569}
 570
 571static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
 572
 573/*
 574 * Try to reacquire byte range locks that were released when session
 575 * to server was lost.
 576 */
 577static int
 578cifs_relock_file(struct cifsFileInfo *cfile)
 579{
 580	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
 581	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 582	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 583	int rc = 0;
 584
 585	down_read(&cinode->lock_sem);
 586	if (cinode->can_cache_brlcks) {
 587		/* can cache locks - no need to relock */
 588		up_read(&cinode->lock_sem);
 589		return rc;
 590	}
 591
 592	if (cap_unix(tcon->ses) &&
 593	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
 594	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
 595		rc = cifs_push_posix_locks(cfile);
 596	else
 597		rc = tcon->ses->server->ops->push_mand_locks(cfile);
 598
 599	up_read(&cinode->lock_sem);
 600	return rc;
 601}
 602
 603static int
 604cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 605{
 606	int rc = -EACCES;
 607	unsigned int xid;
 608	__u32 oplock;
 609	struct cifs_sb_info *cifs_sb;
 610	struct cifs_tcon *tcon;
 611	struct TCP_Server_Info *server;
 612	struct cifsInodeInfo *cinode;
 613	struct inode *inode;
 614	char *full_path = NULL;
 615	int desired_access;
 616	int disposition = FILE_OPEN;
 617	int create_options = CREATE_NOT_DIR;
 618	struct cifs_open_parms oparms;
 619
 620	xid = get_xid();
 621	mutex_lock(&cfile->fh_mutex);
 622	if (!cfile->invalidHandle) {
 623		mutex_unlock(&cfile->fh_mutex);
 624		rc = 0;
 625		free_xid(xid);
 626		return rc;
 627	}
 628
 629	inode = d_inode(cfile->dentry);
 630	cifs_sb = CIFS_SB(inode->i_sb);
 631	tcon = tlink_tcon(cfile->tlink);
 632	server = tcon->ses->server;
 633
 634	/*
 635	 * Cannot grab the rename sem here, because various ops (including some
 636	 * that already hold the rename sem) can end up causing writepage to get
 637	 * called, and if the server was down that means we end up here and we
 638	 * can never tell whether the caller already holds the rename_sem.
 639	 */
 640	full_path = build_path_from_dentry(cfile->dentry);
 641	if (full_path == NULL) {
 642		rc = -ENOMEM;
 643		mutex_unlock(&cfile->fh_mutex);
 644		free_xid(xid);
 645		return rc;
 646	}
 647
 648	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
 649		 inode, cfile->f_flags, full_path);
 650
 651	if (tcon->ses->server->oplocks)
 652		oplock = REQ_OPLOCK;
 653	else
 654		oplock = 0;
 655
 656	if (tcon->unix_ext && cap_unix(tcon->ses) &&
 657	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 658				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 659		/*
 660		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
 661		 * original open. Must mask them off for a reopen.
 662		 */
 663		unsigned int oflags = cfile->f_flags &
 664						~(O_CREAT | O_EXCL | O_TRUNC);
 665
 666		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
 667				     cifs_sb->mnt_file_mode /* ignored */,
 668				     oflags, &oplock, &cfile->fid.netfid, xid);
 669		if (rc == 0) {
 670			cifs_dbg(FYI, "posix reopen succeeded\n");
 671			oparms.reconnect = true;
 672			goto reopen_success;
 673		}
 674		/*
 675		 * Fall through to retry the open the old way on errors;
 676		 * especially in the reconnect path it is important to retry hard
 677		 */
 678	}
 679
 680	desired_access = cifs_convert_flags(cfile->f_flags);
 681
 682	if (backup_cred(cifs_sb))
 683		create_options |= CREATE_OPEN_BACKUP_INTENT;
 684
 685	if (server->ops->get_lease_key)
 686		server->ops->get_lease_key(inode, &cfile->fid);
 687
 688	oparms.tcon = tcon;
 689	oparms.cifs_sb = cifs_sb;
 690	oparms.desired_access = desired_access;
 691	oparms.create_options = create_options;
 692	oparms.disposition = disposition;
 693	oparms.path = full_path;
 694	oparms.fid = &cfile->fid;
 695	oparms.reconnect = true;
 696
 697	/*
 698	 * Can not refresh inode by passing in file_info buf to be returned by
 699	 * ops->open and then calling get_inode_info with returned buf since
 700	 * file might have write behind data that needs to be flushed and server
 701	 * version of file size can be stale. If we knew for sure that inode was
 702	 * not dirty locally we could do this.
 703	 */
 704	rc = server->ops->open(xid, &oparms, &oplock, NULL);
 705	if (rc == -ENOENT && oparms.reconnect == false) {
 706		/* durable handle timeout is expired - open the file again */
 707		rc = server->ops->open(xid, &oparms, &oplock, NULL);
 708		/* indicate that we need to relock the file */
 709		oparms.reconnect = true;
 710	}
 711
 712	if (rc) {
 713		mutex_unlock(&cfile->fh_mutex);
 714		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
 715		cifs_dbg(FYI, "oplock: %d\n", oplock);
 716		goto reopen_error_exit;
 717	}
 718
 719reopen_success:
 720	cfile->invalidHandle = false;
 721	mutex_unlock(&cfile->fh_mutex);
 722	cinode = CIFS_I(inode);
 723
 724	if (can_flush) {
 725		rc = filemap_write_and_wait(inode->i_mapping);
 726		mapping_set_error(inode->i_mapping, rc);
 727
 728		if (tcon->unix_ext)
 729			rc = cifs_get_inode_info_unix(&inode, full_path,
 730						      inode->i_sb, xid);
 731		else
 732			rc = cifs_get_inode_info(&inode, full_path, NULL,
 733						 inode->i_sb, xid, NULL);
 734	}
 735	/*
 736	 * Else we are writing out data to server already and could deadlock if
 737	 * we tried to flush data, and since we do not know if we have data that
 738	 * would invalidate the current end of file on the server we can not go
 739	 * to the server to get the new inode info.
 740	 */
 741
 742	/*
 743	 * If the server returned a read oplock and we have mandatory brlocks,
 744	 * set oplock level to None.
 745	 */
 746	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
 747		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
 748		oplock = 0;
 749	}
 750
 751	server->ops->set_fid(cfile, &cfile->fid, oplock);
 752	if (oparms.reconnect)
 753		cifs_relock_file(cfile);
 754
 755reopen_error_exit:
 756	kfree(full_path);
 757	free_xid(xid);
 758	return rc;
 759}
 760
 761int cifs_close(struct inode *inode, struct file *file)
 762{
 763	if (file->private_data != NULL) {
 764		cifsFileInfo_put(file->private_data);
 765		file->private_data = NULL;
 766	}
 767
 768	/* return code from the ->release op is always ignored */
 769	return 0;
 770}
 771
 772void
 773cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
 774{
 775	struct cifsFileInfo *open_file;
 776	struct list_head *tmp;
 777	struct list_head *tmp1;
 778	struct list_head tmp_list;
 779
 780	if (!tcon->use_persistent || !tcon->need_reopen_files)
 781		return;
 782
 783	tcon->need_reopen_files = false;
 784
 785	cifs_dbg(FYI, "Reopen persistent handles\n");
 786	INIT_LIST_HEAD(&tmp_list);
 787
 788	/* list all files open on tree connection, reopen resilient handles  */
 789	spin_lock(&tcon->open_file_lock);
 790	list_for_each(tmp, &tcon->openFileList) {
 791		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
 792		if (!open_file->invalidHandle)
 793			continue;
 794		cifsFileInfo_get(open_file);
 795		list_add_tail(&open_file->rlist, &tmp_list);
 796	}
 797	spin_unlock(&tcon->open_file_lock);
 798
 799	list_for_each_safe(tmp, tmp1, &tmp_list) {
 800		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
 801		if (cifs_reopen_file(open_file, false /* do not flush */))
 802			tcon->need_reopen_files = true;
 803		list_del_init(&open_file->rlist);
 804		cifsFileInfo_put(open_file);
 805	}
 806}
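/*
 * Note the two-phase pattern above: invalid handles are collected on a
 * private list under tcon->open_file_lock (taking an extra reference on
 * each), and the reopen calls are issued only after the spinlock has been
 * dropped, since cifs_reopen_file can block.
 */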
 807
 808int cifs_closedir(struct inode *inode, struct file *file)
 809{
 810	int rc = 0;
 811	unsigned int xid;
 812	struct cifsFileInfo *cfile = file->private_data;
 813	struct cifs_tcon *tcon;
 814	struct TCP_Server_Info *server;
 815	char *buf;
 816
 817	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
 818
 819	if (cfile == NULL)
 820		return rc;
 821
 822	xid = get_xid();
 823	tcon = tlink_tcon(cfile->tlink);
 824	server = tcon->ses->server;
 825
 826	cifs_dbg(FYI, "Freeing private data in close dir\n");
 827	spin_lock(&cfile->file_info_lock);
 828	if (server->ops->dir_needs_close(cfile)) {
 829		cfile->invalidHandle = true;
 830		spin_unlock(&cfile->file_info_lock);
 831		if (server->ops->close_dir)
 832			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
 833		else
 834			rc = -ENOSYS;
 835		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
 836		/* not much we can do if it fails anyway, ignore rc */
 837		rc = 0;
 838	} else
 839		spin_unlock(&cfile->file_info_lock);
 840
 841	buf = cfile->srch_inf.ntwrk_buf_start;
 842	if (buf) {
 843		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
 844		cfile->srch_inf.ntwrk_buf_start = NULL;
 845		if (cfile->srch_inf.smallBuf)
 846			cifs_small_buf_release(buf);
 847		else
 848			cifs_buf_release(buf);
 849	}
 850
 851	cifs_put_tlink(cfile->tlink);
 852	kfree(file->private_data);
 853	file->private_data = NULL;
 854	/* BB can we lock the filestruct while this is going on? */
 855	free_xid(xid);
 856	return rc;
 857}
 858
 859static struct cifsLockInfo *
 860cifs_lock_init(__u64 offset, __u64 length, __u8 type)
 861{
 862	struct cifsLockInfo *lock =
 863		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
 864	if (!lock)
 865		return lock;
 866	lock->offset = offset;
 867	lock->length = length;
 868	lock->type = type;
 869	lock->pid = current->tgid;
 870	INIT_LIST_HEAD(&lock->blist);
 871	init_waitqueue_head(&lock->block_q);
 872	return lock;
 873}
 874
 875void
 876cifs_del_lock_waiters(struct cifsLockInfo *lock)
 877{
 878	struct cifsLockInfo *li, *tmp;
 879	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
 880		list_del_init(&li->blist);
 881		wake_up(&li->block_q);
 882	}
 883}
 884
 885#define CIFS_LOCK_OP	0
 886#define CIFS_READ_OP	1
 887#define CIFS_WRITE_OP	2
 888
 889/* @rw_check : CIFS_LOCK_OP (0), CIFS_READ_OP (1) or CIFS_WRITE_OP (2) */
 890static bool
 891cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
 892			    __u64 length, __u8 type, struct cifsFileInfo *cfile,
 893			    struct cifsLockInfo **conf_lock, int rw_check)
 894{
 895	struct cifsLockInfo *li;
 896	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
 897	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
 898
 899	list_for_each_entry(li, &fdlocks->locks, llist) {
 900		if (offset + length <= li->offset ||
 901		    offset >= li->offset + li->length)
 902			continue;
 903		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
 904		    server->ops->compare_fids(cfile, cur_cfile)) {
 905			/* shared lock prevents write op through the same fid */
 906			if (!(li->type & server->vals->shared_lock_type) ||
 907			    rw_check != CIFS_WRITE_OP)
 908				continue;
 909		}
 910		if ((type & server->vals->shared_lock_type) &&
 911		    ((server->ops->compare_fids(cfile, cur_cfile) &&
 912		     current->tgid == li->pid) || type == li->type))
 913			continue;
 914		if (conf_lock)
 915			*conf_lock = li;
 916		return true;
 917	}
 918	return false;
 919}
 920
 921bool
 922cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
 923			__u8 type, struct cifsLockInfo **conf_lock,
 924			int rw_check)
 925{
 926	bool rc = false;
 927	struct cifs_fid_locks *cur;
 928	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 929
 930	list_for_each_entry(cur, &cinode->llist, llist) {
 931		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
 932						 cfile, conf_lock, rw_check);
 933		if (rc)
 934			break;
 935	}
 936
 937	return rc;
 938}
 939
 940/*
 941 * Check if there is another lock that prevents us from setting the lock
 942 * (mandatory style). If such a lock exists, update the flock structure with
 943 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 944 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 945 * send a request to the server or 1 otherwise.
 946 */
 947static int
 948cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
 949	       __u8 type, struct file_lock *flock)
 950{
 951	int rc = 0;
 952	struct cifsLockInfo *conf_lock;
 953	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 954	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
 955	bool exist;
 956
 957	down_read(&cinode->lock_sem);
 958
 959	exist = cifs_find_lock_conflict(cfile, offset, length, type,
 960					&conf_lock, CIFS_LOCK_OP);
 961	if (exist) {
 962		flock->fl_start = conf_lock->offset;
 963		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
 964		flock->fl_pid = conf_lock->pid;
 965		if (conf_lock->type & server->vals->shared_lock_type)
 966			flock->fl_type = F_RDLCK;
 967		else
 968			flock->fl_type = F_WRLCK;
 969	} else if (!cinode->can_cache_brlcks)
 970		rc = 1;
 971	else
 972		flock->fl_type = F_UNLCK;
 973
 974	up_read(&cinode->lock_sem);
 975	return rc;
 976}
 977
 978static void
 979cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
 980{
 981	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 982	down_write(&cinode->lock_sem);
 983	list_add_tail(&lock->llist, &cfile->llist->locks);
 984	up_write(&cinode->lock_sem);
 985}
 986
 987/*
 988 * Set the byte-range lock (mandatory style). Returns:
 989 * 1) 0, if we set the lock and don't need to send a request to the server;
 990 * 2) 1, if no locks prevent us but we need to send a request to the server;
 991 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 992 */
 993static int
 994cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
 995		 bool wait)
 996{
 997	struct cifsLockInfo *conf_lock;
 998	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 999	bool exist;
1000	int rc = 0;
1001
1002try_again:
1003	exist = false;
1004	down_write(&cinode->lock_sem);
1005
1006	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1007					lock->type, &conf_lock, CIFS_LOCK_OP);
1008	if (!exist && cinode->can_cache_brlcks) {
1009		list_add_tail(&lock->llist, &cfile->llist->locks);
1010		up_write(&cinode->lock_sem);
1011		return rc;
1012	}
1013
1014	if (!exist)
1015		rc = 1;
1016	else if (!wait)
1017		rc = -EACCES;
1018	else {
1019		list_add_tail(&lock->blist, &conf_lock->blist);
1020		up_write(&cinode->lock_sem);
1021		rc = wait_event_interruptible(lock->block_q,
1022					(lock->blist.prev == &lock->blist) &&
1023					(lock->blist.next == &lock->blist));
1024		if (!rc)
1025			goto try_again;
1026		down_write(&cinode->lock_sem);
1027		list_del_init(&lock->blist);
1028	}
1029
1030	up_write(&cinode->lock_sem);
1031	return rc;
1032}
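/*
 * While blocked above, the lock sits on the conflicting lock's blist;
 * cifs_del_lock_waiters() empties that list when the conflicting lock is
 * removed, which is what the wait_event_interruptible() condition (blist
 * pointing back to itself, i.e. empty) detects before retrying.
 */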
1033
1034/*
1035 * Check if there is another lock that prevents us from setting the lock
1036 * (posix style). If such a lock exists, update the flock structure with
1037 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1038 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
1039 * send a request to the server or 1 otherwise.
1040 */
1041static int
1042cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1043{
1044	int rc = 0;
1045	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1046	unsigned char saved_type = flock->fl_type;
1047
1048	if ((flock->fl_flags & FL_POSIX) == 0)
1049		return 1;
1050
1051	down_read(&cinode->lock_sem);
1052	posix_test_lock(file, flock);
1053
1054	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1055		flock->fl_type = saved_type;
1056		rc = 1;
1057	}
1058
1059	up_read(&cinode->lock_sem);
1060	return rc;
1061}
1062
1063/*
1064 * Set the byte-range lock (posix style). Returns:
1065 * 1) 0, if we set the lock and don't need to send a request to the server;
1066 * 2) 1, if we need to send a request to the server;
1067 * 3) <0, if an error occurs while setting the lock.
1068 */
1069static int
1070cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1071{
1072	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1073	int rc = 1;
1074
1075	if ((flock->fl_flags & FL_POSIX) == 0)
1076		return rc;
1077
1078try_again:
1079	down_write(&cinode->lock_sem);
1080	if (!cinode->can_cache_brlcks) {
1081		up_write(&cinode->lock_sem);
1082		return rc;
1083	}
1084
1085	rc = posix_lock_file(file, flock, NULL);
1086	up_write(&cinode->lock_sem);
1087	if (rc == FILE_LOCK_DEFERRED) {
1088		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1089		if (!rc)
1090			goto try_again;
1091		posix_unblock_lock(flock);
1092	}
1093	return rc;
1094}
1095
1096int
1097cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1098{
1099	unsigned int xid;
1100	int rc = 0, stored_rc;
1101	struct cifsLockInfo *li, *tmp;
1102	struct cifs_tcon *tcon;
1103	unsigned int num, max_num, max_buf;
1104	LOCKING_ANDX_RANGE *buf, *cur;
1105	int types[] = {LOCKING_ANDX_LARGE_FILES,
1106		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1107	int i;
1108
1109	xid = get_xid();
1110	tcon = tlink_tcon(cfile->tlink);
1111
1112	/*
1113	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1114	 * and check it for zero before using.
1115	 */
1116	max_buf = tcon->ses->server->maxBuf;
1117	if (!max_buf) {
1118		free_xid(xid);
1119		return -EINVAL;
1120	}
1121
1122	max_num = (max_buf - sizeof(struct smb_hdr)) /
1123						sizeof(LOCKING_ANDX_RANGE);
1124	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1125	if (!buf) {
1126		free_xid(xid);
1127		return -ENOMEM;
1128	}
1129
1130	for (i = 0; i < 2; i++) {
1131		cur = buf;
1132		num = 0;
1133		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1134			if (li->type != types[i])
1135				continue;
1136			cur->Pid = cpu_to_le16(li->pid);
1137			cur->LengthLow = cpu_to_le32((u32)li->length);
1138			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1139			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1140			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1141			if (++num == max_num) {
1142				stored_rc = cifs_lockv(xid, tcon,
1143						       cfile->fid.netfid,
1144						       (__u8)li->type, 0, num,
1145						       buf);
1146				if (stored_rc)
1147					rc = stored_rc;
1148				cur = buf;
1149				num = 0;
1150			} else
1151				cur++;
1152		}
1153
1154		if (num) {
1155			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1156					       (__u8)types[i], 0, num, buf);
1157			if (stored_rc)
1158				rc = stored_rc;
1159		}
1160	}
1161
1162	kfree(buf);
1163	free_xid(xid);
1164	return rc;
1165}
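/*
 * The loop above makes two passes over the cached locks, one per entry in
 * types[] (exclusive, then shared LOCKING_ANDX variants), batching up to
 * max_num ranges into a single cifs_lockv() call each time the buffer fills.
 */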
1166
1167static __u32
1168hash_lockowner(fl_owner_t owner)
1169{
1170	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1171}
1172
1173struct lock_to_push {
1174	struct list_head llist;
1175	__u64 offset;
1176	__u64 length;
1177	__u32 pid;
1178	__u16 netfid;
1179	__u8 type;
1180};
1181
1182static int
1183cifs_push_posix_locks(struct cifsFileInfo *cfile)
1184{
1185	struct inode *inode = d_inode(cfile->dentry);
1186	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1187	struct file_lock *flock;
1188	struct file_lock_context *flctx = inode->i_flctx;
1189	unsigned int count = 0, i;
1190	int rc = 0, xid, type;
1191	struct list_head locks_to_send, *el;
1192	struct lock_to_push *lck, *tmp;
1193	__u64 length;
1194
1195	xid = get_xid();
1196
1197	if (!flctx)
1198		goto out;
1199
1200	spin_lock(&flctx->flc_lock);
1201	list_for_each(el, &flctx->flc_posix) {
1202		count++;
1203	}
1204	spin_unlock(&flctx->flc_lock);
1205
1206	INIT_LIST_HEAD(&locks_to_send);
1207
1208	/*
1209	 * Allocating count locks is enough because no FL_POSIX locks can be
1210	 * added to the list while we are holding cinode->lock_sem, which
1211	 * protects locking operations on this inode.
1212	 */
1213	for (i = 0; i < count; i++) {
1214		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1215		if (!lck) {
1216			rc = -ENOMEM;
1217			goto err_out;
1218		}
1219		list_add_tail(&lck->llist, &locks_to_send);
1220	}
1221
1222	el = locks_to_send.next;
1223	spin_lock(&flctx->flc_lock);
1224	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1225		if (el == &locks_to_send) {
1226			/*
1227			 * The list ended. We don't have enough allocated
1228			 * structures - something is really wrong.
1229			 */
1230			cifs_dbg(VFS, "Can't push all brlocks!\n");
1231			break;
1232		}
1233		length = 1 + flock->fl_end - flock->fl_start;
1234		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1235			type = CIFS_RDLCK;
1236		else
1237			type = CIFS_WRLCK;
1238		lck = list_entry(el, struct lock_to_push, llist);
1239		lck->pid = hash_lockowner(flock->fl_owner);
1240		lck->netfid = cfile->fid.netfid;
1241		lck->length = length;
1242		lck->type = type;
1243		lck->offset = flock->fl_start;
1244	}
1245	spin_unlock(&flctx->flc_lock);
1246
1247	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1248		int stored_rc;
1249
1250		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1251					     lck->offset, lck->length, NULL,
1252					     lck->type, 0);
1253		if (stored_rc)
1254			rc = stored_rc;
1255		list_del(&lck->llist);
1256		kfree(lck);
1257	}
1258
1259out:
1260	free_xid(xid);
1261	return rc;
1262err_out:
1263	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1264		list_del(&lck->llist);
1265		kfree(lck);
1266	}
1267	goto out;
1268}
1269
1270static int
1271cifs_push_locks(struct cifsFileInfo *cfile)
1272{
1273	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1274	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1275	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1276	int rc = 0;
1277
1278	/* we are going to update can_cache_brlcks here - need write access */
1279	down_write(&cinode->lock_sem);
1280	if (!cinode->can_cache_brlcks) {
1281		up_write(&cinode->lock_sem);
1282		return rc;
1283	}
1284
1285	if (cap_unix(tcon->ses) &&
1286	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1287	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1288		rc = cifs_push_posix_locks(cfile);
1289	else
1290		rc = tcon->ses->server->ops->push_mand_locks(cfile);
1291
1292	cinode->can_cache_brlcks = false;
1293	up_write(&cinode->lock_sem);
1294	return rc;
1295}
1296
1297static void
1298cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1299		bool *wait_flag, struct TCP_Server_Info *server)
1300{
1301	if (flock->fl_flags & FL_POSIX)
1302		cifs_dbg(FYI, "Posix\n");
1303	if (flock->fl_flags & FL_FLOCK)
1304		cifs_dbg(FYI, "Flock\n");
1305	if (flock->fl_flags & FL_SLEEP) {
1306		cifs_dbg(FYI, "Blocking lock\n");
1307		*wait_flag = true;
1308	}
1309	if (flock->fl_flags & FL_ACCESS)
1310		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1311	if (flock->fl_flags & FL_LEASE)
1312		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1313	if (flock->fl_flags &
1314	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1315	       FL_ACCESS | FL_LEASE | FL_CLOSE)))
1316		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1317
1318	*type = server->vals->large_lock_type;
1319	if (flock->fl_type == F_WRLCK) {
1320		cifs_dbg(FYI, "F_WRLCK\n");
1321		*type |= server->vals->exclusive_lock_type;
1322		*lock = 1;
1323	} else if (flock->fl_type == F_UNLCK) {
1324		cifs_dbg(FYI, "F_UNLCK\n");
1325		*type |= server->vals->unlock_lock_type;
1326		*unlock = 1;
1327		/* Check if unlock includes more than one lock range */
1328	} else if (flock->fl_type == F_RDLCK) {
1329		cifs_dbg(FYI, "F_RDLCK\n");
1330		*type |= server->vals->shared_lock_type;
1331		*lock = 1;
1332	} else if (flock->fl_type == F_EXLCK) {
1333		cifs_dbg(FYI, "F_EXLCK\n");
1334		*type |= server->vals->exclusive_lock_type;
1335		*lock = 1;
1336	} else if (flock->fl_type == F_SHLCK) {
1337		cifs_dbg(FYI, "F_SHLCK\n");
1338		*type |= server->vals->shared_lock_type;
1339		*lock = 1;
1340	} else
1341		cifs_dbg(FYI, "Unknown type of lock\n");
1342}
1343
1344static int
1345cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1346	   bool wait_flag, bool posix_lck, unsigned int xid)
1347{
1348	int rc = 0;
1349	__u64 length = 1 + flock->fl_end - flock->fl_start;
1350	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1351	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1352	struct TCP_Server_Info *server = tcon->ses->server;
1353	__u16 netfid = cfile->fid.netfid;
1354
1355	if (posix_lck) {
1356		int posix_lock_type;
1357
1358		rc = cifs_posix_lock_test(file, flock);
1359		if (!rc)
1360			return rc;
1361
1362		if (type & server->vals->shared_lock_type)
1363			posix_lock_type = CIFS_RDLCK;
1364		else
1365			posix_lock_type = CIFS_WRLCK;
1366		rc = CIFSSMBPosixLock(xid, tcon, netfid,
1367				      hash_lockowner(flock->fl_owner),
1368				      flock->fl_start, length, flock,
1369				      posix_lock_type, wait_flag);
1370		return rc;
1371	}
1372
1373	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1374	if (!rc)
1375		return rc;
1376
1377	/* BB we could chain these into one lock request BB */
1378	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1379				    1, 0, false);
1380	if (rc == 0) {
1381		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1382					    type, 0, 1, false);
1383		flock->fl_type = F_UNLCK;
1384		if (rc != 0)
1385			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1386				 rc);
1387		return 0;
1388	}
1389
1390	if (type & server->vals->shared_lock_type) {
1391		flock->fl_type = F_WRLCK;
1392		return 0;
1393	}
1394
1395	type &= ~server->vals->exclusive_lock_type;
1396
1397	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1398				    type | server->vals->shared_lock_type,
1399				    1, 0, false);
1400	if (rc == 0) {
1401		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1402			type | server->vals->shared_lock_type, 0, 1, false);
1403		flock->fl_type = F_RDLCK;
1404		if (rc != 0)
1405			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1406				 rc);
1407	} else
1408		flock->fl_type = F_WRLCK;
1409
1410	return 0;
1411}
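/*
 * For the non-POSIX case above, a lock is tested by actually requesting the
 * byte-range lock on the server and unlocking it again on success. If the
 * probe fails, an exclusive request is retried as a shared one to tell a
 * read conflict from a write conflict, and the result is reported back
 * through flock->fl_type.
 */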
1412
1413void
1414cifs_move_llist(struct list_head *source, struct list_head *dest)
1415{
1416	struct list_head *li, *tmp;
1417	list_for_each_safe(li, tmp, source)
1418		list_move(li, dest);
1419}
1420
1421void
1422cifs_free_llist(struct list_head *llist)
1423{
1424	struct cifsLockInfo *li, *tmp;
1425	list_for_each_entry_safe(li, tmp, llist, llist) {
1426		cifs_del_lock_waiters(li);
1427		list_del(&li->llist);
1428		kfree(li);
1429	}
1430}
1431
1432int
1433cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1434		  unsigned int xid)
1435{
1436	int rc = 0, stored_rc;
1437	int types[] = {LOCKING_ANDX_LARGE_FILES,
1438		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1439	unsigned int i;
1440	unsigned int max_num, num, max_buf;
1441	LOCKING_ANDX_RANGE *buf, *cur;
1442	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1443	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1444	struct cifsLockInfo *li, *tmp;
1445	__u64 length = 1 + flock->fl_end - flock->fl_start;
1446	struct list_head tmp_llist;
1447
1448	INIT_LIST_HEAD(&tmp_llist);
1449
1450	/*
1451	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1452	 * and check it for zero before using.
1453	 */
1454	max_buf = tcon->ses->server->maxBuf;
1455	if (!max_buf)
1456		return -EINVAL;
1457
1458	max_num = (max_buf - sizeof(struct smb_hdr)) /
1459						sizeof(LOCKING_ANDX_RANGE);
1460	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1461	if (!buf)
1462		return -ENOMEM;
1463
1464	down_write(&cinode->lock_sem);
1465	for (i = 0; i < 2; i++) {
1466		cur = buf;
1467		num = 0;
1468		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1469			if (flock->fl_start > li->offset ||
1470			    (flock->fl_start + length) <
1471			    (li->offset + li->length))
1472				continue;
1473			if (current->tgid != li->pid)
1474				continue;
1475			if (types[i] != li->type)
1476				continue;
1477			if (cinode->can_cache_brlcks) {
1478				/*
1479				 * We can cache brlock requests - simply remove
1480				 * a lock from the file's list.
1481				 */
1482				list_del(&li->llist);
1483				cifs_del_lock_waiters(li);
1484				kfree(li);
1485				continue;
1486			}
1487			cur->Pid = cpu_to_le16(li->pid);
1488			cur->LengthLow = cpu_to_le32((u32)li->length);
1489			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1490			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1491			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1492			/*
1493			 * We need to save a lock here so that we can add it back
1494			 * to the file's list if the unlock range request fails on
1495			 * the server.
1496			 */
1497			list_move(&li->llist, &tmp_llist);
1498			if (++num == max_num) {
1499				stored_rc = cifs_lockv(xid, tcon,
1500						       cfile->fid.netfid,
1501						       li->type, num, 0, buf);
1502				if (stored_rc) {
1503					/*
1504					 * We failed on the unlock range
1505					 * request - add all locks from the tmp
1506					 * list to the head of the file's list.
1507					 */
1508					cifs_move_llist(&tmp_llist,
1509							&cfile->llist->locks);
1510					rc = stored_rc;
1511				} else
1512					/*
1513					 * The unlock range request succeeded -
1514					 * free the tmp list.
1515					 */
1516					cifs_free_llist(&tmp_llist);
1517				cur = buf;
1518				num = 0;
1519			} else
1520				cur++;
1521		}
1522		if (num) {
1523			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1524					       types[i], num, 0, buf);
1525			if (stored_rc) {
1526				cifs_move_llist(&tmp_llist,
1527						&cfile->llist->locks);
1528				rc = stored_rc;
1529			} else
1530				cifs_free_llist(&tmp_llist);
1531		}
1532	}
1533
1534	up_write(&cinode->lock_sem);
1535	kfree(buf);
1536	return rc;
1537}
1538
1539static int
1540cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1541	   bool wait_flag, bool posix_lck, int lock, int unlock,
1542	   unsigned int xid)
1543{
1544	int rc = 0;
1545	__u64 length = 1 + flock->fl_end - flock->fl_start;
1546	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1547	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1548	struct TCP_Server_Info *server = tcon->ses->server;
1549	struct inode *inode = d_inode(cfile->dentry);
1550
1551	if (posix_lck) {
1552		int posix_lock_type;
1553
1554		rc = cifs_posix_lock_set(file, flock);
1555		if (!rc || rc < 0)
1556			return rc;
1557
1558		if (type & server->vals->shared_lock_type)
1559			posix_lock_type = CIFS_RDLCK;
1560		else
1561			posix_lock_type = CIFS_WRLCK;
1562
1563		if (unlock == 1)
1564			posix_lock_type = CIFS_UNLCK;
1565
1566		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1567				      hash_lockowner(flock->fl_owner),
1568				      flock->fl_start, length,
1569				      NULL, posix_lock_type, wait_flag);
1570		goto out;
1571	}
1572
1573	if (lock) {
1574		struct cifsLockInfo *lock;
1575
1576		lock = cifs_lock_init(flock->fl_start, length, type);
1577		if (!lock)
1578			return -ENOMEM;
1579
1580		rc = cifs_lock_add_if(cfile, lock, wait_flag);
1581		if (rc < 0) {
1582			kfree(lock);
1583			return rc;
1584		}
1585		if (!rc)
1586			goto out;
1587
1588		/*
1589		 * Windows 7 server can delay breaking lease from read to None
1590		 * if we set a byte-range lock on a file - break it explicitly
1591		 * before sending the lock to the server to be sure the next
1592		 * read won't conflict with non-overlapping locks due to
1593		 * page reading.
1594		 */
1595		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1596					CIFS_CACHE_READ(CIFS_I(inode))) {
1597			cifs_zap_mapping(inode);
1598			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1599				 inode);
1600			CIFS_I(inode)->oplock = 0;
1601		}
1602
1603		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1604					    type, 1, 0, wait_flag);
1605		if (rc) {
1606			kfree(lock);
1607			return rc;
1608		}
1609
1610		cifs_lock_add(cfile, lock);
1611	} else if (unlock)
1612		rc = server->ops->mand_unlock_range(cfile, flock, xid);
1613
1614out:
1615	if (flock->fl_flags & FL_POSIX && !rc)
1616		rc = locks_lock_file_wait(file, flock);
1617	return rc;
1618}
1619
1620int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1621{
1622	int rc, xid;
1623	int lock = 0, unlock = 0;
1624	bool wait_flag = false;
1625	bool posix_lck = false;
1626	struct cifs_sb_info *cifs_sb;
1627	struct cifs_tcon *tcon;
1628	struct cifsInodeInfo *cinode;
1629	struct cifsFileInfo *cfile;
1630	__u16 netfid;
1631	__u32 type;
1632
1633	rc = -EACCES;
1634	xid = get_xid();
1635
1636	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1637		 cmd, flock->fl_flags, flock->fl_type,
1638		 flock->fl_start, flock->fl_end);
1639
1640	cfile = (struct cifsFileInfo *)file->private_data;
1641	tcon = tlink_tcon(cfile->tlink);
1642
1643	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1644			tcon->ses->server);
1645
1646	cifs_sb = CIFS_FILE_SB(file);
1647	netfid = cfile->fid.netfid;
1648	cinode = CIFS_I(file_inode(file));
1649
1650	if (cap_unix(tcon->ses) &&
1651	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1652	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1653		posix_lck = true;
1654	/*
1655	 * BB add code here to normalize offset and length to account for
1656	 * negative length which we can not accept over the wire.
1657	 */
1658	if (IS_GETLK(cmd)) {
1659		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1660		free_xid(xid);
1661		return rc;
1662	}
1663
1664	if (!lock && !unlock) {
1665		/*
1666		 * if no lock or unlock then nothing to do since we do not
1667		 * know what it is
1668		 */
1669		free_xid(xid);
1670		return -EOPNOTSUPP;
1671	}
1672
1673	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1674			xid);
1675	free_xid(xid);
1676	return rc;
1677}
1678
1679/*
1680 * update the file size (if needed) after a write. Should be called with
1681 * the inode->i_lock held
1682 */
1683void
1684cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1685		      unsigned int bytes_written)
1686{
1687	loff_t end_of_write = offset + bytes_written;
1688
1689	if (end_of_write > cifsi->server_eof)
1690		cifsi->server_eof = end_of_write;
1691}
1692
1693static ssize_t
1694cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1695	   size_t write_size, loff_t *offset)
1696{
1697	int rc = 0;
1698	unsigned int bytes_written = 0;
1699	unsigned int total_written;
1700	struct cifs_sb_info *cifs_sb;
1701	struct cifs_tcon *tcon;
1702	struct TCP_Server_Info *server;
1703	unsigned int xid;
1704	struct dentry *dentry = open_file->dentry;
1705	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1706	struct cifs_io_parms io_parms;
1707
1708	cifs_sb = CIFS_SB(dentry->d_sb);
1709
1710	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1711		 write_size, *offset, dentry);
1712
1713	tcon = tlink_tcon(open_file->tlink);
1714	server = tcon->ses->server;
1715
1716	if (!server->ops->sync_write)
1717		return -ENOSYS;
1718
1719	xid = get_xid();
1720
1721	for (total_written = 0; write_size > total_written;
1722	     total_written += bytes_written) {
1723		rc = -EAGAIN;
1724		while (rc == -EAGAIN) {
1725			struct kvec iov[2];
1726			unsigned int len;
1727
1728			if (open_file->invalidHandle) {
1729				/* we could deadlock if we called
1730				   filemap_fdatawait from here so tell
1731				   reopen_file not to flush data to
1732				   server now */
1733				rc = cifs_reopen_file(open_file, false);
1734				if (rc != 0)
1735					break;
1736			}
1737
1738			len = min(server->ops->wp_retry_size(d_inode(dentry)),
1739				  (unsigned int)write_size - total_written);
1740			/* iov[0] is reserved for smb header */
1741			iov[1].iov_base = (char *)write_data + total_written;
1742			iov[1].iov_len = len;
1743			io_parms.pid = pid;
1744			io_parms.tcon = tcon;
1745			io_parms.offset = *offset;
1746			io_parms.length = len;
1747			rc = server->ops->sync_write(xid, &open_file->fid,
1748					&io_parms, &bytes_written, iov, 1);
1749		}
1750		if (rc || (bytes_written == 0)) {
1751			if (total_written)
1752				break;
1753			else {
1754				free_xid(xid);
1755				return rc;
1756			}
1757		} else {
1758			spin_lock(&d_inode(dentry)->i_lock);
1759			cifs_update_eof(cifsi, *offset, bytes_written);
1760			spin_unlock(&d_inode(dentry)->i_lock);
1761			*offset += bytes_written;
1762		}
1763	}
1764
1765	cifs_stats_bytes_written(tcon, total_written);
1766
1767	if (total_written > 0) {
1768		spin_lock(&d_inode(dentry)->i_lock);
1769		if (*offset > d_inode(dentry)->i_size)
1770			i_size_write(d_inode(dentry), *offset);
1771		spin_unlock(&d_inode(dentry)->i_lock);
1772	}
1773	mark_inode_dirty_sync(d_inode(dentry));
1774	free_xid(xid);
1775	return total_written;
1776}
1777
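/*
 * Find an open handle with read access for this inode. Write-only handles
 * are kept at the tail of openFileList, so the scan can stop once one is
 * reached.
 */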
1778struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1779					bool fsuid_only)
1780{
1781	struct cifsFileInfo *open_file = NULL;
1782	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1783	struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1784
1785	/* only filter by fsuid on multiuser mounts */
1786	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1787		fsuid_only = false;
1788
1789	spin_lock(&tcon->open_file_lock);
1790	/* we could simply get the first_list_entry since write-only entries
1791	   are always at the end of the list but since the first entry might
1792	   have a close pending, we go through the whole list */
1793	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1794		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1795			continue;
1796		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1797			if (!open_file->invalidHandle) {
1798				/* found a good file */
1799				/* lock it so it will not be closed on us */
1800				cifsFileInfo_get(open_file);
1801				spin_unlock(&tcon->open_file_lock);
1802				return open_file;
1803			} /* else might as well continue, and look for
1804			     another, or simply have the caller reopen it
1805			     again rather than trying to fix this handle */
1806		} else /* write only file */
1807			break; /* write only files are last so must be done */
1808	}
1809	spin_unlock(&tcon->open_file_lock);
1810	return NULL;
1811}
1812
1813struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1814					bool fsuid_only)
1815{
1816	struct cifsFileInfo *open_file, *inv_file = NULL;
1817	struct cifs_sb_info *cifs_sb;
1818	struct cifs_tcon *tcon;
1819	bool any_available = false;
1820	int rc;
1821	unsigned int refind = 0;
1822
1823	/* Having a null inode here (because mapping->host was set to zero by
1824	the VFS or MM) should not happen but we had reports of an oops (due to
1825	it being zero) during stress test cases so we need to check for it */
1826
1827	if (cifs_inode == NULL) {
1828		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1829		dump_stack();
1830		return NULL;
1831	}
1832
1833	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1834	tcon = cifs_sb_master_tcon(cifs_sb);
1835
1836	/* only filter by fsuid on multiuser mounts */
1837	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1838		fsuid_only = false;
1839
1840	spin_lock(&tcon->open_file_lock);
1841refind_writable:
1842	if (refind > MAX_REOPEN_ATT) {
1843		spin_unlock(&tcon->open_file_lock);
1844		return NULL;
1845	}
1846	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1847		if (!any_available && open_file->pid != current->tgid)
1848			continue;
1849		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1850			continue;
1851		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1852			if (!open_file->invalidHandle) {
1853				/* found a good writable file */
1854				cifsFileInfo_get(open_file);
1855				spin_unlock(&tcon->open_file_lock);
1856				return open_file;
1857			} else {
1858				if (!inv_file)
1859					inv_file = open_file;
1860			}
1861		}
1862	}
1863	/* couldn't find usable FH with same pid, try any available */
1864	if (!any_available) {
1865		any_available = true;
1866		goto refind_writable;
1867	}
1868
1869	if (inv_file) {
1870		any_available = false;
1871		cifsFileInfo_get(inv_file);
1872	}
1873
1874	spin_unlock(&tcon->open_file_lock);
1875
1876	if (inv_file) {
1877		rc = cifs_reopen_file(inv_file, false);
1878		if (!rc)
1879			return inv_file;
1880		else {
1881			spin_lock(&tcon->open_file_lock);
1882			list_move_tail(&inv_file->flist,
1883					&cifs_inode->openFileList);
1884			spin_unlock(&tcon->open_file_lock);
1885			cifsFileInfo_put(inv_file);
1886			++refind;
1887			inv_file = NULL;
1888			spin_lock(&tcon->open_file_lock);
1889			goto refind_writable;
1890		}
1891	}
1892
1893	return NULL;
1894}
1895
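/*
 * Write the byte range [from, to) of a cached page back to the server using
 * any writable handle on the inode. The range is clamped so the write never
 * extends the file, and pages beyond i_size (e.g. racing with truncate) are
 * skipped.
 */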
1896static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1897{
1898	struct address_space *mapping = page->mapping;
1899	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1900	char *write_data;
1901	int rc = -EFAULT;
1902	int bytes_written = 0;
1903	struct inode *inode;
1904	struct cifsFileInfo *open_file;
1905
1906	if (!mapping || !mapping->host)
1907		return -EFAULT;
1908
1909	inode = page->mapping->host;
1910
1911	offset += (loff_t)from;
1912	write_data = kmap(page);
1913	write_data += from;
1914
1915	if ((to > PAGE_SIZE) || (from > to)) {
1916		kunmap(page);
1917		return -EIO;
1918	}
1919
1920	/* racing with truncate? */
1921	if (offset > mapping->host->i_size) {
1922		kunmap(page);
1923		return 0; /* don't care */
1924	}
1925
1926	/* check to make sure that we are not extending the file */
1927	if (mapping->host->i_size - offset < (loff_t)to)
1928		to = (unsigned)(mapping->host->i_size - offset);
1929
1930	open_file = find_writable_file(CIFS_I(mapping->host), false);
1931	if (open_file) {
1932		bytes_written = cifs_write(open_file, open_file->pid,
1933					   write_data, to - from, &offset);
1934		cifsFileInfo_put(open_file);
1935		/* Does mm or vfs already set times? */
1936		inode->i_atime = inode->i_mtime = current_time(inode);
1937		if ((bytes_written > 0) && (offset))
1938			rc = 0;
1939		else if (bytes_written < 0)
1940			rc = bytes_written;
1941	} else {
1942		cifs_dbg(FYI, "No writeable filehandles for inode\n");
1943		rc = -EIO;
1944	}
1945
1946	kunmap(page);
1947	return rc;
1948}
1949
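/*
 * Allocate a cifs_writedata large enough for @tofind pages and fill its page
 * array with dirty pages from the mapping, starting at *index.
 */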
1950static struct cifs_writedata *
1951wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1952			  pgoff_t end, pgoff_t *index,
1953			  unsigned int *found_pages)
1954{
1955	unsigned int nr_pages;
1956	struct page **pages;
1957	struct cifs_writedata *wdata;
1958
1959	wdata = cifs_writedata_alloc((unsigned int)tofind,
1960				     cifs_writev_complete);
1961	if (!wdata)
1962		return NULL;
1963
1964	/*
1965	 * find_get_pages_tag seems to return a max of 256 on each
1966	 * iteration, so we must call it several times in order to
1967	 * fill the array or the wsize is effectively limited to
1968	 * 256 * PAGE_SIZE.
1969	 */
1970	*found_pages = 0;
1971	pages = wdata->pages;
1972	do {
1973		nr_pages = find_get_pages_tag(mapping, index,
1974					      PAGECACHE_TAG_DIRTY, tofind,
1975					      pages);
1976		*found_pages += nr_pages;
1977		tofind -= nr_pages;
1978		pages += nr_pages;
1979	} while (nr_pages && tofind && *index <= end);
1980
1981	return wdata;
1982}
1983
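/*
 * Lock a consecutive run of the found pages, clear their dirty bits and mark
 * them for writeback; pages that cannot be used are unlocked and released.
 * Returns the number of pages to send.
 */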
1984static unsigned int
1985wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1986		    struct address_space *mapping,
1987		    struct writeback_control *wbc,
1988		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1989{
1990	unsigned int nr_pages = 0, i;
1991	struct page *page;
1992
1993	for (i = 0; i < found_pages; i++) {
1994		page = wdata->pages[i];
1995		/*
1996		 * At this point we hold neither mapping->tree_lock nor
1997		 * lock on the page itself: the page may be truncated or
1998		 * invalidated (changing page->mapping to NULL), or even
1999		 * swizzled back from swapper_space to tmpfs file
2000		 * mapping
2001		 */
2002
2003		if (nr_pages == 0)
2004			lock_page(page);
2005		else if (!trylock_page(page))
2006			break;
2007
2008		if (unlikely(page->mapping != mapping)) {
2009			unlock_page(page);
2010			break;
2011		}
2012
2013		if (!wbc->range_cyclic && page->index > end) {
2014			*done = true;
2015			unlock_page(page);
2016			break;
2017		}
2018
2019		if (*next && (page->index != *next)) {
2020			/* Not next consecutive page */
2021			unlock_page(page);
2022			break;
2023		}
2024
2025		if (wbc->sync_mode != WB_SYNC_NONE)
2026			wait_on_page_writeback(page);
2027
2028		if (PageWriteback(page) ||
2029				!clear_page_dirty_for_io(page)) {
2030			unlock_page(page);
2031			break;
2032		}
2033
2034		/*
2035		 * This actually clears the dirty bit in the radix tree.
2036		 * See cifs_writepage() for more commentary.
2037		 */
2038		set_page_writeback(page);
2039		if (page_offset(page) >= i_size_read(mapping->host)) {
2040			*done = true;
2041			unlock_page(page);
2042			end_page_writeback(page);
2043			break;
2044		}
2045
2046		wdata->pages[i] = page;
2047		*next = page->index + 1;
2048		++nr_pages;
2049	}
2050
2051	/* reset index to refind any pages skipped */
2052	if (nr_pages == 0)
2053		*index = wdata->pages[0]->index + 1;
2054
2055	/* put any pages we aren't going to use */
2056	for (i = nr_pages; i < found_pages; i++) {
2057		put_page(wdata->pages[i]);
2058		wdata->pages[i] = NULL;
2059	}
2060
2061	return nr_pages;
2062}
2063
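/*
 * Fill in the remaining write parameters (offset, page and tail sizes, sync
 * mode), pick a writable handle for the inode and issue the asynchronous
 * write; all pages are unlocked before returning.
 */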
2064static int
2065wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2066		 struct address_space *mapping, struct writeback_control *wbc)
2067{
2068	int rc = 0;
2069	struct TCP_Server_Info *server;
2070	unsigned int i;
2071
2072	wdata->sync_mode = wbc->sync_mode;
2073	wdata->nr_pages = nr_pages;
2074	wdata->offset = page_offset(wdata->pages[0]);
2075	wdata->pagesz = PAGE_SIZE;
2076	wdata->tailsz = min(i_size_read(mapping->host) -
2077			page_offset(wdata->pages[nr_pages - 1]),
2078			(loff_t)PAGE_SIZE);
2079	wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2080
2081	if (wdata->cfile != NULL)
2082		cifsFileInfo_put(wdata->cfile);
2083	wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2084	if (!wdata->cfile) {
2085		cifs_dbg(VFS, "No writable handles for inode\n");
2086		rc = -EBADF;
2087	} else {
2088		wdata->pid = wdata->cfile->pid;
2089		server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2090		rc = server->ops->async_writev(wdata, cifs_writedata_release);
2091	}
2092
2093	for (i = 0; i < nr_pages; ++i)
2094		unlock_page(wdata->pages[i]);
2095
2096	return rc;
2097}
2098
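/*
 * ->writepages: batch dirty pages into wsize-sized chunks and send them via
 * asynchronous writes, falling back to generic_writepages() when wsize is
 * smaller than a page.
 */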
2099static int cifs_writepages(struct address_space *mapping,
2100			   struct writeback_control *wbc)
2101{
2102	struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2103	struct TCP_Server_Info *server;
2104	bool done = false, scanned = false, range_whole = false;
2105	pgoff_t end, index;
2106	struct cifs_writedata *wdata;
2107	int rc = 0;
2108
2109	/*
2110	 * If wsize is smaller than the page cache size, default to writing
2111	 * one page at a time via cifs_writepage
2112	 */
2113	if (cifs_sb->wsize < PAGE_SIZE)
2114		return generic_writepages(mapping, wbc);
2115
2116	if (wbc->range_cyclic) {
2117		index = mapping->writeback_index; /* Start from prev offset */
2118		end = -1;
2119	} else {
2120		index = wbc->range_start >> PAGE_SHIFT;
2121		end = wbc->range_end >> PAGE_SHIFT;
2122		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2123			range_whole = true;
2124		scanned = true;
2125	}
2126	server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2127retry:
2128	while (!done && index <= end) {
2129		unsigned int i, nr_pages, found_pages, wsize, credits;
2130		pgoff_t next = 0, tofind, saved_index = index;
2131
2132		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2133						   &wsize, &credits);
2134		if (rc)
2135			break;
2136
2137		tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2138
2139		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2140						  &found_pages);
2141		if (!wdata) {
2142			rc = -ENOMEM;
2143			add_credits_and_wake_if(server, credits, 0);
2144			break;
2145		}
2146
2147		if (found_pages == 0) {
2148			kref_put(&wdata->refcount, cifs_writedata_release);
2149			add_credits_and_wake_if(server, credits, 0);
2150			break;
2151		}
2152
2153		nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2154					       end, &index, &next, &done);
2155
2156		/* nothing to write? */
2157		if (nr_pages == 0) {
2158			kref_put(&wdata->refcount, cifs_writedata_release);
2159			add_credits_and_wake_if(server, credits, 0);
2160			continue;
2161		}
2162
2163		wdata->credits = credits;
2164
2165		rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2166
2167		/* send failure -- clean up the mess */
2168		if (rc != 0) {
2169			add_credits_and_wake_if(server, wdata->credits, 0);
2170			for (i = 0; i < nr_pages; ++i) {
2171				if (rc == -EAGAIN)
2172					redirty_page_for_writepage(wbc,
2173							   wdata->pages[i]);
2174				else
2175					SetPageError(wdata->pages[i]);
2176				end_page_writeback(wdata->pages[i]);
2177				put_page(wdata->pages[i]);
2178			}
2179			if (rc != -EAGAIN)
2180				mapping_set_error(mapping, rc);
2181		}
2182		kref_put(&wdata->refcount, cifs_writedata_release);
2183
2184		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2185			index = saved_index;
2186			continue;
2187		}
2188
2189		wbc->nr_to_write -= nr_pages;
2190		if (wbc->nr_to_write <= 0)
2191			done = true;
2192
2193		index = next;
2194	}
2195
2196	if (!scanned && !done) {
2197		/*
2198		 * We hit the last page and there is more work to be done: wrap
2199		 * back to the start of the file
2200		 */
2201		scanned = true;
2202		index = 0;
2203		goto retry;
2204	}
2205
2206	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2207		mapping->writeback_index = index;
2208
2209	return rc;
2210}
2211
2212static int
2213cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2214{
2215	int rc;
2216	unsigned int xid;
2217
2218	xid = get_xid();
2219/* BB add check for wbc flags */
2220	get_page(page);
2221	if (!PageUptodate(page))
2222		cifs_dbg(FYI, "ppw - page not up to date\n");
2223
2224	/*
2225	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2226	 *
2227	 * A writepage() implementation always needs to do either this,
2228	 * or re-dirty the page with "redirty_page_for_writepage()" in
2229	 * the case of a failure.
2230	 *
2231	 * Just unlocking the page will cause the radix tree tag-bits
2232	 * to fail to update with the state of the page correctly.
2233	 */
2234	set_page_writeback(page);
2235retry_write:
2236	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2237	if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2238		goto retry_write;
2239	else if (rc == -EAGAIN)
2240		redirty_page_for_writepage(wbc, page);
2241	else if (rc != 0)
2242		SetPageError(page);
2243	else
2244		SetPageUptodate(page);
2245	end_page_writeback(page);
2246	put_page(page);
2247	free_xid(xid);
2248	return rc;
2249}
2250
2251static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2252{
2253	int rc = cifs_writepage_locked(page, wbc);
2254	unlock_page(page);
2255	return rc;
2256}
2257
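/*
 * ->write_end: if the page is not up to date, push the copied bytes to the
 * server synchronously via cifs_write(); otherwise mark the page dirty and
 * let writeback handle it. In both cases extend i_size if the write went
 * past it.
 */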
2258static int cifs_write_end(struct file *file, struct address_space *mapping,
2259			loff_t pos, unsigned len, unsigned copied,
2260			struct page *page, void *fsdata)
2261{
2262	int rc;
2263	struct inode *inode = mapping->host;
2264	struct cifsFileInfo *cfile = file->private_data;
2265	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2266	__u32 pid;
2267
2268	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2269		pid = cfile->pid;
2270	else
2271		pid = current->tgid;
2272
2273	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2274		 page, pos, copied);
2275
2276	if (PageChecked(page)) {
2277		if (copied == len)
2278			SetPageUptodate(page);
2279		ClearPageChecked(page);
2280	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
2281		SetPageUptodate(page);
2282
2283	if (!PageUptodate(page)) {
2284		char *page_data;
2285		unsigned offset = pos & (PAGE_SIZE - 1);
2286		unsigned int xid;
2287
2288		xid = get_xid();
2289		/* this is probably better than directly calling
2290		   partialpage_write since in this function the file handle is
2291		   known which we might as well leverage */
2292		/* BB check if anything else missing out of ppw
2293		   such as updating last write time */
2294		page_data = kmap(page);
2295		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2296		/* if (rc < 0) should we set writebehind rc? */
2297		kunmap(page);
2298
2299		free_xid(xid);
2300	} else {
2301		rc = copied;
2302		pos += copied;
2303		set_page_dirty(page);
2304	}
2305
2306	if (rc > 0) {
2307		spin_lock(&inode->i_lock);
2308		if (pos > inode->i_size)
2309			i_size_write(inode, pos);
2310		spin_unlock(&inode->i_lock);
2311	}
2312
2313	unlock_page(page);
2314	put_page(page);
2315
2316	return rc;
2317}
2318
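/*
 * Strict-cache fsync: write back and wait on dirty pages, invalidate the
 * page cache if we hold no read lease/oplock, then ask the server to flush
 * the file handle (unless the mount disables server-side sync).
 */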
2319int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2320		      int datasync)
2321{
2322	unsigned int xid;
2323	int rc = 0;
2324	struct cifs_tcon *tcon;
2325	struct TCP_Server_Info *server;
2326	struct cifsFileInfo *smbfile = file->private_data;
2327	struct inode *inode = file_inode(file);
2328	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2329
2330	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2331	if (rc)
2332		return rc;
2333	inode_lock(inode);
2334
2335	xid = get_xid();
2336
2337	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2338		 file, datasync);
2339
2340	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2341		rc = cifs_zap_mapping(inode);
2342		if (rc) {
2343			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2344			rc = 0; /* don't care about it in fsync */
2345		}
2346	}
2347
2348	tcon = tlink_tcon(smbfile->tlink);
2349	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2350		server = tcon->ses->server;
2351		if (server->ops->flush)
2352			rc = server->ops->flush(xid, tcon, &smbfile->fid);
2353		else
2354			rc = -ENOSYS;
2355	}
2356
2357	free_xid(xid);
2358	inode_unlock(inode);
2359	return rc;
2360}
2361
2362int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2363{
2364	unsigned int xid;
2365	int rc = 0;
2366	struct cifs_tcon *tcon;
2367	struct TCP_Server_Info *server;
2368	struct cifsFileInfo *smbfile = file->private_data;
2369	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2370	struct inode *inode = file->f_mapping->host;
2371
2372	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2373	if (rc)
2374		return rc;
2375	inode_lock(inode);
2376
2377	xid = get_xid();
2378
2379	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2380		 file, datasync);
2381
2382	tcon = tlink_tcon(smbfile->tlink);
2383	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2384		server = tcon->ses->server;
2385		if (server->ops->flush)
2386			rc = server->ops->flush(xid, tcon, &smbfile->fid);
2387		else
2388			rc = -ENOSYS;
2389	}
2390
2391	free_xid(xid);
2392	inode_unlock(inode);
2393	return rc;
2394}
2395
2396/*
2397 * As file closes, flush all cached write data for this inode checking
2398 * for write behind errors.
2399 */
2400int cifs_flush(struct file *file, fl_owner_t id)
2401{
2402	struct inode *inode = file_inode(file);
2403	int rc = 0;
2404
2405	if (file->f_mode & FMODE_WRITE)
2406		rc = filemap_write_and_wait(inode->i_mapping);
2407
2408	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2409
2410	return rc;
2411}
2412
2413static int
2414cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2415{
2416	int rc = 0;
2417	unsigned long i;
2418
2419	for (i = 0; i < num_pages; i++) {
2420		pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2421		if (!pages[i]) {
2422			/*
2423			 * save number of pages we have already allocated and
2424			 * return with ENOMEM error
2425			 */
2426			num_pages = i;
2427			rc = -ENOMEM;
2428			break;
2429		}
2430	}
2431
2432	if (rc) {
2433		for (i = 0; i < num_pages; i++)
2434			put_page(pages[i]);
2435	}
2436	return rc;
2437}
2438
2439static inline
2440size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2441{
2442	size_t num_pages;
2443	size_t clen;
2444
2445	clen = min_t(const size_t, len, wsize);
2446	num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2447
2448	if (cur_len)
2449		*cur_len = clen;
2450
2451	return num_pages;
2452}
2453
2454static void
2455cifs_uncached_writedata_release(struct kref *refcount)
2456{
2457	int i;
2458	struct cifs_writedata *wdata = container_of(refcount,
2459					struct cifs_writedata, refcount);
2460
2461	for (i = 0; i < wdata->nr_pages; i++)
2462		put_page(wdata->pages[i]);
2463	cifs_writedata_release(refcount);
2464}
2465
2466static void
2467cifs_uncached_writev_complete(struct work_struct *work)
2468{
2469	struct cifs_writedata *wdata = container_of(work,
2470					struct cifs_writedata, work);
2471	struct inode *inode = d_inode(wdata->cfile->dentry);
2472	struct cifsInodeInfo *cifsi = CIFS_I(inode);
2473
2474	spin_lock(&inode->i_lock);
2475	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2476	if (cifsi->server_eof > inode->i_size)
2477		i_size_write(inode, cifsi->server_eof);
2478	spin_unlock(&inode->i_lock);
2479
2480	complete(&wdata->done);
2481
2482	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2483}
2484
2485static int
2486wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2487		      size_t *len, unsigned long *num_pages)
2488{
2489	size_t save_len, copied, bytes, cur_len = *len;
2490	unsigned long i, nr_pages = *num_pages;
2491
2492	save_len = cur_len;
2493	for (i = 0; i < nr_pages; i++) {
2494		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2495		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2496		cur_len -= copied;
2497		/*
2498		 * If we didn't copy as much as we expected, then that
2499		 * may mean we trod into an unmapped area. Stop copying
2500		 * at that point. On the next pass through the big
2501		 * loop, we'll likely end up getting a zero-length
2502		 * write and bailing out of it.
2503		 */
2504		if (copied < bytes)
2505			break;
2506	}
2507	cur_len = save_len - cur_len;
2508	*len = cur_len;
2509
2510	/*
2511	 * If we have no data to send, then that probably means that
2512	 * the copy above failed altogether. That's most likely because
2513	 * the address in the iovec was bogus. Return -EFAULT and let
2514	 * the caller free anything we allocated and bail out.
2515	 */
2516	if (!cur_len)
2517		return -EFAULT;
2518
2519	/*
2520	 * i + 1 now represents the number of pages we actually used in
2521	 * the copy phase above.
2522	 */
2523	*num_pages = i + 1;
2524	return 0;
2525}
2526
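/*
 * Split an uncached write into wsize-sized chunks, copy the data from the
 * iterator into freshly allocated pages and issue asynchronous writes,
 * queueing each cifs_writedata on @wdata_list for the caller to wait on.
 */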
2527static int
2528cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2529		     struct cifsFileInfo *open_file,
2530		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2531{
2532	int rc = 0;
2533	size_t cur_len;
2534	unsigned long nr_pages, num_pages, i;
2535	struct cifs_writedata *wdata;
2536	struct iov_iter saved_from = *from;
2537	loff_t saved_offset = offset;
2538	pid_t pid;
2539	struct TCP_Server_Info *server;
2540
2541	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2542		pid = open_file->pid;
2543	else
2544		pid = current->tgid;
2545
2546	server = tlink_tcon(open_file->tlink)->ses->server;
2547
2548	do {
2549		unsigned int wsize, credits;
2550
2551		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2552						   &wsize, &credits);
2553		if (rc)
2554			break;
2555
2556		nr_pages = get_numpages(wsize, len, &cur_len);
2557		wdata = cifs_writedata_alloc(nr_pages,
2558					     cifs_uncached_writev_complete);
2559		if (!wdata) {
2560			rc = -ENOMEM;
2561			add_credits_and_wake_if(server, credits, 0);
2562			break;
2563		}
2564
2565		rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2566		if (rc) {
2567			kfree(wdata);
2568			add_credits_and_wake_if(server, credits, 0);
2569			break;
2570		}
2571
2572		num_pages = nr_pages;
2573		rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2574		if (rc) {
2575			for (i = 0; i < nr_pages; i++)
2576				put_page(wdata->pages[i]);
2577			kfree(wdata);
2578			add_credits_and_wake_if(server, credits, 0);
2579			break;
2580		}
2581
2582		/*
2583		 * Bring nr_pages down to the number of pages we actually used,
2584		 * and free any pages that we didn't use.
2585		 */
2586		for ( ; nr_pages > num_pages; nr_pages--)
2587			put_page(wdata->pages[nr_pages - 1]);
2588
2589		wdata->sync_mode = WB_SYNC_ALL;
2590		wdata->nr_pages = nr_pages;
2591		wdata->offset = (__u64)offset;
2592		wdata->cfile = cifsFileInfo_get(open_file);
2593		wdata->pid = pid;
2594		wdata->bytes = cur_len;
2595		wdata->pagesz = PAGE_SIZE;
2596		wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2597		wdata->credits = credits;
2598
2599		if (!wdata->cfile->invalidHandle ||
2600		    !cifs_reopen_file(wdata->cfile, false))
2601			rc = server->ops->async_writev(wdata,
2602					cifs_uncached_writedata_release);
2603		if (rc) {
2604			add_credits_and_wake_if(server, wdata->credits, 0);
2605			kref_put(&wdata->refcount,
2606				 cifs_uncached_writedata_release);
2607			if (rc == -EAGAIN) {
2608				*from = saved_from;
2609				iov_iter_advance(from, offset - saved_offset);
2610				continue;
2611			}
2612			break;
2613		}
2614
2615		list_add_tail(&wdata->list, wdata_list);
2616		offset += cur_len;
2617		len -= cur_len;
2618	} while (len > 0);
2619
2620	return rc;
2621}
2622
2623ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2624{
2625	struct file *file = iocb->ki_filp;
2626	ssize_t total_written = 0;
2627	struct cifsFileInfo *open_file;
2628	struct cifs_tcon *tcon;
2629	struct cifs_sb_info *cifs_sb;
2630	struct cifs_writedata *wdata, *tmp;
2631	struct list_head wdata_list;
2632	struct iov_iter saved_from = *from;
2633	int rc;
2634
2635	/*
2636	 * BB - optimize the case when signing is disabled. We can drop this
2637	 * extra memory-to-memory copying and use iovec buffers for constructing
2638	 * the write request.
2639	 */
2640
2641	rc = generic_write_checks(iocb, from);
2642	if (rc <= 0)
2643		return rc;
2644
2645	INIT_LIST_HEAD(&wdata_list);
2646	cifs_sb = CIFS_FILE_SB(file);
2647	open_file = file->private_data;
2648	tcon = tlink_tcon(open_file->tlink);
2649
2650	if (!tcon->ses->server->ops->async_writev)
2651		return -ENOSYS;
2652
2653	rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
2654				  open_file, cifs_sb, &wdata_list);
2655
2656	/*
2657	 * If at least one write was successfully sent, then discard any rc
2658	 * value from the later writes. If the other write succeeds, then
2659	 * we'll end up returning whatever was written. If it fails, then
2660	 * we'll get a new rc value from that.
2661	 */
2662	if (!list_empty(&wdata_list))
2663		rc = 0;
2664
2665	/*
2666	 * Wait for and collect replies for any successful sends in order of
2667	 * increasing offset. Once an error is hit or we get a fatal signal
2668	 * while waiting, then return without waiting for any more replies.
2669	 */
2670restart_loop:
2671	list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2672		if (!rc) {
2673			/* FIXME: freezable too? */
2674			rc = wait_for_completion_killable(&wdata->done);
2675			if (rc)
2676				rc = -EINTR;
2677			else if (wdata->result)
2678				rc = wdata->result;
2679			else
2680				total_written += wdata->bytes;
2681
2682			/* resend call if it's a retryable error */
2683			if (rc == -EAGAIN) {
2684				struct list_head tmp_list;
2685				struct iov_iter tmp_from = saved_from;
2686
2687				INIT_LIST_HEAD(&tmp_list);
2688				list_del_init(&wdata->list);
2689
2690				iov_iter_advance(&tmp_from,
2691						 wdata->offset - iocb->ki_pos);
2692
2693				rc = cifs_write_from_iter(wdata->offset,
2694						wdata->bytes, &tmp_from,
2695						open_file, cifs_sb, &tmp_list);
2696
2697				list_splice(&tmp_list, &wdata_list);
2698
2699				kref_put(&wdata->refcount,
2700					 cifs_uncached_writedata_release);
2701				goto restart_loop;
2702			}
2703		}
2704		list_del_init(&wdata->list);
2705		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2706	}
2707
2708	if (unlikely(!total_written))
2709		return rc;
2710
2711	iocb->ki_pos += total_written;
2712	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
2713	cifs_stats_bytes_written(tcon, total_written);
2714	return total_written;
2715}
2716
2717static ssize_t
2718cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2719{
2720	struct file *file = iocb->ki_filp;
2721	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2722	struct inode *inode = file->f_mapping->host;
2723	struct cifsInodeInfo *cinode = CIFS_I(inode);
2724	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2725	ssize_t rc;
2726
2727	/*
2728	 * We need to hold the sem to be sure nobody modifies lock list
2729	 * with a brlock that prevents writing.
2730	 */
2731	down_read(&cinode->lock_sem);
2732	inode_lock(inode);
2733
2734	rc = generic_write_checks(iocb, from);
2735	if (rc <= 0)
2736		goto out;
2737
2738	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2739				     server->vals->exclusive_lock_type, NULL,
2740				     CIFS_WRITE_OP))
2741		rc = __generic_file_write_iter(iocb, from);
2742	else
2743		rc = -EACCES;
2744out:
2745	inode_unlock(inode);
2746
2747	if (rc > 0)
2748		rc = generic_write_sync(iocb, rc);
2749	up_read(&cinode->lock_sem);
2750	return rc;
2751}
2752
2753ssize_t
2754cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2755{
2756	struct inode *inode = file_inode(iocb->ki_filp);
2757	struct cifsInodeInfo *cinode = CIFS_I(inode);
2758	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2759	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2760						iocb->ki_filp->private_data;
2761	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2762	ssize_t written;
2763
2764	written = cifs_get_writer(cinode);
2765	if (written)
2766		return written;
2767
2768	if (CIFS_CACHE_WRITE(cinode)) {
2769		if (cap_unix(tcon->ses) &&
2770		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2771		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2772			written = generic_file_write_iter(iocb, from);
2773			goto out;
2774		}
2775		written = cifs_writev(iocb, from);
2776		goto out;
2777	}
2778	/*
2779	 * For non-oplocked files in strict cache mode we need to write the data
2780	 * to the server exactly from the pos to pos+len-1 rather than flush all
2781	 * affected pages because it may cause a error with mandatory locks on
2782	 * these pages but not on the region from pos to ppos+len-1.
2783	 */
2784	written = cifs_user_writev(iocb, from);
2785	if (written > 0 && CIFS_CACHE_READ(cinode)) {
2786		/*
2787		 * Windows 7 server can delay breaking level2 oplock if a write
2788		 * request comes - break it on the client to prevent reading
2789		 * an old data.
2790		 */
2791		cifs_zap_mapping(inode);
2792		cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2793			 inode);
2794		cinode->oplock = 0;
2795	}
2796out:
2797	cifs_put_writer(cinode);
2798	return written;
2799}
2800
2801static struct cifs_readdata *
2802cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2803{
2804	struct cifs_readdata *rdata;
2805
2806	rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2807			GFP_KERNEL);
2808	if (rdata != NULL) {
2809		kref_init(&rdata->refcount);
2810		INIT_LIST_HEAD(&rdata->list);
2811		init_completion(&rdata->done);
2812		INIT_WORK(&rdata->work, complete);
2813	}
2814
2815	return rdata;
2816}
2817
2818void
2819cifs_readdata_release(struct kref *refcount)
2820{
2821	struct cifs_readdata *rdata = container_of(refcount,
2822					struct cifs_readdata, refcount);
2823
2824	if (rdata->cfile)
2825		cifsFileInfo_put(rdata->cfile);
2826
2827	kfree(rdata);
2828}
2829
2830static int
2831cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2832{
2833	int rc = 0;
2834	struct page *page;
2835	unsigned int i;
2836
2837	for (i = 0; i < nr_pages; i++) {
2838		page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2839		if (!page) {
2840			rc = -ENOMEM;
2841			break;
2842		}
2843		rdata->pages[i] = page;
2844	}
2845
2846	if (rc) {
2847		for (i = 0; i < nr_pages; i++) {
2848			put_page(rdata->pages[i]);
2849			rdata->pages[i] = NULL;
2850		}
2851	}
2852	return rc;
2853}
2854
2855static void
2856cifs_uncached_readdata_release(struct kref *refcount)
2857{
2858	struct cifs_readdata *rdata = container_of(refcount,
2859					struct cifs_readdata, refcount);
2860	unsigned int i;
2861
2862	for (i = 0; i < rdata->nr_pages; i++) {
2863		put_page(rdata->pages[i]);
2864		rdata->pages[i] = NULL;
2865	}
2866	cifs_readdata_release(refcount);
2867}
2868
2869/**
2870 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2871 * @rdata:	the readdata response with list of pages holding data
2872 * @iter:	destination for our data
2873 *
2874 * This function copies data from a list of pages in a readdata response into
2875 * an array of iovecs. It will first calculate where the data should go
2876 * based on the info in the readdata and then copy the data into that spot.
2877 */
2878static int
2879cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2880{
2881	size_t remaining = rdata->got_bytes;
2882	unsigned int i;
2883
2884	for (i = 0; i < rdata->nr_pages; i++) {
2885		struct page *page = rdata->pages[i];
2886		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2887		size_t written;
2888
2889		if (unlikely(iter->type & ITER_PIPE)) {
2890			void *addr = kmap_atomic(page);
2891
2892			written = copy_to_iter(addr, copy, iter);
2893			kunmap_atomic(addr);
2894		} else
2895			written = copy_page_to_iter(page, 0, copy, iter);
2896		remaining -= written;
2897		if (written < copy && iov_iter_count(iter) > 0)
2898			break;
2899	}
2900	return remaining ? -EFAULT : 0;
2901}
2902
2903static void
2904cifs_uncached_readv_complete(struct work_struct *work)
2905{
2906	struct cifs_readdata *rdata = container_of(work,
2907						struct cifs_readdata, work);
2908
2909	complete(&rdata->done);
2910	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2911}
2912
2913static int
2914cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2915			struct cifs_readdata *rdata, unsigned int len)
2916{
2917	int result = 0;
2918	unsigned int i;
2919	unsigned int nr_pages = rdata->nr_pages;
2920
2921	rdata->got_bytes = 0;
2922	rdata->tailsz = PAGE_SIZE;
2923	for (i = 0; i < nr_pages; i++) {
2924		struct page *page = rdata->pages[i];
2925		size_t n;
2926
2927		if (len <= 0) {
2928			/* no need to hold page hostage */
2929			rdata->pages[i] = NULL;
2930			rdata->nr_pages--;
2931			put_page(page);
2932			continue;
2933		}
2934		n = len;
2935		if (len >= PAGE_SIZE) {
2936			/* enough data to fill the page */
2937			n = PAGE_SIZE;
2938			len -= n;
2939		} else {
2940			zero_user(page, len, PAGE_SIZE - len);
2941			rdata->tailsz = len;
2942			len = 0;
2943		}
2944		result = cifs_read_page_from_socket(server, page, n);
2945		if (result < 0)
2946			break;
2947
2948		rdata->got_bytes += result;
2949	}
2950
2951	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2952						rdata->got_bytes : result;
2953}
2954
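/*
 * Split an uncached read into rsize-sized chunks and issue asynchronous
 * reads, queueing each cifs_readdata on @rdata_list for the caller to
 * collect and copy into the destination iterator.
 */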
2955static int
2956cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2957		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2958{
2959	struct cifs_readdata *rdata;
2960	unsigned int npages, rsize, credits;
2961	size_t cur_len;
2962	int rc;
2963	pid_t pid;
2964	struct TCP_Server_Info *server;
2965
2966	server = tlink_tcon(open_file->tlink)->ses->server;
2967
2968	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2969		pid = open_file->pid;
2970	else
2971		pid = current->tgid;
2972
2973	do {
2974		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2975						   &rsize, &credits);
2976		if (rc)
2977			break;
2978
2979		cur_len = min_t(const size_t, len, rsize);
2980		npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2981
2982		/* allocate a readdata struct */
2983		rdata = cifs_readdata_alloc(npages,
2984					    cifs_uncached_readv_complete);
2985		if (!rdata) {
2986			add_credits_and_wake_if(server, credits, 0);
2987			rc = -ENOMEM;
2988			break;
2989		}
2990
2991		rc = cifs_read_allocate_pages(rdata, npages);
2992		if (rc)
2993			goto error;
2994
2995		rdata->cfile = cifsFileInfo_get(open_file);
2996		rdata->nr_pages = npages;
2997		rdata->offset = offset;
2998		rdata->bytes = cur_len;
2999		rdata->pid = pid;
3000		rdata->pagesz = PAGE_SIZE;
3001		rdata->read_into_pages = cifs_uncached_read_into_pages;
3002		rdata->credits = credits;
3003
3004		if (!rdata->cfile->invalidHandle ||
3005		    !cifs_reopen_file(rdata->cfile, true))
3006			rc = server->ops->async_readv(rdata);
3007error:
3008		if (rc) {
3009			add_credits_and_wake_if(server, rdata->credits, 0);
3010			kref_put(&rdata->refcount,
3011				 cifs_uncached_readdata_release);
3012			if (rc == -EAGAIN)
3013				continue;
3014			break;
3015		}
3016
3017		list_add_tail(&rdata->list, rdata_list);
3018		offset += cur_len;
3019		len -= cur_len;
3020	} while (len > 0);
3021
3022	return rc;
3023}
3024
3025ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3026{
3027	struct file *file = iocb->ki_filp;
3028	ssize_t rc;
3029	size_t len;
3030	ssize_t total_read = 0;
3031	loff_t offset = iocb->ki_pos;
3032	struct cifs_sb_info *cifs_sb;
3033	struct cifs_tcon *tcon;
3034	struct cifsFileInfo *open_file;
3035	struct cifs_readdata *rdata, *tmp;
3036	struct list_head rdata_list;
3037
3038	len = iov_iter_count(to);
3039	if (!len)
3040		return 0;
3041
3042	INIT_LIST_HEAD(&rdata_list);
3043	cifs_sb = CIFS_FILE_SB(file);
3044	open_file = file->private_data;
3045	tcon = tlink_tcon(open_file->tlink);
3046
3047	if (!tcon->ses->server->ops->async_readv)
3048		return -ENOSYS;
3049
3050	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3051		cifs_dbg(FYI, "attempting read on write only file instance\n");
3052
3053	rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
3054
3055	/* if at least one read request was sent successfully, then reset rc */
3056	if (!list_empty(&rdata_list))
3057		rc = 0;
3058
3059	len = iov_iter_count(to);
3060	/* the loop below should proceed in the order of increasing offsets */
3061again:
3062	list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3063		if (!rc) {
3064			/* FIXME: freezable sleep too? */
3065			rc = wait_for_completion_killable(&rdata->done);
3066			if (rc)
3067				rc = -EINTR;
3068			else if (rdata->result == -EAGAIN) {
3069				/* resend call if it's a retryable error */
3070				struct list_head tmp_list;
3071				unsigned int got_bytes = rdata->got_bytes;
3072
3073				list_del_init(&rdata->list);
3074				INIT_LIST_HEAD(&tmp_list);
3075
3076				/*
3077				 * Got a part of data and then reconnect has
3078				 * happened -- fill the buffer and continue
3079				 * reading.
3080				 */
3081				if (got_bytes && got_bytes < rdata->bytes) {
3082					rc = cifs_readdata_to_iov(rdata, to);
3083					if (rc) {
3084						kref_put(&rdata->refcount,
3085						cifs_uncached_readdata_release);
3086						continue;
3087					}
3088				}
3089
3090				rc = cifs_send_async_read(
3091						rdata->offset + got_bytes,
3092						rdata->bytes - got_bytes,
3093						rdata->cfile, cifs_sb,
3094						&tmp_list);
3095
3096				list_splice(&tmp_list, &rdata_list);
3097
3098				kref_put(&rdata->refcount,
3099					 cifs_uncached_readdata_release);
3100				goto again;
3101			} else if (rdata->result)
3102				rc = rdata->result;
3103			else
3104				rc = cifs_readdata_to_iov(rdata, to);
3105
3106			/* if there was a short read -- discard anything left */
3107			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3108				rc = -ENODATA;
3109		}
3110		list_del_init(&rdata->list);
3111		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3112	}
3113
3114	total_read = len - iov_iter_count(to);
3115
3116	cifs_stats_bytes_read(tcon, total_read);
3117
3118	/* mask nodata case */
3119	if (rc == -ENODATA)
3120		rc = 0;
3121
3122	if (total_read) {
3123		iocb->ki_pos += total_read;
3124		return total_read;
3125	}
3126	return rc;
3127}
3128
3129ssize_t
3130cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3131{
3132	struct inode *inode = file_inode(iocb->ki_filp);
3133	struct cifsInodeInfo *cinode = CIFS_I(inode);
3134	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3135	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3136						iocb->ki_filp->private_data;
3137	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3138	int rc = -EACCES;
3139
3140	/*
3141	 * In strict cache mode we need to read from the server all the time
3142	 * if we don't have level II oplock because the server can delay mtime
3143	 * change - so we can't make a decision about invalidating the inode.
3144	 * We can also fail with page reading if there are mandatory locks
3145	 * on pages affected by this read but not on the region from pos to
3146	 * pos+len-1.
3147	 */
3148	if (!CIFS_CACHE_READ(cinode))
3149		return cifs_user_readv(iocb, to);
3150
3151	if (cap_unix(tcon->ses) &&
3152	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3153	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3154		return generic_file_read_iter(iocb, to);
3155
3156	/*
3157	 * We need to hold the sem to be sure nobody modifies lock list
3158	 * with a brlock that prevents reading.
3159	 */
3160	down_read(&cinode->lock_sem);
3161	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3162				     tcon->ses->server->vals->shared_lock_type,
3163				     NULL, CIFS_READ_OP))
3164		rc = generic_file_read_iter(iocb, to);
3165	up_read(&cinode->lock_sem);
3166	return rc;
3167}
3168
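/*
 * Synchronous read helper used by the readpage path: issue SMB reads of at
 * most rsize bytes, reopening an invalid handle as needed, until the
 * requested range is read or the server returns an error or zero bytes.
 */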
3169static ssize_t
3170cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3171{
3172	int rc = -EACCES;
3173	unsigned int bytes_read = 0;
3174	unsigned int total_read;
3175	unsigned int current_read_size;
3176	unsigned int rsize;
3177	struct cifs_sb_info *cifs_sb;
3178	struct cifs_tcon *tcon;
3179	struct TCP_Server_Info *server;
3180	unsigned int xid;
3181	char *cur_offset;
3182	struct cifsFileInfo *open_file;
3183	struct cifs_io_parms io_parms;
3184	int buf_type = CIFS_NO_BUFFER;
3185	__u32 pid;
3186
3187	xid = get_xid();
3188	cifs_sb = CIFS_FILE_SB(file);
3189
3190	/* FIXME: set up handlers for larger reads and/or convert to async */
3191	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3192
3193	if (file->private_data == NULL) {
3194		rc = -EBADF;
3195		free_xid(xid);
3196		return rc;
3197	}
3198	open_file = file->private_data;
3199	tcon = tlink_tcon(open_file->tlink);
3200	server = tcon->ses->server;
3201
3202	if (!server->ops->sync_read) {
3203		free_xid(xid);
3204		return -ENOSYS;
3205	}
3206
3207	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3208		pid = open_file->pid;
3209	else
3210		pid = current->tgid;
3211
3212	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3213		cifs_dbg(FYI, "attempting read on write only file instance\n");
3214
3215	for (total_read = 0, cur_offset = read_data; read_size > total_read;
3216	     total_read += bytes_read, cur_offset += bytes_read) {
3217		do {
3218			current_read_size = min_t(uint, read_size - total_read,
3219						  rsize);
3220			/*
3221			 * For Windows ME and 9x we do not want to request more
3222			 * than the negotiated size since the server will refuse
3223			 * the read otherwise.
3224			 */
3225			if ((tcon->ses) && !(tcon->ses->capabilities &
3226				tcon->ses->server->vals->cap_large_files)) {
3227				current_read_size = min_t(uint,
3228					current_read_size, CIFSMaxBufSize);
3229			}
3230			if (open_file->invalidHandle) {
3231				rc = cifs_reopen_file(open_file, true);
3232				if (rc != 0)
3233					break;
3234			}
3235			io_parms.pid = pid;
3236			io_parms.tcon = tcon;
3237			io_parms.offset = *offset;
3238			io_parms.length = current_read_size;
3239			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3240						    &bytes_read, &cur_offset,
3241						    &buf_type);
3242		} while (rc == -EAGAIN);
3243
3244		if (rc || (bytes_read == 0)) {
3245			if (total_read) {
3246				break;
3247			} else {
3248				free_xid(xid);
3249				return rc;
3250			}
3251		} else {
3252			cifs_stats_bytes_read(tcon, total_read);
3253			*offset += bytes_read;
3254		}
3255	}
3256	free_xid(xid);
3257	return total_read;
3258}
3259
3260/*
3261 * If the page is mmap'ed into a process' page tables, then we need to make
3262 * sure that it doesn't change while being written back.
3263 */
3264static int
3265cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3266{
3267	struct page *page = vmf->page;
3268
3269	lock_page(page);
3270	return VM_FAULT_LOCKED;
3271}
3272
3273static const struct vm_operations_struct cifs_file_vm_ops = {
3274	.fault = filemap_fault,
3275	.map_pages = filemap_map_pages,
3276	.page_mkwrite = cifs_page_mkwrite,
3277};
3278
3279int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3280{
3281	int rc, xid;
3282	struct inode *inode = file_inode(file);
3283
3284	xid = get_xid();
3285
3286	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3287		rc = cifs_zap_mapping(inode);
3288		if (rc)
3289			return rc;
3290	}
3291
3292	rc = generic_file_mmap(file, vma);
3293	if (rc == 0)
3294		vma->vm_ops = &cifs_file_vm_ops;
3295	free_xid(xid);
3296	return rc;
3297}
3298
3299int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3300{
3301	int rc, xid;
3302
3303	xid = get_xid();
3304	rc = cifs_revalidate_file(file);
3305	if (rc) {
3306		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3307			 rc);
3308		free_xid(xid);
3309		return rc;
3310	}
3311	rc = generic_file_mmap(file, vma);
3312	if (rc == 0)
3313		vma->vm_ops = &cifs_file_vm_ops;
3314	free_xid(xid);
3315	return rc;
3316}
3317
3318static void
3319cifs_readv_complete(struct work_struct *work)
3320{
3321	unsigned int i, got_bytes;
3322	struct cifs_readdata *rdata = container_of(work,
3323						struct cifs_readdata, work);
3324
3325	got_bytes = rdata->got_bytes;
3326	for (i = 0; i < rdata->nr_pages; i++) {
3327		struct page *page = rdata->pages[i];
3328
3329		lru_cache_add_file(page);
3330
3331		if (rdata->result == 0 ||
3332		    (rdata->result == -EAGAIN && got_bytes)) {
3333			flush_dcache_page(page);
3334			SetPageUptodate(page);
3335		}
3336
3337		unlock_page(page);
3338
3339		if (rdata->result == 0 ||
3340		    (rdata->result == -EAGAIN && got_bytes))
3341			cifs_readpage_to_fscache(rdata->mapping->host, page);
3342
3343		got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3344
3345		put_page(page);
3346		rdata->pages[i] = NULL;
3347	}
3348	kref_put(&rdata->refcount, cifs_readdata_release);
3349}
3350
3351static int
3352cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3353			struct cifs_readdata *rdata, unsigned int len)
3354{
3355	int result = 0;
3356	unsigned int i;
3357	u64 eof;
3358	pgoff_t eof_index;
3359	unsigned int nr_pages = rdata->nr_pages;
3360
3361	/* determine the eof that the server (probably) has */
3362	eof = CIFS_I(rdata->mapping->host)->server_eof;
3363	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3364	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3365
3366	rdata->got_bytes = 0;
3367	rdata->tailsz = PAGE_SIZE;
3368	for (i = 0; i < nr_pages; i++) {
3369		struct page *page = rdata->pages[i];
3370		size_t n = PAGE_SIZE;
3371
3372		if (len >= PAGE_SIZE) {
3373			len -= PAGE_SIZE;
3374		} else if (len > 0) {
3375			/* enough for partial page, fill and zero the rest */
3376			zero_user(page, len, PAGE_SIZE - len);
3377			n = rdata->tailsz = len;
3378			len = 0;
3379		} else if (page->index > eof_index) {
3380			/*
3381			 * The VFS will not try to do readahead past the
3382			 * i_size, but it's possible that we have outstanding
3383			 * writes with gaps in the middle and the i_size hasn't
3384			 * caught up yet. Populate those with zeroed out pages
3385			 * to prevent the VFS from repeatedly attempting to
3386			 * fill them until the writes are flushed.
3387			 */
3388			zero_user(page, 0, PAGE_SIZE);
3389			lru_cache_add_file(page);
3390			flush_dcache_page(page);
3391			SetPageUptodate(page);
3392			unlock_page(page);
3393			put_page(page);
3394			rdata->pages[i] = NULL;
3395			rdata->nr_pages--;
3396			continue;
3397		} else {
3398			/* no need to hold page hostage */
3399			lru_cache_add_file(page);
3400			unlock_page(page);
3401			put_page(page);
3402			rdata->pages[i] = NULL;
3403			rdata->nr_pages--;
3404			continue;
3405		}
3406
3407		result = cifs_read_page_from_socket(server, page, n);
3408		if (result < 0)
3409			break;
3410
3411		rdata->got_bytes += result;
3412	}
3413
3414	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3415						rdata->got_bytes : result;
3416}
3417
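/*
 * Pull a run of consecutive pages off @page_list (up to @rsize bytes worth),
 * add them to the page cache and move them to @tmplist so they can be sent
 * in a single asynchronous read request.
 */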
3418static int
3419readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3420		    unsigned int rsize, struct list_head *tmplist,
3421		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3422{
3423	struct page *page, *tpage;
3424	unsigned int expected_index;
3425	int rc;
3426	gfp_t gfp = readahead_gfp_mask(mapping);
3427
3428	INIT_LIST_HEAD(tmplist);
3429
3430	page = list_entry(page_list->prev, struct page, lru);
3431
3432	/*
3433	 * Lock the page and put it in the cache. Since no one else
3434	 * should have access to this page, we're safe to simply set
3435	 * PG_locked without checking it first.
3436	 */
3437	__SetPageLocked(page);
3438	rc = add_to_page_cache_locked(page, mapping,
3439				      page->index, gfp);
3440
3441	/* give up if we can't stick it in the cache */
3442	if (rc) {
3443		__ClearPageLocked(page);
3444		return rc;
3445	}
3446
3447	/* move first page to the tmplist */
3448	*offset = (loff_t)page->index << PAGE_SHIFT;
3449	*bytes = PAGE_SIZE;
3450	*nr_pages = 1;
3451	list_move_tail(&page->lru, tmplist);
3452
3453	/* now try and add more pages onto the request */
3454	expected_index = page->index + 1;
3455	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3456		/* discontinuity ? */
3457		if (page->index != expected_index)
3458			break;
3459
3460		/* would this page push the read over the rsize? */
3461		if (*bytes + PAGE_SIZE > rsize)
3462			break;
3463
3464		__SetPageLocked(page);
3465		if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3466			__ClearPageLocked(page);
3467			break;
3468		}
3469		list_move_tail(&page->lru, tmplist);
3470		(*bytes) += PAGE_SIZE;
3471		expected_index++;
3472		(*nr_pages)++;
3473	}
3474	return rc;
3475}
3476
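/*
 * ->readpages: satisfy readahead from fscache where possible, then batch the
 * remaining pages into rsize-sized asynchronous reads.
 */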
3477static int cifs_readpages(struct file *file, struct address_space *mapping,
3478	struct list_head *page_list, unsigned num_pages)
3479{
3480	int rc;
3481	struct list_head tmplist;
3482	struct cifsFileInfo *open_file = file->private_data;
3483	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3484	struct TCP_Server_Info *server;
3485	pid_t pid;
3486
3487	/*
3488	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3489	 * immediately if the cookie is negative
3490	 *
3491	 * After this point, every page in the list might have PG_fscache set,
3492	 * so we will need to clean that up off of every page we don't use.
3493	 */
3494	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3495					 &num_pages);
3496	if (rc == 0)
3497		return rc;
3498
3499	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3500		pid = open_file->pid;
3501	else
3502		pid = current->tgid;
3503
3504	rc = 0;
3505	server = tlink_tcon(open_file->tlink)->ses->server;
3506
3507	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3508		 __func__, file, mapping, num_pages);
3509
3510	/*
3511	 * Start with the page at end of list and move it to private
3512	 * list. Do the same with any following pages until we hit
3513	 * the rsize limit, hit an index discontinuity, or run out of
3514	 * pages. Issue the async read and then start the loop again
3515	 * until the list is empty.
3516	 *
3517	 * Note that list order is important. The page_list is in
3518	 * the order of declining indexes. When we put the pages in
3519	 * the rdata->pages, then we want them in increasing order.
3520	 */
3521	while (!list_empty(page_list)) {
3522		unsigned int i, nr_pages, bytes, rsize;
3523		loff_t offset;
3524		struct page *page, *tpage;
3525		struct cifs_readdata *rdata;
3526		unsigned credits;
3527
3528		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3529						   &rsize, &credits);
3530		if (rc)
3531			break;
3532
3533		/*
3534		 * Give up immediately if rsize is too small to read an entire
3535		 * page. The VFS will fall back to readpage. We should never
3536		 * reach this point however since we set ra_pages to 0 when the
3537		 * rsize is smaller than a cache page.
3538		 */
3539		if (unlikely(rsize < PAGE_SIZE)) {
3540			add_credits_and_wake_if(server, credits, 0);
3541			return 0;
3542		}
3543
3544		rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3545					 &nr_pages, &offset, &bytes);
3546		if (rc) {
3547			add_credits_and_wake_if(server, credits, 0);
3548			break;
3549		}
3550
3551		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3552		if (!rdata) {
3553			/* best to give up if we're out of mem */
3554			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3555				list_del(&page->lru);
3556				lru_cache_add_file(page);
3557				unlock_page(page);
3558				put_page(page);
3559			}
3560			rc = -ENOMEM;
3561			add_credits_and_wake_if(server, credits, 0);
3562			break;
3563		}
3564
3565		rdata->cfile = cifsFileInfo_get(open_file);
3566		rdata->mapping = mapping;
3567		rdata->offset = offset;
3568		rdata->bytes = bytes;
3569		rdata->pid = pid;
3570		rdata->pagesz = PAGE_SIZE;
3571		rdata->read_into_pages = cifs_readpages_read_into_pages;
3572		rdata->credits = credits;
3573
3574		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3575			list_del(&page->lru);
3576			rdata->pages[rdata->nr_pages++] = page;
3577		}
3578
3579		if (!rdata->cfile->invalidHandle ||
3580		    !cifs_reopen_file(rdata->cfile, true))
3581			rc = server->ops->async_readv(rdata);
3582		if (rc) {
3583			add_credits_and_wake_if(server, rdata->credits, 0);
3584			for (i = 0; i < rdata->nr_pages; i++) {
3585				page = rdata->pages[i];
3586				lru_cache_add_file(page);
3587				unlock_page(page);
3588				put_page(page);
3589			}
3590			/* Fallback to the readpage in error/reconnect cases */
3591			kref_put(&rdata->refcount, cifs_readdata_release);
3592			break;
3593		}
3594
3595		kref_put(&rdata->refcount, cifs_readdata_release);
3596	}
3597
3598	/* Any pages that have been shown to fscache but didn't get added to
3599	 * the pagecache must be uncached before they get returned to the
3600	 * allocator.
3601	 */
3602	cifs_fscache_readpages_cancel(mapping->host, page_list);
3603	return rc;
3604}
3605
3606/*
3607 * cifs_readpage_worker must be called with the page pinned
3608 */
3609static int cifs_readpage_worker(struct file *file, struct page *page,
3610	loff_t *poffset)
3611{
3612	char *read_data;
3613	int rc;
3614
3615	/* Is the page cached? */
3616	rc = cifs_readpage_from_fscache(file_inode(file), page);
3617	if (rc == 0)
3618		goto read_complete;
3619
3620	read_data = kmap(page);
3621	/* for reads over a certain size we could initiate async read-ahead */
3622
3623	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
3624
3625	if (rc < 0)
3626		goto io_error;
3627	else
3628		cifs_dbg(FYI, "Bytes read %d\n", rc);
3629
3630	file_inode(file)->i_atime =
3631		current_time(file_inode(file));
3632
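	/*
	 * On a short read, zero the remainder of the page so it is fully
	 * initialised before being marked uptodate.
	 */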
3633	if (PAGE_SIZE > rc)
3634		memset(read_data + rc, 0, PAGE_SIZE - rc);
3635
3636	flush_dcache_page(page);
3637	SetPageUptodate(page);
3638
3639	/* send this page to the cache */
3640	cifs_readpage_to_fscache(file_inode(file), page);
3641
3642	rc = 0;
3643
3644io_error:
3645	kunmap(page);
3646	unlock_page(page);
3647
3648read_complete:
3649	return rc;
3650}
3651
3652static int cifs_readpage(struct file *file, struct page *page)
3653{
3654	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3655	int rc = -EACCES;
3656	unsigned int xid;
3657
3658	xid = get_xid();
3659
3660	if (file->private_data == NULL) {
3661		rc = -EBADF;
3662		free_xid(xid);
3663		return rc;
3664	}
3665
3666	cifs_dbg(FYI, "readpage %p at offset %lld 0x%llx\n",
3667		 page, offset, offset);
3668
3669	rc = cifs_readpage_worker(file, page, &offset);
3670
3671	free_xid(xid);
3672	return rc;
3673}
3674
3675static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3676{
3677	struct cifsFileInfo *open_file;
3678	struct cifs_tcon *tcon =
3679		cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
3680
3681	spin_lock(&tcon->open_file_lock);
3682	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3683		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3684			spin_unlock(&tcon->open_file_lock);
3685			return 1;
3686		}
3687	}
3688	spin_unlock(&tcon->open_file_lock);
3689	return 0;
3690}
3691
3692/* We do not want to update the file size from the server for inodes
3693   open for write - to avoid races with writepage extending the file.
3694   In the future we could consider refreshing the inode only on
3695   increases in the file size, but this is tricky to do without
3696   racing with writebehind page caching in the current Linux
3697   kernel design. */
3698bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3699{
3700	if (!cifsInode)
3701		return true;
3702
3703	if (is_inode_writable(cifsInode)) {
3704		/* This inode is open for write at least once */
3705		struct cifs_sb_info *cifs_sb;
3706
3707		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3708		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3709			/* no page cache to corrupt with direct I/O,
3710			   so we can change the size safely */
3711			return true;
3712		}
3713
3714		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3715			return true;
3716
3717		return false;
3718	} else
3719		return true;
3720}
3721
3722static int cifs_write_begin(struct file *file, struct address_space *mapping,
3723			loff_t pos, unsigned len, unsigned flags,
3724			struct page **pagep, void **fsdata)
3725{
3726	int oncethru = 0;
3727	pgoff_t index = pos >> PAGE_SHIFT;
3728	loff_t offset = pos & (PAGE_SIZE - 1);
3729	loff_t page_start = pos & PAGE_MASK;
3730	loff_t i_size;
3731	struct page *page;
3732	int rc = 0;
3733
3734	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3735
3736start:
3737	page = grab_cache_page_write_begin(mapping, index, flags);
3738	if (!page) {
3739		rc = -ENOMEM;
3740		goto out;
3741	}
3742
3743	if (PageUptodate(page))
3744		goto out;
3745
3746	/*
3747	 * If we write a full page it will be up to date, no need to read from
3748	 * the server. If the write is short, we'll end up doing a sync write
3749	 * instead.
3750	 */
3751	if (len == PAGE_SIZE)
3752		goto out;
3753
3754	/*
3755	 * optimize away the read when we have an oplock, and we're not
3756	 * expecting to use any of the data we'd be reading in. That
3757	 * is, when the page lies beyond the EOF, or straddles the EOF
3758	 * and the write will cover all of the existing data.
3759	 */
3760	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3761		i_size = i_size_read(mapping->host);
3762		if (page_start >= i_size ||
3763		    (offset == 0 && (pos + len) >= i_size)) {
3764			zero_user_segments(page, 0, offset,
3765					   offset + len,
3766					   PAGE_SIZE);
3767			/*
3768			 * PageChecked means that the parts of the page
3769			 * to which we're not writing are considered up
3770			 * to date. Once the data is copied to the
3771			 * page, it can be set uptodate.
3772			 */
3773			SetPageChecked(page);
3774			goto out;
3775		}
3776	}
3777
3778	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3779		/*
3780		 * might as well read a page, it is fast enough. If we get
3781		 * an error, we don't need to return it. cifs_write_end will
3782		 * do a sync write instead since PG_uptodate isn't set.
3783		 */
3784		cifs_readpage_worker(file, page, &page_start);
3785		put_page(page);
3786		oncethru = 1;
3787		goto start;
3788	} else {
3789		/* We could try using another file handle if there is one,
3790		   but how would we lock it to prevent a close of that handle
3791		   from racing with this read? In any case, this page will be
3792		   written out by write_end, so this is fine. */
3793	}
3794out:
3795	*pagep = page;
3796	return rc;
3797}
3798
3799static int cifs_release_page(struct page *page, gfp_t gfp)
3800{
3801	if (PagePrivate(page))
3802		return 0;
3803
3804	return cifs_fscache_release_page(page, gfp);
3805}
3806
3807static void cifs_invalidate_page(struct page *page, unsigned int offset,
3808				 unsigned int length)
3809{
3810	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3811
3812	if (offset == 0 && length == PAGE_SIZE)
3813		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3814}
3815
3816static int cifs_launder_page(struct page *page)
3817{
3818	int rc = 0;
3819	loff_t range_start = page_offset(page);
3820	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
3821	struct writeback_control wbc = {
3822		.sync_mode = WB_SYNC_ALL,
3823		.nr_to_write = 0,
3824		.range_start = range_start,
3825		.range_end = range_end,
3826	};
3827
3828	cifs_dbg(FYI, "Launder page: %p\n", page);
3829
3830	if (clear_page_dirty_for_io(page))
3831		rc = cifs_writepage_locked(page, &wbc);
3832
3833	cifs_fscache_invalidate_page(page, page->mapping->host);
3834	return rc;
3835}
3836
3837void cifs_oplock_break(struct work_struct *work)
3838{
3839	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3840						  oplock_break);
3841	struct inode *inode = d_inode(cfile->dentry);
3842	struct cifsInodeInfo *cinode = CIFS_I(inode);
3843	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3844	struct TCP_Server_Info *server = tcon->ses->server;
3845	int rc = 0;
3846
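	/*
	 * Wait for any pending writers on this inode to finish before
	 * changing the oplock state.
	 */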
3847	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3848			TASK_UNINTERRUPTIBLE);
3849
3850	server->ops->downgrade_oplock(server, cinode,
3851		test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3852
3853	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3854						cifs_has_mand_locks(cinode)) {
3855		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3856			 inode);
3857		cinode->oplock = 0;
3858	}
3859
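	/*
	 * For regular files, flush dirty pages; if the read oplock is also
	 * gone, wait for writeback and invalidate the cached pages too.
	 */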
3860	if (inode && S_ISREG(inode->i_mode)) {
3861		if (CIFS_CACHE_READ(cinode))
3862			break_lease(inode, O_RDONLY);
3863		else
3864			break_lease(inode, O_WRONLY);
3865		rc = filemap_fdatawrite(inode->i_mapping);
3866		if (!CIFS_CACHE_READ(cinode)) {
3867			rc = filemap_fdatawait(inode->i_mapping);
3868			mapping_set_error(inode->i_mapping, rc);
3869			cifs_zap_mapping(inode);
3870		}
3871		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3872	}
3873
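	/* Re-push to the server any byte-range locks that were cached locally. */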
3874	rc = cifs_push_locks(cfile);
3875	if (rc)
3876		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3877
3878	/*
3879	 * Releasing a stale oplock after a recent reconnect of the SMB session
3880	 * (using a now-incorrect file handle) is not a data integrity issue,
3881	 * but do not bother sending an oplock release if the session is still
3882	 * disconnected, since the server has already released the oplock.
3883	 */
3884	if (!cfile->oplock_break_cancelled) {
3885		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3886							     cinode);
3887		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3888	}
3889	cifs_done_oplock_break(cinode);
3890}
3891
3892/*
3893 * The presence of cifs_direct_io() in the address space ops vector
3894 * allows open() with the O_DIRECT flag, which would have failed otherwise.
3895 *
3896 * In the non-cached mode (mount with cache=none), we shunt off direct
3897 * read and write requests so this method should never be called.
3898 *
3899 * Direct IO is not yet supported in the cached mode.
3900 */
3901static ssize_t
3902cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
3903{
3904	/*
3905	 * FIXME
3906	 * Eventually need to support direct IO for non forcedirectio mounts
3907	 */
3908	return -EINVAL;
3909}
3910
3911
3912const struct address_space_operations cifs_addr_ops = {
3913	.readpage = cifs_readpage,
3914	.readpages = cifs_readpages,
3915	.writepage = cifs_writepage,
3916	.writepages = cifs_writepages,
3917	.write_begin = cifs_write_begin,
3918	.write_end = cifs_write_end,
3919	.set_page_dirty = __set_page_dirty_nobuffers,
3920	.releasepage = cifs_release_page,
3921	.direct_IO = cifs_direct_io,
3922	.invalidatepage = cifs_invalidate_page,
3923	.launder_page = cifs_launder_page,
3924};
3925
3926/*
3927 * cifs_readpages requires the server to support a buffer large enough to
3928 * contain the header plus one complete page of data.  Otherwise, we need
3929 * to leave cifs_readpages out of the address space operations.
3930 */
3931const struct address_space_operations cifs_addr_ops_smallbuf = {
3932	.readpage = cifs_readpage,
3933	.writepage = cifs_writepage,
3934	.writepages = cifs_writepages,
3935	.write_begin = cifs_write_begin,
3936	.write_end = cifs_write_end,
3937	.set_page_dirty = __set_page_dirty_nobuffers,
3938	.releasepage = cifs_release_page,
3939	.invalidatepage = cifs_invalidate_page,
3940	.launder_page = cifs_launder_page,
3941};