1// SPDX-License-Identifier: LGPL-2.1
2/*
3 *
4 * vfs operations that deal with files
5 *
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
9 *
10 */
11#include <linux/fs.h>
12#include <linux/filelock.h>
13#include <linux/backing-dev.h>
14#include <linux/stat.h>
15#include <linux/fcntl.h>
16#include <linux/pagemap.h>
17#include <linux/pagevec.h>
18#include <linux/writeback.h>
19#include <linux/task_io_accounting_ops.h>
20#include <linux/delay.h>
21#include <linux/mount.h>
22#include <linux/slab.h>
23#include <linux/swap.h>
24#include <linux/mm.h>
25#include <asm/div64.h>
26#include "cifsfs.h"
27#include "cifspdu.h"
28#include "cifsglob.h"
29#include "cifsproto.h"
30#include "smb2proto.h"
31#include "cifs_unicode.h"
32#include "cifs_debug.h"
33#include "cifs_fs_sb.h"
34#include "fscache.h"
35#include "smbdirect.h"
36#include "fs_context.h"
37#include "cifs_ioctl.h"
38#include "cached_dir.h"
39
40/*
41 * Remove the dirty flags from a span of pages.
42 */
43static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
44{
45 struct address_space *mapping = inode->i_mapping;
46 struct folio *folio;
47 pgoff_t end;
48
49 XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
50
51 rcu_read_lock();
52
53 end = (start + len - 1) / PAGE_SIZE;
54 xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
55 if (xas_retry(&xas, folio))
56 continue;
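/*
 * folio_lock() can sleep, so pause the xarray walk and drop the RCU read
 * lock while clearing the dirty flag, then re-acquire it before resuming
 * the iteration.
 */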
57 xas_pause(&xas);
58 rcu_read_unlock();
59 folio_lock(folio);
60 folio_clear_dirty_for_io(folio);
61 folio_unlock(folio);
62 rcu_read_lock();
63 }
64
65 rcu_read_unlock();
66}
67
68/*
69 * Completion of write to server.
70 */
71void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
72{
73 struct address_space *mapping = inode->i_mapping;
74 struct folio *folio;
75 pgoff_t end;
76
77 XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
78
79 if (!len)
80 return;
81
82 rcu_read_lock();
83
84 end = (start + len - 1) / PAGE_SIZE;
85 xas_for_each(&xas, folio, end) {
86 if (xas_retry(&xas, folio))
87 continue;
88 if (!folio_test_writeback(folio)) {
89 WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
90 len, start, folio->index, end);
91 continue;
92 }
93
94 folio_detach_private(folio);
95 folio_end_writeback(folio);
96 }
97
98 rcu_read_unlock();
99}
100
101/*
102 * Failure of write to server.
103 */
104void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
105{
106 struct address_space *mapping = inode->i_mapping;
107 struct folio *folio;
108 pgoff_t end;
109
110 XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
111
112 if (!len)
113 return;
114
115 rcu_read_lock();
116
117 end = (start + len - 1) / PAGE_SIZE;
118 xas_for_each(&xas, folio, end) {
119 if (xas_retry(&xas, folio))
120 continue;
121 if (!folio_test_writeback(folio)) {
122 WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
123 len, start, folio->index, end);
124 continue;
125 }
126
127 folio_set_error(folio);
128 folio_end_writeback(folio);
129 }
130
131 rcu_read_unlock();
132}
133
134/*
135 * Redirty pages after a temporary failure.
136 */
137void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
138{
139 struct address_space *mapping = inode->i_mapping;
140 struct folio *folio;
141 pgoff_t end;
142
143 XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
144
145 if (!len)
146 return;
147
148 rcu_read_lock();
149
150 end = (start + len - 1) / PAGE_SIZE;
151 xas_for_each(&xas, folio, end) {
152 if (!folio_test_writeback(folio)) {
153 WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
154 len, start, folio->index, end);
155 continue;
156 }
157
158 filemap_dirty_folio(folio->mapping, folio);
159 folio_end_writeback(folio);
160 }
161
162 rcu_read_unlock();
163}
164
165/*
166 * Mark all open files on the tree connection as invalid, since they
167 * were closed when the session to the server was lost.
168 */
169void
170cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
171{
172 struct cifsFileInfo *open_file = NULL;
173 struct list_head *tmp;
174 struct list_head *tmp1;
175
176 /* only send once per connect */
177 spin_lock(&tcon->tc_lock);
178 if (tcon->need_reconnect)
179 tcon->status = TID_NEED_RECON;
180
181 if (tcon->status != TID_NEED_RECON) {
182 spin_unlock(&tcon->tc_lock);
183 return;
184 }
185 tcon->status = TID_IN_FILES_INVALIDATE;
186 spin_unlock(&tcon->tc_lock);
187
188 /* list all files open on tree connection and mark them invalid */
189 spin_lock(&tcon->open_file_lock);
190 list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
191 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
192 open_file->invalidHandle = true;
193 open_file->oplock_break_cancelled = true;
194 }
195 spin_unlock(&tcon->open_file_lock);
196
197 invalidate_all_cached_dirs(tcon);
198 spin_lock(&tcon->tc_lock);
199 if (tcon->status == TID_IN_FILES_INVALIDATE)
200 tcon->status = TID_NEED_TCON;
201 spin_unlock(&tcon->tc_lock);
202
203 /*
204 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
205 * to this tcon.
206 */
207}
208
209static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
210{
211 if ((flags & O_ACCMODE) == O_RDONLY)
212 return GENERIC_READ;
213 else if ((flags & O_ACCMODE) == O_WRONLY)
214 return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
215 else if ((flags & O_ACCMODE) == O_RDWR) {
216 /* GENERIC_ALL is too much permission to request; it
217 can cause an unnecessary access-denied error on create */
218 /* return GENERIC_ALL; */
219 return (GENERIC_READ | GENERIC_WRITE);
220 }
221
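/*
 * Fallback for any other access-mode value: request a specific set of
 * read/write rights rather than GENERIC_ALL.
 */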
222 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
223 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
224 FILE_READ_DATA);
225}
226
227#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
228static u32 cifs_posix_convert_flags(unsigned int flags)
229{
230 u32 posix_flags = 0;
231
232 if ((flags & O_ACCMODE) == O_RDONLY)
233 posix_flags = SMB_O_RDONLY;
234 else if ((flags & O_ACCMODE) == O_WRONLY)
235 posix_flags = SMB_O_WRONLY;
236 else if ((flags & O_ACCMODE) == O_RDWR)
237 posix_flags = SMB_O_RDWR;
238
239 if (flags & O_CREAT) {
240 posix_flags |= SMB_O_CREAT;
241 if (flags & O_EXCL)
242 posix_flags |= SMB_O_EXCL;
243 } else if (flags & O_EXCL)
244 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
245 current->comm, current->tgid);
246
247 if (flags & O_TRUNC)
248 posix_flags |= SMB_O_TRUNC;
249 /* be safe and imply O_SYNC for O_DSYNC */
250 if (flags & O_DSYNC)
251 posix_flags |= SMB_O_SYNC;
252 if (flags & O_DIRECTORY)
253 posix_flags |= SMB_O_DIRECTORY;
254 if (flags & O_NOFOLLOW)
255 posix_flags |= SMB_O_NOFOLLOW;
256 if (flags & O_DIRECT)
257 posix_flags |= SMB_O_DIRECT;
258
259 return posix_flags;
260}
261#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
262
263static inline int cifs_get_disposition(unsigned int flags)
264{
265 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
266 return FILE_CREATE;
267 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
268 return FILE_OVERWRITE_IF;
269 else if ((flags & O_CREAT) == O_CREAT)
270 return FILE_OPEN_IF;
271 else if ((flags & O_TRUNC) == O_TRUNC)
272 return FILE_OVERWRITE;
273 else
274 return FILE_OPEN;
275}
276
277#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
278int cifs_posix_open(const char *full_path, struct inode **pinode,
279 struct super_block *sb, int mode, unsigned int f_flags,
280 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
281{
282 int rc;
283 FILE_UNIX_BASIC_INFO *presp_data;
284 __u32 posix_flags = 0;
285 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
286 struct cifs_fattr fattr;
287 struct tcon_link *tlink;
288 struct cifs_tcon *tcon;
289
290 cifs_dbg(FYI, "posix open %s\n", full_path);
291
292 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
293 if (presp_data == NULL)
294 return -ENOMEM;
295
296 tlink = cifs_sb_tlink(cifs_sb);
297 if (IS_ERR(tlink)) {
298 rc = PTR_ERR(tlink);
299 goto posix_open_ret;
300 }
301
302 tcon = tlink_tcon(tlink);
303 mode &= ~current_umask();
304
305 posix_flags = cifs_posix_convert_flags(f_flags);
306 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
307 poplock, full_path, cifs_sb->local_nls,
308 cifs_remap(cifs_sb));
309 cifs_put_tlink(tlink);
310
311 if (rc)
312 goto posix_open_ret;
313
314 if (presp_data->Type == cpu_to_le32(-1))
315 goto posix_open_ret; /* open ok, caller does qpathinfo */
316
317 if (!pinode)
318 goto posix_open_ret; /* caller does not need info */
319
320 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
321
322 /* get new inode and set it up */
323 if (*pinode == NULL) {
324 cifs_fill_uniqueid(sb, &fattr);
325 *pinode = cifs_iget(sb, &fattr);
326 if (!*pinode) {
327 rc = -ENOMEM;
328 goto posix_open_ret;
329 }
330 } else {
331 cifs_revalidate_mapping(*pinode);
332 rc = cifs_fattr_to_inode(*pinode, &fattr, false);
333 }
334
335posix_open_ret:
336 kfree(presp_data);
337 return rc;
338}
339#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
340
341static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
342 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
343 struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
344{
345 int rc;
346 int desired_access;
347 int disposition;
348 int create_options = CREATE_NOT_DIR;
349 struct TCP_Server_Info *server = tcon->ses->server;
350 struct cifs_open_parms oparms;
351 int rdwr_for_fscache = 0;
352
353 if (!server->ops->open)
354 return -ENOSYS;
355
356 /* If we're caching, we need to be able to fill in around partial writes. */
357 if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
358 rdwr_for_fscache = 1;
359
360 desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);
361
362/*********************************************************************
363 * open flag mapping table:
364 *
365 * POSIX Flag CIFS Disposition
366 * ---------- ----------------
367 * O_CREAT FILE_OPEN_IF
368 * O_CREAT | O_EXCL FILE_CREATE
369 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
370 * O_TRUNC FILE_OVERWRITE
371 * none of the above FILE_OPEN
372 *
373 * Note that there is no direct match for the FILE_SUPERSEDE
374 * disposition (i.e. create whether or not the file exists);
375 * O_CREAT | O_TRUNC is similar, but it truncates an existing
376 * file rather than creating a new one as FILE_SUPERSEDE does
377 * (FILE_SUPERSEDE uses the attributes/metadata passed in on the
378 * open call).
379 * O_SYNC is a reasonable match to the CIFS writethrough flag,
380 * and the read/write flags match reasonably. O_LARGEFILE is
381 * irrelevant because largefile support is always used by this
382 * client. The flags O_APPEND, O_DIRECT, O_DIRECTORY,
383 * O_FASYNC, O_NOFOLLOW and O_NONBLOCK need further investigation.
384 *********************************************************************/
385
386 disposition = cifs_get_disposition(f_flags);
387
388 /* BB pass O_SYNC flag through on file attributes .. BB */
389
390 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
391 if (f_flags & O_SYNC)
392 create_options |= CREATE_WRITE_THROUGH;
393
394 if (f_flags & O_DIRECT)
395 create_options |= CREATE_NO_BUFFER;
396
397retry_open:
398 oparms = (struct cifs_open_parms) {
399 .tcon = tcon,
400 .cifs_sb = cifs_sb,
401 .desired_access = desired_access,
402 .create_options = cifs_create_options(cifs_sb, create_options),
403 .disposition = disposition,
404 .path = full_path,
405 .fid = fid,
406 };
407
408 rc = server->ops->open(xid, &oparms, oplock, buf);
409 if (rc) {
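/*
 * The widened read+write access requested on behalf of fscache was
 * refused; retry with only the access the caller asked for.
 * rdwr_for_fscache == 2 records that, once the open succeeds, the
 * fscache data for this inode must be invalidated.
 */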
410 if (rc == -EACCES && rdwr_for_fscache == 1) {
411 desired_access = cifs_convert_flags(f_flags, 0);
412 rdwr_for_fscache = 2;
413 goto retry_open;
414 }
415 return rc;
416 }
417 if (rdwr_for_fscache == 2)
418 cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
419
420 /* TODO: Add support for calling posix query info but with passing in fid */
421 if (tcon->unix_ext)
422 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
423 xid);
424 else
425 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
426 xid, fid);
427
428 if (rc) {
429 server->ops->close(xid, tcon, fid);
430 if (rc == -ESTALE)
431 rc = -EOPENSTALE;
432 }
433
434 return rc;
435}
436
437static bool
438cifs_has_mand_locks(struct cifsInodeInfo *cinode)
439{
440 struct cifs_fid_locks *cur;
441 bool has_locks = false;
442
443 down_read(&cinode->lock_sem);
444 list_for_each_entry(cur, &cinode->llist, llist) {
445 if (!list_empty(&cur->locks)) {
446 has_locks = true;
447 break;
448 }
449 }
450 up_read(&cinode->lock_sem);
451 return has_locks;
452}
453
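/*
 * Take lock_sem for writing without blocking in down_write(): poll with
 * down_write_trylock() and sleep 10ms between attempts until it succeeds.
 */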
454void
455cifs_down_write(struct rw_semaphore *sem)
456{
457 while (!down_write_trylock(sem))
458 msleep(10);
459}
460
461static void cifsFileInfo_put_work(struct work_struct *work);
462void serverclose_work(struct work_struct *work);
463
464struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
465 struct tcon_link *tlink, __u32 oplock,
466 const char *symlink_target)
467{
468 struct dentry *dentry = file_dentry(file);
469 struct inode *inode = d_inode(dentry);
470 struct cifsInodeInfo *cinode = CIFS_I(inode);
471 struct cifsFileInfo *cfile;
472 struct cifs_fid_locks *fdlocks;
473 struct cifs_tcon *tcon = tlink_tcon(tlink);
474 struct TCP_Server_Info *server = tcon->ses->server;
475
476 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
477 if (cfile == NULL)
478 return cfile;
479
480 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
481 if (!fdlocks) {
482 kfree(cfile);
483 return NULL;
484 }
485
486 if (symlink_target) {
487 cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
488 if (!cfile->symlink_target) {
489 kfree(fdlocks);
490 kfree(cfile);
491 return NULL;
492 }
493 }
494
495 INIT_LIST_HEAD(&fdlocks->locks);
496 fdlocks->cfile = cfile;
497 cfile->llist = fdlocks;
498
499 cfile->count = 1;
500 cfile->pid = current->tgid;
501 cfile->uid = current_fsuid();
502 cfile->dentry = dget(dentry);
503 cfile->f_flags = file->f_flags;
504 cfile->invalidHandle = false;
505 cfile->deferred_close_scheduled = false;
506 cfile->tlink = cifs_get_tlink(tlink);
507 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
508 INIT_WORK(&cfile->put, cifsFileInfo_put_work);
509 INIT_WORK(&cfile->serverclose, serverclose_work);
510 INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
511 mutex_init(&cfile->fh_mutex);
512 spin_lock_init(&cfile->file_info_lock);
513
514 cifs_sb_active(inode->i_sb);
515
516 /*
517 * If the server returned a read oplock and we have mandatory brlocks,
518 * set oplock level to None.
519 */
520 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
521 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
522 oplock = 0;
523 }
524
525 cifs_down_write(&cinode->lock_sem);
526 list_add(&fdlocks->llist, &cinode->llist);
527 up_write(&cinode->lock_sem);
528
529 spin_lock(&tcon->open_file_lock);
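/*
 * The pending_open entry's oplock level may have been updated by a
 * lease/oplock break that arrived while this open was in flight; if so,
 * prefer that value over the one the open call returned.
 */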
530 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
531 oplock = fid->pending_open->oplock;
532 list_del(&fid->pending_open->olist);
533
534 fid->purge_cache = false;
535 server->ops->set_fid(cfile, fid, oplock);
536
537 list_add(&cfile->tlist, &tcon->openFileList);
538 atomic_inc(&tcon->num_local_opens);
539
540 /* if this is a readable file instance, put it first in the list */
541 spin_lock(&cinode->open_file_lock);
542 if (file->f_mode & FMODE_READ)
543 list_add(&cfile->flist, &cinode->openFileList);
544 else
545 list_add_tail(&cfile->flist, &cinode->openFileList);
546 spin_unlock(&cinode->open_file_lock);
547 spin_unlock(&tcon->open_file_lock);
548
549 if (fid->purge_cache)
550 cifs_zap_mapping(inode);
551
552 file->private_data = cfile;
553 return cfile;
554}
555
556struct cifsFileInfo *
557cifsFileInfo_get(struct cifsFileInfo *cifs_file)
558{
559 spin_lock(&cifs_file->file_info_lock);
560 cifsFileInfo_get_locked(cifs_file);
561 spin_unlock(&cifs_file->file_info_lock);
562 return cifs_file;
563}
564
565static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
566{
567 struct inode *inode = d_inode(cifs_file->dentry);
568 struct cifsInodeInfo *cifsi = CIFS_I(inode);
569 struct cifsLockInfo *li, *tmp;
570 struct super_block *sb = inode->i_sb;
571
572 /*
573 * Delete any outstanding lock records. We'll lose them when the file
574 * is closed anyway.
575 */
576 cifs_down_write(&cifsi->lock_sem);
577 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
578 list_del(&li->llist);
579 cifs_del_lock_waiters(li);
580 kfree(li);
581 }
582 list_del(&cifs_file->llist->llist);
583 kfree(cifs_file->llist);
584 up_write(&cifsi->lock_sem);
585
586 cifs_put_tlink(cifs_file->tlink);
587 dput(cifs_file->dentry);
588 cifs_sb_deactive(sb);
589 kfree(cifs_file->symlink_target);
590 kfree(cifs_file);
591}
592
593static void cifsFileInfo_put_work(struct work_struct *work)
594{
595 struct cifsFileInfo *cifs_file = container_of(work,
596 struct cifsFileInfo, put);
597
598 cifsFileInfo_put_final(cifs_file);
599}
600
601void serverclose_work(struct work_struct *work)
602{
603 struct cifsFileInfo *cifs_file = container_of(work,
604 struct cifsFileInfo, serverclose);
605
606 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
607
608 struct TCP_Server_Info *server = tcon->ses->server;
609 int rc = 0;
610 int retries = 0;
611 int MAX_RETRIES = 4;
612
613 do {
614 if (server->ops->close_getattr)
615 rc = server->ops->close_getattr(0, tcon, cifs_file);
616 else if (server->ops->close)
617 rc = server->ops->close(0, tcon, &cifs_file->fid);
618
619 if (rc == -EBUSY || rc == -EAGAIN) {
620 retries++;
621 msleep(250);
622 }
623 } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
624 );
625
626 if (retries == MAX_RETRIES)
627 pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
628
629 if (cifs_file->offload)
630 queue_work(fileinfo_put_wq, &cifs_file->put);
631 else
632 cifsFileInfo_put_final(cifs_file);
633}
634
635/**
636 * cifsFileInfo_put - release a reference to file private data
637 *
638 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
639 *
640 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file
641 */
642void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
643{
644 _cifsFileInfo_put(cifs_file, true, true);
645}
646
647/**
648 * _cifsFileInfo_put - release a reference to file private data
649 *
650 * This may involve closing the filehandle @cifs_file out on the
651 * server. Must be called without holding tcon->open_file_lock,
652 * cinode->open_file_lock and cifs_file->file_info_lock.
653 *
654 * If @wait_for_oplock_handler is true and we are releasing the last
655 * reference, wait for any running oplock break handler of the file
656 * and cancel any pending one.
657 *
658 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file
659 * @wait_oplock_handler: must be false if called from oplock_break_handler
660 * @offload: if true, offload the final release to a workqueue (the close and oplock break paths do not offload)
661 *
662 */
663void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
664 bool wait_oplock_handler, bool offload)
665{
666 struct inode *inode = d_inode(cifs_file->dentry);
667 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
668 struct TCP_Server_Info *server = tcon->ses->server;
669 struct cifsInodeInfo *cifsi = CIFS_I(inode);
670 struct super_block *sb = inode->i_sb;
671 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
672 struct cifs_fid fid = {};
673 struct cifs_pending_open open;
674 bool oplock_break_cancelled;
675 bool serverclose_offloaded = false;
676
677 spin_lock(&tcon->open_file_lock);
678 spin_lock(&cifsi->open_file_lock);
679 spin_lock(&cifs_file->file_info_lock);
680
681 cifs_file->offload = offload;
682 if (--cifs_file->count > 0) {
683 spin_unlock(&cifs_file->file_info_lock);
684 spin_unlock(&cifsi->open_file_lock);
685 spin_unlock(&tcon->open_file_lock);
686 return;
687 }
688 spin_unlock(&cifs_file->file_info_lock);
689
690 if (server->ops->get_lease_key)
691 server->ops->get_lease_key(inode, &fid);
692
693 /* store open in pending opens to make sure we don't miss lease break */
694 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
695
696 /* remove it from the lists */
697 list_del(&cifs_file->flist);
698 list_del(&cifs_file->tlist);
699 atomic_dec(&tcon->num_local_opens);
700
701 if (list_empty(&cifsi->openFileList)) {
702 cifs_dbg(FYI, "closing last open instance for inode %p\n",
703 d_inode(cifs_file->dentry));
704 /*
705 * In strict cache mode we need to invalidate the mapping on the last
706 * close, because it may cause an error when we open this file
707 * again and get at least a level II oplock.
708 */
709 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
710 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
711 cifs_set_oplock_level(cifsi, 0);
712 }
713
714 spin_unlock(&cifsi->open_file_lock);
715 spin_unlock(&tcon->open_file_lock);
716
717 oplock_break_cancelled = wait_oplock_handler ?
718 cancel_work_sync(&cifs_file->oplock_break) : false;
719
720 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
721 struct TCP_Server_Info *server = tcon->ses->server;
722 unsigned int xid;
723 int rc = 0;
724
725 xid = get_xid();
726 if (server->ops->close_getattr)
727 rc = server->ops->close_getattr(xid, tcon, cifs_file);
728 else if (server->ops->close)
729 rc = server->ops->close(xid, tcon, &cifs_file->fid);
730 _free_xid(xid);
731
732 if (rc == -EBUSY || rc == -EAGAIN) {
733 // Server close failed, hence offloading it as an async op
734 queue_work(serverclose_wq, &cifs_file->serverclose);
735 serverclose_offloaded = true;
736 }
737 }
738
739 if (oplock_break_cancelled)
740 cifs_done_oplock_break(cifsi);
741
742 cifs_del_pending_open(&open);
743
744 // If serverclose has been offloaded to the wq (on failure), it will
745 // handle offloading the put as well. If serverclose was not offloaded,
746 // we need to handle offloading the put here.
747 if (!serverclose_offloaded) {
748 if (offload)
749 queue_work(fileinfo_put_wq, &cifs_file->put);
750 else
751 cifsFileInfo_put_final(cifs_file);
752 }
753}
754
755int cifs_open(struct inode *inode, struct file *file)
756
757{
758 int rc = -EACCES;
759 unsigned int xid;
760 __u32 oplock;
761 struct cifs_sb_info *cifs_sb;
762 struct TCP_Server_Info *server;
763 struct cifs_tcon *tcon;
764 struct tcon_link *tlink;
765 struct cifsFileInfo *cfile = NULL;
766 void *page;
767 const char *full_path;
768 bool posix_open_ok = false;
769 struct cifs_fid fid = {};
770 struct cifs_pending_open open;
771 struct cifs_open_info_data data = {};
772
773 xid = get_xid();
774
775 cifs_sb = CIFS_SB(inode->i_sb);
776 if (unlikely(cifs_forced_shutdown(cifs_sb))) {
777 free_xid(xid);
778 return -EIO;
779 }
780
781 tlink = cifs_sb_tlink(cifs_sb);
782 if (IS_ERR(tlink)) {
783 free_xid(xid);
784 return PTR_ERR(tlink);
785 }
786 tcon = tlink_tcon(tlink);
787 server = tcon->ses->server;
788
789 page = alloc_dentry_path();
790 full_path = build_path_from_dentry(file_dentry(file), page);
791 if (IS_ERR(full_path)) {
792 rc = PTR_ERR(full_path);
793 goto out;
794 }
795
796 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
797 inode, file->f_flags, full_path);
798
799 if (file->f_flags & O_DIRECT &&
800 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
801 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
802 file->f_op = &cifs_file_direct_nobrl_ops;
803 else
804 file->f_op = &cifs_file_direct_ops;
805 }
806
807 /* Get the cached handle as SMB2 close is deferred */
808 rc = cifs_get_readable_path(tcon, full_path, &cfile);
809 if (rc == 0) {
810 if (file->f_flags == cfile->f_flags) {
811 file->private_data = cfile;
812 spin_lock(&CIFS_I(inode)->deferred_lock);
813 cifs_del_deferred_close(cfile);
814 spin_unlock(&CIFS_I(inode)->deferred_lock);
815 goto use_cache;
816 } else {
817 _cifsFileInfo_put(cfile, true, false);
818 }
819 }
820
821 if (server->oplocks)
822 oplock = REQ_OPLOCK;
823 else
824 oplock = 0;
825
826#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
827 if (!tcon->broken_posix_open && tcon->unix_ext &&
828 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
829 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
830 /* can not refresh inode info since size could be stale */
831 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
832 cifs_sb->ctx->file_mode /* ignored */,
833 file->f_flags, &oplock, &fid.netfid, xid);
834 if (rc == 0) {
835 cifs_dbg(FYI, "posix open succeeded\n");
836 posix_open_ok = true;
837 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
838 if (tcon->ses->serverNOS)
839 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
840 tcon->ses->ip_addr,
841 tcon->ses->serverNOS);
842 tcon->broken_posix_open = true;
843 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
844 (rc != -EOPNOTSUPP)) /* path not found or net err */
845 goto out;
846 /*
847 * Else fallthrough to retry open the old way on network i/o
848 * or DFS errors.
849 */
850 }
851#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
852
853 if (server->ops->get_lease_key)
854 server->ops->get_lease_key(inode, &fid);
855
856 cifs_add_pending_open(&fid, tlink, &open);
857
858 if (!posix_open_ok) {
859 if (server->ops->get_lease_key)
860 server->ops->get_lease_key(inode, &fid);
861
862 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
863 xid, &data);
864 if (rc) {
865 cifs_del_pending_open(&open);
866 goto out;
867 }
868 }
869
870 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
871 if (cfile == NULL) {
872 if (server->ops->close)
873 server->ops->close(xid, tcon, &fid);
874 cifs_del_pending_open(&open);
875 rc = -ENOMEM;
876 goto out;
877 }
878
879#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
880 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
881 /*
882 * Time to set mode which we can not set earlier due to
883 * problems creating new read-only files.
884 */
885 struct cifs_unix_set_info_args args = {
886 .mode = inode->i_mode,
887 .uid = INVALID_UID, /* no change */
888 .gid = INVALID_GID, /* no change */
889 .ctime = NO_CHANGE_64,
890 .atime = NO_CHANGE_64,
891 .mtime = NO_CHANGE_64,
892 .device = 0,
893 };
894 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
895 cfile->pid);
896 }
897#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
898
899use_cache:
900 fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
901 file->f_mode & FMODE_WRITE);
902 if (!(file->f_flags & O_DIRECT))
903 goto out;
904 if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
905 goto out;
906 cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);
907
908out:
909 free_dentry_path(page);
910 free_xid(xid);
911 cifs_put_tlink(tlink);
912 cifs_free_open_info(&data);
913 return rc;
914}
915
916#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
917static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
918#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
919
920/*
921 * Try to reacquire byte range locks that were released when session
922 * to server was lost.
923 */
924static int
925cifs_relock_file(struct cifsFileInfo *cfile)
926{
927 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
928 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
929 int rc = 0;
930#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
931 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
932#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
933
934 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
935 if (cinode->can_cache_brlcks) {
936 /* can cache locks - no need to relock */
937 up_read(&cinode->lock_sem);
938 return rc;
939 }
940
941#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
942 if (cap_unix(tcon->ses) &&
943 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
944 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
945 rc = cifs_push_posix_locks(cfile);
946 else
947#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
948 rc = tcon->ses->server->ops->push_mand_locks(cfile);
949
950 up_read(&cinode->lock_sem);
951 return rc;
952}
953
954static int
955cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
956{
957 int rc = -EACCES;
958 unsigned int xid;
959 __u32 oplock;
960 struct cifs_sb_info *cifs_sb;
961 struct cifs_tcon *tcon;
962 struct TCP_Server_Info *server;
963 struct cifsInodeInfo *cinode;
964 struct inode *inode;
965 void *page;
966 const char *full_path;
967 int desired_access;
968 int disposition = FILE_OPEN;
969 int create_options = CREATE_NOT_DIR;
970 struct cifs_open_parms oparms;
971 int rdwr_for_fscache = 0;
972
973 xid = get_xid();
974 mutex_lock(&cfile->fh_mutex);
975 if (!cfile->invalidHandle) {
976 mutex_unlock(&cfile->fh_mutex);
977 free_xid(xid);
978 return 0;
979 }
980
981 inode = d_inode(cfile->dentry);
982 cifs_sb = CIFS_SB(inode->i_sb);
983 tcon = tlink_tcon(cfile->tlink);
984 server = tcon->ses->server;
985
986 /*
987 * We cannot grab the rename sem here: various ops, including those that
988 * already hold it, can end up causing writepage to be called, and if the
989 * server was down that means we end up here, and we can never tell
990 * whether the caller already holds the rename_sem.
991 */
992 page = alloc_dentry_path();
993 full_path = build_path_from_dentry(cfile->dentry, page);
994 if (IS_ERR(full_path)) {
995 mutex_unlock(&cfile->fh_mutex);
996 free_dentry_path(page);
997 free_xid(xid);
998 return PTR_ERR(full_path);
999 }
1000
1001 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
1002 inode, cfile->f_flags, full_path);
1003
1004 if (tcon->ses->server->oplocks)
1005 oplock = REQ_OPLOCK;
1006 else
1007 oplock = 0;
1008
1009#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1010 if (tcon->unix_ext && cap_unix(tcon->ses) &&
1011 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
1012 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
1013 /*
1014 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
1015 * original open. Must mask them off for a reopen.
1016 */
1017 unsigned int oflags = cfile->f_flags &
1018 ~(O_CREAT | O_EXCL | O_TRUNC);
1019
1020 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
1021 cifs_sb->ctx->file_mode /* ignored */,
1022 oflags, &oplock, &cfile->fid.netfid, xid);
1023 if (rc == 0) {
1024 cifs_dbg(FYI, "posix reopen succeeded\n");
1025 oparms.reconnect = true;
1026 goto reopen_success;
1027 }
1028 /*
1029 * Fall through to retry the open the old way on errors; in the
1030 * reconnect path especially, it is important to retry hard.
1031 */
1032 }
1033#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1034
1035 /* If we're caching, we need to be able to fill in around partial writes. */
1036 if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
1037 rdwr_for_fscache = 1;
1038
1039 desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);
1040
1041 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
1042 if (cfile->f_flags & O_SYNC)
1043 create_options |= CREATE_WRITE_THROUGH;
1044
1045 if (cfile->f_flags & O_DIRECT)
1046 create_options |= CREATE_NO_BUFFER;
1047
1048 if (server->ops->get_lease_key)
1049 server->ops->get_lease_key(inode, &cfile->fid);
1050
1051retry_open:
1052 oparms = (struct cifs_open_parms) {
1053 .tcon = tcon,
1054 .cifs_sb = cifs_sb,
1055 .desired_access = desired_access,
1056 .create_options = cifs_create_options(cifs_sb, create_options),
1057 .disposition = disposition,
1058 .path = full_path,
1059 .fid = &cfile->fid,
1060 .reconnect = true,
1061 };
1062
1063 /*
1064 * We cannot refresh the inode by passing in a file_info buf to be returned
1065 * by ops->open and then calling get_inode_info with the returned buf, since
1066 * the file might have write-behind data that needs to be flushed and the
1067 * server's version of the file size can be stale. If we knew for sure that
1068 * the inode was not dirty locally, we could do this.
1069 */
1070 rc = server->ops->open(xid, &oparms, &oplock, NULL);
1071 if (rc == -ENOENT && oparms.reconnect == false) {
1072 /* durable handle timeout is expired - open the file again */
1073 rc = server->ops->open(xid, &oparms, &oplock, NULL);
1074 /* indicate that we need to relock the file */
1075 oparms.reconnect = true;
1076 }
1077 if (rc == -EACCES && rdwr_for_fscache == 1) {
1078 desired_access = cifs_convert_flags(cfile->f_flags, 0);
1079 rdwr_for_fscache = 2;
1080 goto retry_open;
1081 }
1082
1083 if (rc) {
1084 mutex_unlock(&cfile->fh_mutex);
1085 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
1086 cifs_dbg(FYI, "oplock: %d\n", oplock);
1087 goto reopen_error_exit;
1088 }
1089
1090 if (rdwr_for_fscache == 2)
1091 cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
1092
1093#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1094reopen_success:
1095#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1096 cfile->invalidHandle = false;
1097 mutex_unlock(&cfile->fh_mutex);
1098 cinode = CIFS_I(inode);
1099
1100 if (can_flush) {
1101 rc = filemap_write_and_wait(inode->i_mapping);
1102 if (!is_interrupt_error(rc))
1103 mapping_set_error(inode->i_mapping, rc);
1104
1105 if (tcon->posix_extensions) {
1106 rc = smb311_posix_get_inode_info(&inode, full_path,
1107 NULL, inode->i_sb, xid);
1108 } else if (tcon->unix_ext) {
1109 rc = cifs_get_inode_info_unix(&inode, full_path,
1110 inode->i_sb, xid);
1111 } else {
1112 rc = cifs_get_inode_info(&inode, full_path, NULL,
1113 inode->i_sb, xid, NULL);
1114 }
1115 }
1116 /*
1117 * Otherwise we are already writing data out to the server and could
1118 * deadlock if we tried to flush it. Since we do not know whether we have
1119 * data that would invalidate the current end of file on the server, we
1120 * cannot go to the server to get the new inode info.
1121 */
1122
1123 /*
1124 * If the server returned a read oplock and we have mandatory brlocks,
1125 * set oplock level to None.
1126 */
1127 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
1128 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
1129 oplock = 0;
1130 }
1131
1132 server->ops->set_fid(cfile, &cfile->fid, oplock);
1133 if (oparms.reconnect)
1134 cifs_relock_file(cfile);
1135
1136reopen_error_exit:
1137 free_dentry_path(page);
1138 free_xid(xid);
1139 return rc;
1140}
1141
1142void smb2_deferred_work_close(struct work_struct *work)
1143{
1144 struct cifsFileInfo *cfile = container_of(work,
1145 struct cifsFileInfo, deferred.work);
1146
1147 spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1148 cifs_del_deferred_close(cfile);
1149 cfile->deferred_close_scheduled = false;
1150 spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1151 _cifsFileInfo_put(cfile, true, false);
1152}
1153
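/*
 * A close may be deferred only when a close timeout is configured, a lease
 * that caches reads (RH or RHW) has been granted, a deferred-close structure
 * was allocated, and CIFS_INO_CLOSE_ON_LOCK has not been set for the inode.
 */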
1154static bool
1155smb2_can_defer_close(struct inode *inode, struct cifs_deferred_close *dclose)
1156{
1157 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1158 struct cifsInodeInfo *cinode = CIFS_I(inode);
1159
1160 return (cifs_sb->ctx->closetimeo && cinode->lease_granted && dclose &&
1161 (cinode->oplock == CIFS_CACHE_RHW_FLG ||
1162 cinode->oplock == CIFS_CACHE_RH_FLG) &&
1163 !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags));
1164
1165}
1166
1167int cifs_close(struct inode *inode, struct file *file)
1168{
1169 struct cifsFileInfo *cfile;
1170 struct cifsInodeInfo *cinode = CIFS_I(inode);
1171 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1172 struct cifs_deferred_close *dclose;
1173
1174 cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
1175
1176 if (file->private_data != NULL) {
1177 cfile = file->private_data;
1178 file->private_data = NULL;
1179 dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
1180 if ((cfile->status_file_deleted == false) &&
1181 (smb2_can_defer_close(inode, dclose))) {
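/*
 * The file was modified through this handle; update the in-core
 * mtime/ctime now since the actual close is being deferred.
 */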
1182 if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
1183 inode_set_mtime_to_ts(inode,
1184 inode_set_ctime_current(inode));
1185 }
1186 spin_lock(&cinode->deferred_lock);
1187 cifs_add_deferred_close(cfile, dclose);
1188 if (cfile->deferred_close_scheduled &&
1189 delayed_work_pending(&cfile->deferred)) {
1190 /*
1191 * If there is no pending work, mod_delayed_work queues new work,
1192 * so increase the ref count to avoid a use-after-free.
1193 */
1194 if (!mod_delayed_work(deferredclose_wq,
1195 &cfile->deferred, cifs_sb->ctx->closetimeo))
1196 cifsFileInfo_get(cfile);
1197 } else {
1198 /* Deferred close for files */
1199 queue_delayed_work(deferredclose_wq,
1200 &cfile->deferred, cifs_sb->ctx->closetimeo);
1201 cfile->deferred_close_scheduled = true;
1202 spin_unlock(&cinode->deferred_lock);
1203 return 0;
1204 }
1205 spin_unlock(&cinode->deferred_lock);
1206 _cifsFileInfo_put(cfile, true, false);
1207 } else {
1208 _cifsFileInfo_put(cfile, true, false);
1209 kfree(dclose);
1210 }
1211 }
1212
1213 /* return code from the ->release op is always ignored */
1214 return 0;
1215}
1216
1217void
1218cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1219{
1220 struct cifsFileInfo *open_file, *tmp;
1221 struct list_head tmp_list;
1222
1223 if (!tcon->use_persistent || !tcon->need_reopen_files)
1224 return;
1225
1226 tcon->need_reopen_files = false;
1227
1228 cifs_dbg(FYI, "Reopen persistent handles\n");
1229 INIT_LIST_HEAD(&tmp_list);
1230
1231 /* list all files open on tree connection, reopen persistent handles */
1232 spin_lock(&tcon->open_file_lock);
1233 list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1234 if (!open_file->invalidHandle)
1235 continue;
1236 cifsFileInfo_get(open_file);
1237 list_add_tail(&open_file->rlist, &tmp_list);
1238 }
1239 spin_unlock(&tcon->open_file_lock);
1240
1241 list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1242 if (cifs_reopen_file(open_file, false /* do not flush */))
1243 tcon->need_reopen_files = true;
1244 list_del_init(&open_file->rlist);
1245 cifsFileInfo_put(open_file);
1246 }
1247}
1248
1249int cifs_closedir(struct inode *inode, struct file *file)
1250{
1251 int rc = 0;
1252 unsigned int xid;
1253 struct cifsFileInfo *cfile = file->private_data;
1254 struct cifs_tcon *tcon;
1255 struct TCP_Server_Info *server;
1256 char *buf;
1257
1258 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1259
1260 if (cfile == NULL)
1261 return rc;
1262
1263 xid = get_xid();
1264 tcon = tlink_tcon(cfile->tlink);
1265 server = tcon->ses->server;
1266
1267 cifs_dbg(FYI, "Freeing private data in close dir\n");
1268 spin_lock(&cfile->file_info_lock);
1269 if (server->ops->dir_needs_close(cfile)) {
1270 cfile->invalidHandle = true;
1271 spin_unlock(&cfile->file_info_lock);
1272 if (server->ops->close_dir)
1273 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1274 else
1275 rc = -ENOSYS;
1276 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1277 /* not much we can do if it fails anyway, ignore rc */
1278 rc = 0;
1279 } else
1280 spin_unlock(&cfile->file_info_lock);
1281
1282 buf = cfile->srch_inf.ntwrk_buf_start;
1283 if (buf) {
1284 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1285 cfile->srch_inf.ntwrk_buf_start = NULL;
1286 if (cfile->srch_inf.smallBuf)
1287 cifs_small_buf_release(buf);
1288 else
1289 cifs_buf_release(buf);
1290 }
1291
1292 cifs_put_tlink(cfile->tlink);
1293 kfree(file->private_data);
1294 file->private_data = NULL;
1295 /* BB can we lock the filestruct while this is going on? */
1296 free_xid(xid);
1297 return rc;
1298}
1299
1300static struct cifsLockInfo *
1301cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1302{
1303 struct cifsLockInfo *lock =
1304 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1305 if (!lock)
1306 return lock;
1307 lock->offset = offset;
1308 lock->length = length;
1309 lock->type = type;
1310 lock->pid = current->tgid;
1311 lock->flags = flags;
1312 INIT_LIST_HEAD(&lock->blist);
1313 init_waitqueue_head(&lock->block_q);
1314 return lock;
1315}
1316
1317void
1318cifs_del_lock_waiters(struct cifsLockInfo *lock)
1319{
1320 struct cifsLockInfo *li, *tmp;
1321 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1322 list_del_init(&li->blist);
1323 wake_up(&li->block_q);
1324 }
1325}
1326
1327#define CIFS_LOCK_OP 0
1328#define CIFS_READ_OP 1
1329#define CIFS_WRITE_OP 2
1330
1331/* @rw_check : 0 - no op, 1 - read, 2 - write */
1332static bool
1333cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1334 __u64 length, __u8 type, __u16 flags,
1335 struct cifsFileInfo *cfile,
1336 struct cifsLockInfo **conf_lock, int rw_check)
1337{
1338 struct cifsLockInfo *li;
1339 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1340 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1341
1342 list_for_each_entry(li, &fdlocks->locks, llist) {
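/* Skip locks whose ranges do not overlap the requested range. */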
1343 if (offset + length <= li->offset ||
1344 offset >= li->offset + li->length)
1345 continue;
1346 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1347 server->ops->compare_fids(cfile, cur_cfile)) {
1348 /* shared lock prevents write op through the same fid */
1349 if (!(li->type & server->vals->shared_lock_type) ||
1350 rw_check != CIFS_WRITE_OP)
1351 continue;
1352 }
1353 if ((type & server->vals->shared_lock_type) &&
1354 ((server->ops->compare_fids(cfile, cur_cfile) &&
1355 current->tgid == li->pid) || type == li->type))
1356 continue;
1357 if (rw_check == CIFS_LOCK_OP &&
1358 (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1359 server->ops->compare_fids(cfile, cur_cfile))
1360 continue;
1361 if (conf_lock)
1362 *conf_lock = li;
1363 return true;
1364 }
1365 return false;
1366}
1367
1368bool
1369cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1370 __u8 type, __u16 flags,
1371 struct cifsLockInfo **conf_lock, int rw_check)
1372{
1373 bool rc = false;
1374 struct cifs_fid_locks *cur;
1375 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1376
1377 list_for_each_entry(cur, &cinode->llist, llist) {
1378 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1379 flags, cfile, conf_lock,
1380 rw_check);
1381 if (rc)
1382 break;
1383 }
1384
1385 return rc;
1386}
1387
1388/*
1389 * Check if there is another lock that prevents us from setting the lock
1390 * (mandatory style). If such a lock exists, update the flock structure with
1391 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1392 * brlocks, or leave it unchanged if we can't. Returns 0 if we don't need to
1393 * send a request to the server, or 1 otherwise.
1394 */
1395static int
1396cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1397 __u8 type, struct file_lock *flock)
1398{
1399 int rc = 0;
1400 struct cifsLockInfo *conf_lock;
1401 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1402 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1403 bool exist;
1404
1405 down_read(&cinode->lock_sem);
1406
1407 exist = cifs_find_lock_conflict(cfile, offset, length, type,
1408 flock->c.flc_flags, &conf_lock,
1409 CIFS_LOCK_OP);
1410 if (exist) {
1411 flock->fl_start = conf_lock->offset;
1412 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1413 flock->c.flc_pid = conf_lock->pid;
1414 if (conf_lock->type & server->vals->shared_lock_type)
1415 flock->c.flc_type = F_RDLCK;
1416 else
1417 flock->c.flc_type = F_WRLCK;
1418 } else if (!cinode->can_cache_brlcks)
1419 rc = 1;
1420 else
1421 flock->c.flc_type = F_UNLCK;
1422
1423 up_read(&cinode->lock_sem);
1424 return rc;
1425}
1426
1427static void
1428cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1429{
1430 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1431 cifs_down_write(&cinode->lock_sem);
1432 list_add_tail(&lock->llist, &cfile->llist->locks);
1433 up_write(&cinode->lock_sem);
1434}
1435
1436/*
1437 * Set the byte-range lock (mandatory style). Returns:
1438 * 1) 0, if we set the lock and don't need to send a request to the server;
1439 * 2) 1, if no locks prevent us but we need to send a request to the server;
1440 * 3) -EACCES, if there is a lock that prevents us and wait is false.
1441 */
1442static int
1443cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1444 bool wait)
1445{
1446 struct cifsLockInfo *conf_lock;
1447 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1448 bool exist;
1449 int rc = 0;
1450
1451try_again:
1452 exist = false;
1453 cifs_down_write(&cinode->lock_sem);
1454
1455 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1456 lock->type, lock->flags, &conf_lock,
1457 CIFS_LOCK_OP);
1458 if (!exist && cinode->can_cache_brlcks) {
1459 list_add_tail(&lock->llist, &cfile->llist->locks);
1460 up_write(&cinode->lock_sem);
1461 return rc;
1462 }
1463
1464 if (!exist)
1465 rc = 1;
1466 else if (!wait)
1467 rc = -EACCES;
1468 else {
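/*
 * Queue ourselves on the conflicting lock's blocked list and wait until
 * cifs_del_lock_waiters() removes us (the list becomes empty again),
 * then rescan for conflicts.
 */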
1469 list_add_tail(&lock->blist, &conf_lock->blist);
1470 up_write(&cinode->lock_sem);
1471 rc = wait_event_interruptible(lock->block_q,
1472 (lock->blist.prev == &lock->blist) &&
1473 (lock->blist.next == &lock->blist));
1474 if (!rc)
1475 goto try_again;
1476 cifs_down_write(&cinode->lock_sem);
1477 list_del_init(&lock->blist);
1478 }
1479
1480 up_write(&cinode->lock_sem);
1481 return rc;
1482}
1483
1484#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1485/*
1486 * Check if there is another lock that prevents us from setting the lock
1487 * (posix style). If such a lock exists, update the flock structure with
1488 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1489 * brlocks, or leave it unchanged if we can't. Returns 0 if we don't need to
1490 * send a request to the server, or 1 otherwise.
1491 */
1492static int
1493cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1494{
1495 int rc = 0;
1496 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1497 unsigned char saved_type = flock->c.flc_type;
1498
1499 if ((flock->c.flc_flags & FL_POSIX) == 0)
1500 return 1;
1501
1502 down_read(&cinode->lock_sem);
1503 posix_test_lock(file, flock);
1504
1505 if (lock_is_unlock(flock) && !cinode->can_cache_brlcks) {
1506 flock->c.flc_type = saved_type;
1507 rc = 1;
1508 }
1509
1510 up_read(&cinode->lock_sem);
1511 return rc;
1512}
1513
1514/*
1515 * Set the byte-range lock (posix style). Returns:
1516 * 1) <0, if an error occurs while setting the lock;
1517 * 2) 0, if we set the lock and don't need to send a request to the server;
1518 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1519 * 4) FILE_LOCK_DEFERRED + 1, if we need to send a request to the server.
1520 */
1521static int
1522cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1523{
1524 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1525 int rc = FILE_LOCK_DEFERRED + 1;
1526
1527 if ((flock->c.flc_flags & FL_POSIX) == 0)
1528 return rc;
1529
1530 cifs_down_write(&cinode->lock_sem);
1531 if (!cinode->can_cache_brlcks) {
1532 up_write(&cinode->lock_sem);
1533 return rc;
1534 }
1535
1536 rc = posix_lock_file(file, flock, NULL);
1537 up_write(&cinode->lock_sem);
1538 return rc;
1539}
1540
1541int
1542cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1543{
1544 unsigned int xid;
1545 int rc = 0, stored_rc;
1546 struct cifsLockInfo *li, *tmp;
1547 struct cifs_tcon *tcon;
1548 unsigned int num, max_num, max_buf;
1549 LOCKING_ANDX_RANGE *buf, *cur;
1550 static const int types[] = {
1551 LOCKING_ANDX_LARGE_FILES,
1552 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1553 };
1554 int i;
1555
1556 xid = get_xid();
1557 tcon = tlink_tcon(cfile->tlink);
1558
1559 /*
1560 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1561 * and check it before using.
1562 */
1563 max_buf = tcon->ses->server->maxBuf;
1564 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1565 free_xid(xid);
1566 return -EINVAL;
1567 }
1568
1569 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1570 PAGE_SIZE);
1571 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1572 PAGE_SIZE);
1573 max_num = (max_buf - sizeof(struct smb_hdr)) /
1574 sizeof(LOCKING_ANDX_RANGE);
1575 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1576 if (!buf) {
1577 free_xid(xid);
1578 return -ENOMEM;
1579 }
1580
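/*
 * Push exclusive and shared locks in separate passes, since each
 * LOCKING_ANDX request carries a single lock type for all of its ranges.
 */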
1581 for (i = 0; i < 2; i++) {
1582 cur = buf;
1583 num = 0;
1584 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1585 if (li->type != types[i])
1586 continue;
1587 cur->Pid = cpu_to_le16(li->pid);
1588 cur->LengthLow = cpu_to_le32((u32)li->length);
1589 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1590 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1591 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1592 if (++num == max_num) {
1593 stored_rc = cifs_lockv(xid, tcon,
1594 cfile->fid.netfid,
1595 (__u8)li->type, 0, num,
1596 buf);
1597 if (stored_rc)
1598 rc = stored_rc;
1599 cur = buf;
1600 num = 0;
1601 } else
1602 cur++;
1603 }
1604
1605 if (num) {
1606 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1607 (__u8)types[i], 0, num, buf);
1608 if (stored_rc)
1609 rc = stored_rc;
1610 }
1611 }
1612
1613 kfree(buf);
1614 free_xid(xid);
1615 return rc;
1616}
1617
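/*
 * Map an fl_owner_t to a 32-bit owner id for SMB1 POSIX lock requests by
 * mixing it with cifs_lock_secret, so that raw kernel pointers are not
 * sent on the wire.
 */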
1618static __u32
1619hash_lockowner(fl_owner_t owner)
1620{
1621 return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1622}
1623#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1624
1625struct lock_to_push {
1626 struct list_head llist;
1627 __u64 offset;
1628 __u64 length;
1629 __u32 pid;
1630 __u16 netfid;
1631 __u8 type;
1632};
1633
1634#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1635static int
1636cifs_push_posix_locks(struct cifsFileInfo *cfile)
1637{
1638 struct inode *inode = d_inode(cfile->dentry);
1639 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1640 struct file_lock *flock;
1641 struct file_lock_context *flctx = locks_inode_context(inode);
1642 unsigned int count = 0, i;
1643 int rc = 0, xid, type;
1644 struct list_head locks_to_send, *el;
1645 struct lock_to_push *lck, *tmp;
1646 __u64 length;
1647
1648 xid = get_xid();
1649
1650 if (!flctx)
1651 goto out;
1652
1653 spin_lock(&flctx->flc_lock);
1654 list_for_each(el, &flctx->flc_posix) {
1655 count++;
1656 }
1657 spin_unlock(&flctx->flc_lock);
1658
1659 INIT_LIST_HEAD(&locks_to_send);
1660
1661 /*
1662 * Allocating count locks is enough because no FL_POSIX locks can be
1663 * added to the list while we are holding cinode->lock_sem that
1664 * protects locking operations of this inode.
1665 */
1666 for (i = 0; i < count; i++) {
1667 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1668 if (!lck) {
1669 rc = -ENOMEM;
1670 goto err_out;
1671 }
1672 list_add_tail(&lck->llist, &locks_to_send);
1673 }
1674
1675 el = locks_to_send.next;
1676 spin_lock(&flctx->flc_lock);
1677 for_each_file_lock(flock, &flctx->flc_posix) {
1678 unsigned char ftype = flock->c.flc_type;
1679
1680 if (el == &locks_to_send) {
1681 /*
1682 * The list ended. We don't have enough allocated
1683 * structures - something is really wrong.
1684 */
1685 cifs_dbg(VFS, "Can't push all brlocks!\n");
1686 break;
1687 }
1688 length = cifs_flock_len(flock);
1689 if (ftype == F_RDLCK || ftype == F_SHLCK)
1690 type = CIFS_RDLCK;
1691 else
1692 type = CIFS_WRLCK;
1693 lck = list_entry(el, struct lock_to_push, llist);
1694 lck->pid = hash_lockowner(flock->c.flc_owner);
1695 lck->netfid = cfile->fid.netfid;
1696 lck->length = length;
1697 lck->type = type;
1698 lck->offset = flock->fl_start;
1699 }
1700 spin_unlock(&flctx->flc_lock);
1701
1702 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1703 int stored_rc;
1704
1705 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1706 lck->offset, lck->length, NULL,
1707 lck->type, 0);
1708 if (stored_rc)
1709 rc = stored_rc;
1710 list_del(&lck->llist);
1711 kfree(lck);
1712 }
1713
1714out:
1715 free_xid(xid);
1716 return rc;
1717err_out:
1718 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1719 list_del(&lck->llist);
1720 kfree(lck);
1721 }
1722 goto out;
1723}
1724#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1725
1726static int
1727cifs_push_locks(struct cifsFileInfo *cfile)
1728{
1729 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1730 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1731 int rc = 0;
1732#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1733 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1734#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1735
1736 /* we are going to update can_cache_brlcks here - need a write access */
1737 cifs_down_write(&cinode->lock_sem);
1738 if (!cinode->can_cache_brlcks) {
1739 up_write(&cinode->lock_sem);
1740 return rc;
1741 }
1742
1743#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1744 if (cap_unix(tcon->ses) &&
1745 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1746 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1747 rc = cifs_push_posix_locks(cfile);
1748 else
1749#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1750 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1751
1752 cinode->can_cache_brlcks = false;
1753 up_write(&cinode->lock_sem);
1754 return rc;
1755}
1756
1757static void
1758cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1759 bool *wait_flag, struct TCP_Server_Info *server)
1760{
1761 if (flock->c.flc_flags & FL_POSIX)
1762 cifs_dbg(FYI, "Posix\n");
1763 if (flock->c.flc_flags & FL_FLOCK)
1764 cifs_dbg(FYI, "Flock\n");
1765 if (flock->c.flc_flags & FL_SLEEP) {
1766 cifs_dbg(FYI, "Blocking lock\n");
1767 *wait_flag = true;
1768 }
1769 if (flock->c.flc_flags & FL_ACCESS)
1770 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1771 if (flock->c.flc_flags & FL_LEASE)
1772 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1773 if (flock->c.flc_flags &
1774 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1775 FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1776 cifs_dbg(FYI, "Unknown lock flags 0x%x\n",
1777 flock->c.flc_flags);
1778
1779 *type = server->vals->large_lock_type;
1780 if (lock_is_write(flock)) {
1781 cifs_dbg(FYI, "F_WRLCK\n");
1782 *type |= server->vals->exclusive_lock_type;
1783 *lock = 1;
1784 } else if (lock_is_unlock(flock)) {
1785 cifs_dbg(FYI, "F_UNLCK\n");
1786 *type |= server->vals->unlock_lock_type;
1787 *unlock = 1;
1788 /* Check if unlock includes more than one lock range */
1789 } else if (lock_is_read(flock)) {
1790 cifs_dbg(FYI, "F_RDLCK\n");
1791 *type |= server->vals->shared_lock_type;
1792 *lock = 1;
1793 } else if (flock->c.flc_type == F_EXLCK) {
1794 cifs_dbg(FYI, "F_EXLCK\n");
1795 *type |= server->vals->exclusive_lock_type;
1796 *lock = 1;
1797 } else if (flock->c.flc_type == F_SHLCK) {
1798 cifs_dbg(FYI, "F_SHLCK\n");
1799 *type |= server->vals->shared_lock_type;
1800 *lock = 1;
1801 } else
1802 cifs_dbg(FYI, "Unknown type of lock\n");
1803}
1804
1805static int
1806cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1807 bool wait_flag, bool posix_lck, unsigned int xid)
1808{
1809 int rc = 0;
1810 __u64 length = cifs_flock_len(flock);
1811 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1812 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1813 struct TCP_Server_Info *server = tcon->ses->server;
1814#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1815 __u16 netfid = cfile->fid.netfid;
1816
1817 if (posix_lck) {
1818 int posix_lock_type;
1819
1820 rc = cifs_posix_lock_test(file, flock);
1821 if (!rc)
1822 return rc;
1823
1824 if (type & server->vals->shared_lock_type)
1825 posix_lock_type = CIFS_RDLCK;
1826 else
1827 posix_lock_type = CIFS_WRLCK;
1828 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1829 hash_lockowner(flock->c.flc_owner),
1830 flock->fl_start, length, flock,
1831 posix_lock_type, wait_flag);
1832 return rc;
1833 }
1834#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1835
1836 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1837 if (!rc)
1838 return rc;
1839
1840 /* BB we could chain these into one lock request BB */
1841 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1842 1, 0, false);
1843 if (rc == 0) {
1844 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1845 type, 0, 1, false);
1846 flock->c.flc_type = F_UNLCK;
1847 if (rc != 0)
1848 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1849 rc);
1850 return 0;
1851 }
1852
1853 if (type & server->vals->shared_lock_type) {
1854 flock->c.flc_type = F_WRLCK;
1855 return 0;
1856 }
1857
1858 type &= ~server->vals->exclusive_lock_type;
1859
1860 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1861 type | server->vals->shared_lock_type,
1862 1, 0, false);
1863 if (rc == 0) {
1864 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1865 type | server->vals->shared_lock_type, 0, 1, false);
1866 flock->c.flc_type = F_RDLCK;
1867 if (rc != 0)
1868 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1869 rc);
1870 } else
1871 flock->c.flc_type = F_WRLCK;
1872
1873 return 0;
1874}
1875
1876void
1877cifs_move_llist(struct list_head *source, struct list_head *dest)
1878{
1879 struct list_head *li, *tmp;
1880 list_for_each_safe(li, tmp, source)
1881 list_move(li, dest);
1882}
1883
1884void
1885cifs_free_llist(struct list_head *llist)
1886{
1887 struct cifsLockInfo *li, *tmp;
1888 list_for_each_entry_safe(li, tmp, llist, llist) {
1889 cifs_del_lock_waiters(li);
1890 list_del(&li->llist);
1891 kfree(li);
1892 }
1893}
1894
1895#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1896int
1897cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1898 unsigned int xid)
1899{
1900 int rc = 0, stored_rc;
1901 static const int types[] = {
1902 LOCKING_ANDX_LARGE_FILES,
1903 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1904 };
1905 unsigned int i;
1906 unsigned int max_num, num, max_buf;
1907 LOCKING_ANDX_RANGE *buf, *cur;
1908 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1909 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1910 struct cifsLockInfo *li, *tmp;
1911 __u64 length = cifs_flock_len(flock);
1912 struct list_head tmp_llist;
1913
1914 INIT_LIST_HEAD(&tmp_llist);
1915
1916 /*
1917 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1918 * and check it before using.
1919 */
1920 max_buf = tcon->ses->server->maxBuf;
1921 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1922 return -EINVAL;
1923
1924 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1925 PAGE_SIZE);
1926 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1927 PAGE_SIZE);
1928 max_num = (max_buf - sizeof(struct smb_hdr)) /
1929 sizeof(LOCKING_ANDX_RANGE);
1930 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1931 if (!buf)
1932 return -ENOMEM;
1933
1934 cifs_down_write(&cinode->lock_sem);
1935 for (i = 0; i < 2; i++) {
1936 cur = buf;
1937 num = 0;
1938 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1939 if (flock->fl_start > li->offset ||
1940 (flock->fl_start + length) <
1941 (li->offset + li->length))
1942 continue;
1943 if (current->tgid != li->pid)
1944 continue;
1945 if (types[i] != li->type)
1946 continue;
1947 if (cinode->can_cache_brlcks) {
1948 /*
1949 * We can cache brlock requests - simply remove
1950 * a lock from the file's list.
1951 */
1952 list_del(&li->llist);
1953 cifs_del_lock_waiters(li);
1954 kfree(li);
1955 continue;
1956 }
1957 cur->Pid = cpu_to_le16(li->pid);
1958 cur->LengthLow = cpu_to_le32((u32)li->length);
1959 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1960 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1961 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1962 /*
1963 * We need to save a lock here to let us add it again to
1964 * the file's list if the unlock range request fails on
1965 * the server.
1966 */
1967 list_move(&li->llist, &tmp_llist);
1968 if (++num == max_num) {
1969 stored_rc = cifs_lockv(xid, tcon,
1970 cfile->fid.netfid,
1971 li->type, num, 0, buf);
1972 if (stored_rc) {
1973 /*
1974 * We failed on the unlock range
1975 * request - add all locks from the tmp
1976 * list to the head of the file's list.
1977 */
1978 cifs_move_llist(&tmp_llist,
1979 &cfile->llist->locks);
1980 rc = stored_rc;
1981 } else
1982 /*
1983				 * The unlock range request succeeded -
1984 * free the tmp list.
1985 */
1986 cifs_free_llist(&tmp_llist);
1987 cur = buf;
1988 num = 0;
1989 } else
1990 cur++;
1991 }
1992 if (num) {
1993 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1994 types[i], num, 0, buf);
1995 if (stored_rc) {
1996 cifs_move_llist(&tmp_llist,
1997 &cfile->llist->locks);
1998 rc = stored_rc;
1999 } else
2000 cifs_free_llist(&tmp_llist);
2001 }
2002 }
2003
2004 up_write(&cinode->lock_sem);
2005 kfree(buf);
2006 return rc;
2007}
2008#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2009
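/*
 * Handle lock and unlock requests (F_SETLK/F_SETLKW and flock): set or
 * clear the byte range via the POSIX (unix extensions) or mandatory lock
 * operations, then keep the local VFS lock state in sync with
 * locks_lock_file_wait() for POSIX and flock style requests.
 */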
2010static int
2011cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
2012 bool wait_flag, bool posix_lck, int lock, int unlock,
2013 unsigned int xid)
2014{
2015 int rc = 0;
2016 __u64 length = cifs_flock_len(flock);
2017 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2018 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2019 struct TCP_Server_Info *server = tcon->ses->server;
2020 struct inode *inode = d_inode(cfile->dentry);
2021
2022#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
2023 if (posix_lck) {
2024 int posix_lock_type;
2025
2026 rc = cifs_posix_lock_set(file, flock);
2027 if (rc <= FILE_LOCK_DEFERRED)
2028 return rc;
2029
2030 if (type & server->vals->shared_lock_type)
2031 posix_lock_type = CIFS_RDLCK;
2032 else
2033 posix_lock_type = CIFS_WRLCK;
2034
2035 if (unlock == 1)
2036 posix_lock_type = CIFS_UNLCK;
2037
2038 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
2039 hash_lockowner(flock->c.flc_owner),
2040 flock->fl_start, length,
2041 NULL, posix_lock_type, wait_flag);
2042 goto out;
2043 }
2044#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2045 if (lock) {
2046 struct cifsLockInfo *lock;
2047
2048 lock = cifs_lock_init(flock->fl_start, length, type,
2049 flock->c.flc_flags);
2050 if (!lock)
2051 return -ENOMEM;
2052
2053 rc = cifs_lock_add_if(cfile, lock, wait_flag);
2054 if (rc < 0) {
2055 kfree(lock);
2056 return rc;
2057 }
2058 if (!rc)
2059 goto out;
2060
2061 /*
2062 * Windows 7 server can delay breaking lease from read to None
2063 * if we set a byte-range lock on a file - break it explicitly
2064 * before sending the lock to the server to be sure the next
2065		 * read won't conflict with non-overlapping locks due to
2066		 * page reading.
2067 */
2068 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
2069 CIFS_CACHE_READ(CIFS_I(inode))) {
2070 cifs_zap_mapping(inode);
2071 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
2072 inode);
2073 CIFS_I(inode)->oplock = 0;
2074 }
2075
2076 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
2077 type, 1, 0, wait_flag);
2078 if (rc) {
2079 kfree(lock);
2080 return rc;
2081 }
2082
2083 cifs_lock_add(cfile, lock);
2084 } else if (unlock)
2085 rc = server->ops->mand_unlock_range(cfile, flock, xid);
2086
2087out:
2088 if ((flock->c.flc_flags & FL_POSIX) || (flock->c.flc_flags & FL_FLOCK)) {
2089 /*
2090 * If this is a request to remove all locks because we
2091 * are closing the file, it doesn't matter if the
2092 * unlocking failed as both cifs.ko and the SMB server
2093 * remove the lock on file close
2094 */
2095 if (rc) {
2096 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2097 if (!(flock->c.flc_flags & FL_CLOSE))
2098 return rc;
2099 }
2100 rc = locks_lock_file_wait(file, flock);
2101 }
2102 return rc;
2103}
2104
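/* flock(2) entry point - only FL_FLOCK style requests are handled here */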
2105int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2106{
2107 int rc, xid;
2108 int lock = 0, unlock = 0;
2109 bool wait_flag = false;
2110 bool posix_lck = false;
2111 struct cifs_sb_info *cifs_sb;
2112 struct cifs_tcon *tcon;
2113 struct cifsFileInfo *cfile;
2114 __u32 type;
2115
2116 xid = get_xid();
2117
2118 if (!(fl->c.flc_flags & FL_FLOCK)) {
2119 rc = -ENOLCK;
2120 free_xid(xid);
2121 return rc;
2122 }
2123
2124 cfile = (struct cifsFileInfo *)file->private_data;
2125 tcon = tlink_tcon(cfile->tlink);
2126
2127 cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2128 tcon->ses->server);
2129 cifs_sb = CIFS_FILE_SB(file);
2130
2131 if (cap_unix(tcon->ses) &&
2132 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2133 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2134 posix_lck = true;
2135
2136 if (!lock && !unlock) {
2137 /*
2138		 * if this is neither a lock nor an unlock request then there is
2139		 * nothing to do since we do not know what it is
2140 */
2141 rc = -EOPNOTSUPP;
2142 free_xid(xid);
2143 return rc;
2144 }
2145
2146 rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2147 xid);
2148 free_xid(xid);
2149 return rc;
2152}
2153
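/* fcntl(2) byte-range lock entry point (F_GETLK, F_SETLK and F_SETLKW) */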
2154int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2155{
2156 int rc, xid;
2157 int lock = 0, unlock = 0;
2158 bool wait_flag = false;
2159 bool posix_lck = false;
2160 struct cifs_sb_info *cifs_sb;
2161 struct cifs_tcon *tcon;
2162 struct cifsFileInfo *cfile;
2163 __u32 type;
2164
2165 rc = -EACCES;
2166 xid = get_xid();
2167
2168 cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
2169 flock->c.flc_flags, flock->c.flc_type,
2170 (long long)flock->fl_start,
2171 (long long)flock->fl_end);
2172
2173 cfile = (struct cifsFileInfo *)file->private_data;
2174 tcon = tlink_tcon(cfile->tlink);
2175
2176 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2177 tcon->ses->server);
2178 cifs_sb = CIFS_FILE_SB(file);
2179 set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2180
2181 if (cap_unix(tcon->ses) &&
2182 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2183 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2184 posix_lck = true;
2185 /*
2186 * BB add code here to normalize offset and length to account for
2187 * negative length which we can not accept over the wire.
2188 */
2189 if (IS_GETLK(cmd)) {
2190 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2191 free_xid(xid);
2192 return rc;
2193 }
2194
2195 if (!lock && !unlock) {
2196 /*
2197		 * if this is neither a lock nor an unlock request then there is
2198		 * nothing to do since we do not know what it is
2199 */
2200 free_xid(xid);
2201 return -EOPNOTSUPP;
2202 }
2203
2204 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2205 xid);
2206 free_xid(xid);
2207 return rc;
2208}
2209
2210/*
2211 * update the file size (if needed) after a write. Should be called with
2212 * the inode->i_lock held
2213 */
2214void
2215cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2216 unsigned int bytes_written)
2217{
2218 loff_t end_of_write = offset + bytes_written;
2219
2220 if (end_of_write > cifsi->netfs.remote_i_size)
2221 netfs_resize_file(&cifsi->netfs, end_of_write, true);
2222}
2223
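/*
 * Synchronously write @write_size bytes from @write_data to the server at
 * *@offset, looping in chunks bounded by wp_retry_size() and reopening the
 * handle if it has been invalidated.  Updates the cached EOF and i_size and
 * returns the number of bytes written, or an error if nothing was written.
 */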
2224static ssize_t
2225cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2226 size_t write_size, loff_t *offset)
2227{
2228 int rc = 0;
2229 unsigned int bytes_written = 0;
2230 unsigned int total_written;
2231 struct cifs_tcon *tcon;
2232 struct TCP_Server_Info *server;
2233 unsigned int xid;
2234 struct dentry *dentry = open_file->dentry;
2235 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2236 struct cifs_io_parms io_parms = {0};
2237
2238 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2239 write_size, *offset, dentry);
2240
2241 tcon = tlink_tcon(open_file->tlink);
2242 server = tcon->ses->server;
2243
2244 if (!server->ops->sync_write)
2245 return -ENOSYS;
2246
2247 xid = get_xid();
2248
2249 for (total_written = 0; write_size > total_written;
2250 total_written += bytes_written) {
2251 rc = -EAGAIN;
2252 while (rc == -EAGAIN) {
2253 struct kvec iov[2];
2254 unsigned int len;
2255
2256 if (open_file->invalidHandle) {
2257 /* we could deadlock if we called
2258 filemap_fdatawait from here so tell
2259 reopen_file not to flush data to
2260 server now */
2261 rc = cifs_reopen_file(open_file, false);
2262 if (rc != 0)
2263 break;
2264 }
2265
2266 len = min(server->ops->wp_retry_size(d_inode(dentry)),
2267 (unsigned int)write_size - total_written);
2268 /* iov[0] is reserved for smb header */
2269 iov[1].iov_base = (char *)write_data + total_written;
2270 iov[1].iov_len = len;
2271 io_parms.pid = pid;
2272 io_parms.tcon = tcon;
2273 io_parms.offset = *offset;
2274 io_parms.length = len;
2275 rc = server->ops->sync_write(xid, &open_file->fid,
2276 &io_parms, &bytes_written, iov, 1);
2277 }
2278 if (rc || (bytes_written == 0)) {
2279 if (total_written)
2280 break;
2281 else {
2282 free_xid(xid);
2283 return rc;
2284 }
2285 } else {
2286 spin_lock(&d_inode(dentry)->i_lock);
2287 cifs_update_eof(cifsi, *offset, bytes_written);
2288 spin_unlock(&d_inode(dentry)->i_lock);
2289 *offset += bytes_written;
2290 }
2291 }
2292
2293 cifs_stats_bytes_written(tcon, total_written);
2294
2295 if (total_written > 0) {
2296 spin_lock(&d_inode(dentry)->i_lock);
2297 if (*offset > d_inode(dentry)->i_size) {
2298 i_size_write(d_inode(dentry), *offset);
2299 d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2300 }
2301 spin_unlock(&d_inode(dentry)->i_lock);
2302 }
2303 mark_inode_dirty_sync(d_inode(dentry));
2304 free_xid(xid);
2305 return total_written;
2306}
2307
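/*
 * Find an already-open handle on this inode that is usable for reading,
 * filtering by the caller's fsuid on multiuser mounts.  A reference is
 * taken on the returned handle; NULL is returned if none is found.
 */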
2308struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2309 bool fsuid_only)
2310{
2311 struct cifsFileInfo *open_file = NULL;
2312 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2313
2314 /* only filter by fsuid on multiuser mounts */
2315 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2316 fsuid_only = false;
2317
2318 spin_lock(&cifs_inode->open_file_lock);
2319	/* we could simply take the first list entry since write-only entries
2320	   are always at the end of the list, but the first entry might have
2321	   a close pending, so we go through the whole list */
2322 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2323 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2324 continue;
2325 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2326 if ((!open_file->invalidHandle)) {
2327 /* found a good file */
2328 /* lock it so it will not be closed on us */
2329 cifsFileInfo_get(open_file);
2330 spin_unlock(&cifs_inode->open_file_lock);
2331 return open_file;
2332 } /* else might as well continue, and look for
2333 another, or simply have the caller reopen it
2334 again rather than trying to fix this handle */
2335 } else /* write only file */
2336 break; /* write only files are last so must be done */
2337 }
2338 spin_unlock(&cifs_inode->open_file_lock);
2339 return NULL;
2340}
2341
2342/* Return -EBADF if no handle is found and general rc otherwise */
2343int
2344cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2345 struct cifsFileInfo **ret_file)
2346{
2347 struct cifsFileInfo *open_file, *inv_file = NULL;
2348 struct cifs_sb_info *cifs_sb;
2349 bool any_available = false;
2350 int rc = -EBADF;
2351 unsigned int refind = 0;
2352 bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2353 bool with_delete = flags & FIND_WR_WITH_DELETE;
2354 *ret_file = NULL;
2355
2356 /*
2357 * Having a null inode here (because mapping->host was set to zero by
2358	 * the VFS or MM) should not happen but we had reports of an oops (due
2359	 * to it being zero) during stress test cases so we need to check for it
2360 */
2361
2362 if (cifs_inode == NULL) {
2363 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
2364 dump_stack();
2365 return rc;
2366 }
2367
2368 cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2369
2370 /* only filter by fsuid on multiuser mounts */
2371 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2372 fsuid_only = false;
2373
2374 spin_lock(&cifs_inode->open_file_lock);
2375refind_writable:
2376 if (refind > MAX_REOPEN_ATT) {
2377 spin_unlock(&cifs_inode->open_file_lock);
2378 return rc;
2379 }
2380 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2381 if (!any_available && open_file->pid != current->tgid)
2382 continue;
2383 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2384 continue;
2385 if (with_delete && !(open_file->fid.access & DELETE))
2386 continue;
2387 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2388 if (!open_file->invalidHandle) {
2389 /* found a good writable file */
2390 cifsFileInfo_get(open_file);
2391 spin_unlock(&cifs_inode->open_file_lock);
2392 *ret_file = open_file;
2393 return 0;
2394 } else {
2395 if (!inv_file)
2396 inv_file = open_file;
2397 }
2398 }
2399 }
2400 /* couldn't find useable FH with same pid, try any available */
2401 if (!any_available) {
2402 any_available = true;
2403 goto refind_writable;
2404 }
2405
2406 if (inv_file) {
2407 any_available = false;
2408 cifsFileInfo_get(inv_file);
2409 }
2410
2411 spin_unlock(&cifs_inode->open_file_lock);
2412
2413 if (inv_file) {
2414 rc = cifs_reopen_file(inv_file, false);
2415 if (!rc) {
2416 *ret_file = inv_file;
2417 return 0;
2418 }
2419
2420 spin_lock(&cifs_inode->open_file_lock);
2421 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2422 spin_unlock(&cifs_inode->open_file_lock);
2423 cifsFileInfo_put(inv_file);
2424 ++refind;
2425 inv_file = NULL;
2426 spin_lock(&cifs_inode->open_file_lock);
2427 goto refind_writable;
2428 }
2429
2430 return rc;
2431}
2432
2433struct cifsFileInfo *
2434find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2435{
2436 struct cifsFileInfo *cfile;
2437 int rc;
2438
2439 rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2440 if (rc)
2441 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2442
2443 return cfile;
2444}
2445
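/*
 * Look up an open file on @tcon by pathname and get a writable handle for
 * its inode via cifs_get_writable_file().  Returns -ENOENT if no open file
 * matches @name.
 */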
2446int
2447cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2448 int flags,
2449 struct cifsFileInfo **ret_file)
2450{
2451 struct cifsFileInfo *cfile;
2452 void *page = alloc_dentry_path();
2453
2454 *ret_file = NULL;
2455
2456 spin_lock(&tcon->open_file_lock);
2457 list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2458 struct cifsInodeInfo *cinode;
2459 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2460 if (IS_ERR(full_path)) {
2461 spin_unlock(&tcon->open_file_lock);
2462 free_dentry_path(page);
2463 return PTR_ERR(full_path);
2464 }
2465 if (strcmp(full_path, name))
2466 continue;
2467
2468 cinode = CIFS_I(d_inode(cfile->dentry));
2469 spin_unlock(&tcon->open_file_lock);
2470 free_dentry_path(page);
2471 return cifs_get_writable_file(cinode, flags, ret_file);
2472 }
2473
2474 spin_unlock(&tcon->open_file_lock);
2475 free_dentry_path(page);
2476 return -ENOENT;
2477}
2478
2479int
2480cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2481 struct cifsFileInfo **ret_file)
2482{
2483 struct cifsFileInfo *cfile;
2484 void *page = alloc_dentry_path();
2485
2486 *ret_file = NULL;
2487
2488 spin_lock(&tcon->open_file_lock);
2489 list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2490 struct cifsInodeInfo *cinode;
2491 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2492 if (IS_ERR(full_path)) {
2493 spin_unlock(&tcon->open_file_lock);
2494 free_dentry_path(page);
2495 return PTR_ERR(full_path);
2496 }
2497 if (strcmp(full_path, name))
2498 continue;
2499
2500 cinode = CIFS_I(d_inode(cfile->dentry));
2501 spin_unlock(&tcon->open_file_lock);
2502 free_dentry_path(page);
2503 *ret_file = find_readable_file(cinode, 0);
2504 return *ret_file ? 0 : -ENOENT;
2505 }
2506
2507 spin_unlock(&tcon->open_file_lock);
2508 free_dentry_path(page);
2509 return -ENOENT;
2510}
2511
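/* kref release callback: drop the file handle reference and free the wdata */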
2512void
2513cifs_writedata_release(struct kref *refcount)
2514{
2515 struct cifs_writedata *wdata = container_of(refcount,
2516 struct cifs_writedata, refcount);
2517#ifdef CONFIG_CIFS_SMB_DIRECT
2518 if (wdata->mr) {
2519 smbd_deregister_mr(wdata->mr);
2520 wdata->mr = NULL;
2521 }
2522#endif
2523
2524 if (wdata->cfile)
2525 cifsFileInfo_put(wdata->cfile);
2526
2527 kfree(wdata);
2528}
2529
2530/*
2531 * Write failed with a retryable error. Resend the write request. It's also
2532 * possible that the page was redirtied so re-clean the page.
2533 */
2534static void
2535cifs_writev_requeue(struct cifs_writedata *wdata)
2536{
2537 int rc = 0;
2538 struct inode *inode = d_inode(wdata->cfile->dentry);
2539 struct TCP_Server_Info *server;
2540 unsigned int rest_len = wdata->bytes;
2541 loff_t fpos = wdata->offset;
2542
2543 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2544 do {
2545 struct cifs_writedata *wdata2;
2546 unsigned int wsize, cur_len;
2547
2548 wsize = server->ops->wp_retry_size(inode);
2549 if (wsize < rest_len) {
2550 if (wsize < PAGE_SIZE) {
2551 rc = -EOPNOTSUPP;
2552 break;
2553 }
2554 cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2555 } else {
2556 cur_len = rest_len;
2557 }
2558
2559 wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2560 if (!wdata2) {
2561 rc = -ENOMEM;
2562 break;
2563 }
2564
2565 wdata2->sync_mode = wdata->sync_mode;
2566 wdata2->offset = fpos;
2567 wdata2->bytes = cur_len;
2568 wdata2->iter = wdata->iter;
2569
2570 iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2571 iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2572
2573 if (iov_iter_is_xarray(&wdata2->iter))
2574 /* Check for pages having been redirtied and clean
2575 * them. We can do this by walking the xarray. If
2576 * it's not an xarray, then it's a DIO and we shouldn't
2577 * be mucking around with the page bits.
2578 */
2579 cifs_undirty_folios(inode, fpos, cur_len);
2580
2581 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2582 &wdata2->cfile);
2583 if (!wdata2->cfile) {
2584 cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2585 rc);
2586 if (!is_retryable_error(rc))
2587 rc = -EBADF;
2588 } else {
2589 wdata2->pid = wdata2->cfile->pid;
2590 rc = server->ops->async_writev(wdata2,
2591 cifs_writedata_release);
2592 }
2593
2594 kref_put(&wdata2->refcount, cifs_writedata_release);
2595 if (rc) {
2596 if (is_retryable_error(rc))
2597 continue;
2598 fpos += cur_len;
2599 rest_len -= cur_len;
2600 break;
2601 }
2602
2603 fpos += cur_len;
2604 rest_len -= cur_len;
2605 } while (rest_len > 0);
2606
2607 /* Clean up remaining pages from the original wdata */
2608 if (iov_iter_is_xarray(&wdata->iter))
2609 cifs_pages_write_failed(inode, fpos, rest_len);
2610
2611 if (rc != 0 && !is_retryable_error(rc))
2612 mapping_set_error(inode->i_mapping, rc);
2613 kref_put(&wdata->refcount, cifs_writedata_release);
2614}
2615
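/*
 * Work item run when an async write completes: on success update the cached
 * file size and write statistics; requeue the write if a data-integrity
 * writeback got -EAGAIN; otherwise mark the pages clean, redirtied or
 * failed according to the result.
 */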
2616void
2617cifs_writev_complete(struct work_struct *work)
2618{
2619 struct cifs_writedata *wdata = container_of(work,
2620 struct cifs_writedata, work);
2621 struct inode *inode = d_inode(wdata->cfile->dentry);
2622
2623 if (wdata->result == 0) {
2624 spin_lock(&inode->i_lock);
2625 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2626 spin_unlock(&inode->i_lock);
2627 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2628 wdata->bytes);
2629 } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2630 return cifs_writev_requeue(wdata);
2631
2632 if (wdata->result == -EAGAIN)
2633 cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2634 else if (wdata->result < 0)
2635 cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2636 else
2637 cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2638
2639 if (wdata->result != -EAGAIN)
2640 mapping_set_error(inode->i_mapping, wdata->result);
2641 kref_put(&wdata->refcount, cifs_writedata_release);
2642}
2643
2644struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2645{
2646 struct cifs_writedata *wdata;
2647
2648 wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2649 if (wdata != NULL) {
2650 kref_init(&wdata->refcount);
2651 INIT_LIST_HEAD(&wdata->list);
2652 init_completion(&wdata->done);
2653 INIT_WORK(&wdata->work, complete);
2654 }
2655 return wdata;
2656}
2657
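/*
 * Write the byte range [@from, @to) of a page back to the server using any
 * available writable handle, trimming the range so that the file is not
 * extended past its current size.
 */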
2658static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2659{
2660 struct address_space *mapping = page->mapping;
2661 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2662 char *write_data;
2663 int rc = -EFAULT;
2664 int bytes_written = 0;
2665 struct inode *inode;
2666 struct cifsFileInfo *open_file;
2667
2668 if (!mapping || !mapping->host)
2669 return -EFAULT;
2670
2671 inode = page->mapping->host;
2672
2673 offset += (loff_t)from;
2674 write_data = kmap(page);
2675 write_data += from;
2676
2677 if ((to > PAGE_SIZE) || (from > to)) {
2678 kunmap(page);
2679 return -EIO;
2680 }
2681
2682 /* racing with truncate? */
2683 if (offset > mapping->host->i_size) {
2684 kunmap(page);
2685 return 0; /* don't care */
2686 }
2687
2688 /* check to make sure that we are not extending the file */
2689 if (mapping->host->i_size - offset < (loff_t)to)
2690 to = (unsigned)(mapping->host->i_size - offset);
2691
2692 rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2693 &open_file);
2694 if (!rc) {
2695 bytes_written = cifs_write(open_file, open_file->pid,
2696 write_data, to - from, &offset);
2697 cifsFileInfo_put(open_file);
2698 /* Does mm or vfs already set times? */
2699 simple_inode_init_ts(inode);
2700 if ((bytes_written > 0) && (offset))
2701 rc = 0;
2702 else if (bytes_written < 0)
2703 rc = bytes_written;
2704 else
2705 rc = -EFAULT;
2706 } else {
2707 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2708 if (!is_retryable_error(rc))
2709 rc = -EIO;
2710 }
2711
2712 kunmap(page);
2713 return rc;
2714}
2715
2716/*
2717 * Extend the region to be written back to include subsequent contiguously
2718 * dirty pages if possible, but don't sleep while doing so.
2719 */
2720static void cifs_extend_writeback(struct address_space *mapping,
2721 struct xa_state *xas,
2722 long *_count,
2723 loff_t start,
2724 int max_pages,
2725 loff_t max_len,
2726 size_t *_len)
2727{
2728 struct folio_batch batch;
2729 struct folio *folio;
2730 unsigned int nr_pages;
2731 pgoff_t index = (start + *_len) / PAGE_SIZE;
2732 size_t len;
2733 bool stop = true;
2734 unsigned int i;
2735
2736 folio_batch_init(&batch);
2737
2738 do {
2739 /* Firstly, we gather up a batch of contiguous dirty pages
2740 * under the RCU read lock - but we can't clear the dirty flags
2741 * there if any of those pages are mapped.
2742 */
2743 rcu_read_lock();
2744
2745 xas_for_each(xas, folio, ULONG_MAX) {
2746 stop = true;
2747 if (xas_retry(xas, folio))
2748 continue;
2749 if (xa_is_value(folio))
2750 break;
2751 if (folio->index != index) {
2752 xas_reset(xas);
2753 break;
2754 }
2755
2756 if (!folio_try_get_rcu(folio)) {
2757 xas_reset(xas);
2758 continue;
2759 }
2760 nr_pages = folio_nr_pages(folio);
2761 if (nr_pages > max_pages) {
2762 xas_reset(xas);
2763 break;
2764 }
2765
2766 /* Has the page moved or been split? */
2767 if (unlikely(folio != xas_reload(xas))) {
2768 folio_put(folio);
2769 xas_reset(xas);
2770 break;
2771 }
2772
2773 if (!folio_trylock(folio)) {
2774 folio_put(folio);
2775 xas_reset(xas);
2776 break;
2777 }
2778 if (!folio_test_dirty(folio) ||
2779 folio_test_writeback(folio)) {
2780 folio_unlock(folio);
2781 folio_put(folio);
2782 xas_reset(xas);
2783 break;
2784 }
2785
2786 max_pages -= nr_pages;
2787 len = folio_size(folio);
2788 stop = false;
2789
2790 index += nr_pages;
2791 *_count -= nr_pages;
2792 *_len += len;
2793 if (max_pages <= 0 || *_len >= max_len || *_count <= 0)
2794 stop = true;
2795
2796 if (!folio_batch_add(&batch, folio))
2797 break;
2798 if (stop)
2799 break;
2800 }
2801
2802 xas_pause(xas);
2803 rcu_read_unlock();
2804
2805 /* Now, if we obtained any pages, we can shift them to being
2806 * writable and mark them for caching.
2807 */
2808 if (!folio_batch_count(&batch))
2809 break;
2810
2811 for (i = 0; i < folio_batch_count(&batch); i++) {
2812 folio = batch.folios[i];
2813 /* The folio should be locked, dirty and not undergoing
2814 * writeback from the loop above.
2815 */
2816 if (!folio_clear_dirty_for_io(folio))
2817 WARN_ON(1);
2818 folio_start_writeback(folio);
2819 folio_unlock(folio);
2820 }
2821
2822 folio_batch_release(&batch);
2823 cond_resched();
2824 } while (!stop);
2825}
2826
2827/*
2828 * Write back the locked page and any subsequent non-locked dirty pages.
2829 */
2830static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2831 struct writeback_control *wbc,
2832 struct xa_state *xas,
2833 struct folio *folio,
2834 unsigned long long start,
2835 unsigned long long end)
2836{
2837 struct inode *inode = mapping->host;
2838 struct TCP_Server_Info *server;
2839 struct cifs_writedata *wdata;
2840 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2841 struct cifs_credits credits_on_stack;
2842 struct cifs_credits *credits = &credits_on_stack;
2843 struct cifsFileInfo *cfile = NULL;
2844 unsigned long long i_size = i_size_read(inode), max_len;
2845 unsigned int xid, wsize;
2846 size_t len = folio_size(folio);
2847 long count = wbc->nr_to_write;
2848 int rc;
2849
2850 /* The folio should be locked, dirty and not undergoing writeback. */
2851 if (!folio_clear_dirty_for_io(folio))
2852 WARN_ON_ONCE(1);
2853 folio_start_writeback(folio);
2854
2855 count -= folio_nr_pages(folio);
2856
2857 xid = get_xid();
2858 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2859
2860 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2861 if (rc) {
2862 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2863 goto err_xid;
2864 }
2865
2866 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2867 &wsize, credits);
2868 if (rc != 0)
2869 goto err_close;
2870
2871 wdata = cifs_writedata_alloc(cifs_writev_complete);
2872 if (!wdata) {
2873 rc = -ENOMEM;
2874 goto err_uncredit;
2875 }
2876
2877 wdata->sync_mode = wbc->sync_mode;
2878 wdata->offset = folio_pos(folio);
2879 wdata->pid = cfile->pid;
2880 wdata->credits = credits_on_stack;
2881 wdata->cfile = cfile;
2882 wdata->server = server;
2883 cfile = NULL;
2884
2885 /* Find all consecutive lockable dirty pages that have contiguous
2886 * written regions, stopping when we find a page that is not
2887 * immediately lockable, is not dirty or is missing, or we reach the
2888 * end of the range.
2889 */
2890 if (start < i_size) {
2891 /* Trim the write to the EOF; the extra data is ignored. Also
2892 * put an upper limit on the size of a single storedata op.
2893 */
2894 max_len = wsize;
2895 max_len = min_t(unsigned long long, max_len, end - start + 1);
2896 max_len = min_t(unsigned long long, max_len, i_size - start);
2897
2898 if (len < max_len) {
2899 int max_pages = INT_MAX;
2900
2901#ifdef CONFIG_CIFS_SMB_DIRECT
2902 if (server->smbd_conn)
2903 max_pages = server->smbd_conn->max_frmr_depth;
2904#endif
2905 max_pages -= folio_nr_pages(folio);
2906
2907 if (max_pages > 0)
2908 cifs_extend_writeback(mapping, xas, &count, start,
2909 max_pages, max_len, &len);
2910 }
2911 }
2912 len = min_t(unsigned long long, len, i_size - start);
2913
2914 /* We now have a contiguous set of dirty pages, each with writeback
2915 * set; the first page is still locked at this point, but all the rest
2916 * have been unlocked.
2917 */
2918 folio_unlock(folio);
2919 wdata->bytes = len;
2920
2921 if (start < i_size) {
2922 iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2923 start, len);
2924
2925 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2926 if (rc)
2927 goto err_wdata;
2928
2929 if (wdata->cfile->invalidHandle)
2930 rc = -EAGAIN;
2931 else
2932 rc = wdata->server->ops->async_writev(wdata,
2933 cifs_writedata_release);
2934 if (rc >= 0) {
2935 kref_put(&wdata->refcount, cifs_writedata_release);
2936 goto err_close;
2937 }
2938 } else {
2939 /* The dirty region was entirely beyond the EOF. */
2940 cifs_pages_written_back(inode, start, len);
2941 rc = 0;
2942 }
2943
2944err_wdata:
2945 kref_put(&wdata->refcount, cifs_writedata_release);
2946err_uncredit:
2947 add_credits_and_wake_if(server, credits, 0);
2948err_close:
2949 if (cfile)
2950 cifsFileInfo_put(cfile);
2951err_xid:
2952 free_xid(xid);
2953 if (rc == 0) {
2954 wbc->nr_to_write = count;
2955 rc = len;
2956 } else if (is_retryable_error(rc)) {
2957 cifs_pages_write_redirty(inode, start, len);
2958 } else {
2959 cifs_pages_write_failed(inode, start, len);
2960 mapping_set_error(mapping, rc);
2961 }
2962 /* Indication to update ctime and mtime as close is deferred */
2963 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2964 return rc;
2965}
2966
2967/*
2968 * write a region of pages back to the server
2969 */
2970static ssize_t cifs_writepages_begin(struct address_space *mapping,
2971 struct writeback_control *wbc,
2972 struct xa_state *xas,
2973 unsigned long long *_start,
2974 unsigned long long end)
2975{
2976 struct folio *folio;
2977 unsigned long long start = *_start;
2978 ssize_t ret;
2979 int skips = 0;
2980
2981search_again:
2982 /* Find the first dirty page. */
2983 rcu_read_lock();
2984
2985 for (;;) {
2986 folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
2987 if (xas_retry(xas, folio) || xa_is_value(folio))
2988 continue;
2989 if (!folio)
2990 break;
2991
2992 if (!folio_try_get_rcu(folio)) {
2993 xas_reset(xas);
2994 continue;
2995 }
2996
2997 if (unlikely(folio != xas_reload(xas))) {
2998 folio_put(folio);
2999 xas_reset(xas);
3000 continue;
3001 }
3002
3003 xas_pause(xas);
3004 break;
3005 }
3006 rcu_read_unlock();
3007 if (!folio)
3008 return 0;
3009
3010 start = folio_pos(folio); /* May regress with THPs */
3011
3012 /* At this point we hold neither the i_pages lock nor the page lock:
3013 * the page may be truncated or invalidated (changing page->mapping to
3014 * NULL), or even swizzled back from swapper_space to tmpfs file
3015 * mapping
3016 */
3017lock_again:
3018 if (wbc->sync_mode != WB_SYNC_NONE) {
3019 ret = folio_lock_killable(folio);
3020 if (ret < 0)
3021 return ret;
3022 } else {
3023 if (!folio_trylock(folio))
3024 goto search_again;
3025 }
3026
3027 if (folio->mapping != mapping ||
3028 !folio_test_dirty(folio)) {
3029 start += folio_size(folio);
3030 folio_unlock(folio);
3031 goto search_again;
3032 }
3033
3034 if (folio_test_writeback(folio) ||
3035 folio_test_fscache(folio)) {
3036 folio_unlock(folio);
3037 if (wbc->sync_mode != WB_SYNC_NONE) {
3038 folio_wait_writeback(folio);
3039#ifdef CONFIG_CIFS_FSCACHE
3040 folio_wait_fscache(folio);
3041#endif
3042 goto lock_again;
3043 }
3044
3045 start += folio_size(folio);
3046 if (wbc->sync_mode == WB_SYNC_NONE) {
3047 if (skips >= 5 || need_resched()) {
3048 ret = 0;
3049 goto out;
3050 }
3051 skips++;
3052 }
3053 goto search_again;
3054 }
3055
3056 ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end);
3057out:
3058 if (ret > 0)
3059 *_start = start + ret;
3060 return ret;
3061}
3062
3063/*
3064 * Write a region of pages back to the server
3065 */
3066static int cifs_writepages_region(struct address_space *mapping,
3067 struct writeback_control *wbc,
3068 unsigned long long *_start,
3069 unsigned long long end)
3070{
3071 ssize_t ret;
3072
3073 XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
3074
3075 do {
3076 ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end);
3077 if (ret > 0 && wbc->nr_to_write > 0)
3078 cond_resched();
3079 } while (ret > 0 && wbc->nr_to_write > 0);
3080
3081 return ret > 0 ? 0 : ret;
3082}
3083
3084/*
3085 * Write some of the pending data back to the server
3086 */
3087static int cifs_writepages(struct address_space *mapping,
3088 struct writeback_control *wbc)
3089{
3090 loff_t start, end;
3091 int ret;
3092
3093 /* We have to be careful as we can end up racing with setattr()
3094 * truncating the pagecache since the caller doesn't take a lock here
3095 * to prevent it.
3096 */
3097
3098 if (wbc->range_cyclic && mapping->writeback_index) {
3099 start = mapping->writeback_index * PAGE_SIZE;
3100 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3101 if (ret < 0)
3102 goto out;
3103
3104 if (wbc->nr_to_write <= 0) {
3105 mapping->writeback_index = start / PAGE_SIZE;
3106 goto out;
3107 }
3108
3109 start = 0;
3110 end = mapping->writeback_index * PAGE_SIZE;
3111 mapping->writeback_index = 0;
3112 ret = cifs_writepages_region(mapping, wbc, &start, end);
3113 if (ret == 0)
3114 mapping->writeback_index = start / PAGE_SIZE;
3115 } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
3116 start = 0;
3117 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3118 if (wbc->nr_to_write > 0 && ret == 0)
3119 mapping->writeback_index = start / PAGE_SIZE;
3120 } else {
3121 start = wbc->range_start;
3122 ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end);
3123 }
3124
3125out:
3126 return ret;
3127}
3128
3129static int
3130cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3131{
3132 int rc;
3133 unsigned int xid;
3134
3135 xid = get_xid();
3136/* BB add check for wbc flags */
3137 get_page(page);
3138 if (!PageUptodate(page))
3139 cifs_dbg(FYI, "ppw - page not up to date\n");
3140
3141 /*
3142 * Set the "writeback" flag, and clear "dirty" in the radix tree.
3143 *
3144 * A writepage() implementation always needs to do either this,
3145 * or re-dirty the page with "redirty_page_for_writepage()" in
3146 * the case of a failure.
3147 *
3148 * Just unlocking the page will cause the radix tree tag-bits
3149 * to fail to update with the state of the page correctly.
3150 */
3151 set_page_writeback(page);
3152retry_write:
3153 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3154 if (is_retryable_error(rc)) {
3155 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3156 goto retry_write;
3157 redirty_page_for_writepage(wbc, page);
3158 } else if (rc != 0) {
3159 SetPageError(page);
3160 mapping_set_error(page->mapping, rc);
3161 } else {
3162 SetPageUptodate(page);
3163 }
3164 end_page_writeback(page);
3165 put_page(page);
3166 free_xid(xid);
3167 return rc;
3168}
3169
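/*
 * Complete a buffered write: mark the copied data uptodate and dirty, or
 * push it to the server synchronously if the folio was never brought
 * uptodate, then extend i_size if the write went past the end of file.
 */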
3170static int cifs_write_end(struct file *file, struct address_space *mapping,
3171 loff_t pos, unsigned len, unsigned copied,
3172 struct page *page, void *fsdata)
3173{
3174 int rc;
3175 struct inode *inode = mapping->host;
3176 struct cifsFileInfo *cfile = file->private_data;
3177 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3178 struct folio *folio = page_folio(page);
3179 __u32 pid;
3180
3181 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3182 pid = cfile->pid;
3183 else
3184 pid = current->tgid;
3185
3186 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3187 page, pos, copied);
3188
3189 if (folio_test_checked(folio)) {
3190 if (copied == len)
3191 folio_mark_uptodate(folio);
3192 folio_clear_checked(folio);
3193 } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3194 folio_mark_uptodate(folio);
3195
3196 if (!folio_test_uptodate(folio)) {
3197 char *page_data;
3198 unsigned offset = pos & (PAGE_SIZE - 1);
3199 unsigned int xid;
3200
3201 xid = get_xid();
3202 /* this is probably better than directly calling
3203 partialpage_write since in this function the file handle is
3204 known which we might as well leverage */
3205 /* BB check if anything else missing out of ppw
3206 such as updating last write time */
3207 page_data = kmap(page);
3208 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3209 /* if (rc < 0) should we set writebehind rc? */
3210 kunmap(page);
3211
3212 free_xid(xid);
3213 } else {
3214 rc = copied;
3215 pos += copied;
3216 set_page_dirty(page);
3217 }
3218
3219 if (rc > 0) {
3220 spin_lock(&inode->i_lock);
3221 if (pos > inode->i_size) {
3222 loff_t additional_blocks = (512 - 1 + copied) >> 9;
3223
3224 i_size_write(inode, pos);
3225 /*
3226 * Estimate new allocation size based on the amount written.
3227 * This will be updated from server on close (and on queryinfo)
3228 */
3229 inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9,
3230 inode->i_blocks + additional_blocks);
3231 }
3232 spin_unlock(&inode->i_lock);
3233 }
3234
3235 unlock_page(page);
3236 put_page(page);
3237 /* Indication to update ctime and mtime as close is deferred */
3238 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3239
3240 return rc;
3241}
3242
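/*
 * fsync for strict cache mode: flush and wait on dirty pagecache, zap the
 * mapping if we no longer hold a read lease/oplock, then ask the server to
 * flush the handle unless the mount disables server-side sync.
 */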
3243int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3244 int datasync)
3245{
3246 unsigned int xid;
3247 int rc = 0;
3248 struct cifs_tcon *tcon;
3249 struct TCP_Server_Info *server;
3250 struct cifsFileInfo *smbfile = file->private_data;
3251 struct inode *inode = file_inode(file);
3252 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3253
3254 rc = file_write_and_wait_range(file, start, end);
3255 if (rc) {
3256 trace_cifs_fsync_err(inode->i_ino, rc);
3257 return rc;
3258 }
3259
3260 xid = get_xid();
3261
3262 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3263 file, datasync);
3264
3265 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3266 rc = cifs_zap_mapping(inode);
3267 if (rc) {
3268 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3269 rc = 0; /* don't care about it in fsync */
3270 }
3271 }
3272
3273 tcon = tlink_tcon(smbfile->tlink);
3274 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3275 server = tcon->ses->server;
3276 if (server->ops->flush == NULL) {
3277 rc = -ENOSYS;
3278 goto strict_fsync_exit;
3279 }
3280
3281 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3282 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3283 if (smbfile) {
3284 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3285 cifsFileInfo_put(smbfile);
3286 } else
3287 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3288 } else
3289 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3290 }
3291
3292strict_fsync_exit:
3293 free_xid(xid);
3294 return rc;
3295}
3296
3297int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3298{
3299 unsigned int xid;
3300 int rc = 0;
3301 struct cifs_tcon *tcon;
3302 struct TCP_Server_Info *server;
3303 struct cifsFileInfo *smbfile = file->private_data;
3304 struct inode *inode = file_inode(file);
3305 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3306
3307 rc = file_write_and_wait_range(file, start, end);
3308 if (rc) {
3309 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3310 return rc;
3311 }
3312
3313 xid = get_xid();
3314
3315 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3316 file, datasync);
3317
3318 tcon = tlink_tcon(smbfile->tlink);
3319 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3320 server = tcon->ses->server;
3321 if (server->ops->flush == NULL) {
3322 rc = -ENOSYS;
3323 goto fsync_exit;
3324 }
3325
3326 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3327 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3328 if (smbfile) {
3329 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3330 cifsFileInfo_put(smbfile);
3331 } else
3332 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3333 } else
3334 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3335 }
3336
3337fsync_exit:
3338 free_xid(xid);
3339 return rc;
3340}
3341
3342/*
3343 * As file closes, flush all cached write data for this inode checking
3344 * for write behind errors.
3345 */
3346int cifs_flush(struct file *file, fl_owner_t id)
3347{
3348 struct inode *inode = file_inode(file);
3349 int rc = 0;
3350
3351 if (file->f_mode & FMODE_WRITE)
3352 rc = filemap_write_and_wait(inode->i_mapping);
3353
3354 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3355 if (rc) {
3356 /* get more nuanced writeback errors */
3357 rc = filemap_check_wb_err(file->f_mapping, 0);
3358 trace_cifs_flush_err(inode->i_ino, rc);
3359 }
3360 return rc;
3361}
3362
3363static void
3364cifs_uncached_writedata_release(struct kref *refcount)
3365{
3366 struct cifs_writedata *wdata = container_of(refcount,
3367 struct cifs_writedata, refcount);
3368
3369 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3370 cifs_writedata_release(refcount);
3371}
3372
3373static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3374
3375static void
3376cifs_uncached_writev_complete(struct work_struct *work)
3377{
3378 struct cifs_writedata *wdata = container_of(work,
3379 struct cifs_writedata, work);
3380 struct inode *inode = d_inode(wdata->cfile->dentry);
3381 struct cifsInodeInfo *cifsi = CIFS_I(inode);
3382
3383 spin_lock(&inode->i_lock);
3384 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3385 if (cifsi->netfs.remote_i_size > inode->i_size)
3386 i_size_write(inode, cifsi->netfs.remote_i_size);
3387 spin_unlock(&inode->i_lock);
3388
3389 complete(&wdata->done);
3390 collect_uncached_write_data(wdata->ctx);
3391 /* the below call can possibly free the last ref to aio ctx */
3392 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3393}
3394
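/*
 * Resend an entire wdata after a retryable failure: wait until enough
 * credits are available for the whole request, then reissue the async
 * write and put it back on @wdata_list.
 */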
3395static int
3396cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3397 struct cifs_aio_ctx *ctx)
3398{
3399 unsigned int wsize;
3400 struct cifs_credits credits;
3401 int rc;
3402 struct TCP_Server_Info *server = wdata->server;
3403
3404 do {
3405 if (wdata->cfile->invalidHandle) {
3406 rc = cifs_reopen_file(wdata->cfile, false);
3407 if (rc == -EAGAIN)
3408 continue;
3409 else if (rc)
3410 break;
3411 }
3412
3414 /*
3415 * Wait for credits to resend this wdata.
3416		 * Note: we are attempting to resend the whole wdata rather than
3417		 * in segments
3418 */
3419 do {
3420 rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3421 &wsize, &credits);
3422 if (rc)
3423 goto fail;
3424
3425 if (wsize < wdata->bytes) {
3426 add_credits_and_wake_if(server, &credits, 0);
3427 msleep(1000);
3428 }
3429 } while (wsize < wdata->bytes);
3430 wdata->credits = credits;
3431
3432 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3433
3434 if (!rc) {
3435 if (wdata->cfile->invalidHandle)
3436 rc = -EAGAIN;
3437 else {
3438 wdata->replay = true;
3439#ifdef CONFIG_CIFS_SMB_DIRECT
3440 if (wdata->mr) {
3441 wdata->mr->need_invalidate = true;
3442 smbd_deregister_mr(wdata->mr);
3443 wdata->mr = NULL;
3444 }
3445#endif
3446 rc = server->ops->async_writev(wdata,
3447 cifs_uncached_writedata_release);
3448 }
3449 }
3450
3451 /* If the write was successfully sent, we are done */
3452 if (!rc) {
3453 list_add_tail(&wdata->list, wdata_list);
3454 return 0;
3455 }
3456
3457 /* Roll back credits and retry if needed */
3458 add_credits_and_wake_if(server, &wdata->credits, 0);
3459 } while (rc == -EAGAIN);
3460
3461fail:
3462 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3463 return rc;
3464}
3465
3466/*
3467 * Select span of a bvec iterator we're going to use. Limit it by both maximum
3468 * size and maximum number of segments.
3469 */
3470static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3471 size_t max_segs, unsigned int *_nsegs)
3472{
3473 const struct bio_vec *bvecs = iter->bvec;
3474 unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3475 size_t len, span = 0, n = iter->count;
3476 size_t skip = iter->iov_offset;
3477
3478 if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3479 return 0;
3480
3481 while (n && ix < nbv && skip) {
3482 len = bvecs[ix].bv_len;
3483 if (skip < len)
3484 break;
3485 skip -= len;
3486 n -= len;
3487 ix++;
3488 }
3489
3490 while (n && ix < nbv) {
3491 len = min3(n, bvecs[ix].bv_len - skip, max_size);
3492 span += len;
3493 max_size -= len;
3494 nsegs++;
3495 ix++;
3496 if (max_size == 0 || nsegs >= max_segs)
3497 break;
3498 skip = 0;
3499 n -= len;
3500 }
3501
3502 *_nsegs = nsegs;
3503 return span;
3504}
3505
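/*
 * Carve the data described by @from into wsize-limited chunks, issue an
 * async write for each chunk and add the resulting wdata structures to
 * @wdata_list for later collection.
 */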
3506static int
3507cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3508 struct cifsFileInfo *open_file,
3509 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3510 struct cifs_aio_ctx *ctx)
3511{
3512 int rc = 0;
3513 size_t cur_len, max_len;
3514 struct cifs_writedata *wdata;
3515 pid_t pid;
3516 struct TCP_Server_Info *server;
3517 unsigned int xid, max_segs = INT_MAX;
3518
3519 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3520 pid = open_file->pid;
3521 else
3522 pid = current->tgid;
3523
3524 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3525 xid = get_xid();
3526
3527#ifdef CONFIG_CIFS_SMB_DIRECT
3528 if (server->smbd_conn)
3529 max_segs = server->smbd_conn->max_frmr_depth;
3530#endif
3531
3532 do {
3533 struct cifs_credits credits_on_stack;
3534 struct cifs_credits *credits = &credits_on_stack;
3535 unsigned int wsize, nsegs = 0;
3536
3537 if (signal_pending(current)) {
3538 rc = -EINTR;
3539 break;
3540 }
3541
3542 if (open_file->invalidHandle) {
3543 rc = cifs_reopen_file(open_file, false);
3544 if (rc == -EAGAIN)
3545 continue;
3546 else if (rc)
3547 break;
3548 }
3549
3550 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3551 &wsize, credits);
3552 if (rc)
3553 break;
3554
3555 max_len = min_t(const size_t, len, wsize);
3556 if (!max_len) {
3557 rc = -EAGAIN;
3558 add_credits_and_wake_if(server, credits, 0);
3559 break;
3560 }
3561
3562 cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3563 cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3564 cur_len, max_len, nsegs, from->nr_segs, max_segs);
3565 if (cur_len == 0) {
3566 rc = -EIO;
3567 add_credits_and_wake_if(server, credits, 0);
3568 break;
3569 }
3570
3571 wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3572 if (!wdata) {
3573 rc = -ENOMEM;
3574 add_credits_and_wake_if(server, credits, 0);
3575 break;
3576 }
3577
3578 wdata->sync_mode = WB_SYNC_ALL;
3579 wdata->offset = (__u64)fpos;
3580 wdata->cfile = cifsFileInfo_get(open_file);
3581 wdata->server = server;
3582 wdata->pid = pid;
3583 wdata->bytes = cur_len;
3584 wdata->credits = credits_on_stack;
3585 wdata->iter = *from;
3586 wdata->ctx = ctx;
3587 kref_get(&ctx->refcount);
3588
3589 iov_iter_truncate(&wdata->iter, cur_len);
3590
3591 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3592
3593 if (!rc) {
3594 if (wdata->cfile->invalidHandle)
3595 rc = -EAGAIN;
3596 else
3597 rc = server->ops->async_writev(wdata,
3598 cifs_uncached_writedata_release);
3599 }
3600
3601 if (rc) {
3602 add_credits_and_wake_if(server, &wdata->credits, 0);
3603 kref_put(&wdata->refcount,
3604 cifs_uncached_writedata_release);
3605 if (rc == -EAGAIN)
3606 continue;
3607 break;
3608 }
3609
3610 list_add_tail(&wdata->list, wdata_list);
3611 iov_iter_advance(from, cur_len);
3612 fpos += cur_len;
3613 len -= cur_len;
3614 } while (len > 0);
3615
3616 free_xid(xid);
3617 return rc;
3618}
3619
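/*
 * Collect the results of the outstanding uncached writes for an aio
 * context, resending any that failed with a retryable error, then complete
 * the iocb (or wake a synchronous waiter) with the total written or the
 * first error hit.
 */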
3620static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3621{
3622 struct cifs_writedata *wdata, *tmp;
3623 struct cifs_tcon *tcon;
3624 struct cifs_sb_info *cifs_sb;
3625 struct dentry *dentry = ctx->cfile->dentry;
3626 ssize_t rc;
3627
3628 tcon = tlink_tcon(ctx->cfile->tlink);
3629 cifs_sb = CIFS_SB(dentry->d_sb);
3630
3631 mutex_lock(&ctx->aio_mutex);
3632
3633 if (list_empty(&ctx->list)) {
3634 mutex_unlock(&ctx->aio_mutex);
3635 return;
3636 }
3637
3638 rc = ctx->rc;
3639 /*
3640 * Wait for and collect replies for any successful sends in order of
3641 * increasing offset. Once an error is hit, then return without waiting
3642 * for any more replies.
3643 */
3644restart_loop:
3645 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3646 if (!rc) {
3647 if (!try_wait_for_completion(&wdata->done)) {
3648 mutex_unlock(&ctx->aio_mutex);
3649 return;
3650 }
3651
3652 if (wdata->result)
3653 rc = wdata->result;
3654 else
3655 ctx->total_len += wdata->bytes;
3656
3657 /* resend call if it's a retryable error */
3658 if (rc == -EAGAIN) {
3659 struct list_head tmp_list;
3660 struct iov_iter tmp_from = ctx->iter;
3661
3662 INIT_LIST_HEAD(&tmp_list);
3663 list_del_init(&wdata->list);
3664
3665 if (ctx->direct_io)
3666 rc = cifs_resend_wdata(
3667 wdata, &tmp_list, ctx);
3668 else {
3669 iov_iter_advance(&tmp_from,
3670 wdata->offset - ctx->pos);
3671
3672 rc = cifs_write_from_iter(wdata->offset,
3673 wdata->bytes, &tmp_from,
3674 ctx->cfile, cifs_sb, &tmp_list,
3675 ctx);
3676
3677 kref_put(&wdata->refcount,
3678 cifs_uncached_writedata_release);
3679 }
3680
3681 list_splice(&tmp_list, &ctx->list);
3682 goto restart_loop;
3683 }
3684 }
3685 list_del_init(&wdata->list);
3686 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3687 }
3688
3689 cifs_stats_bytes_written(tcon, ctx->total_len);
3690 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3691
3692 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3693
3694 mutex_unlock(&ctx->aio_mutex);
3695
3696 if (ctx->iocb && ctx->iocb->ki_complete)
3697 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3698 else
3699 complete(&ctx->done);
3700}
3701
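/*
 * Common code for the uncached write paths (cifs_direct_writev and
 * cifs_user_writev): set up an aio context, pin or copy the source iterator
 * as needed, dispatch async writes and, for synchronous callers, wait for
 * the result.
 */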
3702static ssize_t __cifs_writev(
3703 struct kiocb *iocb, struct iov_iter *from, bool direct)
3704{
3705 struct file *file = iocb->ki_filp;
3706 ssize_t total_written = 0;
3707 struct cifsFileInfo *cfile;
3708 struct cifs_tcon *tcon;
3709 struct cifs_sb_info *cifs_sb;
3710 struct cifs_aio_ctx *ctx;
3711 int rc;
3712
3713 rc = generic_write_checks(iocb, from);
3714 if (rc <= 0)
3715 return rc;
3716
3717 cifs_sb = CIFS_FILE_SB(file);
3718 cfile = file->private_data;
3719 tcon = tlink_tcon(cfile->tlink);
3720
3721 if (!tcon->ses->server->ops->async_writev)
3722 return -ENOSYS;
3723
3724 ctx = cifs_aio_ctx_alloc();
3725 if (!ctx)
3726 return -ENOMEM;
3727
3728 ctx->cfile = cifsFileInfo_get(cfile);
3729
3730 if (!is_sync_kiocb(iocb))
3731 ctx->iocb = iocb;
3732
3733 ctx->pos = iocb->ki_pos;
3734 ctx->direct_io = direct;
3735 ctx->nr_pinned_pages = 0;
3736
3737 if (user_backed_iter(from)) {
3738 /*
3739 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3740 * they contain references to the calling process's virtual
3741 * memory layout which won't be available in an async worker
3742 * thread. This also takes a pin on every folio involved.
3743 */
3744 rc = netfs_extract_user_iter(from, iov_iter_count(from),
3745 &ctx->iter, 0);
3746 if (rc < 0) {
3747 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3748 return rc;
3749 }
3750
3751 ctx->nr_pinned_pages = rc;
3752 ctx->bv = (void *)ctx->iter.bvec;
3753 ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3754 } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3755 !is_sync_kiocb(iocb)) {
3756 /*
3757 * If the op is asynchronous, we need to copy the list attached
3758 * to a BVEC/KVEC-type iterator, but we assume that the storage
3759 * will be pinned by the caller; in any case, we may or may not
3760 * be able to pin the pages, so we don't try.
3761 */
3762 ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3763 if (!ctx->bv) {
3764 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3765 return -ENOMEM;
3766 }
3767 } else {
3768 /*
3769 * Otherwise, we just pass the iterator down as-is and rely on
3770 * the caller to make sure the pages referred to by the
3771 * iterator don't evaporate.
3772 */
3773 ctx->iter = *from;
3774 }
3775
3776 ctx->len = iov_iter_count(&ctx->iter);
3777
3778	/* grab a lock here since the write completion handlers can access ctx */
3779 mutex_lock(&ctx->aio_mutex);
3780
3781 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3782 cfile, cifs_sb, &ctx->list, ctx);
3783
3784 /*
3785 * If at least one write was successfully sent, then discard any rc
3786	 * value from the later writes. If the other writes succeed, then
3787	 * we'll end up returning whatever was written. If they fail, then
3788	 * we'll get a new rc value from that.
3789 */
3790 if (!list_empty(&ctx->list))
3791 rc = 0;
3792
3793 mutex_unlock(&ctx->aio_mutex);
3794
3795 if (rc) {
3796 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3797 return rc;
3798 }
3799
3800 if (!is_sync_kiocb(iocb)) {
3801 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3802 return -EIOCBQUEUED;
3803 }
3804
3805 rc = wait_for_completion_killable(&ctx->done);
3806 if (rc) {
3807 mutex_lock(&ctx->aio_mutex);
3808 ctx->rc = rc = -EINTR;
3809 total_written = ctx->total_len;
3810 mutex_unlock(&ctx->aio_mutex);
3811 } else {
3812 rc = ctx->rc;
3813 total_written = ctx->total_len;
3814 }
3815
3816 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3817
3818 if (unlikely(!total_written))
3819 return rc;
3820
3821 iocb->ki_pos += total_written;
3822 return total_written;
3823}
3824
3825ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3826{
3827 struct file *file = iocb->ki_filp;
3828
3829 cifs_revalidate_mapping(file->f_inode);
3830 return __cifs_writev(iocb, from, true);
3831}
3832
3833ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3834{
3835 return __cifs_writev(iocb, from, false);
3836}
3837
3838static ssize_t
3839cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3840{
3841 struct file *file = iocb->ki_filp;
3842 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3843 struct inode *inode = file->f_mapping->host;
3844 struct cifsInodeInfo *cinode = CIFS_I(inode);
3845 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3846 ssize_t rc;
3847
3848 inode_lock(inode);
3849 /*
3850	 * We need to hold the sem to be sure nobody modifies the lock list
3851 * with a brlock that prevents writing.
3852 */
3853 down_read(&cinode->lock_sem);
3854
3855 rc = generic_write_checks(iocb, from);
3856 if (rc <= 0)
3857 goto out;
3858
3859 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3860 server->vals->exclusive_lock_type, 0,
3861 NULL, CIFS_WRITE_OP))
3862 rc = __generic_file_write_iter(iocb, from);
3863 else
3864 rc = -EACCES;
3865out:
3866 up_read(&cinode->lock_sem);
3867 inode_unlock(inode);
3868
3869 if (rc > 0)
3870 rc = generic_write_sync(iocb, rc);
3871 return rc;
3872}
3873
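/*
 * Write entry point for strict cache mode: use the cached (generic) write
 * path while we hold a write lease/oplock, otherwise write uncached and
 * zap any read-cached data afterwards.
 */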
3874ssize_t
3875cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3876{
3877 struct inode *inode = file_inode(iocb->ki_filp);
3878 struct cifsInodeInfo *cinode = CIFS_I(inode);
3879 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3880 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3881 iocb->ki_filp->private_data;
3882 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3883 ssize_t written;
3884
3885 written = cifs_get_writer(cinode);
3886 if (written)
3887 return written;
3888
3889 if (CIFS_CACHE_WRITE(cinode)) {
3890 if (cap_unix(tcon->ses) &&
3891 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3892 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3893 written = generic_file_write_iter(iocb, from);
3894 goto out;
3895 }
3896 written = cifs_writev(iocb, from);
3897 goto out;
3898 }
3899 /*
3900 * For non-oplocked files in strict cache mode we need to write the data
3901 * to the server exactly from the pos to pos+len-1 rather than flush all
3902	 * affected pages because it may cause an error with mandatory locks on
3903	 * these pages but not on the region from pos to pos+len-1.
3904 */
3905 written = cifs_user_writev(iocb, from);
3906 if (CIFS_CACHE_READ(cinode)) {
3907 /*
3908 * We have read level caching and we have just sent a write
3909 * request to the server thus making data in the cache stale.
3910 * Zap the cache and set oplock/lease level to NONE to avoid
3911 * reading stale data from the cache. All subsequent read
3912 * operations will read new data from the server.
3913 */
3914 cifs_zap_mapping(inode);
3915 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3916 inode);
3917 cinode->oplock = 0;
3918 }
3919out:
3920 cifs_put_writer(cinode);
3921 return written;
3922}
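
/*
 * Summary of the strict cache write policy above, as an illustrative
 * sketch of the decision tree (not additional code):
 *
 *	if (CIFS_CACHE_WRITE(cinode)) {
 *		if (cap_unix(ses) && POSIX brlocks are usable)
 *			generic_file_write_iter();	// plain cached write
 *		else
 *			cifs_writev();			// cached write under lock_sem
 *	} else {
 *		cifs_user_writev();			// uncached write to the server
 *		if (CIFS_CACHE_READ(cinode))
 *			cifs_zap_mapping();		// cached reads are now stale
 *	}
 */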
3923
3924static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3925{
3926 struct cifs_readdata *rdata;
3927
3928 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3929 if (rdata) {
3930 kref_init(&rdata->refcount);
3931 INIT_LIST_HEAD(&rdata->list);
3932 init_completion(&rdata->done);
3933 INIT_WORK(&rdata->work, complete);
3934 }
3935
3936 return rdata;
3937}
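
/*
 * Illustrative usage sketch (mirroring cifs_send_async_read() below):
 * the caller owns the initial reference taken by kref_init() and drops
 * it with cifs_readdata_release() once the request has been queued or
 * has failed:
 *
 *	rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
 *	if (!rdata)
 *		return -ENOMEM;
 *	rdata->cfile = cifsFileInfo_get(open_file);
 *	rdata->offset = fpos;
 *	rdata->bytes = cur_len;
 *	...
 *	kref_put(&rdata->refcount, cifs_readdata_release);
 */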
3938
3939void
3940cifs_readdata_release(struct kref *refcount)
3941{
3942 struct cifs_readdata *rdata = container_of(refcount,
3943 struct cifs_readdata, refcount);
3944
3945 if (rdata->ctx)
3946 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3947#ifdef CONFIG_CIFS_SMB_DIRECT
3948 if (rdata->mr) {
3949 smbd_deregister_mr(rdata->mr);
3950 rdata->mr = NULL;
3951 }
3952#endif
3953 if (rdata->cfile)
3954 cifsFileInfo_put(rdata->cfile);
3955
3956 kfree(rdata);
3957}
3958
3959static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3960
3961static void
3962cifs_uncached_readv_complete(struct work_struct *work)
3963{
3964 struct cifs_readdata *rdata = container_of(work,
3965 struct cifs_readdata, work);
3966
3967 complete(&rdata->done);
3968 collect_uncached_read_data(rdata->ctx);
3969 /* the below call can possibly free the last ref to aio ctx */
3970 kref_put(&rdata->refcount, cifs_readdata_release);
3971}
3972
3973static int cifs_resend_rdata(struct cifs_readdata *rdata,
3974 struct list_head *rdata_list,
3975 struct cifs_aio_ctx *ctx)
3976{
3977 unsigned int rsize;
3978 struct cifs_credits credits;
3979 int rc;
3980 struct TCP_Server_Info *server;
3981
3982 /* XXX: should we pick a new channel here? */
3983 server = rdata->server;
3984
3985 do {
3986 if (rdata->cfile->invalidHandle) {
3987 rc = cifs_reopen_file(rdata->cfile, true);
3988 if (rc == -EAGAIN)
3989 continue;
3990 else if (rc)
3991 break;
3992 }
3993
3994 /*
3995 * Wait for credits to resend this rdata.
3996		 * Note: we are attempting to resend the whole rdata rather than
3997		 * resending it in segments
3998 */
3999 do {
4000 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
4001 &rsize, &credits);
4002
4003 if (rc)
4004 goto fail;
4005
4006 if (rsize < rdata->bytes) {
4007 add_credits_and_wake_if(server, &credits, 0);
4008 msleep(1000);
4009 }
4010 } while (rsize < rdata->bytes);
4011 rdata->credits = credits;
4012
4013 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4014 if (!rc) {
4015 if (rdata->cfile->invalidHandle)
4016 rc = -EAGAIN;
4017 else {
4018#ifdef CONFIG_CIFS_SMB_DIRECT
4019 if (rdata->mr) {
4020 rdata->mr->need_invalidate = true;
4021 smbd_deregister_mr(rdata->mr);
4022 rdata->mr = NULL;
4023 }
4024#endif
4025 rc = server->ops->async_readv(rdata);
4026 }
4027 }
4028
4029 /* If the read was successfully sent, we are done */
4030 if (!rc) {
4031 /* Add to aio pending list */
4032 list_add_tail(&rdata->list, rdata_list);
4033 return 0;
4034 }
4035
4036 /* Roll back credits and retry if needed */
4037 add_credits_and_wake_if(server, &rdata->credits, 0);
4038 } while (rc == -EAGAIN);
4039
4040fail:
4041 kref_put(&rdata->refcount, cifs_readdata_release);
4042 return rc;
4043}
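
/*
 * Resend note (descriptive): a direct I/O rdata is reissued as a single
 * request rather than re-sliced, so the loop above must reacquire at
 * least rdata->bytes worth of credits, handing partial grants back with
 * add_credits_and_wake_if() and sleeping until the server can accept the
 * full size. On an SMB Direct transport the stale memory registration is
 * invalidated and dropped before the read is sent again.
 */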
4044
4045static int
4046cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
4047 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
4048 struct cifs_aio_ctx *ctx)
4049{
4050 struct cifs_readdata *rdata;
4051 unsigned int rsize, nsegs, max_segs = INT_MAX;
4052 struct cifs_credits credits_on_stack;
4053 struct cifs_credits *credits = &credits_on_stack;
4054 size_t cur_len, max_len;
4055 int rc;
4056 pid_t pid;
4057 struct TCP_Server_Info *server;
4058
4059 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4060
4061#ifdef CONFIG_CIFS_SMB_DIRECT
4062 if (server->smbd_conn)
4063 max_segs = server->smbd_conn->max_frmr_depth;
4064#endif
4065
4066 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4067 pid = open_file->pid;
4068 else
4069 pid = current->tgid;
4070
4071 do {
4072 if (open_file->invalidHandle) {
4073 rc = cifs_reopen_file(open_file, true);
4074 if (rc == -EAGAIN)
4075 continue;
4076 else if (rc)
4077 break;
4078 }
4079
4080 if (cifs_sb->ctx->rsize == 0)
4081 cifs_sb->ctx->rsize =
4082 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4083 cifs_sb->ctx);
4084
4085 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4086 &rsize, credits);
4087 if (rc)
4088 break;
4089
4090 max_len = min_t(size_t, len, rsize);
4091
4092 cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
4093 max_segs, &nsegs);
4094 cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
4095 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
4096 if (cur_len == 0) {
4097 rc = -EIO;
4098 add_credits_and_wake_if(server, credits, 0);
4099 break;
4100 }
4101
4102 rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
4103 if (!rdata) {
4104 add_credits_and_wake_if(server, credits, 0);
4105 rc = -ENOMEM;
4106 break;
4107 }
4108
4109 rdata->server = server;
4110 rdata->cfile = cifsFileInfo_get(open_file);
4111 rdata->offset = fpos;
4112 rdata->bytes = cur_len;
4113 rdata->pid = pid;
4114 rdata->credits = credits_on_stack;
4115 rdata->ctx = ctx;
4116 kref_get(&ctx->refcount);
4117
4118 rdata->iter = ctx->iter;
4119 iov_iter_truncate(&rdata->iter, cur_len);
4120
4121 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4122
4123 if (!rc) {
4124 if (rdata->cfile->invalidHandle)
4125 rc = -EAGAIN;
4126 else
4127 rc = server->ops->async_readv(rdata);
4128 }
4129
4130 if (rc) {
4131 add_credits_and_wake_if(server, &rdata->credits, 0);
4132 kref_put(&rdata->refcount, cifs_readdata_release);
4133 if (rc == -EAGAIN)
4134 continue;
4135 break;
4136 }
4137
4138 list_add_tail(&rdata->list, rdata_list);
4139 iov_iter_advance(&ctx->iter, cur_len);
4140 fpos += cur_len;
4141 len -= cur_len;
4142 } while (len > 0);
4143
4144 return rc;
4145}
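
/*
 * Slicing sketch for the loop above (illustrative only; assume, say, a
 * 4 MiB request with a 1 MiB rsize): each pass consumes at most rsize
 * bytes of ctx->iter, capped to max_segs bvec segments on SMB Direct:
 *
 *	max_len = min_t(size_t, len, rsize);
 *	cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len, max_segs, &nsegs);
 *	rdata->iter = ctx->iter;
 *	iov_iter_truncate(&rdata->iter, cur_len);	// this slice only
 *	...
 *	iov_iter_advance(&ctx->iter, cur_len);		// step past the slice
 *	fpos += cur_len;
 *	len -= cur_len;
 */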
4146
4147static void
4148collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4149{
4150 struct cifs_readdata *rdata, *tmp;
4151 struct cifs_sb_info *cifs_sb;
4152 int rc;
4153
4154 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4155
4156 mutex_lock(&ctx->aio_mutex);
4157
4158 if (list_empty(&ctx->list)) {
4159 mutex_unlock(&ctx->aio_mutex);
4160 return;
4161 }
4162
4163 rc = ctx->rc;
4164 /* the loop below should proceed in the order of increasing offsets */
4165again:
4166 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4167 if (!rc) {
4168 if (!try_wait_for_completion(&rdata->done)) {
4169 mutex_unlock(&ctx->aio_mutex);
4170 return;
4171 }
4172
4173 if (rdata->result == -EAGAIN) {
4174 /* resend call if it's a retryable error */
4175 struct list_head tmp_list;
4176 unsigned int got_bytes = rdata->got_bytes;
4177
4178 list_del_init(&rdata->list);
4179 INIT_LIST_HEAD(&tmp_list);
4180
4181 if (ctx->direct_io) {
4182 /*
4183 * Re-use rdata as this is a
4184 * direct I/O
4185 */
4186 rc = cifs_resend_rdata(
4187 rdata,
4188 &tmp_list, ctx);
4189 } else {
4190 rc = cifs_send_async_read(
4191 rdata->offset + got_bytes,
4192 rdata->bytes - got_bytes,
4193 rdata->cfile, cifs_sb,
4194 &tmp_list, ctx);
4195
4196 kref_put(&rdata->refcount,
4197 cifs_readdata_release);
4198 }
4199
4200 list_splice(&tmp_list, &ctx->list);
4201
4202 goto again;
4203 } else if (rdata->result)
4204 rc = rdata->result;
4205
4206 /* if there was a short read -- discard anything left */
4207 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4208 rc = -ENODATA;
4209
4210 ctx->total_len += rdata->got_bytes;
4211 }
4212 list_del_init(&rdata->list);
4213 kref_put(&rdata->refcount, cifs_readdata_release);
4214 }
4215
4216 /* mask nodata case */
4217 if (rc == -ENODATA)
4218 rc = 0;
4219
4220 ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4221
4222 mutex_unlock(&ctx->aio_mutex);
4223
4224 if (ctx->iocb && ctx->iocb->ki_complete)
4225 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4226 else
4227 complete(&ctx->done);
4228}
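
/*
 * Completion contract for uncached reads (descriptive): every rdata
 * completion calls collect_uncached_read_data(), but the collector backs
 * off via try_wait_for_completion() while any rdata on ctx->list is still
 * outstanding, so only the final completion tallies ctx->total_len and
 * then either calls iocb->ki_complete() (async caller) or signals
 * ctx->done, on which a synchronous __cifs_readv() waits:
 *
 *	rc = wait_for_completion_killable(&ctx->done);
 *	if (!rc)
 *		rc = ctx->rc;
 */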
4229
4230static ssize_t __cifs_readv(
4231 struct kiocb *iocb, struct iov_iter *to, bool direct)
4232{
4233 size_t len;
4234 struct file *file = iocb->ki_filp;
4235 struct cifs_sb_info *cifs_sb;
4236 struct cifsFileInfo *cfile;
4237 struct cifs_tcon *tcon;
4238 ssize_t rc, total_read = 0;
4239 loff_t offset = iocb->ki_pos;
4240 struct cifs_aio_ctx *ctx;
4241
4242 len = iov_iter_count(to);
4243 if (!len)
4244 return 0;
4245
4246 cifs_sb = CIFS_FILE_SB(file);
4247 cfile = file->private_data;
4248 tcon = tlink_tcon(cfile->tlink);
4249
4250 if (!tcon->ses->server->ops->async_readv)
4251 return -ENOSYS;
4252
4253 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4254 cifs_dbg(FYI, "attempting read on write only file instance\n");
4255
4256 ctx = cifs_aio_ctx_alloc();
4257 if (!ctx)
4258 return -ENOMEM;
4259
4260 ctx->pos = offset;
4261 ctx->direct_io = direct;
4262 ctx->len = len;
4263 ctx->cfile = cifsFileInfo_get(cfile);
4264 ctx->nr_pinned_pages = 0;
4265
4266 if (!is_sync_kiocb(iocb))
4267 ctx->iocb = iocb;
4268
4269 if (user_backed_iter(to)) {
4270 /*
4271 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4272 * they contain references to the calling process's virtual
4273 * memory layout which won't be available in an async worker
4274 * thread. This also takes a pin on every folio involved.
4275 */
4276 rc = netfs_extract_user_iter(to, iov_iter_count(to),
4277 &ctx->iter, 0);
4278 if (rc < 0) {
4279 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4280 return rc;
4281 }
4282
4283 ctx->nr_pinned_pages = rc;
4284 ctx->bv = (void *)ctx->iter.bvec;
4285 ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4286 ctx->should_dirty = true;
4287 } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4288 !is_sync_kiocb(iocb)) {
4289 /*
4290 * If the op is asynchronous, we need to copy the list attached
4291 * to a BVEC/KVEC-type iterator, but we assume that the storage
4292 * will be retained by the caller; in any case, we may or may
4293 * not be able to pin the pages, so we don't try.
4294 */
4295 ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4296 if (!ctx->bv) {
4297 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4298 return -ENOMEM;
4299 }
4300 } else {
4301 /*
4302 * Otherwise, we just pass the iterator down as-is and rely on
4303 * the caller to make sure the pages referred to by the
4304 * iterator don't evaporate.
4305 */
4306 ctx->iter = *to;
4307 }
4308
4309 if (direct) {
4310 rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4311 offset, offset + len - 1);
4312 if (rc) {
4313 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4314 return -EAGAIN;
4315 }
4316 }
4317
4318	/* grab a lock here because the read response handlers can access ctx */
4319 mutex_lock(&ctx->aio_mutex);
4320
4321 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4322
4323	/* if at least one read request was successfully sent, reset rc */
4324 if (!list_empty(&ctx->list))
4325 rc = 0;
4326
4327 mutex_unlock(&ctx->aio_mutex);
4328
4329 if (rc) {
4330 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4331 return rc;
4332 }
4333
4334 if (!is_sync_kiocb(iocb)) {
4335 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4336 return -EIOCBQUEUED;
4337 }
4338
4339 rc = wait_for_completion_killable(&ctx->done);
4340 if (rc) {
4341 mutex_lock(&ctx->aio_mutex);
4342 ctx->rc = rc = -EINTR;
4343 total_read = ctx->total_len;
4344 mutex_unlock(&ctx->aio_mutex);
4345 } else {
4346 rc = ctx->rc;
4347 total_read = ctx->total_len;
4348 }
4349
4350 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4351
4352 if (total_read) {
4353 iocb->ki_pos += total_read;
4354 return total_read;
4355 }
4356 return rc;
4357}
4358
4359ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4360{
4361 return __cifs_readv(iocb, to, true);
4362}
4363
4364ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4365{
4366 return __cifs_readv(iocb, to, false);
4367}
4368
4369ssize_t
4370cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4371{
4372 struct inode *inode = file_inode(iocb->ki_filp);
4373 struct cifsInodeInfo *cinode = CIFS_I(inode);
4374 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4375 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4376 iocb->ki_filp->private_data;
4377 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4378 int rc = -EACCES;
4379
4380 /*
4381 * In strict cache mode we need to read from the server all the time
4382 * if we don't have level II oplock because the server can delay mtime
4383	 * change - so we can't make a decision about invalidating the inode.
4384	 * We can also fail when reading pages if there are mandatory locks
4385 * on pages affected by this read but not on the region from pos to
4386 * pos+len-1.
4387 */
4388 if (!CIFS_CACHE_READ(cinode))
4389 return cifs_user_readv(iocb, to);
4390
4391 if (cap_unix(tcon->ses) &&
4392 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4393 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4394 return generic_file_read_iter(iocb, to);
4395
4396 /*
4397	 * We need to hold the sem to be sure nobody modifies the lock list
4398	 * with a brlock that prevents reading.
4399 */
4400 down_read(&cinode->lock_sem);
4401 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4402 tcon->ses->server->vals->shared_lock_type,
4403 0, NULL, CIFS_READ_OP))
4404 rc = generic_file_read_iter(iocb, to);
4405 up_read(&cinode->lock_sem);
4406 return rc;
4407}
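
/*
 * Summary of the strict cache read policy above, as an illustrative
 * sketch of the decision tree (not additional code):
 *
 *	if (!CIFS_CACHE_READ(cinode))
 *		cifs_user_readv();		// no read lease: go to the server
 *	else if (cap_unix(ses) && POSIX brlocks are usable)
 *		generic_file_read_iter();	// cached read, POSIX lock semantics
 *	else if (no conflicting mandatory brlock on the range)
 *		generic_file_read_iter();	// cached read under lock_sem
 *	else
 *		return -EACCES;
 */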
4408
4409static ssize_t
4410cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4411{
4412 int rc = -EACCES;
4413 unsigned int bytes_read = 0;
4414 unsigned int total_read;
4415 unsigned int current_read_size;
4416 unsigned int rsize;
4417 struct cifs_sb_info *cifs_sb;
4418 struct cifs_tcon *tcon;
4419 struct TCP_Server_Info *server;
4420 unsigned int xid;
4421 char *cur_offset;
4422 struct cifsFileInfo *open_file;
4423 struct cifs_io_parms io_parms = {0};
4424 int buf_type = CIFS_NO_BUFFER;
4425 __u32 pid;
4426
4427 xid = get_xid();
4428 cifs_sb = CIFS_FILE_SB(file);
4429
4430 /* FIXME: set up handlers for larger reads and/or convert to async */
4431 rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4432
4433 if (file->private_data == NULL) {
4434 rc = -EBADF;
4435 free_xid(xid);
4436 return rc;
4437 }
4438 open_file = file->private_data;
4439 tcon = tlink_tcon(open_file->tlink);
4440 server = cifs_pick_channel(tcon->ses);
4441
4442 if (!server->ops->sync_read) {
4443 free_xid(xid);
4444 return -ENOSYS;
4445 }
4446
4447 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4448 pid = open_file->pid;
4449 else
4450 pid = current->tgid;
4451
4452 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4453 cifs_dbg(FYI, "attempting read on write only file instance\n");
4454
4455 for (total_read = 0, cur_offset = read_data; read_size > total_read;
4456 total_read += bytes_read, cur_offset += bytes_read) {
4457 do {
4458 current_read_size = min_t(uint, read_size - total_read,
4459 rsize);
4460 /*
4461			 * For Windows ME and 9x we do not want to request more
4462			 * than was negotiated, since the server will then refuse
4463			 * the read.
4464 */
4465 if (!(tcon->ses->capabilities &
4466 tcon->ses->server->vals->cap_large_files)) {
4467 current_read_size = min_t(uint,
4468 current_read_size, CIFSMaxBufSize);
4469 }
4470 if (open_file->invalidHandle) {
4471 rc = cifs_reopen_file(open_file, true);
4472 if (rc != 0)
4473 break;
4474 }
4475 io_parms.pid = pid;
4476 io_parms.tcon = tcon;
4477 io_parms.offset = *offset;
4478 io_parms.length = current_read_size;
4479 io_parms.server = server;
4480 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4481 &bytes_read, &cur_offset,
4482 &buf_type);
4483 } while (rc == -EAGAIN);
4484
4485 if (rc || (bytes_read == 0)) {
4486 if (total_read) {
4487 break;
4488 } else {
4489 free_xid(xid);
4490 return rc;
4491 }
4492 } else {
4493 cifs_stats_bytes_read(tcon, total_read);
4494 *offset += bytes_read;
4495 }
4496 }
4497 free_xid(xid);
4498 return total_read;
4499}
4500
4501/*
4502 * If the page is mmap'ed into a process' page tables, then we need to make
4503 * sure that it doesn't change while being written back.
4504 */
4505static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4506{
4507 struct folio *folio = page_folio(vmf->page);
4508
4509 /* Wait for the folio to be written to the cache before we allow it to
4510 * be modified. We then assume the entire folio will need writing back.
4511 */
4512#ifdef CONFIG_CIFS_FSCACHE
4513 if (folio_test_fscache(folio) &&
4514 folio_wait_fscache_killable(folio) < 0)
4515 return VM_FAULT_RETRY;
4516#endif
4517
4518 folio_wait_writeback(folio);
4519
4520 if (folio_lock_killable(folio) < 0)
4521 return VM_FAULT_RETRY;
4522 return VM_FAULT_LOCKED;
4523}
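
/*
 * mkwrite note (descriptive): returning VM_FAULT_LOCKED tells the MM
 * that the folio is locked and may now be dirtied; VM_FAULT_RETRY is
 * returned when one of the killable waits above is interrupted by a
 * fatal signal, so the fault will be retried from scratch.
 */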
4524
4525static const struct vm_operations_struct cifs_file_vm_ops = {
4526 .fault = filemap_fault,
4527 .map_pages = filemap_map_pages,
4528 .page_mkwrite = cifs_page_mkwrite,
4529};
4530
4531int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4532{
4533 int xid, rc = 0;
4534 struct inode *inode = file_inode(file);
4535
4536 xid = get_xid();
4537
4538 if (!CIFS_CACHE_READ(CIFS_I(inode)))
4539 rc = cifs_zap_mapping(inode);
4540 if (!rc)
4541 rc = generic_file_mmap(file, vma);
4542 if (!rc)
4543 vma->vm_ops = &cifs_file_vm_ops;
4544
4545 free_xid(xid);
4546 return rc;
4547}
4548
4549int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4550{
4551 int rc, xid;
4552
4553 xid = get_xid();
4554
4555 rc = cifs_revalidate_file(file);
4556 if (rc)
4557 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4558 rc);
4559 if (!rc)
4560 rc = generic_file_mmap(file, vma);
4561 if (!rc)
4562 vma->vm_ops = &cifs_file_vm_ops;
4563
4564 free_xid(xid);
4565 return rc;
4566}
4567
4568/*
4569 * Unlock a bunch of folios in the pagecache.
4570 */
4571static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4572{
4573 struct folio *folio;
4574 XA_STATE(xas, &mapping->i_pages, first);
4575
4576 rcu_read_lock();
4577 xas_for_each(&xas, folio, last) {
4578 folio_unlock(folio);
4579 }
4580 rcu_read_unlock();
4581}
4582
4583static void cifs_readahead_complete(struct work_struct *work)
4584{
4585 struct cifs_readdata *rdata = container_of(work,
4586 struct cifs_readdata, work);
4587 struct folio *folio;
4588 pgoff_t last;
4589 bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4590
4591 XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4592
4593 if (good)
4594 cifs_readahead_to_fscache(rdata->mapping->host,
4595 rdata->offset, rdata->bytes);
4596
4597 if (iov_iter_count(&rdata->iter) > 0)
4598 iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4599
4600 last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4601
4602 rcu_read_lock();
4603 xas_for_each(&xas, folio, last) {
4604 if (good) {
4605 flush_dcache_folio(folio);
4606 folio_mark_uptodate(folio);
4607 }
4608 folio_unlock(folio);
4609 }
4610 rcu_read_unlock();
4611
4612 kref_put(&rdata->refcount, cifs_readdata_release);
4613}
4614
4615static void cifs_readahead(struct readahead_control *ractl)
4616{
4617 struct cifsFileInfo *open_file = ractl->file->private_data;
4618 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4619 struct TCP_Server_Info *server;
4620 unsigned int xid, nr_pages, cache_nr_pages = 0;
4621 unsigned int ra_pages;
4622 pgoff_t next_cached = ULONG_MAX, ra_index;
4623 bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4624 cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4625 bool check_cache = caching;
4626 pid_t pid;
4627 int rc = 0;
4628
4629 /* Note that readahead_count() lags behind our dequeuing of pages from
4630	 * the ractl, so we have to keep track for ourselves.
4631 */
4632 ra_pages = readahead_count(ractl);
4633 ra_index = readahead_index(ractl);
4634
4635 xid = get_xid();
4636
4637 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4638 pid = open_file->pid;
4639 else
4640 pid = current->tgid;
4641
4642 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4643
4644 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4645 __func__, ractl->file, ractl->mapping, ra_pages);
4646
4647 /*
4648 * Chop the readahead request up into rsize-sized read requests.
4649 */
4650 while ((nr_pages = ra_pages)) {
4651 unsigned int i, rsize;
4652 struct cifs_readdata *rdata;
4653 struct cifs_credits credits_on_stack;
4654 struct cifs_credits *credits = &credits_on_stack;
4655 struct folio *folio;
4656 pgoff_t fsize;
4657
4658 /*
4659 * Find out if we have anything cached in the range of
4660 * interest, and if so, where the next chunk of cached data is.
4661 */
4662 if (caching) {
4663 if (check_cache) {
4664 rc = cifs_fscache_query_occupancy(
4665 ractl->mapping->host, ra_index, nr_pages,
4666 &next_cached, &cache_nr_pages);
4667 if (rc < 0)
4668 caching = false;
4669 check_cache = false;
4670 }
4671
4672 if (ra_index == next_cached) {
4673 /*
4674 * TODO: Send a whole batch of pages to be read
4675 * by the cache.
4676 */
4677 folio = readahead_folio(ractl);
4678 fsize = folio_nr_pages(folio);
4679 ra_pages -= fsize;
4680 ra_index += fsize;
4681 if (cifs_readpage_from_fscache(ractl->mapping->host,
4682 &folio->page) < 0) {
4683 /*
4684 * TODO: Deal with cache read failure
4685 * here, but for the moment, delegate
4686 * that to readpage.
4687 */
4688 caching = false;
4689 }
4690 folio_unlock(folio);
4691 next_cached += fsize;
4692 cache_nr_pages -= fsize;
4693 if (cache_nr_pages == 0)
4694 check_cache = true;
4695 continue;
4696 }
4697 }
4698
4699 if (open_file->invalidHandle) {
4700 rc = cifs_reopen_file(open_file, true);
4701 if (rc) {
4702 if (rc == -EAGAIN)
4703 continue;
4704 break;
4705 }
4706 }
4707
4708 if (cifs_sb->ctx->rsize == 0)
4709 cifs_sb->ctx->rsize =
4710 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4711 cifs_sb->ctx);
4712
4713 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4714 &rsize, credits);
4715 if (rc)
4716 break;
4717 nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4718 if (next_cached != ULONG_MAX)
4719 nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4720
4721 /*
4722 * Give up immediately if rsize is too small to read an entire
4723 * page. The VFS will fall back to readpage. We should never
4724 * reach this point however since we set ra_pages to 0 when the
4725 * rsize is smaller than a cache page.
4726 */
4727 if (unlikely(!nr_pages)) {
4728 add_credits_and_wake_if(server, credits, 0);
4729 break;
4730 }
4731
4732 rdata = cifs_readdata_alloc(cifs_readahead_complete);
4733 if (!rdata) {
4734 /* best to give up if we're out of mem */
4735 add_credits_and_wake_if(server, credits, 0);
4736 break;
4737 }
4738
4739 rdata->offset = ra_index * PAGE_SIZE;
4740 rdata->bytes = nr_pages * PAGE_SIZE;
4741 rdata->cfile = cifsFileInfo_get(open_file);
4742 rdata->server = server;
4743 rdata->mapping = ractl->mapping;
4744 rdata->pid = pid;
4745 rdata->credits = credits_on_stack;
4746
4747 for (i = 0; i < nr_pages; i++) {
4748 if (!readahead_folio(ractl))
4749 WARN_ON(1);
4750 }
4751 ra_pages -= nr_pages;
4752 ra_index += nr_pages;
4753
4754 iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4755 rdata->offset, rdata->bytes);
4756
4757 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4758 if (!rc) {
4759 if (rdata->cfile->invalidHandle)
4760 rc = -EAGAIN;
4761 else
4762 rc = server->ops->async_readv(rdata);
4763 }
4764
4765 if (rc) {
4766 add_credits_and_wake_if(server, &rdata->credits, 0);
4767 cifs_unlock_folios(rdata->mapping,
4768 rdata->offset / PAGE_SIZE,
4769 (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4770			/* Fall back to readpage in error/reconnect cases */
4771 kref_put(&rdata->refcount, cifs_readdata_release);
4772 break;
4773 }
4774
4775 kref_put(&rdata->refcount, cifs_readdata_release);
4776 }
4777
4778 free_xid(xid);
4779}
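
/*
 * Chunking note for cifs_readahead() above (descriptive): each pass
 * reads at most rsize bytes, trimmed so that a request never overlaps
 * data fscache already holds:
 *
 *	nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
 *	if (next_cached != ULONG_MAX)
 *		nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
 *
 * The folios stay locked until cifs_readahead_complete() unlocks them,
 * or until cifs_unlock_folios() does so when the send fails.
 */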
4780
4781/*
4782 * cifs_readpage_worker must be called with the page pinned
4783 */
4784static int cifs_readpage_worker(struct file *file, struct page *page,
4785 loff_t *poffset)
4786{
4787 struct inode *inode = file_inode(file);
4788 struct timespec64 atime, mtime;
4789 char *read_data;
4790 int rc;
4791
4792 /* Is the page cached? */
4793 rc = cifs_readpage_from_fscache(inode, page);
4794 if (rc == 0)
4795 goto read_complete;
4796
4797 read_data = kmap(page);
4798	/* for reads over a certain size we could initiate async readahead */
4799
4800 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4801
4802 if (rc < 0)
4803 goto io_error;
4804 else
4805 cifs_dbg(FYI, "Bytes read %d\n", rc);
4806
4807 /* we do not want atime to be less than mtime, it broke some apps */
4808 atime = inode_set_atime_to_ts(inode, current_time(inode));
4809 mtime = inode_get_mtime(inode);
4810 if (timespec64_compare(&atime, &mtime) < 0)
4811 inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4812
4813 if (PAGE_SIZE > rc)
4814 memset(read_data + rc, 0, PAGE_SIZE - rc);
4815
4816 flush_dcache_page(page);
4817 SetPageUptodate(page);
4818 rc = 0;
4819
4820io_error:
4821 kunmap(page);
4822
4823read_complete:
4824 unlock_page(page);
4825 return rc;
4826}
4827
4828static int cifs_read_folio(struct file *file, struct folio *folio)
4829{
4830 struct page *page = &folio->page;
4831 loff_t offset = page_file_offset(page);
4832 int rc = -EACCES;
4833 unsigned int xid;
4834
4835 xid = get_xid();
4836
4837 if (file->private_data == NULL) {
4838 rc = -EBADF;
4839 free_xid(xid);
4840 return rc;
4841 }
4842
4843 cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4844 page, (int)offset, (int)offset);
4845
4846 rc = cifs_readpage_worker(file, page, &offset);
4847
4848 free_xid(xid);
4849 return rc;
4850}
4851
4852static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4853{
4854 struct cifsFileInfo *open_file;
4855
4856 spin_lock(&cifs_inode->open_file_lock);
4857 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4858 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4859 spin_unlock(&cifs_inode->open_file_lock);
4860 return 1;
4861 }
4862 }
4863 spin_unlock(&cifs_inode->open_file_lock);
4864 return 0;
4865}
4866
4867/* We do not want to update the file size from the server for inodes
4868   open for write, to avoid races with writepage extending the file.
4869   In the future we could consider allowing the inode to be refreshed
4870   only on increases in the file size, but this is tricky to do without
4871   racing with writebehind page caching in the current Linux kernel
4872   design */
4873bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file,
4874 bool from_readdir)
4875{
4876 if (!cifsInode)
4877 return true;
4878
4879 if (is_inode_writable(cifsInode) ||
4880 ((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) {
4881 /* This inode is open for write at least once */
4882 struct cifs_sb_info *cifs_sb;
4883
4884 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4885 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4886			/* since there is no page cache to corrupt with direct I/O,
4887			   we can change the size safely */
4888 return true;
4889 }
4890
4891 if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4892 return true;
4893
4894 return false;
4895 } else
4896 return true;
4897}
4898
4899static int cifs_write_begin(struct file *file, struct address_space *mapping,
4900 loff_t pos, unsigned len,
4901 struct page **pagep, void **fsdata)
4902{
4903 int oncethru = 0;
4904 pgoff_t index = pos >> PAGE_SHIFT;
4905 loff_t offset = pos & (PAGE_SIZE - 1);
4906 loff_t page_start = pos & PAGE_MASK;
4907 loff_t i_size;
4908 struct page *page;
4909 int rc = 0;
4910
4911 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4912
4913start:
4914 page = grab_cache_page_write_begin(mapping, index);
4915 if (!page) {
4916 rc = -ENOMEM;
4917 goto out;
4918 }
4919
4920 if (PageUptodate(page))
4921 goto out;
4922
4923 /*
4924 * If we write a full page it will be up to date, no need to read from
4925 * the server. If the write is short, we'll end up doing a sync write
4926 * instead.
4927 */
4928 if (len == PAGE_SIZE)
4929 goto out;
4930
4931 /*
4932 * optimize away the read when we have an oplock, and we're not
4933 * expecting to use any of the data we'd be reading in. That
4934 * is, when the page lies beyond the EOF, or straddles the EOF
4935 * and the write will cover all of the existing data.
4936 */
4937 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4938 i_size = i_size_read(mapping->host);
4939 if (page_start >= i_size ||
4940 (offset == 0 && (pos + len) >= i_size)) {
4941 zero_user_segments(page, 0, offset,
4942 offset + len,
4943 PAGE_SIZE);
4944 /*
4945 * PageChecked means that the parts of the page
4946 * to which we're not writing are considered up
4947 * to date. Once the data is copied to the
4948 * page, it can be set uptodate.
4949 */
4950 SetPageChecked(page);
4951 goto out;
4952 }
4953 }
4954
4955 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4956 /*
4957 * might as well read a page, it is fast enough. If we get
4958 * an error, we don't need to return it. cifs_write_end will
4959 * do a sync write instead since PG_uptodate isn't set.
4960 */
4961 cifs_readpage_worker(file, page, &page_start);
4962 put_page(page);
4963 oncethru = 1;
4964 goto start;
4965 } else {
4966 /* we could try using another file handle if there is one -
4967		/* we could try using another file handle if there is one -
4968		   but how would we lock it to prevent a close of that handle
4969		   racing with this read? In any case,
4970		   this will be written out by write_end, so it is fine */
4971out:
4972 *pagep = page;
4973 return rc;
4974}
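
/*
 * write_begin/write_end contract (descriptive sketch): the VFS calls
 * cifs_write_begin() to obtain a locked pagecache page, copies the user
 * data into it, and then calls cifs_write_end() (see cifs_addr_ops
 * below). As the comments above note, if the page never became uptodate
 * here, write_end falls back to a synchronous write of just the copied
 * range rather than marking a partially stale page dirty.
 */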
4975
4976static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4977{
4978 if (folio_test_private(folio))
4979		return false;
4980 if (folio_test_fscache(folio)) {
4981 if (current_is_kswapd() || !(gfp & __GFP_FS))
4982 return false;
4983 folio_wait_fscache(folio);
4984 }
4985 fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4986 return true;
4987}
4988
4989static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4990 size_t length)
4991{
4992 folio_wait_fscache(folio);
4993}
4994
4995static int cifs_launder_folio(struct folio *folio)
4996{
4997 int rc = 0;
4998 loff_t range_start = folio_pos(folio);
4999 loff_t range_end = range_start + folio_size(folio);
5000 struct writeback_control wbc = {
5001 .sync_mode = WB_SYNC_ALL,
5002 .nr_to_write = 0,
5003 .range_start = range_start,
5004 .range_end = range_end,
5005 };
5006
5007 cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
5008
5009 if (folio_clear_dirty_for_io(folio))
5010 rc = cifs_writepage_locked(&folio->page, &wbc);
5011
5012 folio_wait_fscache(folio);
5013 return rc;
5014}
5015
5016void cifs_oplock_break(struct work_struct *work)
5017{
5018 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
5019 oplock_break);
5020 struct inode *inode = d_inode(cfile->dentry);
5021 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
5022 struct cifsInodeInfo *cinode = CIFS_I(inode);
5023 struct cifs_tcon *tcon;
5024 struct TCP_Server_Info *server;
5025 struct tcon_link *tlink;
5026 int rc = 0;
5027 bool purge_cache = false, oplock_break_cancelled;
5028 __u64 persistent_fid, volatile_fid;
5029 __u16 net_fid;
5030
5031 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5032 TASK_UNINTERRUPTIBLE);
5033
5034 tlink = cifs_sb_tlink(cifs_sb);
5035 if (IS_ERR(tlink))
5036 goto out;
5037 tcon = tlink_tcon(tlink);
5038 server = tcon->ses->server;
5039
5040 server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5041 cfile->oplock_epoch, &purge_cache);
5042
5043 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5044 cifs_has_mand_locks(cinode)) {
5045 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5046 inode);
5047 cinode->oplock = 0;
5048 }
5049
5050 if (inode && S_ISREG(inode->i_mode)) {
5051 if (CIFS_CACHE_READ(cinode))
5052 break_lease(inode, O_RDONLY);
5053 else
5054 break_lease(inode, O_WRONLY);
5055 rc = filemap_fdatawrite(inode->i_mapping);
5056 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5057 rc = filemap_fdatawait(inode->i_mapping);
5058 mapping_set_error(inode->i_mapping, rc);
5059 cifs_zap_mapping(inode);
5060 }
5061 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5062 if (CIFS_CACHE_WRITE(cinode))
5063 goto oplock_break_ack;
5064 }
5065
5066 rc = cifs_push_locks(cfile);
5067 if (rc)
5068 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5069
5070oplock_break_ack:
5071 /*
5072	 * When an oplock break is received and there are no active file
5073	 * handles, only cached ones, schedule the deferred close immediately
5074	 * so that a new open will not reuse the cached handle.
5075 */
5076
5077 if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
5078 cifs_close_deferred_file(cinode);
5079
5080 persistent_fid = cfile->fid.persistent_fid;
5081 volatile_fid = cfile->fid.volatile_fid;
5082 net_fid = cfile->fid.netfid;
5083 oplock_break_cancelled = cfile->oplock_break_cancelled;
5084
5085	_cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
5086 /*
5087 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
5088 * an acknowledgment to be sent when the file has already been closed.
5089 */
5090 spin_lock(&cinode->open_file_lock);
5091 /* check list empty since can race with kill_sb calling tree disconnect */
5092 if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
5093 spin_unlock(&cinode->open_file_lock);
5094 rc = server->ops->oplock_response(tcon, persistent_fid,
5095 volatile_fid, net_fid, cinode);
5096 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5097 } else
5098 spin_unlock(&cinode->open_file_lock);
5099
5100 cifs_put_tlink(tlink);
5101out:
5102 cifs_done_oplock_break(cinode);
5103}
5104
5105/*
5106 * The presence of cifs_direct_io() in the address space ops vector
5107 * allows open() with the O_DIRECT flag, which would have failed otherwise.
5108 *
5109 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
5110 * so this method should never be called.
5111 *
5112 * Direct IO is not yet supported in the cached mode.
5113 */
5114static ssize_t
5115cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5116{
5117 /*
5118 * FIXME
5119 * Eventually need to support direct IO for non forcedirectio mounts
5120 */
5121 return -EINVAL;
5122}
5123
5124static int cifs_swap_activate(struct swap_info_struct *sis,
5125 struct file *swap_file, sector_t *span)
5126{
5127 struct cifsFileInfo *cfile = swap_file->private_data;
5128 struct inode *inode = swap_file->f_mapping->host;
5129 unsigned long blocks;
5130 long long isize;
5131
5132 cifs_dbg(FYI, "swap activate\n");
5133
5134 if (!swap_file->f_mapping->a_ops->swap_rw)
5135 /* Cannot support swap */
5136 return -EINVAL;
5137
5138 spin_lock(&inode->i_lock);
5139 blocks = inode->i_blocks;
5140 isize = inode->i_size;
5141 spin_unlock(&inode->i_lock);
5142 if (blocks*512 < isize) {
5143 pr_warn("swap activate: swapfile has holes\n");
5144 return -EINVAL;
5145 }
5146 *span = sis->pages;
5147
5148 pr_warn_once("Swap support over SMB3 is experimental\n");
5149
5150 /*
5151 * TODO: consider adding ACL (or documenting how) to prevent other
5152 * users (on this or other systems) from reading it
5153 */
5154
5155
5156 /* TODO: add sk_set_memalloc(inet) or similar */
5157
5158 if (cfile)
5159 cfile->swapfile = true;
5160 /*
5161 * TODO: Since file already open, we can't open with DENY_ALL here
5162 * but we could add call to grab a byte range lock to prevent others
5163 * from reading or writing the file
5164 */
5165
5166 sis->flags |= SWP_FS_OPS;
5167 return add_swap_extent(sis, 0, sis->max, 0);
5168}
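
/*
 * Worked example for the holes check above: i_blocks counts 512-byte
 * sectors actually allocated, so a fully allocated 1 MiB swapfile has
 * i_blocks == 2048 and blocks * 512 == i_size. When blocks * 512 is
 * smaller than i_size, part of the file is sparse (has holes) and swap
 * activation is refused.
 */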
5169
5170static void cifs_swap_deactivate(struct file *file)
5171{
5172 struct cifsFileInfo *cfile = file->private_data;
5173
5174 cifs_dbg(FYI, "swap deactivate\n");
5175
5176 /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5177
5178 if (cfile)
5179 cfile->swapfile = false;
5180
5181 /* do we need to unpin (or unlock) the file */
5182}
5183
5184const struct address_space_operations cifs_addr_ops = {
5185 .read_folio = cifs_read_folio,
5186 .readahead = cifs_readahead,
5187 .writepages = cifs_writepages,
5188 .write_begin = cifs_write_begin,
5189 .write_end = cifs_write_end,
5190 .dirty_folio = netfs_dirty_folio,
5191 .release_folio = cifs_release_folio,
5192 .direct_IO = cifs_direct_io,
5193 .invalidate_folio = cifs_invalidate_folio,
5194 .launder_folio = cifs_launder_folio,
5195 .migrate_folio = filemap_migrate_folio,
5196 /*
5197	 * TODO: investigate whether an is_dirty_writeback helper would be
5198	 * useful here, and add one if so
5199 */
5200 .swap_activate = cifs_swap_activate,
5201 .swap_deactivate = cifs_swap_deactivate,
5202};
5203
5204/*
5205 * cifs_readahead requires the server to support a buffer large enough to
5206 * contain the header plus one complete page of data. Otherwise, we need
5207 * to leave cifs_readahead out of the address space operations.
5208 */
5209const struct address_space_operations cifs_addr_ops_smallbuf = {
5210 .read_folio = cifs_read_folio,
5211 .writepages = cifs_writepages,
5212 .write_begin = cifs_write_begin,
5213 .write_end = cifs_write_end,
5214 .dirty_folio = netfs_dirty_folio,
5215 .release_folio = cifs_release_folio,
5216 .invalidate_folio = cifs_invalidate_folio,
5217 .launder_folio = cifs_launder_folio,
5218 .migrate_folio = filemap_migrate_folio,
5219};