Linux Audio

Check our new training course

Loading...
v3.15
   1/*
   2 *  pNFS functions to call and manage layout drivers.
   3 *
   4 *  Copyright (c) 2002 [year of first publication]
   5 *  The Regents of the University of Michigan
   6 *  All Rights Reserved
   7 *
   8 *  Dean Hildebrand <dhildebz@umich.edu>
   9 *
  10 *  Permission is granted to use, copy, create derivative works, and
  11 *  redistribute this software and such derivative works for any purpose,
  12 *  so long as the name of the University of Michigan is not used in
  13 *  any advertising or publicity pertaining to the use or distribution
  14 *  of this software without specific, written prior authorization. If
  15 *  the above copyright notice or any other identification of the
  16 *  University of Michigan is included in any copy of any portion of
  17 *  this software, then the disclaimer below must also be included.
  18 *
  19 *  This software is provided as is, without representation or warranty
  20 *  of any kind either express or implied, including without limitation
  21 *  the implied warranties of merchantability, fitness for a particular
  22 *  purpose, or noninfringement.  The Regents of the University of
  23 *  Michigan shall not be liable for any damages, including special,
  24 *  indirect, incidental, or consequential damages, with respect to any
  25 *  claim arising out of or in connection with the use of the software,
  26 *  even if it has been or is hereafter advised of the possibility of
  27 *  such damages.
  28 */
  29
  30#include <linux/nfs_fs.h>
  31#include <linux/nfs_page.h>
  32#include <linux/module.h>
  33#include "internal.h"
  34#include "pnfs.h"
  35#include "iostat.h"
  36#include "nfs4trace.h"
  37
  38#define NFSDBG_FACILITY		NFSDBG_PNFS
  39#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)
  40
  41/* Locking:
  42 *
  43 * pnfs_spinlock:
  44 *      protects pnfs_modules_tbl.
  45 */
  46static DEFINE_SPINLOCK(pnfs_spinlock);
  47
  48/*
  49 * pnfs_modules_tbl holds all pnfs modules
  50 */
  51static LIST_HEAD(pnfs_modules_tbl);
  52
  53/* Return the registered pnfs layout driver module matching given id */
  54static struct pnfs_layoutdriver_type *
  55find_pnfs_driver_locked(u32 id)
  56{
  57	struct pnfs_layoutdriver_type *local;
  58
  59	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
  60		if (local->id == id)
  61			goto out;
  62	local = NULL;
  63out:
  64	dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
  65	return local;
  66}
  67
  68static struct pnfs_layoutdriver_type *
  69find_pnfs_driver(u32 id)
  70{
  71	struct pnfs_layoutdriver_type *local;
  72
  73	spin_lock(&pnfs_spinlock);
  74	local = find_pnfs_driver_locked(id);
  75	if (local != NULL && !try_module_get(local->owner)) {
  76		dprintk("%s: Could not grab reference on module\n", __func__);
  77		local = NULL;
  78	}
  79	spin_unlock(&pnfs_spinlock);
  80	return local;
  81}
  82
  83void
  84unset_pnfs_layoutdriver(struct nfs_server *nfss)
  85{
  86	if (nfss->pnfs_curr_ld) {
  87		if (nfss->pnfs_curr_ld->clear_layoutdriver)
  88			nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
  89		/* Decrement the MDS count. Purge the deviceid cache if zero */
  90		if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count))
  91			nfs4_deviceid_purge_client(nfss->nfs_client);
  92		module_put(nfss->pnfs_curr_ld->owner);
  93	}
  94	nfss->pnfs_curr_ld = NULL;
  95}
  96
  97/*
  98 * Try to set the server's pnfs module to the pnfs layout type specified by id.
  99 * Currently only one pNFS layout driver per filesystem is supported.
 100 *
 101 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
 102 */
 103void
 104set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
 105		      u32 id)
 106{
 107	struct pnfs_layoutdriver_type *ld_type = NULL;
 108
 109	if (id == 0)
 110		goto out_no_driver;
 111	if (!(server->nfs_client->cl_exchange_flags &
 112		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
 113		printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
 114			__func__, id, server->nfs_client->cl_exchange_flags);
 115		goto out_no_driver;
 116	}
 117	ld_type = find_pnfs_driver(id);
 118	if (!ld_type) {
 119		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
 120		ld_type = find_pnfs_driver(id);
 121		if (!ld_type) {
 122			dprintk("%s: No pNFS module found for %u.\n",
 123				__func__, id);
 124			goto out_no_driver;
 125		}
 126	}
 127	server->pnfs_curr_ld = ld_type;
 128	if (ld_type->set_layoutdriver
 129	    && ld_type->set_layoutdriver(server, mntfh)) {
 130		printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
 131			"driver %u.\n", __func__, id);
 132		module_put(ld_type->owner);
 133		goto out_no_driver;
 134	}
 135	/* Bump the MDS count */
 136	atomic_inc(&server->nfs_client->cl_mds_count);
 137
 138	dprintk("%s: pNFS module for %u set\n", __func__, id);
 139	return;
 140
 141out_no_driver:
 142	dprintk("%s: Using NFSv4 I/O\n", __func__);
 143	server->pnfs_curr_ld = NULL;
 144}
 145
 146int
 147pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
 148{
 149	int status = -EINVAL;
 150	struct pnfs_layoutdriver_type *tmp;
 151
 152	if (ld_type->id == 0) {
 153		printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__);
 154		return status;
 155	}
 156	if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
 157		printk(KERN_ERR "NFS: %s Layout driver must provide "
 158		       "alloc_lseg and free_lseg.\n", __func__);
 159		return status;
 160	}
 161
 162	spin_lock(&pnfs_spinlock);
 163	tmp = find_pnfs_driver_locked(ld_type->id);
 164	if (!tmp) {
 165		list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
 166		status = 0;
 167		dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
 168			ld_type->name);
 169	} else {
 170		printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n",
 171			__func__, ld_type->id);
 172	}
 173	spin_unlock(&pnfs_spinlock);
 174
 175	return status;
 176}
 177EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);
 178
 179void
 180pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
 181{
 182	dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
 183	spin_lock(&pnfs_spinlock);
 184	list_del(&ld_type->pnfs_tblid);
 185	spin_unlock(&pnfs_spinlock);
 186}
 187EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
 188
 189/*
 190 * pNFS client layout cache
 191 */
 192
 193/* Need to hold i_lock if caller does not already hold reference */
 194void
 195pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo)
 196{
 197	atomic_inc(&lo->plh_refcount);
 198}
 199
 200static struct pnfs_layout_hdr *
 201pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
 202{
 203	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
 204	return ld->alloc_layout_hdr(ino, gfp_flags);
 
 205}
 206
 207static void
 208pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
 209{
 210	struct nfs_server *server = NFS_SERVER(lo->plh_inode);
 211	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
 212
 213	if (!list_empty(&lo->plh_layouts)) {
 214		struct nfs_client *clp = server->nfs_client;
 215
 216		spin_lock(&clp->cl_lock);
 217		list_del_init(&lo->plh_layouts);
 218		spin_unlock(&clp->cl_lock);
 219	}
 220	put_rpccred(lo->plh_lc_cred);
 221	return ld->free_layout_hdr(lo);
 222}
 223
 224static void
 225pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
 226{
 227	struct nfs_inode *nfsi = NFS_I(lo->plh_inode);
 228	dprintk("%s: freeing layout cache %p\n", __func__, lo);
 229	nfsi->layout = NULL;
 230	/* Reset MDS Threshold I/O counters */
 231	nfsi->write_io = 0;
 232	nfsi->read_io = 0;
 233}
 234
 235void
 236pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
 237{
 238	struct inode *inode = lo->plh_inode;
 239
 240	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
 241		pnfs_detach_layout_hdr(lo);
 242		spin_unlock(&inode->i_lock);
 243		pnfs_free_layout_hdr(lo);
 244	}
 245}
 246
 247static int
 248pnfs_iomode_to_fail_bit(u32 iomode)
 249{
 250	return iomode == IOMODE_RW ?
 251		NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
 252}
 253
 254static void
 255pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
 256{
 257	lo->plh_retry_timestamp = jiffies;
 258	if (!test_and_set_bit(fail_bit, &lo->plh_flags))
 259		atomic_inc(&lo->plh_refcount);
 260}
 261
 262static void
 263pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
 264{
 265	if (test_and_clear_bit(fail_bit, &lo->plh_flags))
 266		atomic_dec(&lo->plh_refcount);
 267}
 268
 269static void
 270pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode)
 271{
 272	struct inode *inode = lo->plh_inode;
 273	struct pnfs_layout_range range = {
 274		.iomode = iomode,
 275		.offset = 0,
 276		.length = NFS4_MAX_UINT64,
 277	};
 278	LIST_HEAD(head);
 279
 280	spin_lock(&inode->i_lock);
 281	pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
 282	pnfs_mark_matching_lsegs_invalid(lo, &head, &range);
 283	spin_unlock(&inode->i_lock);
 284	pnfs_free_lseg_list(&head);
 285	dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
 286			iomode == IOMODE_RW ?  "RW" : "READ");
 287}
 288
 289static bool
 290pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode)
 291{
 292	unsigned long start, end;
 293	int fail_bit = pnfs_iomode_to_fail_bit(iomode);
 294
 295	if (test_bit(fail_bit, &lo->plh_flags) == 0)
 296		return false;
 297	end = jiffies;
 298	start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT;
 299	if (!time_in_range(lo->plh_retry_timestamp, start, end)) {
 300		/* It is time to retry the failed layoutgets */
 301		pnfs_layout_clear_fail_bit(lo, fail_bit);
 302		return false;
 303	}
 304	return true;
 305}
 306
 307static void
 308init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
 309{
 310	INIT_LIST_HEAD(&lseg->pls_list);
 311	INIT_LIST_HEAD(&lseg->pls_lc_list);
 312	atomic_set(&lseg->pls_refcount, 1);
 313	smp_mb();
 314	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
 315	lseg->pls_layout = lo;
 316}
 317
 318static void pnfs_free_lseg(struct pnfs_layout_segment *lseg)
 319{
 320	struct inode *ino = lseg->pls_layout->plh_inode;
 321
 322	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
 
 
 323}
 324
 325static void
 326pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
 327		struct pnfs_layout_segment *lseg)
 328{
 329	struct inode *inode = lo->plh_inode;
 330
 331	WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
 332	list_del_init(&lseg->pls_list);
 333	/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
 334	atomic_dec(&lo->plh_refcount);
 335	if (list_empty(&lo->plh_segs))
 336		clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
 
 337	rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
 338}
 339
 340void
 341pnfs_put_lseg(struct pnfs_layout_segment *lseg)
 342{
 343	struct pnfs_layout_hdr *lo;
 344	struct inode *inode;
 345
 346	if (!lseg)
 347		return;
 348
 349	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
 350		atomic_read(&lseg->pls_refcount),
 351		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
 352	lo = lseg->pls_layout;
 353	inode = lo->plh_inode;
 354	if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
 355		pnfs_get_layout_hdr(lo);
 356		pnfs_layout_remove_lseg(lo, lseg);
 
 
 357		spin_unlock(&inode->i_lock);
 358		pnfs_free_lseg(lseg);
 359		pnfs_put_layout_hdr(lo);
 360	}
 361}
 362EXPORT_SYMBOL_GPL(pnfs_put_lseg);
 363
 364static u64
 365end_offset(u64 start, u64 len)
 366{
 367	u64 end;
 368
 369	end = start + len;
 370	return end >= start ? end : NFS4_MAX_UINT64;
 371}
 372
 
 
 
 
 
 
 
 
 
 
 
 373/*
 374 * is l2 fully contained in l1?
 375 *   start1                             end1
 376 *   [----------------------------------)
 377 *           start2           end2
 378 *           [----------------)
 379 */
 380static bool
 381pnfs_lseg_range_contained(const struct pnfs_layout_range *l1,
 382		 const struct pnfs_layout_range *l2)
 383{
 384	u64 start1 = l1->offset;
 385	u64 end1 = end_offset(start1, l1->length);
 386	u64 start2 = l2->offset;
 387	u64 end2 = end_offset(start2, l2->length);
 388
 389	return (start1 <= start2) && (end1 >= end2);
 390}
 391
 392/*
 393 * is l1 and l2 intersecting?
 394 *   start1                             end1
 395 *   [----------------------------------)
 396 *                              start2           end2
 397 *                              [----------------)
 398 */
 399static bool
 400pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1,
 401		    const struct pnfs_layout_range *l2)
 402{
 403	u64 start1 = l1->offset;
 404	u64 end1 = end_offset(start1, l1->length);
 405	u64 start2 = l2->offset;
 406	u64 end2 = end_offset(start2, l2->length);
 407
 408	return (end1 == NFS4_MAX_UINT64 || end1 > start2) &&
 409	       (end2 == NFS4_MAX_UINT64 || end2 > start1);
 410}
 411
 412static bool
 413should_free_lseg(const struct pnfs_layout_range *lseg_range,
 414		 const struct pnfs_layout_range *recall_range)
 415{
 416	return (recall_range->iomode == IOMODE_ANY ||
 417		lseg_range->iomode == recall_range->iomode) &&
 418	       pnfs_lseg_range_intersecting(lseg_range, recall_range);
 419}
 420
 421static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
 422		struct list_head *tmp_list)
 423{
 424	if (!atomic_dec_and_test(&lseg->pls_refcount))
 425		return false;
 426	pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
 427	list_add(&lseg->pls_list, tmp_list);
 428	return true;
 429}
 430
 431/* Returns 1 if lseg is removed from list, 0 otherwise */
 432static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
 433			     struct list_head *tmp_list)
 434{
 435	int rv = 0;
 436
 437	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
 438		/* Remove the reference keeping the lseg in the
 439		 * list.  It will now be removed when all
 440		 * outstanding io is finished.
 441		 */
 442		dprintk("%s: lseg %p ref %d\n", __func__, lseg,
 443			atomic_read(&lseg->pls_refcount));
 444		if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
 
 
 445			rv = 1;
 
 446	}
 447	return rv;
 448}
 449
 450/* Returns count of number of matching invalid lsegs remaining in list
 451 * after call.
 452 */
 453int
 454pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 455			    struct list_head *tmp_list,
 456			    struct pnfs_layout_range *recall_range)
 457{
 458	struct pnfs_layout_segment *lseg, *next;
 459	int invalid = 0, removed = 0;
 460
 461	dprintk("%s:Begin lo %p\n", __func__, lo);
 462
 463	if (list_empty(&lo->plh_segs))
 
 
 
 
 
 464		return 0;
 
 465	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
 466		if (!recall_range ||
 467		    should_free_lseg(&lseg->pls_range, recall_range)) {
 468			dprintk("%s: freeing lseg %p iomode %d "
 469				"offset %llu length %llu\n", __func__,
 470				lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
 471				lseg->pls_range.length);
 472			invalid++;
 473			removed += mark_lseg_invalid(lseg, tmp_list);
 474		}
 475	dprintk("%s:Return %i\n", __func__, invalid - removed);
 476	return invalid - removed;
 477}
 478
 479/* note free_me must contain lsegs from a single layout_hdr */
 480void
 481pnfs_free_lseg_list(struct list_head *free_me)
 482{
 483	struct pnfs_layout_segment *lseg, *tmp;
 
 484
 485	if (list_empty(free_me))
 486		return;
 487
 
 
 
 
 
 
 
 
 
 
 
 488	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
 489		list_del(&lseg->pls_list);
 490		pnfs_free_lseg(lseg);
 491	}
 492}
 493
 494void
 495pnfs_destroy_layout(struct nfs_inode *nfsi)
 496{
 497	struct pnfs_layout_hdr *lo;
 498	LIST_HEAD(tmp_list);
 499
 500	spin_lock(&nfsi->vfs_inode.i_lock);
 501	lo = nfsi->layout;
 502	if (lo) {
 503		lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
 504		pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
 505		pnfs_get_layout_hdr(lo);
 506		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
 507		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
 508		spin_unlock(&nfsi->vfs_inode.i_lock);
 509		pnfs_free_lseg_list(&tmp_list);
 510		pnfs_put_layout_hdr(lo);
 511	} else
 512		spin_unlock(&nfsi->vfs_inode.i_lock);
 513}
 514EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
 515
 516static bool
 517pnfs_layout_add_bulk_destroy_list(struct inode *inode,
 518		struct list_head *layout_list)
 519{
 520	struct pnfs_layout_hdr *lo;
 521	bool ret = false;
 522
 523	spin_lock(&inode->i_lock);
 524	lo = NFS_I(inode)->layout;
 525	if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
 526		pnfs_get_layout_hdr(lo);
 527		list_add(&lo->plh_bulk_destroy, layout_list);
 528		ret = true;
 529	}
 530	spin_unlock(&inode->i_lock);
 531	return ret;
 532}
 533
 534/* Caller must hold rcu_read_lock and clp->cl_lock */
 535static int
 536pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
 537		struct nfs_server *server,
 538		struct list_head *layout_list)
 539{
 540	struct pnfs_layout_hdr *lo, *next;
 541	struct inode *inode;
 542
 543	list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
 544		inode = igrab(lo->plh_inode);
 545		if (inode == NULL)
 546			continue;
 547		list_del_init(&lo->plh_layouts);
 548		if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
 549			continue;
 550		rcu_read_unlock();
 551		spin_unlock(&clp->cl_lock);
 552		iput(inode);
 553		spin_lock(&clp->cl_lock);
 554		rcu_read_lock();
 555		return -EAGAIN;
 556	}
 557	return 0;
 558}
 
 559
 560static int
 561pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
 562		bool is_bulk_recall)
 
 
 
 563{
 
 564	struct pnfs_layout_hdr *lo;
 565	struct inode *inode;
 566	struct pnfs_layout_range range = {
 567		.iomode = IOMODE_ANY,
 568		.offset = 0,
 569		.length = NFS4_MAX_UINT64,
 570	};
 571	LIST_HEAD(lseg_list);
 572	int ret = 0;
 573
 574	while (!list_empty(layout_list)) {
 575		lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
 576				plh_bulk_destroy);
 577		dprintk("%s freeing layout for inode %lu\n", __func__,
 578			lo->plh_inode->i_ino);
 579		inode = lo->plh_inode;
 580		spin_lock(&inode->i_lock);
 581		list_del_init(&lo->plh_bulk_destroy);
 582		lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
 583		if (is_bulk_recall)
 584			set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
 585		if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
 586			ret = -EAGAIN;
 587		spin_unlock(&inode->i_lock);
 588		pnfs_free_lseg_list(&lseg_list);
 589		pnfs_put_layout_hdr(lo);
 590		iput(inode);
 591	}
 592	return ret;
 593}
 594
 595int
 596pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
 597		struct nfs_fsid *fsid,
 598		bool is_recall)
 599{
 600	struct nfs_server *server;
 601	LIST_HEAD(layout_list);
 602
 603	spin_lock(&clp->cl_lock);
 604	rcu_read_lock();
 605restart:
 606	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
 607		if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
 608			continue;
 609		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
 610				server,
 611				&layout_list) != 0)
 612			goto restart;
 613	}
 614	rcu_read_unlock();
 615	spin_unlock(&clp->cl_lock);
 616
 617	if (list_empty(&layout_list))
 618		return 0;
 619	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
 620}
 621
 622int
 623pnfs_destroy_layouts_byclid(struct nfs_client *clp,
 624		bool is_recall)
 625{
 626	struct nfs_server *server;
 627	LIST_HEAD(layout_list);
 628
 629	spin_lock(&clp->cl_lock);
 630	rcu_read_lock();
 631restart:
 632	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
 633		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
 634					server,
 635					&layout_list) != 0)
 636			goto restart;
 637	}
 638	rcu_read_unlock();
 639	spin_unlock(&clp->cl_lock);
 640
 641	if (list_empty(&layout_list))
 642		return 0;
 643	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
 644}
 645
 646/*
 647 * Called by the state manger to remove all layouts established under an
 648 * expired lease.
 649 */
 650void
 651pnfs_destroy_all_layouts(struct nfs_client *clp)
 652{
 653	nfs4_deviceid_mark_client_invalid(clp);
 654	nfs4_deviceid_purge_client(clp);
 655
 656	pnfs_destroy_layouts_byclid(clp, false);
 657}
 658
 659/*
 660 * Compare 2 layout stateid sequence ids, to see which is newer,
 661 * taking into account wraparound issues.
 662 */
 663static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
 664{
 665	return (s32)(s1 - s2) > 0;
 666}
 667
 668static void
 669pnfs_verify_layout_stateid(struct pnfs_layout_hdr *lo,
 670		const nfs4_stateid *new,
 671		struct list_head *free_me_list)
 672{
 673	if (nfs4_stateid_match_other(&lo->plh_stateid, new))
 674		return;
 675	/* Layout is new! Kill existing layout segments */
 676	pnfs_mark_matching_lsegs_invalid(lo, free_me_list, NULL);
 677}
 678
 679/* update lo->plh_stateid with new if is more recent */
 680void
 681pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
 682			bool update_barrier)
 683{
 684	u32 oldseq, newseq, new_barrier;
 685	int empty = list_empty(&lo->plh_segs);
 686
 687	oldseq = be32_to_cpu(lo->plh_stateid.seqid);
 688	newseq = be32_to_cpu(new->seqid);
 689	if (empty || pnfs_seqid_is_newer(newseq, oldseq)) {
 690		nfs4_stateid_copy(&lo->plh_stateid, new);
 691		if (update_barrier) {
 692			new_barrier = be32_to_cpu(new->seqid);
 
 
 
 693		} else {
 694			/* Because of wraparound, we want to keep the barrier
 695			 * "close" to the current seqids.
 
 
 
 
 696			 */
 697			new_barrier = newseq - atomic_read(&lo->plh_outstanding);
 
 698		}
 699		if (empty || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
 700			lo->plh_barrier = new_barrier;
 701	}
 702}
 703
 704static bool
 705pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo,
 706		const nfs4_stateid *stateid)
 707{
 708	u32 seqid = be32_to_cpu(stateid->seqid);
 709
 710	return !pnfs_seqid_is_newer(seqid, lo->plh_barrier);
 711}
 712
 713/* lget is set to 1 if called from inside send_layoutget call chain */
 714static bool
 715pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, int lget)
 
 716{
 
 
 
 717	return lo->plh_block_lgets ||
 
 718		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
 719		(list_empty(&lo->plh_segs) &&
 720		 (atomic_read(&lo->plh_outstanding) > lget));
 721}
 722
 723int
 724pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
 725			      struct nfs4_state *open_state)
 726{
 727	int status = 0;
 728
 729	dprintk("--> %s\n", __func__);
 730	spin_lock(&lo->plh_inode->i_lock);
 731	if (pnfs_layoutgets_blocked(lo, 1)) {
 732		status = -EAGAIN;
 733	} else if (!nfs4_valid_open_stateid(open_state)) {
 734		status = -EBADF;
 735	} else if (list_empty(&lo->plh_segs)) {
 736		int seq;
 737
 738		do {
 739			seq = read_seqbegin(&open_state->seqlock);
 740			nfs4_stateid_copy(dst, &open_state->stateid);
 741		} while (read_seqretry(&open_state->seqlock, seq));
 742	} else
 743		nfs4_stateid_copy(dst, &lo->plh_stateid);
 744	spin_unlock(&lo->plh_inode->i_lock);
 745	dprintk("<-- %s\n", __func__);
 746	return status;
 747}
 748
 749/*
 750* Get layout from server.
 751*    for now, assume that whole file layouts are requested.
 752*    arg->offset: 0
 753*    arg->length: all ones
 754*/
 755static struct pnfs_layout_segment *
 756send_layoutget(struct pnfs_layout_hdr *lo,
 757	   struct nfs_open_context *ctx,
 758	   struct pnfs_layout_range *range,
 759	   gfp_t gfp_flags)
 760{
 761	struct inode *ino = lo->plh_inode;
 762	struct nfs_server *server = NFS_SERVER(ino);
 763	struct nfs4_layoutget *lgp;
 764	struct pnfs_layout_segment *lseg;
 765
 766	dprintk("--> %s\n", __func__);
 767
 
 768	lgp = kzalloc(sizeof(*lgp), gfp_flags);
 769	if (lgp == NULL)
 770		return NULL;
 771
 772	lgp->args.minlength = PAGE_CACHE_SIZE;
 773	if (lgp->args.minlength > range->length)
 774		lgp->args.minlength = range->length;
 775	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
 776	lgp->args.range = *range;
 777	lgp->args.type = server->pnfs_curr_ld->id;
 778	lgp->args.inode = ino;
 779	lgp->args.ctx = get_nfs_open_context(ctx);
 
 780	lgp->gfp_flags = gfp_flags;
 781	lgp->cred = lo->plh_lc_cred;
 782
 783	/* Synchronously retrieve layout information from server and
 784	 * store in lseg.
 785	 */
 786	lseg = nfs4_proc_layoutget(lgp, gfp_flags);
 787	if (IS_ERR(lseg)) {
 788		switch (PTR_ERR(lseg)) {
 789		case -ENOMEM:
 790		case -ERESTARTSYS:
 791			break;
 792		default:
 793			/* remember that LAYOUTGET failed and suspend trying */
 794			pnfs_layout_io_set_failed(lo, range->iomode);
 795		}
 796		return NULL;
 797	}
 798
 799	return lseg;
 800}
 801
 802static void pnfs_clear_layoutcommit(struct inode *inode,
 803		struct list_head *head)
 804{
 805	struct nfs_inode *nfsi = NFS_I(inode);
 806	struct pnfs_layout_segment *lseg, *tmp;
 807
 808	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
 809		return;
 810	list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
 811		if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
 812			continue;
 813		pnfs_lseg_dec_and_remove_zero(lseg, head);
 814	}
 815}
 816
 817/*
 818 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
 819 * when the layout segment list is empty.
 820 *
 821 * Note that a pnfs_layout_hdr can exist with an empty layout segment
 822 * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the
 823 * deviceid is marked invalid.
 824 */
 825int
 826_pnfs_return_layout(struct inode *ino)
 827{
 828	struct pnfs_layout_hdr *lo = NULL;
 829	struct nfs_inode *nfsi = NFS_I(ino);
 830	LIST_HEAD(tmp_list);
 831	struct nfs4_layoutreturn *lrp;
 832	nfs4_stateid stateid;
 833	int status = 0, empty;
 834
 835	dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);
 836
 837	spin_lock(&ino->i_lock);
 838	lo = nfsi->layout;
 839	if (!lo) {
 840		spin_unlock(&ino->i_lock);
 841		dprintk("NFS: %s no layout to return\n", __func__);
 842		goto out;
 843	}
 844	stateid = nfsi->layout->plh_stateid;
 845	/* Reference matched in nfs4_layoutreturn_release */
 846	pnfs_get_layout_hdr(lo);
 847	empty = list_empty(&lo->plh_segs);
 848	pnfs_clear_layoutcommit(ino, &tmp_list);
 849	pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
 850	/* Don't send a LAYOUTRETURN if list was initially empty */
 851	if (empty) {
 852		spin_unlock(&ino->i_lock);
 853		pnfs_put_layout_hdr(lo);
 854		dprintk("NFS: %s no layout segments to return\n", __func__);
 855		goto out;
 856	}
 857	lo->plh_block_lgets++;
 858	spin_unlock(&ino->i_lock);
 859	pnfs_free_lseg_list(&tmp_list);
 860
 
 
 861	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
 862	if (unlikely(lrp == NULL)) {
 863		status = -ENOMEM;
 864		spin_lock(&ino->i_lock);
 865		lo->plh_block_lgets--;
 866		spin_unlock(&ino->i_lock);
 867		pnfs_put_layout_hdr(lo);
 868		goto out;
 869	}
 870
 871	lrp->args.stateid = stateid;
 872	lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
 873	lrp->args.inode = ino;
 874	lrp->args.layout = lo;
 875	lrp->clp = NFS_SERVER(ino)->nfs_client;
 876	lrp->cred = lo->plh_lc_cred;
 877
 878	status = nfs4_proc_layoutreturn(lrp);
 879out:
 880	dprintk("<-- %s status: %d\n", __func__, status);
 881	return status;
 882}
 883EXPORT_SYMBOL_GPL(_pnfs_return_layout);
 884
 885int
 886pnfs_commit_and_return_layout(struct inode *inode)
 887{
 888	struct pnfs_layout_hdr *lo;
 889	int ret;
 890
 891	spin_lock(&inode->i_lock);
 892	lo = NFS_I(inode)->layout;
 893	if (lo == NULL) {
 894		spin_unlock(&inode->i_lock);
 895		return 0;
 896	}
 897	pnfs_get_layout_hdr(lo);
 898	/* Block new layoutgets and read/write to ds */
 899	lo->plh_block_lgets++;
 900	spin_unlock(&inode->i_lock);
 901	filemap_fdatawait(inode->i_mapping);
 902	ret = pnfs_layoutcommit_inode(inode, true);
 903	if (ret == 0)
 904		ret = _pnfs_return_layout(inode);
 905	spin_lock(&inode->i_lock);
 906	lo->plh_block_lgets--;
 907	spin_unlock(&inode->i_lock);
 908	pnfs_put_layout_hdr(lo);
 909	return ret;
 910}
 911
 912bool pnfs_roc(struct inode *ino)
 913{
 914	struct pnfs_layout_hdr *lo;
 915	struct pnfs_layout_segment *lseg, *tmp;
 916	LIST_HEAD(tmp_list);
 917	bool found = false;
 918
 919	spin_lock(&ino->i_lock);
 920	lo = NFS_I(ino)->layout;
 921	if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
 922	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
 923		goto out_nolayout;
 924	list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
 925		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
 926			mark_lseg_invalid(lseg, &tmp_list);
 927			found = true;
 928		}
 929	if (!found)
 930		goto out_nolayout;
 931	lo->plh_block_lgets++;
 932	pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */
 933	spin_unlock(&ino->i_lock);
 934	pnfs_free_lseg_list(&tmp_list);
 935	return true;
 936
 937out_nolayout:
 938	spin_unlock(&ino->i_lock);
 939	return false;
 940}
 941
 942void pnfs_roc_release(struct inode *ino)
 943{
 944	struct pnfs_layout_hdr *lo;
 945
 946	spin_lock(&ino->i_lock);
 947	lo = NFS_I(ino)->layout;
 948	lo->plh_block_lgets--;
 949	if (atomic_dec_and_test(&lo->plh_refcount)) {
 950		pnfs_detach_layout_hdr(lo);
 951		spin_unlock(&ino->i_lock);
 952		pnfs_free_layout_hdr(lo);
 953	} else
 954		spin_unlock(&ino->i_lock);
 955}
 956
 957void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
 958{
 959	struct pnfs_layout_hdr *lo;
 960
 961	spin_lock(&ino->i_lock);
 962	lo = NFS_I(ino)->layout;
 963	if (pnfs_seqid_is_newer(barrier, lo->plh_barrier))
 964		lo->plh_barrier = barrier;
 965	spin_unlock(&ino->i_lock);
 966}
 967
 968bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
 969{
 970	struct nfs_inode *nfsi = NFS_I(ino);
 971	struct pnfs_layout_hdr *lo;
 972	struct pnfs_layout_segment *lseg;
 973	u32 current_seqid;
 974	bool found = false;
 975
 976	spin_lock(&ino->i_lock);
 977	list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
 978		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
 979			rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
 980			found = true;
 981			goto out;
 982		}
 983	lo = nfsi->layout;
 984	current_seqid = be32_to_cpu(lo->plh_stateid.seqid);
 
 985
 986	/* Since close does not return a layout stateid for use as
 987	 * a barrier, we choose the worst-case barrier.
 988	 */
 989	*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
 990out:
 991	spin_unlock(&ino->i_lock);
 992	return found;
 993}
 994
 995/*
 996 * Compare two layout segments for sorting into layout cache.
 997 * We want to preferentially return RW over RO layouts, so ensure those
 998 * are seen first.
 999 */
1000static s64
1001pnfs_lseg_range_cmp(const struct pnfs_layout_range *l1,
1002	   const struct pnfs_layout_range *l2)
1003{
1004	s64 d;
1005
1006	/* high offset > low offset */
1007	d = l1->offset - l2->offset;
1008	if (d)
1009		return d;
1010
1011	/* short length > long length */
1012	d = l2->length - l1->length;
1013	if (d)
1014		return d;
1015
1016	/* read > read/write */
1017	return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ);
1018}
1019
1020static void
1021pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo,
1022		   struct pnfs_layout_segment *lseg)
1023{
1024	struct pnfs_layout_segment *lp;
1025
1026	dprintk("%s:Begin\n", __func__);
1027
 
1028	list_for_each_entry(lp, &lo->plh_segs, pls_list) {
1029		if (pnfs_lseg_range_cmp(&lseg->pls_range, &lp->pls_range) > 0)
1030			continue;
1031		list_add_tail(&lseg->pls_list, &lp->pls_list);
1032		dprintk("%s: inserted lseg %p "
1033			"iomode %d offset %llu length %llu before "
1034			"lp %p iomode %d offset %llu length %llu\n",
1035			__func__, lseg, lseg->pls_range.iomode,
1036			lseg->pls_range.offset, lseg->pls_range.length,
1037			lp, lp->pls_range.iomode, lp->pls_range.offset,
1038			lp->pls_range.length);
1039		goto out;
1040	}
1041	list_add_tail(&lseg->pls_list, &lo->plh_segs);
1042	dprintk("%s: inserted lseg %p "
1043		"iomode %d offset %llu length %llu at tail\n",
1044		__func__, lseg, lseg->pls_range.iomode,
1045		lseg->pls_range.offset, lseg->pls_range.length);
1046out:
1047	pnfs_get_layout_hdr(lo);
1048
1049	dprintk("%s:Return\n", __func__);
1050}
1051
1052static struct pnfs_layout_hdr *
1053alloc_init_layout_hdr(struct inode *ino,
1054		      struct nfs_open_context *ctx,
1055		      gfp_t gfp_flags)
1056{
1057	struct pnfs_layout_hdr *lo;
1058
1059	lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
1060	if (!lo)
1061		return NULL;
1062	atomic_set(&lo->plh_refcount, 1);
1063	INIT_LIST_HEAD(&lo->plh_layouts);
1064	INIT_LIST_HEAD(&lo->plh_segs);
1065	INIT_LIST_HEAD(&lo->plh_bulk_destroy);
1066	lo->plh_inode = ino;
1067	lo->plh_lc_cred = get_rpccred(ctx->cred);
1068	return lo;
1069}
1070
1071static struct pnfs_layout_hdr *
1072pnfs_find_alloc_layout(struct inode *ino,
1073		       struct nfs_open_context *ctx,
1074		       gfp_t gfp_flags)
1075{
1076	struct nfs_inode *nfsi = NFS_I(ino);
1077	struct pnfs_layout_hdr *new = NULL;
1078
1079	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
1080
1081	if (nfsi->layout != NULL)
1082		goto out_existing;
 
 
 
 
 
1083	spin_unlock(&ino->i_lock);
1084	new = alloc_init_layout_hdr(ino, ctx, gfp_flags);
1085	spin_lock(&ino->i_lock);
1086
1087	if (likely(nfsi->layout == NULL)) {	/* Won the race? */
1088		nfsi->layout = new;
1089		return new;
1090	} else if (new != NULL)
1091		pnfs_free_layout_hdr(new);
1092out_existing:
1093	pnfs_get_layout_hdr(nfsi->layout);
1094	return nfsi->layout;
1095}
1096
1097/*
1098 * iomode matching rules:
1099 * iomode	lseg	match
1100 * -----	-----	-----
1101 * ANY		READ	true
1102 * ANY		RW	true
1103 * RW		READ	false
1104 * RW		RW	true
1105 * READ		READ	true
1106 * READ		RW	true
1107 */
1108static bool
1109pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range,
1110		 const struct pnfs_layout_range *range)
1111{
1112	struct pnfs_layout_range range1;
1113
1114	if ((range->iomode == IOMODE_RW &&
1115	     ls_range->iomode != IOMODE_RW) ||
1116	    !pnfs_lseg_range_intersecting(ls_range, range))
1117		return 0;
1118
1119	/* range1 covers only the first byte in the range */
1120	range1 = *range;
1121	range1.length = 1;
1122	return pnfs_lseg_range_contained(ls_range, &range1);
1123}
1124
1125/*
1126 * lookup range in layout
1127 */
1128static struct pnfs_layout_segment *
1129pnfs_find_lseg(struct pnfs_layout_hdr *lo,
1130		struct pnfs_layout_range *range)
1131{
1132	struct pnfs_layout_segment *lseg, *ret = NULL;
1133
1134	dprintk("%s:Begin\n", __func__);
1135
 
1136	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
1137		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
1138		    pnfs_lseg_range_match(&lseg->pls_range, range)) {
1139			ret = pnfs_get_lseg(lseg);
1140			break;
1141		}
1142		if (lseg->pls_range.offset > range->offset)
1143			break;
1144	}
1145
1146	dprintk("%s:Return lseg %p ref %d\n",
1147		__func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
1148	return ret;
1149}
1150
1151/*
1152 * Use mdsthreshold hints set at each OPEN to determine if I/O should go
1153 * to the MDS or over pNFS
1154 *
1155 * The nfs_inode read_io and write_io fields are cumulative counters reset
1156 * when there are no layout segments. Note that in pnfs_update_layout iomode
1157 * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
1158 * WRITE request.
1159 *
1160 * A return of true means use MDS I/O.
1161 *
1162 * From rfc 5661:
1163 * If a file's size is smaller than the file size threshold, data accesses
1164 * SHOULD be sent to the metadata server.  If an I/O request has a length that
1165 * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
1166 * server.  If both file size and I/O size are provided, the client SHOULD
1167 * reach or exceed  both thresholds before sending its read or write
1168 * requests to the data server.
1169 */
1170static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
1171				     struct inode *ino, int iomode)
1172{
1173	struct nfs4_threshold *t = ctx->mdsthreshold;
1174	struct nfs_inode *nfsi = NFS_I(ino);
1175	loff_t fsize = i_size_read(ino);
1176	bool size = false, size_set = false, io = false, io_set = false, ret = false;
1177
1178	if (t == NULL)
1179		return ret;
1180
1181	dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
1182		__func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
1183
1184	switch (iomode) {
1185	case IOMODE_READ:
1186		if (t->bm & THRESHOLD_RD) {
1187			dprintk("%s fsize %llu\n", __func__, fsize);
1188			size_set = true;
1189			if (fsize < t->rd_sz)
1190				size = true;
1191		}
1192		if (t->bm & THRESHOLD_RD_IO) {
1193			dprintk("%s nfsi->read_io %llu\n", __func__,
1194				nfsi->read_io);
1195			io_set = true;
1196			if (nfsi->read_io < t->rd_io_sz)
1197				io = true;
1198		}
1199		break;
1200	case IOMODE_RW:
1201		if (t->bm & THRESHOLD_WR) {
1202			dprintk("%s fsize %llu\n", __func__, fsize);
1203			size_set = true;
1204			if (fsize < t->wr_sz)
1205				size = true;
1206		}
1207		if (t->bm & THRESHOLD_WR_IO) {
1208			dprintk("%s nfsi->write_io %llu\n", __func__,
1209				nfsi->write_io);
1210			io_set = true;
1211			if (nfsi->write_io < t->wr_io_sz)
1212				io = true;
1213		}
1214		break;
1215	}
1216	if (size_set && io_set) {
1217		if (size && io)
1218			ret = true;
1219	} else if (size || io)
1220		ret = true;
1221
1222	dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
1223	return ret;
1224}
1225
1226/*
1227 * Layout segment is retreived from the server if not cached.
1228 * The appropriate layout segment is referenced and returned to the caller.
1229 */
1230struct pnfs_layout_segment *
1231pnfs_update_layout(struct inode *ino,
1232		   struct nfs_open_context *ctx,
1233		   loff_t pos,
1234		   u64 count,
1235		   enum pnfs_iomode iomode,
1236		   gfp_t gfp_flags)
1237{
1238	struct pnfs_layout_range arg = {
1239		.iomode = iomode,
1240		.offset = pos,
1241		.length = count,
1242	};
1243	unsigned pg_offset;
 
1244	struct nfs_server *server = NFS_SERVER(ino);
1245	struct nfs_client *clp = server->nfs_client;
1246	struct pnfs_layout_hdr *lo;
1247	struct pnfs_layout_segment *lseg = NULL;
1248	bool first;
1249
1250	if (!pnfs_enabled_sb(NFS_SERVER(ino)))
1251		goto out;
1252
1253	if (pnfs_within_mdsthreshold(ctx, ino, iomode))
1254		goto out;
1255
1256	spin_lock(&ino->i_lock);
1257	lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
1258	if (lo == NULL) {
1259		spin_unlock(&ino->i_lock);
1260		goto out;
1261	}
1262
1263	/* Do we even need to bother with this? */
1264	if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
1265		dprintk("%s matches recall, use MDS\n", __func__);
1266		goto out_unlock;
1267	}
1268
1269	/* if LAYOUTGET already failed once we don't try again */
1270	if (pnfs_layout_io_test_failed(lo, iomode))
1271		goto out_unlock;
1272
1273	/* Check to see if the layout for the given range already exists */
1274	lseg = pnfs_find_lseg(lo, &arg);
1275	if (lseg)
1276		goto out_unlock;
1277
1278	if (pnfs_layoutgets_blocked(lo, 0))
1279		goto out_unlock;
1280	atomic_inc(&lo->plh_outstanding);
1281
1282	first = list_empty(&lo->plh_layouts) ? true : false;
 
 
1283	spin_unlock(&ino->i_lock);
1284
1285	if (first) {
1286		/* The lo must be on the clp list if there is any
1287		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
1288		 */
1289		spin_lock(&clp->cl_lock);
 
1290		list_add_tail(&lo->plh_layouts, &server->layouts);
1291		spin_unlock(&clp->cl_lock);
1292	}
1293
1294	pg_offset = arg.offset & ~PAGE_CACHE_MASK;
1295	if (pg_offset) {
1296		arg.offset -= pg_offset;
1297		arg.length += pg_offset;
1298	}
1299	if (arg.length != NFS4_MAX_UINT64)
1300		arg.length = PAGE_CACHE_ALIGN(arg.length);
1301
1302	lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
 
 
 
 
 
1303	atomic_dec(&lo->plh_outstanding);
1304out_put_layout_hdr:
1305	pnfs_put_layout_hdr(lo);
1306out:
1307	dprintk("%s: inode %s/%llu pNFS layout segment %s for "
1308			"(%s, offset: %llu, length: %llu)\n",
1309			__func__, ino->i_sb->s_id,
1310			(unsigned long long)NFS_FILEID(ino),
1311			lseg == NULL ? "not found" : "found",
1312			iomode==IOMODE_RW ?  "read/write" : "read-only",
1313			(unsigned long long)pos,
1314			(unsigned long long)count);
1315	return lseg;
1316out_unlock:
1317	spin_unlock(&ino->i_lock);
1318	goto out_put_layout_hdr;
1319}
1320EXPORT_SYMBOL_GPL(pnfs_update_layout);
1321
1322struct pnfs_layout_segment *
1323pnfs_layout_process(struct nfs4_layoutget *lgp)
1324{
1325	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
1326	struct nfs4_layoutget_res *res = &lgp->res;
1327	struct pnfs_layout_segment *lseg;
1328	struct inode *ino = lo->plh_inode;
1329	LIST_HEAD(free_me);
1330	int status = 0;
1331
1332	/* Inject layout blob into I/O device driver */
1333	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
1334	if (!lseg || IS_ERR(lseg)) {
1335		if (!lseg)
1336			status = -ENOMEM;
1337		else
1338			status = PTR_ERR(lseg);
1339		dprintk("%s: Could not allocate layout: error %d\n",
1340		       __func__, status);
1341		goto out;
1342	}
1343
1344	spin_lock(&ino->i_lock);
1345	if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
1346		dprintk("%s forget reply due to recall\n", __func__);
1347		goto out_forget_reply;
1348	}
1349
1350	if (pnfs_layoutgets_blocked(lo, 1) ||
1351	    pnfs_layout_stateid_blocked(lo, &res->stateid)) {
1352		dprintk("%s forget reply due to state\n", __func__);
1353		goto out_forget_reply;
1354	}
1355
1356	/* Check that the new stateid matches the old stateid */
1357	pnfs_verify_layout_stateid(lo, &res->stateid, &free_me);
1358	/* Done processing layoutget. Set the layout stateid */
1359	pnfs_set_layout_stateid(lo, &res->stateid, false);
1360
1361	init_lseg(lo, lseg);
1362	lseg->pls_range = res->range;
1363	pnfs_get_lseg(lseg);
1364	pnfs_layout_insert_lseg(lo, lseg);
1365
1366	if (res->return_on_close) {
1367		set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
1368		set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
1369	}
1370
 
 
1371	spin_unlock(&ino->i_lock);
1372	pnfs_free_lseg_list(&free_me);
1373	return lseg;
1374out:
1375	return ERR_PTR(status);
1376
1377out_forget_reply:
1378	spin_unlock(&ino->i_lock);
1379	lseg->pls_layout = lo;
1380	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
1381	goto out;
1382}
1383
1384void
1385pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
1386{
1387	u64 rd_size = req->wb_bytes;
1388
1389	WARN_ON_ONCE(pgio->pg_lseg != NULL);
1390
1391	if (req->wb_offset != req->wb_pgbase) {
1392		nfs_pageio_reset_read_mds(pgio);
1393		return;
1394	}
1395
1396	if (pgio->pg_dreq == NULL)
1397		rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
1398	else
1399		rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);
1400
1401	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1402					   req->wb_context,
1403					   req_offset(req),
1404					   rd_size,
1405					   IOMODE_READ,
1406					   GFP_KERNEL);
1407	/* If no lseg, fall back to read through mds */
1408	if (pgio->pg_lseg == NULL)
1409		nfs_pageio_reset_read_mds(pgio);
1410
1411}
1412EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
1413
1414void
1415pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
1416			   struct nfs_page *req, u64 wb_size)
1417{
1418	WARN_ON_ONCE(pgio->pg_lseg != NULL);
1419
1420	if (req->wb_offset != req->wb_pgbase) {
1421		nfs_pageio_reset_write_mds(pgio);
1422		return;
1423	}
1424
1425	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1426					   req->wb_context,
1427					   req_offset(req),
1428					   wb_size,
1429					   IOMODE_RW,
1430					   GFP_NOFS);
1431	/* If no lseg, fall back to write through mds */
1432	if (pgio->pg_lseg == NULL)
1433		nfs_pageio_reset_write_mds(pgio);
1434}
1435EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
1436
1437void
1438pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
1439		      const struct nfs_pgio_completion_ops *compl_ops)
1440{
1441	struct nfs_server *server = NFS_SERVER(inode);
1442	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1443
1444	if (ld == NULL)
1445		nfs_pageio_init_read(pgio, inode, compl_ops);
1446	else
1447		nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0);
 
1448}
1449
1450void
1451pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
1452		       int ioflags,
1453		       const struct nfs_pgio_completion_ops *compl_ops)
1454{
1455	struct nfs_server *server = NFS_SERVER(inode);
1456	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1457
1458	if (ld == NULL)
1459		nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
1460	else
1461		nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags);
 
1462}
1463
1464bool
1465pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1466		     struct nfs_page *req)
1467{
1468	if (pgio->pg_lseg == NULL)
1469		return nfs_generic_pg_test(pgio, prev, req);
1470
1471	/*
1472	 * Test if a nfs_page is fully contained in the pnfs_layout_range.
1473	 * Note that this test makes several assumptions:
1474	 * - that the previous nfs_page in the struct nfs_pageio_descriptor
1475	 *   is known to lie within the range.
1476	 *   - that the nfs_page being tested is known to be contiguous with the
1477	 *   previous nfs_page.
1478	 *   - Layout ranges are page aligned, so we only have to test the
1479	 *   start offset of the request.
1480	 *
1481	 * Please also note that 'end_offset' is actually the offset of the
1482	 * first byte that lies outside the pnfs_layout_range. FIXME?
1483	 *
1484	 */
1485	return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset,
1486					 pgio->pg_lseg->pls_range.length);
1487}
1488EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
1489
1490int pnfs_write_done_resend_to_mds(struct inode *inode,
1491				struct list_head *head,
1492				const struct nfs_pgio_completion_ops *compl_ops,
1493				struct nfs_direct_req *dreq)
1494{
1495	struct nfs_pageio_descriptor pgio;
1496	LIST_HEAD(failed);
1497
1498	/* Resend all requests through the MDS */
1499	nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops);
1500	pgio.pg_dreq = dreq;
1501	while (!list_empty(head)) {
1502		struct nfs_page *req = nfs_list_entry(head->next);
1503
1504		nfs_list_remove_request(req);
1505		if (!nfs_pageio_add_request(&pgio, req))
1506			nfs_list_add_request(req, &failed);
1507	}
1508	nfs_pageio_complete(&pgio);
1509
1510	if (!list_empty(&failed)) {
1511		/* For some reason our attempt to resend pages. Mark the
1512		 * overall send request as having failed, and let
1513		 * nfs_writeback_release_full deal with the error.
1514		 */
1515		list_move(&failed, head);
1516		return -EIO;
1517	}
1518	return 0;
1519}
1520EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
1521
1522static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
1523{
1524	struct nfs_pgio_header *hdr = data->header;
1525
1526	dprintk("pnfs write error = %d\n", hdr->pnfs_error);
1527	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1528	    PNFS_LAYOUTRET_ON_ERROR) {
 
1529		pnfs_return_layout(hdr->inode);
1530	}
1531	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1532		data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
1533							&hdr->pages,
1534							hdr->completion_ops,
1535							hdr->dreq);
1536}
1537
1538/*
1539 * Called by non rpc-based layout drivers
1540 */
1541void pnfs_ld_write_done(struct nfs_write_data *data)
1542{
1543	struct nfs_pgio_header *hdr = data->header;
1544
1545	trace_nfs4_pnfs_write(data, hdr->pnfs_error);
1546	if (!hdr->pnfs_error) {
1547		pnfs_set_layoutcommit(data);
1548		hdr->mds_ops->rpc_call_done(&data->task, data);
1549	} else
1550		pnfs_ld_handle_write_error(data);
1551	hdr->mds_ops->rpc_release(data);
1552}
1553EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
1554
1555static void
1556pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1557		struct nfs_write_data *data)
1558{
1559	struct nfs_pgio_header *hdr = data->header;
1560
1561	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1562		list_splice_tail_init(&hdr->pages, &desc->pg_list);
1563		nfs_pageio_reset_write_mds(desc);
1564		desc->pg_recoalesce = 1;
1565	}
1566	nfs_writedata_release(data);
1567}
1568
1569static enum pnfs_try_status
1570pnfs_try_to_write_data(struct nfs_write_data *wdata,
1571			const struct rpc_call_ops *call_ops,
1572			struct pnfs_layout_segment *lseg,
1573			int how)
1574{
1575	struct nfs_pgio_header *hdr = wdata->header;
1576	struct inode *inode = hdr->inode;
1577	enum pnfs_try_status trypnfs;
1578	struct nfs_server *nfss = NFS_SERVER(inode);
1579
1580	hdr->mds_ops = call_ops;
1581
1582	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
1583		inode->i_ino, wdata->args.count, wdata->args.offset, how);
1584	trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
1585	if (trypnfs != PNFS_NOT_ATTEMPTED)
1586		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
1587	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
1588	return trypnfs;
1589}
1590
1591static void
1592pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
1593{
1594	struct nfs_write_data *data;
1595	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1596	struct pnfs_layout_segment *lseg = desc->pg_lseg;
1597
1598	desc->pg_lseg = NULL;
1599	while (!list_empty(head)) {
1600		enum pnfs_try_status trypnfs;
1601
1602		data = list_first_entry(head, struct nfs_write_data, list);
1603		list_del_init(&data->list);
1604
1605		trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
1606		if (trypnfs == PNFS_NOT_ATTEMPTED)
1607			pnfs_write_through_mds(desc, data);
1608	}
1609	pnfs_put_lseg(lseg);
1610}
1611
1612static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
1613{
1614	pnfs_put_lseg(hdr->lseg);
1615	nfs_writehdr_free(hdr);
1616}
1617EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
1618
1619int
1620pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1621{
1622	struct nfs_write_header *whdr;
1623	struct nfs_pgio_header *hdr;
1624	int ret;
1625
1626	whdr = nfs_writehdr_alloc();
1627	if (!whdr) {
1628		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1629		pnfs_put_lseg(desc->pg_lseg);
1630		desc->pg_lseg = NULL;
1631		return -ENOMEM;
1632	}
1633	hdr = &whdr->header;
1634	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
1635	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
1636	atomic_inc(&hdr->refcnt);
1637	ret = nfs_generic_flush(desc, hdr);
1638	if (ret != 0) {
1639		pnfs_put_lseg(desc->pg_lseg);
1640		desc->pg_lseg = NULL;
1641	} else
1642		pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
1643	if (atomic_dec_and_test(&hdr->refcnt))
1644		hdr->completion_ops->completion(hdr);
1645	return ret;
1646}
1647EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
1648
1649int pnfs_read_done_resend_to_mds(struct inode *inode,
1650				struct list_head *head,
1651				const struct nfs_pgio_completion_ops *compl_ops,
1652				struct nfs_direct_req *dreq)
1653{
1654	struct nfs_pageio_descriptor pgio;
1655	LIST_HEAD(failed);
1656
1657	/* Resend all requests through the MDS */
1658	nfs_pageio_init_read(&pgio, inode, compl_ops);
1659	pgio.pg_dreq = dreq;
1660	while (!list_empty(head)) {
1661		struct nfs_page *req = nfs_list_entry(head->next);
1662
1663		nfs_list_remove_request(req);
1664		if (!nfs_pageio_add_request(&pgio, req))
1665			nfs_list_add_request(req, &failed);
1666	}
1667	nfs_pageio_complete(&pgio);
1668
1669	if (!list_empty(&failed)) {
1670		list_move(&failed, head);
1671		return -EIO;
1672	}
1673	return 0;
1674}
1675EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
1676
1677static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1678{
1679	struct nfs_pgio_header *hdr = data->header;
1680
1681	dprintk("pnfs read error = %d\n", hdr->pnfs_error);
1682	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1683	    PNFS_LAYOUTRET_ON_ERROR) {
 
1684		pnfs_return_layout(hdr->inode);
1685	}
1686	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1687		data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
1688							&hdr->pages,
1689							hdr->completion_ops,
1690							hdr->dreq);
1691}
1692
1693/*
1694 * Called by non rpc-based layout drivers
1695 */
1696void pnfs_ld_read_done(struct nfs_read_data *data)
1697{
1698	struct nfs_pgio_header *hdr = data->header;
1699
1700	trace_nfs4_pnfs_read(data, hdr->pnfs_error);
1701	if (likely(!hdr->pnfs_error)) {
1702		__nfs4_read_done_cb(data);
1703		hdr->mds_ops->rpc_call_done(&data->task, data);
1704	} else
1705		pnfs_ld_handle_read_error(data);
1706	hdr->mds_ops->rpc_release(data);
1707}
1708EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
1709
1710static void
1711pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
1712		struct nfs_read_data *data)
1713{
1714	struct nfs_pgio_header *hdr = data->header;
1715
1716	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1717		list_splice_tail_init(&hdr->pages, &desc->pg_list);
1718		nfs_pageio_reset_read_mds(desc);
1719		desc->pg_recoalesce = 1;
1720	}
1721	nfs_readdata_release(data);
1722}
1723
1724/*
1725 * Call the appropriate parallel I/O subsystem read function.
1726 */
1727static enum pnfs_try_status
1728pnfs_try_to_read_data(struct nfs_read_data *rdata,
1729		       const struct rpc_call_ops *call_ops,
1730		       struct pnfs_layout_segment *lseg)
1731{
1732	struct nfs_pgio_header *hdr = rdata->header;
1733	struct inode *inode = hdr->inode;
1734	struct nfs_server *nfss = NFS_SERVER(inode);
1735	enum pnfs_try_status trypnfs;
1736
1737	hdr->mds_ops = call_ops;
1738
1739	dprintk("%s: Reading ino:%lu %u@%llu\n",
1740		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
1741
1742	trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
1743	if (trypnfs != PNFS_NOT_ATTEMPTED)
1744		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
1745	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
1746	return trypnfs;
1747}
1748
1749static void
1750pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
1751{
1752	struct nfs_read_data *data;
1753	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1754	struct pnfs_layout_segment *lseg = desc->pg_lseg;
1755
1756	desc->pg_lseg = NULL;
1757	while (!list_empty(head)) {
1758		enum pnfs_try_status trypnfs;
1759
1760		data = list_first_entry(head, struct nfs_read_data, list);
1761		list_del_init(&data->list);
1762
1763		trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
1764		if (trypnfs == PNFS_NOT_ATTEMPTED)
1765			pnfs_read_through_mds(desc, data);
1766	}
1767	pnfs_put_lseg(lseg);
1768}
1769
1770static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
1771{
1772	pnfs_put_lseg(hdr->lseg);
1773	nfs_readhdr_free(hdr);
1774}
1775EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
1776
1777int
1778pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1779{
1780	struct nfs_read_header *rhdr;
1781	struct nfs_pgio_header *hdr;
1782	int ret;
1783
1784	rhdr = nfs_readhdr_alloc();
1785	if (!rhdr) {
1786		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1787		ret = -ENOMEM;
1788		pnfs_put_lseg(desc->pg_lseg);
1789		desc->pg_lseg = NULL;
1790		return ret;
1791	}
1792	hdr = &rhdr->header;
1793	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
1794	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
1795	atomic_inc(&hdr->refcnt);
1796	ret = nfs_generic_pagein(desc, hdr);
1797	if (ret != 0) {
1798		pnfs_put_lseg(desc->pg_lseg);
1799		desc->pg_lseg = NULL;
1800	} else
1801		pnfs_do_multiple_reads(desc, &hdr->rpc_list);
1802	if (atomic_dec_and_test(&hdr->refcnt))
1803		hdr->completion_ops->completion(hdr);
1804	return ret;
1805}
1806EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
1807
1808static void pnfs_clear_layoutcommitting(struct inode *inode)
1809{
1810	unsigned long *bitlock = &NFS_I(inode)->flags;
1811
1812	clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
1813	smp_mb__after_clear_bit();
1814	wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
1815}
1816
1817/*
1818 * There can be multiple RW segments.
1819 */
1820static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
1821{
1822	struct pnfs_layout_segment *lseg;
1823
1824	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
1825		if (lseg->pls_range.iomode == IOMODE_RW &&
1826		    test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
1827			list_add(&lseg->pls_lc_list, listp);
1828	}
1829}
1830
1831static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
1832{
1833	struct pnfs_layout_segment *lseg, *tmp;
1834
1835	/* Matched by references in pnfs_set_layoutcommit */
1836	list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
1837		list_del_init(&lseg->pls_lc_list);
1838		pnfs_put_lseg(lseg);
1839	}
1840
1841	pnfs_clear_layoutcommitting(inode);
1842}
1843
1844void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
1845{
1846	pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
 
 
 
 
 
 
1847}
1848EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
1849
1850void
1851pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1852{
1853	struct nfs_pgio_header *hdr = wdata->header;
1854	struct inode *inode = hdr->inode;
1855	struct nfs_inode *nfsi = NFS_I(inode);
1856	loff_t end_pos = wdata->mds_offset + wdata->res.count;
1857	bool mark_as_dirty = false;
1858
1859	spin_lock(&inode->i_lock);
1860	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1861		mark_as_dirty = true;
1862		dprintk("%s: Set layoutcommit for inode %lu ",
1863			__func__, inode->i_ino);
1864	}
1865	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
1866		/* references matched in nfs4_layoutcommit_release */
1867		pnfs_get_lseg(hdr->lseg);
1868	}
1869	if (end_pos > nfsi->layout->plh_lwb)
1870		nfsi->layout->plh_lwb = end_pos;
1871	spin_unlock(&inode->i_lock);
1872	dprintk("%s: lseg %p end_pos %llu\n",
1873		__func__, hdr->lseg, nfsi->layout->plh_lwb);
1874
1875	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
1876	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
1877	if (mark_as_dirty)
1878		mark_inode_dirty_sync(inode);
1879}
1880EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
1881
1882void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
1883{
1884	struct nfs_server *nfss = NFS_SERVER(data->args.inode);
1885
1886	if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
1887		nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
1888	pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
1889}
1890
1891/*
1892 * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
1893 * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
1894 * data to disk to allow the server to recover the data if it crashes.
1895 * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
1896 * is off, and a COMMIT is sent to a data server, or
1897 * if WRITEs to a data server return NFS_DATA_SYNC.
1898 */
1899int
1900pnfs_layoutcommit_inode(struct inode *inode, bool sync)
1901{
1902	struct nfs4_layoutcommit_data *data;
1903	struct nfs_inode *nfsi = NFS_I(inode);
1904	loff_t end_pos;
1905	int status;
1906
1907	if (!pnfs_layoutcommit_outstanding(inode))
 
 
1908		return 0;
1909
1910	dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
 
 
 
 
 
 
 
 
1911
1912	status = -EAGAIN;
1913	if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
1914		if (!sync)
1915			goto out;
1916		status = wait_on_bit_lock(&nfsi->flags,
1917				NFS_INO_LAYOUTCOMMITTING,
1918				nfs_wait_bit_killable,
1919				TASK_KILLABLE);
1920		if (status)
1921			goto out;
1922	}
1923
1924	status = -ENOMEM;
1925	/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
1926	data = kzalloc(sizeof(*data), GFP_NOFS);
1927	if (!data)
1928		goto clear_layoutcommitting;
1929
1930	status = 0;
1931	spin_lock(&inode->i_lock);
1932	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
1933		goto out_unlock;
 
 
 
 
1934
1935	INIT_LIST_HEAD(&data->lseg_list);
1936	pnfs_list_write_lseg(inode, &data->lseg_list);
1937
1938	end_pos = nfsi->layout->plh_lwb;
1939	nfsi->layout->plh_lwb = 0;
1940
1941	nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
1942	spin_unlock(&inode->i_lock);
1943
1944	data->args.inode = inode;
1945	data->cred = get_rpccred(nfsi->layout->plh_lc_cred);
1946	nfs_fattr_init(&data->fattr);
1947	data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
1948	data->res.fattr = &data->fattr;
1949	data->args.lastbytewritten = end_pos - 1;
1950	data->res.server = NFS_SERVER(inode);
1951
1952	status = nfs4_proc_layoutcommit(data, sync);
1953out:
1954	if (status)
1955		mark_inode_dirty_sync(inode);
1956	dprintk("<-- %s status %d\n", __func__, status);
1957	return status;
1958out_unlock:
1959	spin_unlock(&inode->i_lock);
1960	kfree(data);
1961clear_layoutcommitting:
1962	pnfs_clear_layoutcommitting(inode);
1963	goto out;
1964}
1965
1966struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
1967{
1968	struct nfs4_threshold *thp;
1969
1970	thp = kzalloc(sizeof(*thp), GFP_NOFS);
1971	if (!thp) {
1972		dprintk("%s mdsthreshold allocation failed\n", __func__);
1973		return NULL;
1974	}
1975	return thp;
1976}
v3.5.6
   1/*
   2 *  pNFS functions to call and manage layout drivers.
   3 *
   4 *  Copyright (c) 2002 [year of first publication]
   5 *  The Regents of the University of Michigan
   6 *  All Rights Reserved
   7 *
   8 *  Dean Hildebrand <dhildebz@umich.edu>
   9 *
  10 *  Permission is granted to use, copy, create derivative works, and
  11 *  redistribute this software and such derivative works for any purpose,
  12 *  so long as the name of the University of Michigan is not used in
  13 *  any advertising or publicity pertaining to the use or distribution
  14 *  of this software without specific, written prior authorization. If
  15 *  the above copyright notice or any other identification of the
  16 *  University of Michigan is included in any copy of any portion of
  17 *  this software, then the disclaimer below must also be included.
  18 *
  19 *  This software is provided as is, without representation or warranty
  20 *  of any kind either express or implied, including without limitation
  21 *  the implied warranties of merchantability, fitness for a particular
  22 *  purpose, or noninfringement.  The Regents of the University of
  23 *  Michigan shall not be liable for any damages, including special,
  24 *  indirect, incidental, or consequential damages, with respect to any
  25 *  claim arising out of or in connection with the use of the software,
  26 *  even if it has been or is hereafter advised of the possibility of
  27 *  such damages.
  28 */
  29
  30#include <linux/nfs_fs.h>
  31#include <linux/nfs_page.h>
  32#include <linux/module.h>
  33#include "internal.h"
  34#include "pnfs.h"
  35#include "iostat.h"
 
  36
  37#define NFSDBG_FACILITY		NFSDBG_PNFS
 
  38
  39/* Locking:
  40 *
  41 * pnfs_spinlock:
  42 *      protects pnfs_modules_tbl.
  43 */
  44static DEFINE_SPINLOCK(pnfs_spinlock);
  45
  46/*
  47 * pnfs_modules_tbl holds all pnfs modules
  48 */
  49static LIST_HEAD(pnfs_modules_tbl);
  50
  51/* Return the registered pnfs layout driver module matching given id */
  52static struct pnfs_layoutdriver_type *
  53find_pnfs_driver_locked(u32 id)
  54{
  55	struct pnfs_layoutdriver_type *local;
  56
  57	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
  58		if (local->id == id)
  59			goto out;
  60	local = NULL;
  61out:
  62	dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
  63	return local;
  64}
  65
  66static struct pnfs_layoutdriver_type *
  67find_pnfs_driver(u32 id)
  68{
  69	struct pnfs_layoutdriver_type *local;
  70
  71	spin_lock(&pnfs_spinlock);
  72	local = find_pnfs_driver_locked(id);
  73	if (local != NULL && !try_module_get(local->owner)) {
  74		dprintk("%s: Could not grab reference on module\n", __func__);
  75		local = NULL;
  76	}
  77	spin_unlock(&pnfs_spinlock);
  78	return local;
  79}
  80
  81void
  82unset_pnfs_layoutdriver(struct nfs_server *nfss)
  83{
  84	if (nfss->pnfs_curr_ld) {
  85		if (nfss->pnfs_curr_ld->clear_layoutdriver)
  86			nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
  87		/* Decrement the MDS count. Purge the deviceid cache if zero */
  88		if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count))
  89			nfs4_deviceid_purge_client(nfss->nfs_client);
  90		module_put(nfss->pnfs_curr_ld->owner);
  91	}
  92	nfss->pnfs_curr_ld = NULL;
  93}
  94
  95/*
  96 * Try to set the server's pnfs module to the pnfs layout type specified by id.
  97 * Currently only one pNFS layout driver per filesystem is supported.
  98 *
  99 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
 100 */
 101void
 102set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
 103		      u32 id)
 104{
 105	struct pnfs_layoutdriver_type *ld_type = NULL;
 106
 107	if (id == 0)
 108		goto out_no_driver;
 109	if (!(server->nfs_client->cl_exchange_flags &
 110		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
 111		printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
 112			__func__, id, server->nfs_client->cl_exchange_flags);
 113		goto out_no_driver;
 114	}
 115	ld_type = find_pnfs_driver(id);
 116	if (!ld_type) {
 117		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
 118		ld_type = find_pnfs_driver(id);
 119		if (!ld_type) {
 120			dprintk("%s: No pNFS module found for %u.\n",
 121				__func__, id);
 122			goto out_no_driver;
 123		}
 124	}
 125	server->pnfs_curr_ld = ld_type;
 126	if (ld_type->set_layoutdriver
 127	    && ld_type->set_layoutdriver(server, mntfh)) {
 128		printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
 129			"driver %u.\n", __func__, id);
 130		module_put(ld_type->owner);
 131		goto out_no_driver;
 132	}
 133	/* Bump the MDS count */
 134	atomic_inc(&server->nfs_client->cl_mds_count);
 135
 136	dprintk("%s: pNFS module for %u set\n", __func__, id);
 137	return;
 138
 139out_no_driver:
 140	dprintk("%s: Using NFSv4 I/O\n", __func__);
 141	server->pnfs_curr_ld = NULL;
 142}
 143
 144int
 145pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
 146{
 147	int status = -EINVAL;
 148	struct pnfs_layoutdriver_type *tmp;
 149
 150	if (ld_type->id == 0) {
 151		printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__);
 152		return status;
 153	}
 154	if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
 155		printk(KERN_ERR "NFS: %s Layout driver must provide "
 156		       "alloc_lseg and free_lseg.\n", __func__);
 157		return status;
 158	}
 159
 160	spin_lock(&pnfs_spinlock);
 161	tmp = find_pnfs_driver_locked(ld_type->id);
 162	if (!tmp) {
 163		list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
 164		status = 0;
 165		dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
 166			ld_type->name);
 167	} else {
 168		printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n",
 169			__func__, ld_type->id);
 170	}
 171	spin_unlock(&pnfs_spinlock);
 172
 173	return status;
 174}
 175EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);
 176
 177void
 178pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
 179{
 180	dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
 181	spin_lock(&pnfs_spinlock);
 182	list_del(&ld_type->pnfs_tblid);
 183	spin_unlock(&pnfs_spinlock);
 184}
 185EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
 186
 187/*
 188 * pNFS client layout cache
 189 */
 190
 191/* Need to hold i_lock if caller does not already hold reference */
 192void
 193get_layout_hdr(struct pnfs_layout_hdr *lo)
 194{
 195	atomic_inc(&lo->plh_refcount);
 196}
 197
 198static struct pnfs_layout_hdr *
 199pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
 200{
 201	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
 202	return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) :
 203		kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
 204}
 205
 206static void
 207pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
 208{
 209	struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld;
 
 
 
 
 
 
 
 
 
 210	put_rpccred(lo->plh_lc_cred);
 211	return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo);
 212}
 213
 214static void
 215destroy_layout_hdr(struct pnfs_layout_hdr *lo)
 216{
 
 217	dprintk("%s: freeing layout cache %p\n", __func__, lo);
 218	BUG_ON(!list_empty(&lo->plh_layouts));
 219	NFS_I(lo->plh_inode)->layout = NULL;
 220	pnfs_free_layout_hdr(lo);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 221}
 222
 223static void
 224put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
 225{
 226	if (atomic_dec_and_test(&lo->plh_refcount))
 227		destroy_layout_hdr(lo);
 228}
 229
 230void
 231put_layout_hdr(struct pnfs_layout_hdr *lo)
 232{
 233	struct inode *inode = lo->plh_inode;
 
 
 
 
 
 
 234
 235	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
 236		destroy_layout_hdr(lo);
 237		spin_unlock(&inode->i_lock);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 238	}
 
 239}
 240
 241static void
 242init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
 243{
 244	INIT_LIST_HEAD(&lseg->pls_list);
 245	INIT_LIST_HEAD(&lseg->pls_lc_list);
 246	atomic_set(&lseg->pls_refcount, 1);
 247	smp_mb();
 248	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
 249	lseg->pls_layout = lo;
 250}
 251
 252static void free_lseg(struct pnfs_layout_segment *lseg)
 253{
 254	struct inode *ino = lseg->pls_layout->plh_inode;
 255
 256	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
 257	/* Matched by get_layout_hdr in pnfs_insert_layout */
 258	put_layout_hdr(NFS_I(ino)->layout);
 259}
 260
 261static void
 262put_lseg_common(struct pnfs_layout_segment *lseg)
 
 263{
 264	struct inode *inode = lseg->pls_layout->plh_inode;
 265
 266	WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
 267	list_del_init(&lseg->pls_list);
 268	if (list_empty(&lseg->pls_layout->plh_segs)) {
 269		set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
 270		/* Matched by initial refcount set in alloc_init_layout_hdr */
 271		put_layout_hdr_locked(lseg->pls_layout);
 272	}
 273	rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
 274}
 275
 276void
 277put_lseg(struct pnfs_layout_segment *lseg)
 278{
 
 279	struct inode *inode;
 280
 281	if (!lseg)
 282		return;
 283
 284	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
 285		atomic_read(&lseg->pls_refcount),
 286		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
 287	inode = lseg->pls_layout->plh_inode;
 
 288	if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
 289		LIST_HEAD(free_me);
 290
 291		put_lseg_common(lseg);
 292		list_add(&lseg->pls_list, &free_me);
 293		spin_unlock(&inode->i_lock);
 294		pnfs_free_lseg_list(&free_me);
 
 295	}
 296}
 297EXPORT_SYMBOL_GPL(put_lseg);
 298
 299static inline u64
 300end_offset(u64 start, u64 len)
 301{
 302	u64 end;
 303
 304	end = start + len;
 305	return end >= start ? end : NFS4_MAX_UINT64;
 306}
 307
 308/* last octet in a range */
 309static inline u64
 310last_byte_offset(u64 start, u64 len)
 311{
 312	u64 end;
 313
 314	BUG_ON(!len);
 315	end = start + len;
 316	return end > start ? end - 1 : NFS4_MAX_UINT64;
 317}
 318
 319/*
 320 * is l2 fully contained in l1?
 321 *   start1                             end1
 322 *   [----------------------------------)
 323 *           start2           end2
 324 *           [----------------)
 325 */
 326static inline int
 327lo_seg_contained(struct pnfs_layout_range *l1,
 328		 struct pnfs_layout_range *l2)
 329{
 330	u64 start1 = l1->offset;
 331	u64 end1 = end_offset(start1, l1->length);
 332	u64 start2 = l2->offset;
 333	u64 end2 = end_offset(start2, l2->length);
 334
 335	return (start1 <= start2) && (end1 >= end2);
 336}
 337
 338/*
 339 * is l1 and l2 intersecting?
 340 *   start1                             end1
 341 *   [----------------------------------)
 342 *                              start2           end2
 343 *                              [----------------)
 344 */
 345static inline int
 346lo_seg_intersecting(struct pnfs_layout_range *l1,
 347		    struct pnfs_layout_range *l2)
 348{
 349	u64 start1 = l1->offset;
 350	u64 end1 = end_offset(start1, l1->length);
 351	u64 start2 = l2->offset;
 352	u64 end2 = end_offset(start2, l2->length);
 353
 354	return (end1 == NFS4_MAX_UINT64 || end1 > start2) &&
 355	       (end2 == NFS4_MAX_UINT64 || end2 > start1);
 356}
 357
 358static bool
 359should_free_lseg(struct pnfs_layout_range *lseg_range,
 360		 struct pnfs_layout_range *recall_range)
 361{
 362	return (recall_range->iomode == IOMODE_ANY ||
 363		lseg_range->iomode == recall_range->iomode) &&
 364	       lo_seg_intersecting(lseg_range, recall_range);
 
 
 
 
 
 
 
 
 
 
 365}
 366
 367/* Returns 1 if lseg is removed from list, 0 otherwise */
 368static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
 369			     struct list_head *tmp_list)
 370{
 371	int rv = 0;
 372
 373	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
 374		/* Remove the reference keeping the lseg in the
 375		 * list.  It will now be removed when all
 376		 * outstanding io is finished.
 377		 */
 378		dprintk("%s: lseg %p ref %d\n", __func__, lseg,
 379			atomic_read(&lseg->pls_refcount));
 380		if (atomic_dec_and_test(&lseg->pls_refcount)) {
 381			put_lseg_common(lseg);
 382			list_add(&lseg->pls_list, tmp_list);
 383			rv = 1;
 384		}
 385	}
 386	return rv;
 387}
 388
 389/* Returns count of number of matching invalid lsegs remaining in list
 390 * after call.
 391 */
 392int
 393mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 394			    struct list_head *tmp_list,
 395			    struct pnfs_layout_range *recall_range)
 396{
 397	struct pnfs_layout_segment *lseg, *next;
 398	int invalid = 0, removed = 0;
 399
 400	dprintk("%s:Begin lo %p\n", __func__, lo);
 401
 402	if (list_empty(&lo->plh_segs)) {
 403		/* Reset MDS Threshold I/O counters */
 404		NFS_I(lo->plh_inode)->write_io = 0;
 405		NFS_I(lo->plh_inode)->read_io = 0;
 406		if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
 407			put_layout_hdr_locked(lo);
 408		return 0;
 409	}
 410	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
 411		if (!recall_range ||
 412		    should_free_lseg(&lseg->pls_range, recall_range)) {
 413			dprintk("%s: freeing lseg %p iomode %d "
 414				"offset %llu length %llu\n", __func__,
 415				lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
 416				lseg->pls_range.length);
 417			invalid++;
 418			removed += mark_lseg_invalid(lseg, tmp_list);
 419		}
 420	dprintk("%s:Return %i\n", __func__, invalid - removed);
 421	return invalid - removed;
 422}
 423
 424/* note free_me must contain lsegs from a single layout_hdr */
 425void
 426pnfs_free_lseg_list(struct list_head *free_me)
 427{
 428	struct pnfs_layout_segment *lseg, *tmp;
 429	struct pnfs_layout_hdr *lo;
 430
 431	if (list_empty(free_me))
 432		return;
 433
 434	lo = list_first_entry(free_me, struct pnfs_layout_segment,
 435			      pls_list)->pls_layout;
 436
 437	if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) {
 438		struct nfs_client *clp;
 439
 440		clp = NFS_SERVER(lo->plh_inode)->nfs_client;
 441		spin_lock(&clp->cl_lock);
 442		list_del_init(&lo->plh_layouts);
 443		spin_unlock(&clp->cl_lock);
 444	}
 445	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
 446		list_del(&lseg->pls_list);
 447		free_lseg(lseg);
 448	}
 449}
 450
 451void
 452pnfs_destroy_layout(struct nfs_inode *nfsi)
 453{
 454	struct pnfs_layout_hdr *lo;
 455	LIST_HEAD(tmp_list);
 456
 457	spin_lock(&nfsi->vfs_inode.i_lock);
 458	lo = nfsi->layout;
 459	if (lo) {
 460		lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
 461		mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 462	}
 463	spin_unlock(&nfsi->vfs_inode.i_lock);
 464	pnfs_free_lseg_list(&tmp_list);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 465}
 466EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
 467
 468/*
 469 * Called by the state manger to remove all layouts established under an
 470 * expired lease.
 471 */
 472void
 473pnfs_destroy_all_layouts(struct nfs_client *clp)
 474{
 475	struct nfs_server *server;
 476	struct pnfs_layout_hdr *lo;
 477	LIST_HEAD(tmp_list);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 478
 479	nfs4_deviceid_mark_client_invalid(clp);
 480	nfs4_deviceid_purge_client(clp);
 
 
 
 
 
 481
 482	spin_lock(&clp->cl_lock);
 483	rcu_read_lock();
 
 484	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
 485		if (!list_empty(&server->layouts))
 486			list_splice_init(&server->layouts, &tmp_list);
 
 
 
 
 487	}
 488	rcu_read_unlock();
 489	spin_unlock(&clp->cl_lock);
 490
 491	while (!list_empty(&tmp_list)) {
 492		lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
 493				plh_layouts);
 494		dprintk("%s freeing layout for inode %lu\n", __func__,
 495			lo->plh_inode->i_ino);
 496		list_del_init(&lo->plh_layouts);
 497		pnfs_destroy_layout(NFS_I(lo->plh_inode));
 
 
 
 
 
 
 
 
 
 
 
 
 
 498	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 499}
 500
 501/* update lo->plh_stateid with new if is more recent */
 502void
 503pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
 504			bool update_barrier)
 505{
 506	u32 oldseq, newseq;
 
 507
 508	oldseq = be32_to_cpu(lo->plh_stateid.seqid);
 509	newseq = be32_to_cpu(new->seqid);
 510	if ((int)(newseq - oldseq) > 0) {
 511		nfs4_stateid_copy(&lo->plh_stateid, new);
 512		if (update_barrier) {
 513			u32 new_barrier = be32_to_cpu(new->seqid);
 514
 515			if ((int)(new_barrier - lo->plh_barrier))
 516				lo->plh_barrier = new_barrier;
 517		} else {
 518			/* Because of wraparound, we want to keep the barrier
 519			 * "close" to the current seqids.  It needs to be
 520			 * within 2**31 to count as "behind", so if it
 521			 * gets too near that limit, give us a litle leeway
 522			 * and bring it to within 2**30.
 523			 * NOTE - and yes, this is all unsigned arithmetic.
 524			 */
 525			if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
 526				lo->plh_barrier = newseq - (1 << 30);
 527		}
 
 
 528	}
 529}
 530
 
 
 
 
 
 
 
 
 
 531/* lget is set to 1 if called from inside send_layoutget call chain */
 532static bool
 533pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
 534			int lget)
 535{
 536	if ((stateid) &&
 537	    (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0)
 538		return true;
 539	return lo->plh_block_lgets ||
 540		test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
 541		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
 542		(list_empty(&lo->plh_segs) &&
 543		 (atomic_read(&lo->plh_outstanding) > lget));
 544}
 545
 546int
 547pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
 548			      struct nfs4_state *open_state)
 549{
 550	int status = 0;
 551
 552	dprintk("--> %s\n", __func__);
 553	spin_lock(&lo->plh_inode->i_lock);
 554	if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
 555		status = -EAGAIN;
 
 
 556	} else if (list_empty(&lo->plh_segs)) {
 557		int seq;
 558
 559		do {
 560			seq = read_seqbegin(&open_state->seqlock);
 561			nfs4_stateid_copy(dst, &open_state->stateid);
 562		} while (read_seqretry(&open_state->seqlock, seq));
 563	} else
 564		nfs4_stateid_copy(dst, &lo->plh_stateid);
 565	spin_unlock(&lo->plh_inode->i_lock);
 566	dprintk("<-- %s\n", __func__);
 567	return status;
 568}
 569
 570/*
 571* Get layout from server.
 572*    for now, assume that whole file layouts are requested.
 573*    arg->offset: 0
 574*    arg->length: all ones
 575*/
 576static struct pnfs_layout_segment *
 577send_layoutget(struct pnfs_layout_hdr *lo,
 578	   struct nfs_open_context *ctx,
 579	   struct pnfs_layout_range *range,
 580	   gfp_t gfp_flags)
 581{
 582	struct inode *ino = lo->plh_inode;
 583	struct nfs_server *server = NFS_SERVER(ino);
 584	struct nfs4_layoutget *lgp;
 585	struct pnfs_layout_segment *lseg = NULL;
 586
 587	dprintk("--> %s\n", __func__);
 588
 589	BUG_ON(ctx == NULL);
 590	lgp = kzalloc(sizeof(*lgp), gfp_flags);
 591	if (lgp == NULL)
 592		return NULL;
 593
 594	lgp->args.minlength = PAGE_CACHE_SIZE;
 595	if (lgp->args.minlength > range->length)
 596		lgp->args.minlength = range->length;
 597	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
 598	lgp->args.range = *range;
 599	lgp->args.type = server->pnfs_curr_ld->id;
 600	lgp->args.inode = ino;
 601	lgp->args.ctx = get_nfs_open_context(ctx);
 602	lgp->lsegpp = &lseg;
 603	lgp->gfp_flags = gfp_flags;
 
 604
 605	/* Synchronously retrieve layout information from server and
 606	 * store in lseg.
 607	 */
 608	nfs4_proc_layoutget(lgp, gfp_flags);
 609	if (!lseg) {
 610		/* remember that LAYOUTGET failed and suspend trying */
 611		set_bit(lo_fail_bit(range->iomode), &lo->plh_flags);
 
 
 
 
 
 
 
 612	}
 613
 614	return lseg;
 615}
 616
 617/* Initiates a LAYOUTRETURN(FILE) */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 618int
 619_pnfs_return_layout(struct inode *ino)
 620{
 621	struct pnfs_layout_hdr *lo = NULL;
 622	struct nfs_inode *nfsi = NFS_I(ino);
 623	LIST_HEAD(tmp_list);
 624	struct nfs4_layoutreturn *lrp;
 625	nfs4_stateid stateid;
 626	int status = 0;
 627
 628	dprintk("--> %s\n", __func__);
 629
 630	spin_lock(&ino->i_lock);
 631	lo = nfsi->layout;
 632	if (!lo) {
 633		spin_unlock(&ino->i_lock);
 634		dprintk("%s: no layout to return\n", __func__);
 635		return status;
 636	}
 637	stateid = nfsi->layout->plh_stateid;
 638	/* Reference matched in nfs4_layoutreturn_release */
 639	get_layout_hdr(lo);
 640	mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
 
 
 
 
 
 
 
 
 
 641	lo->plh_block_lgets++;
 642	spin_unlock(&ino->i_lock);
 643	pnfs_free_lseg_list(&tmp_list);
 644
 645	WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
 646
 647	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
 648	if (unlikely(lrp == NULL)) {
 649		status = -ENOMEM;
 650		set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags);
 651		set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags);
 652		put_layout_hdr(lo);
 
 653		goto out;
 654	}
 655
 656	lrp->args.stateid = stateid;
 657	lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
 658	lrp->args.inode = ino;
 659	lrp->args.layout = lo;
 660	lrp->clp = NFS_SERVER(ino)->nfs_client;
 
 661
 662	status = nfs4_proc_layoutreturn(lrp);
 663out:
 664	dprintk("<-- %s status: %d\n", __func__, status);
 665	return status;
 666}
 667EXPORT_SYMBOL_GPL(_pnfs_return_layout);
 668
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 669bool pnfs_roc(struct inode *ino)
 670{
 671	struct pnfs_layout_hdr *lo;
 672	struct pnfs_layout_segment *lseg, *tmp;
 673	LIST_HEAD(tmp_list);
 674	bool found = false;
 675
 676	spin_lock(&ino->i_lock);
 677	lo = NFS_I(ino)->layout;
 678	if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
 679	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
 680		goto out_nolayout;
 681	list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
 682		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
 683			mark_lseg_invalid(lseg, &tmp_list);
 684			found = true;
 685		}
 686	if (!found)
 687		goto out_nolayout;
 688	lo->plh_block_lgets++;
 689	get_layout_hdr(lo); /* matched in pnfs_roc_release */
 690	spin_unlock(&ino->i_lock);
 691	pnfs_free_lseg_list(&tmp_list);
 692	return true;
 693
 694out_nolayout:
 695	spin_unlock(&ino->i_lock);
 696	return false;
 697}
 698
 699void pnfs_roc_release(struct inode *ino)
 700{
 701	struct pnfs_layout_hdr *lo;
 702
 703	spin_lock(&ino->i_lock);
 704	lo = NFS_I(ino)->layout;
 705	lo->plh_block_lgets--;
 706	put_layout_hdr_locked(lo);
 707	spin_unlock(&ino->i_lock);
 
 
 
 
 708}
 709
 710void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
 711{
 712	struct pnfs_layout_hdr *lo;
 713
 714	spin_lock(&ino->i_lock);
 715	lo = NFS_I(ino)->layout;
 716	if ((int)(barrier - lo->plh_barrier) > 0)
 717		lo->plh_barrier = barrier;
 718	spin_unlock(&ino->i_lock);
 719}
 720
 721bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
 722{
 723	struct nfs_inode *nfsi = NFS_I(ino);
 
 724	struct pnfs_layout_segment *lseg;
 
 725	bool found = false;
 726
 727	spin_lock(&ino->i_lock);
 728	list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
 729		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
 
 730			found = true;
 731			break;
 732		}
 733	if (!found) {
 734		struct pnfs_layout_hdr *lo = nfsi->layout;
 735		u32 current_seqid = be32_to_cpu(lo->plh_stateid.seqid);
 736
 737		/* Since close does not return a layout stateid for use as
 738		 * a barrier, we choose the worst-case barrier.
 739		 */
 740		*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
 741	}
 742	spin_unlock(&ino->i_lock);
 743	return found;
 744}
 745
 746/*
 747 * Compare two layout segments for sorting into layout cache.
 748 * We want to preferentially return RW over RO layouts, so ensure those
 749 * are seen first.
 750 */
 751static s64
 752cmp_layout(struct pnfs_layout_range *l1,
 753	   struct pnfs_layout_range *l2)
 754{
 755	s64 d;
 756
 757	/* high offset > low offset */
 758	d = l1->offset - l2->offset;
 759	if (d)
 760		return d;
 761
 762	/* short length > long length */
 763	d = l2->length - l1->length;
 764	if (d)
 765		return d;
 766
 767	/* read > read/write */
 768	return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ);
 769}
 770
 771static void
 772pnfs_insert_layout(struct pnfs_layout_hdr *lo,
 773		   struct pnfs_layout_segment *lseg)
 774{
 775	struct pnfs_layout_segment *lp;
 776
 777	dprintk("%s:Begin\n", __func__);
 778
 779	assert_spin_locked(&lo->plh_inode->i_lock);
 780	list_for_each_entry(lp, &lo->plh_segs, pls_list) {
 781		if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0)
 782			continue;
 783		list_add_tail(&lseg->pls_list, &lp->pls_list);
 784		dprintk("%s: inserted lseg %p "
 785			"iomode %d offset %llu length %llu before "
 786			"lp %p iomode %d offset %llu length %llu\n",
 787			__func__, lseg, lseg->pls_range.iomode,
 788			lseg->pls_range.offset, lseg->pls_range.length,
 789			lp, lp->pls_range.iomode, lp->pls_range.offset,
 790			lp->pls_range.length);
 791		goto out;
 792	}
 793	list_add_tail(&lseg->pls_list, &lo->plh_segs);
 794	dprintk("%s: inserted lseg %p "
 795		"iomode %d offset %llu length %llu at tail\n",
 796		__func__, lseg, lseg->pls_range.iomode,
 797		lseg->pls_range.offset, lseg->pls_range.length);
 798out:
 799	get_layout_hdr(lo);
 800
 801	dprintk("%s:Return\n", __func__);
 802}
 803
 804static struct pnfs_layout_hdr *
 805alloc_init_layout_hdr(struct inode *ino,
 806		      struct nfs_open_context *ctx,
 807		      gfp_t gfp_flags)
 808{
 809	struct pnfs_layout_hdr *lo;
 810
 811	lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
 812	if (!lo)
 813		return NULL;
 814	atomic_set(&lo->plh_refcount, 1);
 815	INIT_LIST_HEAD(&lo->plh_layouts);
 816	INIT_LIST_HEAD(&lo->plh_segs);
 817	INIT_LIST_HEAD(&lo->plh_bulk_recall);
 818	lo->plh_inode = ino;
 819	lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred);
 820	return lo;
 821}
 822
 823static struct pnfs_layout_hdr *
 824pnfs_find_alloc_layout(struct inode *ino,
 825		       struct nfs_open_context *ctx,
 826		       gfp_t gfp_flags)
 827{
 828	struct nfs_inode *nfsi = NFS_I(ino);
 829	struct pnfs_layout_hdr *new = NULL;
 830
 831	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
 832
 833	assert_spin_locked(&ino->i_lock);
 834	if (nfsi->layout) {
 835		if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
 836			return NULL;
 837		else
 838			return nfsi->layout;
 839	}
 840	spin_unlock(&ino->i_lock);
 841	new = alloc_init_layout_hdr(ino, ctx, gfp_flags);
 842	spin_lock(&ino->i_lock);
 843
 844	if (likely(nfsi->layout == NULL))	/* Won the race? */
 845		nfsi->layout = new;
 846	else
 
 847		pnfs_free_layout_hdr(new);
 
 
 848	return nfsi->layout;
 849}
 850
 851/*
 852 * iomode matching rules:
 853 * iomode	lseg	match
 854 * -----	-----	-----
 855 * ANY		READ	true
 856 * ANY		RW	true
 857 * RW		READ	false
 858 * RW		RW	true
 859 * READ		READ	true
 860 * READ		RW	true
 861 */
 862static int
 863is_matching_lseg(struct pnfs_layout_range *ls_range,
 864		 struct pnfs_layout_range *range)
 865{
 866	struct pnfs_layout_range range1;
 867
 868	if ((range->iomode == IOMODE_RW &&
 869	     ls_range->iomode != IOMODE_RW) ||
 870	    !lo_seg_intersecting(ls_range, range))
 871		return 0;
 872
 873	/* range1 covers only the first byte in the range */
 874	range1 = *range;
 875	range1.length = 1;
 876	return lo_seg_contained(ls_range, &range1);
 877}
 878
 879/*
 880 * lookup range in layout
 881 */
 882static struct pnfs_layout_segment *
 883pnfs_find_lseg(struct pnfs_layout_hdr *lo,
 884		struct pnfs_layout_range *range)
 885{
 886	struct pnfs_layout_segment *lseg, *ret = NULL;
 887
 888	dprintk("%s:Begin\n", __func__);
 889
 890	assert_spin_locked(&lo->plh_inode->i_lock);
 891	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
 892		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
 893		    is_matching_lseg(&lseg->pls_range, range)) {
 894			ret = get_lseg(lseg);
 895			break;
 896		}
 897		if (lseg->pls_range.offset > range->offset)
 898			break;
 899	}
 900
 901	dprintk("%s:Return lseg %p ref %d\n",
 902		__func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
 903	return ret;
 904}
 905
 906/*
 907 * Use mdsthreshold hints set at each OPEN to determine if I/O should go
 908 * to the MDS or over pNFS
 909 *
 910 * The nfs_inode read_io and write_io fields are cumulative counters reset
 911 * when there are no layout segments. Note that in pnfs_update_layout iomode
 912 * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
 913 * WRITE request.
 914 *
 915 * A return of true means use MDS I/O.
 916 *
 917 * From rfc 5661:
 918 * If a file's size is smaller than the file size threshold, data accesses
 919 * SHOULD be sent to the metadata server.  If an I/O request has a length that
 920 * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
 921 * server.  If both file size and I/O size are provided, the client SHOULD
 922 * reach or exceed  both thresholds before sending its read or write
 923 * requests to the data server.
 924 */
 925static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
 926				     struct inode *ino, int iomode)
 927{
 928	struct nfs4_threshold *t = ctx->mdsthreshold;
 929	struct nfs_inode *nfsi = NFS_I(ino);
 930	loff_t fsize = i_size_read(ino);
 931	bool size = false, size_set = false, io = false, io_set = false, ret = false;
 932
 933	if (t == NULL)
 934		return ret;
 935
 936	dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
 937		__func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
 938
 939	switch (iomode) {
 940	case IOMODE_READ:
 941		if (t->bm & THRESHOLD_RD) {
 942			dprintk("%s fsize %llu\n", __func__, fsize);
 943			size_set = true;
 944			if (fsize < t->rd_sz)
 945				size = true;
 946		}
 947		if (t->bm & THRESHOLD_RD_IO) {
 948			dprintk("%s nfsi->read_io %llu\n", __func__,
 949				nfsi->read_io);
 950			io_set = true;
 951			if (nfsi->read_io < t->rd_io_sz)
 952				io = true;
 953		}
 954		break;
 955	case IOMODE_RW:
 956		if (t->bm & THRESHOLD_WR) {
 957			dprintk("%s fsize %llu\n", __func__, fsize);
 958			size_set = true;
 959			if (fsize < t->wr_sz)
 960				size = true;
 961		}
 962		if (t->bm & THRESHOLD_WR_IO) {
 963			dprintk("%s nfsi->write_io %llu\n", __func__,
 964				nfsi->write_io);
 965			io_set = true;
 966			if (nfsi->write_io < t->wr_io_sz)
 967				io = true;
 968		}
 969		break;
 970	}
 971	if (size_set && io_set) {
 972		if (size && io)
 973			ret = true;
 974	} else if (size || io)
 975		ret = true;
 976
 977	dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
 978	return ret;
 979}
 980
 981/*
 982 * Layout segment is retreived from the server if not cached.
 983 * The appropriate layout segment is referenced and returned to the caller.
 984 */
 985struct pnfs_layout_segment *
 986pnfs_update_layout(struct inode *ino,
 987		   struct nfs_open_context *ctx,
 988		   loff_t pos,
 989		   u64 count,
 990		   enum pnfs_iomode iomode,
 991		   gfp_t gfp_flags)
 992{
 993	struct pnfs_layout_range arg = {
 994		.iomode = iomode,
 995		.offset = pos,
 996		.length = count,
 997	};
 998	unsigned pg_offset;
 999	struct nfs_inode *nfsi = NFS_I(ino);
1000	struct nfs_server *server = NFS_SERVER(ino);
1001	struct nfs_client *clp = server->nfs_client;
1002	struct pnfs_layout_hdr *lo;
1003	struct pnfs_layout_segment *lseg = NULL;
1004	bool first = false;
1005
1006	if (!pnfs_enabled_sb(NFS_SERVER(ino)))
1007		return NULL;
1008
1009	if (pnfs_within_mdsthreshold(ctx, ino, iomode))
1010		return NULL;
1011
1012	spin_lock(&ino->i_lock);
1013	lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
1014	if (lo == NULL) {
1015		dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
1016		goto out_unlock;
1017	}
1018
1019	/* Do we even need to bother with this? */
1020	if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
1021		dprintk("%s matches recall, use MDS\n", __func__);
1022		goto out_unlock;
1023	}
1024
1025	/* if LAYOUTGET already failed once we don't try again */
1026	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
1027		goto out_unlock;
1028
1029	/* Check to see if the layout for the given range already exists */
1030	lseg = pnfs_find_lseg(lo, &arg);
1031	if (lseg)
1032		goto out_unlock;
1033
1034	if (pnfs_layoutgets_blocked(lo, NULL, 0))
1035		goto out_unlock;
1036	atomic_inc(&lo->plh_outstanding);
1037
1038	get_layout_hdr(lo);
1039	if (list_empty(&lo->plh_segs))
1040		first = true;
1041	spin_unlock(&ino->i_lock);
 
1042	if (first) {
1043		/* The lo must be on the clp list if there is any
1044		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
1045		 */
1046		spin_lock(&clp->cl_lock);
1047		BUG_ON(!list_empty(&lo->plh_layouts));
1048		list_add_tail(&lo->plh_layouts, &server->layouts);
1049		spin_unlock(&clp->cl_lock);
1050	}
1051
1052	pg_offset = arg.offset & ~PAGE_CACHE_MASK;
1053	if (pg_offset) {
1054		arg.offset -= pg_offset;
1055		arg.length += pg_offset;
1056	}
1057	if (arg.length != NFS4_MAX_UINT64)
1058		arg.length = PAGE_CACHE_ALIGN(arg.length);
1059
1060	lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
1061	if (!lseg && first) {
1062		spin_lock(&clp->cl_lock);
1063		list_del_init(&lo->plh_layouts);
1064		spin_unlock(&clp->cl_lock);
1065	}
1066	atomic_dec(&lo->plh_outstanding);
1067	put_layout_hdr(lo);
 
1068out:
1069	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
1070		nfsi->layout ? nfsi->layout->plh_flags : -1, lseg);
 
 
 
 
 
 
1071	return lseg;
1072out_unlock:
1073	spin_unlock(&ino->i_lock);
1074	goto out;
1075}
1076EXPORT_SYMBOL_GPL(pnfs_update_layout);
1077
1078int
1079pnfs_layout_process(struct nfs4_layoutget *lgp)
1080{
1081	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
1082	struct nfs4_layoutget_res *res = &lgp->res;
1083	struct pnfs_layout_segment *lseg;
1084	struct inode *ino = lo->plh_inode;
 
1085	int status = 0;
1086
1087	/* Inject layout blob into I/O device driver */
1088	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
1089	if (!lseg || IS_ERR(lseg)) {
1090		if (!lseg)
1091			status = -ENOMEM;
1092		else
1093			status = PTR_ERR(lseg);
1094		dprintk("%s: Could not allocate layout: error %d\n",
1095		       __func__, status);
1096		goto out;
1097	}
1098
1099	spin_lock(&ino->i_lock);
1100	if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
1101		dprintk("%s forget reply due to recall\n", __func__);
1102		goto out_forget_reply;
1103	}
1104
1105	if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
 
1106		dprintk("%s forget reply due to state\n", __func__);
1107		goto out_forget_reply;
1108	}
 
 
 
 
 
 
1109	init_lseg(lo, lseg);
1110	lseg->pls_range = res->range;
1111	*lgp->lsegpp = get_lseg(lseg);
1112	pnfs_insert_layout(lo, lseg);
1113
1114	if (res->return_on_close) {
1115		set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
1116		set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
1117	}
1118
1119	/* Done processing layoutget. Set the layout stateid */
1120	pnfs_set_layout_stateid(lo, &res->stateid, false);
1121	spin_unlock(&ino->i_lock);
 
 
1122out:
1123	return status;
1124
1125out_forget_reply:
1126	spin_unlock(&ino->i_lock);
1127	lseg->pls_layout = lo;
1128	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
1129	goto out;
1130}
1131
1132void
1133pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
1134{
1135	BUG_ON(pgio->pg_lseg != NULL);
 
 
1136
1137	if (req->wb_offset != req->wb_pgbase) {
1138		nfs_pageio_reset_read_mds(pgio);
1139		return;
1140	}
 
 
 
 
 
 
1141	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1142					   req->wb_context,
1143					   req_offset(req),
1144					   req->wb_bytes,
1145					   IOMODE_READ,
1146					   GFP_KERNEL);
1147	/* If no lseg, fall back to read through mds */
1148	if (pgio->pg_lseg == NULL)
1149		nfs_pageio_reset_read_mds(pgio);
1150
1151}
1152EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
1153
1154void
1155pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 
1156{
1157	BUG_ON(pgio->pg_lseg != NULL);
1158
1159	if (req->wb_offset != req->wb_pgbase) {
1160		nfs_pageio_reset_write_mds(pgio);
1161		return;
1162	}
 
1163	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1164					   req->wb_context,
1165					   req_offset(req),
1166					   req->wb_bytes,
1167					   IOMODE_RW,
1168					   GFP_NOFS);
1169	/* If no lseg, fall back to write through mds */
1170	if (pgio->pg_lseg == NULL)
1171		nfs_pageio_reset_write_mds(pgio);
1172}
1173EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
1174
1175bool
1176pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
1177		      const struct nfs_pgio_completion_ops *compl_ops)
1178{
1179	struct nfs_server *server = NFS_SERVER(inode);
1180	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1181
1182	if (ld == NULL)
1183		return false;
1184	nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops,
1185			server->rsize, 0);
1186	return true;
1187}
1188
1189bool
1190pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
1191		       int ioflags,
1192		       const struct nfs_pgio_completion_ops *compl_ops)
1193{
1194	struct nfs_server *server = NFS_SERVER(inode);
1195	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1196
1197	if (ld == NULL)
1198		return false;
1199	nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops,
1200			server->wsize, ioflags);
1201	return true;
1202}
1203
1204bool
1205pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1206		     struct nfs_page *req)
1207{
1208	if (pgio->pg_lseg == NULL)
1209		return nfs_generic_pg_test(pgio, prev, req);
1210
1211	/*
1212	 * Test if a nfs_page is fully contained in the pnfs_layout_range.
1213	 * Note that this test makes several assumptions:
1214	 * - that the previous nfs_page in the struct nfs_pageio_descriptor
1215	 *   is known to lie within the range.
1216	 *   - that the nfs_page being tested is known to be contiguous with the
1217	 *   previous nfs_page.
1218	 *   - Layout ranges are page aligned, so we only have to test the
1219	 *   start offset of the request.
1220	 *
1221	 * Please also note that 'end_offset' is actually the offset of the
1222	 * first byte that lies outside the pnfs_layout_range. FIXME?
1223	 *
1224	 */
1225	return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset,
1226					 pgio->pg_lseg->pls_range.length);
1227}
1228EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
1229
1230int pnfs_write_done_resend_to_mds(struct inode *inode,
1231				struct list_head *head,
1232				const struct nfs_pgio_completion_ops *compl_ops)
 
1233{
1234	struct nfs_pageio_descriptor pgio;
1235	LIST_HEAD(failed);
1236
1237	/* Resend all requests through the MDS */
1238	nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops);
 
1239	while (!list_empty(head)) {
1240		struct nfs_page *req = nfs_list_entry(head->next);
1241
1242		nfs_list_remove_request(req);
1243		if (!nfs_pageio_add_request(&pgio, req))
1244			nfs_list_add_request(req, &failed);
1245	}
1246	nfs_pageio_complete(&pgio);
1247
1248	if (!list_empty(&failed)) {
1249		/* For some reason our attempt to resend pages. Mark the
1250		 * overall send request as having failed, and let
1251		 * nfs_writeback_release_full deal with the error.
1252		 */
1253		list_move(&failed, head);
1254		return -EIO;
1255	}
1256	return 0;
1257}
1258EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
1259
1260static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
1261{
1262	struct nfs_pgio_header *hdr = data->header;
1263
1264	dprintk("pnfs write error = %d\n", hdr->pnfs_error);
1265	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1266	    PNFS_LAYOUTRET_ON_ERROR) {
1267		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1268		pnfs_return_layout(hdr->inode);
1269	}
1270	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1271		data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
1272							&hdr->pages,
1273							hdr->completion_ops);
 
1274}
1275
1276/*
1277 * Called by non rpc-based layout drivers
1278 */
1279void pnfs_ld_write_done(struct nfs_write_data *data)
1280{
1281	struct nfs_pgio_header *hdr = data->header;
1282
 
1283	if (!hdr->pnfs_error) {
1284		pnfs_set_layoutcommit(data);
1285		hdr->mds_ops->rpc_call_done(&data->task, data);
1286	} else
1287		pnfs_ld_handle_write_error(data);
1288	hdr->mds_ops->rpc_release(data);
1289}
1290EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
1291
1292static void
1293pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1294		struct nfs_write_data *data)
1295{
1296	struct nfs_pgio_header *hdr = data->header;
1297
1298	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1299		list_splice_tail_init(&hdr->pages, &desc->pg_list);
1300		nfs_pageio_reset_write_mds(desc);
1301		desc->pg_recoalesce = 1;
1302	}
1303	nfs_writedata_release(data);
1304}
1305
1306static enum pnfs_try_status
1307pnfs_try_to_write_data(struct nfs_write_data *wdata,
1308			const struct rpc_call_ops *call_ops,
1309			struct pnfs_layout_segment *lseg,
1310			int how)
1311{
1312	struct nfs_pgio_header *hdr = wdata->header;
1313	struct inode *inode = hdr->inode;
1314	enum pnfs_try_status trypnfs;
1315	struct nfs_server *nfss = NFS_SERVER(inode);
1316
1317	hdr->mds_ops = call_ops;
1318
1319	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
1320		inode->i_ino, wdata->args.count, wdata->args.offset, how);
1321	trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
1322	if (trypnfs != PNFS_NOT_ATTEMPTED)
1323		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
1324	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
1325	return trypnfs;
1326}
1327
1328static void
1329pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
1330{
1331	struct nfs_write_data *data;
1332	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1333	struct pnfs_layout_segment *lseg = desc->pg_lseg;
1334
1335	desc->pg_lseg = NULL;
1336	while (!list_empty(head)) {
1337		enum pnfs_try_status trypnfs;
1338
1339		data = list_first_entry(head, struct nfs_write_data, list);
1340		list_del_init(&data->list);
1341
1342		trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
1343		if (trypnfs == PNFS_NOT_ATTEMPTED)
1344			pnfs_write_through_mds(desc, data);
1345	}
1346	put_lseg(lseg);
1347}
1348
1349static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
1350{
1351	put_lseg(hdr->lseg);
1352	nfs_writehdr_free(hdr);
1353}
 
1354
1355int
1356pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1357{
1358	struct nfs_write_header *whdr;
1359	struct nfs_pgio_header *hdr;
1360	int ret;
1361
1362	whdr = nfs_writehdr_alloc();
1363	if (!whdr) {
1364		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1365		put_lseg(desc->pg_lseg);
1366		desc->pg_lseg = NULL;
1367		return -ENOMEM;
1368	}
1369	hdr = &whdr->header;
1370	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
1371	hdr->lseg = get_lseg(desc->pg_lseg);
1372	atomic_inc(&hdr->refcnt);
1373	ret = nfs_generic_flush(desc, hdr);
1374	if (ret != 0) {
1375		put_lseg(desc->pg_lseg);
1376		desc->pg_lseg = NULL;
1377	} else
1378		pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
1379	if (atomic_dec_and_test(&hdr->refcnt))
1380		hdr->completion_ops->completion(hdr);
1381	return ret;
1382}
1383EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
1384
1385int pnfs_read_done_resend_to_mds(struct inode *inode,
1386				struct list_head *head,
1387				const struct nfs_pgio_completion_ops *compl_ops)
 
1388{
1389	struct nfs_pageio_descriptor pgio;
1390	LIST_HEAD(failed);
1391
1392	/* Resend all requests through the MDS */
1393	nfs_pageio_init_read_mds(&pgio, inode, compl_ops);
 
1394	while (!list_empty(head)) {
1395		struct nfs_page *req = nfs_list_entry(head->next);
1396
1397		nfs_list_remove_request(req);
1398		if (!nfs_pageio_add_request(&pgio, req))
1399			nfs_list_add_request(req, &failed);
1400	}
1401	nfs_pageio_complete(&pgio);
1402
1403	if (!list_empty(&failed)) {
1404		list_move(&failed, head);
1405		return -EIO;
1406	}
1407	return 0;
1408}
1409EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
1410
1411static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1412{
1413	struct nfs_pgio_header *hdr = data->header;
1414
1415	dprintk("pnfs read error = %d\n", hdr->pnfs_error);
1416	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1417	    PNFS_LAYOUTRET_ON_ERROR) {
1418		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1419		pnfs_return_layout(hdr->inode);
1420	}
1421	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1422		data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
1423							&hdr->pages,
1424							hdr->completion_ops);
 
1425}
1426
1427/*
1428 * Called by non rpc-based layout drivers
1429 */
1430void pnfs_ld_read_done(struct nfs_read_data *data)
1431{
1432	struct nfs_pgio_header *hdr = data->header;
1433
 
1434	if (likely(!hdr->pnfs_error)) {
1435		__nfs4_read_done_cb(data);
1436		hdr->mds_ops->rpc_call_done(&data->task, data);
1437	} else
1438		pnfs_ld_handle_read_error(data);
1439	hdr->mds_ops->rpc_release(data);
1440}
1441EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
1442
1443static void
1444pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
1445		struct nfs_read_data *data)
1446{
1447	struct nfs_pgio_header *hdr = data->header;
1448
1449	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1450		list_splice_tail_init(&hdr->pages, &desc->pg_list);
1451		nfs_pageio_reset_read_mds(desc);
1452		desc->pg_recoalesce = 1;
1453	}
1454	nfs_readdata_release(data);
1455}
1456
1457/*
1458 * Call the appropriate parallel I/O subsystem read function.
1459 */
1460static enum pnfs_try_status
1461pnfs_try_to_read_data(struct nfs_read_data *rdata,
1462		       const struct rpc_call_ops *call_ops,
1463		       struct pnfs_layout_segment *lseg)
1464{
1465	struct nfs_pgio_header *hdr = rdata->header;
1466	struct inode *inode = hdr->inode;
1467	struct nfs_server *nfss = NFS_SERVER(inode);
1468	enum pnfs_try_status trypnfs;
1469
1470	hdr->mds_ops = call_ops;
1471
1472	dprintk("%s: Reading ino:%lu %u@%llu\n",
1473		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
1474
1475	trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
1476	if (trypnfs != PNFS_NOT_ATTEMPTED)
1477		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
1478	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
1479	return trypnfs;
1480}
1481
1482static void
1483pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
1484{
1485	struct nfs_read_data *data;
1486	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1487	struct pnfs_layout_segment *lseg = desc->pg_lseg;
1488
1489	desc->pg_lseg = NULL;
1490	while (!list_empty(head)) {
1491		enum pnfs_try_status trypnfs;
1492
1493		data = list_first_entry(head, struct nfs_read_data, list);
1494		list_del_init(&data->list);
1495
1496		trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
1497		if (trypnfs == PNFS_NOT_ATTEMPTED)
1498			pnfs_read_through_mds(desc, data);
1499	}
1500	put_lseg(lseg);
1501}
1502
1503static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
1504{
1505	put_lseg(hdr->lseg);
1506	nfs_readhdr_free(hdr);
1507}
 
1508
1509int
1510pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1511{
1512	struct nfs_read_header *rhdr;
1513	struct nfs_pgio_header *hdr;
1514	int ret;
1515
1516	rhdr = nfs_readhdr_alloc();
1517	if (!rhdr) {
1518		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1519		ret = -ENOMEM;
1520		put_lseg(desc->pg_lseg);
1521		desc->pg_lseg = NULL;
1522		return ret;
1523	}
1524	hdr = &rhdr->header;
1525	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
1526	hdr->lseg = get_lseg(desc->pg_lseg);
1527	atomic_inc(&hdr->refcnt);
1528	ret = nfs_generic_pagein(desc, hdr);
1529	if (ret != 0) {
1530		put_lseg(desc->pg_lseg);
1531		desc->pg_lseg = NULL;
1532	} else
1533		pnfs_do_multiple_reads(desc, &hdr->rpc_list);
1534	if (atomic_dec_and_test(&hdr->refcnt))
1535		hdr->completion_ops->completion(hdr);
1536	return ret;
1537}
1538EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
1539
 
 
 
 
 
 
 
 
 
1540/*
1541 * There can be multiple RW segments.
1542 */
1543static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
1544{
1545	struct pnfs_layout_segment *lseg;
1546
1547	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
1548		if (lseg->pls_range.iomode == IOMODE_RW &&
1549		    test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
1550			list_add(&lseg->pls_lc_list, listp);
1551	}
1552}
1553
 
 
 
 
 
 
 
 
 
 
 
 
 
1554void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
1555{
1556	if (lseg->pls_range.iomode == IOMODE_RW) {
1557		dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
1558		set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
1559	} else {
1560		dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
1561		set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
1562	}
1563}
1564EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
1565
1566void
1567pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1568{
1569	struct nfs_pgio_header *hdr = wdata->header;
1570	struct inode *inode = hdr->inode;
1571	struct nfs_inode *nfsi = NFS_I(inode);
1572	loff_t end_pos = wdata->mds_offset + wdata->res.count;
1573	bool mark_as_dirty = false;
1574
1575	spin_lock(&inode->i_lock);
1576	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1577		mark_as_dirty = true;
1578		dprintk("%s: Set layoutcommit for inode %lu ",
1579			__func__, inode->i_ino);
1580	}
1581	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
1582		/* references matched in nfs4_layoutcommit_release */
1583		get_lseg(hdr->lseg);
1584	}
1585	if (end_pos > nfsi->layout->plh_lwb)
1586		nfsi->layout->plh_lwb = end_pos;
1587	spin_unlock(&inode->i_lock);
1588	dprintk("%s: lseg %p end_pos %llu\n",
1589		__func__, hdr->lseg, nfsi->layout->plh_lwb);
1590
1591	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
1592	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
1593	if (mark_as_dirty)
1594		mark_inode_dirty_sync(inode);
1595}
1596EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
1597
1598void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
1599{
1600	struct nfs_server *nfss = NFS_SERVER(data->args.inode);
1601
1602	if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
1603		nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
 
1604}
1605
1606/*
1607 * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
1608 * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
1609 * data to disk to allow the server to recover the data if it crashes.
1610 * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
1611 * is off, and a COMMIT is sent to a data server, or
1612 * if WRITEs to a data server return NFS_DATA_SYNC.
1613 */
1614int
1615pnfs_layoutcommit_inode(struct inode *inode, bool sync)
1616{
1617	struct nfs4_layoutcommit_data *data;
1618	struct nfs_inode *nfsi = NFS_I(inode);
1619	loff_t end_pos;
1620	int status = 0;
1621
1622	dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
1623
1624	if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
1625		return 0;
1626
1627	/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
1628	data = kzalloc(sizeof(*data), GFP_NOFS);
1629	if (!data) {
1630		status = -ENOMEM;
1631		goto out;
1632	}
1633
1634	if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
1635		goto out_free;
1636
 
1637	if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
1638		if (!sync) {
1639			status = -EAGAIN;
1640			goto out_free;
1641		}
1642		status = wait_on_bit_lock(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING,
1643					nfs_wait_bit_killable, TASK_KILLABLE);
1644		if (status)
1645			goto out_free;
1646	}
1647
1648	INIT_LIST_HEAD(&data->lseg_list);
 
 
 
 
 
 
1649	spin_lock(&inode->i_lock);
1650	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1651		clear_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags);
1652		spin_unlock(&inode->i_lock);
1653		wake_up_bit(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING);
1654		goto out_free;
1655	}
1656
 
1657	pnfs_list_write_lseg(inode, &data->lseg_list);
1658
1659	end_pos = nfsi->layout->plh_lwb;
1660	nfsi->layout->plh_lwb = 0;
1661
1662	nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
1663	spin_unlock(&inode->i_lock);
1664
1665	data->args.inode = inode;
1666	data->cred = get_rpccred(nfsi->layout->plh_lc_cred);
1667	nfs_fattr_init(&data->fattr);
1668	data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
1669	data->res.fattr = &data->fattr;
1670	data->args.lastbytewritten = end_pos - 1;
1671	data->res.server = NFS_SERVER(inode);
1672
1673	status = nfs4_proc_layoutcommit(data, sync);
1674out:
1675	if (status)
1676		mark_inode_dirty_sync(inode);
1677	dprintk("<-- %s status %d\n", __func__, status);
1678	return status;
1679out_free:
 
1680	kfree(data);
 
 
1681	goto out;
1682}
1683
1684struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
1685{
1686	struct nfs4_threshold *thp;
1687
1688	thp = kzalloc(sizeof(*thp), GFP_NOFS);
1689	if (!thp) {
1690		dprintk("%s mdsthreshold allocation failed\n", __func__);
1691		return NULL;
1692	}
1693	return thp;
1694}