Linux Audio

Check our new training course

Real-Time Linux with PREEMPT_RT training

Feb 18-20, 2025
Register
Loading...
v6.8
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Common NFS I/O  operations for the pnfs file based
   4 * layout drivers.
   5 *
   6 * Copyright (c) 2014, Primary Data, Inc. All rights reserved.
   7 *
   8 * Tom Haynes <loghyr@primarydata.com>
   9 */
  10
  11#include <linux/nfs_fs.h>
  12#include <linux/nfs_page.h>
  13#include <linux/sunrpc/addr.h>
  14#include <linux/module.h>
  15
  16#include "nfs4session.h"
  17#include "internal.h"
  18#include "pnfs.h"
  19
  20#define NFSDBG_FACILITY		NFSDBG_PNFS
  21
  22void pnfs_generic_rw_release(void *data)
  23{
  24	struct nfs_pgio_header *hdr = data;
  25
  26	nfs_put_client(hdr->ds_clp);
  27	hdr->mds_ops->rpc_release(data);
  28}
  29EXPORT_SYMBOL_GPL(pnfs_generic_rw_release);
  30
  31/* Fake up some data that will cause nfs_commit_release to retry the writes. */
  32void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data)
  33{
  34	struct nfs_writeverf *verf = data->res.verf;
  35
  36	data->task.tk_status = 0;
  37	memset(&verf->verifier, 0, sizeof(verf->verifier));
  38	verf->committed = NFS_UNSTABLE;
 
  39}
  40EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes);
  41
  42void pnfs_generic_write_commit_done(struct rpc_task *task, void *data)
  43{
  44	struct nfs_commit_data *wdata = data;
  45
  46	/* Note this may cause RPC to be resent */
  47	wdata->mds_ops->rpc_call_done(task, data);
  48}
  49EXPORT_SYMBOL_GPL(pnfs_generic_write_commit_done);
  50
  51void pnfs_generic_commit_release(void *calldata)
  52{
  53	struct nfs_commit_data *data = calldata;
  54
  55	data->completion_ops->completion(data);
  56	pnfs_put_lseg(data->lseg);
  57	nfs_put_client(data->ds_clp);
  58	nfs_commitdata_release(data);
  59}
  60EXPORT_SYMBOL_GPL(pnfs_generic_commit_release);
  61
  62static struct pnfs_layout_segment *
  63pnfs_free_bucket_lseg(struct pnfs_commit_bucket *bucket)
  64{
  65	if (list_empty(&bucket->committing) && list_empty(&bucket->written)) {
  66		struct pnfs_layout_segment *freeme = bucket->lseg;
  67		bucket->lseg = NULL;
  68		return freeme;
  69	}
  70	return NULL;
  71}
  72
  73/* The generic layer is about to remove the req from the commit list.
  74 * If this will make the bucket empty, it will need to put the lseg reference.
  75 * Note this must be called holding nfsi->commit_mutex
  76 */
  77void
  78pnfs_generic_clear_request_commit(struct nfs_page *req,
  79				  struct nfs_commit_info *cinfo)
  80{
  81	struct pnfs_commit_bucket *bucket = NULL;
  82
  83	if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
  84		goto out;
  85	cinfo->ds->nwritten--;
  86	if (list_is_singular(&req->wb_list))
 
 
  87		bucket = list_first_entry(&req->wb_list,
  88					  struct pnfs_commit_bucket, written);
 
 
 
 
  89out:
  90	nfs_request_remove_commit_list(req, cinfo);
  91	if (bucket)
  92		pnfs_put_lseg(pnfs_free_bucket_lseg(bucket));
  93}
  94EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit);
  95
  96struct pnfs_commit_array *
  97pnfs_alloc_commit_array(size_t n, gfp_t gfp_flags)
  98{
  99	struct pnfs_commit_array *p;
 100	struct pnfs_commit_bucket *b;
 101
 102	p = kmalloc(struct_size(p, buckets, n), gfp_flags);
 103	if (!p)
 104		return NULL;
 105	p->nbuckets = n;
 106	INIT_LIST_HEAD(&p->cinfo_list);
 107	INIT_LIST_HEAD(&p->lseg_list);
 108	p->lseg = NULL;
 109	for (b = &p->buckets[0]; n != 0; b++, n--) {
 110		INIT_LIST_HEAD(&b->written);
 111		INIT_LIST_HEAD(&b->committing);
 112		b->lseg = NULL;
 113		b->direct_verf.committed = NFS_INVALID_STABLE_HOW;
 114	}
 115	return p;
 116}
 117EXPORT_SYMBOL_GPL(pnfs_alloc_commit_array);
 118
 119void
 120pnfs_free_commit_array(struct pnfs_commit_array *p)
 121{
 122	kfree_rcu(p, rcu);
 123}
 124EXPORT_SYMBOL_GPL(pnfs_free_commit_array);
 125
 126static struct pnfs_commit_array *
 127pnfs_find_commit_array_by_lseg(struct pnfs_ds_commit_info *fl_cinfo,
 128		struct pnfs_layout_segment *lseg)
 129{
 130	struct pnfs_commit_array *array;
 131
 132	list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
 133		if (array->lseg == lseg)
 134			return array;
 135	}
 136	return NULL;
 137}
 138
 139struct pnfs_commit_array *
 140pnfs_add_commit_array(struct pnfs_ds_commit_info *fl_cinfo,
 141		struct pnfs_commit_array *new,
 142		struct pnfs_layout_segment *lseg)
 143{
 144	struct pnfs_commit_array *array;
 145
 146	array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
 147	if (array)
 148		return array;
 149	new->lseg = lseg;
 150	refcount_set(&new->refcount, 1);
 151	list_add_rcu(&new->cinfo_list, &fl_cinfo->commits);
 152	list_add(&new->lseg_list, &lseg->pls_commits);
 153	return new;
 154}
 155EXPORT_SYMBOL_GPL(pnfs_add_commit_array);
 156
 157static struct pnfs_commit_array *
 158pnfs_lookup_commit_array(struct pnfs_ds_commit_info *fl_cinfo,
 159		struct pnfs_layout_segment *lseg)
 160{
 161	struct pnfs_commit_array *array;
 162
 163	rcu_read_lock();
 164	array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
 165	if (!array) {
 166		rcu_read_unlock();
 167		fl_cinfo->ops->setup_ds_info(fl_cinfo, lseg);
 168		rcu_read_lock();
 169		array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
 170	}
 171	rcu_read_unlock();
 172	return array;
 173}
 174
 175static void
 176pnfs_release_commit_array_locked(struct pnfs_commit_array *array)
 177{
 178	list_del_rcu(&array->cinfo_list);
 179	list_del(&array->lseg_list);
 180	pnfs_free_commit_array(array);
 181}
 182
 183static void
 184pnfs_put_commit_array_locked(struct pnfs_commit_array *array)
 185{
 186	if (refcount_dec_and_test(&array->refcount))
 187		pnfs_release_commit_array_locked(array);
 188}
 189
 190static void
 191pnfs_put_commit_array(struct pnfs_commit_array *array, struct inode *inode)
 192{
 193	if (refcount_dec_and_lock(&array->refcount, &inode->i_lock)) {
 194		pnfs_release_commit_array_locked(array);
 195		spin_unlock(&inode->i_lock);
 196	}
 197}
 198
 199static struct pnfs_commit_array *
 200pnfs_get_commit_array(struct pnfs_commit_array *array)
 201{
 202	if (refcount_inc_not_zero(&array->refcount))
 203		return array;
 204	return NULL;
 205}
 206
 207static void
 208pnfs_remove_and_free_commit_array(struct pnfs_commit_array *array)
 209{
 210	array->lseg = NULL;
 211	list_del_init(&array->lseg_list);
 212	pnfs_put_commit_array_locked(array);
 213}
 214
 215void
 216pnfs_generic_ds_cinfo_release_lseg(struct pnfs_ds_commit_info *fl_cinfo,
 217		struct pnfs_layout_segment *lseg)
 218{
 219	struct pnfs_commit_array *array, *tmp;
 220
 221	list_for_each_entry_safe(array, tmp, &lseg->pls_commits, lseg_list)
 222		pnfs_remove_and_free_commit_array(array);
 223}
 224EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_release_lseg);
 225
 226void
 227pnfs_generic_ds_cinfo_destroy(struct pnfs_ds_commit_info *fl_cinfo)
 228{
 229	struct pnfs_commit_array *array, *tmp;
 230
 231	list_for_each_entry_safe(array, tmp, &fl_cinfo->commits, cinfo_list)
 232		pnfs_remove_and_free_commit_array(array);
 233}
 234EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_destroy);
 235
 236/*
 237 * Locks the nfs_page requests for commit and moves them to
 238 * @bucket->committing.
 239 */
 240static int
 241pnfs_bucket_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
 242				struct nfs_commit_info *cinfo,
 243				int max)
 244{
 245	struct list_head *src = &bucket->written;
 246	struct list_head *dst = &bucket->committing;
 247	int ret;
 248
 249	lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
 250	ret = nfs_scan_commit_list(src, dst, cinfo, max);
 251	if (ret) {
 252		cinfo->ds->nwritten -= ret;
 253		cinfo->ds->ncommitting += ret;
 
 
 
 
 
 
 254	}
 255	return ret;
 256}
 257
 258static int pnfs_bucket_scan_array(struct nfs_commit_info *cinfo,
 259				  struct pnfs_commit_bucket *buckets,
 260				  unsigned int nbuckets,
 261				  int max)
 262{
 263	unsigned int i;
 264	int rv = 0, cnt;
 265
 266	for (i = 0; i < nbuckets && max != 0; i++) {
 267		cnt = pnfs_bucket_scan_ds_commit_list(&buckets[i], cinfo, max);
 268		rv += cnt;
 269		max -= cnt;
 270	}
 271	return rv;
 272}
 273
 274/* Move reqs from written to committing lists, returning count
 275 * of number moved.
 276 */
 277int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max)
 
 278{
 279	struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
 280	struct pnfs_commit_array *array;
 281	int rv = 0, cnt;
 282
 283	rcu_read_lock();
 284	list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
 285		if (!array->lseg || !pnfs_get_commit_array(array))
 286			continue;
 287		rcu_read_unlock();
 288		cnt = pnfs_bucket_scan_array(cinfo, array->buckets,
 289				array->nbuckets, max);
 290		rcu_read_lock();
 291		pnfs_put_commit_array(array, cinfo->inode);
 292		rv += cnt;
 293		max -= cnt;
 294		if (!max)
 295			break;
 296	}
 297	rcu_read_unlock();
 298	return rv;
 299}
 300EXPORT_SYMBOL_GPL(pnfs_generic_scan_commit_lists);
 301
 302static unsigned int
 303pnfs_bucket_recover_commit_reqs(struct list_head *dst,
 304			        struct pnfs_commit_bucket *buckets,
 305				unsigned int nbuckets,
 306				struct nfs_commit_info *cinfo)
 307{
 308	struct pnfs_commit_bucket *b;
 309	struct pnfs_layout_segment *freeme;
 310	unsigned int nwritten, ret = 0;
 311	unsigned int i;
 312
 
 313restart:
 314	for (i = 0, b = buckets; i < nbuckets; i++, b++) {
 315		nwritten = nfs_scan_commit_list(&b->written, dst, cinfo, 0);
 316		if (!nwritten)
 317			continue;
 318		ret += nwritten;
 319		freeme = pnfs_free_bucket_lseg(b);
 320		if (freeme) {
 
 321			pnfs_put_lseg(freeme);
 322			goto restart;
 323		}
 324	}
 325	return ret;
 326}
 327
 328/* Pull everything off the committing lists and dump into @dst.  */
 329void pnfs_generic_recover_commit_reqs(struct list_head *dst,
 330				      struct nfs_commit_info *cinfo)
 331{
 332	struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
 333	struct pnfs_commit_array *array;
 334	unsigned int nwritten;
 335
 336	lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
 337	rcu_read_lock();
 338	list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
 339		if (!array->lseg || !pnfs_get_commit_array(array))
 340			continue;
 341		rcu_read_unlock();
 342		nwritten = pnfs_bucket_recover_commit_reqs(dst,
 343							   array->buckets,
 344							   array->nbuckets,
 345							   cinfo);
 346		rcu_read_lock();
 347		pnfs_put_commit_array(array, cinfo->inode);
 348		fl_cinfo->nwritten -= nwritten;
 349	}
 350	rcu_read_unlock();
 351}
 352EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs);
 353
 354static struct nfs_page *
 355pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets,
 356			       unsigned int nbuckets, struct folio *folio)
 357{
 358	struct nfs_page *req;
 359	struct pnfs_commit_bucket *b;
 360	unsigned int i;
 361
 362	/* Linearly search the commit lists for each bucket until a matching
 363	 * request is found */
 364	for (i = 0, b = buckets; i < nbuckets; i++, b++) {
 365		list_for_each_entry(req, &b->written, wb_list) {
 366			if (nfs_page_to_folio(req) == folio)
 367				return req->wb_head;
 368		}
 369		list_for_each_entry(req, &b->committing, wb_list) {
 370			if (nfs_page_to_folio(req) == folio)
 371				return req->wb_head;
 372		}
 373	}
 374	return NULL;
 375}
 376
 377/* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head request
 378 *				   for @folio
 379 * @cinfo - commit info for current inode
 380 * @folio - page to search for matching head request
 381 *
 382 * Return: the head request if one is found, otherwise %NULL.
 383 */
 384struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo,
 385						 struct folio *folio)
 386{
 387	struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
 388	struct pnfs_commit_array *array;
 389	struct nfs_page *req;
 390
 391	list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) {
 392		req = pnfs_bucket_search_commit_reqs(array->buckets,
 393						     array->nbuckets, folio);
 394		if (req)
 395			return req;
 396	}
 397	return NULL;
 398}
 399EXPORT_SYMBOL_GPL(pnfs_generic_search_commit_reqs);
 400
 401static struct pnfs_layout_segment *
 402pnfs_bucket_get_committing(struct list_head *head,
 403			   struct pnfs_commit_bucket *bucket,
 404			   struct nfs_commit_info *cinfo)
 405{
 406	struct pnfs_layout_segment *lseg;
 407	struct list_head *pos;
 408
 409	list_for_each(pos, &bucket->committing)
 410		cinfo->ds->ncommitting--;
 411	list_splice_init(&bucket->committing, head);
 412	lseg = pnfs_free_bucket_lseg(bucket);
 413	if (!lseg)
 414		lseg = pnfs_get_lseg(bucket->lseg);
 415	return lseg;
 416}
 417
 418static struct nfs_commit_data *
 419pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket,
 420			     struct nfs_commit_info *cinfo)
 421{
 422	struct nfs_commit_data *data = nfs_commitdata_alloc();
 423
 424	if (!data)
 425		return NULL;
 426	data->lseg = pnfs_bucket_get_committing(&data->pages, bucket, cinfo);
 427	return data;
 428}
 429
 430static void pnfs_generic_retry_commit(struct pnfs_commit_bucket *buckets,
 431				      unsigned int nbuckets,
 432				      struct nfs_commit_info *cinfo,
 433				      unsigned int idx)
 434{
 435	struct pnfs_commit_bucket *bucket;
 436	struct pnfs_layout_segment *freeme;
 
 437	LIST_HEAD(pages);
 
 438
 439	for (bucket = buckets; idx < nbuckets; bucket++, idx++) {
 
 
 440		if (list_empty(&bucket->committing))
 441			continue;
 442		mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
 443		freeme = pnfs_bucket_get_committing(&pages, bucket, cinfo);
 
 
 
 444		mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
 445		nfs_retry_commit(&pages, freeme, cinfo, idx);
 446		pnfs_put_lseg(freeme);
 
 447	}
 
 448}
 449
 450static unsigned int
 451pnfs_bucket_alloc_ds_commits(struct list_head *list,
 452			     struct pnfs_commit_bucket *buckets,
 453			     unsigned int nbuckets,
 454			     struct nfs_commit_info *cinfo)
 455{
 
 456	struct pnfs_commit_bucket *bucket;
 457	struct nfs_commit_data *data;
 458	unsigned int i;
 459	unsigned int nreq = 0;
 460
 461	for (i = 0, bucket = buckets; i < nbuckets; i++, bucket++) {
 
 
 462		if (list_empty(&bucket->committing))
 463			continue;
 464		mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
 465		if (!list_empty(&bucket->committing)) {
 466			data = pnfs_bucket_fetch_commitdata(bucket, cinfo);
 467			if (!data)
 468				goto out_error;
 469			data->ds_commit_index = i;
 470			list_add_tail(&data->list, list);
 471			nreq++;
 472		}
 473		mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
 474	}
 475	return nreq;
 476out_error:
 477	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
 478	/* Clean up on error */
 479	pnfs_generic_retry_commit(buckets, nbuckets, cinfo, i);
 480	return nreq;
 481}
 482
 483static unsigned int
 484pnfs_alloc_ds_commits_list(struct list_head *list,
 485			   struct pnfs_ds_commit_info *fl_cinfo,
 486			   struct nfs_commit_info *cinfo)
 487{
 488	struct pnfs_commit_array *array;
 489	unsigned int ret = 0;
 490
 491	rcu_read_lock();
 492	list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
 493		if (!array->lseg || !pnfs_get_commit_array(array))
 494			continue;
 495		rcu_read_unlock();
 496		ret += pnfs_bucket_alloc_ds_commits(list, array->buckets,
 497				array->nbuckets, cinfo);
 498		rcu_read_lock();
 499		pnfs_put_commit_array(array, cinfo->inode);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 500	}
 501	rcu_read_unlock();
 502	return ret;
 503}
 504
 505/* This follows nfs_commit_list pretty closely */
 506int
 507pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 508			     int how, struct nfs_commit_info *cinfo,
 509			     int (*initiate_commit)(struct nfs_commit_data *data,
 510						    int how))
 511{
 512	struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
 513	struct nfs_commit_data *data, *tmp;
 514	LIST_HEAD(list);
 515	unsigned int nreq = 0;
 516
 517	if (!list_empty(mds_pages)) {
 518		data = nfs_commitdata_alloc();
 519		if (!data) {
 520			nfs_retry_commit(mds_pages, NULL, cinfo, -1);
 521			return -ENOMEM;
 522		}
 523		data->ds_commit_index = -1;
 524		list_splice_init(mds_pages, &data->pages);
 525		list_add_tail(&data->list, &list);
 526		nreq++;
 527	}
 528
 529	nreq += pnfs_alloc_ds_commits_list(&list, fl_cinfo, cinfo);
 
 530	if (nreq == 0)
 531		goto out;
 532
 533	list_for_each_entry_safe(data, tmp, &list, list) {
 534		list_del(&data->list);
 
 
 535		if (data->ds_commit_index < 0) {
 536			nfs_init_commit(data, NULL, NULL, cinfo);
 
 
 
 
 
 537			nfs_initiate_commit(NFS_CLIENT(inode), data,
 538					    NFS_PROTO(data->inode),
 539					    data->mds_ops, how,
 540					    RPC_TASK_CRED_NOREF);
 541		} else {
 542			nfs_init_commit(data, NULL, data->lseg, cinfo);
 
 
 
 
 
 
 
 
 
 543			initiate_commit(data, how);
 544		}
 545	}
 546out:
 547	return PNFS_ATTEMPTED;
 548}
 549EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist);
 550
 551/*
 552 * Data server cache
 553 *
 554 * Data servers can be mapped to different device ids.
 555 * nfs4_pnfs_ds reference counting
 556 *   - set to 1 on allocation
 557 *   - incremented when a device id maps a data server already in the cache.
 558 *   - decremented when deviceid is removed from the cache.
 559 */
 560static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
 561static LIST_HEAD(nfs4_data_server_cache);
 562
 563/* Debug routines */
 564static void
 565print_ds(struct nfs4_pnfs_ds *ds)
 566{
 567	if (ds == NULL) {
 568		printk(KERN_WARNING "%s NULL device\n", __func__);
 569		return;
 570	}
 571	printk(KERN_WARNING "        ds %s\n"
 572		"        ref count %d\n"
 573		"        client %p\n"
 574		"        cl_exchange_flags %x\n",
 575		ds->ds_remotestr,
 576		refcount_read(&ds->ds_count), ds->ds_clp,
 577		ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
 578}
 579
 580static bool
 581same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
 582{
 583	struct sockaddr_in *a, *b;
 584	struct sockaddr_in6 *a6, *b6;
 585
 586	if (addr1->sa_family != addr2->sa_family)
 587		return false;
 588
 589	switch (addr1->sa_family) {
 590	case AF_INET:
 591		a = (struct sockaddr_in *)addr1;
 592		b = (struct sockaddr_in *)addr2;
 593
 594		if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
 595		    a->sin_port == b->sin_port)
 596			return true;
 597		break;
 598
 599	case AF_INET6:
 600		a6 = (struct sockaddr_in6 *)addr1;
 601		b6 = (struct sockaddr_in6 *)addr2;
 602
 603		/* LINKLOCAL addresses must have matching scope_id */
 604		if (ipv6_addr_src_scope(&a6->sin6_addr) ==
 605		    IPV6_ADDR_SCOPE_LINKLOCAL &&
 606		    a6->sin6_scope_id != b6->sin6_scope_id)
 607			return false;
 608
 609		if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
 610		    a6->sin6_port == b6->sin6_port)
 611			return true;
 612		break;
 613
 614	default:
 615		dprintk("%s: unhandled address family: %u\n",
 616			__func__, addr1->sa_family);
 617		return false;
 618	}
 619
 620	return false;
 621}
 622
 623/*
 624 * Checks if 'dsaddrs1' contains a subset of 'dsaddrs2'. If it does,
 625 * declare a match.
 626 */
 627static bool
 628_same_data_server_addrs_locked(const struct list_head *dsaddrs1,
 629			       const struct list_head *dsaddrs2)
 630{
 631	struct nfs4_pnfs_ds_addr *da1, *da2;
 632	struct sockaddr *sa1, *sa2;
 633	bool match = false;
 634
 635	list_for_each_entry(da1, dsaddrs1, da_node) {
 636		sa1 = (struct sockaddr *)&da1->da_addr;
 637		match = false;
 638		list_for_each_entry(da2, dsaddrs2, da_node) {
 639			sa2 = (struct sockaddr *)&da2->da_addr;
 640			match = same_sockaddr(sa1, sa2);
 641			if (match)
 642				break;
 643		}
 644		if (!match)
 645			break;
 646	}
 647	return match;
 648}
 649
 650/*
 651 * Lookup DS by addresses.  nfs4_ds_cache_lock is held
 652 */
 653static struct nfs4_pnfs_ds *
 654_data_server_lookup_locked(const struct list_head *dsaddrs)
 655{
 656	struct nfs4_pnfs_ds *ds;
 657
 658	list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
 659		if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
 660			return ds;
 661	return NULL;
 662}
 663
 664static struct nfs4_pnfs_ds_addr *nfs4_pnfs_ds_addr_alloc(gfp_t gfp_flags)
 665{
 666	struct nfs4_pnfs_ds_addr *da = kzalloc(sizeof(*da), gfp_flags);
 667	if (da)
 668		INIT_LIST_HEAD(&da->da_node);
 669	return da;
 670}
 671
 672static void nfs4_pnfs_ds_addr_free(struct nfs4_pnfs_ds_addr *da)
 673{
 674	kfree(da->da_remotestr);
 675	kfree(da->da_netid);
 676	kfree(da);
 677}
 678
 679static void destroy_ds(struct nfs4_pnfs_ds *ds)
 680{
 681	struct nfs4_pnfs_ds_addr *da;
 682
 683	dprintk("--> %s\n", __func__);
 684	ifdebug(FACILITY)
 685		print_ds(ds);
 686
 687	nfs_put_client(ds->ds_clp);
 688
 689	while (!list_empty(&ds->ds_addrs)) {
 690		da = list_first_entry(&ds->ds_addrs,
 691				      struct nfs4_pnfs_ds_addr,
 692				      da_node);
 693		list_del_init(&da->da_node);
 694		nfs4_pnfs_ds_addr_free(da);
 
 695	}
 696
 697	kfree(ds->ds_remotestr);
 698	kfree(ds);
 699}
 700
 701void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds)
 702{
 703	if (refcount_dec_and_lock(&ds->ds_count,
 704				&nfs4_ds_cache_lock)) {
 705		list_del_init(&ds->ds_node);
 706		spin_unlock(&nfs4_ds_cache_lock);
 707		destroy_ds(ds);
 708	}
 709}
 710EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_put);
 711
 712/*
 713 * Create a string with a human readable address and port to avoid
 714 * complicated setup around many dprinks.
 715 */
 716static char *
 717nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
 718{
 719	struct nfs4_pnfs_ds_addr *da;
 720	char *remotestr;
 721	size_t len;
 722	char *p;
 723
 724	len = 3;        /* '{', '}' and eol */
 725	list_for_each_entry(da, dsaddrs, da_node) {
 726		len += strlen(da->da_remotestr) + 1;    /* string plus comma */
 727	}
 728
 729	remotestr = kzalloc(len, gfp_flags);
 730	if (!remotestr)
 731		return NULL;
 732
 733	p = remotestr;
 734	*(p++) = '{';
 735	len--;
 736	list_for_each_entry(da, dsaddrs, da_node) {
 737		size_t ll = strlen(da->da_remotestr);
 738
 739		if (ll > len)
 740			goto out_err;
 741
 742		memcpy(p, da->da_remotestr, ll);
 743		p += ll;
 744		len -= ll;
 745
 746		if (len < 1)
 747			goto out_err;
 748		(*p++) = ',';
 749		len--;
 750	}
 751	if (len < 2)
 752		goto out_err;
 753	*(p++) = '}';
 754	*p = '\0';
 755	return remotestr;
 756out_err:
 757	kfree(remotestr);
 758	return NULL;
 759}
 760
 761/*
 762 * Given a list of multipath struct nfs4_pnfs_ds_addr, add it to ds cache if
 763 * uncached and return cached struct nfs4_pnfs_ds.
 764 */
 765struct nfs4_pnfs_ds *
 766nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
 767{
 768	struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
 769	char *remotestr;
 770
 771	if (list_empty(dsaddrs)) {
 772		dprintk("%s: no addresses defined\n", __func__);
 773		goto out;
 774	}
 775
 776	ds = kzalloc(sizeof(*ds), gfp_flags);
 777	if (!ds)
 778		goto out;
 779
 780	/* this is only used for debugging, so it's ok if its NULL */
 781	remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
 782
 783	spin_lock(&nfs4_ds_cache_lock);
 784	tmp_ds = _data_server_lookup_locked(dsaddrs);
 785	if (tmp_ds == NULL) {
 786		INIT_LIST_HEAD(&ds->ds_addrs);
 787		list_splice_init(dsaddrs, &ds->ds_addrs);
 788		ds->ds_remotestr = remotestr;
 789		refcount_set(&ds->ds_count, 1);
 790		INIT_LIST_HEAD(&ds->ds_node);
 791		ds->ds_clp = NULL;
 792		list_add(&ds->ds_node, &nfs4_data_server_cache);
 793		dprintk("%s add new data server %s\n", __func__,
 794			ds->ds_remotestr);
 795	} else {
 796		kfree(remotestr);
 797		kfree(ds);
 798		refcount_inc(&tmp_ds->ds_count);
 799		dprintk("%s data server %s found, inc'ed ds_count to %d\n",
 800			__func__, tmp_ds->ds_remotestr,
 801			refcount_read(&tmp_ds->ds_count));
 802		ds = tmp_ds;
 803	}
 804	spin_unlock(&nfs4_ds_cache_lock);
 805out:
 806	return ds;
 807}
 808EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add);
 809
 810static int nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
 811{
 812	might_sleep();
 813	return wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, TASK_KILLABLE);
 
 814}
 815
 816static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
 817{
 818	smp_mb__before_atomic();
 819	clear_and_wake_up_bit(NFS4DS_CONNECTING, &ds->ds_state);
 
 
 820}
 821
 822static struct nfs_client *(*get_v3_ds_connect)(
 823			struct nfs_server *mds_srv,
 824			const struct sockaddr_storage *ds_addr,
 825			int ds_addrlen,
 826			int ds_proto,
 827			unsigned int ds_timeo,
 828			unsigned int ds_retrans);
 829
 830static bool load_v3_ds_connect(void)
 831{
 832	if (!get_v3_ds_connect) {
 833		get_v3_ds_connect = symbol_request(nfs3_set_ds_client);
 834		WARN_ON_ONCE(!get_v3_ds_connect);
 835	}
 836
 837	return(get_v3_ds_connect != NULL);
 838}
 839
 840void nfs4_pnfs_v3_ds_connect_unload(void)
 841{
 842	if (get_v3_ds_connect) {
 843		symbol_put(nfs3_set_ds_client);
 844		get_v3_ds_connect = NULL;
 845	}
 846}
 847
 848static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
 849				 struct nfs4_pnfs_ds *ds,
 850				 unsigned int timeo,
 851				 unsigned int retrans)
 852{
 853	struct nfs_client *clp = ERR_PTR(-EIO);
 854	struct nfs4_pnfs_ds_addr *da;
 855	unsigned long connect_timeout = timeo * (retrans + 1) * HZ / 10;
 856	int status = 0;
 857
 858	dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
 859
 860	if (!load_v3_ds_connect())
 861		return -EPROTONOSUPPORT;
 862
 863	list_for_each_entry(da, &ds->ds_addrs, da_node) {
 864		dprintk("%s: DS %s: trying address %s\n",
 865			__func__, ds->ds_remotestr, da->da_remotestr);
 866
 867		if (!IS_ERR(clp)) {
 868			struct xprt_create xprt_args = {
 869				.ident = da->da_transport,
 870				.net = clp->cl_net,
 871				.dstaddr = (struct sockaddr *)&da->da_addr,
 872				.addrlen = da->da_addrlen,
 873				.servername = clp->cl_hostname,
 874				.connect_timeout = connect_timeout,
 875				.reconnect_timeout = connect_timeout,
 876			};
 877
 878			if (da->da_transport != clp->cl_proto)
 879				continue;
 880			if (da->da_addr.ss_family != clp->cl_addr.ss_family)
 881				continue;
 882			/* Add this address as an alias */
 883			rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
 884					rpc_clnt_test_and_add_xprt, NULL);
 885			continue;
 886		}
 887		clp = get_v3_ds_connect(mds_srv,
 888				&da->da_addr,
 889				da->da_addrlen, da->da_transport,
 890				timeo, retrans);
 891		if (IS_ERR(clp))
 892			continue;
 893		clp->cl_rpcclient->cl_softerr = 0;
 894		clp->cl_rpcclient->cl_softrtry = 0;
 895	}
 896
 897	if (IS_ERR(clp)) {
 898		status = PTR_ERR(clp);
 899		goto out;
 900	}
 901
 902	smp_wmb();
 903	WRITE_ONCE(ds->ds_clp, clp);
 904	dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
 905out:
 906	return status;
 907}
 908
 909static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
 910				 struct nfs4_pnfs_ds *ds,
 911				 unsigned int timeo,
 912				 unsigned int retrans,
 913				 u32 minor_version)
 914{
 915	struct nfs_client *clp = ERR_PTR(-EIO);
 916	struct nfs4_pnfs_ds_addr *da;
 917	int status = 0;
 918
 919	dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
 920
 921	list_for_each_entry(da, &ds->ds_addrs, da_node) {
 922		dprintk("%s: DS %s: trying address %s\n",
 923			__func__, ds->ds_remotestr, da->da_remotestr);
 924
 925		if (!IS_ERR(clp) && clp->cl_mvops->session_trunk) {
 926			struct xprt_create xprt_args = {
 927				.ident = da->da_transport,
 928				.net = clp->cl_net,
 929				.dstaddr = (struct sockaddr *)&da->da_addr,
 930				.addrlen = da->da_addrlen,
 931				.servername = clp->cl_hostname,
 932			};
 933			struct nfs4_add_xprt_data xprtdata = {
 934				.clp = clp,
 
 935			};
 936			struct rpc_add_xprt_test rpcdata = {
 937				.add_xprt_test = clp->cl_mvops->session_trunk,
 938				.data = &xprtdata,
 939			};
 940
 941			if (da->da_transport != clp->cl_proto)
 942				continue;
 943			if (da->da_addr.ss_family != clp->cl_addr.ss_family)
 944				continue;
 945			/**
 946			* Test this address for session trunking and
 947			* add as an alias
 948			*/
 949			xprtdata.cred = nfs4_get_clid_cred(clp);
 950			rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
 951					  rpc_clnt_setup_test_and_add_xprt,
 952					  &rpcdata);
 953			if (xprtdata.cred)
 954				put_cred(xprtdata.cred);
 955		} else {
 956			clp = nfs4_set_ds_client(mds_srv,
 957						&da->da_addr,
 958						da->da_addrlen,
 959						da->da_transport, timeo,
 960						retrans, minor_version);
 961			if (IS_ERR(clp))
 962				continue;
 963
 964			status = nfs4_init_ds_session(clp,
 965					mds_srv->nfs_client->cl_lease_time);
 966			if (status) {
 967				nfs_put_client(clp);
 968				clp = ERR_PTR(-EIO);
 969				continue;
 970			}
 971
 972		}
 973	}
 974
 975	if (IS_ERR(clp)) {
 976		status = PTR_ERR(clp);
 977		goto out;
 978	}
 979
 980	smp_wmb();
 981	WRITE_ONCE(ds->ds_clp, clp);
 982	dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
 983out:
 984	return status;
 985}
 986
 987/*
 988 * Create an rpc connection to the nfs4_pnfs_ds data server.
 989 * Currently only supports IPv4 and IPv6 addresses.
 990 * If connection fails, make devid unavailable and return a -errno.
 991 */
 992int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
 993			  struct nfs4_deviceid_node *devid, unsigned int timeo,
 994			  unsigned int retrans, u32 version, u32 minor_version)
 995{
 996	int err;
 997
 998	do {
 999		err = nfs4_wait_ds_connect(ds);
1000		if (err || ds->ds_clp)
1001			goto out;
1002		if (nfs4_test_deviceid_unavailable(devid))
1003			return -ENODEV;
1004	} while (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) != 0);
1005
1006	if (ds->ds_clp)
1007		goto connect_done;
1008
1009	switch (version) {
1010	case 3:
1011		err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, retrans);
1012		break;
1013	case 4:
1014		err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, retrans,
1015					       minor_version);
1016		break;
1017	default:
1018		dprintk("%s: unsupported DS version %d\n", __func__, version);
1019		err = -EPROTONOSUPPORT;
1020	}
1021
1022connect_done:
1023	nfs4_clear_ds_conn_bit(ds);
1024out:
1025	/*
1026	 * At this point the ds->ds_clp should be ready, but it might have
1027	 * hit an error.
1028	 */
1029	if (!err) {
1030		if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) {
1031			WARN_ON_ONCE(ds->ds_clp ||
1032				!nfs4_test_deviceid_unavailable(devid));
1033			return -EINVAL;
1034		}
1035		err = nfs_client_init_status(ds->ds_clp);
1036	}
1037
1038	return err;
1039}
1040EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect);
1041
1042/*
1043 * Currently only supports ipv4, ipv6 and one multi-path address.
1044 */
1045struct nfs4_pnfs_ds_addr *
1046nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
1047{
1048	struct nfs4_pnfs_ds_addr *da = NULL;
1049	char *buf, *portstr;
1050	__be16 port;
1051	ssize_t nlen, rlen;
1052	int tmp[2];
1053	char *netid;
1054	size_t len;
 
1055	char *startsep = "";
1056	char *endsep = "";
1057
1058
1059	/* r_netid */
1060	nlen = xdr_stream_decode_string_dup(xdr, &netid, XDR_MAX_NETOBJ,
1061					    gfp_flags);
1062	if (unlikely(nlen < 0))
1063		goto out_err;
 
 
 
 
 
 
 
 
 
 
 
 
1064
1065	/* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
 
 
 
 
 
 
 
 
 
1066	/* port is ".ABC.DEF", 8 chars max */
1067	rlen = xdr_stream_decode_string_dup(xdr, &buf, INET6_ADDRSTRLEN +
1068					    IPV6_SCOPE_ID_LEN + 8, gfp_flags);
1069	if (unlikely(rlen < 0))
 
 
 
 
 
1070		goto out_free_netid;
 
 
 
1071
1072	/* replace port '.' with '-' */
1073	portstr = strrchr(buf, '.');
1074	if (!portstr) {
1075		dprintk("%s: Failed finding expected dot in port\n",
1076			__func__);
1077		goto out_free_buf;
1078	}
1079	*portstr = '-';
1080
1081	/* find '.' between address and port */
1082	portstr = strrchr(buf, '.');
1083	if (!portstr) {
1084		dprintk("%s: Failed finding expected dot between address and "
1085			"port\n", __func__);
1086		goto out_free_buf;
1087	}
1088	*portstr = '\0';
1089
1090	da = nfs4_pnfs_ds_addr_alloc(gfp_flags);
1091	if (unlikely(!da))
1092		goto out_free_buf;
1093
 
 
1094	if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
1095		      sizeof(da->da_addr))) {
1096		dprintk("%s: error parsing address %s\n", __func__, buf);
1097		goto out_free_da;
1098	}
1099
1100	portstr++;
1101	sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
1102	port = htons((tmp[0] << 8) | (tmp[1]));
1103
1104	switch (da->da_addr.ss_family) {
1105	case AF_INET:
1106		((struct sockaddr_in *)&da->da_addr)->sin_port = port;
1107		da->da_addrlen = sizeof(struct sockaddr_in);
 
 
1108		break;
1109
1110	case AF_INET6:
1111		((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
1112		da->da_addrlen = sizeof(struct sockaddr_in6);
 
 
1113		startsep = "[";
1114		endsep = "]";
1115		break;
1116
1117	default:
1118		dprintk("%s: unsupported address family: %u\n",
1119			__func__, da->da_addr.ss_family);
1120		goto out_free_da;
1121	}
1122
1123	da->da_transport = xprt_find_transport_ident(netid);
1124	if (da->da_transport < 0) {
1125		dprintk("%s: ERROR: unknown r_netid \"%s\"\n",
1126			__func__, netid);
1127		goto out_free_da;
1128	}
1129
1130	da->da_netid = netid;
1131
1132	/* save human readable address */
1133	len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
1134	da->da_remotestr = kzalloc(len, gfp_flags);
1135
1136	/* NULL is ok, only used for dprintk */
1137	if (da->da_remotestr)
1138		snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
1139			 buf, endsep, ntohs(port));
1140
1141	dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
1142	kfree(buf);
 
1143	return da;
1144
1145out_free_da:
1146	kfree(da);
1147out_free_buf:
1148	dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
1149	kfree(buf);
1150out_free_netid:
1151	kfree(netid);
1152out_err:
1153	return NULL;
1154}
1155EXPORT_SYMBOL_GPL(nfs4_decode_mp_ds_addr);
1156
1157void
1158pnfs_layout_mark_request_commit(struct nfs_page *req,
1159				struct pnfs_layout_segment *lseg,
1160				struct nfs_commit_info *cinfo,
1161				u32 ds_commit_idx)
1162{
1163	struct list_head *list;
1164	struct pnfs_commit_array *array;
1165	struct pnfs_commit_bucket *bucket;
1166
1167	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
1168	array = pnfs_lookup_commit_array(cinfo->ds, lseg);
1169	if (!array || !pnfs_is_valid_lseg(lseg))
1170		goto out_resched;
1171	bucket = &array->buckets[ds_commit_idx];
1172	list = &bucket->written;
1173	/* Non-empty buckets hold a reference on the lseg.  That ref
1174	 * is normally transferred to the COMMIT call and released
1175	 * there.  It could also be released if the last req is pulled
1176	 * off due to a rewrite, in which case it will be done in
1177	 * pnfs_common_clear_request_commit
1178	 */
1179	if (!bucket->lseg)
1180		bucket->lseg = pnfs_get_lseg(lseg);
 
 
 
 
1181	set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
1182	cinfo->ds->nwritten++;
1183
1184	nfs_request_add_commit_list_locked(req, list, cinfo);
1185	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
1186	nfs_folio_mark_unstable(nfs_page_to_folio(req), cinfo);
1187	return;
1188out_resched:
1189	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
1190	cinfo->completion_ops->resched_write(cinfo, req);
1191}
1192EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
1193
1194int
1195pnfs_nfs_generic_sync(struct inode *inode, bool datasync)
1196{
1197	int ret;
1198
1199	if (!pnfs_layoutcommit_outstanding(inode))
1200		return 0;
1201	ret = nfs_commit_inode(inode, FLUSH_SYNC);
1202	if (ret < 0)
1203		return ret;
1204	if (datasync)
1205		return 0;
1206	return pnfs_layoutcommit_inode(inode, true);
1207}
1208EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync);
1209
v5.4
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * Common NFS I/O  operations for the pnfs file based
  4 * layout drivers.
  5 *
  6 * Copyright (c) 2014, Primary Data, Inc. All rights reserved.
  7 *
  8 * Tom Haynes <loghyr@primarydata.com>
  9 */
 10
 11#include <linux/nfs_fs.h>
 12#include <linux/nfs_page.h>
 13#include <linux/sunrpc/addr.h>
 14#include <linux/module.h>
 15
 16#include "nfs4session.h"
 17#include "internal.h"
 18#include "pnfs.h"
 19
 20#define NFSDBG_FACILITY		NFSDBG_PNFS
 21
 22void pnfs_generic_rw_release(void *data)
 23{
 24	struct nfs_pgio_header *hdr = data;
 25
 26	nfs_put_client(hdr->ds_clp);
 27	hdr->mds_ops->rpc_release(data);
 28}
 29EXPORT_SYMBOL_GPL(pnfs_generic_rw_release);
 30
 31/* Fake up some data that will cause nfs_commit_release to retry the writes. */
 32void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data)
 33{
 34	struct nfs_page *first = nfs_list_entry(data->pages.next);
 35
 36	data->task.tk_status = 0;
 37	memcpy(&data->verf.verifier, &first->wb_verf,
 38	       sizeof(data->verf.verifier));
 39	data->verf.verifier.data[0]++; /* ensure verifier mismatch */
 40}
 41EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes);
 42
 43void pnfs_generic_write_commit_done(struct rpc_task *task, void *data)
 44{
 45	struct nfs_commit_data *wdata = data;
 46
 47	/* Note this may cause RPC to be resent */
 48	wdata->mds_ops->rpc_call_done(task, data);
 49}
 50EXPORT_SYMBOL_GPL(pnfs_generic_write_commit_done);
 51
 52void pnfs_generic_commit_release(void *calldata)
 53{
 54	struct nfs_commit_data *data = calldata;
 55
 56	data->completion_ops->completion(data);
 57	pnfs_put_lseg(data->lseg);
 58	nfs_put_client(data->ds_clp);
 59	nfs_commitdata_release(data);
 60}
 61EXPORT_SYMBOL_GPL(pnfs_generic_commit_release);
 62
 
 
 
 
 
 
 
 
 
 
 
 63/* The generic layer is about to remove the req from the commit list.
 64 * If this will make the bucket empty, it will need to put the lseg reference.
 65 * Note this must be called holding nfsi->commit_mutex
 66 */
 67void
 68pnfs_generic_clear_request_commit(struct nfs_page *req,
 69				  struct nfs_commit_info *cinfo)
 70{
 71	struct pnfs_layout_segment *freeme = NULL;
 72
 73	if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
 74		goto out;
 75	cinfo->ds->nwritten--;
 76	if (list_is_singular(&req->wb_list)) {
 77		struct pnfs_commit_bucket *bucket;
 78
 79		bucket = list_first_entry(&req->wb_list,
 80					  struct pnfs_commit_bucket,
 81					  written);
 82		freeme = bucket->wlseg;
 83		bucket->wlseg = NULL;
 84	}
 85out:
 86	nfs_request_remove_commit_list(req, cinfo);
 87	pnfs_put_lseg(freeme);
 
 88}
 89EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit);
 90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 91static int
 92pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
 93				 struct nfs_commit_info *cinfo,
 94				 int max)
 95{
 96	struct list_head *src = &bucket->written;
 97	struct list_head *dst = &bucket->committing;
 98	int ret;
 99
100	lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
101	ret = nfs_scan_commit_list(src, dst, cinfo, max);
102	if (ret) {
103		cinfo->ds->nwritten -= ret;
104		cinfo->ds->ncommitting += ret;
105		if (bucket->clseg == NULL)
106			bucket->clseg = pnfs_get_lseg(bucket->wlseg);
107		if (list_empty(src)) {
108			pnfs_put_lseg(bucket->wlseg);
109			bucket->wlseg = NULL;
110		}
111	}
112	return ret;
113}
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115/* Move reqs from written to committing lists, returning count
116 * of number moved.
117 */
118int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo,
119				   int max)
120{
121	int i, rv = 0, cnt;
 
 
122
123	lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
124	for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {
125		cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i],
126						       cinfo, max);
 
 
 
 
 
 
127		max -= cnt;
128		rv += cnt;
 
129	}
 
130	return rv;
131}
132EXPORT_SYMBOL_GPL(pnfs_generic_scan_commit_lists);
133
134/* Pull everything off the committing lists and dump into @dst.  */
135void pnfs_generic_recover_commit_reqs(struct list_head *dst,
136				      struct nfs_commit_info *cinfo)
 
 
137{
138	struct pnfs_commit_bucket *b;
139	struct pnfs_layout_segment *freeme;
140	int nwritten;
141	int i;
142
143	lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
144restart:
145	for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
146		nwritten = nfs_scan_commit_list(&b->written, dst, cinfo, 0);
147		if (!nwritten)
148			continue;
149		cinfo->ds->nwritten -= nwritten;
150		if (list_empty(&b->written)) {
151			freeme = b->wlseg;
152			b->wlseg = NULL;
153			pnfs_put_lseg(freeme);
154			goto restart;
155		}
156	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157}
158EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs);
159
160static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161{
162	struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163	struct pnfs_commit_bucket *bucket;
164	struct pnfs_layout_segment *freeme;
165	struct list_head *pos;
166	LIST_HEAD(pages);
167	int i;
168
169	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
170	for (i = idx; i < fl_cinfo->nbuckets; i++) {
171		bucket = &fl_cinfo->buckets[i];
172		if (list_empty(&bucket->committing))
173			continue;
174		freeme = bucket->clseg;
175		bucket->clseg = NULL;
176		list_for_each(pos, &bucket->committing)
177			cinfo->ds->ncommitting--;
178		list_splice_init(&bucket->committing, &pages);
179		mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
180		nfs_retry_commit(&pages, freeme, cinfo, i);
181		pnfs_put_lseg(freeme);
182		mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
183	}
184	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
185}
186
187static unsigned int
188pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo,
189			      struct list_head *list)
 
 
190{
191	struct pnfs_ds_commit_info *fl_cinfo;
192	struct pnfs_commit_bucket *bucket;
193	struct nfs_commit_data *data;
194	int i;
195	unsigned int nreq = 0;
196
197	fl_cinfo = cinfo->ds;
198	bucket = fl_cinfo->buckets;
199	for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
200		if (list_empty(&bucket->committing))
201			continue;
202		data = nfs_commitdata_alloc(false);
203		if (!data)
204			break;
205		data->ds_commit_index = i;
206		list_add(&data->pages, list);
207		nreq++;
 
 
 
 
208	}
209
 
 
210	/* Clean up on error */
211	pnfs_generic_retry_commit(cinfo, i);
212	return nreq;
213}
214
215static inline
216void pnfs_fetch_commit_bucket_list(struct list_head *pages,
217		struct nfs_commit_data *data,
218		struct nfs_commit_info *cinfo)
219{
220	struct pnfs_commit_bucket *bucket;
221	struct list_head *pos;
222
223	bucket = &cinfo->ds->buckets[data->ds_commit_index];
224	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
225	list_for_each(pos, &bucket->committing)
226		cinfo->ds->ncommitting--;
227	list_splice_init(&bucket->committing, pages);
228	data->lseg = bucket->clseg;
229	bucket->clseg = NULL;
230	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
231
232}
233
234/* Helper function for pnfs_generic_commit_pagelist to catch an empty
235 * page list. This can happen when two commits race.
236 *
237 * This must be called instead of nfs_init_commit - call one or the other, but
238 * not both!
239 */
240static bool
241pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
242					  struct nfs_commit_data *data,
243					  struct nfs_commit_info *cinfo)
244{
245	if (list_empty(pages)) {
246		if (atomic_dec_and_test(&cinfo->mds->rpcs_out))
247			wake_up_var(&cinfo->mds->rpcs_out);
248		/* don't call nfs_commitdata_release - it tries to put
249		 * the open_context which is not acquired until nfs_init_commit
250		 * which has not been called on @data */
251		WARN_ON_ONCE(data->context);
252		nfs_commit_free(data);
253		return true;
254	}
255
256	return false;
257}
258
259/* This follows nfs_commit_list pretty closely */
260int
261pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
262			     int how, struct nfs_commit_info *cinfo,
263			     int (*initiate_commit)(struct nfs_commit_data *data,
264						    int how))
265{
 
266	struct nfs_commit_data *data, *tmp;
267	LIST_HEAD(list);
268	unsigned int nreq = 0;
269
270	if (!list_empty(mds_pages)) {
271		data = nfs_commitdata_alloc(true);
 
 
 
 
272		data->ds_commit_index = -1;
273		list_add(&data->pages, &list);
 
274		nreq++;
275	}
276
277	nreq += pnfs_generic_alloc_ds_commits(cinfo, &list);
278
279	if (nreq == 0)
280		goto out;
281
282	atomic_add(nreq, &cinfo->mds->rpcs_out);
283
284	list_for_each_entry_safe(data, tmp, &list, pages) {
285		list_del_init(&data->pages);
286		if (data->ds_commit_index < 0) {
287			/* another commit raced with us */
288			if (pnfs_generic_commit_cancel_empty_pagelist(mds_pages,
289				data, cinfo))
290				continue;
291
292			nfs_init_commit(data, mds_pages, NULL, cinfo);
293			nfs_initiate_commit(NFS_CLIENT(inode), data,
294					    NFS_PROTO(data->inode),
295					    data->mds_ops, how, 0);
 
296		} else {
297			LIST_HEAD(pages);
298
299			pnfs_fetch_commit_bucket_list(&pages, data, cinfo);
300
301			/* another commit raced with us */
302			if (pnfs_generic_commit_cancel_empty_pagelist(&pages,
303				data, cinfo))
304				continue;
305
306			nfs_init_commit(data, &pages, data->lseg, cinfo);
307			initiate_commit(data, how);
308		}
309	}
310out:
311	return PNFS_ATTEMPTED;
312}
313EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist);
314
/*
 * Data server cache
 *
 * Data servers can be mapped to different device ids.
 * nfs4_pnfs_ds reference counting
 *   - set to 1 on allocation
 *   - incremented when a device id maps a data server already in the cache.
 *   - decremented when deviceid is removed from the cache.
 *
 * nfs4_ds_cache_lock is taken around every lookup, insertion and removal
 * on nfs4_data_server_cache in this file.
 */
static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
static LIST_HEAD(nfs4_data_server_cache);
326
327/* Debug routines */
328static void
329print_ds(struct nfs4_pnfs_ds *ds)
330{
331	if (ds == NULL) {
332		printk(KERN_WARNING "%s NULL device\n", __func__);
333		return;
334	}
335	printk(KERN_WARNING "        ds %s\n"
336		"        ref count %d\n"
337		"        client %p\n"
338		"        cl_exchange_flags %x\n",
339		ds->ds_remotestr,
340		refcount_read(&ds->ds_count), ds->ds_clp,
341		ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
342}
343
344static bool
345same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
346{
347	struct sockaddr_in *a, *b;
348	struct sockaddr_in6 *a6, *b6;
349
350	if (addr1->sa_family != addr2->sa_family)
351		return false;
352
353	switch (addr1->sa_family) {
354	case AF_INET:
355		a = (struct sockaddr_in *)addr1;
356		b = (struct sockaddr_in *)addr2;
357
358		if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
359		    a->sin_port == b->sin_port)
360			return true;
361		break;
362
363	case AF_INET6:
364		a6 = (struct sockaddr_in6 *)addr1;
365		b6 = (struct sockaddr_in6 *)addr2;
366
367		/* LINKLOCAL addresses must have matching scope_id */
368		if (ipv6_addr_src_scope(&a6->sin6_addr) ==
369		    IPV6_ADDR_SCOPE_LINKLOCAL &&
370		    a6->sin6_scope_id != b6->sin6_scope_id)
371			return false;
372
373		if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
374		    a6->sin6_port == b6->sin6_port)
375			return true;
376		break;
377
378	default:
379		dprintk("%s: unhandled address family: %u\n",
380			__func__, addr1->sa_family);
381		return false;
382	}
383
384	return false;
385}
386
387/*
388 * Checks if 'dsaddrs1' contains a subset of 'dsaddrs2'. If it does,
389 * declare a match.
390 */
391static bool
392_same_data_server_addrs_locked(const struct list_head *dsaddrs1,
393			       const struct list_head *dsaddrs2)
394{
395	struct nfs4_pnfs_ds_addr *da1, *da2;
396	struct sockaddr *sa1, *sa2;
397	bool match = false;
398
399	list_for_each_entry(da1, dsaddrs1, da_node) {
400		sa1 = (struct sockaddr *)&da1->da_addr;
401		match = false;
402		list_for_each_entry(da2, dsaddrs2, da_node) {
403			sa2 = (struct sockaddr *)&da2->da_addr;
404			match = same_sockaddr(sa1, sa2);
405			if (match)
406				break;
407		}
408		if (!match)
409			break;
410	}
411	return match;
412}
413
414/*
415 * Lookup DS by addresses.  nfs4_ds_cache_lock is held
416 */
417static struct nfs4_pnfs_ds *
418_data_server_lookup_locked(const struct list_head *dsaddrs)
419{
420	struct nfs4_pnfs_ds *ds;
421
422	list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
423		if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
424			return ds;
425	return NULL;
426}
427
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428static void destroy_ds(struct nfs4_pnfs_ds *ds)
429{
430	struct nfs4_pnfs_ds_addr *da;
431
432	dprintk("--> %s\n", __func__);
433	ifdebug(FACILITY)
434		print_ds(ds);
435
436	nfs_put_client(ds->ds_clp);
437
438	while (!list_empty(&ds->ds_addrs)) {
439		da = list_first_entry(&ds->ds_addrs,
440				      struct nfs4_pnfs_ds_addr,
441				      da_node);
442		list_del_init(&da->da_node);
443		kfree(da->da_remotestr);
444		kfree(da);
445	}
446
447	kfree(ds->ds_remotestr);
448	kfree(ds);
449}
450
451void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds)
452{
453	if (refcount_dec_and_lock(&ds->ds_count,
454				&nfs4_ds_cache_lock)) {
455		list_del_init(&ds->ds_node);
456		spin_unlock(&nfs4_ds_cache_lock);
457		destroy_ds(ds);
458	}
459}
460EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_put);
461
462/*
463 * Create a string with a human readable address and port to avoid
464 * complicated setup around many dprinks.
465 */
466static char *
467nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
468{
469	struct nfs4_pnfs_ds_addr *da;
470	char *remotestr;
471	size_t len;
472	char *p;
473
474	len = 3;        /* '{', '}' and eol */
475	list_for_each_entry(da, dsaddrs, da_node) {
476		len += strlen(da->da_remotestr) + 1;    /* string plus comma */
477	}
478
479	remotestr = kzalloc(len, gfp_flags);
480	if (!remotestr)
481		return NULL;
482
483	p = remotestr;
484	*(p++) = '{';
485	len--;
486	list_for_each_entry(da, dsaddrs, da_node) {
487		size_t ll = strlen(da->da_remotestr);
488
489		if (ll > len)
490			goto out_err;
491
492		memcpy(p, da->da_remotestr, ll);
493		p += ll;
494		len -= ll;
495
496		if (len < 1)
497			goto out_err;
498		(*p++) = ',';
499		len--;
500	}
501	if (len < 2)
502		goto out_err;
503	*(p++) = '}';
504	*p = '\0';
505	return remotestr;
506out_err:
507	kfree(remotestr);
508	return NULL;
509}
510
511/*
512 * Given a list of multipath struct nfs4_pnfs_ds_addr, add it to ds cache if
513 * uncached and return cached struct nfs4_pnfs_ds.
514 */
515struct nfs4_pnfs_ds *
516nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
517{
518	struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
519	char *remotestr;
520
521	if (list_empty(dsaddrs)) {
522		dprintk("%s: no addresses defined\n", __func__);
523		goto out;
524	}
525
526	ds = kzalloc(sizeof(*ds), gfp_flags);
527	if (!ds)
528		goto out;
529
530	/* this is only used for debugging, so it's ok if its NULL */
531	remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
532
533	spin_lock(&nfs4_ds_cache_lock);
534	tmp_ds = _data_server_lookup_locked(dsaddrs);
535	if (tmp_ds == NULL) {
536		INIT_LIST_HEAD(&ds->ds_addrs);
537		list_splice_init(dsaddrs, &ds->ds_addrs);
538		ds->ds_remotestr = remotestr;
539		refcount_set(&ds->ds_count, 1);
540		INIT_LIST_HEAD(&ds->ds_node);
541		ds->ds_clp = NULL;
542		list_add(&ds->ds_node, &nfs4_data_server_cache);
543		dprintk("%s add new data server %s\n", __func__,
544			ds->ds_remotestr);
545	} else {
546		kfree(remotestr);
547		kfree(ds);
548		refcount_inc(&tmp_ds->ds_count);
549		dprintk("%s data server %s found, inc'ed ds_count to %d\n",
550			__func__, tmp_ds->ds_remotestr,
551			refcount_read(&tmp_ds->ds_count));
552		ds = tmp_ds;
553	}
554	spin_unlock(&nfs4_ds_cache_lock);
555out:
556	return ds;
557}
558EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add);
559
560static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
561{
562	might_sleep();
563	wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING,
564			TASK_KILLABLE);
565}
566
/*
 * Clear NFS4DS_CONNECTING in @ds->ds_state and wake anyone sleeping on that
 * bit (see nfs4_wait_ds_connect()).  The clear is bracketed by memory
 * barriers, so the statement order here must not be changed.
 */
static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
{
	smp_mb__before_atomic();
	clear_bit(NFS4DS_CONNECTING, &ds->ds_state);
	smp_mb__after_atomic();
	wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING);
}
574
/*
 * Pointer to nfs3_set_ds_client(), resolved on demand by
 * load_v3_ds_connect() and released by nfs4_pnfs_v3_ds_connect_unload().
 * NULL while the symbol is not held.
 */
static struct nfs_client *(*get_v3_ds_connect)(
			struct nfs_server *mds_srv,
			const struct sockaddr *ds_addr,
			int ds_addrlen,
			int ds_proto,
			unsigned int ds_timeo,
			unsigned int ds_retrans);
582
583static bool load_v3_ds_connect(void)
584{
585	if (!get_v3_ds_connect) {
586		get_v3_ds_connect = symbol_request(nfs3_set_ds_client);
587		WARN_ON_ONCE(!get_v3_ds_connect);
588	}
589
590	return(get_v3_ds_connect != NULL);
591}
592
593void nfs4_pnfs_v3_ds_connect_unload(void)
594{
595	if (get_v3_ds_connect) {
596		symbol_put(nfs3_set_ds_client);
597		get_v3_ds_connect = NULL;
598	}
599}
600
601static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
602				 struct nfs4_pnfs_ds *ds,
603				 unsigned int timeo,
604				 unsigned int retrans)
605{
606	struct nfs_client *clp = ERR_PTR(-EIO);
607	struct nfs4_pnfs_ds_addr *da;
 
608	int status = 0;
609
610	dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
611
612	if (!load_v3_ds_connect())
613		goto out;
614
615	list_for_each_entry(da, &ds->ds_addrs, da_node) {
616		dprintk("%s: DS %s: trying address %s\n",
617			__func__, ds->ds_remotestr, da->da_remotestr);
618
619		if (!IS_ERR(clp)) {
620			struct xprt_create xprt_args = {
621				.ident = XPRT_TRANSPORT_TCP,
622				.net = clp->cl_net,
623				.dstaddr = (struct sockaddr *)&da->da_addr,
624				.addrlen = da->da_addrlen,
625				.servername = clp->cl_hostname,
 
 
626			};
 
 
 
 
 
627			/* Add this address as an alias */
628			rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
629					rpc_clnt_test_and_add_xprt, NULL);
630			continue;
631		}
632		clp = get_v3_ds_connect(mds_srv,
633				(struct sockaddr *)&da->da_addr,
634				da->da_addrlen, IPPROTO_TCP,
635				timeo, retrans);
636		if (IS_ERR(clp))
637			continue;
638		clp->cl_rpcclient->cl_softerr = 0;
639		clp->cl_rpcclient->cl_softrtry = 0;
640	}
641
642	if (IS_ERR(clp)) {
643		status = PTR_ERR(clp);
644		goto out;
645	}
646
647	smp_wmb();
648	ds->ds_clp = clp;
649	dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
650out:
651	return status;
652}
653
654static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
655				 struct nfs4_pnfs_ds *ds,
656				 unsigned int timeo,
657				 unsigned int retrans,
658				 u32 minor_version)
659{
660	struct nfs_client *clp = ERR_PTR(-EIO);
661	struct nfs4_pnfs_ds_addr *da;
662	int status = 0;
663
664	dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
665
666	list_for_each_entry(da, &ds->ds_addrs, da_node) {
667		dprintk("%s: DS %s: trying address %s\n",
668			__func__, ds->ds_remotestr, da->da_remotestr);
669
670		if (!IS_ERR(clp) && clp->cl_mvops->session_trunk) {
671			struct xprt_create xprt_args = {
672				.ident = XPRT_TRANSPORT_TCP,
673				.net = clp->cl_net,
674				.dstaddr = (struct sockaddr *)&da->da_addr,
675				.addrlen = da->da_addrlen,
676				.servername = clp->cl_hostname,
677			};
678			struct nfs4_add_xprt_data xprtdata = {
679				.clp = clp,
680				.cred = nfs4_get_clid_cred(clp),
681			};
682			struct rpc_add_xprt_test rpcdata = {
683				.add_xprt_test = clp->cl_mvops->session_trunk,
684				.data = &xprtdata,
685			};
686
 
 
 
 
687			/**
688			* Test this address for session trunking and
689			* add as an alias
690			*/
 
691			rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
692					  rpc_clnt_setup_test_and_add_xprt,
693					  &rpcdata);
694			if (xprtdata.cred)
695				put_cred(xprtdata.cred);
696		} else {
697			clp = nfs4_set_ds_client(mds_srv,
698						(struct sockaddr *)&da->da_addr,
699						da->da_addrlen, IPPROTO_TCP,
700						timeo, retrans, minor_version);
 
701			if (IS_ERR(clp))
702				continue;
703
704			status = nfs4_init_ds_session(clp,
705					mds_srv->nfs_client->cl_lease_time);
706			if (status) {
707				nfs_put_client(clp);
708				clp = ERR_PTR(-EIO);
709				continue;
710			}
711
712		}
713	}
714
715	if (IS_ERR(clp)) {
716		status = PTR_ERR(clp);
717		goto out;
718	}
719
720	smp_wmb();
721	ds->ds_clp = clp;
722	dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
723out:
724	return status;
725}
726
727/*
728 * Create an rpc connection to the nfs4_pnfs_ds data server.
729 * Currently only supports IPv4 and IPv6 addresses.
730 * If connection fails, make devid unavailable and return a -errno.
731 */
732int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
733			  struct nfs4_deviceid_node *devid, unsigned int timeo,
734			  unsigned int retrans, u32 version, u32 minor_version)
735{
736	int err;
737
738again:
739	err = 0;
740	if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
741		if (version == 3) {
742			err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo,
743						       retrans);
744		} else if (version == 4) {
745			err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo,
746						       retrans, minor_version);
747		} else {
748			dprintk("%s: unsupported DS version %d\n", __func__,
749				version);
750			err = -EPROTONOSUPPORT;
751		}
752
753		nfs4_clear_ds_conn_bit(ds);
754	} else {
755		nfs4_wait_ds_connect(ds);
756
757		/* what was waited on didn't connect AND didn't mark unavail */
758		if (!ds->ds_clp && !nfs4_test_deviceid_unavailable(devid))
759			goto again;
760	}
761
 
 
 
762	/*
763	 * At this point the ds->ds_clp should be ready, but it might have
764	 * hit an error.
765	 */
766	if (!err) {
767		if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) {
768			WARN_ON_ONCE(ds->ds_clp ||
769				!nfs4_test_deviceid_unavailable(devid));
770			return -EINVAL;
771		}
772		err = nfs_client_init_status(ds->ds_clp);
773	}
774
775	return err;
776}
777EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect);
778
779/*
780 * Currently only supports ipv4, ipv6 and one multi-path address.
781 */
782struct nfs4_pnfs_ds_addr *
783nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
784{
785	struct nfs4_pnfs_ds_addr *da = NULL;
786	char *buf, *portstr;
787	__be16 port;
788	int nlen, rlen;
789	int tmp[2];
790	__be32 *p;
791	char *netid, *match_netid;
792	size_t len, match_netid_len;
793	char *startsep = "";
794	char *endsep = "";
795
796
797	/* r_netid */
798	p = xdr_inline_decode(xdr, 4);
799	if (unlikely(!p))
 
800		goto out_err;
801	nlen = be32_to_cpup(p++);
802
803	p = xdr_inline_decode(xdr, nlen);
804	if (unlikely(!p))
805		goto out_err;
806
807	netid = kmalloc(nlen+1, gfp_flags);
808	if (unlikely(!netid))
809		goto out_err;
810
811	netid[nlen] = '\0';
812	memcpy(netid, p, nlen);
813
814	/* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
815	p = xdr_inline_decode(xdr, 4);
816	if (unlikely(!p))
817		goto out_free_netid;
818	rlen = be32_to_cpup(p);
819
820	p = xdr_inline_decode(xdr, rlen);
821	if (unlikely(!p))
822		goto out_free_netid;
823
824	/* port is ".ABC.DEF", 8 chars max */
825	if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
826		dprintk("%s: Invalid address, length %d\n", __func__,
827			rlen);
828		goto out_free_netid;
829	}
830	buf = kmalloc(rlen + 1, gfp_flags);
831	if (!buf) {
832		dprintk("%s: Not enough memory\n", __func__);
833		goto out_free_netid;
834	}
835	buf[rlen] = '\0';
836	memcpy(buf, p, rlen);
837
838	/* replace port '.' with '-' */
839	portstr = strrchr(buf, '.');
840	if (!portstr) {
841		dprintk("%s: Failed finding expected dot in port\n",
842			__func__);
843		goto out_free_buf;
844	}
845	*portstr = '-';
846
847	/* find '.' between address and port */
848	portstr = strrchr(buf, '.');
849	if (!portstr) {
850		dprintk("%s: Failed finding expected dot between address and "
851			"port\n", __func__);
852		goto out_free_buf;
853	}
854	*portstr = '\0';
855
856	da = kzalloc(sizeof(*da), gfp_flags);
857	if (unlikely(!da))
858		goto out_free_buf;
859
860	INIT_LIST_HEAD(&da->da_node);
861
862	if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
863		      sizeof(da->da_addr))) {
864		dprintk("%s: error parsing address %s\n", __func__, buf);
865		goto out_free_da;
866	}
867
868	portstr++;
869	sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
870	port = htons((tmp[0] << 8) | (tmp[1]));
871
872	switch (da->da_addr.ss_family) {
873	case AF_INET:
874		((struct sockaddr_in *)&da->da_addr)->sin_port = port;
875		da->da_addrlen = sizeof(struct sockaddr_in);
876		match_netid = "tcp";
877		match_netid_len = 3;
878		break;
879
880	case AF_INET6:
881		((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
882		da->da_addrlen = sizeof(struct sockaddr_in6);
883		match_netid = "tcp6";
884		match_netid_len = 4;
885		startsep = "[";
886		endsep = "]";
887		break;
888
889	default:
890		dprintk("%s: unsupported address family: %u\n",
891			__func__, da->da_addr.ss_family);
892		goto out_free_da;
893	}
894
895	if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
896		dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
897			__func__, netid, match_netid);
 
898		goto out_free_da;
899	}
900
 
 
901	/* save human readable address */
902	len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
903	da->da_remotestr = kzalloc(len, gfp_flags);
904
905	/* NULL is ok, only used for dprintk */
906	if (da->da_remotestr)
907		snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
908			 buf, endsep, ntohs(port));
909
910	dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
911	kfree(buf);
912	kfree(netid);
913	return da;
914
915out_free_da:
916	kfree(da);
917out_free_buf:
918	dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
919	kfree(buf);
920out_free_netid:
921	kfree(netid);
922out_err:
923	return NULL;
924}
925EXPORT_SYMBOL_GPL(nfs4_decode_mp_ds_addr);
926
927void
928pnfs_layout_mark_request_commit(struct nfs_page *req,
929				struct pnfs_layout_segment *lseg,
930				struct nfs_commit_info *cinfo,
931				u32 ds_commit_idx)
932{
933	struct list_head *list;
934	struct pnfs_commit_bucket *buckets;
 
935
936	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
937	buckets = cinfo->ds->buckets;
938	list = &buckets[ds_commit_idx].written;
939	if (list_empty(list)) {
940		if (!pnfs_is_valid_lseg(lseg)) {
941			mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
942			cinfo->completion_ops->resched_write(cinfo, req);
943			return;
944		}
945		/* Non-empty buckets hold a reference on the lseg.  That ref
946		 * is normally transferred to the COMMIT call and released
947		 * there.  It could also be released if the last req is pulled
948		 * off due to a rewrite, in which case it will be done in
949		 * pnfs_common_clear_request_commit
950		 */
951		WARN_ON_ONCE(buckets[ds_commit_idx].wlseg != NULL);
952		buckets[ds_commit_idx].wlseg = pnfs_get_lseg(lseg);
953	}
954	set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
955	cinfo->ds->nwritten++;
956
957	nfs_request_add_commit_list_locked(req, list, cinfo);
958	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
959	nfs_mark_page_unstable(req->wb_page, cinfo);
 
 
 
 
960}
961EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
962
963int
964pnfs_nfs_generic_sync(struct inode *inode, bool datasync)
965{
966	int ret;
967
968	if (!pnfs_layoutcommit_outstanding(inode))
969		return 0;
970	ret = nfs_commit_inode(inode, FLUSH_SYNC);
971	if (ret < 0)
972		return ret;
973	if (datasync)
974		return 0;
975	return pnfs_layoutcommit_inode(inode, true);
976}
977EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync);
978