// SPDX-License-Identifier: GPL-2.0-only
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2014 Intel Corporation.
 *
 * Intel SCIF driver.
 */
#include <linux/scif.h>
#include "scif_main.h"
#include "scif_map.h"

static const char * const scif_ep_states[] = {
	"Unbound",
	"Bound",
	"Listening",
	"Connected",
	"Connecting",
	"Mapping",
	"Closing",
	"Close Listening",
	"Disconnected",
	"Zombie"};

enum conn_async_state {
	ASYNC_CONN_IDLE = 1,	/* ep setup for async connect */
	ASYNC_CONN_INPROGRESS,	/* async connect in progress */
	ASYNC_CONN_FLUSH_WORK	/* async work flush in progress */
};

/*
 * File operations for anonymous inode file associated with a SCIF endpoint,
 * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
 * poll API in the kernel and these take in a struct file *. Since a struct
 * file is not available to kernel mode SCIF, it uses an anonymous file for
 * this purpose.
 */
const struct file_operations scif_anon_fops = {
	.owner = THIS_MODULE,
};

scif_epd_t scif_open(void)
{
	struct scif_endpt *ep;
	int err;

	might_sleep();
	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
	if (!ep)
		goto err_ep_alloc;

	ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
	if (!ep->qp_info.qp)
		goto err_qp_alloc;

	err = scif_anon_inode_getfile(ep);
	if (err)
		goto err_anon_inode;

	spin_lock_init(&ep->lock);
	mutex_init(&ep->sendlock);
	mutex_init(&ep->recvlock);

	scif_rma_ep_init(ep);
	ep->state = SCIFEP_UNBOUND;
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI open: ep %p success\n", ep);
	return ep;

err_anon_inode:
	kfree(ep->qp_info.qp);
err_qp_alloc:
	kfree(ep);
err_ep_alloc:
	return NULL;
}
EXPORT_SYMBOL_GPL(scif_open);
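
/*
 * Usage sketch (illustrative only, not part of the driver): a kernel
 * client pairs every successful scif_open() with a scif_close() on the
 * returned endpoint descriptor.
 *
 *	scif_epd_t epd = scif_open();
 *
 *	if (!epd)
 *		return -ENOMEM;
 *	...
 *	scif_close(epd);
 */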

/*
 * scif_disconnect_ep - Disconnects the endpoint if found
 * @ep: The end point returned from scif_open()
 */
static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
{
	struct scifmsg msg;
	struct scif_endpt *fep = NULL;
	struct scif_endpt *tmpep;
	struct list_head *pos, *tmpq;
	int err;

	/*
	 * Wake up any threads blocked in send()/recv() before closing
	 * out the connection. Grabbing and releasing the send/recv lock
	 * will ensure that any blocked senders/receivers have exited for
	 * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
	 * close. Ring 3 endpoints are not affected since close will not
	 * be called while there are IOCTLs executing.
	 */
	wake_up_interruptible(&ep->sendwq);
	wake_up_interruptible(&ep->recvwq);
	mutex_lock(&ep->sendlock);
	mutex_unlock(&ep->sendlock);
	mutex_lock(&ep->recvlock);
	mutex_unlock(&ep->recvlock);

	/* Remove from the connected list */
	mutex_lock(&scif_info.connlock);
	list_for_each_safe(pos, tmpq, &scif_info.connected) {
		tmpep = list_entry(pos, struct scif_endpt, list);
		if (tmpep == ep) {
			list_del(pos);
			fep = tmpep;
			spin_lock(&ep->lock);
			break;
		}
	}

	if (!fep) {
		/*
		 * The other side completed the disconnect before the
		 * endpoint could be removed from the list. The ep lock
		 * is therefore not held; traverse the disconnected list
		 * to find the endpoint, then release the conn lock.
		 */
		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep) {
				list_del(pos);
				break;
			}
		}
		mutex_unlock(&scif_info.connlock);
		return NULL;
	}

	init_completion(&ep->discon);
	msg.uop = SCIF_DISCNCT;
	msg.src = ep->port;
	msg.dst = ep->peer;
	msg.payload[0] = (u64)ep;
	msg.payload[1] = ep->remote_ep;

	err = scif_nodeqp_send(ep->remote_dev, &msg);
	spin_unlock(&ep->lock);
	mutex_unlock(&scif_info.connlock);

	if (!err)
		/* Wait for the remote node to respond with SCIF_DISCNT_ACK */
		wait_for_completion_timeout(&ep->discon,
					    SCIF_NODE_ALIVE_TIMEOUT);
	return ep;
}

int scif_close(scif_epd_t epd)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scif_endpt *tmpep;
	struct list_head *pos, *tmpq;
	enum scif_epd_state oldstate;
	bool flush_conn;

	dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
		ep, scif_ep_states[ep->state]);
	might_sleep();
	spin_lock(&ep->lock);
	flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
	spin_unlock(&ep->lock);

	if (flush_conn)
		flush_work(&scif_info.conn_work);

	spin_lock(&ep->lock);
	oldstate = ep->state;

	ep->state = SCIFEP_CLOSING;

	switch (oldstate) {
	case SCIFEP_ZOMBIE:
		dev_err(scif_info.mdev.this_device,
			"SCIFAPI close: zombie state unexpected\n");
		/* fall through */
	case SCIFEP_DISCONNECTED:
		spin_unlock(&ep->lock);
		scif_unregister_all_windows(epd);
		/* Remove from the disconnected list */
		mutex_lock(&scif_info.connlock);
		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep) {
				list_del(pos);
				break;
			}
		}
		mutex_unlock(&scif_info.connlock);
		break;
	case SCIFEP_UNBOUND:
	case SCIFEP_BOUND:
	case SCIFEP_CONNECTING:
		spin_unlock(&ep->lock);
		break;
	case SCIFEP_MAPPING:
	case SCIFEP_CONNECTED:
	case SCIFEP_CLOSING:
	{
		spin_unlock(&ep->lock);
		scif_unregister_all_windows(epd);
		scif_disconnect_ep(ep);
		break;
	}
	case SCIFEP_LISTENING:
	case SCIFEP_CLLISTEN:
	{
		struct scif_conreq *conreq;
		struct scifmsg msg;
		struct scif_endpt *aep;

		spin_unlock(&ep->lock);
		mutex_lock(&scif_info.eplock);

		/* remove from listen list */
		list_for_each_safe(pos, tmpq, &scif_info.listen) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep)
				list_del(pos);
		}
		/* Remove any dangling accepts */
		while (ep->acceptcnt) {
			aep = list_first_entry(&ep->li_accept,
					       struct scif_endpt, liacceptlist);
			list_del(&aep->liacceptlist);
			scif_put_port(aep->port.port);
			list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
				tmpep = list_entry(pos, struct scif_endpt,
						   miacceptlist);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			mutex_unlock(&scif_info.eplock);
			mutex_lock(&scif_info.connlock);
			list_for_each_safe(pos, tmpq, &scif_info.connected) {
				tmpep = list_entry(pos,
						   struct scif_endpt, list);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
				tmpep = list_entry(pos,
						   struct scif_endpt, list);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			mutex_unlock(&scif_info.connlock);
			scif_teardown_ep(aep);
			mutex_lock(&scif_info.eplock);
			scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
			ep->acceptcnt--;
		}

		spin_lock(&ep->lock);
		mutex_unlock(&scif_info.eplock);

		/* Remove and reject any pending connection requests. */
		while (ep->conreqcnt) {
			conreq = list_first_entry(&ep->conlist,
						  struct scif_conreq, list);
			list_del(&conreq->list);

			msg.uop = SCIF_CNCT_REJ;
			msg.dst.node = conreq->msg.src.node;
			msg.dst.port = conreq->msg.src.port;
			msg.payload[0] = conreq->msg.payload[0];
			msg.payload[1] = conreq->msg.payload[1];
			/*
			 * No error handling on purpose for scif_nodeqp_send().
			 * If the remote node is lost we still want to free the
			 * connection requests on the self node.
			 */
			scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
					 &msg);
			ep->conreqcnt--;
			kfree(conreq);
		}

		spin_unlock(&ep->lock);
		/* If a kSCIF accept is waiting wake it up */
		wake_up_interruptible(&ep->conwq);
		break;
	}
	}
	scif_put_port(ep->port.port);
	scif_anon_inode_fput(ep);
	scif_teardown_ep(ep);
	scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
	return 0;
}
EXPORT_SYMBOL_GPL(scif_close);

/**
 * __scif_flush() - Wakes up any blocking accepts. The endpoint will no
 *		    longer accept new connections.
 * @epd: The end point returned from scif_open()
 */
int __scif_flush(scif_epd_t epd)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;

	switch (ep->state) {
	case SCIFEP_LISTENING:
	{
		ep->state = SCIFEP_CLLISTEN;

		/* If an accept is waiting wake it up */
		wake_up_interruptible(&ep->conwq);
		break;
	}
	default:
		break;
	}
	return 0;
}

int scif_bind(scif_epd_t epd, u16 pn)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int ret = 0;
	int tmp;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI bind: ep %p %s requested port number %d\n",
		ep, scif_ep_states[ep->state], pn);
	if (pn) {
		/*
		 * Similar to IETF RFC 1700, SCIF ports below
		 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
		 * processes or by processes executed by privileged users.
		 */
		if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
			ret = -EACCES;
			goto scif_bind_admin_exit;
		}
	}

	spin_lock(&ep->lock);
	if (ep->state == SCIFEP_BOUND) {
		ret = -EINVAL;
		goto scif_bind_exit;
	} else if (ep->state != SCIFEP_UNBOUND) {
		ret = -EISCONN;
		goto scif_bind_exit;
	}

	if (pn) {
		tmp = scif_rsrv_port(pn);
		if (tmp != pn) {
			ret = -EINVAL;
			goto scif_bind_exit;
		}
	} else {
		ret = scif_get_new_port();
		if (ret < 0)
			goto scif_bind_exit;
		pn = ret;
	}

	ep->state = SCIFEP_BOUND;
	ep->port.node = scif_info.nodeid;
	ep->port.port = pn;
	ep->conn_async_state = ASYNC_CONN_IDLE;
	ret = pn;
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI bind: bound to port number %d\n", pn);
scif_bind_exit:
	spin_unlock(&ep->lock);
scif_bind_admin_exit:
	return ret;
}
EXPORT_SYMBOL_GPL(scif_bind);
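
/*
 * Usage sketch (illustrative only): scif_bind() returns the bound port
 * number on success. Binding to a specific port (the value 2000 below
 * is hypothetical) requires CAP_SYS_ADMIN when the port is below
 * SCIF_ADMIN_PORT_END; passing 0 lets SCIF pick a free port.
 *
 *	int pn = scif_bind(epd, 2000);
 *
 *	if (pn < 0)
 *		return pn;
 */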

int scif_listen(scif_epd_t epd, int backlog)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
	spin_lock(&ep->lock);
	switch (ep->state) {
	case SCIFEP_ZOMBIE:
	case SCIFEP_CLOSING:
	case SCIFEP_CLLISTEN:
	case SCIFEP_UNBOUND:
	case SCIFEP_DISCONNECTED:
		spin_unlock(&ep->lock);
		return -EINVAL;
	case SCIFEP_LISTENING:
	case SCIFEP_CONNECTED:
	case SCIFEP_CONNECTING:
	case SCIFEP_MAPPING:
		spin_unlock(&ep->lock);
		return -EISCONN;
	case SCIFEP_BOUND:
		break;
	}

	ep->state = SCIFEP_LISTENING;
	ep->backlog = backlog;

	ep->conreqcnt = 0;
	ep->acceptcnt = 0;
	INIT_LIST_HEAD(&ep->conlist);
	init_waitqueue_head(&ep->conwq);
	INIT_LIST_HEAD(&ep->li_accept);
	spin_unlock(&ep->lock);

	/*
	 * The listen state is now set, so delete the qp information,
	 * which is not needed on a listening endpoint, before placing
	 * it on the list of listening endpoints.
	 */
	scif_teardown_ep(ep);
	ep->qp_info.qp = NULL;

	mutex_lock(&scif_info.eplock);
	list_add_tail(&ep->list, &scif_info.listen);
	mutex_unlock(&scif_info.eplock);
	return 0;
}
EXPORT_SYMBOL_GPL(scif_listen);
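
/*
 * Usage sketch (illustrative only): the passive side binds and then
 * listens; connection requests are later picked up with scif_accept().
 * The port number and backlog below are hypothetical.
 *
 *	err = scif_bind(epd, 2000);
 *	if (err < 0)
 *		return err;
 *	err = scif_listen(epd, 16);
 *	if (err < 0)
 *		return err;
 */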

/*
 ************************************************************************
 * SCIF connection flow:
 *
 * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
 *	connections via a SCIF_CNCT_REQ message
 * 2) A SCIF endpoint can initiate a SCIF connection by calling
 *	scif_connect(..) which calls scif_setup_qp_connect(..) which
 *	allocates the local qp for the endpoint ring buffer and then sends
 *	a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
 *	a SCIF_CNCT_REJ message
 * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
 *	wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
 *	message otherwise
 * 4) A thread blocked waiting for incoming connections allocates its local
 *	endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
 *	and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
 *	the node sends a SCIF_CNCT_REJ message
 * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
 *	connecting endpoint is woken up as part of handling
 *	scif_cnctgnt_resp(..) following which it maps the remote endpoint's
 *	QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
 *	success or a SCIF_CNCT_GNTNACK message on failure and completes
 *	the scif_connect(..) API
 * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
 *	in step 4 is woken up and completes the scif_accept(..) API
 * 7) The SCIF connection is now established between the two SCIF endpoints.
 */
static int scif_conn_func(struct scif_endpt *ep)
{
	int err = 0;
	struct scifmsg msg;
	struct device *spdev;

	err = scif_reserve_dma_chan(ep);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		ep->state = SCIFEP_BOUND;
		goto connect_error_simple;
	}
	/* Initiate the first part of the endpoint QP setup */
	err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
				    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s err %d qp_offset 0x%llx\n",
			__func__, err, ep->qp_info.qp_offset);
		ep->state = SCIFEP_BOUND;
		goto connect_error_simple;
	}

	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto cleanup_qp;
	}
	/* Format connect message and send it */
	msg.src = ep->port;
	msg.dst = ep->conn_port;
	msg.uop = SCIF_CNCT_REQ;
	msg.payload[0] = (u64)ep;
	msg.payload[1] = ep->qp_info.qp_offset;
	err = _scif_nodeqp_send(ep->remote_dev, &msg);
	if (err)
		goto connect_error_dec;
	scif_put_peer_dev(spdev);
	/*
	 * Wait for the remote node to respond with SCIF_CNCT_GNT or
	 * SCIF_CNCT_REJ message.
	 */
	err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
				 SCIF_NODE_ALIVE_TIMEOUT);
	if (!err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d timeout\n", __func__, __LINE__);
		ep->state = SCIFEP_BOUND;
	}
	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto cleanup_qp;
	}
	if (ep->state == SCIFEP_MAPPING) {
		err = scif_setup_qp_connect_response(ep->remote_dev,
						     ep->qp_info.qp,
						     ep->qp_info.gnt_pld);
		/*
		 * If the resources to map the queue are not available, then
		 * we need to tell the other side to terminate the accept.
		 */
		if (err) {
			dev_err(&ep->remote_dev->sdev->dev,
				"%s %d err %d\n", __func__, __LINE__, err);
			msg.uop = SCIF_CNCT_GNTNACK;
			msg.payload[0] = ep->remote_ep;
			_scif_nodeqp_send(ep->remote_dev, &msg);
			ep->state = SCIFEP_BOUND;
			goto connect_error_dec;
		}

		msg.uop = SCIF_CNCT_GNTACK;
		msg.payload[0] = ep->remote_ep;
		err = _scif_nodeqp_send(ep->remote_dev, &msg);
		if (err) {
			ep->state = SCIFEP_BOUND;
			goto connect_error_dec;
		}
		ep->state = SCIFEP_CONNECTED;
		mutex_lock(&scif_info.connlock);
		list_add_tail(&ep->list, &scif_info.connected);
		mutex_unlock(&scif_info.connlock);
		dev_dbg(&ep->remote_dev->sdev->dev,
			"SCIFAPI connect: ep %p connected\n", ep);
	} else if (ep->state == SCIFEP_BOUND) {
		dev_dbg(&ep->remote_dev->sdev->dev,
			"SCIFAPI connect: ep %p connection refused\n", ep);
		err = -ECONNREFUSED;
		goto connect_error_dec;
	}
	scif_put_peer_dev(spdev);
	return err;
connect_error_dec:
	scif_put_peer_dev(spdev);
cleanup_qp:
	scif_cleanup_ep_qp(ep);
connect_error_simple:
	return err;
}

/*
 * scif_conn_handler:
 *
 * Workqueue handler for servicing non-blocking SCIF connect
 */
void scif_conn_handler(struct work_struct *work)
{
	struct scif_endpt *ep;

	do {
		ep = NULL;
		spin_lock(&scif_info.nb_connect_lock);
		if (!list_empty(&scif_info.nb_connect_list)) {
			ep = list_first_entry(&scif_info.nb_connect_list,
					      struct scif_endpt, conn_list);
			list_del(&ep->conn_list);
		}
		spin_unlock(&scif_info.nb_connect_lock);
		if (ep) {
			ep->conn_err = scif_conn_func(ep);
			wake_up_interruptible(&ep->conn_pend_wq);
		}
	} while (ep);
}

int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int err = 0;
	struct scif_dev *remote_dev;
	struct device *spdev;

	dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
		scif_ep_states[ep->state]);

	if (!scif_dev || dst->node > scif_info.maxid)
		return -ENODEV;

	might_sleep();

	remote_dev = &scif_dev[dst->node];
	spdev = scif_get_peer_dev(remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		return err;
	}

	spin_lock(&ep->lock);
	switch (ep->state) {
	case SCIFEP_ZOMBIE:
	case SCIFEP_CLOSING:
		err = -EINVAL;
		break;
	case SCIFEP_DISCONNECTED:
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
		else
			err = -EINVAL;
		break;
	case SCIFEP_LISTENING:
	case SCIFEP_CLLISTEN:
		err = -EOPNOTSUPP;
		break;
	case SCIFEP_CONNECTING:
	case SCIFEP_MAPPING:
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
			err = -EINPROGRESS;
		else
			err = -EISCONN;
		break;
	case SCIFEP_CONNECTED:
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
		else
			err = -EISCONN;
		break;
	case SCIFEP_UNBOUND:
		err = scif_get_new_port();
		if (err < 0)
			break;
		ep->port.port = err;
		ep->port.node = scif_info.nodeid;
		ep->conn_async_state = ASYNC_CONN_IDLE;
		/* Fall through */
	case SCIFEP_BOUND:
		/*
		 * If a non-blocking connect has already been initiated
		 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
		 * ASYNC_CONN_FLUSH_WORK), the end point could end up in
		 * SCIFEP_BOUND due to an error in the connection process
		 * (e.g., connection refused). If conn_async_state is
		 * ASYNC_CONN_INPROGRESS, transition to ASYNC_CONN_FLUSH_WORK
		 * so that the error status can be collected. If the state is
		 * already ASYNC_CONN_FLUSH_WORK, then set the error to
		 * EINPROGRESS since some other thread is waiting to collect
		 * the error status.
		 */
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
		} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
			err = -EINPROGRESS;
		} else {
			ep->conn_port = *dst;
			init_waitqueue_head(&ep->sendwq);
			init_waitqueue_head(&ep->recvwq);
			init_waitqueue_head(&ep->conwq);
			ep->conn_async_state = 0;

			if (unlikely(non_block))
				ep->conn_async_state = ASYNC_CONN_INPROGRESS;
		}
		break;
	}

	if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
		goto connect_simple_unlock1;

	ep->state = SCIFEP_CONNECTING;
	ep->remote_dev = &scif_dev[dst->node];
	ep->qp_info.qp->magic = SCIFEP_MAGIC;
	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
		init_waitqueue_head(&ep->conn_pend_wq);
		spin_lock(&scif_info.nb_connect_lock);
		list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
		spin_unlock(&scif_info.nb_connect_lock);
		err = -EINPROGRESS;
		schedule_work(&scif_info.conn_work);
	}
connect_simple_unlock1:
	spin_unlock(&ep->lock);
	scif_put_peer_dev(spdev);
	if (err) {
		return err;
	} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
		flush_work(&scif_info.conn_work);
		err = ep->conn_err;
		spin_lock(&ep->lock);
		ep->conn_async_state = ASYNC_CONN_IDLE;
		spin_unlock(&ep->lock);
	} else {
		err = scif_conn_func(ep);
	}
	return err;
}

int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
{
	return __scif_connect(epd, dst, false);
}
EXPORT_SYMBOL_GPL(scif_connect);
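
/*
 * Usage sketch (illustrative only): the active side connects a bound
 * endpoint to a remote (node, port) pair. The destination values below
 * are hypothetical.
 *
 *	struct scif_port_id dst = { .node = 1, .port = 2000 };
 *
 *	err = scif_connect(epd, &dst);
 *	if (err < 0)
 *		return err;
 */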

/**
 * scif_accept() - Accept a connection request from the remote node
 *
 * The function accepts a connection request from the remote node.  Successful
 * completion is indicated by a new end point being created and passed back
 * to the caller for future reference.
 *
 * Upon successful completion a zero will be returned and the peer information
 * will be filled in.
 *
 * If the end point is not in the listening state, -EINVAL will be returned.
 *
 * If resource allocation fails during the connection sequence, -ENOMEM
 * will be returned.
 *
 * If the function is called with the ASYNC flag set and no connection requests
 * are pending, it will return -EAGAIN.
 *
 * If the remote side is not sending any connection requests, the caller may
 * terminate this function with a signal.  If so, -EINTR will be returned.
 */
int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
		scif_epd_t *newepd, int flags)
{
	struct scif_endpt *lep = (struct scif_endpt *)epd;
	struct scif_endpt *cep;
	struct scif_conreq *conreq;
	struct scifmsg msg;
	int err;
	struct device *spdev;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);

	if (flags & ~SCIF_ACCEPT_SYNC)
		return -EINVAL;

	if (!peer || !newepd)
		return -EINVAL;

	might_sleep();
	spin_lock(&lep->lock);
	if (lep->state != SCIFEP_LISTENING) {
		spin_unlock(&lep->lock);
		return -EINVAL;
	}

	if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
		/* No connection request present and we do not want to wait */
		spin_unlock(&lep->lock);
		return -EAGAIN;
	}

	lep->files = current->files;
retry_connection:
	spin_unlock(&lep->lock);
	/* Wait for the remote node to send us a SCIF_CNCT_REQ */
	err = wait_event_interruptible(lep->conwq,
				       (lep->conreqcnt ||
				       (lep->state != SCIFEP_LISTENING)));
	if (err)
		return err;

	if (lep->state != SCIFEP_LISTENING)
		return -EINTR;

	spin_lock(&lep->lock);

	if (!lep->conreqcnt)
		goto retry_connection;

	/* Get the first connect request off the list */
	conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
	list_del(&conreq->list);
	lep->conreqcnt--;
	spin_unlock(&lep->lock);

	/* Fill in the peer information */
	peer->node = conreq->msg.src.node;
	peer->port = conreq->msg.src.port;

	cep = kzalloc(sizeof(*cep), GFP_KERNEL);
	if (!cep) {
		err = -ENOMEM;
		goto scif_accept_error_epalloc;
	}
	spin_lock_init(&cep->lock);
	mutex_init(&cep->sendlock);
	mutex_init(&cep->recvlock);
	cep->state = SCIFEP_CONNECTING;
	cep->remote_dev = &scif_dev[peer->node];
	cep->remote_ep = conreq->msg.payload[0];

	scif_rma_ep_init(cep);

	err = scif_reserve_dma_chan(cep);
	if (err) {
		dev_err(scif_info.mdev.this_device,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto scif_accept_error_qpalloc;
	}

	cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
	if (!cep->qp_info.qp) {
		err = -ENOMEM;
		goto scif_accept_error_qpalloc;
	}

	err = scif_anon_inode_getfile(cep);
	if (err)
		goto scif_accept_error_anon_inode;

	cep->qp_info.qp->magic = SCIFEP_MAGIC;
	spdev = scif_get_peer_dev(cep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto scif_accept_error_map;
	}
	err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
				   conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
				   cep->remote_dev);
	if (err) {
		dev_dbg(&cep->remote_dev->sdev->dev,
			"SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
			lep, cep, err, cep->qp_info.qp_offset);
		scif_put_peer_dev(spdev);
		goto scif_accept_error_map;
	}

	cep->port.node = lep->port.node;
	cep->port.port = lep->port.port;
	cep->peer.node = peer->node;
	cep->peer.port = peer->port;
	init_waitqueue_head(&cep->sendwq);
	init_waitqueue_head(&cep->recvwq);
	init_waitqueue_head(&cep->conwq);

	msg.uop = SCIF_CNCT_GNT;
	msg.src = cep->port;
	msg.payload[0] = cep->remote_ep;
	msg.payload[1] = cep->qp_info.qp_offset;
	msg.payload[2] = (u64)cep;

	err = _scif_nodeqp_send(cep->remote_dev, &msg);
	scif_put_peer_dev(spdev);
	if (err)
		goto scif_accept_error_map;
retry:
	/* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
	err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
				 SCIF_NODE_ACCEPT_TIMEOUT);
	if (!err && scifdev_alive(cep))
		goto retry;
	err = !err ? -ENODEV : 0;
	if (err)
		goto scif_accept_error_map;
	kfree(conreq);

	spin_lock(&cep->lock);

	if (cep->state == SCIFEP_CLOSING) {
		/*
		 * Remote failed to allocate resources and NAKed the grant.
		 * There is at this point nothing referencing the new end point.
		 */
		spin_unlock(&cep->lock);
		scif_teardown_ep(cep);
		kfree(cep);

		/* If called with the sync flag, go back and wait. */
		if (flags & SCIF_ACCEPT_SYNC) {
			spin_lock(&lep->lock);
			goto retry_connection;
		}
		return -EAGAIN;
	}

	scif_get_port(cep->port.port);
	*newepd = (scif_epd_t)cep;
	spin_unlock(&cep->lock);
	return 0;
scif_accept_error_map:
	scif_anon_inode_fput(cep);
scif_accept_error_anon_inode:
	scif_teardown_ep(cep);
scif_accept_error_qpalloc:
	kfree(cep);
scif_accept_error_epalloc:
	msg.uop = SCIF_CNCT_REJ;
	msg.dst.node = conreq->msg.src.node;
	msg.dst.port = conreq->msg.src.port;
	msg.payload[0] = conreq->msg.payload[0];
	msg.payload[1] = conreq->msg.payload[1];
	scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
	kfree(conreq);
	return err;
}
EXPORT_SYMBOL_GPL(scif_accept);
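
/*
 * Usage sketch (illustrative only): a listening kernel client blocks
 * for the next connection request and receives a freshly created
 * endpoint for the new connection.
 *
 *	struct scif_port_id peer;
 *	scif_epd_t newepd;
 *
 *	err = scif_accept(lep, &peer, &newepd, SCIF_ACCEPT_SYNC);
 *	if (err < 0)
 *		return err;
 */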

/*
 * scif_msg_param_check:
 * @epd: The end point returned from scif_open()
 * @len: Length to send or receive
 * @flags: blocking or non blocking
 *
 * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
 */
static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
{
	int ret = -EINVAL;

	if (len < 0)
		goto err_ret;
	if (flags && (!(flags & SCIF_RECV_BLOCK)))
		goto err_ret;
	ret = 0;
err_ret:
	return ret;
}

static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scifmsg notif_msg;
	int curr_xfer_len = 0, sent_len = 0, write_count;
	int ret = 0;
	struct scif_qp *qp = ep->qp_info.qp;

	if (flags & SCIF_SEND_BLOCK)
		might_sleep();

	spin_lock(&ep->lock);
	while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
		write_count = scif_rb_space(&qp->outbound_q);
		if (write_count) {
			/* Best effort to send as much data as possible */
			curr_xfer_len = min(len - sent_len, write_count);
			ret = scif_rb_write(&qp->outbound_q, msg,
					    curr_xfer_len);
			if (ret < 0)
				break;
			/* Success. Update write pointer */
			scif_rb_commit(&qp->outbound_q);
			/*
			 * Send a notification to the peer about the
			 * produced data message.
			 */
			notif_msg.src = ep->port;
			notif_msg.uop = SCIF_CLIENT_SENT;
			notif_msg.payload[0] = ep->remote_ep;
			ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
			if (ret)
				break;
			sent_len += curr_xfer_len;
			msg = msg + curr_xfer_len;
			continue;
		}
		curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
		/* Not enough RB space. Return for the non-blocking case */
		if (!(flags & SCIF_SEND_BLOCK))
			break;

		spin_unlock(&ep->lock);
		/* Wait for a SCIF_CLIENT_RCVD message in the blocking case */
		ret =
		wait_event_interruptible(ep->sendwq,
					 (SCIFEP_CONNECTED != ep->state) ||
					 (scif_rb_space(&qp->outbound_q) >=
					 curr_xfer_len));
		spin_lock(&ep->lock);
		if (ret)
			break;
	}
	if (sent_len)
		ret = sent_len;
	else if (!ret && SCIFEP_CONNECTED != ep->state)
		ret = SCIFEP_DISCONNECTED == ep->state ?
			-ECONNRESET : -ENOTCONN;
	spin_unlock(&ep->lock);
	return ret;
}

static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
{
	int read_size;
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scifmsg notif_msg;
	int curr_recv_len = 0, remaining_len = len, read_count;
	int ret = 0;
	struct scif_qp *qp = ep->qp_info.qp;

	if (flags & SCIF_RECV_BLOCK)
		might_sleep();
	spin_lock(&ep->lock);
	while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
				 SCIFEP_DISCONNECTED == ep->state)) {
		read_count = scif_rb_count(&qp->inbound_q, remaining_len);
		if (read_count) {
			/*
			 * Best effort to recv as much data as there
			 * are bytes to read in the RB, particularly
			 * important for the non-blocking case.
			 */
			curr_recv_len = min(remaining_len, read_count);
			read_size = scif_rb_get_next(&qp->inbound_q,
						     msg, curr_recv_len);
			if (ep->state == SCIFEP_CONNECTED) {
				/*
				 * Update the read pointer only if the endpoint
				 * is still connected, else the read pointer
				 * might no longer exist since the peer has
				 * freed resources!
				 */
				scif_rb_update_read_ptr(&qp->inbound_q);
				/*
				 * Send a notification to the peer about the
				 * consumed data message only if the EP is in
				 * SCIFEP_CONNECTED state.
				 */
				notif_msg.src = ep->port;
				notif_msg.uop = SCIF_CLIENT_RCVD;
				notif_msg.payload[0] = ep->remote_ep;
				ret = _scif_nodeqp_send(ep->remote_dev,
							&notif_msg);
				if (ret)
					break;
			}
			remaining_len -= curr_recv_len;
			msg = msg + curr_recv_len;
			continue;
		}
		/*
		 * Bail out now if the EP is in SCIFEP_DISCONNECTED state,
		 * else we will keep looping forever.
		 */
		if (ep->state == SCIFEP_DISCONNECTED)
			break;
		/*
		 * Return in the non-blocking case if there is no data
		 * to read in this iteration.
		 */
		if (!(flags & SCIF_RECV_BLOCK))
			break;
		curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
		spin_unlock(&ep->lock);
		/*
		 * Wait for a SCIF_CLIENT_SENT message in the blocking case
		 * or until the other side disconnects.
		 */
		ret =
		wait_event_interruptible(ep->recvwq,
					 SCIFEP_CONNECTED != ep->state ||
					 scif_rb_count(&qp->inbound_q,
						       curr_recv_len)
					 >= curr_recv_len);
		spin_lock(&ep->lock);
		if (ret)
			break;
	}
	if (len - remaining_len)
		ret = len - remaining_len;
	else if (!ret && ep->state != SCIFEP_CONNECTED)
		ret = ep->state == SCIFEP_DISCONNECTED ?
			-ECONNRESET : -ENOTCONN;
	spin_unlock(&ep->lock);
	return ret;
}
/**
 * scif_user_send() - Send data to connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address of data to send
 * @len: Length to send
 * @flags: blocking or non blocking
 *
 * This function is called from the driver IOCTL entry point
 * only and is a wrapper for _scif_send().
 */
int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int err = 0;
	int sent_len = 0;
	char *tmp;
	int loop_len;
	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	err = scif_msg_param_check(epd, len, flags);
	if (err)
		goto send_err;

	tmp = kmalloc(chunk_len, GFP_KERNEL);
	if (!tmp) {
		err = -ENOMEM;
		goto send_err;
	}
	/*
	 * Grabbing the lock before breaking up the transfer into
	 * multiple chunks is required to ensure that messages do
	 * not get fragmented and reordered.
	 */
	mutex_lock(&ep->sendlock);
	while (sent_len != len) {
		loop_len = len - sent_len;
		loop_len = min(chunk_len, loop_len);
		if (copy_from_user(tmp, msg, loop_len)) {
			err = -EFAULT;
			goto send_free_err;
		}
		err = _scif_send(epd, tmp, loop_len, flags);
		if (err < 0)
			goto send_free_err;
		sent_len += err;
		msg += err;
		if (err != loop_len)
			goto send_free_err;
	}
send_free_err:
	mutex_unlock(&ep->sendlock);
	kfree(tmp);
send_err:
	return err < 0 ? err : sent_len;
}

/**
 * scif_user_recv() - Receive data from connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address to place data
 * @len: Length to receive
 * @flags: blocking or non blocking
 *
 * This function is called from the driver IOCTL entry point
 * only and is a wrapper for _scif_recv().
 */
int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int err = 0;
	int recv_len = 0;
	char *tmp;
	int loop_len;
	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	err = scif_msg_param_check(epd, len, flags);
	if (err)
		goto recv_err;

	tmp = kmalloc(chunk_len, GFP_KERNEL);
	if (!tmp) {
		err = -ENOMEM;
		goto recv_err;
	}
	/*
	 * Grabbing the lock before breaking up the transfer into
	 * multiple chunks is required to ensure that messages do
	 * not get fragmented and reordered.
	 */
	mutex_lock(&ep->recvlock);
	while (recv_len != len) {
		loop_len = len - recv_len;
		loop_len = min(chunk_len, loop_len);
		err = _scif_recv(epd, tmp, loop_len, flags);
		if (err < 0)
			goto recv_free_err;
		if (copy_to_user(msg, tmp, err)) {
			err = -EFAULT;
			goto recv_free_err;
		}
		recv_len += err;
		msg += err;
		if (err != loop_len)
			goto recv_free_err;
	}
recv_free_err:
	mutex_unlock(&ep->recvlock);
	kfree(tmp);
recv_err:
	return err < 0 ? err : recv_len;
}

/**
 * scif_send() - Send data to connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address of data to send
 * @len: Length to send
 * @flags: blocking or non blocking
 *
 * This function is called from the kernel mode only and is
 * a wrapper for _scif_send().
 */
int scif_send(scif_epd_t epd, void *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int ret;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	ret = scif_msg_param_check(epd, len, flags);
	if (ret)
		return ret;
	if (!ep->remote_dev)
		return -ENOTCONN;
	/*
	 * Grab the mutex lock in the blocking case only
	 * to ensure messages do not get fragmented/reordered.
	 * The non blocking mode is protected using spin locks
	 * in _scif_send().
	 */
	if (flags & SCIF_SEND_BLOCK)
		mutex_lock(&ep->sendlock);

	ret = _scif_send(epd, msg, len, flags);

	if (flags & SCIF_SEND_BLOCK)
		mutex_unlock(&ep->sendlock);
	return ret;
}
EXPORT_SYMBOL_GPL(scif_send);
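
/*
 * Usage sketch (illustrative only, with a hypothetical buffer): a
 * blocking send returns the number of bytes queued or a negative errno.
 *
 *	char buf[64] = "hello";
 *	int ret = scif_send(epd, buf, sizeof(buf), SCIF_SEND_BLOCK);
 *
 *	if (ret < 0)
 *		return ret;
 */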

/**
 * scif_recv() - Receive data from connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address to place data
 * @len: Length to receive
 * @flags: blocking or non blocking
 *
 * This function is called from the kernel mode only and is
 * a wrapper for _scif_recv().
 */
int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int ret;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	ret = scif_msg_param_check(epd, len, flags);
	if (ret)
		return ret;
	/*
	 * Grab the mutex lock in the blocking case only
	 * to ensure messages do not get fragmented/reordered.
	 * The non blocking mode is protected using spin locks
	 * in _scif_recv().
	 */
	if (flags & SCIF_RECV_BLOCK)
		mutex_lock(&ep->recvlock);

	ret = _scif_recv(epd, msg, len, flags);

	if (flags & SCIF_RECV_BLOCK)
		mutex_unlock(&ep->recvlock);

	return ret;
}
EXPORT_SYMBOL_GPL(scif_recv);
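
/*
 * Usage sketch (illustrative only, with a hypothetical buffer): a
 * blocking receive returns the number of bytes read or a negative
 * errno, e.g. -ECONNRESET once the peer has disconnected.
 *
 *	char buf[64];
 *	int ret = scif_recv(epd, buf, sizeof(buf), SCIF_RECV_BLOCK);
 *
 *	if (ret < 0)
 *		return ret;
 */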

static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
				   poll_table *p, struct scif_endpt *ep)
{
	/*
	 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
	 * and regrab it afterwards. Because the endpoint state might have
	 * changed while the lock was given up, the state must be checked
	 * again after re-acquiring the lock. The code in __scif_pollfd(..)
	 * does this.
	 */
	spin_unlock(&ep->lock);
	poll_wait(f, wq, p);
	spin_lock(&ep->lock);
}

__poll_t
__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
{
	__poll_t mask = 0;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);

	spin_lock(&ep->lock);

	/* Endpoint is waiting for a non-blocking connect to complete */
	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
		_scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
			if (ep->state == SCIFEP_CONNECTED ||
			    ep->state == SCIFEP_DISCONNECTED ||
			    ep->conn_err)
				mask |= EPOLLOUT;
			goto exit;
		}
	}

	/* Endpoint is listening for incoming connection requests */
	if (ep->state == SCIFEP_LISTENING) {
		_scif_poll_wait(f, &ep->conwq, wait, ep);
		if (ep->state == SCIFEP_LISTENING) {
			if (ep->conreqcnt)
				mask |= EPOLLIN;
			goto exit;
		}
	}

	/* Endpoint is connected or disconnected */
	if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
		if (poll_requested_events(wait) & EPOLLIN)
			_scif_poll_wait(f, &ep->recvwq, wait, ep);
		if (poll_requested_events(wait) & EPOLLOUT)
			_scif_poll_wait(f, &ep->sendwq, wait, ep);
		if (ep->state == SCIFEP_CONNECTED ||
		    ep->state == SCIFEP_DISCONNECTED) {
			/* Data can be read without blocking */
			if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
				mask |= EPOLLIN;
			/* Data can be written without blocking */
			if (scif_rb_space(&ep->qp_info.qp->outbound_q))
				mask |= EPOLLOUT;
			/* Return EPOLLHUP if endpoint is disconnected */
			if (ep->state == SCIFEP_DISCONNECTED)
				mask |= EPOLLHUP;
			goto exit;
		}
	}

	/* Return EPOLLERR if the endpoint is in none of the above states */
	mask |= EPOLLERR;
exit:
	spin_unlock(&ep->lock);
	return mask;
}

/**
 * scif_poll() - Kernel mode SCIF poll
 * @ufds: Array of scif_pollepd structures containing the end points
 *	  and events to poll on
 * @nfds: Size of the ufds array
 * @timeout_msecs: Timeout in msecs; a negative value implies an
 *		   infinite timeout
 *
 * The code flow in this function is based on do_poll(..) in select.c.
 *
 * Returns the number of endpoints which have pending events or 0 in
 * the event of a timeout. If a signal is used for wake up, -EINTR is
 * returned.
 */
int
scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
{
	struct poll_wqueues table;
	poll_table *pt;
	int i, count = 0, timed_out = timeout_msecs == 0;
	__poll_t mask;
	u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
		: msecs_to_jiffies(timeout_msecs);

	poll_initwait(&table);
	pt = &table.pt;
	while (1) {
		for (i = 0; i < nfds; i++) {
			pt->_key = ufds[i].events | EPOLLERR | EPOLLHUP;
			mask = __scif_pollfd(ufds[i].epd->anon,
					     pt, ufds[i].epd);
			mask &= ufds[i].events | EPOLLERR | EPOLLHUP;
			if (mask) {
				count++;
				pt->_qproc = NULL;
			}
			ufds[i].revents = mask;
		}
		pt->_qproc = NULL;
		if (!count) {
			count = table.error;
			if (signal_pending(current))
				count = -EINTR;
		}
		if (count || timed_out)
			break;

		if (!schedule_timeout_interruptible(timeout))
			timed_out = 1;
	}
	poll_freewait(&table);
	return count;
}
EXPORT_SYMBOL_GPL(scif_poll);
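
/*
 * Usage sketch (illustrative only): poll a single endpoint for incoming
 * data for up to one second. The timeout value is hypothetical.
 *
 *	struct scif_pollepd pollepd = { .epd = epd, .events = EPOLLIN };
 *	int ret = scif_poll(&pollepd, 1, 1000);
 *
 *	if (ret > 0 && (pollepd.revents & EPOLLIN))
 *		... data can be read without blocking ...
 */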

int scif_get_node_ids(u16 *nodes, int len, u16 *self)
{
	int online = 0;
	int offset = 0;
	int node;

	if (!scif_is_mgmt_node())
		scif_get_node_info();

	*self = scif_info.nodeid;
	mutex_lock(&scif_info.conflock);
	len = min_t(int, len, scif_info.total);
	for (node = 0; node <= scif_info.maxid; node++) {
		if (_scifdev_alive(&scif_dev[node])) {
			online++;
			if (offset < len)
				nodes[offset++] = node;
		}
	}
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
		scif_info.total, online, offset);
	mutex_unlock(&scif_info.conflock);

	return online;
}
EXPORT_SYMBOL_GPL(scif_get_node_ids);
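
/*
 * Usage sketch (illustrative only; the array size of 8 is an arbitrary
 * choice): at most len node IDs are filled in, and the return value is
 * the number of online nodes.
 *
 *	u16 nodes[8], self;
 *	int online = scif_get_node_ids(nodes, 8, &self);
 */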

static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
{
	struct scif_client *client =
		container_of(si, struct scif_client, si);
	struct scif_peer_dev *spdev =
		container_of(dev, struct scif_peer_dev, dev);

	if (client->probe)
		client->probe(spdev);
	return 0;
}

static void scif_remove_client_dev(struct device *dev,
				   struct subsys_interface *si)
{
	struct scif_client *client =
		container_of(si, struct scif_client, si);
	struct scif_peer_dev *spdev =
		container_of(dev, struct scif_peer_dev, dev);

	if (client->remove)
		client->remove(spdev);
}

void scif_client_unregister(struct scif_client *client)
{
	subsys_interface_unregister(&client->si);
}
EXPORT_SYMBOL_GPL(scif_client_unregister);

int scif_client_register(struct scif_client *client)
{
	struct subsys_interface *si = &client->si;

	si->name = client->name;
	si->subsys = &scif_peer_bus;
	si->add_dev = scif_add_client_dev;
	si->remove_dev = scif_remove_client_dev;

	return subsys_interface_register(&client->si);
}
EXPORT_SYMBOL_GPL(scif_client_register);
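
/*
 * Usage sketch (illustrative only; the client name and callbacks are
 * hypothetical): a SCIF client registers probe/remove callbacks that
 * run as peer devices come and go on the SCIF peer bus.
 *
 *	static struct scif_client my_client = {
 *		.name = "my_scif_client",
 *		.probe = my_probe,
 *		.remove = my_remove,
 *	};
 *
 *	err = scif_client_register(&my_client);
 *	...
 *	scif_client_unregister(&my_client);
 */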