   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Intel MIC Platform Software Stack (MPSS)
   4 *
   5 * Copyright(c) 2014 Intel Corporation.
   6 *
   7 * Intel SCIF driver.
   8 */
   9#include <linux/scif.h>
  10#include "scif_main.h"
  11#include "scif_map.h"
  12
  13static const char * const scif_ep_states[] = {
  14	"Unbound",
  15	"Bound",
  16	"Listening",
  17	"Connected",
  18	"Connecting",
  19	"Mapping",
  20	"Closing",
  21	"Close Listening",
  22	"Disconnected",
  23	"Zombie"};
  24
  25enum conn_async_state {
  26	ASYNC_CONN_IDLE = 1,	/* ep setup for async connect */
  27	ASYNC_CONN_INPROGRESS,	/* async connect in progress */
  28	ASYNC_CONN_FLUSH_WORK	/* async work flush in progress  */
  29};
  30
   31/*
   32 * File operations for the anonymous inode file associated with a SCIF
   33 * endpoint, used by kernel mode SCIF poll. Kernel mode SCIF poll calls
   34 * portions of the in-kernel poll API that take a struct file *. Since a
   35 * struct file is not available to kernel mode SCIF, it uses an anonymous
   36 * file for this purpose.
   37 */
  38const struct file_operations scif_anon_fops = {
  39	.owner = THIS_MODULE,
  40};
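
The scif_anon_inode_getfile() helper called from scif_open() below lives elsewhere in the driver; a minimal sketch of such a helper, assuming it wraps the stock anon_inode_getfile() kernel API and stores the file in the endpoint's ->anon member (the member scif_poll() dereferences later in this file), might look like:

#include <linux/anon_inodes.h>
#include <linux/err.h>

static int scif_anon_inode_getfile_sketch(scif_epd_t epd)
{
	/* Back the endpoint with an anonymous file so the in-kernel
	 * poll API has a struct file * to operate on. */
	epd->anon = anon_inode_getfile("scif", &scif_anon_fops, NULL, 0);
	return PTR_ERR_OR_ZERO(epd->anon);
}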
  41
  42scif_epd_t scif_open(void)
  43{
  44	struct scif_endpt *ep;
  45	int err;
  46
  47	might_sleep();
  48	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
  49	if (!ep)
  50		goto err_ep_alloc;
  51
  52	ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
  53	if (!ep->qp_info.qp)
  54		goto err_qp_alloc;
  55
  56	err = scif_anon_inode_getfile(ep);
  57	if (err)
  58		goto err_anon_inode;
  59
  60	spin_lock_init(&ep->lock);
  61	mutex_init(&ep->sendlock);
  62	mutex_init(&ep->recvlock);
  63
  64	scif_rma_ep_init(ep);
  65	ep->state = SCIFEP_UNBOUND;
  66	dev_dbg(scif_info.mdev.this_device,
  67		"SCIFAPI open: ep %p success\n", ep);
  68	return ep;
  69
  70err_anon_inode:
  71	kfree(ep->qp_info.qp);
  72err_qp_alloc:
  73	kfree(ep);
  74err_ep_alloc:
  75	return NULL;
  76}
  77EXPORT_SYMBOL_GPL(scif_open);
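
Note that scif_open() returns NULL rather than an ERR_PTR on failure, so a kernel client checks it accordingly and pairs every successful open with scif_close(); a minimal usage sketch:

	scif_epd_t epd;

	epd = scif_open();
	if (!epd)
		return -ENOMEM;	/* NULL, not ERR_PTR, on failure */
	/* ... bind, connect/listen, send/recv using epd ... */
	scif_close(epd);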
  78
  79/*
  80 * scif_disconnect_ep - Disconnects the endpoint if found
  81 * @epd: The end point returned from scif_open()
  82 */
  83static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
  84{
  85	struct scifmsg msg;
  86	struct scif_endpt *fep = NULL;
  87	struct scif_endpt *tmpep;
  88	struct list_head *pos, *tmpq;
  89	int err;
  90
  91	/*
  92	 * Wake up any threads blocked in send()/recv() before closing
  93	 * out the connection. Grabbing and releasing the send/recv lock
  94	 * will ensure that any blocked senders/receivers have exited for
  95	 * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
  96	 * close. Ring 3 endpoints are not affected since close will not
  97	 * be called while there are IOCTLs executing.
  98	 */
  99	wake_up_interruptible(&ep->sendwq);
 100	wake_up_interruptible(&ep->recvwq);
 101	mutex_lock(&ep->sendlock);
 102	mutex_unlock(&ep->sendlock);
 103	mutex_lock(&ep->recvlock);
 104	mutex_unlock(&ep->recvlock);
 105
 106	/* Remove from the connected list */
 107	mutex_lock(&scif_info.connlock);
 108	list_for_each_safe(pos, tmpq, &scif_info.connected) {
 109		tmpep = list_entry(pos, struct scif_endpt, list);
 110		if (tmpep == ep) {
 111			list_del(pos);
 112			fep = tmpep;
 113			spin_lock(&ep->lock);
 114			break;
 115		}
 116	}
 117
 118	if (!fep) {
  119		/*
  120		 * The other side completed the disconnect before the
  121		 * endpoint could be removed from the list. Therefore the
  122		 * ep lock was never taken; traverse the disconnected list
  123		 * to find the endpoint, then release the conn lock.
  124		 */
 125		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
 126			tmpep = list_entry(pos, struct scif_endpt, list);
 127			if (tmpep == ep) {
 128				list_del(pos);
 129				break;
 130			}
 131		}
 132		mutex_unlock(&scif_info.connlock);
 133		return NULL;
 134	}
 135
 136	init_completion(&ep->discon);
 137	msg.uop = SCIF_DISCNCT;
 138	msg.src = ep->port;
 139	msg.dst = ep->peer;
 140	msg.payload[0] = (u64)ep;
 141	msg.payload[1] = ep->remote_ep;
 142
 143	err = scif_nodeqp_send(ep->remote_dev, &msg);
 144	spin_unlock(&ep->lock);
 145	mutex_unlock(&scif_info.connlock);
 146
 147	if (!err)
 148		/* Wait for the remote node to respond with SCIF_DISCNT_ACK */
 149		wait_for_completion_timeout(&ep->discon,
 150					    SCIF_NODE_ALIVE_TIMEOUT);
 151	return ep;
 152}
 153
 154int scif_close(scif_epd_t epd)
 155{
 156	struct scif_endpt *ep = (struct scif_endpt *)epd;
 157	struct scif_endpt *tmpep;
 158	struct list_head *pos, *tmpq;
 159	enum scif_epd_state oldstate;
 160	bool flush_conn;
 161
 162	dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
 163		ep, scif_ep_states[ep->state]);
 164	might_sleep();
 165	spin_lock(&ep->lock);
 166	flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
 167	spin_unlock(&ep->lock);
 168
 169	if (flush_conn)
 170		flush_work(&scif_info.conn_work);
 171
 172	spin_lock(&ep->lock);
 173	oldstate = ep->state;
 174
 175	ep->state = SCIFEP_CLOSING;
 176
 177	switch (oldstate) {
 178	case SCIFEP_ZOMBIE:
 179		dev_err(scif_info.mdev.this_device,
 180			"SCIFAPI close: zombie state unexpected\n");
 181		fallthrough;
 182	case SCIFEP_DISCONNECTED:
 183		spin_unlock(&ep->lock);
 184		scif_unregister_all_windows(epd);
 185		/* Remove from the disconnected list */
 186		mutex_lock(&scif_info.connlock);
 187		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
 188			tmpep = list_entry(pos, struct scif_endpt, list);
 189			if (tmpep == ep) {
 190				list_del(pos);
 191				break;
 192			}
 193		}
 194		mutex_unlock(&scif_info.connlock);
 195		break;
 196	case SCIFEP_UNBOUND:
 197	case SCIFEP_BOUND:
 198	case SCIFEP_CONNECTING:
 199		spin_unlock(&ep->lock);
 200		break;
 201	case SCIFEP_MAPPING:
 202	case SCIFEP_CONNECTED:
 203	case SCIFEP_CLOSING:
 204	{
 205		spin_unlock(&ep->lock);
 206		scif_unregister_all_windows(epd);
 207		scif_disconnect_ep(ep);
 208		break;
 209	}
 210	case SCIFEP_LISTENING:
 211	case SCIFEP_CLLISTEN:
 212	{
 213		struct scif_conreq *conreq;
 214		struct scifmsg msg;
 215		struct scif_endpt *aep;
 216
 217		spin_unlock(&ep->lock);
 218		mutex_lock(&scif_info.eplock);
 219
 220		/* remove from listen list */
 221		list_for_each_safe(pos, tmpq, &scif_info.listen) {
 222			tmpep = list_entry(pos, struct scif_endpt, list);
 223			if (tmpep == ep)
 224				list_del(pos);
 225		}
 226		/* Remove any dangling accepts */
 227		while (ep->acceptcnt) {
 228			aep = list_first_entry(&ep->li_accept,
 229					       struct scif_endpt, liacceptlist);
 230			list_del(&aep->liacceptlist);
 231			scif_put_port(aep->port.port);
 232			list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
 233				tmpep = list_entry(pos, struct scif_endpt,
 234						   miacceptlist);
 235				if (tmpep == aep) {
 236					list_del(pos);
 237					break;
 238				}
 239			}
 240			mutex_unlock(&scif_info.eplock);
 241			mutex_lock(&scif_info.connlock);
 242			list_for_each_safe(pos, tmpq, &scif_info.connected) {
 243				tmpep = list_entry(pos,
 244						   struct scif_endpt, list);
 245				if (tmpep == aep) {
 246					list_del(pos);
 247					break;
 248				}
 249			}
 250			list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
 251				tmpep = list_entry(pos,
 252						   struct scif_endpt, list);
 253				if (tmpep == aep) {
 254					list_del(pos);
 255					break;
 256				}
 257			}
 258			mutex_unlock(&scif_info.connlock);
 259			scif_teardown_ep(aep);
 260			mutex_lock(&scif_info.eplock);
 261			scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
 262			ep->acceptcnt--;
 263		}
 264
 265		spin_lock(&ep->lock);
 266		mutex_unlock(&scif_info.eplock);
 267
 268		/* Remove and reject any pending connection requests. */
 269		while (ep->conreqcnt) {
 270			conreq = list_first_entry(&ep->conlist,
 271						  struct scif_conreq, list);
 272			list_del(&conreq->list);
 273
 274			msg.uop = SCIF_CNCT_REJ;
 275			msg.dst.node = conreq->msg.src.node;
 276			msg.dst.port = conreq->msg.src.port;
 277			msg.payload[0] = conreq->msg.payload[0];
 278			msg.payload[1] = conreq->msg.payload[1];
 279			/*
 280			 * No Error Handling on purpose for scif_nodeqp_send().
  281			 * If the remote node is lost we still want to free
  282			 * the connection requests on the local node.
 283			 */
 284			scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
 285					 &msg);
 286			ep->conreqcnt--;
 287			kfree(conreq);
 288		}
 289
 290		spin_unlock(&ep->lock);
 291		/* If a kSCIF accept is waiting wake it up */
 292		wake_up_interruptible(&ep->conwq);
 293		break;
 294	}
 295	}
 296	scif_put_port(ep->port.port);
 297	scif_anon_inode_fput(ep);
 298	scif_teardown_ep(ep);
 299	scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
 300	return 0;
 301}
 302EXPORT_SYMBOL_GPL(scif_close);
 303
 304/**
  305 * __scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
 306 *			accept new connections.
 307 * @epd: The end point returned from scif_open()
 308 */
 309int __scif_flush(scif_epd_t epd)
 310{
 311	struct scif_endpt *ep = (struct scif_endpt *)epd;
 312
 313	switch (ep->state) {
 314	case SCIFEP_LISTENING:
 315	{
 316		ep->state = SCIFEP_CLLISTEN;
 317
 318		/* If an accept is waiting wake it up */
 319		wake_up_interruptible(&ep->conwq);
 320		break;
 321	}
 322	default:
 323		break;
 324	}
 325	return 0;
 326}
 327
 328int scif_bind(scif_epd_t epd, u16 pn)
 329{
 330	struct scif_endpt *ep = (struct scif_endpt *)epd;
 331	int ret = 0;
 332	int tmp;
 333
 334	dev_dbg(scif_info.mdev.this_device,
 335		"SCIFAPI bind: ep %p %s requested port number %d\n",
 336		ep, scif_ep_states[ep->state], pn);
 337	if (pn) {
 338		/*
 339		 * Similar to IETF RFC 1700, SCIF ports below
 340		 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
 341		 * processes or by processes executed by privileged users.
 342		 */
 343		if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
 344			ret = -EACCES;
 345			goto scif_bind_admin_exit;
 346		}
 347	}
 348
 349	spin_lock(&ep->lock);
 350	if (ep->state == SCIFEP_BOUND) {
 351		ret = -EINVAL;
 352		goto scif_bind_exit;
 353	} else if (ep->state != SCIFEP_UNBOUND) {
 354		ret = -EISCONN;
 355		goto scif_bind_exit;
 356	}
 357
 358	if (pn) {
 359		tmp = scif_rsrv_port(pn);
 360		if (tmp != pn) {
 361			ret = -EINVAL;
 362			goto scif_bind_exit;
 363		}
 364	} else {
 365		ret = scif_get_new_port();
 366		if (ret < 0)
 367			goto scif_bind_exit;
 368		pn = ret;
 369	}
 370
 371	ep->state = SCIFEP_BOUND;
 372	ep->port.node = scif_info.nodeid;
 373	ep->port.port = pn;
 374	ep->conn_async_state = ASYNC_CONN_IDLE;
 375	ret = pn;
 376	dev_dbg(scif_info.mdev.this_device,
 377		"SCIFAPI bind: bound to port number %d\n", pn);
 378scif_bind_exit:
 379	spin_unlock(&ep->lock);
 380scif_bind_admin_exit:
 381	return ret;
 382}
 383EXPORT_SYMBOL_GPL(scif_bind);
 384
 385int scif_listen(scif_epd_t epd, int backlog)
 386{
 387	struct scif_endpt *ep = (struct scif_endpt *)epd;
 388
 389	dev_dbg(scif_info.mdev.this_device,
 390		"SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
 391	spin_lock(&ep->lock);
 392	switch (ep->state) {
 393	case SCIFEP_ZOMBIE:
 394	case SCIFEP_CLOSING:
 395	case SCIFEP_CLLISTEN:
 396	case SCIFEP_UNBOUND:
 397	case SCIFEP_DISCONNECTED:
 398		spin_unlock(&ep->lock);
 399		return -EINVAL;
 400	case SCIFEP_LISTENING:
 401	case SCIFEP_CONNECTED:
 402	case SCIFEP_CONNECTING:
 403	case SCIFEP_MAPPING:
 404		spin_unlock(&ep->lock);
 405		return -EISCONN;
 406	case SCIFEP_BOUND:
 407		break;
 408	}
 409
 410	ep->state = SCIFEP_LISTENING;
 411	ep->backlog = backlog;
 412
 413	ep->conreqcnt = 0;
 414	ep->acceptcnt = 0;
 415	INIT_LIST_HEAD(&ep->conlist);
 416	init_waitqueue_head(&ep->conwq);
 417	INIT_LIST_HEAD(&ep->li_accept);
 418	spin_unlock(&ep->lock);
 419
  420	/*
  421	 * The listen state is now set, so tear down the qp information not
  422	 * needed on a listening endpoint before placing it on the listen list.
  423	 */
 424	scif_teardown_ep(ep);
 425	ep->qp_info.qp = NULL;
 426
 427	mutex_lock(&scif_info.eplock);
 428	list_add_tail(&ep->list, &scif_info.listen);
 429	mutex_unlock(&scif_info.eplock);
 430	return 0;
 431}
 432EXPORT_SYMBOL_GPL(scif_listen);
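
Taken together, scif_open()/scif_bind()/scif_listen() set up the passive side of a connection. A sketch of a hypothetical helper (the name my_scif_listen is illustrative; passing 0 asks scif_bind() to allocate a port, and ports below SCIF_ADMIN_PORT_END require CAP_SYS_ADMIN):

static scif_epd_t my_scif_listen(u16 port, int backlog)
{
	scif_epd_t epd;
	int err;

	epd = scif_open();
	if (!epd)
		return NULL;
	err = scif_bind(epd, port);	/* returns the bound port on success */
	if (err < 0)
		goto err_close;
	err = scif_listen(epd, backlog);
	if (err)
		goto err_close;
	return epd;
err_close:
	scif_close(epd);
	return NULL;
}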
 433
 434/*
 435 ************************************************************************
 436 * SCIF connection flow:
 437 *
 438 * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
 439 *	connections via a SCIF_CNCT_REQ message
 440 * 2) A SCIF endpoint can initiate a SCIF connection by calling
 441 *	scif_connect(..) which calls scif_setup_qp_connect(..) which
 442 *	allocates the local qp for the endpoint ring buffer and then sends
 443 *	a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
 444 *	a SCIF_CNCT_REJ message
 445 * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
 446 *	wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
 447 *	message otherwise
 448 * 4) A thread blocked waiting for incoming connections allocates its local
 449 *	endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
 450 *	and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
 451 *	the node sends a SCIF_CNCT_REJ message
 452 * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
 453 *	connecting endpoint is woken up as part of handling
  454 *	scif_cnctgnt_resp(..) following which it maps the remote endpoint's
 455 *	QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
 456 *	success or a SCIF_CNCT_GNTNACK message on failure and completes
 457 *	the scif_connect(..) API
 458 * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
 459 *	in step 4 is woken up and completes the scif_accept(..) API
 460 * 7) The SCIF connection is now established between the two SCIF endpoints.
 461 */
 462static int scif_conn_func(struct scif_endpt *ep)
 463{
 464	int err = 0;
 465	struct scifmsg msg;
 466	struct device *spdev;
 467
 468	err = scif_reserve_dma_chan(ep);
 469	if (err) {
 470		dev_err(&ep->remote_dev->sdev->dev,
 471			"%s %d err %d\n", __func__, __LINE__, err);
 472		ep->state = SCIFEP_BOUND;
 473		goto connect_error_simple;
 474	}
 475	/* Initiate the first part of the endpoint QP setup */
 476	err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
 477				    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
 478	if (err) {
 479		dev_err(&ep->remote_dev->sdev->dev,
 480			"%s err %d qp_offset 0x%llx\n",
 481			__func__, err, ep->qp_info.qp_offset);
 482		ep->state = SCIFEP_BOUND;
 483		goto connect_error_simple;
 484	}
 485
 486	spdev = scif_get_peer_dev(ep->remote_dev);
 487	if (IS_ERR(spdev)) {
 488		err = PTR_ERR(spdev);
 489		goto cleanup_qp;
 490	}
 491	/* Format connect message and send it */
 492	msg.src = ep->port;
 493	msg.dst = ep->conn_port;
 494	msg.uop = SCIF_CNCT_REQ;
 495	msg.payload[0] = (u64)ep;
 496	msg.payload[1] = ep->qp_info.qp_offset;
 497	err = _scif_nodeqp_send(ep->remote_dev, &msg);
 498	if (err)
 499		goto connect_error_dec;
 500	scif_put_peer_dev(spdev);
 501	/*
 502	 * Wait for the remote node to respond with SCIF_CNCT_GNT or
 503	 * SCIF_CNCT_REJ message.
 504	 */
 505	err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
 506				 SCIF_NODE_ALIVE_TIMEOUT);
 507	if (!err) {
 508		dev_err(&ep->remote_dev->sdev->dev,
 509			"%s %d timeout\n", __func__, __LINE__);
 510		ep->state = SCIFEP_BOUND;
 511	}
 512	spdev = scif_get_peer_dev(ep->remote_dev);
 513	if (IS_ERR(spdev)) {
 514		err = PTR_ERR(spdev);
 515		goto cleanup_qp;
 516	}
 517	if (ep->state == SCIFEP_MAPPING) {
 518		err = scif_setup_qp_connect_response(ep->remote_dev,
 519						     ep->qp_info.qp,
 520						     ep->qp_info.gnt_pld);
  521		/*
  522		 * If the resources to map the queue are not available then
  523		 * we need to tell the other side to terminate the accept.
  524		 */
 525		if (err) {
 526			dev_err(&ep->remote_dev->sdev->dev,
 527				"%s %d err %d\n", __func__, __LINE__, err);
 528			msg.uop = SCIF_CNCT_GNTNACK;
 529			msg.payload[0] = ep->remote_ep;
 530			_scif_nodeqp_send(ep->remote_dev, &msg);
 531			ep->state = SCIFEP_BOUND;
 532			goto connect_error_dec;
 533		}
 534
 535		msg.uop = SCIF_CNCT_GNTACK;
 536		msg.payload[0] = ep->remote_ep;
 537		err = _scif_nodeqp_send(ep->remote_dev, &msg);
 538		if (err) {
 539			ep->state = SCIFEP_BOUND;
 540			goto connect_error_dec;
 541		}
 542		ep->state = SCIFEP_CONNECTED;
 543		mutex_lock(&scif_info.connlock);
 544		list_add_tail(&ep->list, &scif_info.connected);
 545		mutex_unlock(&scif_info.connlock);
 546		dev_dbg(&ep->remote_dev->sdev->dev,
 547			"SCIFAPI connect: ep %p connected\n", ep);
 548	} else if (ep->state == SCIFEP_BOUND) {
 549		dev_dbg(&ep->remote_dev->sdev->dev,
 550			"SCIFAPI connect: ep %p connection refused\n", ep);
 551		err = -ECONNREFUSED;
 552		goto connect_error_dec;
 553	}
 554	scif_put_peer_dev(spdev);
 555	return err;
 556connect_error_dec:
 557	scif_put_peer_dev(spdev);
 558cleanup_qp:
 559	scif_cleanup_ep_qp(ep);
 560connect_error_simple:
 561	return err;
 562}
 563
 564/*
 565 * scif_conn_handler:
 566 *
 567 * Workqueue handler for servicing non-blocking SCIF connect
 568 *
 569 */
 570void scif_conn_handler(struct work_struct *work)
 571{
 572	struct scif_endpt *ep;
 573
 574	do {
 575		ep = NULL;
 576		spin_lock(&scif_info.nb_connect_lock);
 577		if (!list_empty(&scif_info.nb_connect_list)) {
 578			ep = list_first_entry(&scif_info.nb_connect_list,
 579					      struct scif_endpt, conn_list);
 580			list_del(&ep->conn_list);
 581		}
 582		spin_unlock(&scif_info.nb_connect_lock);
 583		if (ep) {
 584			ep->conn_err = scif_conn_func(ep);
 585			wake_up_interruptible(&ep->conn_pend_wq);
 586		}
 587	} while (ep);
 588}
 589
 590int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
 591{
 592	struct scif_endpt *ep = (struct scif_endpt *)epd;
 593	int err = 0;
 594	struct scif_dev *remote_dev;
 595	struct device *spdev;
 596
 597	dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
 598		scif_ep_states[ep->state]);
 599
 600	if (!scif_dev || dst->node > scif_info.maxid)
 601		return -ENODEV;
 602
 603	might_sleep();
 604
 605	remote_dev = &scif_dev[dst->node];
 606	spdev = scif_get_peer_dev(remote_dev);
 607	if (IS_ERR(spdev)) {
 608		err = PTR_ERR(spdev);
 609		return err;
 610	}
 611
 612	spin_lock(&ep->lock);
 613	switch (ep->state) {
 614	case SCIFEP_ZOMBIE:
 615	case SCIFEP_CLOSING:
 616		err = -EINVAL;
 617		break;
 618	case SCIFEP_DISCONNECTED:
 619		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
 620			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
 621		else
 622			err = -EINVAL;
 623		break;
 624	case SCIFEP_LISTENING:
 625	case SCIFEP_CLLISTEN:
 626		err = -EOPNOTSUPP;
 627		break;
 628	case SCIFEP_CONNECTING:
 629	case SCIFEP_MAPPING:
 630		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
 631			err = -EINPROGRESS;
 632		else
 633			err = -EISCONN;
 634		break;
 635	case SCIFEP_CONNECTED:
 636		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
 637			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
 638		else
 639			err = -EISCONN;
 640		break;
  641	case SCIFEP_UNBOUND:
  642		err = scif_get_new_port();
  643		if (err < 0)
  644			break;
  645		ep->port.port = err;
  646		ep->port.node = scif_info.nodeid;
  647		ep->conn_async_state = ASYNC_CONN_IDLE;
     		err = 0;	/* clear the port number so the connect proceeds */
  648		fallthrough;
 649	case SCIFEP_BOUND:
 650		/*
 651		 * If a non-blocking connect has been already initiated
 652		 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
 653		 * ASYNC_CONN_FLUSH_WORK), the end point could end up in
 654		 * SCIF_BOUND due an error in the connection process
 655		 * (e.g., connection refused) If conn_async_state is
 656		 * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
 657		 * so that the error status can be collected. If the state is
 658		 * already ASYNC_CONN_FLUSH_WORK - then set the error to
 659		 * EINPROGRESS since some other thread is waiting to collect
 660		 * error status.
 661		 */
 662		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
 663			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
 664		} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
 665			err = -EINPROGRESS;
 666		} else {
 667			ep->conn_port = *dst;
 668			init_waitqueue_head(&ep->sendwq);
 669			init_waitqueue_head(&ep->recvwq);
 670			init_waitqueue_head(&ep->conwq);
 671			ep->conn_async_state = 0;
 672
 673			if (unlikely(non_block))
 674				ep->conn_async_state = ASYNC_CONN_INPROGRESS;
 675		}
 676		break;
 677	}
 678
  679	if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
  680		goto connect_simple_unlock1;
 681
 682	ep->state = SCIFEP_CONNECTING;
 683	ep->remote_dev = &scif_dev[dst->node];
 684	ep->qp_info.qp->magic = SCIFEP_MAGIC;
 685	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
 686		init_waitqueue_head(&ep->conn_pend_wq);
 687		spin_lock(&scif_info.nb_connect_lock);
 688		list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
 689		spin_unlock(&scif_info.nb_connect_lock);
 690		err = -EINPROGRESS;
 691		schedule_work(&scif_info.conn_work);
 692	}
 693connect_simple_unlock1:
 694	spin_unlock(&ep->lock);
 695	scif_put_peer_dev(spdev);
 696	if (err) {
 697		return err;
 698	} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
 699		flush_work(&scif_info.conn_work);
 700		err = ep->conn_err;
 701		spin_lock(&ep->lock);
 702		ep->conn_async_state = ASYNC_CONN_IDLE;
 703		spin_unlock(&ep->lock);
 704	} else {
 705		err = scif_conn_func(ep);
 706	}
 707	return err;
 708}
 709
 710int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
 711{
 712	return __scif_connect(epd, dst, false);
 713}
 714EXPORT_SYMBOL_GPL(scif_connect);
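
The active side mirrors the listen setup: open, bind, then connect to the peer's (node, port) pair. A sketch, assuming a listener exists on the remote node (the name my_scif_connect is illustrative):

static scif_epd_t my_scif_connect(u16 node, u16 port)
{
	struct scif_port_id dst = { .node = node, .port = port };
	scif_epd_t epd;
	int err;

	epd = scif_open();
	if (!epd)
		return NULL;
	err = scif_bind(epd, 0);	/* let SCIF pick a local port */
	if (err < 0)
		goto err_close;
	/* Blocking connect: returns once the peer grants or rejects. */
	err = scif_connect(epd, &dst);
	if (err < 0)
		goto err_close;
	return epd;
err_close:
	scif_close(epd);
	return NULL;
}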
 715
 716/*
 717 * scif_accept() - Accept a connection request from the remote node
 718 *
  719 * The function accepts a connection request from the remote node.  Successful
  720 * completion is indicated by a new endpoint being created and passed back
  721 * to the caller for future reference.
  722 *
  723 * Upon successful completion zero is returned and the peer information
  724 * is filled in.
  725 *
  726 * If the endpoint is not in the listening state -EINVAL is returned.
  727 *
  728 * If resource allocation fails during the connection sequence -ENOMEM
  729 * is returned.
  730 *
  731 * If the function is called without the SCIF_ACCEPT_SYNC flag and no
  732 * connection requests are pending it returns -EAGAIN.
  733 *
  734 * If the remote side is not sending any connection requests the caller may
  735 * terminate the wait with a signal, in which case -EINTR is returned.
 736 */
 737int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
 738		scif_epd_t *newepd, int flags)
 739{
 740	struct scif_endpt *lep = (struct scif_endpt *)epd;
 741	struct scif_endpt *cep;
 742	struct scif_conreq *conreq;
 743	struct scifmsg msg;
 744	int err;
 745	struct device *spdev;
 746
 747	dev_dbg(scif_info.mdev.this_device,
 748		"SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);
 749
 750	if (flags & ~SCIF_ACCEPT_SYNC)
 751		return -EINVAL;
 752
 753	if (!peer || !newepd)
 754		return -EINVAL;
 755
 756	might_sleep();
 757	spin_lock(&lep->lock);
 758	if (lep->state != SCIFEP_LISTENING) {
 759		spin_unlock(&lep->lock);
 760		return -EINVAL;
 761	}
 762
 763	if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
 764		/* No connection request present and we do not want to wait */
 765		spin_unlock(&lep->lock);
 766		return -EAGAIN;
 767	}
 768
 769	lep->files = current->files;
 770retry_connection:
 771	spin_unlock(&lep->lock);
 772	/* Wait for the remote node to send us a SCIF_CNCT_REQ */
 773	err = wait_event_interruptible(lep->conwq,
 774				       (lep->conreqcnt ||
 775				       (lep->state != SCIFEP_LISTENING)));
 776	if (err)
 777		return err;
 778
 779	if (lep->state != SCIFEP_LISTENING)
 780		return -EINTR;
 781
 782	spin_lock(&lep->lock);
 783
 784	if (!lep->conreqcnt)
 785		goto retry_connection;
 786
 787	/* Get the first connect request off the list */
 788	conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
 789	list_del(&conreq->list);
 790	lep->conreqcnt--;
 791	spin_unlock(&lep->lock);
 792
 793	/* Fill in the peer information */
 794	peer->node = conreq->msg.src.node;
 795	peer->port = conreq->msg.src.port;
 796
 797	cep = kzalloc(sizeof(*cep), GFP_KERNEL);
 798	if (!cep) {
 799		err = -ENOMEM;
 800		goto scif_accept_error_epalloc;
 801	}
 802	spin_lock_init(&cep->lock);
 803	mutex_init(&cep->sendlock);
 804	mutex_init(&cep->recvlock);
 805	cep->state = SCIFEP_CONNECTING;
 806	cep->remote_dev = &scif_dev[peer->node];
 807	cep->remote_ep = conreq->msg.payload[0];
 808
 809	scif_rma_ep_init(cep);
 810
 811	err = scif_reserve_dma_chan(cep);
 812	if (err) {
 813		dev_err(scif_info.mdev.this_device,
 814			"%s %d err %d\n", __func__, __LINE__, err);
 815		goto scif_accept_error_qpalloc;
 816	}
 817
 818	cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
 819	if (!cep->qp_info.qp) {
 820		err = -ENOMEM;
 821		goto scif_accept_error_qpalloc;
 822	}
 823
 824	err = scif_anon_inode_getfile(cep);
 825	if (err)
 826		goto scif_accept_error_anon_inode;
 827
 828	cep->qp_info.qp->magic = SCIFEP_MAGIC;
 829	spdev = scif_get_peer_dev(cep->remote_dev);
 830	if (IS_ERR(spdev)) {
 831		err = PTR_ERR(spdev);
 832		goto scif_accept_error_map;
 833	}
 834	err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
 835				   conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
 836				   cep->remote_dev);
 837	if (err) {
 838		dev_dbg(&cep->remote_dev->sdev->dev,
 839			"SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
 840			lep, cep, err, cep->qp_info.qp_offset);
 841		scif_put_peer_dev(spdev);
 842		goto scif_accept_error_map;
 843	}
 844
 845	cep->port.node = lep->port.node;
 846	cep->port.port = lep->port.port;
 847	cep->peer.node = peer->node;
 848	cep->peer.port = peer->port;
 849	init_waitqueue_head(&cep->sendwq);
 850	init_waitqueue_head(&cep->recvwq);
 851	init_waitqueue_head(&cep->conwq);
 852
 853	msg.uop = SCIF_CNCT_GNT;
 854	msg.src = cep->port;
 855	msg.payload[0] = cep->remote_ep;
 856	msg.payload[1] = cep->qp_info.qp_offset;
 857	msg.payload[2] = (u64)cep;
 858
 859	err = _scif_nodeqp_send(cep->remote_dev, &msg);
 860	scif_put_peer_dev(spdev);
 861	if (err)
 862		goto scif_accept_error_map;
 863retry:
 864	/* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
 865	err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
 866				 SCIF_NODE_ACCEPT_TIMEOUT);
 867	if (!err && scifdev_alive(cep))
 868		goto retry;
 869	err = !err ? -ENODEV : 0;
 870	if (err)
 871		goto scif_accept_error_map;
 872	kfree(conreq);
 873
 874	spin_lock(&cep->lock);
 875
 876	if (cep->state == SCIFEP_CLOSING) {
 877		/*
 878		 * Remote failed to allocate resources and NAKed the grant.
 879		 * There is at this point nothing referencing the new end point.
 880		 */
 881		spin_unlock(&cep->lock);
 882		scif_teardown_ep(cep);
 883		kfree(cep);
 884
 885		/* If call with sync flag then go back and wait. */
 886		if (flags & SCIF_ACCEPT_SYNC) {
 887			spin_lock(&lep->lock);
 888			goto retry_connection;
 889		}
 890		return -EAGAIN;
 891	}
 892
 893	scif_get_port(cep->port.port);
 894	*newepd = (scif_epd_t)cep;
 895	spin_unlock(&cep->lock);
 896	return 0;
 897scif_accept_error_map:
 898	scif_anon_inode_fput(cep);
 899scif_accept_error_anon_inode:
 900	scif_teardown_ep(cep);
 901scif_accept_error_qpalloc:
 902	kfree(cep);
 903scif_accept_error_epalloc:
 904	msg.uop = SCIF_CNCT_REJ;
 905	msg.dst.node = conreq->msg.src.node;
 906	msg.dst.port = conreq->msg.src.port;
 907	msg.payload[0] = conreq->msg.payload[0];
 908	msg.payload[1] = conreq->msg.payload[1];
 909	scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
 910	kfree(conreq);
 911	return err;
 912}
 913EXPORT_SYMBOL_GPL(scif_accept);
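
On the listening side, a server typically blocks in scif_accept() with SCIF_ACCEPT_SYNC and hands each new endpoint off for servicing; a sketch, assuming lepd is a listening endpoint set up as shown earlier:

	struct scif_port_id peer;
	scif_epd_t newepd;
	int err;

	for (;;) {
		/* Blocks until a SCIF_CNCT_REQ arrives (step 1 above). */
		err = scif_accept(lepd, &peer, &newepd, SCIF_ACCEPT_SYNC);
		if (err)
			break;	/* e.g. -EINTR on a signal */
		pr_info("connection from node %u port %u\n",
			peer.node, peer.port);
		/* ... hand newepd off to a worker; scif_close() it later ... */
	}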
 914
 915/*
 916 * scif_msg_param_check:
 917 * @epd: The end point returned from scif_open()
 918 * @len: Length to receive
 919 * @flags: blocking or non blocking
 920 *
 921 * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
 922 */
 923static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
 924{
 925	int ret = -EINVAL;
 926
 927	if (len < 0)
 928		goto err_ret;
 929	if (flags && (!(flags & SCIF_RECV_BLOCK)))
 930		goto err_ret;
 931	ret = 0;
 932err_ret:
 933	return ret;
 934}
 935
 936static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
 937{
 938	struct scif_endpt *ep = (struct scif_endpt *)epd;
 939	struct scifmsg notif_msg;
 940	int curr_xfer_len = 0, sent_len = 0, write_count;
 941	int ret = 0;
 942	struct scif_qp *qp = ep->qp_info.qp;
 943
 944	if (flags & SCIF_SEND_BLOCK)
 945		might_sleep();
 946
 947	spin_lock(&ep->lock);
 948	while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
 949		write_count = scif_rb_space(&qp->outbound_q);
 950		if (write_count) {
 951			/* Best effort to send as much data as possible */
 952			curr_xfer_len = min(len - sent_len, write_count);
 953			ret = scif_rb_write(&qp->outbound_q, msg,
 954					    curr_xfer_len);
 955			if (ret < 0)
 956				break;
 957			/* Success. Update write pointer */
 958			scif_rb_commit(&qp->outbound_q);
 959			/*
 960			 * Send a notification to the peer about the
 961			 * produced data message.
 962			 */
 963			notif_msg.src = ep->port;
 964			notif_msg.uop = SCIF_CLIENT_SENT;
 965			notif_msg.payload[0] = ep->remote_ep;
 966			ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
 967			if (ret)
 968				break;
 969			sent_len += curr_xfer_len;
 970			msg = msg + curr_xfer_len;
 971			continue;
 972		}
 973		curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
  974		/* Not enough RB space. Return in the non-blocking case. */
 975		if (!(flags & SCIF_SEND_BLOCK))
 976			break;
 977
 978		spin_unlock(&ep->lock);
 979		/* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */
 980		ret =
 981		wait_event_interruptible(ep->sendwq,
 982					 (SCIFEP_CONNECTED != ep->state) ||
 983					 (scif_rb_space(&qp->outbound_q) >=
 984					 curr_xfer_len));
 985		spin_lock(&ep->lock);
 986		if (ret)
 987			break;
 988	}
 989	if (sent_len)
 990		ret = sent_len;
 991	else if (!ret && SCIFEP_CONNECTED != ep->state)
 992		ret = SCIFEP_DISCONNECTED == ep->state ?
 993			-ECONNRESET : -ENOTCONN;
 994	spin_unlock(&ep->lock);
 995	return ret;
 996}
 997
 998static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
 999{
1000	struct scif_endpt *ep = (struct scif_endpt *)epd;
1001	struct scifmsg notif_msg;
1002	int curr_recv_len = 0, remaining_len = len, read_count;
1003	int ret = 0;
1004	struct scif_qp *qp = ep->qp_info.qp;
1005
1006	if (flags & SCIF_RECV_BLOCK)
1007		might_sleep();
1008	spin_lock(&ep->lock);
1009	while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
1010				 SCIFEP_DISCONNECTED == ep->state)) {
1011		read_count = scif_rb_count(&qp->inbound_q, remaining_len);
1012		if (read_count) {
1013			/*
1014			 * Best effort to recv as much data as there
1015			 * are bytes to read in the RB particularly
1016			 * important for the Non Blocking case.
1017			 */
1018			curr_recv_len = min(remaining_len, read_count);
1019			scif_rb_get_next(&qp->inbound_q, msg, curr_recv_len);
1020			if (ep->state == SCIFEP_CONNECTED) {
1021				/*
1022				 * Update the read pointer only if the endpoint
1023				 * is still connected else the read pointer
1024				 * might no longer exist since the peer has
1025				 * freed resources!
1026				 */
1027				scif_rb_update_read_ptr(&qp->inbound_q);
1028				/*
1029				 * Send a notification to the peer about the
1030				 * consumed data message only if the EP is in
1031				 * SCIFEP_CONNECTED state.
1032				 */
1033				notif_msg.src = ep->port;
1034				notif_msg.uop = SCIF_CLIENT_RCVD;
1035				notif_msg.payload[0] = ep->remote_ep;
1036				ret = _scif_nodeqp_send(ep->remote_dev,
1037							&notif_msg);
1038				if (ret)
1039					break;
1040			}
1041			remaining_len -= curr_recv_len;
1042			msg = msg + curr_recv_len;
1043			continue;
1044		}
1045		/*
1046		 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
1047		 * we will keep looping forever.
1048		 */
1049		if (ep->state == SCIFEP_DISCONNECTED)
1050			break;
1051		/*
1052		 * Return in the Non Blocking case if there is no data
1053		 * to read in this iteration.
1054		 */
1055		if (!(flags & SCIF_RECV_BLOCK))
1056			break;
1057		curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
1058		spin_unlock(&ep->lock);
1059		/*
1060		 * Wait for a SCIF_CLIENT_SEND message in the blocking case
1061		 * or until other side disconnects.
1062		 */
1063		ret =
1064		wait_event_interruptible(ep->recvwq,
1065					 SCIFEP_CONNECTED != ep->state ||
1066					 scif_rb_count(&qp->inbound_q,
1067						       curr_recv_len)
1068					 >= curr_recv_len);
1069		spin_lock(&ep->lock);
1070		if (ret)
1071			break;
1072	}
1073	if (len - remaining_len)
1074		ret = len - remaining_len;
1075	else if (!ret && ep->state != SCIFEP_CONNECTED)
1076		ret = ep->state == SCIFEP_DISCONNECTED ?
1077			-ECONNRESET : -ENOTCONN;
1078	spin_unlock(&ep->lock);
1079	return ret;
1080}
1081
1082/**
1083 * scif_user_send() - Send data to connection queue
1084 * @epd: The end point returned from scif_open()
 1085 * @msg: Address of the data to send
 1086 * @len: Number of bytes to send
1087 * @flags: blocking or non blocking
1088 *
1089 * This function is called from the driver IOCTL entry point
1090 * only and is a wrapper for _scif_send().
1091 */
1092int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
1093{
1094	struct scif_endpt *ep = (struct scif_endpt *)epd;
1095	int err = 0;
1096	int sent_len = 0;
1097	char *tmp;
1098	int loop_len;
1099	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
1100
1101	dev_dbg(scif_info.mdev.this_device,
1102		"SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
1103	if (!len)
1104		return 0;
1105
1106	err = scif_msg_param_check(epd, len, flags);
1107	if (err)
1108		goto send_err;
1109
1110	tmp = kmalloc(chunk_len, GFP_KERNEL);
1111	if (!tmp) {
1112		err = -ENOMEM;
1113		goto send_err;
1114	}
1115	/*
1116	 * Grabbing the lock before breaking up the transfer in
1117	 * multiple chunks is required to ensure that messages do
1118	 * not get fragmented and reordered.
1119	 */
1120	mutex_lock(&ep->sendlock);
1121	while (sent_len != len) {
1122		loop_len = len - sent_len;
1123		loop_len = min(chunk_len, loop_len);
1124		if (copy_from_user(tmp, msg, loop_len)) {
1125			err = -EFAULT;
1126			goto send_free_err;
1127		}
1128		err = _scif_send(epd, tmp, loop_len, flags);
1129		if (err < 0)
1130			goto send_free_err;
1131		sent_len += err;
1132		msg += err;
1133		if (err != loop_len)
1134			goto send_free_err;
1135	}
1136send_free_err:
1137	mutex_unlock(&ep->sendlock);
1138	kfree(tmp);
1139send_err:
1140	return err < 0 ? err : sent_len;
1141}
1142
1143/**
1144 * scif_user_recv() - Receive data from connection queue
1145 * @epd: The end point returned from scif_open()
1146 * @msg: Address to place data
1147 * @len: Length to receive
1148 * @flags: blocking or non blocking
1149 *
1150 * This function is called from the driver IOCTL entry point
1151 * only and is a wrapper for _scif_recv().
1152 */
1153int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
1154{
1155	struct scif_endpt *ep = (struct scif_endpt *)epd;
1156	int err = 0;
1157	int recv_len = 0;
1158	char *tmp;
1159	int loop_len;
1160	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
1161
1162	dev_dbg(scif_info.mdev.this_device,
1163		"SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
1164	if (!len)
1165		return 0;
1166
1167	err = scif_msg_param_check(epd, len, flags);
1168	if (err)
1169		goto recv_err;
1170
1171	tmp = kmalloc(chunk_len, GFP_KERNEL);
1172	if (!tmp) {
1173		err = -ENOMEM;
1174		goto recv_err;
1175	}
1176	/*
1177	 * Grabbing the lock before breaking up the transfer in
1178	 * multiple chunks is required to ensure that messages do
1179	 * not get fragmented and reordered.
1180	 */
1181	mutex_lock(&ep->recvlock);
1182	while (recv_len != len) {
1183		loop_len = len - recv_len;
1184		loop_len = min(chunk_len, loop_len);
1185		err = _scif_recv(epd, tmp, loop_len, flags);
1186		if (err < 0)
1187			goto recv_free_err;
1188		if (copy_to_user(msg, tmp, err)) {
1189			err = -EFAULT;
1190			goto recv_free_err;
1191		}
1192		recv_len += err;
1193		msg += err;
1194		if (err != loop_len)
1195			goto recv_free_err;
1196	}
1197recv_free_err:
1198	mutex_unlock(&ep->recvlock);
1199	kfree(tmp);
1200recv_err:
1201	return err < 0 ? err : recv_len;
1202}
1203
1204/**
1205 * scif_send() - Send data to connection queue
1206 * @epd: The end point returned from scif_open()
 1207 * @msg: Address of the data to send
 1208 * @len: Number of bytes to send
1209 * @flags: blocking or non blocking
1210 *
1211 * This function is called from the kernel mode only and is
1212 * a wrapper for _scif_send().
1213 */
1214int scif_send(scif_epd_t epd, void *msg, int len, int flags)
1215{
1216	struct scif_endpt *ep = (struct scif_endpt *)epd;
1217	int ret;
1218
1219	dev_dbg(scif_info.mdev.this_device,
1220		"SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
1221	if (!len)
1222		return 0;
1223
1224	ret = scif_msg_param_check(epd, len, flags);
1225	if (ret)
1226		return ret;
1227	if (!ep->remote_dev)
1228		return -ENOTCONN;
1229	/*
1230	 * Grab the mutex lock in the blocking case only
1231	 * to ensure messages do not get fragmented/reordered.
1232	 * The non blocking mode is protected using spin locks
1233	 * in _scif_send().
1234	 */
1235	if (flags & SCIF_SEND_BLOCK)
1236		mutex_lock(&ep->sendlock);
1237
1238	ret = _scif_send(epd, msg, len, flags);
1239
1240	if (flags & SCIF_SEND_BLOCK)
1241		mutex_unlock(&ep->sendlock);
1242	return ret;
1243}
1244EXPORT_SYMBOL_GPL(scif_send);
1245
1246/**
1247 * scif_recv() - Receive data from connection queue
1248 * @epd: The end point returned from scif_open()
1249 * @msg: Address to place data
1250 * @len: Length to receive
1251 * @flags: blocking or non blocking
1252 *
1253 * This function is called from the kernel mode only and is
1254 * a wrapper for _scif_recv().
1255 */
1256int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
1257{
1258	struct scif_endpt *ep = (struct scif_endpt *)epd;
1259	int ret;
1260
1261	dev_dbg(scif_info.mdev.this_device,
1262		"SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
1263	if (!len)
1264		return 0;
1265
1266	ret = scif_msg_param_check(epd, len, flags);
1267	if (ret)
1268		return ret;
1269	/*
1270	 * Grab the mutex lock in the blocking case only
1271	 * to ensure messages do not get fragmented/reordered.
1272	 * The non blocking mode is protected using spin locks
 1273	 * in _scif_recv().
1274	 */
1275	if (flags & SCIF_RECV_BLOCK)
1276		mutex_lock(&ep->recvlock);
1277
1278	ret = _scif_recv(epd, msg, len, flags);
1279
1280	if (flags & SCIF_RECV_BLOCK)
1281		mutex_unlock(&ep->recvlock);
1282
1283	return ret;
1284}
1285EXPORT_SYMBOL_GPL(scif_recv);
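
A round trip with the kernel-mode wrappers, assuming epd is a connected endpoint; with SCIF_SEND_BLOCK/SCIF_RECV_BLOCK the calls return only once the whole buffer has been transferred (or the connection goes away):

	char buf[64] = "ping";
	int ret;

	ret = scif_send(epd, buf, sizeof(buf), SCIF_SEND_BLOCK);
	if (ret < 0)
		return ret;	/* -ECONNRESET/-ENOTCONN on a dead endpoint */

	ret = scif_recv(epd, buf, sizeof(buf), SCIF_RECV_BLOCK);
	if (ret < 0)
		return ret;
	/* ret == sizeof(buf) here in the blocking case */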
1286
1287static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
1288				   poll_table *p, struct scif_endpt *ep)
1289{
1290	/*
1291	 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
1292	 * and regrab it afterwards. Because the endpoint state might have
1293	 * changed while the lock was given up, the state must be checked
1294	 * again after re-acquiring the lock. The code in __scif_pollfd(..)
1295	 * does this.
1296	 */
1297	spin_unlock(&ep->lock);
1298	poll_wait(f, wq, p);
1299	spin_lock(&ep->lock);
1300}
1301
1302__poll_t
1303__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
1304{
1305	__poll_t mask = 0;
1306
1307	dev_dbg(scif_info.mdev.this_device,
1308		"SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);
1309
1310	spin_lock(&ep->lock);
1311
1312	/* Endpoint is waiting for a non-blocking connect to complete */
1313	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
1314		_scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
1315		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
1316			if (ep->state == SCIFEP_CONNECTED ||
1317			    ep->state == SCIFEP_DISCONNECTED ||
1318			    ep->conn_err)
1319				mask |= EPOLLOUT;
1320			goto exit;
1321		}
1322	}
1323
1324	/* Endpoint is listening for incoming connection requests */
1325	if (ep->state == SCIFEP_LISTENING) {
1326		_scif_poll_wait(f, &ep->conwq, wait, ep);
1327		if (ep->state == SCIFEP_LISTENING) {
1328			if (ep->conreqcnt)
1329				mask |= EPOLLIN;
1330			goto exit;
1331		}
1332	}
1333
1334	/* Endpoint is connected or disconnected */
1335	if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
1336		if (poll_requested_events(wait) & EPOLLIN)
1337			_scif_poll_wait(f, &ep->recvwq, wait, ep);
1338		if (poll_requested_events(wait) & EPOLLOUT)
1339			_scif_poll_wait(f, &ep->sendwq, wait, ep);
1340		if (ep->state == SCIFEP_CONNECTED ||
1341		    ep->state == SCIFEP_DISCONNECTED) {
1342			/* Data can be read without blocking */
1343			if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
1344				mask |= EPOLLIN;
1345			/* Data can be written without blocking */
1346			if (scif_rb_space(&ep->qp_info.qp->outbound_q))
1347				mask |= EPOLLOUT;
1348			/* Return EPOLLHUP if endpoint is disconnected */
1349			if (ep->state == SCIFEP_DISCONNECTED)
1350				mask |= EPOLLHUP;
1351			goto exit;
1352		}
1353	}
1354
1355	/* Return EPOLLERR if the endpoint is in none of the above states */
1356	mask |= EPOLLERR;
1357exit:
1358	spin_unlock(&ep->lock);
1359	return mask;
1360}
1361
1362/**
1363 * scif_poll() - Kernel mode SCIF poll
1364 * @ufds: Array of scif_pollepd structures containing the end points
1365 *	  and events to poll on
1366 * @nfds: Size of the ufds array
 1367 * @timeout_msecs: Timeout in msecs; a negative value means an infinite timeout
1368 *
1369 * The code flow in this function is based on do_poll(..) in select.c
1370 *
1371 * Returns the number of endpoints which have pending events or 0 in
1372 * the event of a timeout. If a signal is used for wake up, -EINTR is
1373 * returned.
1374 */
1375int
1376scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
1377{
1378	struct poll_wqueues table;
1379	poll_table *pt;
1380	int i, count = 0, timed_out = timeout_msecs == 0;
1381	__poll_t mask;
1382	u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
1383		: msecs_to_jiffies(timeout_msecs);
1384
1385	poll_initwait(&table);
1386	pt = &table.pt;
1387	while (1) {
1388		for (i = 0; i < nfds; i++) {
1389			pt->_key = ufds[i].events | EPOLLERR | EPOLLHUP;
1390			mask = __scif_pollfd(ufds[i].epd->anon,
1391					     pt, ufds[i].epd);
1392			mask &= ufds[i].events | EPOLLERR | EPOLLHUP;
1393			if (mask) {
1394				count++;
1395				pt->_qproc = NULL;
1396			}
1397			ufds[i].revents = mask;
1398		}
1399		pt->_qproc = NULL;
1400		if (!count) {
1401			count = table.error;
1402			if (signal_pending(current))
1403				count = -EINTR;
1404		}
1405		if (count || timed_out)
1406			break;
1407
1408		if (!schedule_timeout_interruptible(timeout))
1409			timed_out = 1;
1410	}
1411	poll_freewait(&table);
1412	return count;
1413}
1414EXPORT_SYMBOL_GPL(scif_poll);
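
Usage parallels userspace poll(2), but with endpoint descriptors instead of fds; a sketch that waits up to one second for inbound data on a connected endpoint epd:

	struct scif_pollepd pollepd = {
		.epd	= epd,
		.events	= EPOLLIN,
	};
	int ret;

	ret = scif_poll(&pollepd, 1, 1000);
	if (ret > 0 && (pollepd.revents & EPOLLIN)) {
		/* scif_recv() will not block for at least one byte */
	}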
1415
1416int scif_get_node_ids(u16 *nodes, int len, u16 *self)
1417{
1418	int online = 0;
1419	int offset = 0;
1420	int node;
1421
1422	if (!scif_is_mgmt_node())
1423		scif_get_node_info();
1424
1425	*self = scif_info.nodeid;
1426	mutex_lock(&scif_info.conflock);
1427	len = min_t(int, len, scif_info.total);
1428	for (node = 0; node <= scif_info.maxid; node++) {
1429		if (_scifdev_alive(&scif_dev[node])) {
1430			online++;
1431			if (offset < len)
1432				nodes[offset++] = node;
1433		}
1434	}
1435	dev_dbg(scif_info.mdev.this_device,
1436		"SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
1437		scif_info.total, online, offset);
1438	mutex_unlock(&scif_info.conflock);
1439
1440	return online;
1441}
1442EXPORT_SYMBOL_GPL(scif_get_node_ids);
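
A caller sizes the nodes array for the fabric it expects and uses the return value, which is the number of online nodes and may exceed the number of entries filled in; a sketch (the array bound is illustrative):

	u16 nodes[32];	/* assumed upper bound on fabric size */
	u16 self;
	int online;

	online = scif_get_node_ids(nodes, ARRAY_SIZE(nodes), &self);
	pr_info("self %u: %d node(s) online\n", self, online);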
1443
1444static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
1445{
1446	struct scif_client *client =
1447		container_of(si, struct scif_client, si);
1448	struct scif_peer_dev *spdev =
1449		container_of(dev, struct scif_peer_dev, dev);
1450
1451	if (client->probe)
1452		client->probe(spdev);
1453	return 0;
1454}
1455
1456static void scif_remove_client_dev(struct device *dev,
1457				   struct subsys_interface *si)
1458{
1459	struct scif_client *client =
1460		container_of(si, struct scif_client, si);
1461	struct scif_peer_dev *spdev =
1462		container_of(dev, struct scif_peer_dev, dev);
1463
1464	if (client->remove)
1465		client->remove(spdev);
1466}
1467
1468void scif_client_unregister(struct scif_client *client)
1469{
1470	subsys_interface_unregister(&client->si);
1471}
1472EXPORT_SYMBOL_GPL(scif_client_unregister);
1473
1474int scif_client_register(struct scif_client *client)
1475{
1476	struct subsys_interface *si = &client->si;
1477
1478	si->name = client->name;
1479	si->subsys = &scif_peer_bus;
1480	si->add_dev = scif_add_client_dev;
1481	si->remove_dev = scif_remove_client_dev;
1482
1483	return subsys_interface_register(&client->si);
1484}
1485EXPORT_SYMBOL_GPL(scif_client_register);
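
A SCIF client fills in name/probe/remove and lets scif_client_register() wire up the subsys_interface; a sketch (the client name and callback bodies are illustrative):

static void my_probe(struct scif_peer_dev *spdev)
{
	pr_info("peer node %u came online\n", spdev->dnode);
}

static void my_remove(struct scif_peer_dev *spdev)
{
	pr_info("peer node %u went away\n", spdev->dnode);
}

static struct scif_client my_client = {
	.name	= "my_client",
	.probe	= my_probe,
	.remove	= my_remove,
};

/* in module init/exit: */
	err = scif_client_register(&my_client);
	...
	scif_client_unregister(&my_client);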