Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
   1/*
   2 * Intel MIC Platform Software Stack (MPSS)
   3 *
   4 * Copyright(c) 2014 Intel Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License, version 2, as
   8 * published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13 * General Public License for more details.
  14 *
  15 * Intel SCIF driver.
  16 *
  17 */
  18#include <linux/scif.h>
  19#include "scif_main.h"
  20#include "scif_map.h"
  21
  22static const char * const scif_ep_states[] = {
  23	"Unbound",
  24	"Bound",
  25	"Listening",
  26	"Connected",
  27	"Connecting",
  28	"Mapping",
  29	"Closing",
  30	"Close Listening",
  31	"Disconnected",
  32	"Zombie"};
  33
  34enum conn_async_state {
  35	ASYNC_CONN_IDLE = 1,	/* ep setup for async connect */
  36	ASYNC_CONN_INPROGRESS,	/* async connect in progress */
  37	ASYNC_CONN_FLUSH_WORK	/* async work flush in progress  */
  38};
  39
  40/*
  41 * File operations for anonymous inode file associated with a SCIF endpoint,
  42 * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
  43 * poll API in the kernel and these take in a struct file *. Since a struct
  44 * file is not available to kernel mode SCIF, it uses an anonymous file for
  45 * this purpose.
  46 */
  47const struct file_operations scif_anon_fops = {
  48	.owner = THIS_MODULE,
  49};
  50
  51scif_epd_t scif_open(void)
  52{
  53	struct scif_endpt *ep;
  54	int err;
  55
  56	might_sleep();
  57	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
  58	if (!ep)
  59		goto err_ep_alloc;
  60
  61	ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
  62	if (!ep->qp_info.qp)
  63		goto err_qp_alloc;
  64
  65	err = scif_anon_inode_getfile(ep);
  66	if (err)
  67		goto err_anon_inode;
  68
  69	spin_lock_init(&ep->lock);
  70	mutex_init(&ep->sendlock);
  71	mutex_init(&ep->recvlock);
  72
  73	scif_rma_ep_init(ep);
  74	ep->state = SCIFEP_UNBOUND;
  75	dev_dbg(scif_info.mdev.this_device,
  76		"SCIFAPI open: ep %p success\n", ep);
  77	return ep;
  78
  79err_anon_inode:
  80	kfree(ep->qp_info.qp);
  81err_qp_alloc:
  82	kfree(ep);
  83err_ep_alloc:
  84	return NULL;
  85}
  86EXPORT_SYMBOL_GPL(scif_open);
  87
  88/*
  89 * scif_disconnect_ep - Disconnects the endpoint if found
  90 * @epd: The end point returned from scif_open()
  91 */
  92static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
  93{
  94	struct scifmsg msg;
  95	struct scif_endpt *fep = NULL;
  96	struct scif_endpt *tmpep;
  97	struct list_head *pos, *tmpq;
  98	int err;
  99
 100	/*
 101	 * Wake up any threads blocked in send()/recv() before closing
 102	 * out the connection. Grabbing and releasing the send/recv lock
 103	 * will ensure that any blocked senders/receivers have exited for
 104	 * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
 105	 * close. Ring 3 endpoints are not affected since close will not
 106	 * be called while there are IOCTLs executing.
 107	 */
 108	wake_up_interruptible(&ep->sendwq);
 109	wake_up_interruptible(&ep->recvwq);
 110	mutex_lock(&ep->sendlock);
 111	mutex_unlock(&ep->sendlock);
 112	mutex_lock(&ep->recvlock);
 113	mutex_unlock(&ep->recvlock);
 114
 115	/* Remove from the connected list */
 116	mutex_lock(&scif_info.connlock);
 117	list_for_each_safe(pos, tmpq, &scif_info.connected) {
 118		tmpep = list_entry(pos, struct scif_endpt, list);
 119		if (tmpep == ep) {
 120			list_del(pos);
 121			fep = tmpep;
 122			spin_lock(&ep->lock);
 123			break;
 124		}
 125	}
 126
 127	if (!fep) {
 128		/*
 129		 * The other side has completed the disconnect before
 130		 * the end point can be removed from the list. Therefore
 131		 * the ep lock is not locked, traverse the disconnected
 132		 * list to find the endpoint and release the conn lock.
 133		 */
 134		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
 135			tmpep = list_entry(pos, struct scif_endpt, list);
 136			if (tmpep == ep) {
 137				list_del(pos);
 138				break;
 139			}
 140		}
 141		mutex_unlock(&scif_info.connlock);
 142		return NULL;
 143	}
 144
 145	init_completion(&ep->discon);
 146	msg.uop = SCIF_DISCNCT;
 147	msg.src = ep->port;
 148	msg.dst = ep->peer;
 149	msg.payload[0] = (u64)ep;
 150	msg.payload[1] = ep->remote_ep;
 151
 152	err = scif_nodeqp_send(ep->remote_dev, &msg);
 153	spin_unlock(&ep->lock);
 154	mutex_unlock(&scif_info.connlock);
 155
 156	if (!err)
 157		/* Wait for the remote node to respond with SCIF_DISCNT_ACK */
 158		wait_for_completion_timeout(&ep->discon,
 159					    SCIF_NODE_ALIVE_TIMEOUT);
 160	return ep;
 161}
 162
 163int scif_close(scif_epd_t epd)
 164{
 165	struct scif_endpt *ep = (struct scif_endpt *)epd;
 166	struct scif_endpt *tmpep;
 167	struct list_head *pos, *tmpq;
 168	enum scif_epd_state oldstate;
 169	bool flush_conn;
 170
 171	dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
 172		ep, scif_ep_states[ep->state]);
 173	might_sleep();
 174	spin_lock(&ep->lock);
 175	flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
 176	spin_unlock(&ep->lock);
 177
 178	if (flush_conn)
 179		flush_work(&scif_info.conn_work);
 180
 181	spin_lock(&ep->lock);
 182	oldstate = ep->state;
 183
 184	ep->state = SCIFEP_CLOSING;
 185
 186	switch (oldstate) {
 187	case SCIFEP_ZOMBIE:
 188		dev_err(scif_info.mdev.this_device,
 189			"SCIFAPI close: zombie state unexpected\n");
 190	case SCIFEP_DISCONNECTED:
 191		spin_unlock(&ep->lock);
 192		scif_unregister_all_windows(epd);
 193		/* Remove from the disconnected list */
 194		mutex_lock(&scif_info.connlock);
 195		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
 196			tmpep = list_entry(pos, struct scif_endpt, list);
 197			if (tmpep == ep) {
 198				list_del(pos);
 199				break;
 200			}
 201		}
 202		mutex_unlock(&scif_info.connlock);
 203		break;
 204	case SCIFEP_UNBOUND:
 205	case SCIFEP_BOUND:
 206	case SCIFEP_CONNECTING:
 207		spin_unlock(&ep->lock);
 208		break;
 209	case SCIFEP_MAPPING:
 210	case SCIFEP_CONNECTED:
 211	case SCIFEP_CLOSING:
 212	{
 213		spin_unlock(&ep->lock);
 214		scif_unregister_all_windows(epd);
 215		scif_disconnect_ep(ep);
 216		break;
 217	}
 218	case SCIFEP_LISTENING:
 219	case SCIFEP_CLLISTEN:
 220	{
 221		struct scif_conreq *conreq;
 222		struct scifmsg msg;
 223		struct scif_endpt *aep;
 224
 225		spin_unlock(&ep->lock);
 226		mutex_lock(&scif_info.eplock);
 227
 228		/* remove from listen list */
 229		list_for_each_safe(pos, tmpq, &scif_info.listen) {
 230			tmpep = list_entry(pos, struct scif_endpt, list);
 231			if (tmpep == ep)
 232				list_del(pos);
 233		}
 234		/* Remove any dangling accepts */
 235		while (ep->acceptcnt) {
 236			aep = list_first_entry(&ep->li_accept,
 237					       struct scif_endpt, liacceptlist);
 238			list_del(&aep->liacceptlist);
 239			scif_put_port(aep->port.port);
 240			list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
 241				tmpep = list_entry(pos, struct scif_endpt,
 242						   miacceptlist);
 243				if (tmpep == aep) {
 244					list_del(pos);
 245					break;
 246				}
 247			}
 248			mutex_unlock(&scif_info.eplock);
 249			mutex_lock(&scif_info.connlock);
 250			list_for_each_safe(pos, tmpq, &scif_info.connected) {
 251				tmpep = list_entry(pos,
 252						   struct scif_endpt, list);
 253				if (tmpep == aep) {
 254					list_del(pos);
 255					break;
 256				}
 257			}
 258			list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
 259				tmpep = list_entry(pos,
 260						   struct scif_endpt, list);
 261				if (tmpep == aep) {
 262					list_del(pos);
 263					break;
 264				}
 265			}
 266			mutex_unlock(&scif_info.connlock);
 267			scif_teardown_ep(aep);
 268			mutex_lock(&scif_info.eplock);
 269			scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
 270			ep->acceptcnt--;
 271		}
 272
 273		spin_lock(&ep->lock);
 274		mutex_unlock(&scif_info.eplock);
 275
 276		/* Remove and reject any pending connection requests. */
 277		while (ep->conreqcnt) {
 278			conreq = list_first_entry(&ep->conlist,
 279						  struct scif_conreq, list);
 280			list_del(&conreq->list);
 281
 282			msg.uop = SCIF_CNCT_REJ;
 283			msg.dst.node = conreq->msg.src.node;
 284			msg.dst.port = conreq->msg.src.port;
 285			msg.payload[0] = conreq->msg.payload[0];
 286			msg.payload[1] = conreq->msg.payload[1];
 287			/*
 288			 * No Error Handling on purpose for scif_nodeqp_send().
 289			 * If the remote node is lost we still want free the
 290			 * connection requests on the self node.
 291			 */
 292			scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
 293					 &msg);
 294			ep->conreqcnt--;
 295			kfree(conreq);
 296		}
 297
 298		spin_unlock(&ep->lock);
 299		/* If a kSCIF accept is waiting wake it up */
 300		wake_up_interruptible(&ep->conwq);
 301		break;
 302	}
 303	}
 304	scif_put_port(ep->port.port);
 305	scif_anon_inode_fput(ep);
 306	scif_teardown_ep(ep);
 307	scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
 308	return 0;
 309}
 310EXPORT_SYMBOL_GPL(scif_close);
 311
 312/**
 313 * scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
 314 *			accept new connections.
 315 * @epd: The end point returned from scif_open()
 316 */
 317int __scif_flush(scif_epd_t epd)
 318{
 319	struct scif_endpt *ep = (struct scif_endpt *)epd;
 320
 321	switch (ep->state) {
 322	case SCIFEP_LISTENING:
 323	{
 324		ep->state = SCIFEP_CLLISTEN;
 325
 326		/* If an accept is waiting wake it up */
 327		wake_up_interruptible(&ep->conwq);
 328		break;
 329	}
 330	default:
 331		break;
 332	}
 333	return 0;
 334}
 335
 336int scif_bind(scif_epd_t epd, u16 pn)
 337{
 338	struct scif_endpt *ep = (struct scif_endpt *)epd;
 339	int ret = 0;
 340	int tmp;
 341
 342	dev_dbg(scif_info.mdev.this_device,
 343		"SCIFAPI bind: ep %p %s requested port number %d\n",
 344		ep, scif_ep_states[ep->state], pn);
 345	if (pn) {
 346		/*
 347		 * Similar to IETF RFC 1700, SCIF ports below
 348		 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
 349		 * processes or by processes executed by privileged users.
 350		 */
 351		if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
 352			ret = -EACCES;
 353			goto scif_bind_admin_exit;
 354		}
 355	}
 356
 357	spin_lock(&ep->lock);
 358	if (ep->state == SCIFEP_BOUND) {
 359		ret = -EINVAL;
 360		goto scif_bind_exit;
 361	} else if (ep->state != SCIFEP_UNBOUND) {
 362		ret = -EISCONN;
 363		goto scif_bind_exit;
 364	}
 365
 366	if (pn) {
 367		tmp = scif_rsrv_port(pn);
 368		if (tmp != pn) {
 369			ret = -EINVAL;
 370			goto scif_bind_exit;
 371		}
 372	} else {
 373		pn = scif_get_new_port();
 374		if (!pn) {
 375			ret = -ENOSPC;
 376			goto scif_bind_exit;
 377		}
 378	}
 379
 380	ep->state = SCIFEP_BOUND;
 381	ep->port.node = scif_info.nodeid;
 382	ep->port.port = pn;
 383	ep->conn_async_state = ASYNC_CONN_IDLE;
 384	ret = pn;
 385	dev_dbg(scif_info.mdev.this_device,
 386		"SCIFAPI bind: bound to port number %d\n", pn);
 387scif_bind_exit:
 388	spin_unlock(&ep->lock);
 389scif_bind_admin_exit:
 390	return ret;
 391}
 392EXPORT_SYMBOL_GPL(scif_bind);
 393
 394int scif_listen(scif_epd_t epd, int backlog)
 395{
 396	struct scif_endpt *ep = (struct scif_endpt *)epd;
 397
 398	dev_dbg(scif_info.mdev.this_device,
 399		"SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
 400	spin_lock(&ep->lock);
 401	switch (ep->state) {
 402	case SCIFEP_ZOMBIE:
 403	case SCIFEP_CLOSING:
 404	case SCIFEP_CLLISTEN:
 405	case SCIFEP_UNBOUND:
 406	case SCIFEP_DISCONNECTED:
 407		spin_unlock(&ep->lock);
 408		return -EINVAL;
 409	case SCIFEP_LISTENING:
 410	case SCIFEP_CONNECTED:
 411	case SCIFEP_CONNECTING:
 412	case SCIFEP_MAPPING:
 413		spin_unlock(&ep->lock);
 414		return -EISCONN;
 415	case SCIFEP_BOUND:
 416		break;
 417	}
 418
 419	ep->state = SCIFEP_LISTENING;
 420	ep->backlog = backlog;
 421
 422	ep->conreqcnt = 0;
 423	ep->acceptcnt = 0;
 424	INIT_LIST_HEAD(&ep->conlist);
 425	init_waitqueue_head(&ep->conwq);
 426	INIT_LIST_HEAD(&ep->li_accept);
 427	spin_unlock(&ep->lock);
 428
 429	/*
 430	 * Listen status is complete so delete the qp information not needed
 431	 * on a listen before placing on the list of listening ep's
 432	 */
 433	scif_teardown_ep(ep);
 434	ep->qp_info.qp = NULL;
 435
 436	mutex_lock(&scif_info.eplock);
 437	list_add_tail(&ep->list, &scif_info.listen);
 438	mutex_unlock(&scif_info.eplock);
 439	return 0;
 440}
 441EXPORT_SYMBOL_GPL(scif_listen);
 442
 443/*
 444 ************************************************************************
 445 * SCIF connection flow:
 446 *
 447 * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
 448 *	connections via a SCIF_CNCT_REQ message
 449 * 2) A SCIF endpoint can initiate a SCIF connection by calling
 450 *	scif_connect(..) which calls scif_setup_qp_connect(..) which
 451 *	allocates the local qp for the endpoint ring buffer and then sends
 452 *	a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
 453 *	a SCIF_CNCT_REJ message
 454 * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
 455 *	wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
 456 *	message otherwise
 457 * 4) A thread blocked waiting for incoming connections allocates its local
 458 *	endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
 459 *	and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
 460 *	the node sends a SCIF_CNCT_REJ message
 461 * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
 462 *	connecting endpoint is woken up as part of handling
 463 *	scif_cnctgnt_resp(..) following which it maps the remote endpoints'
 464 *	QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
 465 *	success or a SCIF_CNCT_GNTNACK message on failure and completes
 466 *	the scif_connect(..) API
 467 * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
 468 *	in step 4 is woken up and completes the scif_accept(..) API
 469 * 7) The SCIF connection is now established between the two SCIF endpoints.
 470 */
 471static int scif_conn_func(struct scif_endpt *ep)
 472{
 473	int err = 0;
 474	struct scifmsg msg;
 475	struct device *spdev;
 476
 477	err = scif_reserve_dma_chan(ep);
 478	if (err) {
 479		dev_err(&ep->remote_dev->sdev->dev,
 480			"%s %d err %d\n", __func__, __LINE__, err);
 481		ep->state = SCIFEP_BOUND;
 482		goto connect_error_simple;
 483	}
 484	/* Initiate the first part of the endpoint QP setup */
 485	err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
 486				    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
 487	if (err) {
 488		dev_err(&ep->remote_dev->sdev->dev,
 489			"%s err %d qp_offset 0x%llx\n",
 490			__func__, err, ep->qp_info.qp_offset);
 491		ep->state = SCIFEP_BOUND;
 492		goto connect_error_simple;
 493	}
 494
 495	spdev = scif_get_peer_dev(ep->remote_dev);
 496	if (IS_ERR(spdev)) {
 497		err = PTR_ERR(spdev);
 498		goto cleanup_qp;
 499	}
 500	/* Format connect message and send it */
 501	msg.src = ep->port;
 502	msg.dst = ep->conn_port;
 503	msg.uop = SCIF_CNCT_REQ;
 504	msg.payload[0] = (u64)ep;
 505	msg.payload[1] = ep->qp_info.qp_offset;
 506	err = _scif_nodeqp_send(ep->remote_dev, &msg);
 507	if (err)
 508		goto connect_error_dec;
 509	scif_put_peer_dev(spdev);
 510	/*
 511	 * Wait for the remote node to respond with SCIF_CNCT_GNT or
 512	 * SCIF_CNCT_REJ message.
 513	 */
 514	err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
 515				 SCIF_NODE_ALIVE_TIMEOUT);
 516	if (!err) {
 517		dev_err(&ep->remote_dev->sdev->dev,
 518			"%s %d timeout\n", __func__, __LINE__);
 519		ep->state = SCIFEP_BOUND;
 520	}
 521	spdev = scif_get_peer_dev(ep->remote_dev);
 522	if (IS_ERR(spdev)) {
 523		err = PTR_ERR(spdev);
 524		goto cleanup_qp;
 525	}
 526	if (ep->state == SCIFEP_MAPPING) {
 527		err = scif_setup_qp_connect_response(ep->remote_dev,
 528						     ep->qp_info.qp,
 529						     ep->qp_info.gnt_pld);
 530		/*
 531		 * If the resource to map the queue are not available then
 532		 * we need to tell the other side to terminate the accept
 533		 */
 534		if (err) {
 535			dev_err(&ep->remote_dev->sdev->dev,
 536				"%s %d err %d\n", __func__, __LINE__, err);
 537			msg.uop = SCIF_CNCT_GNTNACK;
 538			msg.payload[0] = ep->remote_ep;
 539			_scif_nodeqp_send(ep->remote_dev, &msg);
 540			ep->state = SCIFEP_BOUND;
 541			goto connect_error_dec;
 542		}
 543
 544		msg.uop = SCIF_CNCT_GNTACK;
 545		msg.payload[0] = ep->remote_ep;
 546		err = _scif_nodeqp_send(ep->remote_dev, &msg);
 547		if (err) {
 548			ep->state = SCIFEP_BOUND;
 549			goto connect_error_dec;
 550		}
 551		ep->state = SCIFEP_CONNECTED;
 552		mutex_lock(&scif_info.connlock);
 553		list_add_tail(&ep->list, &scif_info.connected);
 554		mutex_unlock(&scif_info.connlock);
 555		dev_dbg(&ep->remote_dev->sdev->dev,
 556			"SCIFAPI connect: ep %p connected\n", ep);
 557	} else if (ep->state == SCIFEP_BOUND) {
 558		dev_dbg(&ep->remote_dev->sdev->dev,
 559			"SCIFAPI connect: ep %p connection refused\n", ep);
 560		err = -ECONNREFUSED;
 561		goto connect_error_dec;
 562	}
 563	scif_put_peer_dev(spdev);
 564	return err;
 565connect_error_dec:
 566	scif_put_peer_dev(spdev);
 567cleanup_qp:
 568	scif_cleanup_ep_qp(ep);
 569connect_error_simple:
 570	return err;
 571}
 572
 573/*
 574 * scif_conn_handler:
 575 *
 576 * Workqueue handler for servicing non-blocking SCIF connect
 577 *
 578 */
 579void scif_conn_handler(struct work_struct *work)
 580{
 581	struct scif_endpt *ep;
 582
 583	do {
 584		ep = NULL;
 585		spin_lock(&scif_info.nb_connect_lock);
 586		if (!list_empty(&scif_info.nb_connect_list)) {
 587			ep = list_first_entry(&scif_info.nb_connect_list,
 588					      struct scif_endpt, conn_list);
 589			list_del(&ep->conn_list);
 590		}
 591		spin_unlock(&scif_info.nb_connect_lock);
 592		if (ep) {
 593			ep->conn_err = scif_conn_func(ep);
 594			wake_up_interruptible(&ep->conn_pend_wq);
 595		}
 596	} while (ep);
 597}
 598
 599int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
 600{
 601	struct scif_endpt *ep = (struct scif_endpt *)epd;
 602	int err = 0;
 603	struct scif_dev *remote_dev;
 604	struct device *spdev;
 605
 606	dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
 607		scif_ep_states[ep->state]);
 608
 609	if (!scif_dev || dst->node > scif_info.maxid)
 610		return -ENODEV;
 611
 612	might_sleep();
 613
 614	remote_dev = &scif_dev[dst->node];
 615	spdev = scif_get_peer_dev(remote_dev);
 616	if (IS_ERR(spdev)) {
 617		err = PTR_ERR(spdev);
 618		return err;
 619	}
 620
 621	spin_lock(&ep->lock);
 622	switch (ep->state) {
 623	case SCIFEP_ZOMBIE:
 624	case SCIFEP_CLOSING:
 625		err = -EINVAL;
 626		break;
 627	case SCIFEP_DISCONNECTED:
 628		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
 629			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
 630		else
 631			err = -EINVAL;
 632		break;
 633	case SCIFEP_LISTENING:
 634	case SCIFEP_CLLISTEN:
 635		err = -EOPNOTSUPP;
 636		break;
 637	case SCIFEP_CONNECTING:
 638	case SCIFEP_MAPPING:
 639		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
 640			err = -EINPROGRESS;
 641		else
 642			err = -EISCONN;
 643		break;
 644	case SCIFEP_CONNECTED:
 645		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
 646			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
 647		else
 648			err = -EISCONN;
 649		break;
 650	case SCIFEP_UNBOUND:
 651		ep->port.port = scif_get_new_port();
 652		if (!ep->port.port) {
 653			err = -ENOSPC;
 654		} else {
 655			ep->port.node = scif_info.nodeid;
 656			ep->conn_async_state = ASYNC_CONN_IDLE;
 657		}
 658		/* Fall through */
 659	case SCIFEP_BOUND:
 660		/*
 661		 * If a non-blocking connect has been already initiated
 662		 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
 663		 * ASYNC_CONN_FLUSH_WORK), the end point could end up in
 664		 * SCIF_BOUND due an error in the connection process
 665		 * (e.g., connection refused) If conn_async_state is
 666		 * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
 667		 * so that the error status can be collected. If the state is
 668		 * already ASYNC_CONN_FLUSH_WORK - then set the error to
 669		 * EINPROGRESS since some other thread is waiting to collect
 670		 * error status.
 671		 */
 672		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
 673			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
 674		} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
 675			err = -EINPROGRESS;
 676		} else {
 677			ep->conn_port = *dst;
 678			init_waitqueue_head(&ep->sendwq);
 679			init_waitqueue_head(&ep->recvwq);
 680			init_waitqueue_head(&ep->conwq);
 681			ep->conn_async_state = 0;
 682
 683			if (unlikely(non_block))
 684				ep->conn_async_state = ASYNC_CONN_INPROGRESS;
 685		}
 686		break;
 687	}
 688
 689	if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
 690			goto connect_simple_unlock1;
 691
 692	ep->state = SCIFEP_CONNECTING;
 693	ep->remote_dev = &scif_dev[dst->node];
 694	ep->qp_info.qp->magic = SCIFEP_MAGIC;
 695	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
 696		init_waitqueue_head(&ep->conn_pend_wq);
 697		spin_lock(&scif_info.nb_connect_lock);
 698		list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
 699		spin_unlock(&scif_info.nb_connect_lock);
 700		err = -EINPROGRESS;
 701		schedule_work(&scif_info.conn_work);
 702	}
 703connect_simple_unlock1:
 704	spin_unlock(&ep->lock);
 705	scif_put_peer_dev(spdev);
 706	if (err) {
 707		return err;
 708	} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
 709		flush_work(&scif_info.conn_work);
 710		err = ep->conn_err;
 711		spin_lock(&ep->lock);
 712		ep->conn_async_state = ASYNC_CONN_IDLE;
 713		spin_unlock(&ep->lock);
 714	} else {
 715		err = scif_conn_func(ep);
 716	}
 717	return err;
 718}
 719
 720int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
 721{
 722	return __scif_connect(epd, dst, false);
 723}
 724EXPORT_SYMBOL_GPL(scif_connect);
 725
 726/**
 727 * scif_accept() - Accept a connection request from the remote node
 728 *
 729 * The function accepts a connection request from the remote node.  Successful
 730 * complete is indicate by a new end point being created and passed back
 731 * to the caller for future reference.
 732 *
 733 * Upon successful complete a zero will be returned and the peer information
 734 * will be filled in.
 735 *
 736 * If the end point is not in the listening state -EINVAL will be returned.
 737 *
 738 * If during the connection sequence resource allocation fails the -ENOMEM
 739 * will be returned.
 740 *
 741 * If the function is called with the ASYNC flag set and no connection requests
 742 * are pending it will return -EAGAIN.
 743 *
 744 * If the remote side is not sending any connection requests the caller may
 745 * terminate this function with a signal.  If so a -EINTR will be returned.
 746 */
 747int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
 748		scif_epd_t *newepd, int flags)
 749{
 750	struct scif_endpt *lep = (struct scif_endpt *)epd;
 751	struct scif_endpt *cep;
 752	struct scif_conreq *conreq;
 753	struct scifmsg msg;
 754	int err;
 755	struct device *spdev;
 756
 757	dev_dbg(scif_info.mdev.this_device,
 758		"SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);
 759
 760	if (flags & ~SCIF_ACCEPT_SYNC)
 761		return -EINVAL;
 762
 763	if (!peer || !newepd)
 764		return -EINVAL;
 765
 766	might_sleep();
 767	spin_lock(&lep->lock);
 768	if (lep->state != SCIFEP_LISTENING) {
 769		spin_unlock(&lep->lock);
 770		return -EINVAL;
 771	}
 772
 773	if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
 774		/* No connection request present and we do not want to wait */
 775		spin_unlock(&lep->lock);
 776		return -EAGAIN;
 777	}
 778
 779	lep->files = current->files;
 780retry_connection:
 781	spin_unlock(&lep->lock);
 782	/* Wait for the remote node to send us a SCIF_CNCT_REQ */
 783	err = wait_event_interruptible(lep->conwq,
 784				       (lep->conreqcnt ||
 785				       (lep->state != SCIFEP_LISTENING)));
 786	if (err)
 787		return err;
 788
 789	if (lep->state != SCIFEP_LISTENING)
 790		return -EINTR;
 791
 792	spin_lock(&lep->lock);
 793
 794	if (!lep->conreqcnt)
 795		goto retry_connection;
 796
 797	/* Get the first connect request off the list */
 798	conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
 799	list_del(&conreq->list);
 800	lep->conreqcnt--;
 801	spin_unlock(&lep->lock);
 802
 803	/* Fill in the peer information */
 804	peer->node = conreq->msg.src.node;
 805	peer->port = conreq->msg.src.port;
 806
 807	cep = kzalloc(sizeof(*cep), GFP_KERNEL);
 808	if (!cep) {
 809		err = -ENOMEM;
 810		goto scif_accept_error_epalloc;
 811	}
 812	spin_lock_init(&cep->lock);
 813	mutex_init(&cep->sendlock);
 814	mutex_init(&cep->recvlock);
 815	cep->state = SCIFEP_CONNECTING;
 816	cep->remote_dev = &scif_dev[peer->node];
 817	cep->remote_ep = conreq->msg.payload[0];
 818
 819	scif_rma_ep_init(cep);
 820
 821	err = scif_reserve_dma_chan(cep);
 822	if (err) {
 823		dev_err(scif_info.mdev.this_device,
 824			"%s %d err %d\n", __func__, __LINE__, err);
 825		goto scif_accept_error_qpalloc;
 826	}
 827
 828	cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
 829	if (!cep->qp_info.qp) {
 830		err = -ENOMEM;
 831		goto scif_accept_error_qpalloc;
 832	}
 833
 834	err = scif_anon_inode_getfile(cep);
 835	if (err)
 836		goto scif_accept_error_anon_inode;
 837
 838	cep->qp_info.qp->magic = SCIFEP_MAGIC;
 839	spdev = scif_get_peer_dev(cep->remote_dev);
 840	if (IS_ERR(spdev)) {
 841		err = PTR_ERR(spdev);
 842		goto scif_accept_error_map;
 843	}
 844	err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
 845				   conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
 846				   cep->remote_dev);
 847	if (err) {
 848		dev_dbg(&cep->remote_dev->sdev->dev,
 849			"SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
 850			lep, cep, err, cep->qp_info.qp_offset);
 851		scif_put_peer_dev(spdev);
 852		goto scif_accept_error_map;
 853	}
 854
 855	cep->port.node = lep->port.node;
 856	cep->port.port = lep->port.port;
 857	cep->peer.node = peer->node;
 858	cep->peer.port = peer->port;
 859	init_waitqueue_head(&cep->sendwq);
 860	init_waitqueue_head(&cep->recvwq);
 861	init_waitqueue_head(&cep->conwq);
 862
 863	msg.uop = SCIF_CNCT_GNT;
 864	msg.src = cep->port;
 865	msg.payload[0] = cep->remote_ep;
 866	msg.payload[1] = cep->qp_info.qp_offset;
 867	msg.payload[2] = (u64)cep;
 868
 869	err = _scif_nodeqp_send(cep->remote_dev, &msg);
 870	scif_put_peer_dev(spdev);
 871	if (err)
 872		goto scif_accept_error_map;
 873retry:
 874	/* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
 875	err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
 876				 SCIF_NODE_ACCEPT_TIMEOUT);
 877	if (!err && scifdev_alive(cep))
 878		goto retry;
 879	err = !err ? -ENODEV : 0;
 880	if (err)
 881		goto scif_accept_error_map;
 882	kfree(conreq);
 883
 884	spin_lock(&cep->lock);
 885
 886	if (cep->state == SCIFEP_CLOSING) {
 887		/*
 888		 * Remote failed to allocate resources and NAKed the grant.
 889		 * There is at this point nothing referencing the new end point.
 890		 */
 891		spin_unlock(&cep->lock);
 892		scif_teardown_ep(cep);
 893		kfree(cep);
 894
 895		/* If call with sync flag then go back and wait. */
 896		if (flags & SCIF_ACCEPT_SYNC) {
 897			spin_lock(&lep->lock);
 898			goto retry_connection;
 899		}
 900		return -EAGAIN;
 901	}
 902
 903	scif_get_port(cep->port.port);
 904	*newepd = (scif_epd_t)cep;
 905	spin_unlock(&cep->lock);
 906	return 0;
 907scif_accept_error_map:
 908	scif_anon_inode_fput(cep);
 909scif_accept_error_anon_inode:
 910	scif_teardown_ep(cep);
 911scif_accept_error_qpalloc:
 912	kfree(cep);
 913scif_accept_error_epalloc:
 914	msg.uop = SCIF_CNCT_REJ;
 915	msg.dst.node = conreq->msg.src.node;
 916	msg.dst.port = conreq->msg.src.port;
 917	msg.payload[0] = conreq->msg.payload[0];
 918	msg.payload[1] = conreq->msg.payload[1];
 919	scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
 920	kfree(conreq);
 921	return err;
 922}
 923EXPORT_SYMBOL_GPL(scif_accept);
 924
 925/*
 926 * scif_msg_param_check:
 927 * @epd: The end point returned from scif_open()
 928 * @len: Length to receive
 929 * @flags: blocking or non blocking
 930 *
 931 * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
 932 */
 933static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
 934{
 935	int ret = -EINVAL;
 936
 937	if (len < 0)
 938		goto err_ret;
 939	if (flags && (!(flags & SCIF_RECV_BLOCK)))
 940		goto err_ret;
 941	ret = 0;
 942err_ret:
 943	return ret;
 944}
 945
 946static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
 947{
 948	struct scif_endpt *ep = (struct scif_endpt *)epd;
 949	struct scifmsg notif_msg;
 950	int curr_xfer_len = 0, sent_len = 0, write_count;
 951	int ret = 0;
 952	struct scif_qp *qp = ep->qp_info.qp;
 953
 954	if (flags & SCIF_SEND_BLOCK)
 955		might_sleep();
 956
 957	spin_lock(&ep->lock);
 958	while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
 959		write_count = scif_rb_space(&qp->outbound_q);
 960		if (write_count) {
 961			/* Best effort to send as much data as possible */
 962			curr_xfer_len = min(len - sent_len, write_count);
 963			ret = scif_rb_write(&qp->outbound_q, msg,
 964					    curr_xfer_len);
 965			if (ret < 0)
 966				break;
 967			/* Success. Update write pointer */
 968			scif_rb_commit(&qp->outbound_q);
 969			/*
 970			 * Send a notification to the peer about the
 971			 * produced data message.
 972			 */
 973			notif_msg.src = ep->port;
 974			notif_msg.uop = SCIF_CLIENT_SENT;
 975			notif_msg.payload[0] = ep->remote_ep;
 976			ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
 977			if (ret)
 978				break;
 979			sent_len += curr_xfer_len;
 980			msg = msg + curr_xfer_len;
 981			continue;
 982		}
 983		curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
 984		/* Not enough RB space. return for the Non Blocking case */
 985		if (!(flags & SCIF_SEND_BLOCK))
 986			break;
 987
 988		spin_unlock(&ep->lock);
 989		/* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */
 990		ret =
 991		wait_event_interruptible(ep->sendwq,
 992					 (SCIFEP_CONNECTED != ep->state) ||
 993					 (scif_rb_space(&qp->outbound_q) >=
 994					 curr_xfer_len));
 995		spin_lock(&ep->lock);
 996		if (ret)
 997			break;
 998	}
 999	if (sent_len)
1000		ret = sent_len;
1001	else if (!ret && SCIFEP_CONNECTED != ep->state)
1002		ret = SCIFEP_DISCONNECTED == ep->state ?
1003			-ECONNRESET : -ENOTCONN;
1004	spin_unlock(&ep->lock);
1005	return ret;
1006}
1007
1008static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
1009{
1010	int read_size;
1011	struct scif_endpt *ep = (struct scif_endpt *)epd;
1012	struct scifmsg notif_msg;
1013	int curr_recv_len = 0, remaining_len = len, read_count;
1014	int ret = 0;
1015	struct scif_qp *qp = ep->qp_info.qp;
1016
1017	if (flags & SCIF_RECV_BLOCK)
1018		might_sleep();
1019	spin_lock(&ep->lock);
1020	while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
1021				 SCIFEP_DISCONNECTED == ep->state)) {
1022		read_count = scif_rb_count(&qp->inbound_q, remaining_len);
1023		if (read_count) {
1024			/*
1025			 * Best effort to recv as much data as there
1026			 * are bytes to read in the RB particularly
1027			 * important for the Non Blocking case.
1028			 */
1029			curr_recv_len = min(remaining_len, read_count);
1030			read_size = scif_rb_get_next(&qp->inbound_q,
1031						     msg, curr_recv_len);
1032			if (ep->state == SCIFEP_CONNECTED) {
1033				/*
1034				 * Update the read pointer only if the endpoint
1035				 * is still connected else the read pointer
1036				 * might no longer exist since the peer has
1037				 * freed resources!
1038				 */
1039				scif_rb_update_read_ptr(&qp->inbound_q);
1040				/*
1041				 * Send a notification to the peer about the
1042				 * consumed data message only if the EP is in
1043				 * SCIFEP_CONNECTED state.
1044				 */
1045				notif_msg.src = ep->port;
1046				notif_msg.uop = SCIF_CLIENT_RCVD;
1047				notif_msg.payload[0] = ep->remote_ep;
1048				ret = _scif_nodeqp_send(ep->remote_dev,
1049							&notif_msg);
1050				if (ret)
1051					break;
1052			}
1053			remaining_len -= curr_recv_len;
1054			msg = msg + curr_recv_len;
1055			continue;
1056		}
1057		/*
1058		 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
1059		 * we will keep looping forever.
1060		 */
1061		if (ep->state == SCIFEP_DISCONNECTED)
1062			break;
1063		/*
1064		 * Return in the Non Blocking case if there is no data
1065		 * to read in this iteration.
1066		 */
1067		if (!(flags & SCIF_RECV_BLOCK))
1068			break;
1069		curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
1070		spin_unlock(&ep->lock);
1071		/*
1072		 * Wait for a SCIF_CLIENT_SEND message in the blocking case
1073		 * or until other side disconnects.
1074		 */
1075		ret =
1076		wait_event_interruptible(ep->recvwq,
1077					 SCIFEP_CONNECTED != ep->state ||
1078					 scif_rb_count(&qp->inbound_q,
1079						       curr_recv_len)
1080					 >= curr_recv_len);
1081		spin_lock(&ep->lock);
1082		if (ret)
1083			break;
1084	}
1085	if (len - remaining_len)
1086		ret = len - remaining_len;
1087	else if (!ret && ep->state != SCIFEP_CONNECTED)
1088		ret = ep->state == SCIFEP_DISCONNECTED ?
1089			-ECONNRESET : -ENOTCONN;
1090	spin_unlock(&ep->lock);
1091	return ret;
1092}
1093
1094/**
1095 * scif_user_send() - Send data to connection queue
1096 * @epd: The end point returned from scif_open()
1097 * @msg: Address to place data
1098 * @len: Length to receive
1099 * @flags: blocking or non blocking
1100 *
1101 * This function is called from the driver IOCTL entry point
1102 * only and is a wrapper for _scif_send().
1103 */
1104int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
1105{
1106	struct scif_endpt *ep = (struct scif_endpt *)epd;
1107	int err = 0;
1108	int sent_len = 0;
1109	char *tmp;
1110	int loop_len;
1111	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
1112
1113	dev_dbg(scif_info.mdev.this_device,
1114		"SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
1115	if (!len)
1116		return 0;
1117
1118	err = scif_msg_param_check(epd, len, flags);
1119	if (err)
1120		goto send_err;
1121
1122	tmp = kmalloc(chunk_len, GFP_KERNEL);
1123	if (!tmp) {
1124		err = -ENOMEM;
1125		goto send_err;
1126	}
1127	/*
1128	 * Grabbing the lock before breaking up the transfer in
1129	 * multiple chunks is required to ensure that messages do
1130	 * not get fragmented and reordered.
1131	 */
1132	mutex_lock(&ep->sendlock);
1133	while (sent_len != len) {
1134		loop_len = len - sent_len;
1135		loop_len = min(chunk_len, loop_len);
1136		if (copy_from_user(tmp, msg, loop_len)) {
1137			err = -EFAULT;
1138			goto send_free_err;
1139		}
1140		err = _scif_send(epd, tmp, loop_len, flags);
1141		if (err < 0)
1142			goto send_free_err;
1143		sent_len += err;
1144		msg += err;
1145		if (err != loop_len)
1146			goto send_free_err;
1147	}
1148send_free_err:
1149	mutex_unlock(&ep->sendlock);
1150	kfree(tmp);
1151send_err:
1152	return err < 0 ? err : sent_len;
1153}
1154
1155/**
1156 * scif_user_recv() - Receive data from connection queue
1157 * @epd: The end point returned from scif_open()
1158 * @msg: Address to place data
1159 * @len: Length to receive
1160 * @flags: blocking or non blocking
1161 *
1162 * This function is called from the driver IOCTL entry point
1163 * only and is a wrapper for _scif_recv().
1164 */
1165int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
1166{
1167	struct scif_endpt *ep = (struct scif_endpt *)epd;
1168	int err = 0;
1169	int recv_len = 0;
1170	char *tmp;
1171	int loop_len;
1172	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
1173
1174	dev_dbg(scif_info.mdev.this_device,
1175		"SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
1176	if (!len)
1177		return 0;
1178
1179	err = scif_msg_param_check(epd, len, flags);
1180	if (err)
1181		goto recv_err;
1182
1183	tmp = kmalloc(chunk_len, GFP_KERNEL);
1184	if (!tmp) {
1185		err = -ENOMEM;
1186		goto recv_err;
1187	}
1188	/*
1189	 * Grabbing the lock before breaking up the transfer in
1190	 * multiple chunks is required to ensure that messages do
1191	 * not get fragmented and reordered.
1192	 */
1193	mutex_lock(&ep->recvlock);
1194	while (recv_len != len) {
1195		loop_len = len - recv_len;
1196		loop_len = min(chunk_len, loop_len);
1197		err = _scif_recv(epd, tmp, loop_len, flags);
1198		if (err < 0)
1199			goto recv_free_err;
1200		if (copy_to_user(msg, tmp, err)) {
1201			err = -EFAULT;
1202			goto recv_free_err;
1203		}
1204		recv_len += err;
1205		msg += err;
1206		if (err != loop_len)
1207			goto recv_free_err;
1208	}
1209recv_free_err:
1210	mutex_unlock(&ep->recvlock);
1211	kfree(tmp);
1212recv_err:
1213	return err < 0 ? err : recv_len;
1214}
1215
1216/**
1217 * scif_send() - Send data to connection queue
1218 * @epd: The end point returned from scif_open()
1219 * @msg: Address to place data
1220 * @len: Length to receive
1221 * @flags: blocking or non blocking
1222 *
1223 * This function is called from the kernel mode only and is
1224 * a wrapper for _scif_send().
1225 */
1226int scif_send(scif_epd_t epd, void *msg, int len, int flags)
1227{
1228	struct scif_endpt *ep = (struct scif_endpt *)epd;
1229	int ret;
1230
1231	dev_dbg(scif_info.mdev.this_device,
1232		"SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
1233	if (!len)
1234		return 0;
1235
1236	ret = scif_msg_param_check(epd, len, flags);
1237	if (ret)
1238		return ret;
1239	if (!ep->remote_dev)
1240		return -ENOTCONN;
1241	/*
1242	 * Grab the mutex lock in the blocking case only
1243	 * to ensure messages do not get fragmented/reordered.
1244	 * The non blocking mode is protected using spin locks
1245	 * in _scif_send().
1246	 */
1247	if (flags & SCIF_SEND_BLOCK)
1248		mutex_lock(&ep->sendlock);
1249
1250	ret = _scif_send(epd, msg, len, flags);
1251
1252	if (flags & SCIF_SEND_BLOCK)
1253		mutex_unlock(&ep->sendlock);
1254	return ret;
1255}
1256EXPORT_SYMBOL_GPL(scif_send);
1257
1258/**
1259 * scif_recv() - Receive data from connection queue
1260 * @epd: The end point returned from scif_open()
1261 * @msg: Address to place data
1262 * @len: Length to receive
1263 * @flags: blocking or non blocking
1264 *
1265 * This function is called from the kernel mode only and is
1266 * a wrapper for _scif_recv().
1267 */
1268int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
1269{
1270	struct scif_endpt *ep = (struct scif_endpt *)epd;
1271	int ret;
1272
1273	dev_dbg(scif_info.mdev.this_device,
1274		"SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
1275	if (!len)
1276		return 0;
1277
1278	ret = scif_msg_param_check(epd, len, flags);
1279	if (ret)
1280		return ret;
1281	/*
1282	 * Grab the mutex lock in the blocking case only
1283	 * to ensure messages do not get fragmented/reordered.
1284	 * The non blocking mode is protected using spin locks
1285	 * in _scif_send().
1286	 */
1287	if (flags & SCIF_RECV_BLOCK)
1288		mutex_lock(&ep->recvlock);
1289
1290	ret = _scif_recv(epd, msg, len, flags);
1291
1292	if (flags & SCIF_RECV_BLOCK)
1293		mutex_unlock(&ep->recvlock);
1294
1295	return ret;
1296}
1297EXPORT_SYMBOL_GPL(scif_recv);
1298
1299static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
1300				   poll_table *p, struct scif_endpt *ep)
1301{
1302	/*
1303	 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
1304	 * and regrab it afterwards. Because the endpoint state might have
1305	 * changed while the lock was given up, the state must be checked
1306	 * again after re-acquiring the lock. The code in __scif_pollfd(..)
1307	 * does this.
1308	 */
1309	spin_unlock(&ep->lock);
1310	poll_wait(f, wq, p);
1311	spin_lock(&ep->lock);
1312}
1313
1314unsigned int
1315__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
1316{
1317	unsigned int mask = 0;
1318
1319	dev_dbg(scif_info.mdev.this_device,
1320		"SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);
1321
1322	spin_lock(&ep->lock);
1323
1324	/* Endpoint is waiting for a non-blocking connect to complete */
1325	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
1326		_scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
1327		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
1328			if (ep->state == SCIFEP_CONNECTED ||
1329			    ep->state == SCIFEP_DISCONNECTED ||
1330			    ep->conn_err)
1331				mask |= POLLOUT;
1332			goto exit;
1333		}
1334	}
1335
1336	/* Endpoint is listening for incoming connection requests */
1337	if (ep->state == SCIFEP_LISTENING) {
1338		_scif_poll_wait(f, &ep->conwq, wait, ep);
1339		if (ep->state == SCIFEP_LISTENING) {
1340			if (ep->conreqcnt)
1341				mask |= POLLIN;
1342			goto exit;
1343		}
1344	}
1345
1346	/* Endpoint is connected or disconnected */
1347	if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
1348		if (poll_requested_events(wait) & POLLIN)
1349			_scif_poll_wait(f, &ep->recvwq, wait, ep);
1350		if (poll_requested_events(wait) & POLLOUT)
1351			_scif_poll_wait(f, &ep->sendwq, wait, ep);
1352		if (ep->state == SCIFEP_CONNECTED ||
1353		    ep->state == SCIFEP_DISCONNECTED) {
1354			/* Data can be read without blocking */
1355			if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
1356				mask |= POLLIN;
1357			/* Data can be written without blocking */
1358			if (scif_rb_space(&ep->qp_info.qp->outbound_q))
1359				mask |= POLLOUT;
1360			/* Return POLLHUP if endpoint is disconnected */
1361			if (ep->state == SCIFEP_DISCONNECTED)
1362				mask |= POLLHUP;
1363			goto exit;
1364		}
1365	}
1366
1367	/* Return POLLERR if the endpoint is in none of the above states */
1368	mask |= POLLERR;
1369exit:
1370	spin_unlock(&ep->lock);
1371	return mask;
1372}
1373
1374/**
1375 * scif_poll() - Kernel mode SCIF poll
1376 * @ufds: Array of scif_pollepd structures containing the end points
1377 *	  and events to poll on
1378 * @nfds: Size of the ufds array
1379 * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout
1380 *
1381 * The code flow in this function is based on do_poll(..) in select.c
1382 *
1383 * Returns the number of endpoints which have pending events or 0 in
1384 * the event of a timeout. If a signal is used for wake up, -EINTR is
1385 * returned.
1386 */
1387int
1388scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
1389{
1390	struct poll_wqueues table;
1391	poll_table *pt;
1392	int i, mask, count = 0, timed_out = timeout_msecs == 0;
1393	u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
1394		: msecs_to_jiffies(timeout_msecs);
1395
1396	poll_initwait(&table);
1397	pt = &table.pt;
1398	while (1) {
1399		for (i = 0; i < nfds; i++) {
1400			pt->_key = ufds[i].events | POLLERR | POLLHUP;
1401			mask = __scif_pollfd(ufds[i].epd->anon,
1402					     pt, ufds[i].epd);
1403			mask &= ufds[i].events | POLLERR | POLLHUP;
1404			if (mask) {
1405				count++;
1406				pt->_qproc = NULL;
1407			}
1408			ufds[i].revents = mask;
1409		}
1410		pt->_qproc = NULL;
1411		if (!count) {
1412			count = table.error;
1413			if (signal_pending(current))
1414				count = -EINTR;
1415		}
1416		if (count || timed_out)
1417			break;
1418
1419		if (!schedule_timeout_interruptible(timeout))
1420			timed_out = 1;
1421	}
1422	poll_freewait(&table);
1423	return count;
1424}
1425EXPORT_SYMBOL_GPL(scif_poll);
1426
1427int scif_get_node_ids(u16 *nodes, int len, u16 *self)
1428{
1429	int online = 0;
1430	int offset = 0;
1431	int node;
1432
1433	if (!scif_is_mgmt_node())
1434		scif_get_node_info();
1435
1436	*self = scif_info.nodeid;
1437	mutex_lock(&scif_info.conflock);
1438	len = min_t(int, len, scif_info.total);
1439	for (node = 0; node <= scif_info.maxid; node++) {
1440		if (_scifdev_alive(&scif_dev[node])) {
1441			online++;
1442			if (offset < len)
1443				nodes[offset++] = node;
1444		}
1445	}
1446	dev_dbg(scif_info.mdev.this_device,
1447		"SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
1448		scif_info.total, online, offset);
1449	mutex_unlock(&scif_info.conflock);
1450
1451	return online;
1452}
1453EXPORT_SYMBOL_GPL(scif_get_node_ids);
1454
1455static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
1456{
1457	struct scif_client *client =
1458		container_of(si, struct scif_client, si);
1459	struct scif_peer_dev *spdev =
1460		container_of(dev, struct scif_peer_dev, dev);
1461
1462	if (client->probe)
1463		client->probe(spdev);
1464	return 0;
1465}
1466
1467static void scif_remove_client_dev(struct device *dev,
1468				   struct subsys_interface *si)
1469{
1470	struct scif_client *client =
1471		container_of(si, struct scif_client, si);
1472	struct scif_peer_dev *spdev =
1473		container_of(dev, struct scif_peer_dev, dev);
1474
1475	if (client->remove)
1476		client->remove(spdev);
1477}
1478
1479void scif_client_unregister(struct scif_client *client)
1480{
1481	subsys_interface_unregister(&client->si);
1482}
1483EXPORT_SYMBOL_GPL(scif_client_unregister);
1484
1485int scif_client_register(struct scif_client *client)
1486{
1487	struct subsys_interface *si = &client->si;
1488
1489	si->name = client->name;
1490	si->subsys = &scif_peer_bus;
1491	si->add_dev = scif_add_client_dev;
1492	si->remove_dev = scif_remove_client_dev;
1493
1494	return subsys_interface_register(&client->si);
1495}
1496EXPORT_SYMBOL_GPL(scif_client_register);