// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/*          Fredy Neeser */
/*          Greg Joyce <greg@opengridcomputing.com> */
/* Copyright (c) 2008-2019, IBM Corporation */
/* Copyright (c) 2017, Open Grid Computing, Inc. */

#include <linux/workqueue.h>
#include <trace/events/sock.h>

#include "erdma.h"
#include "erdma_cm.h"
#include "erdma_verbs.h"

static struct workqueue_struct *erdma_cm_wq;

static void erdma_cm_llp_state_change(struct sock *sk);
static void erdma_cm_llp_data_ready(struct sock *sk);
static void erdma_cm_llp_error_report(struct sock *sk);

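/*
 * A CEP (connection endpoint) takes over the socket's upcalls while a
 * connection is being established: the original sk callbacks are saved
 * in the CEP and replaced by the erdma_cm_llp_*() handlers below, then
 * restored again when the socket is disassociated from the CEP.
 */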
static void erdma_sk_assign_cm_upcalls(struct sock *sk)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = erdma_cm_llp_state_change;
	sk->sk_data_ready = erdma_cm_llp_data_ready;
	sk->sk_error_report = erdma_cm_llp_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void erdma_sk_save_upcalls(struct sock *sk)
{
	struct erdma_cep *cep = sk_to_cep(sk);

	write_lock_bh(&sk->sk_callback_lock);
	cep->sk_state_change = sk->sk_state_change;
	cep->sk_data_ready = sk->sk_data_ready;
	cep->sk_error_report = sk->sk_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void erdma_sk_restore_upcalls(struct sock *sk, struct erdma_cep *cep)
{
	sk->sk_state_change = cep->sk_state_change;
	sk->sk_data_ready = cep->sk_data_ready;
	sk->sk_error_report = cep->sk_error_report;
	sk->sk_user_data = NULL;
}

static void erdma_socket_disassoc(struct socket *s)
{
	struct sock *sk = s->sk;
	struct erdma_cep *cep;

	if (sk) {
		write_lock_bh(&sk->sk_callback_lock);
		cep = sk_to_cep(sk);
		if (cep) {
			erdma_sk_restore_upcalls(sk, cep);
			erdma_cep_put(cep);
		} else {
			WARN_ON_ONCE(1);
		}
		write_unlock_bh(&sk->sk_callback_lock);
	} else {
		WARN_ON_ONCE(1);
	}
}

static void erdma_cep_socket_assoc(struct erdma_cep *cep, struct socket *s)
{
	cep->sock = s;
	erdma_cep_get(cep);
	s->sk->sk_user_data = cep;

	erdma_sk_save_upcalls(s->sk);
	erdma_sk_assign_cm_upcalls(s->sk);
}

static void erdma_disassoc_listen_cep(struct erdma_cep *cep)
{
	if (cep->listen_cep) {
		erdma_cep_put(cep->listen_cep);
		cep->listen_cep = NULL;
	}
}

static struct erdma_cep *erdma_cep_alloc(struct erdma_dev *dev)
{
	struct erdma_cep *cep = kzalloc(sizeof(*cep), GFP_KERNEL);
	unsigned long flags;

	if (!cep)
		return NULL;

	INIT_LIST_HEAD(&cep->listenq);
	INIT_LIST_HEAD(&cep->devq);
	INIT_LIST_HEAD(&cep->work_freelist);

	kref_init(&cep->ref);
	cep->state = ERDMA_EPSTATE_IDLE;
	init_waitqueue_head(&cep->waitq);
	spin_lock_init(&cep->lock);
	cep->dev = dev;

	spin_lock_irqsave(&dev->lock, flags);
	list_add_tail(&cep->devq, &dev->cep_list);
	spin_unlock_irqrestore(&dev->lock, flags);

	return cep;
}

static void erdma_cm_free_work(struct erdma_cep *cep)
{
	struct list_head *w, *tmp;
	struct erdma_cm_work *work;

	list_for_each_safe(w, tmp, &cep->work_freelist) {
		work = list_entry(w, struct erdma_cm_work, list);
		list_del(&work->list);
		kfree(work);
	}
}

static void erdma_cancel_mpatimer(struct erdma_cep *cep)
{
	spin_lock_bh(&cep->lock);
	if (cep->mpa_timer) {
		if (cancel_delayed_work(&cep->mpa_timer->work)) {
			erdma_cep_put(cep);
			kfree(cep->mpa_timer);
		}
		cep->mpa_timer = NULL;
	}
	spin_unlock_bh(&cep->lock);
}

static void erdma_put_work(struct erdma_cm_work *work)
{
	INIT_LIST_HEAD(&work->list);
	spin_lock_bh(&work->cep->lock);
	list_add(&work->list, &work->cep->work_freelist);
	spin_unlock_bh(&work->cep->lock);
}

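/*
 * erdma_cep_set_inuse()/erdma_cep_set_free() implement a simple
 * sleeping exclusivity lock on the CEP: a caller sleeps on cep->waitq
 * until in_use is clear, then owns the CEP until it calls
 * erdma_cep_set_free(). All CEP state transitions happen under this
 * protection.
 */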
static void erdma_cep_set_inuse(struct erdma_cep *cep)
{
	unsigned long flags;

	spin_lock_irqsave(&cep->lock, flags);
	while (cep->in_use) {
		spin_unlock_irqrestore(&cep->lock, flags);
		wait_event_interruptible(cep->waitq, !cep->in_use);
		if (signal_pending(current))
			flush_signals(current);

		spin_lock_irqsave(&cep->lock, flags);
	}

	cep->in_use = 1;
	spin_unlock_irqrestore(&cep->lock, flags);
}

static void erdma_cep_set_free(struct erdma_cep *cep)
{
	unsigned long flags;

	spin_lock_irqsave(&cep->lock, flags);
	cep->in_use = 0;
	spin_unlock_irqrestore(&cep->lock, flags);

	wake_up(&cep->waitq);
}

static void __erdma_cep_dealloc(struct kref *ref)
{
	struct erdma_cep *cep = container_of(ref, struct erdma_cep, ref);
	struct erdma_dev *dev = cep->dev;
	unsigned long flags;

	WARN_ON(cep->listen_cep);

	kfree(cep->private_data);
	kfree(cep->mpa.pdata);
	spin_lock_bh(&cep->lock);
	if (!list_empty(&cep->work_freelist))
		erdma_cm_free_work(cep);
	spin_unlock_bh(&cep->lock);

	spin_lock_irqsave(&dev->lock, flags);
	list_del(&cep->devq);
	spin_unlock_irqrestore(&dev->lock, flags);
	kfree(cep);
}

static struct erdma_cm_work *erdma_get_work(struct erdma_cep *cep)
{
	struct erdma_cm_work *work = NULL;

	spin_lock_bh(&cep->lock);
	if (!list_empty(&cep->work_freelist)) {
		work = list_entry(cep->work_freelist.next, struct erdma_cm_work,
				  list);
		list_del_init(&work->list);
	}

	spin_unlock_bh(&cep->lock);
	return work;
}

static int erdma_cm_alloc_work(struct erdma_cep *cep, int num)
{
	struct erdma_cm_work *work;

	while (num--) {
		work = kmalloc(sizeof(*work), GFP_KERNEL);
		if (!work) {
			if (!(list_empty(&cep->work_freelist)))
				erdma_cm_free_work(cep);
			return -ENOMEM;
		}
		work->cep = cep;
		INIT_LIST_HEAD(&work->list);
		list_add(&work->list, &cep->work_freelist);
	}

	return 0;
}

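/*
 * erdma_cm_upcall() - Deliver a connection event to the IWCM.
 *
 * For CONNECT_REQUEST the event is posted on the listening CEP's
 * cm_id; all other events use the CEP's own cm_id. Any MPA private
 * data received so far is attached to CONNECT_REQUEST and
 * CONNECT_REPLY events, together with the socket's address pair.
 */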
static int erdma_cm_upcall(struct erdma_cep *cep, enum iw_cm_event_type reason,
			   int status)
{
	struct iw_cm_event event;
	struct iw_cm_id *cm_id;

	memset(&event, 0, sizeof(event));
	event.status = status;
	event.event = reason;

	if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
		event.provider_data = cep;
		cm_id = cep->listen_cep->cm_id;

		event.ird = cep->dev->attrs.max_ird;
		event.ord = cep->dev->attrs.max_ord;
	} else {
		cm_id = cep->cm_id;
	}

	if (reason == IW_CM_EVENT_CONNECT_REQUEST ||
	    reason == IW_CM_EVENT_CONNECT_REPLY) {
		u16 pd_len = be16_to_cpu(cep->mpa.hdr.params.pd_len);

		if (pd_len && cep->mpa.pdata) {
			event.private_data_len = pd_len;
			event.private_data = cep->mpa.pdata;
		}

		getname_local(cep->sock, &event.local_addr);
		getname_peer(cep->sock, &event.remote_addr);
	}

	return cm_id->event_handler(cm_id, &event);
}

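/*
 * erdma_qp_cm_drop() - Forcefully drop the connection state attached
 * to a QP, e.g. when the application closes the QP.
 *
 * Depending on the CEP state, the IWCM is notified with CONNECT_REPLY
 * or CLOSE before the cm_id, socket and QP references are released.
 */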
void erdma_qp_cm_drop(struct erdma_qp *qp)
{
	struct erdma_cep *cep = qp->cep;

	if (!qp->cep)
		return;

	erdma_cep_set_inuse(cep);

	/* already closed. */
	if (cep->state == ERDMA_EPSTATE_CLOSED)
		goto out;

	if (cep->cm_id) {
		switch (cep->state) {
		case ERDMA_EPSTATE_AWAIT_MPAREP:
			erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
					-EINVAL);
			break;
		case ERDMA_EPSTATE_RDMA_MODE:
			erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
			break;
		case ERDMA_EPSTATE_IDLE:
		case ERDMA_EPSTATE_LISTENING:
		case ERDMA_EPSTATE_CONNECTING:
		case ERDMA_EPSTATE_AWAIT_MPAREQ:
		case ERDMA_EPSTATE_RECVD_MPAREQ:
		case ERDMA_EPSTATE_CLOSED:
		default:
			break;
		}
		cep->cm_id->rem_ref(cep->cm_id);
		cep->cm_id = NULL;
		erdma_cep_put(cep);
	}
	cep->state = ERDMA_EPSTATE_CLOSED;

	if (cep->sock) {
		erdma_socket_disassoc(cep->sock);
		sock_release(cep->sock);
		cep->sock = NULL;
	}

	if (cep->qp) {
		cep->qp = NULL;
		erdma_qp_put(qp);
	}
out:
	erdma_cep_set_free(cep);
}

void erdma_cep_put(struct erdma_cep *cep)
{
	WARN_ON(kref_read(&cep->ref) < 1);
	kref_put(&cep->ref, __erdma_cep_dealloc);
}

void erdma_cep_get(struct erdma_cep *cep)
{
	kref_get(&cep->ref);
}

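/*
 * erdma_send_mpareqrep() - Send an MPA Request or Reply on the socket.
 *
 * The wire format is the fixed MPA Request/Reply header, followed by
 * erdma's extended data (cookie and flag bits such as the congestion
 * control selection), followed by pd_len bytes of private data, if
 * any. All parts go out in a single kernel_sendmsg() call.
 */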
static int erdma_send_mpareqrep(struct erdma_cep *cep, const void *pdata,
				u8 pd_len)
{
	struct socket *s = cep->sock;
	struct mpa_rr *rr = &cep->mpa.hdr;
	struct kvec iov[3];
	struct msghdr msg;
	int iovec_num = 0;
	int ret;
	int mpa_len;

	memset(&msg, 0, sizeof(msg));

	rr->params.pd_len = cpu_to_be16(pd_len);

	iov[iovec_num].iov_base = rr;
	iov[iovec_num].iov_len = sizeof(*rr);
	iovec_num++;
	mpa_len = sizeof(*rr);

	iov[iovec_num].iov_base = &cep->mpa.ext_data;
	iov[iovec_num].iov_len = sizeof(cep->mpa.ext_data);
	iovec_num++;
	mpa_len += sizeof(cep->mpa.ext_data);

	if (pd_len) {
		iov[iovec_num].iov_base = (char *)pdata;
		iov[iovec_num].iov_len = pd_len;
		mpa_len += pd_len;
		iovec_num++;
	}

	ret = kernel_sendmsg(s, &msg, iov, iovec_num, mpa_len);

	return ret < 0 ? ret : 0;
}

static inline int ksock_recv(struct socket *sock, char *buf, size_t size,
			     int flags)
{
	struct kvec iov = { buf, size };
	struct msghdr msg = { .msg_name = NULL, .msg_flags = flags };

	return kernel_recvmsg(sock, &msg, &iov, 1, size, flags);
}

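/*
 * __recv_mpa_hdr() - Nonblocking, resumable receive of an MPA header.
 *
 * Reads at most the remaining hdr_size - hdr_rcvd bytes into hdr.
 * Returns 0 once the header is complete, -EAGAIN if data is still
 * outstanding (the bytes consumed by this call are reported via
 * rcvd_out), or -ECONNABORTED if the peer closed the connection or
 * another socket error occurred.
 */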
static int __recv_mpa_hdr(struct erdma_cep *cep, int hdr_rcvd, char *hdr,
			  int hdr_size, int *rcvd_out)
{
	struct socket *s = cep->sock;
	int rcvd;

	*rcvd_out = 0;
	if (hdr_rcvd < hdr_size) {
		rcvd = ksock_recv(s, hdr + hdr_rcvd, hdr_size - hdr_rcvd,
				  MSG_DONTWAIT);
		if (rcvd == -EAGAIN)
			return -EAGAIN;

		if (rcvd <= 0)
			return -ECONNABORTED;

		hdr_rcvd += rcvd;
		*rcvd_out = rcvd;

		if (hdr_rcvd < hdr_size)
			return -EAGAIN;
	}

	return 0;
}

static void __mpa_rr_set_revision(__be16 *bits, u8 rev)
{
	*bits = (*bits & ~MPA_RR_MASK_REVISION) |
		(cpu_to_be16(rev) & MPA_RR_MASK_REVISION);
}

static u8 __mpa_rr_revision(__be16 mpa_rr_bits)
{
	__be16 rev = mpa_rr_bits & MPA_RR_MASK_REVISION;

	return (u8)be16_to_cpu(rev);
}

static void __mpa_ext_set_cc(__be32 *bits, u32 cc)
{
	*bits = (*bits & ~MPA_EXT_FLAG_CC) |
		(cpu_to_be32(cc) & MPA_EXT_FLAG_CC);
}

static u8 __mpa_ext_cc(__be32 mpa_ext_bits)
{
	__be32 cc = mpa_ext_bits & MPA_EXT_FLAG_CC;

	return (u8)be32_to_cpu(cc);
}

/*
 * Receive MPA Request/Reply header.
 *
 * Returns 0 if the complete MPA Request/Reply header, including any
 * private data, was received. Returns -EAGAIN if the header was only
 * partially received, or a negative error code otherwise.
 *
 * Context: May be called in process context only
 */
static int erdma_recv_mpa_rr(struct erdma_cep *cep)
{
	struct mpa_rr *hdr = &cep->mpa.hdr;
	struct socket *s = cep->sock;
	u16 pd_len;
	int rcvd, to_rcv, ret, pd_rcvd;

	if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) {
		ret = __recv_mpa_hdr(cep, cep->mpa.bytes_rcvd,
				     (char *)&cep->mpa.hdr,
				     sizeof(struct mpa_rr), &rcvd);
		cep->mpa.bytes_rcvd += rcvd;
		if (ret)
			return ret;
	}

	if (be16_to_cpu(hdr->params.pd_len) > MPA_MAX_PRIVDATA ||
	    __mpa_rr_revision(hdr->params.bits) != MPA_REVISION_EXT_1)
		return -EPROTO;

	if (cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) <
	    sizeof(struct erdma_mpa_ext)) {
		ret = __recv_mpa_hdr(
			cep, cep->mpa.bytes_rcvd - sizeof(struct mpa_rr),
			(char *)&cep->mpa.ext_data,
			sizeof(struct erdma_mpa_ext), &rcvd);
		cep->mpa.bytes_rcvd += rcvd;
		if (ret)
			return ret;
	}

	pd_len = be16_to_cpu(hdr->params.pd_len);
	pd_rcvd = cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) -
		  sizeof(struct erdma_mpa_ext);
	to_rcv = pd_len - pd_rcvd;

	if (!to_rcv) {
		/*
		 * We have received the whole MPA Request/Reply message.
		 * Check against peer protocol violation.
		 */
		u32 word;

		ret = __recv_mpa_hdr(cep, 0, (char *)&word, sizeof(word),
				     &rcvd);
		if (ret == -EAGAIN && rcvd == 0)
			return 0;

		if (ret)
			return ret;

		return -EPROTO;
	}

	/*
	 * At this point, MPA header has been fully received, and pd_len != 0.
	 * So, begin to receive private data.
	 */
	if (!cep->mpa.pdata) {
		cep->mpa.pdata = kmalloc(pd_len + 4, GFP_KERNEL);
		if (!cep->mpa.pdata)
			return -ENOMEM;
	}

	rcvd = ksock_recv(s, cep->mpa.pdata + pd_rcvd, to_rcv + 4,
			  MSG_DONTWAIT);
	if (rcvd < 0)
		return rcvd;

	if (rcvd > to_rcv)
		return -EPROTO;

	cep->mpa.bytes_rcvd += rcvd;

	if (to_rcv == rcvd)
		return 0;

	return -EAGAIN;
}

/*
 * erdma_proc_mpareq()
 *
 * Read MPA Request from socket and signal a new connection to the IWCM
 * on success. Caller must hold lock on corresponding listening CEP.
 */
static int erdma_proc_mpareq(struct erdma_cep *cep)
{
	struct mpa_rr *req;
	int ret;

	ret = erdma_recv_mpa_rr(cep);
	if (ret)
		return ret;

	req = &cep->mpa.hdr;

	if (memcmp(req->key, MPA_KEY_REQ, MPA_KEY_SIZE))
		return -EPROTO;

	memcpy(req->key, MPA_KEY_REP, MPA_KEY_SIZE);

	/* MPA markers and CRC are currently not supported. */
	if (req->params.bits & MPA_RR_FLAG_MARKERS ||
	    req->params.bits & MPA_RR_FLAG_CRC)
		goto reject_conn;

	cep->state = ERDMA_EPSTATE_RECVD_MPAREQ;

	/* Keep reference until IWCM accepts/rejects */
	erdma_cep_get(cep);
	ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REQUEST, 0);
	if (ret)
		erdma_cep_put(cep);

	return ret;

reject_conn:
	req->params.bits &= ~MPA_RR_FLAG_MARKERS;
	req->params.bits |= MPA_RR_FLAG_REJECT;
	req->params.bits &= ~MPA_RR_FLAG_CRC;

	kfree(cep->mpa.pdata);
	cep->mpa.pdata = NULL;
	erdma_send_mpareqrep(cep, NULL, 0);

	return -EOPNOTSUPP;
}

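/*
 * erdma_proc_mpareply() - Process the peer's MPA Reply on the active
 * side.
 *
 * Validates the reply (key, reject flag, markers/CRC), then moves the
 * QP to RTS via erdma_modify_qp_internal() and reports CONNECT_REPLY
 * to the IWCM. If the peer selected a different congestion control
 * algorithm, the QP falls back to COMPROMISE_CC.
 */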
static int erdma_proc_mpareply(struct erdma_cep *cep)
{
	struct erdma_qp_attrs qp_attrs;
	struct erdma_qp *qp = cep->qp;
	struct mpa_rr *rep;
	int ret;

	ret = erdma_recv_mpa_rr(cep);
	if (ret)
		goto out_err;

	erdma_cancel_mpatimer(cep);

	rep = &cep->mpa.hdr;

	if (memcmp(rep->key, MPA_KEY_REP, MPA_KEY_SIZE)) {
		ret = -EPROTO;
		goto out_err;
	}

	if (rep->params.bits & MPA_RR_FLAG_REJECT) {
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET);
		return -ECONNRESET;
	}

	/* MPA markers and CRC are currently not supported. */
	if ((rep->params.bits & MPA_RR_FLAG_MARKERS) ||
	    (rep->params.bits & MPA_RR_FLAG_CRC)) {
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED);
		return -EINVAL;
	}

	memset(&qp_attrs, 0, sizeof(qp_attrs));
	qp_attrs.irq_size = cep->ird;
	qp_attrs.orq_size = cep->ord;
	qp_attrs.state = ERDMA_QP_STATE_RTS;

	down_write(&qp->state_lock);
	if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto out_err;
	}

	qp->attrs.qp_type = ERDMA_QP_ACTIVE;
	if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc)
		qp->attrs.cc = COMPROMISE_CC;

	ret = erdma_modify_qp_internal(qp, &qp_attrs,
				       ERDMA_QP_ATTR_STATE |
				       ERDMA_QP_ATTR_LLP_HANDLE |
				       ERDMA_QP_ATTR_MPA);

	up_write(&qp->state_lock);

	if (!ret) {
		ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 0);
		if (!ret)
			cep->state = ERDMA_EPSTATE_RDMA_MODE;

		return 0;
	}

out_err:
	if (ret != -EAGAIN)
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);

	return ret;
}

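/*
 * erdma_accept_newconn() - Accept an incoming TCP connection on a
 * listening CEP.
 *
 * Allocates a new CEP for the child socket, arms the MPA timeout and
 * links the new CEP to its listener. If the MPA Request already sits
 * in the socket receive queue, it is processed right away instead of
 * waiting for the data_ready upcall.
 */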
static void erdma_accept_newconn(struct erdma_cep *cep)
{
	struct socket *s = cep->sock;
	struct socket *new_s = NULL;
	struct erdma_cep *new_cep = NULL;
	int ret = 0;

	if (cep->state != ERDMA_EPSTATE_LISTENING)
		goto error;

	new_cep = erdma_cep_alloc(cep->dev);
	if (!new_cep)
		goto error;

	/*
	 * 4: Allocate a sufficient number of work elements
	 * to allow concurrent handling of local + peer close
	 * events, MPA header processing + MPA timeout.
	 */
	if (erdma_cm_alloc_work(new_cep, 4) != 0)
		goto error;

	/*
	 * Copy the saved socket callbacks from the listening CEP
	 * and associate the new socket with the new CEP.
	 */
	new_cep->sk_state_change = cep->sk_state_change;
	new_cep->sk_data_ready = cep->sk_data_ready;
	new_cep->sk_error_report = cep->sk_error_report;

	ret = kernel_accept(s, &new_s, O_NONBLOCK);
	if (ret != 0)
		goto error;

	new_cep->sock = new_s;
	erdma_cep_get(new_cep);
	new_s->sk->sk_user_data = new_cep;

	tcp_sock_set_nodelay(new_s->sk);
	new_cep->state = ERDMA_EPSTATE_AWAIT_MPAREQ;

	ret = erdma_cm_queue_work(new_cep, ERDMA_CM_WORK_MPATIMEOUT);
	if (ret)
		goto error;

	new_cep->listen_cep = cep;
	erdma_cep_get(cep);

	if (atomic_read(&new_s->sk->sk_rmem_alloc)) {
		/* MPA REQ already queued */
		erdma_cep_set_inuse(new_cep);
		ret = erdma_proc_mpareq(new_cep);
		if (ret != -EAGAIN) {
			erdma_cep_put(cep);
			new_cep->listen_cep = NULL;
			if (ret) {
				erdma_cep_set_free(new_cep);
				goto error;
			}
		}
		erdma_cep_set_free(new_cep);
	}
	return;

error:
	if (new_cep) {
		new_cep->state = ERDMA_EPSTATE_CLOSED;
		erdma_cancel_mpatimer(new_cep);

		erdma_cep_put(new_cep);
		new_cep->sock = NULL;
	}

	if (new_s) {
		erdma_socket_disassoc(new_s);
		sock_release(new_s);
	}
}

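/*
 * erdma_newconn_connected() - Active side: the TCP connection is up,
 * so build and send the MPA Request and arm the MPA reply timeout.
 */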
static int erdma_newconn_connected(struct erdma_cep *cep)
{
	int ret = 0;

	cep->mpa.hdr.params.bits = 0;
	__mpa_rr_set_revision(&cep->mpa.hdr.params.bits, MPA_REVISION_EXT_1);

	memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, MPA_KEY_SIZE);
	cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);
	__mpa_ext_set_cc(&cep->mpa.ext_data.bits, cep->qp->attrs.cc);

	ret = erdma_send_mpareqrep(cep, cep->private_data, cep->pd_len);
	cep->state = ERDMA_EPSTATE_AWAIT_MPAREP;
	cep->mpa.hdr.params.pd_len = 0;

	if (ret >= 0)
		ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_MPATIMEOUT);

	return ret;
}

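/*
 * erdma_cm_work_handler() - Deferred handler for all CM work items.
 *
 * Runs from erdma_cm_wq and takes the CEP in_use for the duration.
 * Dispatches on the work type (connect established/timeout, accept,
 * MPA header processing, local/peer close, MPA timeout) and, if the
 * connection is to be torn down, releases the QP, socket and cm_id
 * references before dropping the work's own CEP reference.
 */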
static void erdma_cm_work_handler(struct work_struct *w)
{
	struct erdma_cm_work *work;
	struct erdma_cep *cep;
	int release_cep = 0, ret = 0;

	work = container_of(w, struct erdma_cm_work, work.work);
	cep = work->cep;

	erdma_cep_set_inuse(cep);

	switch (work->type) {
	case ERDMA_CM_WORK_CONNECTED:
		erdma_cancel_mpatimer(cep);
		if (cep->state == ERDMA_EPSTATE_CONNECTING) {
			ret = erdma_newconn_connected(cep);
			if (ret) {
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-EIO);
				release_cep = 1;
			}
		}
		break;
	case ERDMA_CM_WORK_CONNECTTIMEOUT:
		if (cep->state == ERDMA_EPSTATE_CONNECTING) {
			cep->mpa_timer = NULL;
			erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
					-ETIMEDOUT);
			release_cep = 1;
		}
		break;
	case ERDMA_CM_WORK_ACCEPT:
		erdma_accept_newconn(cep);
		break;
	case ERDMA_CM_WORK_READ_MPAHDR:
		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			if (cep->listen_cep) {
				erdma_cep_set_inuse(cep->listen_cep);

				if (cep->listen_cep->state ==
				    ERDMA_EPSTATE_LISTENING)
					ret = erdma_proc_mpareq(cep);
				else
					ret = -EFAULT;

				erdma_cep_set_free(cep->listen_cep);

				if (ret != -EAGAIN) {
					erdma_cep_put(cep->listen_cep);
					cep->listen_cep = NULL;
					if (ret)
						erdma_cep_put(cep);
				}
			}
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
			ret = erdma_proc_mpareply(cep);
		}

		if (ret && ret != -EAGAIN)
			release_cep = 1;
		break;
	case ERDMA_CM_WORK_CLOSE_LLP:
		if (cep->cm_id)
			erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
		release_cep = 1;
		break;
	case ERDMA_CM_WORK_PEER_CLOSE:
		if (cep->cm_id) {
			if (cep->state == ERDMA_EPSTATE_CONNECTING ||
			    cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
				/*
				 * MPA reply not received, but connection
				 * dropped.
				 */
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-ECONNRESET);
			} else if (cep->state == ERDMA_EPSTATE_RDMA_MODE) {
				/*
				 * NOTE: IW_CM_EVENT_DISCONNECT is given just
				 *       to transition IWCM into CLOSING.
				 */
				erdma_cm_upcall(cep, IW_CM_EVENT_DISCONNECT, 0);
				erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
			}
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			/* Socket close before MPA request received. */
			erdma_disassoc_listen_cep(cep);
			erdma_cep_put(cep);
		}
		release_cep = 1;
		break;
	case ERDMA_CM_WORK_MPATIMEOUT:
		cep->mpa_timer = NULL;
		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
			/*
			 * MPA request timed out:
			 * Hide any partially received private data and signal
			 * timeout
			 */
			cep->mpa.hdr.params.pd_len = 0;

			if (cep->cm_id)
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-ETIMEDOUT);
			release_cep = 1;
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			/* No MPA req received after peer TCP stream setup. */
			erdma_disassoc_listen_cep(cep);

			erdma_cep_put(cep);
			release_cep = 1;
		}
		break;
	default:
		WARN(1, "Undefined CM work type: %d\n", work->type);
	}

	if (release_cep) {
		erdma_cancel_mpatimer(cep);
		cep->state = ERDMA_EPSTATE_CLOSED;
		if (cep->qp) {
			struct erdma_qp *qp = cep->qp;
			/*
			 * Serialize a potential race with application
			 * closing the QP and calling erdma_qp_cm_drop()
			 */
			erdma_qp_get(qp);
			erdma_cep_set_free(cep);

			erdma_qp_llp_close(qp);
			erdma_qp_put(qp);

			erdma_cep_set_inuse(cep);
			cep->qp = NULL;
			erdma_qp_put(qp);
		}

		if (cep->sock) {
			erdma_socket_disassoc(cep->sock);
			sock_release(cep->sock);
			cep->sock = NULL;
		}

		if (cep->cm_id) {
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
			if (cep->state != ERDMA_EPSTATE_LISTENING)
				erdma_cep_put(cep);
		}
	}
	erdma_cep_set_free(cep);
	erdma_put_work(work);
	erdma_cep_put(cep);
}

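/*
 * erdma_cm_queue_work() - Take a work element from the CEP's freelist
 * and queue it on erdma_cm_wq.
 *
 * Timer work (MPA timeout, connect timeout) is queued delayed and
 * remembered in cep->mpa_timer so it can be cancelled; other work runs
 * as soon as possible. A CEP reference is held per queued work and
 * dropped again when the handler finishes.
 */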
int erdma_cm_queue_work(struct erdma_cep *cep, enum erdma_work_type type)
{
	struct erdma_cm_work *work = erdma_get_work(cep);
	unsigned long delay = 0;

	if (!work)
		return -ENOMEM;

	work->type = type;
	work->cep = cep;

	erdma_cep_get(cep);

	INIT_DELAYED_WORK(&work->work, erdma_cm_work_handler);

	if (type == ERDMA_CM_WORK_MPATIMEOUT) {
		cep->mpa_timer = work;

		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
			delay = MPAREP_TIMEOUT;
		else
			delay = MPAREQ_TIMEOUT;
	} else if (type == ERDMA_CM_WORK_CONNECTTIMEOUT) {
		cep->mpa_timer = work;

		delay = CONNECT_TIMEOUT;
	}

	queue_delayed_work(erdma_cm_wq, &work->work, delay);

	return 0;
}

static void erdma_cm_llp_data_ready(struct sock *sk)
{
	struct erdma_cep *cep;

	trace_sk_data_ready(sk);

	read_lock(&sk->sk_callback_lock);

	cep = sk_to_cep(sk);
	if (!cep)
		goto out;

	if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ ||
	    cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
		erdma_cm_queue_work(cep, ERDMA_CM_WORK_READ_MPAHDR);

out:
	read_unlock(&sk->sk_callback_lock);
}

static void erdma_cm_llp_error_report(struct sock *sk)
{
	struct erdma_cep *cep = sk_to_cep(sk);

	if (cep)
		cep->sk_error_report(sk);
}

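/*
 * erdma_cm_llp_state_change() - TCP state change upcall.
 *
 * TCP_ESTABLISHED triggers either the CONNECTED work (active side) or
 * the ACCEPT work (listening side); TCP_CLOSE/TCP_CLOSE_WAIT triggers
 * PEER_CLOSE handling. The socket's original state_change callback is
 * invoked afterwards.
 */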
static void erdma_cm_llp_state_change(struct sock *sk)
{
	struct erdma_cep *cep;
	void (*orig_state_change)(struct sock *sk);

	read_lock(&sk->sk_callback_lock);

	cep = sk_to_cep(sk);
	if (!cep) {
		read_unlock(&sk->sk_callback_lock);
		return;
	}
	orig_state_change = cep->sk_state_change;

	switch (sk->sk_state) {
	case TCP_ESTABLISHED:
		if (cep->state == ERDMA_EPSTATE_CONNECTING)
			erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
		else
			erdma_cm_queue_work(cep, ERDMA_CM_WORK_ACCEPT);
		break;
	case TCP_CLOSE:
	case TCP_CLOSE_WAIT:
		if (cep->state != ERDMA_EPSTATE_LISTENING)
			erdma_cm_queue_work(cep, ERDMA_CM_WORK_PEER_CLOSE);
		break;
	default:
		break;
	}
	read_unlock(&sk->sk_callback_lock);
	orig_state_change(sk);
}

static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr,
			      int laddrlen, struct sockaddr *raddr,
			      int raddrlen, int flags)
{
	int ret;

	sock_set_reuseaddr(s->sk);
	ret = s->ops->bind(s, laddr, laddrlen);
	if (ret)
		return ret;
	ret = s->ops->connect(s, raddr, raddrlen, flags);
	return ret < 0 ? ret : 0;
}

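/*
 * erdma_connect() - iw_cm connect verb (active side).
 *
 * Creates a TCP socket, associates it with a fresh CEP and the QP,
 * stashes any private data, and issues a nonblocking bind+connect.
 * Completion is then driven asynchronously: an immediate connect
 * queues the CONNECTED work, an in-progress connect arms the connect
 * timeout and waits for the TCP state change upcall.
 */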
int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
{
	struct erdma_dev *dev = to_edev(id->device);
	struct erdma_qp *qp;
	struct erdma_cep *cep = NULL;
	struct socket *s = NULL;
	struct sockaddr *laddr = (struct sockaddr *)&id->m_local_addr;
	struct sockaddr *raddr = (struct sockaddr *)&id->m_remote_addr;
	u16 pd_len = params->private_data_len;
	int ret;

	if (pd_len > MPA_MAX_PRIVDATA)
		return -EINVAL;

	if (params->ird > dev->attrs.max_ird ||
	    params->ord > dev->attrs.max_ord)
		return -EINVAL;

	if (laddr->sa_family != AF_INET || raddr->sa_family != AF_INET)
		return -EAFNOSUPPORT;

	qp = find_qp_by_qpn(dev, params->qpn);
	if (!qp)
		return -ENOENT;
	erdma_qp_get(qp);

	ret = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, &s);
	if (ret < 0)
		goto error_put_qp;

	cep = erdma_cep_alloc(dev);
	if (!cep) {
		ret = -ENOMEM;
		goto error_release_sock;
	}

	erdma_cep_set_inuse(cep);

	/* Associate QP with CEP */
	erdma_cep_get(cep);
	qp->cep = cep;
	cep->qp = qp;

	/* Associate cm_id with CEP */
	id->add_ref(id);
	cep->cm_id = id;

	/*
	 * 6: Allocate a sufficient number of work elements
	 * to allow concurrent handling of local + peer close
	 * events, MPA header processing + MPA timeout, connected event
	 * and connect timeout.
	 */
	ret = erdma_cm_alloc_work(cep, 6);
	if (ret != 0) {
		ret = -ENOMEM;
		goto error_release_cep;
	}

	cep->ird = params->ird;
	cep->ord = params->ord;
	cep->state = ERDMA_EPSTATE_CONNECTING;

	erdma_cep_socket_assoc(cep, s);

	if (pd_len) {
		cep->pd_len = pd_len;
		cep->private_data = kmalloc(pd_len, GFP_KERNEL);
		if (!cep->private_data) {
			ret = -ENOMEM;
			goto error_disassoc;
		}

		memcpy(cep->private_data, params->private_data,
		       params->private_data_len);
	}

	ret = kernel_bindconnect(s, laddr, sizeof(*laddr), raddr,
				 sizeof(*raddr), O_NONBLOCK);
	if (ret != -EINPROGRESS && ret != 0) {
		goto error_disassoc;
	} else if (ret == 0) {
		ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
		if (ret)
			goto error_disassoc;
	} else {
		ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTTIMEOUT);
		if (ret)
			goto error_disassoc;
	}

	erdma_cep_set_free(cep);
	return 0;

error_disassoc:
	kfree(cep->private_data);
	cep->private_data = NULL;
	cep->pd_len = 0;

	erdma_socket_disassoc(s);

error_release_cep:
	/* disassoc with cm_id */
	cep->cm_id = NULL;
	id->rem_ref(id);

	/* disassoc with qp */
	qp->cep = NULL;
	erdma_cep_put(cep);
	cep->qp = NULL;

	cep->state = ERDMA_EPSTATE_CLOSED;

	erdma_cep_set_free(cep);

	/* release the cep. */
	erdma_cep_put(cep);

error_release_sock:
	if (s)
		sock_release(s);
error_put_qp:
	erdma_qp_put(qp);

	return ret;
}

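/*
 * erdma_accept() - iw_cm accept verb (passive side).
 *
 * Called after a CONNECT_REQUEST upcall. Validates the connection
 * parameters against device limits, binds the QP to the CEP, moves
 * the QP to RTS and answers the peer with an MPA Reply before
 * reporting ESTABLISHED to the IWCM.
 */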
int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
{
	struct erdma_dev *dev = to_edev(id->device);
	struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;
	struct erdma_qp *qp;
	struct erdma_qp_attrs qp_attrs;
	int ret;

	erdma_cep_set_inuse(cep);
	erdma_cep_put(cep);

	/* Free lingering inbound private data */
	if (cep->mpa.hdr.params.pd_len) {
		cep->mpa.hdr.params.pd_len = 0;
		kfree(cep->mpa.pdata);
		cep->mpa.pdata = NULL;
	}
	erdma_cancel_mpatimer(cep);

	if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
		erdma_cep_set_free(cep);
		erdma_cep_put(cep);

		return -ECONNRESET;
	}

	qp = find_qp_by_qpn(dev, params->qpn);
	if (!qp)
		return -ENOENT;
	erdma_qp_get(qp);

	down_write(&qp->state_lock);
	if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto error;
	}

	if (params->ord > dev->attrs.max_ord ||
	    params->ird > dev->attrs.max_ird) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto error;
	}

	if (params->private_data_len > MPA_MAX_PRIVDATA) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto error;
	}

	cep->ird = params->ird;
	cep->ord = params->ord;

	cep->cm_id = id;
	id->add_ref(id);

	memset(&qp_attrs, 0, sizeof(qp_attrs));
	qp_attrs.orq_size = params->ord;
	qp_attrs.irq_size = params->ird;

	qp_attrs.state = ERDMA_QP_STATE_RTS;

	/* Associate QP with CEP */
	erdma_cep_get(cep);
	qp->cep = cep;
	cep->qp = qp;

	cep->state = ERDMA_EPSTATE_RDMA_MODE;

	qp->attrs.qp_type = ERDMA_QP_PASSIVE;
	qp->attrs.pd_len = params->private_data_len;

	if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits))
		qp->attrs.cc = COMPROMISE_CC;

	/* move to rts */
	ret = erdma_modify_qp_internal(qp, &qp_attrs,
				       ERDMA_QP_ATTR_STATE |
				       ERDMA_QP_ATTR_ORD |
				       ERDMA_QP_ATTR_LLP_HANDLE |
				       ERDMA_QP_ATTR_IRD |
				       ERDMA_QP_ATTR_MPA);
	up_write(&qp->state_lock);

	if (ret)
		goto error;

	cep->mpa.ext_data.bits = 0;
	__mpa_ext_set_cc(&cep->mpa.ext_data.bits, qp->attrs.cc);
	cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);

	ret = erdma_send_mpareqrep(cep, params->private_data,
				   params->private_data_len);
	if (!ret) {
		ret = erdma_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0);
		if (ret)
			goto error;

		erdma_cep_set_free(cep);

		return 0;
	}

error:
	erdma_socket_disassoc(cep->sock);
	sock_release(cep->sock);
	cep->sock = NULL;

	cep->state = ERDMA_EPSTATE_CLOSED;

	if (cep->cm_id) {
		cep->cm_id->rem_ref(id);
		cep->cm_id = NULL;
	}

	if (qp->cep) {
		erdma_cep_put(cep);
		qp->cep = NULL;
	}

	cep->qp = NULL;
	erdma_qp_put(qp);

	erdma_cep_set_free(cep);
	erdma_cep_put(cep);

	return ret;
}

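/*
 * erdma_reject() - iw_cm reject verb.
 *
 * Answers a pending MPA Request with the reject flag set (optionally
 * carrying private data) if the MPA revision matches, then closes the
 * socket and the CEP.
 */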
int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen)
{
	struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;

	erdma_cep_set_inuse(cep);
	erdma_cep_put(cep);

	erdma_cancel_mpatimer(cep);

	if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
		erdma_cep_set_free(cep);
		erdma_cep_put(cep);

		return -ECONNRESET;
	}

	if (__mpa_rr_revision(cep->mpa.hdr.params.bits) == MPA_REVISION_EXT_1) {
		cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */
		erdma_send_mpareqrep(cep, pdata, plen);
	}

	erdma_socket_disassoc(cep->sock);
	sock_release(cep->sock);
	cep->sock = NULL;

	cep->state = ERDMA_EPSTATE_CLOSED;

	erdma_cep_set_free(cep);
	erdma_cep_put(cep);

	return 0;
}

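/*
 * erdma_create_listen() - iw_cm listen verb.
 *
 * Creates a listening TCP socket bound to the cm_id's local address
 * and attaches it to a new CEP. All listening CEPs of a cm_id are
 * linked on a list hung off id->provider_data, allocated on first
 * use.
 */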
int erdma_create_listen(struct iw_cm_id *id, int backlog)
{
	struct socket *s;
	struct erdma_cep *cep = NULL;
	int ret = 0;
	struct erdma_dev *dev = to_edev(id->device);
	int addr_family = id->local_addr.ss_family;
	struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);

	if (addr_family != AF_INET)
		return -EAFNOSUPPORT;

	ret = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s);
	if (ret < 0)
		return ret;

	sock_set_reuseaddr(s->sk);

	/* For wildcard addr, limit binding to current device only */
	if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
		s->sk->sk_bound_dev_if = dev->netdev->ifindex;

	ret = s->ops->bind(s, (struct sockaddr *)laddr,
			   sizeof(struct sockaddr_in));
	if (ret)
		goto error;

	cep = erdma_cep_alloc(dev);
	if (!cep) {
		ret = -ENOMEM;
		goto error;
	}
	erdma_cep_socket_assoc(cep, s);

	ret = erdma_cm_alloc_work(cep, backlog);
	if (ret)
		goto error;

	ret = s->ops->listen(s, backlog);
	if (ret)
		goto error;

	cep->cm_id = id;
	id->add_ref(id);

	if (!id->provider_data) {
		id->provider_data =
			kmalloc(sizeof(struct list_head), GFP_KERNEL);
		if (!id->provider_data) {
			ret = -ENOMEM;
			goto error;
		}
		INIT_LIST_HEAD((struct list_head *)id->provider_data);
	}

	list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
	cep->state = ERDMA_EPSTATE_LISTENING;

	return 0;

error:
	if (cep) {
		erdma_cep_set_inuse(cep);

		if (cep->cm_id) {
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
		}
		cep->sock = NULL;
		erdma_socket_disassoc(s);
		cep->state = ERDMA_EPSTATE_CLOSED;

		erdma_cep_set_free(cep);
		erdma_cep_put(cep);
	}
	sock_release(s);

	return ret;
}

static void erdma_drop_listeners(struct iw_cm_id *id)
{
	struct list_head *p, *tmp;
	/*
	 * In case of a wildcard rdma_listen on a multi-homed device,
	 * a listener's IWCM id is associated with more than one listening CEP.
	 */
	list_for_each_safe(p, tmp, (struct list_head *)id->provider_data) {
		struct erdma_cep *cep =
			list_entry(p, struct erdma_cep, listenq);

		list_del(p);

		erdma_cep_set_inuse(cep);

		if (cep->cm_id) {
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
		}
		if (cep->sock) {
			erdma_socket_disassoc(cep->sock);
			sock_release(cep->sock);
			cep->sock = NULL;
		}
		cep->state = ERDMA_EPSTATE_CLOSED;
		erdma_cep_set_free(cep);
		erdma_cep_put(cep);
	}
}

int erdma_destroy_listen(struct iw_cm_id *id)
{
	if (!id->provider_data)
		return 0;

	erdma_drop_listeners(id);
	kfree(id->provider_data);
	id->provider_data = NULL;

	return 0;
}

int erdma_cm_init(void)
{
	erdma_cm_wq = create_singlethread_workqueue("erdma_cm_wq");
	if (!erdma_cm_wq)
		return -ENOMEM;

	return 0;
}

void erdma_cm_exit(void)
{
	if (erdma_cm_wq)
		destroy_workqueue(erdma_cm_wq);
}