v6.2
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
  4 *
  5 * Work Requests exploiting Infiniband API
  6 *
  7 * Work requests (WR) posted via ib_post_send or ib_post_recv
  8 * are submitted to the RC SQ or RC RQ, respectively
  9 * (reliably connected send/receive queue)
 10 * and become work queue entries (WQEs).
 11 * While an SQ WR/WQE is pending, we track it until transmission completion.
 12 * Through a send or receive completion queue (CQ) respectively,
 13 * we get completion queue entries (CQEs) [aka work completions (WCs)].
 14 * Since the CQ callback is called from IRQ context, we split work by using
 15 * bottom halves implemented by tasklets.
 16 *
 17 * SMC uses this to exchange LLC (link layer control)
 18 * and CDC (connection data control) messages.
 19 *
 20 * Copyright IBM Corp. 2016
 21 *
 22 * Author(s):  Steffen Maier <maier@linux.vnet.ibm.com>
 23 */
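/*
 * Send-path flow as implemented below (summary of the description above;
 * the receive path mirrors it with the recv_tasklet and the smc_wr_rx_*
 * helpers):
 *
 *	smc_wr_tx_get_free_slot()  reserve a slot and assign a wr_id
 *	smc_wr_tx_send()           ib_post_send() on the RC QP
 *	smc_wr_tx_cq_handler()     CQ callback in IRQ context, schedules tasklet
 *	smc_wr_tx_tasklet_fn()     ib_poll_cq() in the bottom half
 *	smc_wr_tx_process_cqe()    match the wr_id to its pending slot and call
 *	                           the registered smc_wr_tx_handler
 */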
 24
 25#include <linux/atomic.h>
 26#include <linux/hashtable.h>
 27#include <linux/wait.h>
 28#include <rdma/ib_verbs.h>
 29#include <asm/div64.h>
 30
 31#include "smc.h"
 32#include "smc_wr.h"
 33
 34#define SMC_WR_MAX_POLL_CQE 10	/* max. # of compl. queue elements in 1 poll */
 35
 36#define SMC_WR_RX_HASH_BITS 4
 37static DEFINE_HASHTABLE(smc_wr_rx_hash, SMC_WR_RX_HASH_BITS);
 38static DEFINE_SPINLOCK(smc_wr_rx_hash_lock);
 39
 40struct smc_wr_tx_pend {	/* control data for a pending send request */
 41	u64			wr_id;		/* work request id sent */
 42	smc_wr_tx_handler	handler;
 43	enum ib_wc_status	wc_status;	/* CQE status */
 44	struct smc_link		*link;
 45	u32			idx;
 46	struct smc_wr_tx_pend_priv priv;
 47	u8			compl_requested;
 48};
 49
 50/******************************** send queue *********************************/
 51
 52/*------------------------------- completion --------------------------------*/
 53
 54/* returns true if at least one tx work request is pending on the given link */
 55static inline bool smc_wr_is_tx_pend(struct smc_link *link)
 56{
 57	return !bitmap_empty(link->wr_tx_mask, link->wr_tx_cnt);
 58}
 59
 60/* wait till all pending tx work requests on the given link are completed */
 61void smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
 62{
 63	wait_event(link->wr_tx_wait, !smc_wr_is_tx_pend(link));
 64}
 65
 66static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id)
 67{
 68	u32 i;
 69
 70	for (i = 0; i < link->wr_tx_cnt; i++) {
 71		if (link->wr_tx_pends[i].wr_id == wr_id)
 72			return i;
 73	}
 74	return link->wr_tx_cnt;
 75}
 76
 77static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
 78{
 79	struct smc_wr_tx_pend pnd_snd;
 80	struct smc_link *link;
 81	u32 pnd_snd_idx;
 82
 83	link = wc->qp->qp_context;
 84
 85	if (wc->opcode == IB_WC_REG_MR) {
 86		if (wc->status)
 87			link->wr_reg_state = FAILED;
 88		else
 89			link->wr_reg_state = CONFIRMED;
 90		smc_wr_wakeup_reg_wait(link);
 91		return;
 92	}
 93
 94	pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
 95	if (pnd_snd_idx == link->wr_tx_cnt) {
 96		if (link->lgr->smc_version != SMC_V2 ||
 97		    link->wr_tx_v2_pend->wr_id != wc->wr_id)
 98			return;
 99		link->wr_tx_v2_pend->wc_status = wc->status;
100		memcpy(&pnd_snd, link->wr_tx_v2_pend, sizeof(pnd_snd));
101		/* clear the full struct smc_wr_tx_pend including .priv */
102		memset(link->wr_tx_v2_pend, 0,
103		       sizeof(*link->wr_tx_v2_pend));
104		memset(link->lgr->wr_tx_buf_v2, 0,
105		       sizeof(*link->lgr->wr_tx_buf_v2));
106	} else {
107		link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
108		if (link->wr_tx_pends[pnd_snd_idx].compl_requested)
109			complete(&link->wr_tx_compl[pnd_snd_idx]);
110		memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx],
111		       sizeof(pnd_snd));
112		/* clear the full struct smc_wr_tx_pend including .priv */
113		memset(&link->wr_tx_pends[pnd_snd_idx], 0,
114		       sizeof(link->wr_tx_pends[pnd_snd_idx]));
115		memset(&link->wr_tx_bufs[pnd_snd_idx], 0,
116		       sizeof(link->wr_tx_bufs[pnd_snd_idx]));
117		if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
118			return;
119	}
120
121	if (wc->status) {
122		if (link->lgr->smc_version == SMC_V2) {
123			memset(link->wr_tx_v2_pend, 0,
124			       sizeof(*link->wr_tx_v2_pend));
125			memset(link->lgr->wr_tx_buf_v2, 0,
126			       sizeof(*link->lgr->wr_tx_buf_v2));
127		}
128		/* terminate link */
129		smcr_link_down_cond_sched(link);
130	}
131	if (pnd_snd.handler)
132		pnd_snd.handler(&pnd_snd.priv, link, wc->status);
133	wake_up(&link->wr_tx_wait);
134}
135
136static void smc_wr_tx_tasklet_fn(struct tasklet_struct *t)
137{
138	struct smc_ib_device *dev = from_tasklet(dev, t, send_tasklet);
139	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
140	int i = 0, rc;
141	int polled = 0;
142
143again:
144	polled++;
145	do {
146		memset(&wc, 0, sizeof(wc));
147		rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
148		if (polled == 1) {
149			ib_req_notify_cq(dev->roce_cq_send,
150					 IB_CQ_NEXT_COMP |
151					 IB_CQ_REPORT_MISSED_EVENTS);
152		}
153		if (!rc)
154			break;
155		for (i = 0; i < rc; i++)
156			smc_wr_tx_process_cqe(&wc[i]);
157	} while (rc > 0);
158	if (polled == 1)
159		goto again;
160}
161
162void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
163{
164	struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;
165
166	tasklet_schedule(&dev->send_tasklet);
167}
168
169/*---------------------------- request submission ---------------------------*/
170
171static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
172{
173	*idx = link->wr_tx_cnt;
174	if (!smc_link_sendable(link))
175		return -ENOLINK;
176	for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) {
177		if (!test_and_set_bit(*idx, link->wr_tx_mask))
178			return 0;
179	}
180	*idx = link->wr_tx_cnt;
181	return -EBUSY;
182}
183
184/**
185 * smc_wr_tx_get_free_slot() - returns buffer for message assembly,
186 *			and sets info for pending transmit tracking
187 * @link:		Pointer to smc_link used to later send the message.
188 * @handler:		Send completion handler function pointer.
189 * @wr_buf:		Out value returns pointer to message buffer.
190 * @wr_rdma_buf:	Out value returns pointer to rdma work request.
191 * @wr_pend_priv:	Out value returns pointer serving as handler context.
192 *
193 * Return: 0 on success, or -errno on error.
194 */
195int smc_wr_tx_get_free_slot(struct smc_link *link,
196			    smc_wr_tx_handler handler,
197			    struct smc_wr_buf **wr_buf,
198			    struct smc_rdma_wr **wr_rdma_buf,
199			    struct smc_wr_tx_pend_priv **wr_pend_priv)
200{
201	struct smc_link_group *lgr = smc_get_lgr(link);
202	struct smc_wr_tx_pend *wr_pend;
203	u32 idx = link->wr_tx_cnt;
204	struct ib_send_wr *wr_ib;
205	u64 wr_id;
206	int rc;
207
208	*wr_buf = NULL;
209	*wr_pend_priv = NULL;
210	if (in_softirq() || lgr->terminating) {
211		rc = smc_wr_tx_get_free_slot_index(link, &idx);
212		if (rc)
213			return rc;
214	} else {
215		rc = wait_event_interruptible_timeout(
216			link->wr_tx_wait,
217			!smc_link_sendable(link) ||
218			lgr->terminating ||
219			(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
220			SMC_WR_TX_WAIT_FREE_SLOT_TIME);
221		if (!rc) {
222			/* timeout - terminate link */
223			smcr_link_down_cond_sched(link);
224			return -EPIPE;
225		}
226		if (idx == link->wr_tx_cnt)
227			return -EPIPE;
228	}
229	wr_id = smc_wr_tx_get_next_wr_id(link);
230	wr_pend = &link->wr_tx_pends[idx];
231	wr_pend->wr_id = wr_id;
232	wr_pend->handler = handler;
233	wr_pend->link = link;
234	wr_pend->idx = idx;
235	wr_ib = &link->wr_tx_ibs[idx];
236	wr_ib->wr_id = wr_id;
237	*wr_buf = &link->wr_tx_bufs[idx];
238	if (wr_rdma_buf)
239		*wr_rdma_buf = &link->wr_tx_rdmas[idx];
240	*wr_pend_priv = &wr_pend->priv;
241	return 0;
242}
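/*
 * Illustrative caller sketch (not part of this file): how a message layer
 * such as the LLC or CDC code would typically use the send-slot API. The
 * handler and function names below are hypothetical.
 *
 *	static void my_tx_handler(struct smc_wr_tx_pend_priv *priv,
 *				  struct smc_link *link,
 *				  enum ib_wc_status wc_status)
 *	{
 *		// invoked from the send tasklet once the CQE for this slot arrives
 *	}
 *
 *	static int my_send_msg(struct smc_link *link)
 *	{
 *		struct smc_wr_tx_pend_priv *pend;
 *		struct smc_wr_buf *wr_buf;
 *		int rc;
 *
 *		rc = smc_wr_tx_get_free_slot(link, my_tx_handler, &wr_buf,
 *					     NULL, &pend);
 *		if (rc)
 *			return rc;
 *		// build the message in *wr_buf here; if that fails, release the
 *		// slot again with smc_wr_tx_put_slot(link, pend)
 *		return smc_wr_tx_send(link, pend);
 *	}
 */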
243
244int smc_wr_tx_get_v2_slot(struct smc_link *link,
245			  smc_wr_tx_handler handler,
246			  struct smc_wr_v2_buf **wr_buf,
247			  struct smc_wr_tx_pend_priv **wr_pend_priv)
248{
249	struct smc_wr_tx_pend *wr_pend;
250	struct ib_send_wr *wr_ib;
251	u64 wr_id;
252
253	if (link->wr_tx_v2_pend->idx == link->wr_tx_cnt)
254		return -EBUSY;
255
256	*wr_buf = NULL;
257	*wr_pend_priv = NULL;
258	wr_id = smc_wr_tx_get_next_wr_id(link);
259	wr_pend = link->wr_tx_v2_pend;
260	wr_pend->wr_id = wr_id;
261	wr_pend->handler = handler;
262	wr_pend->link = link;
263	wr_pend->idx = link->wr_tx_cnt;
264	wr_ib = link->wr_tx_v2_ib;
265	wr_ib->wr_id = wr_id;
266	*wr_buf = link->lgr->wr_tx_buf_v2;
267	*wr_pend_priv = &wr_pend->priv;
268	return 0;
269}
270
271int smc_wr_tx_put_slot(struct smc_link *link,
272		       struct smc_wr_tx_pend_priv *wr_pend_priv)
273{
274	struct smc_wr_tx_pend *pend;
275
276	pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv);
277	if (pend->idx < link->wr_tx_cnt) {
278		u32 idx = pend->idx;
279
280		/* clear the full struct smc_wr_tx_pend including .priv */
281		memset(&link->wr_tx_pends[idx], 0,
282		       sizeof(link->wr_tx_pends[idx]));
283		memset(&link->wr_tx_bufs[idx], 0,
284		       sizeof(link->wr_tx_bufs[idx]));
285		test_and_clear_bit(idx, link->wr_tx_mask);
286		wake_up(&link->wr_tx_wait);
287		return 1;
288	} else if (link->lgr->smc_version == SMC_V2 &&
289		   pend->idx == link->wr_tx_cnt) {
290		/* Large v2 buffer */
291		memset(&link->wr_tx_v2_pend, 0,
292		       sizeof(link->wr_tx_v2_pend));
293		memset(&link->lgr->wr_tx_buf_v2, 0,
294		       sizeof(link->lgr->wr_tx_buf_v2));
295		return 1;
296	}
297
298	return 0;
299}
300
301/* Send prepared WR slot via ib_post_send.
302 * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
303 */
304int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
305{
306	struct smc_wr_tx_pend *pend;
307	int rc;
308
309	ib_req_notify_cq(link->smcibdev->roce_cq_send,
310			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
311	pend = container_of(priv, struct smc_wr_tx_pend, priv);
312	rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);
313	if (rc) {
314		smc_wr_tx_put_slot(link, priv);
315		smcr_link_down_cond_sched(link);
316	}
317	return rc;
318}
319
320int smc_wr_tx_v2_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
321		      int len)
322{
323	int rc;
324
325	link->wr_tx_v2_ib->sg_list[0].length = len;
326	ib_req_notify_cq(link->smcibdev->roce_cq_send,
327			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
328	rc = ib_post_send(link->roce_qp, link->wr_tx_v2_ib, NULL);
329	if (rc) {
330		smc_wr_tx_put_slot(link, priv);
331		smcr_link_down_cond_sched(link);
332	}
333	return rc;
334}
335
336/* Send prepared WR slot via ib_post_send and wait for send completion
337 * notification.
338 * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
339 */
340int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
341			unsigned long timeout)
342{
343	struct smc_wr_tx_pend *pend;
344	u32 pnd_idx;
345	int rc;
346
347	pend = container_of(priv, struct smc_wr_tx_pend, priv);
348	pend->compl_requested = 1;
349	pnd_idx = pend->idx;
350	init_completion(&link->wr_tx_compl[pnd_idx]);
351
352	rc = smc_wr_tx_send(link, priv);
353	if (rc)
354		return rc;
355	/* wait for completion by smc_wr_tx_process_cqe() */
356	rc = wait_for_completion_interruptible_timeout(
357					&link->wr_tx_compl[pnd_idx], timeout);
358	if (rc <= 0)
359		rc = -ENODATA;
360	if (rc > 0)
361		rc = 0;
362	return rc;
363}
364
365/* Register a memory region and wait for result. */
366int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
367{
368	int rc;
369
370	ib_req_notify_cq(link->smcibdev->roce_cq_send,
371			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
372	link->wr_reg_state = POSTED;
373	link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr;
374	link->wr_reg.mr = mr;
375	link->wr_reg.key = mr->rkey;
376	rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, NULL);
377	if (rc)
378		return rc;
379
380	atomic_inc(&link->wr_reg_refcnt);
381	rc = wait_event_interruptible_timeout(link->wr_reg_wait,
382					      (link->wr_reg_state != POSTED),
383					      SMC_WR_REG_MR_WAIT_TIME);
384	if (atomic_dec_and_test(&link->wr_reg_refcnt))
385		wake_up_all(&link->wr_reg_wait);
386	if (!rc) {
387		/* timeout - terminate link */
388		smcr_link_down_cond_sched(link);
389		return -EPIPE;
390	}
391	if (rc == -ERESTARTSYS)
392		return -EINTR;
393	switch (link->wr_reg_state) {
394	case CONFIRMED:
395		rc = 0;
396		break;
397	case FAILED:
398		rc = -EIO;
399		break;
400	case POSTED:
401		rc = -EPIPE;
402		break;
403	}
404	return rc;
405}
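/*
 * Note on the wr_reg_refcnt handling above: the reference is held only across
 * the wait, so smc_wr_free_link() below can block in
 * wait_event(lnk->wr_reg_wait, !atomic_read(&lnk->wr_reg_refcnt)) until every
 * thread still sleeping in smc_wr_reg_send() has woken up and dropped its
 * reference before the link's buffers are unmapped.
 */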
406
407/****************************** receive queue ********************************/
408
409int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
410{
411	struct smc_wr_rx_handler *h_iter;
412	int rc = 0;
413
414	spin_lock(&smc_wr_rx_hash_lock);
415	hash_for_each_possible(smc_wr_rx_hash, h_iter, list, handler->type) {
416		if (h_iter->type == handler->type) {
417			rc = -EEXIST;
418			goto out_unlock;
419		}
420	}
421	hash_add(smc_wr_rx_hash, &handler->list, handler->type);
422out_unlock:
423	spin_unlock(&smc_wr_rx_hash_lock);
424	return rc;
425}
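/*
 * Illustrative sketch (not part of this file): a message layer registers one
 * handler per message type during initialization, before any receive WRs are
 * posted. The type value and names below are hypothetical; the handler shape
 * follows the call in smc_wr_rx_demultiplex() below.
 *
 *	static void my_rx_handler(struct ib_wc *wc, void *buf)
 *	{
 *		struct smc_wr_rx_hdr *hdr = buf;
 *		// runs in the receive tasklet for every message of this type
 *	}
 *
 *	static struct smc_wr_rx_handler my_rx_entry = {
 *		.handler	= my_rx_handler,
 *		.type		= 0x42,		// hypothetical message type
 *	};
 *
 *	// once, at init time:
 *	// rc = smc_wr_rx_register_handler(&my_rx_entry);
 */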
426
427/* Demultiplex a received work request based on the message type to its handler.
 428 * Relies on smc_wr_rx_hash having been completely filled before any IB WRs
 429 * are posted, and not being modified afterwards, so we don't need to lock it.
430 */
431static inline void smc_wr_rx_demultiplex(struct ib_wc *wc)
432{
433	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
434	struct smc_wr_rx_handler *handler;
435	struct smc_wr_rx_hdr *wr_rx;
436	u64 temp_wr_id;
437	u32 index;
438
439	if (wc->byte_len < sizeof(*wr_rx))
440		return; /* short message */
441	temp_wr_id = wc->wr_id;
442	index = do_div(temp_wr_id, link->wr_rx_cnt);
443	wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[index];
444	hash_for_each_possible(smc_wr_rx_hash, handler, list, wr_rx->type) {
445		if (handler->type == wr_rx->type)
446			handler->handler(wc, wr_rx);
447	}
448}
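/*
 * The receive wr_id is a monotonically increasing counter (link->wr_rx_id,
 * assigned when the receive WR is posted via smc_wr_rx_post()), so the buffer
 * a CQE refers to is simply wr_id modulo the number of receive buffers;
 * do_div() above returns that remainder. For example, with wr_rx_cnt == 48,
 * wr_id 100 maps to wr_rx_bufs[100 % 48] == wr_rx_bufs[4].
 */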
449
450static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
451{
452	struct smc_link *link;
453	int i;
454
455	for (i = 0; i < num; i++) {
456		link = wc[i].qp->qp_context;
457		link->wr_rx_id_compl = wc[i].wr_id;
458		if (wc[i].status == IB_WC_SUCCESS) {
459			link->wr_rx_tstamp = jiffies;
460			smc_wr_rx_demultiplex(&wc[i]);
461			smc_wr_rx_post(link); /* refill WR RX */
462		} else {
463			/* handle status errors */
464			switch (wc[i].status) {
465			case IB_WC_RETRY_EXC_ERR:
466			case IB_WC_RNR_RETRY_EXC_ERR:
467			case IB_WC_WR_FLUSH_ERR:
468				smcr_link_down_cond_sched(link);
469				if (link->wr_rx_id_compl == link->wr_rx_id)
470					wake_up(&link->wr_rx_empty_wait);
471				break;
472			default:
473				smc_wr_rx_post(link); /* refill WR RX */
474				break;
475			}
476		}
477	}
478}
479
480static void smc_wr_rx_tasklet_fn(struct tasklet_struct *t)
481{
482	struct smc_ib_device *dev = from_tasklet(dev, t, recv_tasklet);
483	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
484	int polled = 0;
485	int rc;
486
487again:
488	polled++;
489	do {
490		memset(&wc, 0, sizeof(wc));
491		rc = ib_poll_cq(dev->roce_cq_recv, SMC_WR_MAX_POLL_CQE, wc);
492		if (polled == 1) {
493			ib_req_notify_cq(dev->roce_cq_recv,
494					 IB_CQ_SOLICITED_MASK
495					 | IB_CQ_REPORT_MISSED_EVENTS);
496		}
497		if (!rc)
498			break;
499		smc_wr_rx_process_cqes(&wc[0], rc);
500	} while (rc > 0);
501	if (polled == 1)
502		goto again;
503}
504
505void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
506{
507	struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;
508
509	tasklet_schedule(&dev->recv_tasklet);
510}
511
512int smc_wr_rx_post_init(struct smc_link *link)
513{
514	u32 i;
515	int rc = 0;
516
517	for (i = 0; i < link->wr_rx_cnt; i++)
518		rc = smc_wr_rx_post(link);
519	return rc;
520}
521
522/***************************** init, exit, misc ******************************/
523
524void smc_wr_remember_qp_attr(struct smc_link *lnk)
525{
526	struct ib_qp_attr *attr = &lnk->qp_attr;
527	struct ib_qp_init_attr init_attr;
528
529	memset(attr, 0, sizeof(*attr));
530	memset(&init_attr, 0, sizeof(init_attr));
531	ib_query_qp(lnk->roce_qp, attr,
532		    IB_QP_STATE |
533		    IB_QP_CUR_STATE |
534		    IB_QP_PKEY_INDEX |
535		    IB_QP_PORT |
536		    IB_QP_QKEY |
537		    IB_QP_AV |
538		    IB_QP_PATH_MTU |
539		    IB_QP_TIMEOUT |
540		    IB_QP_RETRY_CNT |
541		    IB_QP_RNR_RETRY |
542		    IB_QP_RQ_PSN |
543		    IB_QP_ALT_PATH |
544		    IB_QP_MIN_RNR_TIMER |
545		    IB_QP_SQ_PSN |
546		    IB_QP_PATH_MIG_STATE |
547		    IB_QP_CAP |
548		    IB_QP_DEST_QPN,
549		    &init_attr);
550
551	lnk->wr_tx_cnt = min_t(size_t, SMC_WR_BUF_CNT,
552			       lnk->qp_attr.cap.max_send_wr);
553	lnk->wr_rx_cnt = min_t(size_t, SMC_WR_BUF_CNT * 3,
554			       lnk->qp_attr.cap.max_recv_wr);
555}
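/*
 * Worked example for the clamping above: if the device reported
 * cap.max_send_wr == 8 while SMC_WR_BUF_CNT send buffers were allocated, only
 * wr_tx_cnt == 8 send slots are ever handed out, and correspondingly at most
 * min(SMC_WR_BUF_CNT * 3, cap.max_recv_wr) receive WRs are kept posted.
 */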
556
557static void smc_wr_init_sge(struct smc_link *lnk)
558{
559	int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1;
560	bool send_inline = (lnk->qp_attr.cap.max_inline_data > SMC_WR_TX_SIZE);
561	u32 i;
562
563	for (i = 0; i < lnk->wr_tx_cnt; i++) {
564		lnk->wr_tx_sges[i].addr = send_inline ? (uintptr_t)(&lnk->wr_tx_bufs[i]) :
565			lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;
566		lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;
567		lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
568		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[0].lkey =
569			lnk->roce_pd->local_dma_lkey;
570		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[1].lkey =
571			lnk->roce_pd->local_dma_lkey;
572		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[0].lkey =
573			lnk->roce_pd->local_dma_lkey;
574		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[1].lkey =
575			lnk->roce_pd->local_dma_lkey;
576		lnk->wr_tx_ibs[i].next = NULL;
577		lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i];
578		lnk->wr_tx_ibs[i].num_sge = 1;
579		lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
580		lnk->wr_tx_ibs[i].send_flags =
581			IB_SEND_SIGNALED | IB_SEND_SOLICITED;
582		if (send_inline)
583			lnk->wr_tx_ibs[i].send_flags |= IB_SEND_INLINE;
584		lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE;
585		lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE;
586		lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list =
587			lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge;
588		lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list =
589			lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge;
590	}
591
592	if (lnk->lgr->smc_version == SMC_V2) {
593		lnk->wr_tx_v2_sge->addr = lnk->wr_tx_v2_dma_addr;
594		lnk->wr_tx_v2_sge->length = SMC_WR_BUF_V2_SIZE;
595		lnk->wr_tx_v2_sge->lkey = lnk->roce_pd->local_dma_lkey;
596
597		lnk->wr_tx_v2_ib->next = NULL;
598		lnk->wr_tx_v2_ib->sg_list = lnk->wr_tx_v2_sge;
599		lnk->wr_tx_v2_ib->num_sge = 1;
600		lnk->wr_tx_v2_ib->opcode = IB_WR_SEND;
601		lnk->wr_tx_v2_ib->send_flags =
602			IB_SEND_SIGNALED | IB_SEND_SOLICITED;
603	}
604
605	/* With SMC-Rv2 there can be messages larger than SMC_WR_TX_SIZE.
 606	 * Each ib_recv_wr gets 2 sges; the second one is a spillover buffer
 607	 * shared by all receive WRs. When a larger message arrives,
 608	 * the content of the first small sge is copied to the beginning of
609	 * the larger spillover buffer, allowing easy data mapping.
610	 */
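	/*
	 * Concretely, as described above: the first sge covers SMC_WR_TX_SIZE
	 * bytes of wr_rx_bufs[i]; the second sge points into the shared
	 * wr_rx_buf_v2 at offset SMC_WR_TX_SIZE. A message of
	 * SMC_WR_TX_SIZE + N bytes therefore lands with its first
	 * SMC_WR_TX_SIZE bytes in wr_rx_bufs[i] and the remaining N bytes in
	 * wr_rx_buf_v2 at offset SMC_WR_TX_SIZE; copying the first chunk to
	 * the start of wr_rx_buf_v2 yields one contiguous message.
	 */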
611	for (i = 0; i < lnk->wr_rx_cnt; i++) {
612		int x = i * sges_per_buf;
613
614		lnk->wr_rx_sges[x].addr =
615			lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE;
616		lnk->wr_rx_sges[x].length = SMC_WR_TX_SIZE;
617		lnk->wr_rx_sges[x].lkey = lnk->roce_pd->local_dma_lkey;
618		if (lnk->lgr->smc_version == SMC_V2) {
619			lnk->wr_rx_sges[x + 1].addr =
620					lnk->wr_rx_v2_dma_addr + SMC_WR_TX_SIZE;
621			lnk->wr_rx_sges[x + 1].length =
622					SMC_WR_BUF_V2_SIZE - SMC_WR_TX_SIZE;
623			lnk->wr_rx_sges[x + 1].lkey =
624					lnk->roce_pd->local_dma_lkey;
625		}
626		lnk->wr_rx_ibs[i].next = NULL;
627		lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[x];
628		lnk->wr_rx_ibs[i].num_sge = sges_per_buf;
629	}
630	lnk->wr_reg.wr.next = NULL;
631	lnk->wr_reg.wr.num_sge = 0;
632	lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED;
633	lnk->wr_reg.wr.opcode = IB_WR_REG_MR;
634	lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
635}
636
637void smc_wr_free_link(struct smc_link *lnk)
638{
639	struct ib_device *ibdev;
640
641	if (!lnk->smcibdev)
642		return;
643	ibdev = lnk->smcibdev->ibdev;
644
645	smc_wr_drain_cq(lnk);
646	smc_wr_wakeup_reg_wait(lnk);
647	smc_wr_wakeup_tx_wait(lnk);
648
649	smc_wr_tx_wait_no_pending_sends(lnk);
650	wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt)));
651	wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt)));
652
653	if (lnk->wr_rx_dma_addr) {
654		ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
655				    SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
656				    DMA_FROM_DEVICE);
657		lnk->wr_rx_dma_addr = 0;
658	}
659	if (lnk->wr_rx_v2_dma_addr) {
660		ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr,
661				    SMC_WR_BUF_V2_SIZE,
662				    DMA_FROM_DEVICE);
663		lnk->wr_rx_v2_dma_addr = 0;
664	}
665	if (lnk->wr_tx_dma_addr) {
666		ib_dma_unmap_single(ibdev, lnk->wr_tx_dma_addr,
667				    SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
668				    DMA_TO_DEVICE);
669		lnk->wr_tx_dma_addr = 0;
670	}
671	if (lnk->wr_tx_v2_dma_addr) {
672		ib_dma_unmap_single(ibdev, lnk->wr_tx_v2_dma_addr,
673				    SMC_WR_BUF_V2_SIZE,
674				    DMA_TO_DEVICE);
675		lnk->wr_tx_v2_dma_addr = 0;
676	}
677}
678
679void smc_wr_free_lgr_mem(struct smc_link_group *lgr)
680{
681	if (lgr->smc_version < SMC_V2)
682		return;
683
684	kfree(lgr->wr_rx_buf_v2);
685	lgr->wr_rx_buf_v2 = NULL;
686	kfree(lgr->wr_tx_buf_v2);
687	lgr->wr_tx_buf_v2 = NULL;
688}
689
690void smc_wr_free_link_mem(struct smc_link *lnk)
691{
692	kfree(lnk->wr_tx_v2_ib);
693	lnk->wr_tx_v2_ib = NULL;
694	kfree(lnk->wr_tx_v2_sge);
695	lnk->wr_tx_v2_sge = NULL;
696	kfree(lnk->wr_tx_v2_pend);
697	lnk->wr_tx_v2_pend = NULL;
698	kfree(lnk->wr_tx_compl);
699	lnk->wr_tx_compl = NULL;
700	kfree(lnk->wr_tx_pends);
701	lnk->wr_tx_pends = NULL;
702	bitmap_free(lnk->wr_tx_mask);
703	lnk->wr_tx_mask = NULL;
704	kfree(lnk->wr_tx_sges);
705	lnk->wr_tx_sges = NULL;
706	kfree(lnk->wr_tx_rdma_sges);
707	lnk->wr_tx_rdma_sges = NULL;
708	kfree(lnk->wr_rx_sges);
709	lnk->wr_rx_sges = NULL;
710	kfree(lnk->wr_tx_rdmas);
711	lnk->wr_tx_rdmas = NULL;
712	kfree(lnk->wr_rx_ibs);
713	lnk->wr_rx_ibs = NULL;
714	kfree(lnk->wr_tx_ibs);
715	lnk->wr_tx_ibs = NULL;
716	kfree(lnk->wr_tx_bufs);
717	lnk->wr_tx_bufs = NULL;
718	kfree(lnk->wr_rx_bufs);
719	lnk->wr_rx_bufs = NULL;
720}
721
722int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr)
723{
724	if (lgr->smc_version < SMC_V2)
725		return 0;
726
727	lgr->wr_rx_buf_v2 = kzalloc(SMC_WR_BUF_V2_SIZE, GFP_KERNEL);
728	if (!lgr->wr_rx_buf_v2)
729		return -ENOMEM;
730	lgr->wr_tx_buf_v2 = kzalloc(SMC_WR_BUF_V2_SIZE, GFP_KERNEL);
731	if (!lgr->wr_tx_buf_v2) {
732		kfree(lgr->wr_rx_buf_v2);
733		return -ENOMEM;
734	}
735	return 0;
736}
737
738int smc_wr_alloc_link_mem(struct smc_link *link)
739{
740	int sges_per_buf = link->lgr->smc_version == SMC_V2 ? 2 : 1;
741
742	/* allocate link related memory */
743	link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL);
744	if (!link->wr_tx_bufs)
745		goto no_mem;
746	link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, SMC_WR_BUF_SIZE,
747				   GFP_KERNEL);
748	if (!link->wr_rx_bufs)
749		goto no_mem_wr_tx_bufs;
750	link->wr_tx_ibs = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_ibs[0]),
751				  GFP_KERNEL);
752	if (!link->wr_tx_ibs)
753		goto no_mem_wr_rx_bufs;
754	link->wr_rx_ibs = kcalloc(SMC_WR_BUF_CNT * 3,
755				  sizeof(link->wr_rx_ibs[0]),
756				  GFP_KERNEL);
757	if (!link->wr_rx_ibs)
758		goto no_mem_wr_tx_ibs;
759	link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT,
760				    sizeof(link->wr_tx_rdmas[0]),
761				    GFP_KERNEL);
762	if (!link->wr_tx_rdmas)
763		goto no_mem_wr_rx_ibs;
764	link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT,
765					sizeof(link->wr_tx_rdma_sges[0]),
766					GFP_KERNEL);
767	if (!link->wr_tx_rdma_sges)
768		goto no_mem_wr_tx_rdmas;
769	link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]),
770				   GFP_KERNEL);
771	if (!link->wr_tx_sges)
772		goto no_mem_wr_tx_rdma_sges;
773	link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
774				   sizeof(link->wr_rx_sges[0]) * sges_per_buf,
775				   GFP_KERNEL);
776	if (!link->wr_rx_sges)
777		goto no_mem_wr_tx_sges;
778	link->wr_tx_mask = bitmap_zalloc(SMC_WR_BUF_CNT, GFP_KERNEL);
779	if (!link->wr_tx_mask)
780		goto no_mem_wr_rx_sges;
781	link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT,
782				    sizeof(link->wr_tx_pends[0]),
783				    GFP_KERNEL);
784	if (!link->wr_tx_pends)
785		goto no_mem_wr_tx_mask;
786	link->wr_tx_compl = kcalloc(SMC_WR_BUF_CNT,
787				    sizeof(link->wr_tx_compl[0]),
788				    GFP_KERNEL);
789	if (!link->wr_tx_compl)
790		goto no_mem_wr_tx_pends;
791
792	if (link->lgr->smc_version == SMC_V2) {
793		link->wr_tx_v2_ib = kzalloc(sizeof(*link->wr_tx_v2_ib),
794					    GFP_KERNEL);
795		if (!link->wr_tx_v2_ib)
796			goto no_mem_tx_compl;
797		link->wr_tx_v2_sge = kzalloc(sizeof(*link->wr_tx_v2_sge),
798					     GFP_KERNEL);
799		if (!link->wr_tx_v2_sge)
800			goto no_mem_v2_ib;
801		link->wr_tx_v2_pend = kzalloc(sizeof(*link->wr_tx_v2_pend),
802					      GFP_KERNEL);
803		if (!link->wr_tx_v2_pend)
804			goto no_mem_v2_sge;
805	}
806	return 0;
807
808no_mem_v2_sge:
809	kfree(link->wr_tx_v2_sge);
810no_mem_v2_ib:
811	kfree(link->wr_tx_v2_ib);
812no_mem_tx_compl:
813	kfree(link->wr_tx_compl);
814no_mem_wr_tx_pends:
815	kfree(link->wr_tx_pends);
816no_mem_wr_tx_mask:
817	kfree(link->wr_tx_mask);
818no_mem_wr_rx_sges:
819	kfree(link->wr_rx_sges);
820no_mem_wr_tx_sges:
821	kfree(link->wr_tx_sges);
822no_mem_wr_tx_rdma_sges:
823	kfree(link->wr_tx_rdma_sges);
824no_mem_wr_tx_rdmas:
825	kfree(link->wr_tx_rdmas);
826no_mem_wr_rx_ibs:
827	kfree(link->wr_rx_ibs);
828no_mem_wr_tx_ibs:
829	kfree(link->wr_tx_ibs);
830no_mem_wr_rx_bufs:
831	kfree(link->wr_rx_bufs);
832no_mem_wr_tx_bufs:
833	kfree(link->wr_tx_bufs);
834no_mem:
835	return -ENOMEM;
836}
837
838void smc_wr_remove_dev(struct smc_ib_device *smcibdev)
839{
840	tasklet_kill(&smcibdev->recv_tasklet);
841	tasklet_kill(&smcibdev->send_tasklet);
842}
843
844void smc_wr_add_dev(struct smc_ib_device *smcibdev)
845{
846	tasklet_setup(&smcibdev->recv_tasklet, smc_wr_rx_tasklet_fn);
847	tasklet_setup(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn);
848}
849
850int smc_wr_create_link(struct smc_link *lnk)
851{
852	struct ib_device *ibdev = lnk->smcibdev->ibdev;
853	int rc = 0;
854
855	smc_wr_tx_set_wr_id(&lnk->wr_tx_id, 0);
856	lnk->wr_rx_id = 0;
857	lnk->wr_rx_dma_addr = ib_dma_map_single(
858		ibdev, lnk->wr_rx_bufs,	SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
859		DMA_FROM_DEVICE);
860	if (ib_dma_mapping_error(ibdev, lnk->wr_rx_dma_addr)) {
861		lnk->wr_rx_dma_addr = 0;
862		rc = -EIO;
863		goto out;
864	}
865	if (lnk->lgr->smc_version == SMC_V2) {
866		lnk->wr_rx_v2_dma_addr = ib_dma_map_single(ibdev,
867			lnk->lgr->wr_rx_buf_v2, SMC_WR_BUF_V2_SIZE,
868			DMA_FROM_DEVICE);
869		if (ib_dma_mapping_error(ibdev, lnk->wr_rx_v2_dma_addr)) {
870			lnk->wr_rx_v2_dma_addr = 0;
871			rc = -EIO;
872			goto dma_unmap;
873		}
874		lnk->wr_tx_v2_dma_addr = ib_dma_map_single(ibdev,
875			lnk->lgr->wr_tx_buf_v2, SMC_WR_BUF_V2_SIZE,
876			DMA_TO_DEVICE);
877		if (ib_dma_mapping_error(ibdev, lnk->wr_tx_v2_dma_addr)) {
878			lnk->wr_tx_v2_dma_addr = 0;
879			rc = -EIO;
880			goto dma_unmap;
881		}
882	}
883	lnk->wr_tx_dma_addr = ib_dma_map_single(
884		ibdev, lnk->wr_tx_bufs,	SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
885		DMA_TO_DEVICE);
886	if (ib_dma_mapping_error(ibdev, lnk->wr_tx_dma_addr)) {
887		rc = -EIO;
888		goto dma_unmap;
889	}
890	smc_wr_init_sge(lnk);
891	bitmap_zero(lnk->wr_tx_mask, SMC_WR_BUF_CNT);
892	init_waitqueue_head(&lnk->wr_tx_wait);
893	atomic_set(&lnk->wr_tx_refcnt, 0);
894	init_waitqueue_head(&lnk->wr_reg_wait);
895	atomic_set(&lnk->wr_reg_refcnt, 0);
896	init_waitqueue_head(&lnk->wr_rx_empty_wait);
897	return rc;
898
899dma_unmap:
900	if (lnk->wr_rx_v2_dma_addr) {
901		ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr,
902				    SMC_WR_BUF_V2_SIZE,
903				    DMA_FROM_DEVICE);
904		lnk->wr_rx_v2_dma_addr = 0;
905	}
906	if (lnk->wr_tx_v2_dma_addr) {
907		ib_dma_unmap_single(ibdev, lnk->wr_tx_v2_dma_addr,
908				    SMC_WR_BUF_V2_SIZE,
909				    DMA_TO_DEVICE);
910		lnk->wr_tx_v2_dma_addr = 0;
911	}
912	ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
913			    SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
914			    DMA_FROM_DEVICE);
915	lnk->wr_rx_dma_addr = 0;
916out:
917	return rc;
918}
v5.14.15
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
  4 *
  5 * Work Requests exploiting Infiniband API
  6 *
  7 * Work requests (WR) posted via ib_post_send or ib_post_recv
  8 * are submitted to the RC SQ or RC RQ, respectively
  9 * (reliably connected send/receive queue)
 10 * and become work queue entries (WQEs).
 11 * While an SQ WR/WQE is pending, we track it until transmission completion.
 12 * Through a send or receive completion queue (CQ) respectively,
 13 * we get completion queue entries (CQEs) [aka work completions (WCs)].
 14 * Since the CQ callback is called from IRQ context, we split work by using
 15 * bottom halves implemented by tasklets.
 16 *
 17 * SMC uses this to exchange LLC (link layer control)
 18 * and CDC (connection data control) messages.
 19 *
 20 * Copyright IBM Corp. 2016
 21 *
 22 * Author(s):  Steffen Maier <maier@linux.vnet.ibm.com>
 23 */
 24
 25#include <linux/atomic.h>
 26#include <linux/hashtable.h>
 27#include <linux/wait.h>
 28#include <rdma/ib_verbs.h>
 29#include <asm/div64.h>
 30
 31#include "smc.h"
 32#include "smc_wr.h"
 33
 34#define SMC_WR_MAX_POLL_CQE 10	/* max. # of compl. queue elements in 1 poll */
 35
 36#define SMC_WR_RX_HASH_BITS 4
 37static DEFINE_HASHTABLE(smc_wr_rx_hash, SMC_WR_RX_HASH_BITS);
 38static DEFINE_SPINLOCK(smc_wr_rx_hash_lock);
 39
 40struct smc_wr_tx_pend {	/* control data for a pending send request */
 41	u64			wr_id;		/* work request id sent */
 42	smc_wr_tx_handler	handler;
 43	enum ib_wc_status	wc_status;	/* CQE status */
 44	struct smc_link		*link;
 45	u32			idx;
 46	struct smc_wr_tx_pend_priv priv;
 47	u8			compl_requested;
 48};
 49
 50/******************************** send queue *********************************/
 51
 52/*------------------------------- completion --------------------------------*/
 53
 54/* returns true if at least one tx work request is pending on the given link */
 55static inline bool smc_wr_is_tx_pend(struct smc_link *link)
 56{
 57	if (find_first_bit(link->wr_tx_mask, link->wr_tx_cnt) !=
 58							link->wr_tx_cnt) {
 59		return true;
 60	}
 61	return false;
 62}
 63
 64/* wait till all pending tx work requests on the given link are completed */
 65int smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
 66{
 67	if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link),
 68			       SMC_WR_TX_WAIT_PENDING_TIME))
 69		return 0;
 70	else /* timeout */
 71		return -EPIPE;
 72}
 73
 74static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id)
 75{
 76	u32 i;
 77
 78	for (i = 0; i < link->wr_tx_cnt; i++) {
 79		if (link->wr_tx_pends[i].wr_id == wr_id)
 80			return i;
 81	}
 82	return link->wr_tx_cnt;
 83}
 84
 85static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
 86{
 87	struct smc_wr_tx_pend pnd_snd;
 88	struct smc_link *link;
 89	u32 pnd_snd_idx;
 90	int i;
 91
 92	link = wc->qp->qp_context;
 93
 94	if (wc->opcode == IB_WC_REG_MR) {
 95		if (wc->status)
 96			link->wr_reg_state = FAILED;
 97		else
 98			link->wr_reg_state = CONFIRMED;
 99		smc_wr_wakeup_reg_wait(link);
100		return;
101	}
102
103	pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
104	if (pnd_snd_idx == link->wr_tx_cnt)
105		return;
106	link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
107	if (link->wr_tx_pends[pnd_snd_idx].compl_requested)
108		complete(&link->wr_tx_compl[pnd_snd_idx]);
109	memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd));
110	/* clear the full struct smc_wr_tx_pend including .priv */
111	memset(&link->wr_tx_pends[pnd_snd_idx], 0,
112	       sizeof(link->wr_tx_pends[pnd_snd_idx]));
113	memset(&link->wr_tx_bufs[pnd_snd_idx], 0,
114	       sizeof(link->wr_tx_bufs[pnd_snd_idx]));
115	if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
116		return;
117	if (wc->status) {
118		for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
119			/* clear full struct smc_wr_tx_pend including .priv */
120			memset(&link->wr_tx_pends[i], 0,
121			       sizeof(link->wr_tx_pends[i]));
122			memset(&link->wr_tx_bufs[i], 0,
123			       sizeof(link->wr_tx_bufs[i]));
124			clear_bit(i, link->wr_tx_mask);
125		}
126		/* terminate link */
127		smcr_link_down_cond_sched(link);
128	}
129	if (pnd_snd.handler)
130		pnd_snd.handler(&pnd_snd.priv, link, wc->status);
131	wake_up(&link->wr_tx_wait);
132}
133
134static void smc_wr_tx_tasklet_fn(struct tasklet_struct *t)
135{
136	struct smc_ib_device *dev = from_tasklet(dev, t, send_tasklet);
137	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
138	int i = 0, rc;
139	int polled = 0;
140
141again:
142	polled++;
143	do {
144		memset(&wc, 0, sizeof(wc));
145		rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
146		if (polled == 1) {
147			ib_req_notify_cq(dev->roce_cq_send,
148					 IB_CQ_NEXT_COMP |
149					 IB_CQ_REPORT_MISSED_EVENTS);
150		}
151		if (!rc)
152			break;
153		for (i = 0; i < rc; i++)
154			smc_wr_tx_process_cqe(&wc[i]);
155	} while (rc > 0);
156	if (polled == 1)
157		goto again;
158}
159
160void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
161{
162	struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;
163
164	tasklet_schedule(&dev->send_tasklet);
165}
166
167/*---------------------------- request submission ---------------------------*/
168
169static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
170{
171	*idx = link->wr_tx_cnt;
172	if (!smc_link_usable(link))
173		return -ENOLINK;
174	for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) {
175		if (!test_and_set_bit(*idx, link->wr_tx_mask))
176			return 0;
177	}
178	*idx = link->wr_tx_cnt;
179	return -EBUSY;
180}
181
182/**
183 * smc_wr_tx_get_free_slot() - returns buffer for message assembly,
184 *			and sets info for pending transmit tracking
185 * @link:		Pointer to smc_link used to later send the message.
186 * @handler:		Send completion handler function pointer.
187 * @wr_buf:		Out value returns pointer to message buffer.
188 * @wr_rdma_buf:	Out value returns pointer to rdma work request.
189 * @wr_pend_priv:	Out value returns pointer serving as handler context.
190 *
191 * Return: 0 on success, or -errno on error.
192 */
193int smc_wr_tx_get_free_slot(struct smc_link *link,
194			    smc_wr_tx_handler handler,
195			    struct smc_wr_buf **wr_buf,
196			    struct smc_rdma_wr **wr_rdma_buf,
197			    struct smc_wr_tx_pend_priv **wr_pend_priv)
198{
199	struct smc_link_group *lgr = smc_get_lgr(link);
200	struct smc_wr_tx_pend *wr_pend;
201	u32 idx = link->wr_tx_cnt;
202	struct ib_send_wr *wr_ib;
203	u64 wr_id;
204	int rc;
205
206	*wr_buf = NULL;
207	*wr_pend_priv = NULL;
208	if (in_softirq() || lgr->terminating) {
209		rc = smc_wr_tx_get_free_slot_index(link, &idx);
210		if (rc)
211			return rc;
212	} else {
213		rc = wait_event_interruptible_timeout(
214			link->wr_tx_wait,
215			!smc_link_usable(link) ||
216			lgr->terminating ||
217			(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
218			SMC_WR_TX_WAIT_FREE_SLOT_TIME);
219		if (!rc) {
220			/* timeout - terminate link */
221			smcr_link_down_cond_sched(link);
222			return -EPIPE;
223		}
224		if (idx == link->wr_tx_cnt)
225			return -EPIPE;
226	}
227	wr_id = smc_wr_tx_get_next_wr_id(link);
228	wr_pend = &link->wr_tx_pends[idx];
229	wr_pend->wr_id = wr_id;
230	wr_pend->handler = handler;
231	wr_pend->link = link;
232	wr_pend->idx = idx;
233	wr_ib = &link->wr_tx_ibs[idx];
234	wr_ib->wr_id = wr_id;
235	*wr_buf = &link->wr_tx_bufs[idx];
236	if (wr_rdma_buf)
237		*wr_rdma_buf = &link->wr_tx_rdmas[idx];
238	*wr_pend_priv = &wr_pend->priv;
239	return 0;
240}
241
242int smc_wr_tx_put_slot(struct smc_link *link,
243		       struct smc_wr_tx_pend_priv *wr_pend_priv)
244{
245	struct smc_wr_tx_pend *pend;
246
247	pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv);
248	if (pend->idx < link->wr_tx_cnt) {
249		u32 idx = pend->idx;
250
251		/* clear the full struct smc_wr_tx_pend including .priv */
252		memset(&link->wr_tx_pends[idx], 0,
253		       sizeof(link->wr_tx_pends[idx]));
254		memset(&link->wr_tx_bufs[idx], 0,
255		       sizeof(link->wr_tx_bufs[idx]));
256		test_and_clear_bit(idx, link->wr_tx_mask);
257		wake_up(&link->wr_tx_wait);
258		return 1;
259	}
260
261	return 0;
262}
263
264/* Send prepared WR slot via ib_post_send.
265 * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
266 */
267int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
268{
269	struct smc_wr_tx_pend *pend;
270	int rc;
271
272	ib_req_notify_cq(link->smcibdev->roce_cq_send,
273			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
274	pend = container_of(priv, struct smc_wr_tx_pend, priv);
275	rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);
276	if (rc) {
277		smc_wr_tx_put_slot(link, priv);
278		smcr_link_down_cond_sched(link);
279	}
280	return rc;
281}
282
283/* Send prepared WR slot via ib_post_send and wait for send completion
284 * notification.
285 * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
286 */
287int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
288			unsigned long timeout)
289{
290	struct smc_wr_tx_pend *pend;
291	int rc;
292
293	pend = container_of(priv, struct smc_wr_tx_pend, priv);
294	pend->compl_requested = 1;
295	init_completion(&link->wr_tx_compl[pend->idx]);
296
297	rc = smc_wr_tx_send(link, priv);
298	if (rc)
299		return rc;
300	/* wait for completion by smc_wr_tx_process_cqe() */
301	rc = wait_for_completion_interruptible_timeout(
302					&link->wr_tx_compl[pend->idx], timeout);
303	if (rc <= 0)
304		rc = -ENODATA;
305	if (rc > 0)
306		rc = 0;
307	return rc;
308}
309
310/* Register a memory region and wait for result. */
311int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
312{
313	int rc;
314
315	ib_req_notify_cq(link->smcibdev->roce_cq_send,
316			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
317	link->wr_reg_state = POSTED;
318	link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr;
319	link->wr_reg.mr = mr;
320	link->wr_reg.key = mr->rkey;
321	rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, NULL);
322	if (rc)
323		return rc;
324
325	atomic_inc(&link->wr_reg_refcnt);
326	rc = wait_event_interruptible_timeout(link->wr_reg_wait,
327					      (link->wr_reg_state != POSTED),
328					      SMC_WR_REG_MR_WAIT_TIME);
329	if (atomic_dec_and_test(&link->wr_reg_refcnt))
330		wake_up_all(&link->wr_reg_wait);
331	if (!rc) {
332		/* timeout - terminate link */
333		smcr_link_down_cond_sched(link);
334		return -EPIPE;
335	}
336	if (rc == -ERESTARTSYS)
337		return -EINTR;
338	switch (link->wr_reg_state) {
339	case CONFIRMED:
340		rc = 0;
341		break;
342	case FAILED:
343		rc = -EIO;
344		break;
345	case POSTED:
346		rc = -EPIPE;
347		break;
348	}
349	return rc;
350}
351
352void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type,
353			     smc_wr_tx_filter filter,
354			     smc_wr_tx_dismisser dismisser,
355			     unsigned long data)
356{
357	struct smc_wr_tx_pend_priv *tx_pend;
358	struct smc_wr_rx_hdr *wr_tx;
359	int i;
360
361	for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
362		wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
363		if (wr_tx->type != wr_tx_hdr_type)
364			continue;
365		tx_pend = &link->wr_tx_pends[i].priv;
366		if (filter(tx_pend, data))
367			dismisser(tx_pend);
368	}
369}
370
371/****************************** receive queue ********************************/
372
373int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
374{
375	struct smc_wr_rx_handler *h_iter;
376	int rc = 0;
377
378	spin_lock(&smc_wr_rx_hash_lock);
379	hash_for_each_possible(smc_wr_rx_hash, h_iter, list, handler->type) {
380		if (h_iter->type == handler->type) {
381			rc = -EEXIST;
382			goto out_unlock;
383		}
384	}
385	hash_add(smc_wr_rx_hash, &handler->list, handler->type);
386out_unlock:
387	spin_unlock(&smc_wr_rx_hash_lock);
388	return rc;
389}
390
391/* Demultiplex a received work request based on the message type to its handler.
 392 * Relies on smc_wr_rx_hash having been completely filled before any IB WRs
 393 * are posted, and not being modified afterwards, so we don't need to lock it.
394 */
395static inline void smc_wr_rx_demultiplex(struct ib_wc *wc)
396{
397	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
398	struct smc_wr_rx_handler *handler;
399	struct smc_wr_rx_hdr *wr_rx;
400	u64 temp_wr_id;
401	u32 index;
402
403	if (wc->byte_len < sizeof(*wr_rx))
404		return; /* short message */
405	temp_wr_id = wc->wr_id;
406	index = do_div(temp_wr_id, link->wr_rx_cnt);
407	wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[index];
408	hash_for_each_possible(smc_wr_rx_hash, handler, list, wr_rx->type) {
409		if (handler->type == wr_rx->type)
410			handler->handler(wc, wr_rx);
411	}
412}
413
414static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
415{
416	struct smc_link *link;
417	int i;
418
419	for (i = 0; i < num; i++) {
420		link = wc[i].qp->qp_context;
421		if (wc[i].status == IB_WC_SUCCESS) {
422			link->wr_rx_tstamp = jiffies;
423			smc_wr_rx_demultiplex(&wc[i]);
424			smc_wr_rx_post(link); /* refill WR RX */
425		} else {
426			/* handle status errors */
427			switch (wc[i].status) {
428			case IB_WC_RETRY_EXC_ERR:
429			case IB_WC_RNR_RETRY_EXC_ERR:
430			case IB_WC_WR_FLUSH_ERR:
431				smcr_link_down_cond_sched(link);
432				break;
433			default:
434				smc_wr_rx_post(link); /* refill WR RX */
435				break;
436			}
437		}
438	}
439}
440
441static void smc_wr_rx_tasklet_fn(struct tasklet_struct *t)
442{
443	struct smc_ib_device *dev = from_tasklet(dev, t, recv_tasklet);
444	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
445	int polled = 0;
446	int rc;
447
448again:
449	polled++;
450	do {
451		memset(&wc, 0, sizeof(wc));
452		rc = ib_poll_cq(dev->roce_cq_recv, SMC_WR_MAX_POLL_CQE, wc);
453		if (polled == 1) {
454			ib_req_notify_cq(dev->roce_cq_recv,
455					 IB_CQ_SOLICITED_MASK
456					 | IB_CQ_REPORT_MISSED_EVENTS);
457		}
458		if (!rc)
459			break;
460		smc_wr_rx_process_cqes(&wc[0], rc);
461	} while (rc > 0);
462	if (polled == 1)
463		goto again;
464}
465
466void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
467{
468	struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;
469
470	tasklet_schedule(&dev->recv_tasklet);
471}
472
473int smc_wr_rx_post_init(struct smc_link *link)
474{
475	u32 i;
476	int rc = 0;
477
478	for (i = 0; i < link->wr_rx_cnt; i++)
479		rc = smc_wr_rx_post(link);
480	return rc;
481}
482
483/***************************** init, exit, misc ******************************/
484
485void smc_wr_remember_qp_attr(struct smc_link *lnk)
486{
487	struct ib_qp_attr *attr = &lnk->qp_attr;
488	struct ib_qp_init_attr init_attr;
489
490	memset(attr, 0, sizeof(*attr));
491	memset(&init_attr, 0, sizeof(init_attr));
492	ib_query_qp(lnk->roce_qp, attr,
493		    IB_QP_STATE |
494		    IB_QP_CUR_STATE |
495		    IB_QP_PKEY_INDEX |
496		    IB_QP_PORT |
497		    IB_QP_QKEY |
498		    IB_QP_AV |
499		    IB_QP_PATH_MTU |
500		    IB_QP_TIMEOUT |
501		    IB_QP_RETRY_CNT |
502		    IB_QP_RNR_RETRY |
503		    IB_QP_RQ_PSN |
504		    IB_QP_ALT_PATH |
505		    IB_QP_MIN_RNR_TIMER |
506		    IB_QP_SQ_PSN |
507		    IB_QP_PATH_MIG_STATE |
508		    IB_QP_CAP |
509		    IB_QP_DEST_QPN,
510		    &init_attr);
511
512	lnk->wr_tx_cnt = min_t(size_t, SMC_WR_BUF_CNT,
513			       lnk->qp_attr.cap.max_send_wr);
514	lnk->wr_rx_cnt = min_t(size_t, SMC_WR_BUF_CNT * 3,
515			       lnk->qp_attr.cap.max_recv_wr);
516}
517
518static void smc_wr_init_sge(struct smc_link *lnk)
519{
520	u32 i;
521
522	for (i = 0; i < lnk->wr_tx_cnt; i++) {
523		lnk->wr_tx_sges[i].addr =
524			lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;
525		lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;
526		lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
527		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[0].lkey =
528			lnk->roce_pd->local_dma_lkey;
529		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[1].lkey =
530			lnk->roce_pd->local_dma_lkey;
531		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[0].lkey =
532			lnk->roce_pd->local_dma_lkey;
533		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[1].lkey =
534			lnk->roce_pd->local_dma_lkey;
535		lnk->wr_tx_ibs[i].next = NULL;
536		lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i];
537		lnk->wr_tx_ibs[i].num_sge = 1;
538		lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
539		lnk->wr_tx_ibs[i].send_flags =
540			IB_SEND_SIGNALED | IB_SEND_SOLICITED;
541		lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE;
542		lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE;
543		lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list =
544			lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge;
545		lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list =
546			lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge;
547	}
548	for (i = 0; i < lnk->wr_rx_cnt; i++) {
549		lnk->wr_rx_sges[i].addr =
550			lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE;
551		lnk->wr_rx_sges[i].length = SMC_WR_BUF_SIZE;
552		lnk->wr_rx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
553		lnk->wr_rx_ibs[i].next = NULL;
554		lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i];
555		lnk->wr_rx_ibs[i].num_sge = 1;
556	}
557	lnk->wr_reg.wr.next = NULL;
558	lnk->wr_reg.wr.num_sge = 0;
559	lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED;
560	lnk->wr_reg.wr.opcode = IB_WR_REG_MR;
561	lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
562}
563
564void smc_wr_free_link(struct smc_link *lnk)
565{
566	struct ib_device *ibdev;
567
568	if (!lnk->smcibdev)
569		return;
570	ibdev = lnk->smcibdev->ibdev;
571
572	smc_wr_wakeup_reg_wait(lnk);
573	smc_wr_wakeup_tx_wait(lnk);
574
575	if (smc_wr_tx_wait_no_pending_sends(lnk))
576		memset(lnk->wr_tx_mask, 0,
577		       BITS_TO_LONGS(SMC_WR_BUF_CNT) *
578						sizeof(*lnk->wr_tx_mask));
579	wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt)));
580	wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt)));
581
582	if (lnk->wr_rx_dma_addr) {
583		ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
584				    SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
585				    DMA_FROM_DEVICE);
586		lnk->wr_rx_dma_addr = 0;
587	}
588	if (lnk->wr_tx_dma_addr) {
589		ib_dma_unmap_single(ibdev, lnk->wr_tx_dma_addr,
590				    SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
591				    DMA_TO_DEVICE);
592		lnk->wr_tx_dma_addr = 0;
593	}
594}
595
596void smc_wr_free_link_mem(struct smc_link *lnk)
597{
598	kfree(lnk->wr_tx_compl);
599	lnk->wr_tx_compl = NULL;
600	kfree(lnk->wr_tx_pends);
601	lnk->wr_tx_pends = NULL;
602	kfree(lnk->wr_tx_mask);
603	lnk->wr_tx_mask = NULL;
604	kfree(lnk->wr_tx_sges);
605	lnk->wr_tx_sges = NULL;
606	kfree(lnk->wr_tx_rdma_sges);
607	lnk->wr_tx_rdma_sges = NULL;
608	kfree(lnk->wr_rx_sges);
609	lnk->wr_rx_sges = NULL;
610	kfree(lnk->wr_tx_rdmas);
611	lnk->wr_tx_rdmas = NULL;
612	kfree(lnk->wr_rx_ibs);
613	lnk->wr_rx_ibs = NULL;
614	kfree(lnk->wr_tx_ibs);
615	lnk->wr_tx_ibs = NULL;
616	kfree(lnk->wr_tx_bufs);
617	lnk->wr_tx_bufs = NULL;
618	kfree(lnk->wr_rx_bufs);
619	lnk->wr_rx_bufs = NULL;
620}
621
622int smc_wr_alloc_link_mem(struct smc_link *link)
623{
624	/* allocate link related memory */
625	link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL);
626	if (!link->wr_tx_bufs)
627		goto no_mem;
628	link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, SMC_WR_BUF_SIZE,
629				   GFP_KERNEL);
630	if (!link->wr_rx_bufs)
631		goto no_mem_wr_tx_bufs;
632	link->wr_tx_ibs = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_ibs[0]),
633				  GFP_KERNEL);
634	if (!link->wr_tx_ibs)
635		goto no_mem_wr_rx_bufs;
636	link->wr_rx_ibs = kcalloc(SMC_WR_BUF_CNT * 3,
637				  sizeof(link->wr_rx_ibs[0]),
638				  GFP_KERNEL);
639	if (!link->wr_rx_ibs)
640		goto no_mem_wr_tx_ibs;
641	link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT,
642				    sizeof(link->wr_tx_rdmas[0]),
643				    GFP_KERNEL);
644	if (!link->wr_tx_rdmas)
645		goto no_mem_wr_rx_ibs;
646	link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT,
647					sizeof(link->wr_tx_rdma_sges[0]),
648					GFP_KERNEL);
649	if (!link->wr_tx_rdma_sges)
650		goto no_mem_wr_tx_rdmas;
651	link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]),
652				   GFP_KERNEL);
653	if (!link->wr_tx_sges)
654		goto no_mem_wr_tx_rdma_sges;
655	link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
656				   sizeof(link->wr_rx_sges[0]),
657				   GFP_KERNEL);
658	if (!link->wr_rx_sges)
659		goto no_mem_wr_tx_sges;
660	link->wr_tx_mask = kcalloc(BITS_TO_LONGS(SMC_WR_BUF_CNT),
661				   sizeof(*link->wr_tx_mask),
662				   GFP_KERNEL);
663	if (!link->wr_tx_mask)
664		goto no_mem_wr_rx_sges;
665	link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT,
666				    sizeof(link->wr_tx_pends[0]),
667				    GFP_KERNEL);
668	if (!link->wr_tx_pends)
669		goto no_mem_wr_tx_mask;
670	link->wr_tx_compl = kcalloc(SMC_WR_BUF_CNT,
671				    sizeof(link->wr_tx_compl[0]),
672				    GFP_KERNEL);
673	if (!link->wr_tx_compl)
674		goto no_mem_wr_tx_pends;
675	return 0;
676
677no_mem_wr_tx_pends:
678	kfree(link->wr_tx_pends);
679no_mem_wr_tx_mask:
680	kfree(link->wr_tx_mask);
681no_mem_wr_rx_sges:
682	kfree(link->wr_rx_sges);
683no_mem_wr_tx_sges:
684	kfree(link->wr_tx_sges);
685no_mem_wr_tx_rdma_sges:
686	kfree(link->wr_tx_rdma_sges);
687no_mem_wr_tx_rdmas:
688	kfree(link->wr_tx_rdmas);
689no_mem_wr_rx_ibs:
690	kfree(link->wr_rx_ibs);
691no_mem_wr_tx_ibs:
692	kfree(link->wr_tx_ibs);
693no_mem_wr_rx_bufs:
694	kfree(link->wr_rx_bufs);
695no_mem_wr_tx_bufs:
696	kfree(link->wr_tx_bufs);
697no_mem:
698	return -ENOMEM;
699}
700
701void smc_wr_remove_dev(struct smc_ib_device *smcibdev)
702{
703	tasklet_kill(&smcibdev->recv_tasklet);
704	tasklet_kill(&smcibdev->send_tasklet);
705}
706
707void smc_wr_add_dev(struct smc_ib_device *smcibdev)
708{
709	tasklet_setup(&smcibdev->recv_tasklet, smc_wr_rx_tasklet_fn);
710	tasklet_setup(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn);
711}
712
713int smc_wr_create_link(struct smc_link *lnk)
714{
715	struct ib_device *ibdev = lnk->smcibdev->ibdev;
716	int rc = 0;
717
718	smc_wr_tx_set_wr_id(&lnk->wr_tx_id, 0);
719	lnk->wr_rx_id = 0;
720	lnk->wr_rx_dma_addr = ib_dma_map_single(
721		ibdev, lnk->wr_rx_bufs,	SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
722		DMA_FROM_DEVICE);
723	if (ib_dma_mapping_error(ibdev, lnk->wr_rx_dma_addr)) {
724		lnk->wr_rx_dma_addr = 0;
725		rc = -EIO;
726		goto out;
727	}
728	lnk->wr_tx_dma_addr = ib_dma_map_single(
729		ibdev, lnk->wr_tx_bufs,	SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
730		DMA_TO_DEVICE);
731	if (ib_dma_mapping_error(ibdev, lnk->wr_tx_dma_addr)) {
732		rc = -EIO;
733		goto dma_unmap;
734	}
735	smc_wr_init_sge(lnk);
736	memset(lnk->wr_tx_mask, 0,
737	       BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
738	init_waitqueue_head(&lnk->wr_tx_wait);
739	atomic_set(&lnk->wr_tx_refcnt, 0);
740	init_waitqueue_head(&lnk->wr_reg_wait);
741	atomic_set(&lnk->wr_reg_refcnt, 0);
742	return rc;
743
744dma_unmap:
745	ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
746			    SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
747			    DMA_FROM_DEVICE);
748	lnk->wr_rx_dma_addr = 0;
749out:
750	return rc;
751}