Linux Audio

Check our new training course

Open-source upstreaming

Need help getting support for your hardware into upstream Linux?
Loading...
  1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
  2/*
  3 * Copyright(c) 2015 - 2018 Intel Corporation.
  4 */
  5
  6#include "hfi.h"
  7#include "verbs_txreq.h"
  8#include "qp.h"
  9
 10/* cut down ridiculously long IB macro names */
 11#define OP(x) UC_OP(x)
 12
 13/**
 14 * hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
 15 * @qp: a pointer to the QP
 16 * @ps: the current packet state
 17 *
 18 * Assume s_lock is held.
 19 *
 20 * Return 1 if constructed; otherwise, return 0.
 21 */
 22int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 23{
 24	struct hfi1_qp_priv *priv = qp->priv;
 25	struct ib_other_headers *ohdr;
 26	struct rvt_swqe *wqe;
 27	u32 hwords;
 28	u32 bth0 = 0;
 29	u32 len;
 30	u32 pmtu = qp->pmtu;
 31	int middle = 0;
 32
 33	ps->s_txreq = get_txreq(ps->dev, qp);
 34	if (!ps->s_txreq)
 35		goto bail_no_tx;
 36
 37	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
 38		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 39			goto bail;
 40		/* We are in the error state, flush the work request. */
 41		if (qp->s_last == READ_ONCE(qp->s_head))
 42			goto bail;
 43		/* If DMAs are in progress, we can't flush immediately. */
 44		if (iowait_sdma_pending(&priv->s_iowait)) {
 45			qp->s_flags |= RVT_S_WAIT_DMA;
 46			goto bail;
 47		}
 48		clear_ahg(qp);
 49		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 50		rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
 51		goto done_free_tx;
 52	}
 53
 54	if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
 55		/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 56		hwords = 5;
 57		if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
 58			ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
 59		else
 60			ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
 61	} else {
 62		/* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
 63		hwords = 7;
 64		if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
 65		    (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))))
 66			ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
 67		else
 68			ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
 69	}
 70
 71	/* Get the next send request. */
 72	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 73	qp->s_wqe = NULL;
 74	switch (qp->s_state) {
 75	default:
 76		if (!(ib_rvt_state_ops[qp->state] &
 77		    RVT_PROCESS_NEXT_SEND_OK))
 78			goto bail;
 79		/* Check if send work queue is empty. */
 80		if (qp->s_cur == READ_ONCE(qp->s_head)) {
 81			clear_ahg(qp);
 82			goto bail;
 83		}
 84		/*
 85		 * Local operations are processed immediately
 86		 * after all prior requests have completed.
 87		 */
 88		if (wqe->wr.opcode == IB_WR_REG_MR ||
 89		    wqe->wr.opcode == IB_WR_LOCAL_INV) {
 90			int local_ops = 0;
 91			int err = 0;
 92
 93			if (qp->s_last != qp->s_cur)
 94				goto bail;
 95			if (++qp->s_cur == qp->s_size)
 96				qp->s_cur = 0;
 97			if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
 98				err = rvt_invalidate_rkey(
 99					qp, wqe->wr.ex.invalidate_rkey);
100				local_ops = 1;
101			}
102			rvt_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
103							: IB_WC_SUCCESS);
104			if (local_ops)
105				atomic_dec(&qp->local_ops_pending);
106			goto done_free_tx;
107		}
108		/*
109		 * Start a new request.
110		 */
111		qp->s_psn = wqe->psn;
112		qp->s_sge.sge = wqe->sg_list[0];
113		qp->s_sge.sg_list = wqe->sg_list + 1;
114		qp->s_sge.num_sge = wqe->wr.num_sge;
115		qp->s_sge.total_len = wqe->length;
116		len = wqe->length;
117		qp->s_len = len;
118		switch (wqe->wr.opcode) {
119		case IB_WR_SEND:
120		case IB_WR_SEND_WITH_IMM:
121			if (len > pmtu) {
122				qp->s_state = OP(SEND_FIRST);
123				len = pmtu;
124				break;
125			}
126			if (wqe->wr.opcode == IB_WR_SEND) {
127				qp->s_state = OP(SEND_ONLY);
128			} else {
129				qp->s_state =
130					OP(SEND_ONLY_WITH_IMMEDIATE);
131				/* Immediate data comes after the BTH */
132				ohdr->u.imm_data = wqe->wr.ex.imm_data;
133				hwords += 1;
134			}
135			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
136				bth0 |= IB_BTH_SOLICITED;
137			qp->s_wqe = wqe;
138			if (++qp->s_cur >= qp->s_size)
139				qp->s_cur = 0;
140			break;
141
142		case IB_WR_RDMA_WRITE:
143		case IB_WR_RDMA_WRITE_WITH_IMM:
144			ohdr->u.rc.reth.vaddr =
145				cpu_to_be64(wqe->rdma_wr.remote_addr);
146			ohdr->u.rc.reth.rkey =
147				cpu_to_be32(wqe->rdma_wr.rkey);
148			ohdr->u.rc.reth.length = cpu_to_be32(len);
149			hwords += sizeof(struct ib_reth) / 4;
150			if (len > pmtu) {
151				qp->s_state = OP(RDMA_WRITE_FIRST);
152				len = pmtu;
153				break;
154			}
155			if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
156				qp->s_state = OP(RDMA_WRITE_ONLY);
157			} else {
158				qp->s_state =
159					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
160				/* Immediate data comes after the RETH */
161				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
162				hwords += 1;
163				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
164					bth0 |= IB_BTH_SOLICITED;
165			}
166			qp->s_wqe = wqe;
167			if (++qp->s_cur >= qp->s_size)
168				qp->s_cur = 0;
169			break;
170
171		default:
172			goto bail;
173		}
174		break;
175
176	case OP(SEND_FIRST):
177		qp->s_state = OP(SEND_MIDDLE);
178		fallthrough;
179	case OP(SEND_MIDDLE):
180		len = qp->s_len;
181		if (len > pmtu) {
182			len = pmtu;
183			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
184			break;
185		}
186		if (wqe->wr.opcode == IB_WR_SEND) {
187			qp->s_state = OP(SEND_LAST);
188		} else {
189			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
190			/* Immediate data comes after the BTH */
191			ohdr->u.imm_data = wqe->wr.ex.imm_data;
192			hwords += 1;
193		}
194		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
195			bth0 |= IB_BTH_SOLICITED;
196		qp->s_wqe = wqe;
197		if (++qp->s_cur >= qp->s_size)
198			qp->s_cur = 0;
199		break;
200
201	case OP(RDMA_WRITE_FIRST):
202		qp->s_state = OP(RDMA_WRITE_MIDDLE);
203		fallthrough;
204	case OP(RDMA_WRITE_MIDDLE):
205		len = qp->s_len;
206		if (len > pmtu) {
207			len = pmtu;
208			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
209			break;
210		}
211		if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
212			qp->s_state = OP(RDMA_WRITE_LAST);
213		} else {
214			qp->s_state =
215				OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
216			/* Immediate data comes after the BTH */
217			ohdr->u.imm_data = wqe->wr.ex.imm_data;
218			hwords += 1;
219			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
220				bth0 |= IB_BTH_SOLICITED;
221		}
222		qp->s_wqe = wqe;
223		if (++qp->s_cur >= qp->s_size)
224			qp->s_cur = 0;
225		break;
226	}
227	qp->s_len -= len;
228	ps->s_txreq->hdr_dwords = hwords;
229	ps->s_txreq->sde = priv->s_sde;
230	ps->s_txreq->ss = &qp->s_sge;
231	ps->s_txreq->s_cur_size = len;
232	hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
233			     qp->remote_qpn, mask_psn(qp->s_psn++),
234			     middle, ps);
235	return 1;
236
237done_free_tx:
238	hfi1_put_txreq(ps->s_txreq);
239	ps->s_txreq = NULL;
240	return 1;
241
242bail:
243	hfi1_put_txreq(ps->s_txreq);
244
245bail_no_tx:
246	ps->s_txreq = NULL;
247	qp->s_flags &= ~RVT_S_BUSY;
248	return 0;
249}
250
251/**
252 * hfi1_uc_rcv - handle an incoming UC packet
253 * @packet: the packet structure
254 *
255 * This is called from qp_rcv() to process an incoming UC packet
256 * for the given QP.
257 * Called at interrupt level.
258 */
259void hfi1_uc_rcv(struct hfi1_packet *packet)
260{
261	struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
262	void *data = packet->payload;
263	u32 tlen = packet->tlen;
264	struct rvt_qp *qp = packet->qp;
265	struct ib_other_headers *ohdr = packet->ohdr;
266	u32 opcode = packet->opcode;
267	u32 hdrsize = packet->hlen;
268	u32 psn;
269	u32 pad = packet->pad;
270	struct ib_wc wc;
271	u32 pmtu = qp->pmtu;
272	struct ib_reth *reth;
273	int ret;
274	u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
275
276	if (hfi1_ruc_check_hdr(ibp, packet))
277		return;
278
279	process_ecn(qp, packet);
280
281	psn = ib_bth_get_psn(ohdr);
282	/* Compare the PSN verses the expected PSN. */
283	if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
284		/*
285		 * Handle a sequence error.
286		 * Silently drop any current message.
287		 */
288		qp->r_psn = psn;
289inv:
290		if (qp->r_state == OP(SEND_FIRST) ||
291		    qp->r_state == OP(SEND_MIDDLE)) {
292			set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
293			qp->r_sge.num_sge = 0;
294		} else {
295			rvt_put_ss(&qp->r_sge);
296		}
297		qp->r_state = OP(SEND_LAST);
298		switch (opcode) {
299		case OP(SEND_FIRST):
300		case OP(SEND_ONLY):
301		case OP(SEND_ONLY_WITH_IMMEDIATE):
302			goto send_first;
303
304		case OP(RDMA_WRITE_FIRST):
305		case OP(RDMA_WRITE_ONLY):
306		case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
307			goto rdma_first;
308
309		default:
310			goto drop;
311		}
312	}
313
314	/* Check for opcode sequence errors. */
315	switch (qp->r_state) {
316	case OP(SEND_FIRST):
317	case OP(SEND_MIDDLE):
318		if (opcode == OP(SEND_MIDDLE) ||
319		    opcode == OP(SEND_LAST) ||
320		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
321			break;
322		goto inv;
323
324	case OP(RDMA_WRITE_FIRST):
325	case OP(RDMA_WRITE_MIDDLE):
326		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
327		    opcode == OP(RDMA_WRITE_LAST) ||
328		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
329			break;
330		goto inv;
331
332	default:
333		if (opcode == OP(SEND_FIRST) ||
334		    opcode == OP(SEND_ONLY) ||
335		    opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
336		    opcode == OP(RDMA_WRITE_FIRST) ||
337		    opcode == OP(RDMA_WRITE_ONLY) ||
338		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
339			break;
340		goto inv;
341	}
342
343	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
344		rvt_comm_est(qp);
345
346	/* OK, process the packet. */
347	switch (opcode) {
348	case OP(SEND_FIRST):
349	case OP(SEND_ONLY):
350	case OP(SEND_ONLY_WITH_IMMEDIATE):
351send_first:
352		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
353			qp->r_sge = qp->s_rdma_read_sge;
354		} else {
355			ret = rvt_get_rwqe(qp, false);
356			if (ret < 0)
357				goto op_err;
358			if (!ret)
359				goto drop;
360			/*
361			 * qp->s_rdma_read_sge will be the owner
362			 * of the mr references.
363			 */
364			qp->s_rdma_read_sge = qp->r_sge;
365		}
366		qp->r_rcv_len = 0;
367		if (opcode == OP(SEND_ONLY))
368			goto no_immediate_data;
369		else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
370			goto send_last_imm;
371		fallthrough;
372	case OP(SEND_MIDDLE):
373		/* Check for invalid length PMTU or posted rwqe len. */
374		/*
375		 * There will be no padding for 9B packet but 16B packets
376		 * will come in with some padding since we always add
377		 * CRC and LT bytes which will need to be flit aligned
378		 */
379		if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
380			goto rewind;
381		qp->r_rcv_len += pmtu;
382		if (unlikely(qp->r_rcv_len > qp->r_len))
383			goto rewind;
384		rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
385		break;
386
387	case OP(SEND_LAST_WITH_IMMEDIATE):
388send_last_imm:
389		wc.ex.imm_data = ohdr->u.imm_data;
390		wc.wc_flags = IB_WC_WITH_IMM;
391		goto send_last;
392	case OP(SEND_LAST):
393no_immediate_data:
394		wc.ex.imm_data = 0;
395		wc.wc_flags = 0;
396send_last:
397		/* Check for invalid length. */
398		/* LAST len should be >= 1 */
399		if (unlikely(tlen < (hdrsize + extra_bytes)))
400			goto rewind;
401		/* Don't count the CRC. */
402		tlen -= (hdrsize + extra_bytes);
403		wc.byte_len = tlen + qp->r_rcv_len;
404		if (unlikely(wc.byte_len > qp->r_len))
405			goto rewind;
406		wc.opcode = IB_WC_RECV;
407		rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
408		rvt_put_ss(&qp->s_rdma_read_sge);
409last_imm:
410		wc.wr_id = qp->r_wr_id;
411		wc.status = IB_WC_SUCCESS;
412		wc.qp = &qp->ibqp;
413		wc.src_qp = qp->remote_qpn;
414		wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
415		/*
416		 * It seems that IB mandates the presence of an SL in a
417		 * work completion only for the UD transport (see section
418		 * 11.4.2 of IBTA Vol. 1).
419		 *
420		 * However, the way the SL is chosen below is consistent
421		 * with the way that IB/qib works and is trying avoid
422		 * introducing incompatibilities.
423		 *
424		 * See also OPA Vol. 1, section 9.7.6, and table 9-17.
425		 */
426		wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
427		/* zero fields that are N/A */
428		wc.vendor_err = 0;
429		wc.pkey_index = 0;
430		wc.dlid_path_bits = 0;
431		wc.port_num = 0;
432		/* Signal completion event if the solicited bit is set. */
433		rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
434		break;
435
436	case OP(RDMA_WRITE_FIRST):
437	case OP(RDMA_WRITE_ONLY):
438	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
439rdma_first:
440		if (unlikely(!(qp->qp_access_flags &
441			       IB_ACCESS_REMOTE_WRITE))) {
442			goto drop;
443		}
444		reth = &ohdr->u.rc.reth;
445		qp->r_len = be32_to_cpu(reth->length);
446		qp->r_rcv_len = 0;
447		qp->r_sge.sg_list = NULL;
448		if (qp->r_len != 0) {
449			u32 rkey = be32_to_cpu(reth->rkey);
450			u64 vaddr = be64_to_cpu(reth->vaddr);
451			int ok;
452
453			/* Check rkey */
454			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
455					 vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
456			if (unlikely(!ok))
457				goto drop;
458			qp->r_sge.num_sge = 1;
459		} else {
460			qp->r_sge.num_sge = 0;
461			qp->r_sge.sge.mr = NULL;
462			qp->r_sge.sge.vaddr = NULL;
463			qp->r_sge.sge.length = 0;
464			qp->r_sge.sge.sge_length = 0;
465		}
466		if (opcode == OP(RDMA_WRITE_ONLY)) {
467			goto rdma_last;
468		} else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
469			wc.ex.imm_data = ohdr->u.rc.imm_data;
470			goto rdma_last_imm;
471		}
472		fallthrough;
473	case OP(RDMA_WRITE_MIDDLE):
474		/* Check for invalid length PMTU or posted rwqe len. */
475		if (unlikely(tlen != (hdrsize + pmtu + 4)))
476			goto drop;
477		qp->r_rcv_len += pmtu;
478		if (unlikely(qp->r_rcv_len > qp->r_len))
479			goto drop;
480		rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
481		break;
482
483	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
484		wc.ex.imm_data = ohdr->u.imm_data;
485rdma_last_imm:
486		wc.wc_flags = IB_WC_WITH_IMM;
487
488		/* Check for invalid length. */
489		/* LAST len should be >= 1 */
490		if (unlikely(tlen < (hdrsize + pad + 4)))
491			goto drop;
492		/* Don't count the CRC. */
493		tlen -= (hdrsize + extra_bytes);
494		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
495			goto drop;
496		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
497			rvt_put_ss(&qp->s_rdma_read_sge);
498		} else {
499			ret = rvt_get_rwqe(qp, true);
500			if (ret < 0)
501				goto op_err;
502			if (!ret)
503				goto drop;
504		}
505		wc.byte_len = qp->r_len;
506		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
507		rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
508		rvt_put_ss(&qp->r_sge);
509		goto last_imm;
510
511	case OP(RDMA_WRITE_LAST):
512rdma_last:
513		/* Check for invalid length. */
514		/* LAST len should be >= 1 */
515		if (unlikely(tlen < (hdrsize + pad + 4)))
516			goto drop;
517		/* Don't count the CRC. */
518		tlen -= (hdrsize + extra_bytes);
519		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
520			goto drop;
521		rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
522		rvt_put_ss(&qp->r_sge);
523		break;
524
525	default:
526		/* Drop packet for unknown opcodes. */
527		goto drop;
528	}
529	qp->r_psn++;
530	qp->r_state = opcode;
531	return;
532
533rewind:
534	set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
535	qp->r_sge.num_sge = 0;
536drop:
537	ibp->rvp.n_pkt_drops++;
538	return;
539
540op_err:
541	rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
542}