1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2/*
3 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
4 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
5 */
6
7#include <linux/skbuff.h>
8
9#include "rxe.h"
10#include "rxe_loc.h"
11#include "rxe_queue.h"
12
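/* Responder state machine. Roughly, the successful path for a request is
 * GET_REQ -> CHK_PSN -> CHK_OP_SEQ -> CHK_OP_VALID -> CHK_RESOURCE ->
 * CHK_LENGTH -> CHK_RKEY -> EXECUTE -> (READ_REPLY | ATOMIC_REPLY |
 * ATOMIC_WRITE_REPLY | PROCESS_FLUSH | COMPLETE) -> ACKNOWLEDGE ->
 * CLEANUP -> DONE/EXIT; the ERR_* states map to NAKs and/or flushed
 * completions in rxe_responder() below.
 */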
13enum resp_states {
14 RESPST_NONE,
15 RESPST_GET_REQ,
16 RESPST_CHK_PSN,
17 RESPST_CHK_OP_SEQ,
18 RESPST_CHK_OP_VALID,
19 RESPST_CHK_RESOURCE,
20 RESPST_CHK_LENGTH,
21 RESPST_CHK_RKEY,
22 RESPST_EXECUTE,
23 RESPST_READ_REPLY,
24 RESPST_ATOMIC_REPLY,
25 RESPST_ATOMIC_WRITE_REPLY,
26 RESPST_PROCESS_FLUSH,
27 RESPST_COMPLETE,
28 RESPST_ACKNOWLEDGE,
29 RESPST_CLEANUP,
30 RESPST_DUPLICATE_REQUEST,
31 RESPST_ERR_MALFORMED_WQE,
32 RESPST_ERR_UNSUPPORTED_OPCODE,
33 RESPST_ERR_MISALIGNED_ATOMIC,
34 RESPST_ERR_PSN_OUT_OF_SEQ,
35 RESPST_ERR_MISSING_OPCODE_FIRST,
36 RESPST_ERR_MISSING_OPCODE_LAST_C,
37 RESPST_ERR_MISSING_OPCODE_LAST_D1E,
38 RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
39 RESPST_ERR_RNR,
40 RESPST_ERR_RKEY_VIOLATION,
41 RESPST_ERR_INVALIDATE_RKEY,
42 RESPST_ERR_LENGTH,
43 RESPST_ERR_CQ_OVERFLOW,
44 RESPST_ERROR,
45 RESPST_RESET,
46 RESPST_DONE,
47 RESPST_EXIT,
48};
49
50static char *resp_state_name[] = {
51 [RESPST_NONE] = "NONE",
52 [RESPST_GET_REQ] = "GET_REQ",
53 [RESPST_CHK_PSN] = "CHK_PSN",
54 [RESPST_CHK_OP_SEQ] = "CHK_OP_SEQ",
55 [RESPST_CHK_OP_VALID] = "CHK_OP_VALID",
56 [RESPST_CHK_RESOURCE] = "CHK_RESOURCE",
57 [RESPST_CHK_LENGTH] = "CHK_LENGTH",
58 [RESPST_CHK_RKEY] = "CHK_RKEY",
59 [RESPST_EXECUTE] = "EXECUTE",
60 [RESPST_READ_REPLY] = "READ_REPLY",
61 [RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY",
62 [RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY",
63 [RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH",
64 [RESPST_COMPLETE] = "COMPLETE",
65 [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
66 [RESPST_CLEANUP] = "CLEANUP",
67 [RESPST_DUPLICATE_REQUEST] = "DUPLICATE_REQUEST",
68 [RESPST_ERR_MALFORMED_WQE] = "ERR_MALFORMED_WQE",
69 [RESPST_ERR_UNSUPPORTED_OPCODE] = "ERR_UNSUPPORTED_OPCODE",
70 [RESPST_ERR_MISALIGNED_ATOMIC] = "ERR_MISALIGNED_ATOMIC",
71 [RESPST_ERR_PSN_OUT_OF_SEQ] = "ERR_PSN_OUT_OF_SEQ",
72 [RESPST_ERR_MISSING_OPCODE_FIRST] = "ERR_MISSING_OPCODE_FIRST",
73 [RESPST_ERR_MISSING_OPCODE_LAST_C] = "ERR_MISSING_OPCODE_LAST_C",
74 [RESPST_ERR_MISSING_OPCODE_LAST_D1E] = "ERR_MISSING_OPCODE_LAST_D1E",
75 [RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ",
76 [RESPST_ERR_RNR] = "ERR_RNR",
77 [RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION",
78 [RESPST_ERR_INVALIDATE_RKEY] = "ERR_INVALIDATE_RKEY",
79 [RESPST_ERR_LENGTH] = "ERR_LENGTH",
80 [RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW",
81 [RESPST_ERROR] = "ERROR",
82 [RESPST_RESET] = "RESET",
83 [RESPST_DONE] = "DONE",
84 [RESPST_EXIT] = "EXIT",
85};
86
87/* rxe_recv calls here to add a request packet to the input queue */
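/* RDMA READ requests (which generate multi-packet replies) and any backlog
 * of more than one packet are deferred to the responder task via
 * rxe_sched_task(); a single non-read packet is run inline through
 * rxe_run_task(), presumably to keep latency low.
 */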
88void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
89{
90 int must_sched;
91 struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
92
93 skb_queue_tail(&qp->req_pkts, skb);
94
95 must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
96 (skb_queue_len(&qp->req_pkts) > 1);
97
98 if (must_sched)
99 rxe_sched_task(&qp->resp.task);
100 else
101 rxe_run_task(&qp->resp.task);
102}
103
104static inline enum resp_states get_req(struct rxe_qp *qp,
105 struct rxe_pkt_info **pkt_p)
106{
107 struct sk_buff *skb;
108
109 if (qp->resp.state == QP_STATE_ERROR) {
110 while ((skb = skb_dequeue(&qp->req_pkts))) {
111 rxe_put(qp);
112 kfree_skb(skb);
113 ib_device_put(qp->ibqp.device);
114 }
115
116 /* go drain recv wr queue */
117 return RESPST_CHK_RESOURCE;
118 }
119
120 skb = skb_peek(&qp->req_pkts);
121 if (!skb)
122 return RESPST_EXIT;
123
124 *pkt_p = SKB_TO_PKT(skb);
125
126 return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN;
127}
128
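/* Compare the packet PSN with the PSN the responder expects next. For RC,
 * a PSN ahead of the expected one is NAKed once (sent_psn_nak throttles
 * repeats) and an older PSN is handled as a duplicate request; for UC,
 * out-of-sequence packets simply cause the current message to be dropped
 * until a new first packet arrives.
 */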
129static enum resp_states check_psn(struct rxe_qp *qp,
130 struct rxe_pkt_info *pkt)
131{
132 int diff = psn_compare(pkt->psn, qp->resp.psn);
133 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
134
135 switch (qp_type(qp)) {
136 case IB_QPT_RC:
137 if (diff > 0) {
138 if (qp->resp.sent_psn_nak)
139 return RESPST_CLEANUP;
140
141 qp->resp.sent_psn_nak = 1;
142 rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ);
143 return RESPST_ERR_PSN_OUT_OF_SEQ;
144
145 } else if (diff < 0) {
146 rxe_counter_inc(rxe, RXE_CNT_DUP_REQ);
147 return RESPST_DUPLICATE_REQUEST;
148 }
149
150 if (qp->resp.sent_psn_nak)
151 qp->resp.sent_psn_nak = 0;
152
153 break;
154
155 case IB_QPT_UC:
156 if (qp->resp.drop_msg || diff != 0) {
157 if (pkt->mask & RXE_START_MASK) {
158 qp->resp.drop_msg = 0;
159 return RESPST_CHK_OP_SEQ;
160 }
161
162 qp->resp.drop_msg = 1;
163 return RESPST_CLEANUP;
164 }
165 break;
166 default:
167 break;
168 }
169
170 return RESPST_CHK_OP_SEQ;
171}
172
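/* Check that the packet opcode is a legal successor of the previous opcode
 * on this QP: a FIRST/MIDDLE must be followed by a MIDDLE/LAST of the same
 * operation, and a MIDDLE/LAST must have been preceded by a FIRST. RC
 * violations result in a NAK; UC violations typically drop the message.
 */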
173static enum resp_states check_op_seq(struct rxe_qp *qp,
174 struct rxe_pkt_info *pkt)
175{
176 switch (qp_type(qp)) {
177 case IB_QPT_RC:
178 switch (qp->resp.opcode) {
179 case IB_OPCODE_RC_SEND_FIRST:
180 case IB_OPCODE_RC_SEND_MIDDLE:
181 switch (pkt->opcode) {
182 case IB_OPCODE_RC_SEND_MIDDLE:
183 case IB_OPCODE_RC_SEND_LAST:
184 case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
185 case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
186 return RESPST_CHK_OP_VALID;
187 default:
188 return RESPST_ERR_MISSING_OPCODE_LAST_C;
189 }
190
191 case IB_OPCODE_RC_RDMA_WRITE_FIRST:
192 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
193 switch (pkt->opcode) {
194 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
195 case IB_OPCODE_RC_RDMA_WRITE_LAST:
196 case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
197 return RESPST_CHK_OP_VALID;
198 default:
199 return RESPST_ERR_MISSING_OPCODE_LAST_C;
200 }
201
202 default:
203 switch (pkt->opcode) {
204 case IB_OPCODE_RC_SEND_MIDDLE:
205 case IB_OPCODE_RC_SEND_LAST:
206 case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
207 case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
208 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
209 case IB_OPCODE_RC_RDMA_WRITE_LAST:
210 case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
211 return RESPST_ERR_MISSING_OPCODE_FIRST;
212 default:
213 return RESPST_CHK_OP_VALID;
214 }
215 }
216 break;
217
218 case IB_QPT_UC:
219 switch (qp->resp.opcode) {
220 case IB_OPCODE_UC_SEND_FIRST:
221 case IB_OPCODE_UC_SEND_MIDDLE:
222 switch (pkt->opcode) {
223 case IB_OPCODE_UC_SEND_MIDDLE:
224 case IB_OPCODE_UC_SEND_LAST:
225 case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
226 return RESPST_CHK_OP_VALID;
227 default:
228 return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
229 }
230
231 case IB_OPCODE_UC_RDMA_WRITE_FIRST:
232 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
233 switch (pkt->opcode) {
234 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
235 case IB_OPCODE_UC_RDMA_WRITE_LAST:
236 case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
237 return RESPST_CHK_OP_VALID;
238 default:
239 return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
240 }
241
242 default:
243 switch (pkt->opcode) {
244 case IB_OPCODE_UC_SEND_MIDDLE:
245 case IB_OPCODE_UC_SEND_LAST:
246 case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
247 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
248 case IB_OPCODE_UC_RDMA_WRITE_LAST:
249 case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
250 qp->resp.drop_msg = 1;
251 return RESPST_CLEANUP;
252 default:
253 return RESPST_CHK_OP_VALID;
254 }
255 }
256 break;
257
258 default:
259 return RESPST_CHK_OP_VALID;
260 }
261}
262
263static bool check_qp_attr_access(struct rxe_qp *qp,
264 struct rxe_pkt_info *pkt)
265{
266 if (((pkt->mask & RXE_READ_MASK) &&
267 !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
268 ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
269 !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
270 ((pkt->mask & RXE_ATOMIC_MASK) &&
271 !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
272 return false;
273
274 if (pkt->mask & RXE_FLUSH_MASK) {
275 u32 flush_type = feth_plt(pkt);
276
277 if ((flush_type & IB_FLUSH_GLOBAL &&
278 !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
279 (flush_type & IB_FLUSH_PERSISTENT &&
280 !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
281 return false;
282 }
283
284 return true;
285}
286
287static enum resp_states check_op_valid(struct rxe_qp *qp,
288 struct rxe_pkt_info *pkt)
289{
290 switch (qp_type(qp)) {
291 case IB_QPT_RC:
292 if (!check_qp_attr_access(qp, pkt))
293 return RESPST_ERR_UNSUPPORTED_OPCODE;
294
295 break;
296
297 case IB_QPT_UC:
298 if ((pkt->mask & RXE_WRITE_MASK) &&
299 !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
300 qp->resp.drop_msg = 1;
301 return RESPST_CLEANUP;
302 }
303
304 break;
305
306 case IB_QPT_UD:
307 case IB_QPT_GSI:
308 break;
309
310 default:
311 WARN_ON_ONCE(1);
312 break;
313 }
314
315 return RESPST_CHK_RESOURCE;
316}
317
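/* Copy the WQE at the head of the shared receive queue into
 * qp->resp.srq_wqe under the consumer lock, so the responder keeps a
 * stable copy after the consumer index is advanced, and raise
 * IB_EVENT_SRQ_LIMIT_REACHED once the queue count drops below the armed
 * SRQ limit.
 */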
318static enum resp_states get_srq_wqe(struct rxe_qp *qp)
319{
320 struct rxe_srq *srq = qp->srq;
321 struct rxe_queue *q = srq->rq.queue;
322 struct rxe_recv_wqe *wqe;
323 struct ib_event ev;
324 unsigned int count;
325 size_t size;
326 unsigned long flags;
327
328 if (srq->error)
329 return RESPST_ERR_RNR;
330
331 spin_lock_irqsave(&srq->rq.consumer_lock, flags);
332
333 wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
334 if (!wqe) {
335 spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
336 return RESPST_ERR_RNR;
337 }
338
339 /* don't trust user space data */
340 if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
341 spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
342 rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
343 return RESPST_ERR_MALFORMED_WQE;
344 }
345 size = sizeof(*wqe) + wqe->dma.num_sge * sizeof(struct rxe_sge);
346 memcpy(&qp->resp.srq_wqe, wqe, size);
347
348 qp->resp.wqe = &qp->resp.srq_wqe.wqe;
349 queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT);
350 count = queue_count(q, QUEUE_TYPE_FROM_CLIENT);
351
352 if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) {
353 srq->limit = 0;
354 goto event;
355 }
356
357 spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
358 return RESPST_CHK_LENGTH;
359
360event:
361 spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
362 ev.device = qp->ibqp.device;
363 ev.element.srq = qp->ibqp.srq;
364 ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
365 srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
366 return RESPST_CHK_LENGTH;
367}
368
369static enum resp_states check_resource(struct rxe_qp *qp,
370 struct rxe_pkt_info *pkt)
371{
372 struct rxe_srq *srq = qp->srq;
373
374 if (qp->resp.state == QP_STATE_ERROR) {
375 if (qp->resp.wqe) {
376 qp->resp.status = IB_WC_WR_FLUSH_ERR;
377 return RESPST_COMPLETE;
378 } else if (!srq) {
379 qp->resp.wqe = queue_head(qp->rq.queue,
380 QUEUE_TYPE_FROM_CLIENT);
381 if (qp->resp.wqe) {
382 qp->resp.status = IB_WC_WR_FLUSH_ERR;
383 return RESPST_COMPLETE;
384 } else {
385 return RESPST_EXIT;
386 }
387 } else {
388 return RESPST_EXIT;
389 }
390 }
391
392 if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
393 /* it is the requester's job not to send
394 * too many read/atomic ops; we just
395 * recycle the responder resource queue
396 */
397 if (likely(qp->attr.max_dest_rd_atomic > 0))
398 return RESPST_CHK_LENGTH;
399 else
400 return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
401 }
402
403 if (pkt->mask & RXE_RWR_MASK) {
404 if (srq)
405 return get_srq_wqe(qp);
406
407 qp->resp.wqe = queue_head(qp->rq.queue,
408 QUEUE_TYPE_FROM_CLIENT);
409 return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
410 }
411
412 return RESPST_CHK_LENGTH;
413}
414
415static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
416 struct rxe_pkt_info *pkt)
417{
418 /*
419 * See IBA C9-92
420 * For UD QPs we only check if the packet will fit in the
421 * receive buffer later. For RDMA operations additional
422 * length checks are performed in check_rkey.
423 */
424 if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
425 (qp_type(qp) == IB_QPT_UC))) {
426 unsigned int mtu = qp->mtu;
427 unsigned int payload = payload_size(pkt);
428
429 if ((pkt->mask & RXE_START_MASK) &&
430 (pkt->mask & RXE_END_MASK)) {
431 if (unlikely(payload > mtu)) {
432 rxe_dbg_qp(qp, "only packet too long");
433 return RESPST_ERR_LENGTH;
434 }
435 } else if ((pkt->mask & RXE_START_MASK) ||
436 (pkt->mask & RXE_MIDDLE_MASK)) {
437 if (unlikely(payload != mtu)) {
438 rxe_dbg_qp(qp, "first or middle packet not mtu");
439 return RESPST_ERR_LENGTH;
440 }
441 } else if (pkt->mask & RXE_END_MASK) {
442 if (unlikely((payload == 0) || (payload > mtu))) {
443 rxe_dbg_qp(qp, "last packet zero or too long");
444 return RESPST_ERR_LENGTH;
445 }
446 }
447 }
448
449 /* See IBA C9-94 */
450 if (pkt->mask & RXE_RETH_MASK) {
451 if (reth_len(pkt) > (1U << 31)) {
452 rxe_dbg_qp(qp, "dma length too long");
453 return RESPST_ERR_LENGTH;
454 }
455 }
456
457 return RESPST_CHK_RKEY;
458}
459
460static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
461{
462 qp->resp.va = reth_va(pkt);
463 qp->resp.offset = 0;
464 qp->resp.rkey = reth_rkey(pkt);
465 qp->resp.resid = reth_len(pkt);
466 qp->resp.length = reth_len(pkt);
467}
468
469static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
470{
471 qp->resp.va = atmeth_va(pkt);
472 qp->resp.offset = 0;
473 qp->resp.rkey = atmeth_rkey(pkt);
474 qp->resp.resid = sizeof(u64);
475}
476
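/* Translate the rkey carried in the RETH/AtomicETH into an MR, possibly
 * via a memory window, then verify the requested access rights, the
 * target address range and, for writes, the payload length and padding.
 * On success the MR reference is stashed in qp->resp.mr for the execute
 * step.
 */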
477static enum resp_states check_rkey(struct rxe_qp *qp,
478 struct rxe_pkt_info *pkt)
479{
480 struct rxe_mr *mr = NULL;
481 struct rxe_mw *mw = NULL;
482 u64 va;
483 u32 rkey;
484 u32 resid;
485 u32 pktlen;
486 int mtu = qp->mtu;
487 enum resp_states state;
488 int access = 0;
489
490 if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
491 if (pkt->mask & RXE_RETH_MASK)
492 qp_resp_from_reth(qp, pkt);
493
494 access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
495 : IB_ACCESS_REMOTE_WRITE;
496 } else if (pkt->mask & RXE_FLUSH_MASK) {
497 u32 flush_type = feth_plt(pkt);
498
499 if (pkt->mask & RXE_RETH_MASK)
500 qp_resp_from_reth(qp, pkt);
501
502 if (flush_type & IB_FLUSH_GLOBAL)
503 access |= IB_ACCESS_FLUSH_GLOBAL;
504 if (flush_type & IB_FLUSH_PERSISTENT)
505 access |= IB_ACCESS_FLUSH_PERSISTENT;
506 } else if (pkt->mask & RXE_ATOMIC_MASK) {
507 qp_resp_from_atmeth(qp, pkt);
508 access = IB_ACCESS_REMOTE_ATOMIC;
509 } else {
510 return RESPST_EXECUTE;
511 }
512
513 /* A zero-byte op is not required to set an addr or rkey. See C9-88 */
514 if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
515 (pkt->mask & RXE_RETH_MASK) &&
516 reth_len(pkt) == 0) {
517 return RESPST_EXECUTE;
518 }
519
520 va = qp->resp.va;
521 rkey = qp->resp.rkey;
522 resid = qp->resp.resid;
523 pktlen = payload_size(pkt);
524
525 if (rkey_is_mw(rkey)) {
526 mw = rxe_lookup_mw(qp, access, rkey);
527 if (!mw) {
528 rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
529 state = RESPST_ERR_RKEY_VIOLATION;
530 goto err;
531 }
532
533 mr = mw->mr;
534 if (!mr) {
535 rxe_dbg_qp(qp, "MW doesn't have an MR\n");
536 state = RESPST_ERR_RKEY_VIOLATION;
537 goto err;
538 }
539
540 if (mw->access & IB_ZERO_BASED)
541 qp->resp.offset = mw->addr;
542
543 rxe_put(mw);
544 rxe_get(mr);
545 } else {
546 mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
547 if (!mr) {
548 rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
549 state = RESPST_ERR_RKEY_VIOLATION;
550 goto err;
551 }
552 }
553
554 if (pkt->mask & RXE_FLUSH_MASK) {
555 /* A FLUSH of the whole MR may not set va or resid;
556 * there is no need to check the range since we will flush the whole MR
557 */
558 if (feth_sel(pkt) == IB_FLUSH_MR)
559 goto skip_check_range;
560 }
561
562 if (mr_check_range(mr, va + qp->resp.offset, resid)) {
563 state = RESPST_ERR_RKEY_VIOLATION;
564 goto err;
565 }
566
567skip_check_range:
568 if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
569 if (resid > mtu) {
570 if (pktlen != mtu || bth_pad(pkt)) {
571 state = RESPST_ERR_LENGTH;
572 goto err;
573 }
574 } else {
575 if (pktlen != resid) {
576 state = RESPST_ERR_LENGTH;
577 goto err;
578 }
579 if ((bth_pad(pkt) != (0x3 & (-resid)))) {
580 /* The BTH pad count does not match what the remaining length
581 * implies; this may not be exactly a length error, but nothing else fits.
582 */
583 state = RESPST_ERR_LENGTH;
584 goto err;
585 }
586 }
587 }
588
589 WARN_ON_ONCE(qp->resp.mr);
590
591 qp->resp.mr = mr;
592 return RESPST_EXECUTE;
593
594err:
595 if (mr)
596 rxe_put(mr);
597 if (mw)
598 rxe_put(mw);
599
600 return state;
601}
602
603static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
604 int data_len)
605{
606 int err;
607
608 err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
609 data_addr, data_len, RXE_TO_MR_OBJ);
610 if (unlikely(err))
611 return (err == -ENOSPC) ? RESPST_ERR_LENGTH
612 : RESPST_ERR_MALFORMED_WQE;
613
614 return RESPST_NONE;
615}
616
617static enum resp_states write_data_in(struct rxe_qp *qp,
618 struct rxe_pkt_info *pkt)
619{
620 enum resp_states rc = RESPST_NONE;
621 int err;
622 int data_len = payload_size(pkt);
623
624 err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
625 payload_addr(pkt), data_len, RXE_TO_MR_OBJ);
626 if (err) {
627 rc = RESPST_ERR_RKEY_VIOLATION;
628 goto out;
629 }
630
631 qp->resp.va += data_len;
632 qp->resp.resid -= data_len;
633
634out:
635 return rc;
636}
637
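/* Claim the next slot in the circular responder resource array and record
 * the state (VA, length, rkey and PSN range) needed to replay a duplicate
 * read, atomic, atomic write or flush request later.
 */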
638static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
639 struct rxe_pkt_info *pkt,
640 int type)
641{
642 struct resp_res *res;
643 u32 pkts;
644
645 res = &qp->resp.resources[qp->resp.res_head];
646 rxe_advance_resp_resource(qp);
647 free_rd_atomic_resource(res);
648
649 res->type = type;
650 res->replay = 0;
651
652 switch (type) {
653 case RXE_READ_MASK:
654 res->read.va = qp->resp.va + qp->resp.offset;
655 res->read.va_org = qp->resp.va + qp->resp.offset;
656 res->read.resid = qp->resp.resid;
657 res->read.length = qp->resp.resid;
658 res->read.rkey = qp->resp.rkey;
659
660 pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1);
661 res->first_psn = pkt->psn;
662 res->cur_psn = pkt->psn;
663 res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;
664
665 res->state = rdatm_res_state_new;
666 break;
667 case RXE_ATOMIC_MASK:
668 case RXE_ATOMIC_WRITE_MASK:
669 res->first_psn = pkt->psn;
670 res->last_psn = pkt->psn;
671 res->cur_psn = pkt->psn;
672 break;
673 case RXE_FLUSH_MASK:
674 res->flush.va = qp->resp.va + qp->resp.offset;
675 res->flush.length = qp->resp.length;
676 res->flush.type = feth_plt(pkt);
677 res->flush.level = feth_sel(pkt);
678 }
679
680 return res;
681}
682
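/* Handle a FLUSH request. A PERSISTENT flush writes the selected range
 * (or the whole MR) back to persistent memory and then issues a write
 * barrier; a GLOBAL flush only needs the barrier to make prior writes
 * globally visible.
 */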
683static enum resp_states process_flush(struct rxe_qp *qp,
684 struct rxe_pkt_info *pkt)
685{
686 u64 length, start;
687 struct rxe_mr *mr = qp->resp.mr;
688 struct resp_res *res = qp->resp.res;
689
690 /* oA19-14, oA19-15 */
691 if (res && res->replay)
692 return RESPST_ACKNOWLEDGE;
693 else if (!res) {
694 res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
695 qp->resp.res = res;
696 }
697
698 if (res->flush.level == IB_FLUSH_RANGE) {
699 start = res->flush.va;
700 length = res->flush.length;
701 } else { /* level == IB_FLUSH_MR */
702 start = mr->ibmr.iova;
703 length = mr->ibmr.length;
704 }
705
706 if (res->flush.type & IB_FLUSH_PERSISTENT) {
707 if (rxe_flush_pmem_iova(mr, start, length))
708 return RESPST_ERR_RKEY_VIOLATION;
709 /* Make data persistent. */
710 wmb();
711 } else if (res->flush.type & IB_FLUSH_GLOBAL) {
712 /* Make data globally visible. */
713 wmb();
714 }
715
716 qp->resp.msn++;
717
718 /* next expected psn, read handles this separately */
719 qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
720 qp->resp.ack_psn = qp->resp.psn;
721
722 qp->resp.opcode = pkt->opcode;
723 qp->resp.status = IB_WC_SUCCESS;
724
725 return RESPST_ACKNOWLEDGE;
726}
727
728/* Guarantee atomicity of atomic operations at the machine level. */
729static DEFINE_SPINLOCK(atomic_ops_lock);
730
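/* Perform compare & swap or fetch & add on the 8-byte target under
 * atomic_ops_lock. The original value is saved in res->atomic.orig_val so
 * a replayed request can be acknowledged with the same result.
 */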
731static enum resp_states atomic_reply(struct rxe_qp *qp,
732 struct rxe_pkt_info *pkt)
733{
734 u64 *vaddr;
735 enum resp_states ret;
736 struct rxe_mr *mr = qp->resp.mr;
737 struct resp_res *res = qp->resp.res;
738 u64 value;
739
740 if (!res) {
741 res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK);
742 qp->resp.res = res;
743 }
744
745 if (!res->replay) {
746 if (mr->state != RXE_MR_STATE_VALID) {
747 ret = RESPST_ERR_RKEY_VIOLATION;
748 goto out;
749 }
750
751 vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset,
752 sizeof(u64));
753
754 /* check that vaddr is 8-byte aligned. */
755 if (!vaddr || (uintptr_t)vaddr & 7) {
756 ret = RESPST_ERR_MISALIGNED_ATOMIC;
757 goto out;
758 }
759
760 spin_lock_bh(&atomic_ops_lock);
761 res->atomic.orig_val = value = *vaddr;
762
763 if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP) {
764 if (value == atmeth_comp(pkt))
765 value = atmeth_swap_add(pkt);
766 } else {
767 value += atmeth_swap_add(pkt);
768 }
769
770 *vaddr = value;
771 spin_unlock_bh(&atomic_ops_lock);
772
773 qp->resp.msn++;
774
775 /* next expected psn, read handles this separately */
776 qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
777 qp->resp.ack_psn = qp->resp.psn;
778
779 qp->resp.opcode = pkt->opcode;
780 qp->resp.status = IB_WC_SUCCESS;
781 }
782
783 ret = RESPST_ACKNOWLEDGE;
784out:
785 return ret;
786}
787
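/* ATOMIC WRITE must land as a single 64-bit store (smp_store_release() of
 * a u64), which is only guaranteed on 64-bit kernels; 32-bit builds reject
 * the opcode instead.
 */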
788#ifdef CONFIG_64BIT
789static enum resp_states do_atomic_write(struct rxe_qp *qp,
790 struct rxe_pkt_info *pkt)
791{
792 struct rxe_mr *mr = qp->resp.mr;
793 int payload = payload_size(pkt);
794 u64 src, *dst;
795
796 if (mr->state != RXE_MR_STATE_VALID)
797 return RESPST_ERR_RKEY_VIOLATION;
798
799 memcpy(&src, payload_addr(pkt), payload);
800
801 dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
802 /* check that dst is 8-byte aligned. */
803 if (!dst || (uintptr_t)dst & 7)
804 return RESPST_ERR_MISALIGNED_ATOMIC;
805
806 /* Do atomic write after all prior operations have completed */
807 smp_store_release(dst, src);
808
809 /* decrease resp.resid to zero (an atomic write payload is 8 bytes) */
810 qp->resp.resid -= payload;
811
812 qp->resp.msn++;
813
814 /* next expected psn, read handles this separately */
815 qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
816 qp->resp.ack_psn = qp->resp.psn;
817
818 qp->resp.opcode = pkt->opcode;
819 qp->resp.status = IB_WC_SUCCESS;
820 return RESPST_ACKNOWLEDGE;
821}
822#else
823static enum resp_states do_atomic_write(struct rxe_qp *qp,
824 struct rxe_pkt_info *pkt)
825{
826 return RESPST_ERR_UNSUPPORTED_OPCODE;
827}
828#endif /* CONFIG_64BIT */
829
830static enum resp_states atomic_write_reply(struct rxe_qp *qp,
831 struct rxe_pkt_info *pkt)
832{
833 struct resp_res *res = qp->resp.res;
834
835 if (!res) {
836 res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
837 qp->resp.res = res;
838 }
839
840 if (res->replay)
841 return RESPST_ACKNOWLEDGE;
842 return do_atomic_write(qp, pkt);
843}
844
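/* Build an ACK/read-response packet. pad = (-payload) & 0x3 rounds the
 * payload up to a 4-byte boundary (e.g. payload 5 -> pad 3, payload 8 ->
 * pad 0), and paylen covers the headers, payload, pad and 4-byte ICRC.
 */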
845static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
846 struct rxe_pkt_info *ack,
847 int opcode,
848 int payload,
849 u32 psn,
850 u8 syndrome)
851{
852 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
853 struct sk_buff *skb;
854 int paylen;
855 int pad;
856 int err;
857
858 /*
859 * allocate packet
860 */
861 pad = (-payload) & 0x3;
862 paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
863
864 skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
865 if (!skb)
866 return NULL;
867
868 ack->qp = qp;
869 ack->opcode = opcode;
870 ack->mask = rxe_opcode[opcode].mask;
871 ack->paylen = paylen;
872 ack->psn = psn;
873
874 bth_init(ack, opcode, 0, 0, pad, IB_DEFAULT_PKEY_FULL,
875 qp->attr.dest_qp_num, 0, psn);
876
877 if (ack->mask & RXE_AETH_MASK) {
878 aeth_set_syn(ack, syndrome);
879 aeth_set_msn(ack, qp->resp.msn);
880 }
881
882 if (ack->mask & RXE_ATMACK_MASK)
883 atmack_set_orig(ack, qp->resp.res->atomic.orig_val);
884
885 err = rxe_prepare(&qp->pri_av, ack, skb);
886 if (err) {
887 kfree_skb(skb);
888 return NULL;
889 }
890
891 return skb;
892}
893
894/**
895 * rxe_recheck_mr - revalidate MR from rkey and get a reference
896 * @qp: the qp
897 * @rkey: the rkey
898 *
899 * This code allows the MR to be invalidated or deregistered, or
900 * the MW, if one was used, to be invalidated or deallocated.
901 * It is assumed that the access permissions, if originally good,
902 * are still OK and that the mappings are unchanged.
903 *
904 * TODO: If someone reregisters an MR to change its size or
905 * access permissions during the processing of an RDMA read
906 * we should kill the responder resource and complete the
907 * operation with an error.
908 *
909 * Return: mr on success else NULL
910 */
911static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
912{
913 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
914 struct rxe_mr *mr;
915 struct rxe_mw *mw;
916
917 if (rkey_is_mw(rkey)) {
918 mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
919 if (!mw)
920 return NULL;
921
922 mr = mw->mr;
923 if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
924 !mr || mr->state != RXE_MR_STATE_VALID) {
925 rxe_put(mw);
926 return NULL;
927 }
928
929 rxe_get(mr);
930 rxe_put(mw);
931
932 return mr;
933 }
934
935 mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
936 if (!mr)
937 return NULL;
938
939 if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
940 rxe_put(mr);
941 return NULL;
942 }
943
944 return mr;
945}
946
947/* RDMA read response. If res is not NULL, then we have a current RDMA request
948 * being processed or replayed.
949 */
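/* Each invocation emits at most one MTU worth of read data; while
 * res->read.resid is still non-zero the state machine returns RESPST_DONE
 * so the task runs again for the next response packet.
 */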
950static enum resp_states read_reply(struct rxe_qp *qp,
951 struct rxe_pkt_info *req_pkt)
952{
953 struct rxe_pkt_info ack_pkt;
954 struct sk_buff *skb;
955 int mtu = qp->mtu;
956 enum resp_states state;
957 int payload;
958 int opcode;
959 int err;
960 struct resp_res *res = qp->resp.res;
961 struct rxe_mr *mr;
962
963 if (!res) {
964 res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
965 qp->resp.res = res;
966 }
967
968 if (res->state == rdatm_res_state_new) {
969 if (!res->replay) {
970 mr = qp->resp.mr;
971 qp->resp.mr = NULL;
972 } else {
973 mr = rxe_recheck_mr(qp, res->read.rkey);
974 if (!mr)
975 return RESPST_ERR_RKEY_VIOLATION;
976 }
977
978 if (res->read.resid <= mtu)
979 opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
980 else
981 opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
982 } else {
983 mr = rxe_recheck_mr(qp, res->read.rkey);
984 if (!mr)
985 return RESPST_ERR_RKEY_VIOLATION;
986
987 if (res->read.resid > mtu)
988 opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
989 else
990 opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
991 }
992
993 res->state = rdatm_res_state_next;
994
995 payload = min_t(int, res->read.resid, mtu);
996
997 skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
998 res->cur_psn, AETH_ACK_UNLIMITED);
999 if (!skb) {
1000 if (mr)
1001 rxe_put(mr);
1002 return RESPST_ERR_RNR;
1003 }
1004
1005 err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
1006 payload, RXE_FROM_MR_OBJ);
1007 if (mr)
1008 rxe_put(mr);
1009 if (err) {
1010 kfree_skb(skb);
1011 return RESPST_ERR_RKEY_VIOLATION;
1012 }
1013
1014 if (bth_pad(&ack_pkt)) {
1015 u8 *pad = payload_addr(&ack_pkt) + payload;
1016
1017 memset(pad, 0, bth_pad(&ack_pkt));
1018 }
1019
1020 err = rxe_xmit_packet(qp, &ack_pkt, skb);
1021 if (err)
1022 return RESPST_ERR_RNR;
1023
1024 res->read.va += payload;
1025 res->read.resid -= payload;
1026 res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;
1027
1028 if (res->read.resid > 0) {
1029 state = RESPST_DONE;
1030 } else {
1031 qp->resp.res = NULL;
1032 if (!res->replay)
1033 qp->resp.opcode = -1;
1034 if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
1035 qp->resp.psn = res->cur_psn;
1036 state = RESPST_CLEANUP;
1037 }
1038
1039 return state;
1040}
1041
1042static int invalidate_rkey(struct rxe_qp *qp, u32 rkey)
1043{
1044 if (rkey_is_mw(rkey))
1045 return rxe_invalidate_mw(qp, rkey);
1046 else
1047 return rxe_invalidate_mr(qp, rkey);
1048}
1049
1050/* Executes a new request. A retried request never reaches this function (sends
1051 * and writes are discarded, and reads and atomics are retried elsewhere).
1052 */
1053static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
1054{
1055 enum resp_states err;
1056 struct sk_buff *skb = PKT_TO_SKB(pkt);
1057 union rdma_network_hdr hdr;
1058
1059 if (pkt->mask & RXE_SEND_MASK) {
1060 if (qp_type(qp) == IB_QPT_UD ||
1061 qp_type(qp) == IB_QPT_GSI) {
1062 if (skb->protocol == htons(ETH_P_IP)) {
1063 memset(&hdr.reserved, 0,
1064 sizeof(hdr.reserved));
1065 memcpy(&hdr.roce4grh, ip_hdr(skb),
1066 sizeof(hdr.roce4grh));
1067 err = send_data_in(qp, &hdr, sizeof(hdr));
1068 } else {
1069 err = send_data_in(qp, ipv6_hdr(skb),
1070 sizeof(hdr));
1071 }
1072 if (err)
1073 return err;
1074 }
1075 err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
1076 if (err)
1077 return err;
1078 } else if (pkt->mask & RXE_WRITE_MASK) {
1079 err = write_data_in(qp, pkt);
1080 if (err)
1081 return err;
1082 } else if (pkt->mask & RXE_READ_MASK) {
1083 /* For RDMA Read we can increment the msn now. See C9-148. */
1084 qp->resp.msn++;
1085 return RESPST_READ_REPLY;
1086 } else if (pkt->mask & RXE_ATOMIC_MASK) {
1087 return RESPST_ATOMIC_REPLY;
1088 } else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
1089 return RESPST_ATOMIC_WRITE_REPLY;
1090 } else if (pkt->mask & RXE_FLUSH_MASK) {
1091 return RESPST_PROCESS_FLUSH;
1092 } else {
1093 /* Unreachable */
1094 WARN_ON_ONCE(1);
1095 }
1096
1097 if (pkt->mask & RXE_IETH_MASK) {
1098 u32 rkey = ieth_rkey(pkt);
1099
1100 err = invalidate_rkey(qp, rkey);
1101 if (err)
1102 return RESPST_ERR_INVALIDATE_RKEY;
1103 }
1104
1105 if (pkt->mask & RXE_END_MASK)
1106 /* We successfully processed this new request. */
1107 qp->resp.msn++;
1108
1109 /* next expected psn, read handles this separately */
1110 qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
1111 qp->resp.ack_psn = qp->resp.psn;
1112
1113 qp->resp.opcode = pkt->opcode;
1114 qp->resp.status = IB_WC_SUCCESS;
1115
1116 if (pkt->mask & RXE_COMP_MASK)
1117 return RESPST_COMPLETE;
1118 else if (qp_type(qp) == IB_QPT_RC)
1119 return RESPST_ACKNOWLEDGE;
1120 else
1121 return RESPST_CLEANUP;
1122}
1123
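/* Generate a receive completion: fill in the kernel or user space CQE
 * layout, advance the RQ consumer for non-SRQ QPs (an SRQ WQE was already
 * copied out), and post the CQE; the solicited event flag comes from the
 * request's BTH when a packet is present.
 */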
1124static enum resp_states do_complete(struct rxe_qp *qp,
1125 struct rxe_pkt_info *pkt)
1126{
1127 struct rxe_cqe cqe;
1128 struct ib_wc *wc = &cqe.ibwc;
1129 struct ib_uverbs_wc *uwc = &cqe.uibwc;
1130 struct rxe_recv_wqe *wqe = qp->resp.wqe;
1131 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
1132
1133 if (!wqe)
1134 goto finish;
1135
1136 memset(&cqe, 0, sizeof(cqe));
1137
1138 if (qp->rcq->is_user) {
1139 uwc->status = qp->resp.status;
1140 uwc->qp_num = qp->ibqp.qp_num;
1141 uwc->wr_id = wqe->wr_id;
1142 } else {
1143 wc->status = qp->resp.status;
1144 wc->qp = &qp->ibqp;
1145 wc->wr_id = wqe->wr_id;
1146 }
1147
1148 if (wc->status == IB_WC_SUCCESS) {
1149 rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
1150 wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
1151 pkt->mask & RXE_WRITE_MASK) ?
1152 IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
1153 wc->byte_len = (pkt->mask & RXE_IMMDT_MASK &&
1154 pkt->mask & RXE_WRITE_MASK) ?
1155 qp->resp.length : wqe->dma.length - wqe->dma.resid;
1156
1157 /* fields after byte_len are different between kernel and user
1158 * space
1159 */
1160 if (qp->rcq->is_user) {
1161 uwc->wc_flags = IB_WC_GRH;
1162
1163 if (pkt->mask & RXE_IMMDT_MASK) {
1164 uwc->wc_flags |= IB_WC_WITH_IMM;
1165 uwc->ex.imm_data = immdt_imm(pkt);
1166 }
1167
1168 if (pkt->mask & RXE_IETH_MASK) {
1169 uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
1170 uwc->ex.invalidate_rkey = ieth_rkey(pkt);
1171 }
1172
1173 if (pkt->mask & RXE_DETH_MASK)
1174 uwc->src_qp = deth_sqp(pkt);
1175
1176 uwc->port_num = qp->attr.port_num;
1177 } else {
1178 struct sk_buff *skb = PKT_TO_SKB(pkt);
1179
1180 wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE;
1181 if (skb->protocol == htons(ETH_P_IP))
1182 wc->network_hdr_type = RDMA_NETWORK_IPV4;
1183 else
1184 wc->network_hdr_type = RDMA_NETWORK_IPV6;
1185
1186 if (is_vlan_dev(skb->dev)) {
1187 wc->wc_flags |= IB_WC_WITH_VLAN;
1188 wc->vlan_id = vlan_dev_vlan_id(skb->dev);
1189 }
1190
1191 if (pkt->mask & RXE_IMMDT_MASK) {
1192 wc->wc_flags |= IB_WC_WITH_IMM;
1193 wc->ex.imm_data = immdt_imm(pkt);
1194 }
1195
1196 if (pkt->mask & RXE_IETH_MASK) {
1197 wc->wc_flags |= IB_WC_WITH_INVALIDATE;
1198 wc->ex.invalidate_rkey = ieth_rkey(pkt);
1199 }
1200
1201 if (pkt->mask & RXE_DETH_MASK)
1202 wc->src_qp = deth_sqp(pkt);
1203
1204 wc->port_num = qp->attr.port_num;
1205 }
1206 }
1207
1208 /* have copy for srq and reference for !srq */
1209 if (!qp->srq)
1210 queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT);
1211
1212 qp->resp.wqe = NULL;
1213
1214 if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1))
1215 return RESPST_ERR_CQ_OVERFLOW;
1216
1217finish:
1218 if (unlikely(qp->resp.state == QP_STATE_ERROR))
1219 return RESPST_CHK_RESOURCE;
1220 if (unlikely(!pkt))
1221 return RESPST_DONE;
1222 if (qp_type(qp) == IB_QPT_RC)
1223 return RESPST_ACKNOWLEDGE;
1224 else
1225 return RESPST_CLEANUP;
1226}
1227
1228
1229static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
1230 int opcode, const char *msg)
1231{
1232 int err;
1233 struct rxe_pkt_info ack_pkt;
1234 struct sk_buff *skb;
1235
1236 skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome);
1237 if (!skb)
1238 return -ENOMEM;
1239
1240 err = rxe_xmit_packet(qp, &ack_pkt, skb);
1241 if (err)
1242 rxe_dbg_qp(qp, "Failed sending %s\n", msg);
1243
1244 return err;
1245}
1246
1247static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
1248{
1249 return send_common_ack(qp, syndrome, psn,
1250 IB_OPCODE_RC_ACKNOWLEDGE, "ACK");
1251}
1252
1253static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
1254{
1255 int ret = send_common_ack(qp, syndrome, psn,
1256 IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, "ATOMIC ACK");
1257
1258 /* have to clear this since it is used to trigger
1259 * long read replies
1260 */
1261 qp->resp.res = NULL;
1262 return ret;
1263}
1264
1265static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
1266{
1267 int ret = send_common_ack(qp, syndrome, psn,
1268 IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
1269 "RDMA READ response of length zero ACK");
1270
1271 /* have to clear this since it is used to trigger
1272 * long read replies
1273 */
1274 qp->resp.res = NULL;
1275 return ret;
1276}
1277
1278static enum resp_states acknowledge(struct rxe_qp *qp,
1279 struct rxe_pkt_info *pkt)
1280{
1281 if (qp_type(qp) != IB_QPT_RC)
1282 return RESPST_CLEANUP;
1283
1284 if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
1285 send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
1286 else if (pkt->mask & RXE_ATOMIC_MASK)
1287 send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
1288 else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
1289 send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
1290 else if (bth_ack(pkt))
1291 send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
1292
1293 return RESPST_CLEANUP;
1294}
1295
1296static enum resp_states cleanup(struct rxe_qp *qp,
1297 struct rxe_pkt_info *pkt)
1298{
1299 struct sk_buff *skb;
1300
1301 if (pkt) {
1302 skb = skb_dequeue(&qp->req_pkts);
1303 rxe_put(qp);
1304 kfree_skb(skb);
1305 ib_device_put(qp->ibqp.device);
1306 }
1307
1308 if (qp->resp.mr) {
1309 rxe_put(qp->resp.mr);
1310 qp->resp.mr = NULL;
1311 }
1312
1313 return RESPST_DONE;
1314}
1315
1316static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
1317{
1318 int i;
1319
1320 for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
1321 struct resp_res *res = &qp->resp.resources[i];
1322
1323 if (res->type == 0)
1324 continue;
1325
1326 if (psn_compare(psn, res->first_psn) >= 0 &&
1327 psn_compare(psn, res->last_psn) <= 0) {
1328 return res;
1329 }
1330 }
1331
1332 return NULL;
1333}
1334
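/* Handle a retried request. Duplicate sends and writes are simply
 * re-acknowledged at the previous PSN; duplicate reads, atomics and
 * flushes are replayed from the matching responder resource, or dropped
 * (Class D) if no resource is found.
 */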
1335static enum resp_states duplicate_request(struct rxe_qp *qp,
1336 struct rxe_pkt_info *pkt)
1337{
1338 enum resp_states rc;
1339 u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;
1340
1341 if (pkt->mask & RXE_SEND_MASK ||
1342 pkt->mask & RXE_WRITE_MASK) {
1343 /* SEND or WRITE. Ack again and cleanup. C9-105. */
1344 send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
1345 return RESPST_CLEANUP;
1346 } else if (pkt->mask & RXE_FLUSH_MASK) {
1347 struct resp_res *res;
1348
1349 /* Find the operation in our list of responder resources. */
1350 res = find_resource(qp, pkt->psn);
1351 if (res) {
1352 res->replay = 1;
1353 res->cur_psn = pkt->psn;
1354 qp->resp.res = res;
1355 rc = RESPST_PROCESS_FLUSH;
1356 goto out;
1357 }
1358
1359 /* Resource not found. Class D error. Drop the request. */
1360 rc = RESPST_CLEANUP;
1361 goto out;
1362 } else if (pkt->mask & RXE_READ_MASK) {
1363 struct resp_res *res;
1364
1365 res = find_resource(qp, pkt->psn);
1366 if (!res) {
1367 /* Resource not found. Class D error. Drop the
1368 * request.
1369 */
1370 rc = RESPST_CLEANUP;
1371 goto out;
1372 } else {
1373 /* Ensure this new request is the same as the previous
1374 * one or a subset of it.
1375 */
1376 u64 iova = reth_va(pkt);
1377 u32 resid = reth_len(pkt);
1378
1379 if (iova < res->read.va_org ||
1380 resid > res->read.length ||
1381 (iova + resid) > (res->read.va_org +
1382 res->read.length)) {
1383 rc = RESPST_CLEANUP;
1384 goto out;
1385 }
1386
1387 if (reth_rkey(pkt) != res->read.rkey) {
1388 rc = RESPST_CLEANUP;
1389 goto out;
1390 }
1391
1392 res->cur_psn = pkt->psn;
1393 res->state = (pkt->psn == res->first_psn) ?
1394 rdatm_res_state_new :
1395 rdatm_res_state_replay;
1396 res->replay = 1;
1397
1398 /* Reset the resource, except length. */
1399 res->read.va_org = iova;
1400 res->read.va = iova;
1401 res->read.resid = resid;
1402
1403 /* Replay the RDMA read reply. */
1404 qp->resp.res = res;
1405 rc = RESPST_READ_REPLY;
1406 goto out;
1407 }
1408 } else {
1409 struct resp_res *res;
1410
1411 /* Find the operation in our list of responder resources. */
1412 res = find_resource(qp, pkt->psn);
1413 if (res) {
1414 res->replay = 1;
1415 res->cur_psn = pkt->psn;
1416 qp->resp.res = res;
1417 rc = pkt->mask & RXE_ATOMIC_MASK ?
1418 RESPST_ATOMIC_REPLY :
1419 RESPST_ATOMIC_WRITE_REPLY;
1420 goto out;
1421 }
1422
1423 /* Resource not found. Class D error. Drop the request. */
1424 rc = RESPST_CLEANUP;
1425 goto out;
1426 }
1427out:
1428 return rc;
1429}
1430
1431/* Process a class A or C error. Both are treated the same in this implementation. */
1432static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
1433 enum ib_wc_status status)
1434{
1435 qp->resp.aeth_syndrome = syndrome;
1436 qp->resp.status = status;
1437
1438 /* indicate that we should go through the ERROR state */
1439 qp->resp.goto_error = 1;
1440}
1441
1442static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
1443{
1444 /* UC */
1445 if (qp->srq) {
1446 /* Class E */
1447 qp->resp.drop_msg = 1;
1448 if (qp->resp.wqe) {
1449 qp->resp.status = IB_WC_REM_INV_REQ_ERR;
1450 return RESPST_COMPLETE;
1451 } else {
1452 return RESPST_CLEANUP;
1453 }
1454 } else {
1455 /* Class D1. This packet may be the start of a
1456 * new message and could be valid. The previous
1457 * message is invalid and ignored. Reset the
1458 * recv wr to its original state.
1459 */
1460 if (qp->resp.wqe) {
1461 qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
1462 qp->resp.wqe->dma.cur_sge = 0;
1463 qp->resp.wqe->dma.sge_offset = 0;
1464 qp->resp.opcode = -1;
1465 }
1466
1467 if (qp->resp.mr) {
1468 rxe_put(qp->resp.mr);
1469 qp->resp.mr = NULL;
1470 }
1471
1472 return RESPST_CLEANUP;
1473 }
1474}
1475
1476static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
1477{
1478 struct sk_buff *skb;
1479 struct rxe_queue *q = qp->rq.queue;
1480
1481 while ((skb = skb_dequeue(&qp->req_pkts))) {
1482 rxe_put(qp);
1483 kfree_skb(skb);
1484 ib_device_put(qp->ibqp.device);
1485 }
1486
1487 if (notify)
1488 return;
1489
1490 while (!qp->srq && q && queue_head(q, q->type))
1491 queue_advance_consumer(q, q->type);
1492}
1493
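/* Main entry point, driven by the responder task. Walks the state machine
 * above one request packet at a time and returns 0 to be called again or
 * -EAGAIN when there is no more work to do (or the QP is invalid or being
 * reset).
 */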
1494int rxe_responder(void *arg)
1495{
1496 struct rxe_qp *qp = (struct rxe_qp *)arg;
1497 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
1498 enum resp_states state;
1499 struct rxe_pkt_info *pkt = NULL;
1500 int ret;
1501
1502 if (!rxe_get(qp))
1503 return -EAGAIN;
1504
1505 qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
1506
1507 if (!qp->valid)
1508 goto exit;
1509
1510 switch (qp->resp.state) {
1511 case QP_STATE_RESET:
1512 state = RESPST_RESET;
1513 break;
1514
1515 default:
1516 state = RESPST_GET_REQ;
1517 break;
1518 }
1519
1520 while (1) {
1521 rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
1522 switch (state) {
1523 case RESPST_GET_REQ:
1524 state = get_req(qp, &pkt);
1525 break;
1526 case RESPST_CHK_PSN:
1527 state = check_psn(qp, pkt);
1528 break;
1529 case RESPST_CHK_OP_SEQ:
1530 state = check_op_seq(qp, pkt);
1531 break;
1532 case RESPST_CHK_OP_VALID:
1533 state = check_op_valid(qp, pkt);
1534 break;
1535 case RESPST_CHK_RESOURCE:
1536 state = check_resource(qp, pkt);
1537 break;
1538 case RESPST_CHK_LENGTH:
1539 state = rxe_resp_check_length(qp, pkt);
1540 break;
1541 case RESPST_CHK_RKEY:
1542 state = check_rkey(qp, pkt);
1543 break;
1544 case RESPST_EXECUTE:
1545 state = execute(qp, pkt);
1546 break;
1547 case RESPST_COMPLETE:
1548 state = do_complete(qp, pkt);
1549 break;
1550 case RESPST_READ_REPLY:
1551 state = read_reply(qp, pkt);
1552 break;
1553 case RESPST_ATOMIC_REPLY:
1554 state = atomic_reply(qp, pkt);
1555 break;
1556 case RESPST_ATOMIC_WRITE_REPLY:
1557 state = atomic_write_reply(qp, pkt);
1558 break;
1559 case RESPST_PROCESS_FLUSH:
1560 state = process_flush(qp, pkt);
1561 break;
1562 case RESPST_ACKNOWLEDGE:
1563 state = acknowledge(qp, pkt);
1564 break;
1565 case RESPST_CLEANUP:
1566 state = cleanup(qp, pkt);
1567 break;
1568 case RESPST_DUPLICATE_REQUEST:
1569 state = duplicate_request(qp, pkt);
1570 break;
1571 case RESPST_ERR_PSN_OUT_OF_SEQ:
1572 /* RC only - Class B. Drop packet. */
1573 send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
1574 state = RESPST_CLEANUP;
1575 break;
1576
1577 case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
1578 case RESPST_ERR_MISSING_OPCODE_FIRST:
1579 case RESPST_ERR_MISSING_OPCODE_LAST_C:
1580 case RESPST_ERR_UNSUPPORTED_OPCODE:
1581 case RESPST_ERR_MISALIGNED_ATOMIC:
1582 /* RC Only - Class C. */
1583 do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
1584 IB_WC_REM_INV_REQ_ERR);
1585 state = RESPST_COMPLETE;
1586 break;
1587
1588 case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
1589 state = do_class_d1e_error(qp);
1590 break;
1591 case RESPST_ERR_RNR:
1592 if (qp_type(qp) == IB_QPT_RC) {
1593 rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
1594 /* RC - class B */
1595 send_ack(qp, AETH_RNR_NAK |
1596 (~AETH_TYPE_MASK &
1597 qp->attr.min_rnr_timer),
1598 pkt->psn);
1599 } else {
1600 /* UD/UC - class D */
1601 qp->resp.drop_msg = 1;
1602 }
1603 state = RESPST_CLEANUP;
1604 break;
1605
1606 case RESPST_ERR_RKEY_VIOLATION:
1607 if (qp_type(qp) == IB_QPT_RC) {
1608 /* Class C */
1609 do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
1610 IB_WC_REM_ACCESS_ERR);
1611 state = RESPST_COMPLETE;
1612 } else {
1613 qp->resp.drop_msg = 1;
1614 if (qp->srq) {
1615 /* UC/SRQ Class D */
1616 qp->resp.status = IB_WC_REM_ACCESS_ERR;
1617 state = RESPST_COMPLETE;
1618 } else {
1619 /* UC/non-SRQ Class E. */
1620 state = RESPST_CLEANUP;
1621 }
1622 }
1623 break;
1624
1625 case RESPST_ERR_INVALIDATE_RKEY:
1626 /* RC - Class J. */
1627 qp->resp.goto_error = 1;
1628 qp->resp.status = IB_WC_REM_INV_REQ_ERR;
1629 state = RESPST_COMPLETE;
1630 break;
1631
1632 case RESPST_ERR_LENGTH:
1633 if (qp_type(qp) == IB_QPT_RC) {
1634 /* Class C */
1635 do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
1636 IB_WC_REM_INV_REQ_ERR);
1637 state = RESPST_COMPLETE;
1638 } else if (qp->srq) {
1639 /* UC/UD - class E */
1640 qp->resp.status = IB_WC_REM_INV_REQ_ERR;
1641 state = RESPST_COMPLETE;
1642 } else {
1643 /* UC/UD - class D */
1644 qp->resp.drop_msg = 1;
1645 state = RESPST_CLEANUP;
1646 }
1647 break;
1648
1649 case RESPST_ERR_MALFORMED_WQE:
1650 /* All, Class A. */
1651 do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
1652 IB_WC_LOC_QP_OP_ERR);
1653 state = RESPST_COMPLETE;
1654 break;
1655
1656 case RESPST_ERR_CQ_OVERFLOW:
1657 /* All - Class G */
1658 state = RESPST_ERROR;
1659 break;
1660
1661 case RESPST_DONE:
1662 if (qp->resp.goto_error) {
1663 state = RESPST_ERROR;
1664 break;
1665 }
1666
1667 goto done;
1668
1669 case RESPST_EXIT:
1670 if (qp->resp.goto_error) {
1671 state = RESPST_ERROR;
1672 break;
1673 }
1674
1675 goto exit;
1676
1677 case RESPST_RESET:
1678 rxe_drain_req_pkts(qp, false);
1679 qp->resp.wqe = NULL;
1680 goto exit;
1681
1682 case RESPST_ERROR:
1683 qp->resp.goto_error = 0;
1684 rxe_dbg_qp(qp, "moved to error state\n");
1685 rxe_qp_error(qp);
1686 goto exit;
1687
1688 default:
1689 WARN_ON_ONCE(1);
1690 }
1691 }
1692
1693 /* A non-zero return value will cause rxe_do_task to
1694 * exit its loop and end the tasklet. A zero return
1695 * will continue looping and rxe_responder will be called again
1696 */
1697done:
1698 ret = 0;
1699 goto out;
1700exit:
1701 ret = -EAGAIN;
1702out:
1703 rxe_put(qp);
1704 return ret;
1705}