v6.8
   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/*
   3 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
   4 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
   5 */
   6
   7#include <linux/skbuff.h>
   8
   9#include "rxe.h"
  10#include "rxe_loc.h"
  11#include "rxe_queue.h"
  12
  13static char *resp_state_name[] = {
  14	[RESPST_NONE]				= "NONE",
  15	[RESPST_GET_REQ]			= "GET_REQ",
  16	[RESPST_CHK_PSN]			= "CHK_PSN",
  17	[RESPST_CHK_OP_SEQ]			= "CHK_OP_SEQ",
  18	[RESPST_CHK_OP_VALID]			= "CHK_OP_VALID",
  19	[RESPST_CHK_RESOURCE]			= "CHK_RESOURCE",
  20	[RESPST_CHK_LENGTH]			= "CHK_LENGTH",
  21	[RESPST_CHK_RKEY]			= "CHK_RKEY",
  22	[RESPST_EXECUTE]			= "EXECUTE",
  23	[RESPST_READ_REPLY]			= "READ_REPLY",
  24	[RESPST_ATOMIC_REPLY]			= "ATOMIC_REPLY",
  25	[RESPST_ATOMIC_WRITE_REPLY]		= "ATOMIC_WRITE_REPLY",
  26	[RESPST_PROCESS_FLUSH]			= "PROCESS_FLUSH",
  27	[RESPST_COMPLETE]			= "COMPLETE",
  28	[RESPST_ACKNOWLEDGE]			= "ACKNOWLEDGE",
  29	[RESPST_CLEANUP]			= "CLEANUP",
  30	[RESPST_DUPLICATE_REQUEST]		= "DUPLICATE_REQUEST",
  31	[RESPST_ERR_MALFORMED_WQE]		= "ERR_MALFORMED_WQE",
  32	[RESPST_ERR_UNSUPPORTED_OPCODE]		= "ERR_UNSUPPORTED_OPCODE",
  33	[RESPST_ERR_MISALIGNED_ATOMIC]		= "ERR_MISALIGNED_ATOMIC",
  34	[RESPST_ERR_PSN_OUT_OF_SEQ]		= "ERR_PSN_OUT_OF_SEQ",
  35	[RESPST_ERR_MISSING_OPCODE_FIRST]	= "ERR_MISSING_OPCODE_FIRST",
  36	[RESPST_ERR_MISSING_OPCODE_LAST_C]	= "ERR_MISSING_OPCODE_LAST_C",
  37	[RESPST_ERR_MISSING_OPCODE_LAST_D1E]	= "ERR_MISSING_OPCODE_LAST_D1E",
  38	[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ]	= "ERR_TOO_MANY_RDMA_ATM_REQ",
  39	[RESPST_ERR_RNR]			= "ERR_RNR",
  40	[RESPST_ERR_RKEY_VIOLATION]		= "ERR_RKEY_VIOLATION",
  41	[RESPST_ERR_INVALIDATE_RKEY]		= "ERR_INVALIDATE_RKEY_VIOLATION",
  42	[RESPST_ERR_LENGTH]			= "ERR_LENGTH",
  43	[RESPST_ERR_CQ_OVERFLOW]		= "ERR_CQ_OVERFLOW",
  44	[RESPST_ERROR]				= "ERROR",
  45	[RESPST_DONE]				= "DONE",
  46	[RESPST_EXIT]				= "EXIT",
  47};
  48
  49/* rxe_recv calls here to add a request packet to the input queue */
  50void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
  51{
  52	int must_sched;
  53	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
  54
  55	skb_queue_tail(&qp->req_pkts, skb);
  56
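    	/* handle the packet in the caller's context unless it is a read
    	 * request or other packets are already queued, in which case
    	 * defer to the responder task
    	 */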
  57	must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
  58			(skb_queue_len(&qp->req_pkts) > 1);
  59
  60	if (must_sched)
  61		rxe_sched_task(&qp->resp.task);
  62	else
  63		rxe_run_task(&qp->resp.task);
  64}
  65
  66static inline enum resp_states get_req(struct rxe_qp *qp,
  67				       struct rxe_pkt_info **pkt_p)
  68{
  69	struct sk_buff *skb;
  70
  71	skb = skb_peek(&qp->req_pkts);
  72	if (!skb)
  73		return RESPST_EXIT;
  74
  75	*pkt_p = SKB_TO_PKT(skb);
  76
  77	return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN;
  78}
  79
  80static enum resp_states check_psn(struct rxe_qp *qp,
  81				  struct rxe_pkt_info *pkt)
  82{
  83	int diff = psn_compare(pkt->psn, qp->resp.psn);
  84	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
  85
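    	/* diff > 0: the packet is ahead of the expected PSN, so one or
    	 * more requests have gone missing; diff < 0: the PSN is older
    	 * than expected, i.e. a duplicate request
    	 */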
  86	switch (qp_type(qp)) {
  87	case IB_QPT_RC:
  88		if (diff > 0) {
  89			if (qp->resp.sent_psn_nak)
  90				return RESPST_CLEANUP;
  91
  92			qp->resp.sent_psn_nak = 1;
  93			rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ);
  94			return RESPST_ERR_PSN_OUT_OF_SEQ;
  95
  96		} else if (diff < 0) {
  97			rxe_counter_inc(rxe, RXE_CNT_DUP_REQ);
  98			return RESPST_DUPLICATE_REQUEST;
  99		}
 100
 101		if (qp->resp.sent_psn_nak)
 102			qp->resp.sent_psn_nak = 0;
 103
 104		break;
 105
 106	case IB_QPT_UC:
 107		if (qp->resp.drop_msg || diff != 0) {
 108			if (pkt->mask & RXE_START_MASK) {
 109				qp->resp.drop_msg = 0;
 110				return RESPST_CHK_OP_SEQ;
 111			}
 112
 113			qp->resp.drop_msg = 1;
 114			return RESPST_CLEANUP;
 115		}
 116		break;
 117	default:
 118		break;
 119	}
 120
 121	return RESPST_CHK_OP_SEQ;
 122}
 123
 124static enum resp_states check_op_seq(struct rxe_qp *qp,
 125				     struct rxe_pkt_info *pkt)
 126{
 127	switch (qp_type(qp)) {
 128	case IB_QPT_RC:
 129		switch (qp->resp.opcode) {
 130		case IB_OPCODE_RC_SEND_FIRST:
 131		case IB_OPCODE_RC_SEND_MIDDLE:
 132			switch (pkt->opcode) {
 133			case IB_OPCODE_RC_SEND_MIDDLE:
 134			case IB_OPCODE_RC_SEND_LAST:
 135			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
 136			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
 137				return RESPST_CHK_OP_VALID;
 138			default:
 139				return RESPST_ERR_MISSING_OPCODE_LAST_C;
 140			}
 141
 142		case IB_OPCODE_RC_RDMA_WRITE_FIRST:
 143		case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
 144			switch (pkt->opcode) {
 145			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
 146			case IB_OPCODE_RC_RDMA_WRITE_LAST:
 147			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
 148				return RESPST_CHK_OP_VALID;
 149			default:
 150				return RESPST_ERR_MISSING_OPCODE_LAST_C;
 151			}
 152
 153		default:
 154			switch (pkt->opcode) {
 155			case IB_OPCODE_RC_SEND_MIDDLE:
 156			case IB_OPCODE_RC_SEND_LAST:
 157			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
 158			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
 159			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
 160			case IB_OPCODE_RC_RDMA_WRITE_LAST:
 161			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
 162				return RESPST_ERR_MISSING_OPCODE_FIRST;
 163			default:
 164				return RESPST_CHK_OP_VALID;
 165			}
 166		}
 167		break;
 168
 169	case IB_QPT_UC:
 170		switch (qp->resp.opcode) {
 171		case IB_OPCODE_UC_SEND_FIRST:
 172		case IB_OPCODE_UC_SEND_MIDDLE:
 173			switch (pkt->opcode) {
 174			case IB_OPCODE_UC_SEND_MIDDLE:
 175			case IB_OPCODE_UC_SEND_LAST:
 176			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
 177				return RESPST_CHK_OP_VALID;
 178			default:
 179				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
 180			}
 181
 182		case IB_OPCODE_UC_RDMA_WRITE_FIRST:
 183		case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
 184			switch (pkt->opcode) {
 185			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
 186			case IB_OPCODE_UC_RDMA_WRITE_LAST:
 187			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
 188				return RESPST_CHK_OP_VALID;
 189			default:
 190				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
 191			}
 192
 193		default:
 194			switch (pkt->opcode) {
 195			case IB_OPCODE_UC_SEND_MIDDLE:
 196			case IB_OPCODE_UC_SEND_LAST:
 197			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
 198			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
 199			case IB_OPCODE_UC_RDMA_WRITE_LAST:
 200			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
 201				qp->resp.drop_msg = 1;
 202				return RESPST_CLEANUP;
 203			default:
 204				return RESPST_CHK_OP_VALID;
 205			}
 206		}
 207		break;
 208
 209	default:
 210		return RESPST_CHK_OP_VALID;
 211	}
 212}
 213
 214static bool check_qp_attr_access(struct rxe_qp *qp,
 215				 struct rxe_pkt_info *pkt)
 216{
 217	if (((pkt->mask & RXE_READ_MASK) &&
 218	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
 219	    ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
 220	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
 221	    ((pkt->mask & RXE_ATOMIC_MASK) &&
 222	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
 223		return false;
 224
 225	if (pkt->mask & RXE_FLUSH_MASK) {
 226		u32 flush_type = feth_plt(pkt);
 227
 228		if ((flush_type & IB_FLUSH_GLOBAL &&
 229		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
 230		    (flush_type & IB_FLUSH_PERSISTENT &&
 231		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
 232			return false;
 233	}
 234
 235	return true;
 236}
 237
 238static enum resp_states check_op_valid(struct rxe_qp *qp,
 239				       struct rxe_pkt_info *pkt)
 240{
 241	switch (qp_type(qp)) {
 242	case IB_QPT_RC:
 243		if (!check_qp_attr_access(qp, pkt))
 244			return RESPST_ERR_UNSUPPORTED_OPCODE;
 245
 246		break;
 247
 248	case IB_QPT_UC:
 249		if ((pkt->mask & RXE_WRITE_MASK) &&
 250		    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
 251			qp->resp.drop_msg = 1;
 252			return RESPST_CLEANUP;
 253		}
 254
 255		break;
 256
 257	case IB_QPT_UD:
 258	case IB_QPT_GSI:
 259		break;
 260
 261	default:
 262		WARN_ON_ONCE(1);
 263		break;
 264	}
 265
 266	return RESPST_CHK_RESOURCE;
 267}
 268
 269static enum resp_states get_srq_wqe(struct rxe_qp *qp)
 270{
 271	struct rxe_srq *srq = qp->srq;
 272	struct rxe_queue *q = srq->rq.queue;
 273	struct rxe_recv_wqe *wqe;
 274	struct ib_event ev;
 275	unsigned int count;
 276	size_t size;
 277	unsigned long flags;
 278
 279	if (srq->error)
 280		return RESPST_ERR_RNR;
 281
 282	spin_lock_irqsave(&srq->rq.consumer_lock, flags);
 283
 284	wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
 285	if (!wqe) {
 286		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
 287		return RESPST_ERR_RNR;
 288	}
 289
 290	/* don't trust user space data */
 291	if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
 292		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
 293		rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
 294		return RESPST_ERR_MALFORMED_WQE;
 295	}
 296	size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge);
 297	memcpy(&qp->resp.srq_wqe, wqe, size);
 298
 299	qp->resp.wqe = &qp->resp.srq_wqe.wqe;
 300	queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT);
 301	count = queue_count(q, QUEUE_TYPE_FROM_CLIENT);
 302
 303	if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) {
 304		srq->limit = 0;
 305		goto event;
 306	}
 307
 308	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
 309	return RESPST_CHK_LENGTH;
 310
 311event:
 312	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
 313	ev.device = qp->ibqp.device;
 314	ev.element.srq = qp->ibqp.srq;
 315	ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
 316	srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
 317	return RESPST_CHK_LENGTH;
 318}
 319
 320static enum resp_states check_resource(struct rxe_qp *qp,
 321				       struct rxe_pkt_info *pkt)
 322{
 323	struct rxe_srq *srq = qp->srq;
 324
 325	if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
 326		/* it is the requester's job not to send
 327		 * too many read/atomic ops; we just
 328		 * recycle the responder resource queue
 329		 */
 330		if (likely(qp->attr.max_dest_rd_atomic > 0))
 331			return RESPST_CHK_LENGTH;
 332		else
 333			return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
 334	}
 335
 336	if (pkt->mask & RXE_RWR_MASK) {
 337		if (srq)
 338			return get_srq_wqe(qp);
 339
 340		qp->resp.wqe = queue_head(qp->rq.queue,
 341				QUEUE_TYPE_FROM_CLIENT);
 342		return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
 343	}
 344
 345	return RESPST_CHK_LENGTH;
 346}
 347
 348static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
 349					      struct rxe_pkt_info *pkt)
 350{
 351	/*
 352	 * See IBA C9-92
 353	 * For UD QPs we only check if the packet will fit in the
 354	 * receive buffer later. For RDMA operations additional
 355	 * length checks are performed in check_rkey.
 356	 */
 357	if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
 358					     (qp_type(qp) == IB_QPT_UC))) {
 359		unsigned int mtu = qp->mtu;
 360		unsigned int payload = payload_size(pkt);
 361
 362		if ((pkt->mask & RXE_START_MASK) &&
 363		    (pkt->mask & RXE_END_MASK)) {
 364			if (unlikely(payload > mtu)) {
 365				rxe_dbg_qp(qp, "only packet too long\n");
 366				return RESPST_ERR_LENGTH;
 367			}
 368		} else if ((pkt->mask & RXE_START_MASK) ||
 369			   (pkt->mask & RXE_MIDDLE_MASK)) {
 370			if (unlikely(payload != mtu)) {
 371				rxe_dbg_qp(qp, "first or middle packet not mtu\n");
 372				return RESPST_ERR_LENGTH;
 373			}
 374		} else if (pkt->mask & RXE_END_MASK) {
 375			if (unlikely((payload == 0) || (payload > mtu))) {
 376				rxe_dbg_qp(qp, "last packet zero or too long\n");
 377				return RESPST_ERR_LENGTH;
 378			}
 379		}
 380	}
 381
 382	/* See IBA C9-94 */
 383	if (pkt->mask & RXE_RETH_MASK) {
 384		if (reth_len(pkt) > (1U << 31)) {
 385			rxe_dbg_qp(qp, "dma length too long\n");
 386			return RESPST_ERR_LENGTH;
 387		}
 388	}
 389
 390	if (pkt->mask & RXE_RDMA_OP_MASK)
 391		return RESPST_CHK_RKEY;
 392	else
 393		return RESPST_EXECUTE;
 394}
 395
 396/* if the reth length field is zero we can assume nothing
 397 * about the rkey value and should not validate or use it.
 398 * Instead set qp->resp.rkey to 0 which is an invalid rkey
 399 * value since the minimum index part is 1.
 400 */
 401static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 402{
 403	unsigned int length = reth_len(pkt);
 404
 405	qp->resp.va = reth_va(pkt);
 406	qp->resp.offset = 0;
 407	qp->resp.resid = length;
 408	qp->resp.length = length;
 409	if (pkt->mask & RXE_READ_OR_WRITE_MASK && length == 0)
 410		qp->resp.rkey = 0;
 411	else
 412		qp->resp.rkey = reth_rkey(pkt);
 413}
 414
 415static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 416{
 417	qp->resp.va = atmeth_va(pkt);
 418	qp->resp.offset = 0;
 419	qp->resp.rkey = atmeth_rkey(pkt);
 420	qp->resp.resid = sizeof(u64);
 421}
 422
 423/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
 424 * if an invalid rkey is received or the RDMA length is zero. For middle
 425 * or last packets use the stored value of mr.
 426 */
 427static enum resp_states check_rkey(struct rxe_qp *qp,
 428				   struct rxe_pkt_info *pkt)
 429{
 430	struct rxe_mr *mr = NULL;
 431	struct rxe_mw *mw = NULL;
 432	u64 va;
 433	u32 rkey;
 434	u32 resid;
 435	u32 pktlen;
 436	int mtu = qp->mtu;
 437	enum resp_states state;
 438	int access = 0;
 439
 440	/* parse RETH or ATMETH header for first/only packets
 441	 * for va, length, rkey, etc. or use current value for
 442	 * middle/last packets.
 443	 */
 444	if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
 445		if (pkt->mask & RXE_RETH_MASK)
 446			qp_resp_from_reth(qp, pkt);
 447
 448		access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
 449						     : IB_ACCESS_REMOTE_WRITE;
 450	} else if (pkt->mask & RXE_FLUSH_MASK) {
 451		u32 flush_type = feth_plt(pkt);
 452
 453		if (pkt->mask & RXE_RETH_MASK)
 454			qp_resp_from_reth(qp, pkt);
 455
 456		if (flush_type & IB_FLUSH_GLOBAL)
 457			access |= IB_ACCESS_FLUSH_GLOBAL;
 458		if (flush_type & IB_FLUSH_PERSISTENT)
 459			access |= IB_ACCESS_FLUSH_PERSISTENT;
 460	} else if (pkt->mask & RXE_ATOMIC_MASK) {
 461		qp_resp_from_atmeth(qp, pkt);
 462		access = IB_ACCESS_REMOTE_ATOMIC;
 463	} else {
 464		/* shouldn't happen */
 465		WARN_ON(1);
 466	}
 467
 468	/* A zero-byte read or write op is not required to
 469	 * set an addr or rkey. See C9-88
 470	 */
 471	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
 472	    (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
 473		qp->resp.mr = NULL;
 474		return RESPST_EXECUTE;
 475	}
 476
 477	va	= qp->resp.va;
 478	rkey	= qp->resp.rkey;
 479	resid	= qp->resp.resid;
 480	pktlen	= payload_size(pkt);
 481
 482	if (rkey_is_mw(rkey)) {
 483		mw = rxe_lookup_mw(qp, access, rkey);
 484		if (!mw) {
 485			rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
 486			state = RESPST_ERR_RKEY_VIOLATION;
 487			goto err;
 488		}
 489
 490		mr = mw->mr;
 491		if (!mr) {
 492			rxe_dbg_qp(qp, "MW doesn't have an MR\n");
 493			state = RESPST_ERR_RKEY_VIOLATION;
 494			goto err;
 495		}
 496
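    		/* for a zero based MW the VA in the packet is an offset
    		 * into the window, so record the window base address to
    		 * be added when accessing the underlying MR
    		 */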
 497		if (mw->access & IB_ZERO_BASED)
 498			qp->resp.offset = mw->addr;
 499
 500		rxe_get(mr);
 501		rxe_put(mw);
 502		mw = NULL;
 503	} else {
 504		mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
 505		if (!mr) {
 506			rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
 507			state = RESPST_ERR_RKEY_VIOLATION;
 508			goto err;
 509		}
 510	}
 511
 512	if (pkt->mask & RXE_FLUSH_MASK) {
 513		/* FLUSH MR may not set va or resid
 514		 * no need to check range since we will flush whole mr
 515		 */
 516		if (feth_sel(pkt) == IB_FLUSH_MR)
 517			goto skip_check_range;
 518	}
 519
 520	if (mr_check_range(mr, va + qp->resp.offset, resid)) {
 521		state = RESPST_ERR_RKEY_VIOLATION;
 522		goto err;
 523	}
 524
 525skip_check_range:
 526	if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
 527		if (resid > mtu) {
 528			if (pktlen != mtu || bth_pad(pkt)) {
 529				state = RESPST_ERR_LENGTH;
 530				goto err;
 531			}
 532		} else {
 533			if (pktlen != resid) {
 534				state = RESPST_ERR_LENGTH;
 535				goto err;
 536			}
 537			if ((bth_pad(pkt) != (0x3 & (-resid)))) {
 538				/* This case may not be exactly that
 539				 * but nothing else fits.
 540				 */
 541				state = RESPST_ERR_LENGTH;
 542				goto err;
 543			}
 544		}
 545	}
 546
 547	WARN_ON_ONCE(qp->resp.mr);
 548
 549	qp->resp.mr = mr;
 550	return RESPST_EXECUTE;
 551
 552err:
 553	qp->resp.mr = NULL;
 554	if (mr)
 555		rxe_put(mr);
 556	if (mw)
 557		rxe_put(mw);
 558
 559	return state;
 560}
 561
 562static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
 563				     int data_len)
 564{
 565	int err;
 566
 567	err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
 568			data_addr, data_len, RXE_TO_MR_OBJ);
 569	if (unlikely(err))
 570		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
 571					: RESPST_ERR_MALFORMED_WQE;
 572
 573	return RESPST_NONE;
 574}
 575
 576static enum resp_states write_data_in(struct rxe_qp *qp,
 577				      struct rxe_pkt_info *pkt)
 578{
 579	enum resp_states rc = RESPST_NONE;
 580	int	err;
 581	int data_len = payload_size(pkt);
 582
 583	err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
 584			  payload_addr(pkt), data_len, RXE_TO_MR_OBJ);
 585	if (err) {
 586		rc = RESPST_ERR_RKEY_VIOLATION;
 587		goto out;
 588	}
 589
 590	qp->resp.va += data_len;
 591	qp->resp.resid -= data_len;
 592
 593out:
 594	return rc;
 595}
 596
 597static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
 598					struct rxe_pkt_info *pkt,
 599					int type)
 600{
 601	struct resp_res *res;
 602	u32 pkts;
 603
 604	res = &qp->resp.resources[qp->resp.res_head];
 605	rxe_advance_resp_resource(qp);
 606	free_rd_atomic_resource(res);
 607
 608	res->type = type;
 609	res->replay = 0;
 610
 611	switch (type) {
 612	case RXE_READ_MASK:
 613		res->read.va = qp->resp.va + qp->resp.offset;
 614		res->read.va_org = qp->resp.va + qp->resp.offset;
 615		res->read.resid = qp->resp.resid;
 616		res->read.length = qp->resp.resid;
 617		res->read.rkey = qp->resp.rkey;
 618
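    		/* number of read response packets needed to cover the
    		 * requested length, at least one for a zero length read
    		 */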
 619		pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1);
 620		res->first_psn = pkt->psn;
 621		res->cur_psn = pkt->psn;
 622		res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;
 623
 624		res->state = rdatm_res_state_new;
 625		break;
 626	case RXE_ATOMIC_MASK:
 627	case RXE_ATOMIC_WRITE_MASK:
 628		res->first_psn = pkt->psn;
 629		res->last_psn = pkt->psn;
 630		res->cur_psn = pkt->psn;
 631		break;
 632	case RXE_FLUSH_MASK:
 633		res->flush.va = qp->resp.va + qp->resp.offset;
 634		res->flush.length = qp->resp.length;
 635		res->flush.type = feth_plt(pkt);
 636		res->flush.level = feth_sel(pkt);
 637	}
 638
 639	return res;
 640}
 641
 642static enum resp_states process_flush(struct rxe_qp *qp,
 643				       struct rxe_pkt_info *pkt)
 644{
 645	u64 length, start;
 646	struct rxe_mr *mr = qp->resp.mr;
 647	struct resp_res *res = qp->resp.res;
 648
 649	/* oA19-14, oA19-15 */
 650	if (res && res->replay)
 651		return RESPST_ACKNOWLEDGE;
 652	else if (!res) {
 653		res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
 654		qp->resp.res = res;
 655	}
 656
 657	if (res->flush.level == IB_FLUSH_RANGE) {
 658		start = res->flush.va;
 659		length = res->flush.length;
 660	} else { /* level == IB_FLUSH_MR */
 661		start = mr->ibmr.iova;
 662		length = mr->ibmr.length;
 663	}
 664
 665	if (res->flush.type & IB_FLUSH_PERSISTENT) {
 666		if (rxe_flush_pmem_iova(mr, start, length))
 667			return RESPST_ERR_RKEY_VIOLATION;
 668		/* Make data persistent. */
 669		wmb();
 670	} else if (res->flush.type & IB_FLUSH_GLOBAL) {
 671		/* Make data globally visible. */
 672		wmb();
 673	}
 674
 675	qp->resp.msn++;
 676
 677	/* next expected psn, read handles this separately */
 678	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
 679	qp->resp.ack_psn = qp->resp.psn;
 680
 681	qp->resp.opcode = pkt->opcode;
 682	qp->resp.status = IB_WC_SUCCESS;
 683
 684	return RESPST_ACKNOWLEDGE;
 685}
 686
 687static enum resp_states atomic_reply(struct rxe_qp *qp,
 688				     struct rxe_pkt_info *pkt)
 689{
 690	struct rxe_mr *mr = qp->resp.mr;
 691	struct resp_res *res = qp->resp.res;
 692	int err;
 693
 694	if (!res) {
 695		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK);
 696		qp->resp.res = res;
 697	}
 698
 699	if (!res->replay) {
 700		u64 iova = qp->resp.va + qp->resp.offset;
 701
 702		err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
 703					  atmeth_comp(pkt),
 704					  atmeth_swap_add(pkt),
 705					  &res->atomic.orig_val);
 706		if (err)
 707			return err;
 708
 709		qp->resp.msn++;
 710
 711		/* next expected psn, read handles this separately */
 712		qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
 713		qp->resp.ack_psn = qp->resp.psn;
 714
 715		qp->resp.opcode = pkt->opcode;
 716		qp->resp.status = IB_WC_SUCCESS;
 717	}
 718
 719	return RESPST_ACKNOWLEDGE;
 720}
 721
 722static enum resp_states atomic_write_reply(struct rxe_qp *qp,
 723					   struct rxe_pkt_info *pkt)
 724{
 725	struct resp_res *res = qp->resp.res;
 726	struct rxe_mr *mr;
 727	u64 value;
 728	u64 iova;
 729	int err;
 730
 731	if (!res) {
 732		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
 733		qp->resp.res = res;
 734	}
 735
 736	if (res->replay)
 737		return RESPST_ACKNOWLEDGE;
 738
 739	mr = qp->resp.mr;
 740	value = *(u64 *)payload_addr(pkt);
 741	iova = qp->resp.va + qp->resp.offset;
 742
 743	err = rxe_mr_do_atomic_write(mr, iova, value);
 744	if (err)
 745		return err;
 746
 747	qp->resp.resid = 0;
 748	qp->resp.msn++;
 749
 750	/* next expected psn, read handles this separately */
 751	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
 752	qp->resp.ack_psn = qp->resp.psn;
 753
 754	qp->resp.opcode = pkt->opcode;
 755	qp->resp.status = IB_WC_SUCCESS;
 756
 757	return RESPST_ACKNOWLEDGE;
 758}
 759
 760static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 761					  struct rxe_pkt_info *ack,
 762					  int opcode,
 763					  int payload,
 764					  u32 psn,
 765					  u8 syndrome)
 766{
 767	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 768	struct sk_buff *skb;
 769	int paylen;
 770	int pad;
 771	int err;
 772
 773	/*
 774	 * allocate packet
 775	 */
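    	/* pad the payload to a four byte boundary; the ICRC trailer
    	 * follows the pad
    	 */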
 776	pad = (-payload) & 0x3;
 777	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
 778
 779	skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
 780	if (!skb)
 781		return NULL;
 782
 783	ack->qp = qp;
 784	ack->opcode = opcode;
 785	ack->mask = rxe_opcode[opcode].mask;
 786	ack->paylen = paylen;
 787	ack->psn = psn;
 788
 789	bth_init(ack, opcode, 0, 0, pad, IB_DEFAULT_PKEY_FULL,
 790		 qp->attr.dest_qp_num, 0, psn);
 791
 792	if (ack->mask & RXE_AETH_MASK) {
 793		aeth_set_syn(ack, syndrome);
 794		aeth_set_msn(ack, qp->resp.msn);
 795	}
 796
 797	if (ack->mask & RXE_ATMACK_MASK)
 798		atmack_set_orig(ack, qp->resp.res->atomic.orig_val);
 799
 800	err = rxe_prepare(&qp->pri_av, ack, skb);
 801	if (err) {
 802		kfree_skb(skb);
 803		return NULL;
 804	}
 805
 806	return skb;
 807}
 808
 809/**
 810 * rxe_recheck_mr - revalidate MR from rkey and get a reference
 811 * @qp: the qp
 812 * @rkey: the rkey
 813 *
 814 * This code allows for the MR to have been invalidated or deregistered,
 815 * or for the MW, if one was used, to have been invalidated or deallocated.
 816 * It is assumed that the access permissions, if originally good,
 817 * are still OK and that the mappings are unchanged.
 818 *
 819 * TODO: If someone reregisters an MR to change its size or
 820 * access permissions during the processing of an RDMA read
 821 * we should kill the responder resource and complete the
 822 * operation with an error.
 823 *
 824 * Return: mr on success else NULL
 825 */
 826static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
 827{
 828	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 829	struct rxe_mr *mr;
 830	struct rxe_mw *mw;
 831
 832	if (rkey_is_mw(rkey)) {
 833		mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
 834		if (!mw)
 835			return NULL;
 836
 837		mr = mw->mr;
 838		if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
 839		    !mr || mr->state != RXE_MR_STATE_VALID) {
 840			rxe_put(mw);
 841			return NULL;
 842		}
 843
 844		rxe_get(mr);
 845		rxe_put(mw);
 846
 847		return mr;
 848	}
 849
 850	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
 851	if (!mr)
 852		return NULL;
 853
 854	if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
 855		rxe_put(mr);
 856		return NULL;
 857	}
 858
 859	return mr;
 860}
 861
 862/* RDMA read response. If res is not NULL, then we have a current RDMA request
 863 * being processed or replayed.
 864 */
 865static enum resp_states read_reply(struct rxe_qp *qp,
 866				   struct rxe_pkt_info *req_pkt)
 867{
 868	struct rxe_pkt_info ack_pkt;
 869	struct sk_buff *skb;
 870	int mtu = qp->mtu;
 871	enum resp_states state;
 872	int payload;
 873	int opcode;
 874	int err;
 875	struct resp_res *res = qp->resp.res;
 876	struct rxe_mr *mr;
 877
 878	if (!res) {
 879		res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
 880		qp->resp.res = res;
 881	}
 882
 883	if (res->state == rdatm_res_state_new) {
 884		if (!res->replay || qp->resp.length == 0) {
 885			/* if length == 0, mr will be NULL (which is OK);
 886			 * otherwise qp->resp.mr holds a ref on the MR,
 887			 * which we transfer to mr here and drop below.
 888			 */
 889			mr = qp->resp.mr;
 890			qp->resp.mr = NULL;
 891		} else {
 892			mr = rxe_recheck_mr(qp, res->read.rkey);
 893			if (!mr)
 894				return RESPST_ERR_RKEY_VIOLATION;
 895		}
 896
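    		/* a reply that fits in one packet uses the ONLY opcode,
    		 * otherwise start a multi-packet response with FIRST
    		 */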
 897		if (res->read.resid <= mtu)
 898			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
 899		else
 900			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
 901	} else {
 902		/* re-lookup mr from rkey on all later packets.
 903		 * length will be non-zero. This can fail if someone
 904		 * has modified or destroyed the mr since the first packet.
 905		 */
 906		mr = rxe_recheck_mr(qp, res->read.rkey);
 907		if (!mr)
 908			return RESPST_ERR_RKEY_VIOLATION;
 909
 910		if (res->read.resid > mtu)
 911			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
 912		else
 913			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
 914	}
 915
 916	res->state = rdatm_res_state_next;
 917
 918	payload = min_t(int, res->read.resid, mtu);
 919
 920	skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
 921				 res->cur_psn, AETH_ACK_UNLIMITED);
 922	if (!skb) {
 923		state = RESPST_ERR_RNR;
 924		goto err_out;
 925	}
 926
 927	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
 928			  payload, RXE_FROM_MR_OBJ);
 929	if (err) {
 930		kfree_skb(skb);
 931		state = RESPST_ERR_RKEY_VIOLATION;
 932		goto err_out;
 933	}
 934
 935	if (bth_pad(&ack_pkt)) {
 936		u8 *pad = payload_addr(&ack_pkt) + payload;
 937
 938		memset(pad, 0, bth_pad(&ack_pkt));
 939	}
 940
 941	/* rxe_xmit_packet always consumes the skb */
 942	err = rxe_xmit_packet(qp, &ack_pkt, skb);
 943	if (err) {
 944		state = RESPST_ERR_RNR;
 945		goto err_out;
 946	}
 947
 948	res->read.va += payload;
 949	res->read.resid -= payload;
 950	res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;
 951
 952	if (res->read.resid > 0) {
 953		state = RESPST_DONE;
 954	} else {
 955		qp->resp.res = NULL;
 956		if (!res->replay)
 957			qp->resp.opcode = -1;
 958		if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
 959			qp->resp.psn = res->cur_psn;
 960		state = RESPST_CLEANUP;
 961	}
 962
 963err_out:
 964	if (mr)
 965		rxe_put(mr);
 966	return state;
 967}
 968
 969static int invalidate_rkey(struct rxe_qp *qp, u32 rkey)
 970{
 971	if (rkey_is_mw(rkey))
 972		return rxe_invalidate_mw(qp, rkey);
 973	else
 974		return rxe_invalidate_mr(qp, rkey);
 975}
 976
 977/* Executes a new request. A retried request never reaches this function
 978 * (sends and writes are discarded, and reads and atomics are retried elsewhere).
 979 */
 980static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 981{
 982	enum resp_states err;
 983	struct sk_buff *skb = PKT_TO_SKB(pkt);
 984	union rdma_network_hdr hdr;
 985
 986	if (pkt->mask & RXE_SEND_MASK) {
 987		if (qp_type(qp) == IB_QPT_UD ||
 988		    qp_type(qp) == IB_QPT_GSI) {
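    			/* for UD/GSI place the network header at the
    			 * start of the receive buffer ahead of the payload
    			 */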
 989			if (skb->protocol == htons(ETH_P_IP)) {
 990				memset(&hdr.reserved, 0,
 991						sizeof(hdr.reserved));
 992				memcpy(&hdr.roce4grh, ip_hdr(skb),
 993						sizeof(hdr.roce4grh));
 994				err = send_data_in(qp, &hdr, sizeof(hdr));
 995			} else {
 996				err = send_data_in(qp, ipv6_hdr(skb),
 997						sizeof(hdr));
 998			}
 999			if (err)
1000				return err;
1001		}
1002		err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
1003		if (err)
1004			return err;
1005	} else if (pkt->mask & RXE_WRITE_MASK) {
1006		err = write_data_in(qp, pkt);
1007		if (err)
1008			return err;
1009	} else if (pkt->mask & RXE_READ_MASK) {
1010		/* For RDMA Read we can increment the msn now. See C9-148. */
1011		qp->resp.msn++;
1012		return RESPST_READ_REPLY;
1013	} else if (pkt->mask & RXE_ATOMIC_MASK) {
1014		return RESPST_ATOMIC_REPLY;
1015	} else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
1016		return RESPST_ATOMIC_WRITE_REPLY;
1017	} else if (pkt->mask & RXE_FLUSH_MASK) {
1018		return RESPST_PROCESS_FLUSH;
1019	} else {
1020		/* Unreachable */
1021		WARN_ON_ONCE(1);
1022	}
1023
1024	if (pkt->mask & RXE_IETH_MASK) {
1025		u32 rkey = ieth_rkey(pkt);
1026
1027		err = invalidate_rkey(qp, rkey);
1028		if (err)
1029			return RESPST_ERR_INVALIDATE_RKEY;
1030	}
1031
1032	if (pkt->mask & RXE_END_MASK)
1033		/* We successfully processed this new request. */
1034		qp->resp.msn++;
1035
1036	/* next expected psn, read handles this separately */
1037	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
1038	qp->resp.ack_psn = qp->resp.psn;
1039
1040	qp->resp.opcode = pkt->opcode;
1041	qp->resp.status = IB_WC_SUCCESS;
1042
1043	if (pkt->mask & RXE_COMP_MASK)
1044		return RESPST_COMPLETE;
1045	else if (qp_type(qp) == IB_QPT_RC)
1046		return RESPST_ACKNOWLEDGE;
1047	else
1048		return RESPST_CLEANUP;
1049}
1050
1051static enum resp_states do_complete(struct rxe_qp *qp,
1052				    struct rxe_pkt_info *pkt)
1053{
1054	struct rxe_cqe cqe;
1055	struct ib_wc *wc = &cqe.ibwc;
1056	struct ib_uverbs_wc *uwc = &cqe.uibwc;
1057	struct rxe_recv_wqe *wqe = qp->resp.wqe;
1058	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
1059	unsigned long flags;
1060
1061	if (!wqe)
1062		goto finish;
1063
1064	memset(&cqe, 0, sizeof(cqe));
1065
1066	if (qp->rcq->is_user) {
1067		uwc->status		= qp->resp.status;
1068		uwc->qp_num		= qp->ibqp.qp_num;
1069		uwc->wr_id		= wqe->wr_id;
1070	} else {
1071		wc->status		= qp->resp.status;
1072		wc->qp			= &qp->ibqp;
1073		wc->wr_id		= wqe->wr_id;
1074	}
1075
1076	if (wc->status == IB_WC_SUCCESS) {
1077		rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
1078		wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
1079				pkt->mask & RXE_WRITE_MASK) ?
1080					IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
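    		/* for RDMA write with immediate report the write length,
    		 * otherwise report the bytes placed in the receive buffer
    		 */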
1081		wc->byte_len = (pkt->mask & RXE_IMMDT_MASK &&
1082				pkt->mask & RXE_WRITE_MASK) ?
1083					qp->resp.length : wqe->dma.length - wqe->dma.resid;
1084
1085		/* fields after byte_len are different between kernel and user
1086		 * space
1087		 */
1088		if (qp->rcq->is_user) {
1089			uwc->wc_flags = IB_WC_GRH;
1090
1091			if (pkt->mask & RXE_IMMDT_MASK) {
1092				uwc->wc_flags |= IB_WC_WITH_IMM;
1093				uwc->ex.imm_data = immdt_imm(pkt);
1094			}
1095
1096			if (pkt->mask & RXE_IETH_MASK) {
1097				uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
1098				uwc->ex.invalidate_rkey = ieth_rkey(pkt);
1099			}
1100
1101			if (pkt->mask & RXE_DETH_MASK)
1102				uwc->src_qp = deth_sqp(pkt);
1103
1104			uwc->port_num		= qp->attr.port_num;
1105		} else {
1106			struct sk_buff *skb = PKT_TO_SKB(pkt);
1107
1108			wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE;
1109			if (skb->protocol == htons(ETH_P_IP))
1110				wc->network_hdr_type = RDMA_NETWORK_IPV4;
1111			else
1112				wc->network_hdr_type = RDMA_NETWORK_IPV6;
1113
1114			if (is_vlan_dev(skb->dev)) {
1115				wc->wc_flags |= IB_WC_WITH_VLAN;
1116				wc->vlan_id = vlan_dev_vlan_id(skb->dev);
1117			}
1118
1119			if (pkt->mask & RXE_IMMDT_MASK) {
1120				wc->wc_flags |= IB_WC_WITH_IMM;
1121				wc->ex.imm_data = immdt_imm(pkt);
1122			}
1123
1124			if (pkt->mask & RXE_IETH_MASK) {
1125				wc->wc_flags |= IB_WC_WITH_INVALIDATE;
1126				wc->ex.invalidate_rkey = ieth_rkey(pkt);
1127			}
1128
1129			if (pkt->mask & RXE_DETH_MASK)
1130				wc->src_qp = deth_sqp(pkt);
1131
1132			wc->port_num		= qp->attr.port_num;
1133		}
1134	} else {
1135		if (wc->status != IB_WC_WR_FLUSH_ERR)
1136			rxe_err_qp(qp, "non-flush error status = %d",
1137				wc->status);
1138	}
1139
1140	/* have copy for srq and reference for !srq */
1141	if (!qp->srq)
1142		queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT);
1143
1144	qp->resp.wqe = NULL;
1145
1146	if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1))
1147		return RESPST_ERR_CQ_OVERFLOW;
1148
1149finish:
1150	spin_lock_irqsave(&qp->state_lock, flags);
1151	if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
1152		spin_unlock_irqrestore(&qp->state_lock, flags);
1153		return RESPST_CHK_RESOURCE;
1154	}
1155	spin_unlock_irqrestore(&qp->state_lock, flags);
1156
1157	if (unlikely(!pkt))
1158		return RESPST_DONE;
1159	if (qp_type(qp) == IB_QPT_RC)
1160		return RESPST_ACKNOWLEDGE;
1161	else
1162		return RESPST_CLEANUP;
1163}
1164
1165
1166static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
1167				  int opcode, const char *msg)
1168{
1169	int err;
1170	struct rxe_pkt_info ack_pkt;
1171	struct sk_buff *skb;
1172
1173	skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome);
1174	if (!skb)
1175		return -ENOMEM;
1176
1177	err = rxe_xmit_packet(qp, &ack_pkt, skb);
1178	if (err)
1179		rxe_dbg_qp(qp, "Failed sending %s\n", msg);
1180
1181	return err;
1182}
1183
1184static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
1185{
1186	return send_common_ack(qp, syndrome, psn,
1187			IB_OPCODE_RC_ACKNOWLEDGE, "ACK");
1188}
1189
1190static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
1191{
1192	int ret = send_common_ack(qp, syndrome, psn,
1193			IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, "ATOMIC ACK");
1194
1195	/* have to clear this since it is used to trigger
1196	 * long read replies
1197	 */
1198	qp->resp.res = NULL;
1199	return ret;
1200}
1201
1202static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
1203{
1204	int ret = send_common_ack(qp, syndrome, psn,
1205			IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
1206			"RDMA READ response of length zero ACK");
1207
1208	/* have to clear this since it is used to trigger
1209	 * long read replies
1210	 */
1211	qp->resp.res = NULL;
1212	return ret;
1213}
1214
1215static enum resp_states acknowledge(struct rxe_qp *qp,
1216				    struct rxe_pkt_info *pkt)
1217{
1218	if (qp_type(qp) != IB_QPT_RC)
1219		return RESPST_CLEANUP;
1220
1221	if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
1222		send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
1223	else if (pkt->mask & RXE_ATOMIC_MASK)
1224		send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
1225	else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
1226		send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
1227	else if (bth_ack(pkt))
1228		send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
1229
1230	return RESPST_CLEANUP;
1231}
1232
1233static enum resp_states cleanup(struct rxe_qp *qp,
1234				struct rxe_pkt_info *pkt)
1235{
1236	struct sk_buff *skb;
1237
1238	if (pkt) {
1239		skb = skb_dequeue(&qp->req_pkts);
1240		rxe_put(qp);
1241		kfree_skb(skb);
1242		ib_device_put(qp->ibqp.device);
1243	}
1244
1245	if (qp->resp.mr) {
1246		rxe_put(qp->resp.mr);
1247		qp->resp.mr = NULL;
1248	}
1249
1250	return RESPST_DONE;
1251}
1252
1253static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
1254{
1255	int i;
1256
1257	for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
1258		struct resp_res *res = &qp->resp.resources[i];
1259
1260		if (res->type == 0)
1261			continue;
1262
1263		if (psn_compare(psn, res->first_psn) >= 0 &&
1264		    psn_compare(psn, res->last_psn) <= 0) {
1265			return res;
1266		}
1267	}
1268
1269	return NULL;
1270}
1271
1272static enum resp_states duplicate_request(struct rxe_qp *qp,
1273					  struct rxe_pkt_info *pkt)
1274{
1275	enum resp_states rc;
1276	u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;
1277
1278	if (pkt->mask & RXE_SEND_MASK ||
1279	    pkt->mask & RXE_WRITE_MASK) {
1280		/* SEND or RDMA WRITE. Ack again and clean up. C9-105. */
1281		send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
1282		return RESPST_CLEANUP;
1283	} else if (pkt->mask & RXE_FLUSH_MASK) {
1284		struct resp_res *res;
1285
1286		/* Find the operation in our list of responder resources. */
1287		res = find_resource(qp, pkt->psn);
1288		if (res) {
1289			res->replay = 1;
1290			res->cur_psn = pkt->psn;
1291			qp->resp.res = res;
1292			rc = RESPST_PROCESS_FLUSH;
1293			goto out;
1294		}
1295
1296		/* Resource not found. Class D error. Drop the request. */
1297		rc = RESPST_CLEANUP;
1298		goto out;
1299	} else if (pkt->mask & RXE_READ_MASK) {
1300		struct resp_res *res;
1301
1302		res = find_resource(qp, pkt->psn);
1303		if (!res) {
1304			/* Resource not found. Class D error.  Drop the
1305			 * request.
1306			 */
1307			rc = RESPST_CLEANUP;
1308			goto out;
1309		} else {
1310			/* Ensure this new request is the same as the previous
1311			 * one or a subset of it.
1312			 */
1313			u64 iova = reth_va(pkt);
1314			u32 resid = reth_len(pkt);
1315
1316			if (iova < res->read.va_org ||
1317			    resid > res->read.length ||
1318			    (iova + resid) > (res->read.va_org +
1319					      res->read.length)) {
1320				rc = RESPST_CLEANUP;
1321				goto out;
1322			}
1323
1324			if (reth_rkey(pkt) != res->read.rkey) {
1325				rc = RESPST_CLEANUP;
1326				goto out;
1327			}
1328
1329			res->cur_psn = pkt->psn;
1330			res->state = (pkt->psn == res->first_psn) ?
1331					rdatm_res_state_new :
1332					rdatm_res_state_replay;
1333			res->replay = 1;
1334
1335			/* Reset the resource, except length. */
1336			res->read.va_org = iova;
1337			res->read.va = iova;
1338			res->read.resid = resid;
1339
1340			/* Replay the RDMA read reply. */
1341			qp->resp.res = res;
1342			rc = RESPST_READ_REPLY;
1343			goto out;
1344		}
1345	} else {
1346		struct resp_res *res;
1347
1348		/* Find the operation in our list of responder resources. */
1349		res = find_resource(qp, pkt->psn);
1350		if (res) {
1351			res->replay = 1;
1352			res->cur_psn = pkt->psn;
1353			qp->resp.res = res;
1354			rc = pkt->mask & RXE_ATOMIC_MASK ?
1355					RESPST_ATOMIC_REPLY :
1356					RESPST_ATOMIC_WRITE_REPLY;
1357			goto out;
1358		}
1359
1360		/* Resource not found. Class D error. Drop the request. */
1361		rc = RESPST_CLEANUP;
1362		goto out;
1363	}
1364out:
1365	return rc;
1366}
1367
1368/* Process a class A or C. Both are treated the same in this implementation. */
1369static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
1370			      enum ib_wc_status status)
1371{
1372	qp->resp.aeth_syndrome	= syndrome;
1373	qp->resp.status		= status;
1374
1375	/* indicate that we should go through the ERROR state */
1376	qp->resp.goto_error	= 1;
1377}
1378
1379static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
1380{
1381	/* UC */
1382	if (qp->srq) {
1383		/* Class E */
1384		qp->resp.drop_msg = 1;
1385		if (qp->resp.wqe) {
1386			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
1387			return RESPST_COMPLETE;
1388		} else {
1389			return RESPST_CLEANUP;
1390		}
1391	} else {
1392		/* Class D1. This packet may be the start of a
1393		 * new message and could be valid. The previous
1394		 * message is invalid and ignored. Reset the
1395		 * recv wr to its original state.
1396		 */
1397		if (qp->resp.wqe) {
1398			qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
1399			qp->resp.wqe->dma.cur_sge = 0;
1400			qp->resp.wqe->dma.sge_offset = 0;
1401			qp->resp.opcode = -1;
1402		}
1403
1404		if (qp->resp.mr) {
1405			rxe_put(qp->resp.mr);
1406			qp->resp.mr = NULL;
1407		}
1408
1409		return RESPST_CLEANUP;
1410	}
1411}
1412
1413/* drain incoming request packet queue */
1414static void drain_req_pkts(struct rxe_qp *qp)
1415{
1416	struct sk_buff *skb;
1417
1418	while ((skb = skb_dequeue(&qp->req_pkts))) {
1419		rxe_put(qp);
1420		kfree_skb(skb);
1421		ib_device_put(qp->ibqp.device);
1422	}
1423}
1424
1425/* complete receive wqe with flush error */
1426static int flush_recv_wqe(struct rxe_qp *qp, struct rxe_recv_wqe *wqe)
1427{
1428	struct rxe_cqe cqe = {};
1429	struct ib_wc *wc = &cqe.ibwc;
1430	struct ib_uverbs_wc *uwc = &cqe.uibwc;
1431	int err;
1432
1433	if (qp->rcq->is_user) {
1434		uwc->wr_id = wqe->wr_id;
1435		uwc->status = IB_WC_WR_FLUSH_ERR;
1436		uwc->qp_num = qp_num(qp);
1437	} else {
1438		wc->wr_id = wqe->wr_id;
1439		wc->status = IB_WC_WR_FLUSH_ERR;
1440		wc->qp = &qp->ibqp;
1441	}
1442
1443	err = rxe_cq_post(qp->rcq, &cqe, 0);
1444	if (err)
1445		rxe_dbg_cq(qp->rcq, "post cq failed err = %d", err);
1446
1447	return err;
1448}
1449
1450/* drain and optionally complete the receive queue.
1451 * If unable to complete a wqe, stop completing and
1452 * just flush the remaining wqes.
1453 */
1454static void flush_recv_queue(struct rxe_qp *qp, bool notify)
1455{
1456	struct rxe_queue *q = qp->rq.queue;
1457	struct rxe_recv_wqe *wqe;
1458	int err;
1459
1460	if (qp->srq) {
1461		if (notify && qp->ibqp.event_handler) {
1462			struct ib_event ev;
1463
1464			ev.device = qp->ibqp.device;
1465			ev.element.qp = &qp->ibqp;
1466			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1467			qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1468		}
1469		return;
1470	}
1471
1472	/* recv queue not created. nothing to do. */
1473	if (!qp->rq.queue)
1474		return;
1475
1476	while ((wqe = queue_head(q, q->type))) {
1477		if (notify) {
1478			err = flush_recv_wqe(qp, wqe);
1479			if (err)
1480				notify = 0;
1481		}
1482		queue_advance_consumer(q, q->type);
1483	}
1484
1485	qp->resp.wqe = NULL;
1486}
1487
1488int rxe_responder(struct rxe_qp *qp)
1489{
1490	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
1491	enum resp_states state;
1492	struct rxe_pkt_info *pkt = NULL;
1493	int ret;
1494	unsigned long flags;
1495
1496	spin_lock_irqsave(&qp->state_lock, flags);
1497	if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
1498			  qp_state(qp) == IB_QPS_RESET) {
1499		bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
1500
1501		drain_req_pkts(qp);
1502		flush_recv_queue(qp, notify);
1503		spin_unlock_irqrestore(&qp->state_lock, flags);
1504		goto exit;
1505	}
1506	spin_unlock_irqrestore(&qp->state_lock, flags);
1507
1508	qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
1509
1510	state = RESPST_GET_REQ;
1511
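    	/* drive the responder state machine; DONE and EXIT below end
    	 * this pass, every other state selects the next state to run
    	 */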
1512	while (1) {
1513		rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
1514		switch (state) {
1515		case RESPST_GET_REQ:
1516			state = get_req(qp, &pkt);
1517			break;
1518		case RESPST_CHK_PSN:
1519			state = check_psn(qp, pkt);
1520			break;
1521		case RESPST_CHK_OP_SEQ:
1522			state = check_op_seq(qp, pkt);
1523			break;
1524		case RESPST_CHK_OP_VALID:
1525			state = check_op_valid(qp, pkt);
1526			break;
1527		case RESPST_CHK_RESOURCE:
1528			state = check_resource(qp, pkt);
1529			break;
1530		case RESPST_CHK_LENGTH:
1531			state = rxe_resp_check_length(qp, pkt);
1532			break;
1533		case RESPST_CHK_RKEY:
1534			state = check_rkey(qp, pkt);
1535			break;
1536		case RESPST_EXECUTE:
1537			state = execute(qp, pkt);
1538			break;
1539		case RESPST_COMPLETE:
1540			state = do_complete(qp, pkt);
1541			break;
1542		case RESPST_READ_REPLY:
1543			state = read_reply(qp, pkt);
1544			break;
1545		case RESPST_ATOMIC_REPLY:
1546			state = atomic_reply(qp, pkt);
1547			break;
1548		case RESPST_ATOMIC_WRITE_REPLY:
1549			state = atomic_write_reply(qp, pkt);
1550			break;
1551		case RESPST_PROCESS_FLUSH:
1552			state = process_flush(qp, pkt);
1553			break;
1554		case RESPST_ACKNOWLEDGE:
1555			state = acknowledge(qp, pkt);
1556			break;
1557		case RESPST_CLEANUP:
1558			state = cleanup(qp, pkt);
1559			break;
1560		case RESPST_DUPLICATE_REQUEST:
1561			state = duplicate_request(qp, pkt);
1562			break;
1563		case RESPST_ERR_PSN_OUT_OF_SEQ:
1564			/* RC only - Class B. Drop packet. */
1565			send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
1566			state = RESPST_CLEANUP;
1567			break;
1568
1569		case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
1570		case RESPST_ERR_MISSING_OPCODE_FIRST:
1571		case RESPST_ERR_MISSING_OPCODE_LAST_C:
1572		case RESPST_ERR_UNSUPPORTED_OPCODE:
1573		case RESPST_ERR_MISALIGNED_ATOMIC:
1574			/* RC Only - Class C. */
1575			do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
1576					  IB_WC_REM_INV_REQ_ERR);
1577			state = RESPST_COMPLETE;
1578			break;
1579
1580		case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
1581			state = do_class_d1e_error(qp);
1582			break;
1583		case RESPST_ERR_RNR:
1584			if (qp_type(qp) == IB_QPT_RC) {
1585				rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
1586				/* RC - class B */
1587				send_ack(qp, AETH_RNR_NAK |
1588					 (~AETH_TYPE_MASK &
1589					 qp->attr.min_rnr_timer),
1590					 pkt->psn);
1591			} else {
1592				/* UD/UC - class D */
1593				qp->resp.drop_msg = 1;
1594			}
1595			state = RESPST_CLEANUP;
1596			break;
1597
1598		case RESPST_ERR_RKEY_VIOLATION:
1599			if (qp_type(qp) == IB_QPT_RC) {
1600				/* Class C */
1601				do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
1602						  IB_WC_REM_ACCESS_ERR);
1603				state = RESPST_COMPLETE;
1604			} else {
1605				qp->resp.drop_msg = 1;
1606				if (qp->srq) {
1607					/* UC/SRQ Class D */
1608					qp->resp.status = IB_WC_REM_ACCESS_ERR;
1609					state = RESPST_COMPLETE;
1610				} else {
1611					/* UC/non-SRQ Class E. */
1612					state = RESPST_CLEANUP;
1613				}
1614			}
1615			break;
1616
1617		case RESPST_ERR_INVALIDATE_RKEY:
1618			/* RC - Class J. */
1619			qp->resp.goto_error = 1;
1620			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
1621			state = RESPST_COMPLETE;
1622			break;
1623
1624		case RESPST_ERR_LENGTH:
1625			if (qp_type(qp) == IB_QPT_RC) {
1626				/* Class C */
1627				do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
1628						  IB_WC_REM_INV_REQ_ERR);
1629				state = RESPST_COMPLETE;
1630			} else if (qp->srq) {
1631				/* UC/UD - class E */
1632				qp->resp.status = IB_WC_REM_INV_REQ_ERR;
1633				state = RESPST_COMPLETE;
1634			} else {
1635				/* UC/UD - class D */
1636				qp->resp.drop_msg = 1;
1637				state = RESPST_CLEANUP;
1638			}
1639			break;
1640
1641		case RESPST_ERR_MALFORMED_WQE:
1642			/* All, Class A. */
1643			do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
1644					  IB_WC_LOC_QP_OP_ERR);
1645			state = RESPST_COMPLETE;
1646			break;
1647
1648		case RESPST_ERR_CQ_OVERFLOW:
1649			/* All - Class G */
1650			state = RESPST_ERROR;
1651			break;
1652
1653		case RESPST_DONE:
1654			if (qp->resp.goto_error) {
1655				state = RESPST_ERROR;
1656				break;
1657			}
1658
1659			goto done;
1660
1661		case RESPST_EXIT:
1662			if (qp->resp.goto_error) {
1663				state = RESPST_ERROR;
1664				break;
1665			}
1666
1667			goto exit;
1668
1669		case RESPST_ERROR:
1670			qp->resp.goto_error = 0;
1671			rxe_dbg_qp(qp, "moved to error state\n");
1672			rxe_qp_error(qp);
1673			goto exit;
1674
1675		default:
1676			WARN_ON_ONCE(1);
1677		}
1678	}
1679
1680	/* A non-zero return value will cause rxe_do_task to
1681	 * exit its loop and end the work item. A zero return
1682	 * will continue looping and return to rxe_responder
1683	 */
1684done:
1685	ret = 0;
1686	goto out;
1687exit:
1688	ret = -EAGAIN;
1689out:
1690	return ret;
1691}
v6.13.7
   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/*
   3 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
   4 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
   5 */
   6
   7#include <linux/skbuff.h>
   8
   9#include "rxe.h"
  10#include "rxe_loc.h"
  11#include "rxe_queue.h"
  12
  13static char *resp_state_name[] = {
  14	[RESPST_NONE]				= "NONE",
  15	[RESPST_GET_REQ]			= "GET_REQ",
  16	[RESPST_CHK_PSN]			= "CHK_PSN",
  17	[RESPST_CHK_OP_SEQ]			= "CHK_OP_SEQ",
  18	[RESPST_CHK_OP_VALID]			= "CHK_OP_VALID",
  19	[RESPST_CHK_RESOURCE]			= "CHK_RESOURCE",
  20	[RESPST_CHK_LENGTH]			= "CHK_LENGTH",
  21	[RESPST_CHK_RKEY]			= "CHK_RKEY",
  22	[RESPST_EXECUTE]			= "EXECUTE",
  23	[RESPST_READ_REPLY]			= "READ_REPLY",
  24	[RESPST_ATOMIC_REPLY]			= "ATOMIC_REPLY",
  25	[RESPST_ATOMIC_WRITE_REPLY]		= "ATOMIC_WRITE_REPLY",
  26	[RESPST_PROCESS_FLUSH]			= "PROCESS_FLUSH",
  27	[RESPST_COMPLETE]			= "COMPLETE",
  28	[RESPST_ACKNOWLEDGE]			= "ACKNOWLEDGE",
  29	[RESPST_CLEANUP]			= "CLEANUP",
  30	[RESPST_DUPLICATE_REQUEST]		= "DUPLICATE_REQUEST",
  31	[RESPST_ERR_MALFORMED_WQE]		= "ERR_MALFORMED_WQE",
  32	[RESPST_ERR_UNSUPPORTED_OPCODE]		= "ERR_UNSUPPORTED_OPCODE",
  33	[RESPST_ERR_MISALIGNED_ATOMIC]		= "ERR_MISALIGNED_ATOMIC",
  34	[RESPST_ERR_PSN_OUT_OF_SEQ]		= "ERR_PSN_OUT_OF_SEQ",
  35	[RESPST_ERR_MISSING_OPCODE_FIRST]	= "ERR_MISSING_OPCODE_FIRST",
  36	[RESPST_ERR_MISSING_OPCODE_LAST_C]	= "ERR_MISSING_OPCODE_LAST_C",
  37	[RESPST_ERR_MISSING_OPCODE_LAST_D1E]	= "ERR_MISSING_OPCODE_LAST_D1E",
  38	[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ]	= "ERR_TOO_MANY_RDMA_ATM_REQ",
  39	[RESPST_ERR_RNR]			= "ERR_RNR",
  40	[RESPST_ERR_RKEY_VIOLATION]		= "ERR_RKEY_VIOLATION",
  41	[RESPST_ERR_INVALIDATE_RKEY]		= "ERR_INVALIDATE_RKEY_VIOLATION",
  42	[RESPST_ERR_LENGTH]			= "ERR_LENGTH",
  43	[RESPST_ERR_CQ_OVERFLOW]		= "ERR_CQ_OVERFLOW",
  44	[RESPST_ERROR]				= "ERROR",
  45	[RESPST_DONE]				= "DONE",
  46	[RESPST_EXIT]				= "EXIT",
  47};
  48
  49/* rxe_recv calls here to add a request packet to the input queue */
  50void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
  51{
  52	skb_queue_tail(&qp->req_pkts, skb);
  53	rxe_sched_task(&qp->recv_task);
  54}
  55
  56static inline enum resp_states get_req(struct rxe_qp *qp,
  57				       struct rxe_pkt_info **pkt_p)
  58{
  59	struct sk_buff *skb;
  60
  61	skb = skb_peek(&qp->req_pkts);
  62	if (!skb)
  63		return RESPST_EXIT;
  64
  65	*pkt_p = SKB_TO_PKT(skb);
  66
  67	return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN;
  68}
  69
  70static enum resp_states check_psn(struct rxe_qp *qp,
  71				  struct rxe_pkt_info *pkt)
  72{
  73	int diff = psn_compare(pkt->psn, qp->resp.psn);
  74	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
  75
  76	switch (qp_type(qp)) {
  77	case IB_QPT_RC:
  78		if (diff > 0) {
  79			if (qp->resp.sent_psn_nak)
  80				return RESPST_CLEANUP;
  81
  82			qp->resp.sent_psn_nak = 1;
  83			rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ);
  84			return RESPST_ERR_PSN_OUT_OF_SEQ;
  85
  86		} else if (diff < 0) {
  87			rxe_counter_inc(rxe, RXE_CNT_DUP_REQ);
  88			return RESPST_DUPLICATE_REQUEST;
  89		}
  90
  91		if (qp->resp.sent_psn_nak)
  92			qp->resp.sent_psn_nak = 0;
  93
  94		break;
  95
  96	case IB_QPT_UC:
  97		if (qp->resp.drop_msg || diff != 0) {
  98			if (pkt->mask & RXE_START_MASK) {
  99				qp->resp.drop_msg = 0;
 100				return RESPST_CHK_OP_SEQ;
 101			}
 102
 103			qp->resp.drop_msg = 1;
 104			return RESPST_CLEANUP;
 105		}
 106		break;
 107	default:
 108		break;
 109	}
 110
 111	return RESPST_CHK_OP_SEQ;
 112}
 113
 114static enum resp_states check_op_seq(struct rxe_qp *qp,
 115				     struct rxe_pkt_info *pkt)
 116{
 117	switch (qp_type(qp)) {
 118	case IB_QPT_RC:
 119		switch (qp->resp.opcode) {
 120		case IB_OPCODE_RC_SEND_FIRST:
 121		case IB_OPCODE_RC_SEND_MIDDLE:
 122			switch (pkt->opcode) {
 123			case IB_OPCODE_RC_SEND_MIDDLE:
 124			case IB_OPCODE_RC_SEND_LAST:
 125			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
 126			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
 127				return RESPST_CHK_OP_VALID;
 128			default:
 129				return RESPST_ERR_MISSING_OPCODE_LAST_C;
 130			}
 131
 132		case IB_OPCODE_RC_RDMA_WRITE_FIRST:
 133		case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
 134			switch (pkt->opcode) {
 135			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
 136			case IB_OPCODE_RC_RDMA_WRITE_LAST:
 137			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
 138				return RESPST_CHK_OP_VALID;
 139			default:
 140				return RESPST_ERR_MISSING_OPCODE_LAST_C;
 141			}
 142
 143		default:
 144			switch (pkt->opcode) {
 145			case IB_OPCODE_RC_SEND_MIDDLE:
 146			case IB_OPCODE_RC_SEND_LAST:
 147			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
 148			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
 149			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
 150			case IB_OPCODE_RC_RDMA_WRITE_LAST:
 151			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
 152				return RESPST_ERR_MISSING_OPCODE_FIRST;
 153			default:
 154				return RESPST_CHK_OP_VALID;
 155			}
 156		}
 157		break;
 158
 159	case IB_QPT_UC:
 160		switch (qp->resp.opcode) {
 161		case IB_OPCODE_UC_SEND_FIRST:
 162		case IB_OPCODE_UC_SEND_MIDDLE:
 163			switch (pkt->opcode) {
 164			case IB_OPCODE_UC_SEND_MIDDLE:
 165			case IB_OPCODE_UC_SEND_LAST:
 166			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
 167				return RESPST_CHK_OP_VALID;
 168			default:
 169				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
 170			}
 171
 172		case IB_OPCODE_UC_RDMA_WRITE_FIRST:
 173		case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
 174			switch (pkt->opcode) {
 175			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
 176			case IB_OPCODE_UC_RDMA_WRITE_LAST:
 177			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
 178				return RESPST_CHK_OP_VALID;
 179			default:
 180				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
 181			}
 182
 183		default:
 184			switch (pkt->opcode) {
 185			case IB_OPCODE_UC_SEND_MIDDLE:
 186			case IB_OPCODE_UC_SEND_LAST:
 187			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
 188			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
 189			case IB_OPCODE_UC_RDMA_WRITE_LAST:
 190			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
 191				qp->resp.drop_msg = 1;
 192				return RESPST_CLEANUP;
 193			default:
 194				return RESPST_CHK_OP_VALID;
 195			}
 196		}
 197		break;
 198
 199	default:
 200		return RESPST_CHK_OP_VALID;
 201	}
 202}
 203
 204static bool check_qp_attr_access(struct rxe_qp *qp,
 205				 struct rxe_pkt_info *pkt)
 206{
 207	if (((pkt->mask & RXE_READ_MASK) &&
 208	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
 209	    ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
 210	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
 211	    ((pkt->mask & RXE_ATOMIC_MASK) &&
 212	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
 213		return false;
 214
 215	if (pkt->mask & RXE_FLUSH_MASK) {
 216		u32 flush_type = feth_plt(pkt);
 217
 218		if ((flush_type & IB_FLUSH_GLOBAL &&
 219		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
 220		    (flush_type & IB_FLUSH_PERSISTENT &&
 221		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
 222			return false;
 223	}
 224
 225	return true;
 226}
 227
 228static enum resp_states check_op_valid(struct rxe_qp *qp,
 229				       struct rxe_pkt_info *pkt)
 230{
 231	switch (qp_type(qp)) {
 232	case IB_QPT_RC:
 233		if (!check_qp_attr_access(qp, pkt))
 234			return RESPST_ERR_UNSUPPORTED_OPCODE;
 235
 236		break;
 237
 238	case IB_QPT_UC:
 239		if ((pkt->mask & RXE_WRITE_MASK) &&
 240		    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
 241			qp->resp.drop_msg = 1;
 242			return RESPST_CLEANUP;
 243		}
 244
 245		break;
 246
 247	case IB_QPT_UD:
 248	case IB_QPT_GSI:
 249		break;
 250
 251	default:
 252		WARN_ON_ONCE(1);
 253		break;
 254	}
 255
 256	return RESPST_CHK_RESOURCE;
 257}
 258
 259static enum resp_states get_srq_wqe(struct rxe_qp *qp)
 260{
 261	struct rxe_srq *srq = qp->srq;
 262	struct rxe_queue *q = srq->rq.queue;
 263	struct rxe_recv_wqe *wqe;
 264	struct ib_event ev;
 265	unsigned int count;
 266	size_t size;
 267	unsigned long flags;
 268
 269	if (srq->error)
 270		return RESPST_ERR_RNR;
 271
 272	spin_lock_irqsave(&srq->rq.consumer_lock, flags);
 273
 274	wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
 275	if (!wqe) {
 276		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
 277		return RESPST_ERR_RNR;
 278	}
 279
 280	/* don't trust user space data */
 281	if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
 282		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
 283		rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
 284		return RESPST_ERR_MALFORMED_WQE;
 285	}
 286	size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge);
 287	memcpy(&qp->resp.srq_wqe, wqe, size);
 288
 289	qp->resp.wqe = &qp->resp.srq_wqe.wqe;
 290	queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT);
 291	count = queue_count(q, QUEUE_TYPE_FROM_CLIENT);
 292
 293	if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) {
 294		srq->limit = 0;
 295		goto event;
 296	}
 297
 298	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
 299	return RESPST_CHK_LENGTH;
 300
 301event:
 302	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
 303	ev.device = qp->ibqp.device;
 304	ev.element.srq = qp->ibqp.srq;
 305	ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
 306	srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
 307	return RESPST_CHK_LENGTH;
 308}
 309
 310static enum resp_states check_resource(struct rxe_qp *qp,
 311				       struct rxe_pkt_info *pkt)
 312{
 313	struct rxe_srq *srq = qp->srq;
 314
 315	if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
  316		/* it is the requester's job not to send
  317		 * too many read/atomic ops; we just
  318		 * recycle the responder resource queue
  319		 */
 320		if (likely(qp->attr.max_dest_rd_atomic > 0))
 321			return RESPST_CHK_LENGTH;
 322		else
 323			return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
 324	}
 325
 326	if (pkt->mask & RXE_RWR_MASK) {
 327		if (srq)
 328			return get_srq_wqe(qp);
 329
 330		qp->resp.wqe = queue_head(qp->rq.queue,
 331				QUEUE_TYPE_FROM_CLIENT);
 332		return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
 333	}
 334
 335	return RESPST_CHK_LENGTH;
 336}
 337
 338static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
 339					      struct rxe_pkt_info *pkt)
 340{
 341	/*
 342	 * See IBA C9-92
 343	 * For UD QPs we only check if the packet will fit in the
 344	 * receive buffer later. For RDMA operations additional
 345	 * length checks are performed in check_rkey.
 346	 */
 347	if ((qp_type(qp) == IB_QPT_GSI) || (qp_type(qp) == IB_QPT_UD)) {
 348		unsigned int payload = payload_size(pkt);
 349		unsigned int recv_buffer_len = 0;
 350		int i;
 351
 352		for (i = 0; i < qp->resp.wqe->dma.num_sge; i++)
 353			recv_buffer_len += qp->resp.wqe->dma.sge[i].length;
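		/* the 40-byte network header (union rdma_network_hdr) is
		 * written into the receive buffer ahead of the payload for
		 * UD, so it must fit as well
		 */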
 354		if (payload + sizeof(union rdma_network_hdr) > recv_buffer_len) {
 355			rxe_dbg_qp(qp, "The receive buffer is too small for this UD packet.\n");
 356			return RESPST_ERR_LENGTH;
 357		}
 358	}
 359
 360	if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
 361					     (qp_type(qp) == IB_QPT_UC))) {
 362		unsigned int mtu = qp->mtu;
 363		unsigned int payload = payload_size(pkt);
 364
 365		if ((pkt->mask & RXE_START_MASK) &&
 366		    (pkt->mask & RXE_END_MASK)) {
 367			if (unlikely(payload > mtu)) {
 368				rxe_dbg_qp(qp, "only packet too long\n");
 369				return RESPST_ERR_LENGTH;
 370			}
 371		} else if ((pkt->mask & RXE_START_MASK) ||
 372			   (pkt->mask & RXE_MIDDLE_MASK)) {
 373			if (unlikely(payload != mtu)) {
 374				rxe_dbg_qp(qp, "first or middle packet not mtu\n");
 375				return RESPST_ERR_LENGTH;
 376			}
 377		} else if (pkt->mask & RXE_END_MASK) {
 378			if (unlikely((payload == 0) || (payload > mtu))) {
 379				rxe_dbg_qp(qp, "last packet zero or too long\n");
 380				return RESPST_ERR_LENGTH;
 381			}
 382		}
 383	}
 384
 385	/* See IBA C9-94 */
 386	if (pkt->mask & RXE_RETH_MASK) {
 387		if (reth_len(pkt) > (1U << 31)) {
 388			rxe_dbg_qp(qp, "dma length too long\n");
 389			return RESPST_ERR_LENGTH;
 390		}
 391	}
 392
 393	if (pkt->mask & RXE_RDMA_OP_MASK)
 394		return RESPST_CHK_RKEY;
 395	else
 396		return RESPST_EXECUTE;
 397}
 398
 399/* if the reth length field is zero we can assume nothing
 400 * about the rkey value and should not validate or use it.
 401 * Instead set qp->resp.rkey to 0 which is an invalid rkey
 402 * value since the minimum index part is 1.
 403 */
 404static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 405{
 406	unsigned int length = reth_len(pkt);
 407
 408	qp->resp.va = reth_va(pkt);
 409	qp->resp.offset = 0;
 410	qp->resp.resid = length;
 411	qp->resp.length = length;
 412	if (pkt->mask & RXE_READ_OR_WRITE_MASK && length == 0)
 413		qp->resp.rkey = 0;
 414	else
 415		qp->resp.rkey = reth_rkey(pkt);
 416}
 417
 418static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 419{
 420	qp->resp.va = atmeth_va(pkt);
 421	qp->resp.offset = 0;
 422	qp->resp.rkey = atmeth_rkey(pkt);
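	/* atomic operations always act on a single 64-bit value */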
 423	qp->resp.resid = sizeof(u64);
 424}
 425
 426/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
 427 * if an invalid rkey is received or the rdma length is zero. For middle
 428 * or last packets use the stored value of mr.
 429 */
 430static enum resp_states check_rkey(struct rxe_qp *qp,
 431				   struct rxe_pkt_info *pkt)
 432{
 433	struct rxe_mr *mr = NULL;
 434	struct rxe_mw *mw = NULL;
 435	u64 va;
 436	u32 rkey;
 437	u32 resid;
 438	u32 pktlen;
 439	int mtu = qp->mtu;
 440	enum resp_states state;
 441	int access = 0;
 442
  443	/* parse the RETH or ATMETH header of first/only packets
  444	 * for va, length, rkey, etc. or use the current values for
  445	 * middle/last packets.
  446	 */
 447	if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
 448		if (pkt->mask & RXE_RETH_MASK)
 449			qp_resp_from_reth(qp, pkt);
 450
 451		access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
 452						     : IB_ACCESS_REMOTE_WRITE;
 453	} else if (pkt->mask & RXE_FLUSH_MASK) {
 454		u32 flush_type = feth_plt(pkt);
 455
 456		if (pkt->mask & RXE_RETH_MASK)
 457			qp_resp_from_reth(qp, pkt);
 458
 459		if (flush_type & IB_FLUSH_GLOBAL)
 460			access |= IB_ACCESS_FLUSH_GLOBAL;
 461		if (flush_type & IB_FLUSH_PERSISTENT)
 462			access |= IB_ACCESS_FLUSH_PERSISTENT;
 463	} else if (pkt->mask & RXE_ATOMIC_MASK) {
 464		qp_resp_from_atmeth(qp, pkt);
 465		access = IB_ACCESS_REMOTE_ATOMIC;
 466	} else {
 467		/* shouldn't happen */
 468		WARN_ON(1);
 469	}
 470
 471	/* A zero-byte read or write op is not required to
 472	 * set an addr or rkey. See C9-88
 473	 */
 474	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
 475	    (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
 476		qp->resp.mr = NULL;
 477		return RESPST_EXECUTE;
 478	}
 479
 480	va	= qp->resp.va;
 481	rkey	= qp->resp.rkey;
 482	resid	= qp->resp.resid;
 483	pktlen	= payload_size(pkt);
 484
 485	if (rkey_is_mw(rkey)) {
 486		mw = rxe_lookup_mw(qp, access, rkey);
 487		if (!mw) {
 488			rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
 489			state = RESPST_ERR_RKEY_VIOLATION;
 490			goto err;
 491		}
 492
 493		mr = mw->mr;
 494		if (!mr) {
 495			rxe_dbg_qp(qp, "MW doesn't have an MR\n");
 496			state = RESPST_ERR_RKEY_VIOLATION;
 497			goto err;
 498		}
 499
 500		if (mw->access & IB_ZERO_BASED)
 501			qp->resp.offset = mw->addr;
 502
 503		rxe_get(mr);
 504		rxe_put(mw);
 505		mw = NULL;
 506	} else {
 507		mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
 508		if (!mr) {
 509			rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
 510			state = RESPST_ERR_RKEY_VIOLATION;
 511			goto err;
 512		}
 513	}
 514
 515	if (pkt->mask & RXE_FLUSH_MASK) {
  516		/* a FLUSH with MR selectivity may not set va or resid;
  517		 * no need to check the range since we will flush the whole MR
  518		 */
 519		if (feth_sel(pkt) == IB_FLUSH_MR)
 520			goto skip_check_range;
 521	}
 522
 523	if (mr_check_range(mr, va + qp->resp.offset, resid)) {
 524		state = RESPST_ERR_RKEY_VIOLATION;
 525		goto err;
 526	}
 527
 528skip_check_range:
 529	if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
 530		if (resid > mtu) {
 531			if (pktlen != mtu || bth_pad(pkt)) {
 532				state = RESPST_ERR_LENGTH;
 533				goto err;
 534			}
 535		} else {
 536			if (pktlen != resid) {
 537				state = RESPST_ERR_LENGTH;
 538				goto err;
 539			}
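			/* the last packet must carry exactly the padding
			 * needed to round resid up to a 4-byte boundary,
			 * i.e. (-resid) & 0x3
			 */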
 540			if ((bth_pad(pkt) != (0x3 & (-resid)))) {
  541				/* This may not be exactly a length
  542				 * error, but nothing else fits.
  543				 */
 544				state = RESPST_ERR_LENGTH;
 545				goto err;
 546			}
 547		}
 548	}
 549
 550	WARN_ON_ONCE(qp->resp.mr);
 551
 552	qp->resp.mr = mr;
 553	return RESPST_EXECUTE;
 554
 555err:
 556	qp->resp.mr = NULL;
 557	if (mr)
 558		rxe_put(mr);
 559	if (mw)
 560		rxe_put(mw);
 561
 562	return state;
 563}
 564
 565static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
 566				     int data_len)
 567{
 568	int err;
 569
 570	err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
 571			data_addr, data_len, RXE_TO_MR_OBJ);
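	/* copy_data is expected to return -ENOSPC when the receive
	 * buffer runs out of space; any other failure indicates a
	 * malformed WQE
	 */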
 572	if (unlikely(err))
 573		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
 574					: RESPST_ERR_MALFORMED_WQE;
 575
 576	return RESPST_NONE;
 577}
 578
 579static enum resp_states write_data_in(struct rxe_qp *qp,
 580				      struct rxe_pkt_info *pkt)
 581{
 582	enum resp_states rc = RESPST_NONE;
 583	int	err;
 584	int data_len = payload_size(pkt);
 585
 586	err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
 587			  payload_addr(pkt), data_len, RXE_TO_MR_OBJ);
 588	if (err) {
 589		rc = RESPST_ERR_RKEY_VIOLATION;
 590		goto out;
 591	}
 592
 593	qp->resp.va += data_len;
 594	qp->resp.resid -= data_len;
 595
 596out:
 597	return rc;
 598}
 599
 600static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
 601					struct rxe_pkt_info *pkt,
 602					int type)
 603{
 604	struct resp_res *res;
 605	u32 pkts;
 606
 607	res = &qp->resp.resources[qp->resp.res_head];
 608	rxe_advance_resp_resource(qp);
 609	free_rd_atomic_resource(res);
 610
 611	res->type = type;
 612	res->replay = 0;
 613
 614	switch (type) {
 615	case RXE_READ_MASK:
 616		res->read.va = qp->resp.va + qp->resp.offset;
 617		res->read.va_org = qp->resp.va + qp->resp.offset;
 618		res->read.resid = qp->resp.resid;
 619		res->read.length = qp->resp.resid;
 620		res->read.rkey = qp->resp.rkey;
 621
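		/* number of response packets is the dma length rounded
		 * up to whole MTUs, with a minimum of one so a
		 * zero-length read still produces a single response;
		 * e.g. reth_len = 8192 with mtu = 4096 gives pkts = 2
		 * and last_psn = first_psn + 1
		 */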
 622		pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1);
 623		res->first_psn = pkt->psn;
 624		res->cur_psn = pkt->psn;
 625		res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;
 626
 627		res->state = rdatm_res_state_new;
 628		break;
 629	case RXE_ATOMIC_MASK:
 630	case RXE_ATOMIC_WRITE_MASK:
 631		res->first_psn = pkt->psn;
 632		res->last_psn = pkt->psn;
 633		res->cur_psn = pkt->psn;
 634		break;
 635	case RXE_FLUSH_MASK:
 636		res->flush.va = qp->resp.va + qp->resp.offset;
 637		res->flush.length = qp->resp.length;
 638		res->flush.type = feth_plt(pkt);
 639		res->flush.level = feth_sel(pkt);
 640	}
 641
 642	return res;
 643}
 644
 645static enum resp_states process_flush(struct rxe_qp *qp,
 646				       struct rxe_pkt_info *pkt)
 647{
 648	u64 length, start;
 649	struct rxe_mr *mr = qp->resp.mr;
 650	struct resp_res *res = qp->resp.res;
 651
 652	/* oA19-14, oA19-15 */
 653	if (res && res->replay)
 654		return RESPST_ACKNOWLEDGE;
 655	else if (!res) {
 656		res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
 657		qp->resp.res = res;
 658	}
 659
 660	if (res->flush.level == IB_FLUSH_RANGE) {
 661		start = res->flush.va;
 662		length = res->flush.length;
 663	} else { /* level == IB_FLUSH_MR */
 664		start = mr->ibmr.iova;
 665		length = mr->ibmr.length;
 666	}
 667
 668	if (res->flush.type & IB_FLUSH_PERSISTENT) {
 669		if (rxe_flush_pmem_iova(mr, start, length))
 670			return RESPST_ERR_RKEY_VIOLATION;
 671		/* Make data persistent. */
 672		wmb();
 673	} else if (res->flush.type & IB_FLUSH_GLOBAL) {
  674		/* Make data globally visible. */
 675		wmb();
 676	}
 677
 678	qp->resp.msn++;
 679
 680	/* next expected psn, read handles this separately */
 681	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
 682	qp->resp.ack_psn = qp->resp.psn;
 683
 684	qp->resp.opcode = pkt->opcode;
 685	qp->resp.status = IB_WC_SUCCESS;
 686
 687	return RESPST_ACKNOWLEDGE;
 688}
 689
 690static enum resp_states atomic_reply(struct rxe_qp *qp,
 691				     struct rxe_pkt_info *pkt)
 692{
 693	struct rxe_mr *mr = qp->resp.mr;
 694	struct resp_res *res = qp->resp.res;
 695	int err;
 696
 697	if (!res) {
 698		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK);
 699		qp->resp.res = res;
 700	}
 701
 702	if (!res->replay) {
 703		u64 iova = qp->resp.va + qp->resp.offset;
 704
 705		err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
 706					  atmeth_comp(pkt),
 707					  atmeth_swap_add(pkt),
 708					  &res->atomic.orig_val);
 709		if (err)
 710			return err;
 711
 712		qp->resp.msn++;
 713
 714		/* next expected psn, read handles this separately */
 715		qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
 716		qp->resp.ack_psn = qp->resp.psn;
 717
 718		qp->resp.opcode = pkt->opcode;
 719		qp->resp.status = IB_WC_SUCCESS;
 720	}
 721
 722	return RESPST_ACKNOWLEDGE;
 723}
 724
 725static enum resp_states atomic_write_reply(struct rxe_qp *qp,
 726					   struct rxe_pkt_info *pkt)
 727{
 728	struct resp_res *res = qp->resp.res;
 729	struct rxe_mr *mr;
 730	u64 value;
 731	u64 iova;
 732	int err;
 733
 734	if (!res) {
 735		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
 736		qp->resp.res = res;
 737	}
 738
 739	if (res->replay)
 740		return RESPST_ACKNOWLEDGE;
 741
 742	mr = qp->resp.mr;
 743	value = *(u64 *)payload_addr(pkt);
 744	iova = qp->resp.va + qp->resp.offset;
 745
 746	err = rxe_mr_do_atomic_write(mr, iova, value);
 747	if (err)
 748		return err;
 749
 750	qp->resp.resid = 0;
 751	qp->resp.msn++;
 752
 753	/* next expected psn, read handles this separately */
 754	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
 755	qp->resp.ack_psn = qp->resp.psn;
 756
 757	qp->resp.opcode = pkt->opcode;
 758	qp->resp.status = IB_WC_SUCCESS;
 759
 760	return RESPST_ACKNOWLEDGE;
 761}
 762
 763static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 764					  struct rxe_pkt_info *ack,
 765					  int opcode,
 766					  int payload,
 767					  u32 psn,
 768					  u8 syndrome)
 769{
 770	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 771	struct sk_buff *skb;
 772	int paylen;
 773	int pad;
 774	int err;
 775
 776	/*
 777	 * allocate packet
 778	 */
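	/* pad rounds the payload up to a 4-byte boundary, e.g.
	 * payload = 13 gives pad = 3; paylen is the full on-the-wire
	 * length: transport headers + payload + pad + ICRC
	 */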
 779	pad = (-payload) & 0x3;
 780	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
 781
 782	skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
 783	if (!skb)
 784		return NULL;
 785
 786	ack->qp = qp;
 787	ack->opcode = opcode;
 788	ack->mask = rxe_opcode[opcode].mask;
 789	ack->paylen = paylen;
 790	ack->psn = psn;
 791
 792	bth_init(ack, opcode, 0, 0, pad, IB_DEFAULT_PKEY_FULL,
 793		 qp->attr.dest_qp_num, 0, psn);
 794
 795	if (ack->mask & RXE_AETH_MASK) {
 796		aeth_set_syn(ack, syndrome);
 797		aeth_set_msn(ack, qp->resp.msn);
 798	}
 799
 800	if (ack->mask & RXE_ATMACK_MASK)
 801		atmack_set_orig(ack, qp->resp.res->atomic.orig_val);
 802
 803	err = rxe_prepare(&qp->pri_av, ack, skb);
 804	if (err) {
 805		kfree_skb(skb);
 806		return NULL;
 807	}
 808
 809	return skb;
 810}
 811
 812/**
 813 * rxe_recheck_mr - revalidate MR from rkey and get a reference
 814 * @qp: the qp
 815 * @rkey: the rkey
 816 *
  817 * This allows the MR to have been invalidated or deregistered, or
  818 * the MW, if one was used, to have been invalidated or deallocated.
  819 * It is assumed that the access permissions, if originally good,
  820 * are still OK and that the mappings are unchanged.
 821 *
 822 * TODO: If someone reregisters an MR to change its size or
 823 * access permissions during the processing of an RDMA read
 824 * we should kill the responder resource and complete the
 825 * operation with an error.
 826 *
 827 * Return: mr on success else NULL
 828 */
 829static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
 830{
 831	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 832	struct rxe_mr *mr;
 833	struct rxe_mw *mw;
 834
 835	if (rkey_is_mw(rkey)) {
 836		mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
 837		if (!mw)
 838			return NULL;
 839
 840		mr = mw->mr;
 841		if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
 842		    !mr || mr->state != RXE_MR_STATE_VALID) {
 843			rxe_put(mw);
 844			return NULL;
 845		}
 846
 847		rxe_get(mr);
 848		rxe_put(mw);
 849
 850		return mr;
 851	}
 852
 853	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
 854	if (!mr)
 855		return NULL;
 856
 857	if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
 858		rxe_put(mr);
 859		return NULL;
 860	}
 861
 862	return mr;
 863}
 864
 865/* RDMA read response. If res is not NULL, then we have a current RDMA request
 866 * being processed or replayed.
 867 */
 868static enum resp_states read_reply(struct rxe_qp *qp,
 869				   struct rxe_pkt_info *req_pkt)
 870{
 871	struct rxe_pkt_info ack_pkt;
 872	struct sk_buff *skb;
 873	int mtu = qp->mtu;
 874	enum resp_states state;
 875	int payload;
 876	int opcode;
 877	int err;
 878	struct resp_res *res = qp->resp.res;
 879	struct rxe_mr *mr;
 880
 881	if (!res) {
 882		res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
 883		qp->resp.res = res;
 884	}
 885
 886	if (res->state == rdatm_res_state_new) {
 887		if (!res->replay || qp->resp.length == 0) {
  888			/* if length == 0 mr will be NULL (which is OK);
  889			 * otherwise qp->resp.mr holds a reference on the MR
  890			 * which we transfer to mr here and drop below.
  891			 */
 892			mr = qp->resp.mr;
 893			qp->resp.mr = NULL;
 894		} else {
 895			mr = rxe_recheck_mr(qp, res->read.rkey);
 896			if (!mr)
 897				return RESPST_ERR_RKEY_VIOLATION;
 898		}
 899
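		/* a reply that fits in a single MTU uses the ONLY
		 * opcode; otherwise this packet starts a
		 * FIRST/MIDDLE/LAST sequence
		 */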
 900		if (res->read.resid <= mtu)
 901			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
 902		else
 903			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
 904	} else {
  905		/* re-lookup mr from rkey on all later packets.
  906		 * length will be non-zero. This can fail if someone
  907		 * has modified or destroyed the mr since the first packet.
  908		 */
 909		mr = rxe_recheck_mr(qp, res->read.rkey);
 910		if (!mr)
 911			return RESPST_ERR_RKEY_VIOLATION;
 912
 913		if (res->read.resid > mtu)
 914			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
 915		else
 916			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
 917	}
 918
 919	res->state = rdatm_res_state_next;
 920
 921	payload = min_t(int, res->read.resid, mtu);
 922
 923	skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
 924				 res->cur_psn, AETH_ACK_UNLIMITED);
 925	if (!skb) {
 926		state = RESPST_ERR_RNR;
 927		goto err_out;
 928	}
 929
 930	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
 931			  payload, RXE_FROM_MR_OBJ);
 932	if (err) {
 933		kfree_skb(skb);
 934		state = RESPST_ERR_RKEY_VIOLATION;
 935		goto err_out;
 936	}
 937
 938	if (bth_pad(&ack_pkt)) {
 939		u8 *pad = payload_addr(&ack_pkt) + payload;
 940
 941		memset(pad, 0, bth_pad(&ack_pkt));
 942	}
 943
 944	/* rxe_xmit_packet always consumes the skb */
 945	err = rxe_xmit_packet(qp, &ack_pkt, skb);
 946	if (err) {
 947		state = RESPST_ERR_RNR;
 948		goto err_out;
 949	}
 950
 951	res->read.va += payload;
 952	res->read.resid -= payload;
 953	res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;
 954
 955	if (res->read.resid > 0) {
 956		state = RESPST_DONE;
 957	} else {
 958		qp->resp.res = NULL;
 959		if (!res->replay)
 960			qp->resp.opcode = -1;
 961		if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
 962			qp->resp.psn = res->cur_psn;
 963		state = RESPST_CLEANUP;
 964	}
 965
 966err_out:
 967	if (mr)
 968		rxe_put(mr);
 969	return state;
 970}
 971
 972static int invalidate_rkey(struct rxe_qp *qp, u32 rkey)
 973{
 974	if (rkey_is_mw(rkey))
 975		return rxe_invalidate_mw(qp, rkey);
 976	else
 977		return rxe_invalidate_mr(qp, rkey);
 978}
 979
  980	/* Executes a new request. A retried request never reaches this function
  981	 * (sends and writes are discarded, and reads and atomics are retried elsewhere).
  982	 */
 983static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 984{
 985	enum resp_states err;
 986	struct sk_buff *skb = PKT_TO_SKB(pkt);
 987	union rdma_network_hdr hdr;
 988
 989	if (pkt->mask & RXE_SEND_MASK) {
 990		if (qp_type(qp) == IB_QPT_UD ||
 991		    qp_type(qp) == IB_QPT_GSI) {
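			/* for UD/GSI the first 40 bytes of the receive
			 * buffer are reserved for the network header
			 * (GRH, or the IP header padded to GRH size),
			 * delivered ahead of the payload
			 */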
 992			if (skb->protocol == htons(ETH_P_IP)) {
 993				memset(&hdr.reserved, 0,
 994						sizeof(hdr.reserved));
 995				memcpy(&hdr.roce4grh, ip_hdr(skb),
 996						sizeof(hdr.roce4grh));
 997				err = send_data_in(qp, &hdr, sizeof(hdr));
 998			} else {
 999				err = send_data_in(qp, ipv6_hdr(skb),
1000						sizeof(hdr));
1001			}
1002			if (err)
1003				return err;
1004		}
1005		err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
1006		if (err)
1007			return err;
1008	} else if (pkt->mask & RXE_WRITE_MASK) {
1009		err = write_data_in(qp, pkt);
1010		if (err)
1011			return err;
1012	} else if (pkt->mask & RXE_READ_MASK) {
1013		/* For RDMA Read we can increment the msn now. See C9-148. */
1014		qp->resp.msn++;
1015		return RESPST_READ_REPLY;
1016	} else if (pkt->mask & RXE_ATOMIC_MASK) {
1017		return RESPST_ATOMIC_REPLY;
1018	} else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
1019		return RESPST_ATOMIC_WRITE_REPLY;
1020	} else if (pkt->mask & RXE_FLUSH_MASK) {
1021		return RESPST_PROCESS_FLUSH;
1022	} else {
1023		/* Unreachable */
1024		WARN_ON_ONCE(1);
1025	}
1026
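	/* a SEND with Invalidate carries an rkey in the IETH which
	 * the responder must invalidate before completing
	 */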
1027	if (pkt->mask & RXE_IETH_MASK) {
1028		u32 rkey = ieth_rkey(pkt);
1029
1030		err = invalidate_rkey(qp, rkey);
1031		if (err)
1032			return RESPST_ERR_INVALIDATE_RKEY;
1033	}
1034
1035	if (pkt->mask & RXE_END_MASK)
1036		/* We successfully processed this new request. */
1037		qp->resp.msn++;
1038
1039	/* next expected psn, read handles this separately */
1040	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
1041	qp->resp.ack_psn = qp->resp.psn;
1042
1043	qp->resp.opcode = pkt->opcode;
1044	qp->resp.status = IB_WC_SUCCESS;
1045
1046	if (pkt->mask & RXE_COMP_MASK)
1047		return RESPST_COMPLETE;
1048	else if (qp_type(qp) == IB_QPT_RC)
1049		return RESPST_ACKNOWLEDGE;
1050	else
1051		return RESPST_CLEANUP;
1052}
1053
1054static enum resp_states do_complete(struct rxe_qp *qp,
1055				    struct rxe_pkt_info *pkt)
1056{
1057	struct rxe_cqe cqe;
1058	struct ib_wc *wc = &cqe.ibwc;
1059	struct ib_uverbs_wc *uwc = &cqe.uibwc;
1060	struct rxe_recv_wqe *wqe = qp->resp.wqe;
1061	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
1062	unsigned long flags;
1063
1064	if (!wqe)
1065		goto finish;
1066
1067	memset(&cqe, 0, sizeof(cqe));
1068
1069	if (qp->rcq->is_user) {
1070		uwc->status		= qp->resp.status;
1071		uwc->qp_num		= qp->ibqp.qp_num;
1072		uwc->wr_id		= wqe->wr_id;
1073	} else {
1074		wc->status		= qp->resp.status;
1075		wc->qp			= &qp->ibqp;
1076		wc->wr_id		= wqe->wr_id;
1077	}
1078
1079	if (wc->status == IB_WC_SUCCESS) {
1080		rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
1081		wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
1082				pkt->mask & RXE_WRITE_MASK) ?
1083					IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
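		/* an RDMA write with immediate consumes a receive wqe but
		 * places no data in it, so report the length from the
		 * RETH rather than the wqe residual
		 */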
1084		wc->byte_len = (pkt->mask & RXE_IMMDT_MASK &&
1085				pkt->mask & RXE_WRITE_MASK) ?
1086					qp->resp.length : wqe->dma.length - wqe->dma.resid;
1087
1088		/* fields after byte_len are different between kernel and user
1089		 * space
1090		 */
1091		if (qp->rcq->is_user) {
1092			uwc->wc_flags = IB_WC_GRH;
1093
1094			if (pkt->mask & RXE_IMMDT_MASK) {
1095				uwc->wc_flags |= IB_WC_WITH_IMM;
1096				uwc->ex.imm_data = immdt_imm(pkt);
1097			}
1098
1099			if (pkt->mask & RXE_IETH_MASK) {
1100				uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
1101				uwc->ex.invalidate_rkey = ieth_rkey(pkt);
1102			}
1103
1104			if (pkt->mask & RXE_DETH_MASK)
1105				uwc->src_qp = deth_sqp(pkt);
1106
1107			uwc->port_num		= qp->attr.port_num;
1108		} else {
1109			struct sk_buff *skb = PKT_TO_SKB(pkt);
1110
1111			wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE;
1112			if (skb->protocol == htons(ETH_P_IP))
1113				wc->network_hdr_type = RDMA_NETWORK_IPV4;
1114			else
1115				wc->network_hdr_type = RDMA_NETWORK_IPV6;
1116
1117			if (is_vlan_dev(skb->dev)) {
1118				wc->wc_flags |= IB_WC_WITH_VLAN;
1119				wc->vlan_id = vlan_dev_vlan_id(skb->dev);
1120			}
1121
1122			if (pkt->mask & RXE_IMMDT_MASK) {
1123				wc->wc_flags |= IB_WC_WITH_IMM;
1124				wc->ex.imm_data = immdt_imm(pkt);
1125			}
1126
1127			if (pkt->mask & RXE_IETH_MASK) {
1128				wc->wc_flags |= IB_WC_WITH_INVALIDATE;
1129				wc->ex.invalidate_rkey = ieth_rkey(pkt);
1130			}
1131
1132			if (pkt->mask & RXE_DETH_MASK)
1133				wc->src_qp = deth_sqp(pkt);
1134
1135			wc->port_num		= qp->attr.port_num;
1136		}
1137	} else {
1138		if (wc->status != IB_WC_WR_FLUSH_ERR)
1139			rxe_err_qp(qp, "non-flush error status = %d\n",
1140				wc->status);
1141	}
1142
1143	/* have copy for srq and reference for !srq */
1144	if (!qp->srq)
1145		queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT);
1146
1147	qp->resp.wqe = NULL;
1148
1149	if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1))
1150		return RESPST_ERR_CQ_OVERFLOW;
1151
1152finish:
1153	spin_lock_irqsave(&qp->state_lock, flags);
1154	if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
1155		spin_unlock_irqrestore(&qp->state_lock, flags);
1156		return RESPST_CHK_RESOURCE;
1157	}
1158	spin_unlock_irqrestore(&qp->state_lock, flags);
1159
1160	if (unlikely(!pkt))
1161		return RESPST_DONE;
1162	if (qp_type(qp) == IB_QPT_RC)
1163		return RESPST_ACKNOWLEDGE;
1164	else
1165		return RESPST_CLEANUP;
1166}
1167
1168
1169static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
1170				  int opcode, const char *msg)
1171{
1172	int err;
1173	struct rxe_pkt_info ack_pkt;
1174	struct sk_buff *skb;
1175
1176	skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome);
1177	if (!skb)
1178		return -ENOMEM;
1179
1180	err = rxe_xmit_packet(qp, &ack_pkt, skb);
1181	if (err)
1182		rxe_dbg_qp(qp, "Failed sending %s\n", msg);
1183
1184	return err;
1185}
1186
1187static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
1188{
1189	return send_common_ack(qp, syndrome, psn,
1190			IB_OPCODE_RC_ACKNOWLEDGE, "ACK");
1191}
1192
1193static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
1194{
1195	int ret = send_common_ack(qp, syndrome, psn,
1196			IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, "ATOMIC ACK");
1197
1198	/* have to clear this since it is used to trigger
1199	 * long read replies
1200	 */
1201	qp->resp.res = NULL;
1202	return ret;
1203}
1204
1205static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
1206{
1207	int ret = send_common_ack(qp, syndrome, psn,
1208			IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
1209			"RDMA READ response of length zero ACK");
1210
1211	/* have to clear this since it is used to trigger
1212	 * long read replies
1213	 */
1214	qp->resp.res = NULL;
1215	return ret;
1216}
1217
1218static enum resp_states acknowledge(struct rxe_qp *qp,
1219				    struct rxe_pkt_info *pkt)
1220{
1221	if (qp_type(qp) != IB_QPT_RC)
1222		return RESPST_CLEANUP;
1223
1224	if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
1225		send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
1226	else if (pkt->mask & RXE_ATOMIC_MASK)
1227		send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
1228	else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
1229		send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
1230	else if (bth_ack(pkt))
1231		send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
1232
1233	return RESPST_CLEANUP;
1234}
1235
1236static enum resp_states cleanup(struct rxe_qp *qp,
1237				struct rxe_pkt_info *pkt)
1238{
1239	struct sk_buff *skb;
1240
1241	if (pkt) {
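		/* free the skb and drop the qp and ib_device references
		 * taken on the receive path when the packet was queued
		 */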
1242		skb = skb_dequeue(&qp->req_pkts);
1243		rxe_put(qp);
1244		kfree_skb(skb);
1245		ib_device_put(qp->ibqp.device);
1246	}
1247
1248	if (qp->resp.mr) {
1249		rxe_put(qp->resp.mr);
1250		qp->resp.mr = NULL;
1251	}
1252
1253	return RESPST_DONE;
1254}
1255
1256static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
1257{
1258	int i;
1259
1260	for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
1261		struct resp_res *res = &qp->resp.resources[i];
1262
1263		if (res->type == 0)
1264			continue;
1265
1266		if (psn_compare(psn, res->first_psn) >= 0 &&
1267		    psn_compare(psn, res->last_psn) <= 0) {
1268			return res;
1269		}
1270	}
1271
1272	return NULL;
1273}
1274
1275static enum resp_states duplicate_request(struct rxe_qp *qp,
1276					  struct rxe_pkt_info *pkt)
1277{
1278	enum resp_states rc;
1279	u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;
1280
1281	if (pkt->mask & RXE_SEND_MASK ||
1282	    pkt->mask & RXE_WRITE_MASK) {
1283		/* SEND. Ack again and cleanup. C9-105. */
1284		send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
1285		return RESPST_CLEANUP;
1286	} else if (pkt->mask & RXE_FLUSH_MASK) {
1287		struct resp_res *res;
1288
1289		/* Find the operation in our list of responder resources. */
1290		res = find_resource(qp, pkt->psn);
1291		if (res) {
1292			res->replay = 1;
1293			res->cur_psn = pkt->psn;
1294			qp->resp.res = res;
1295			rc = RESPST_PROCESS_FLUSH;
1296			goto out;
1297		}
1298
1299		/* Resource not found. Class D error. Drop the request. */
1300		rc = RESPST_CLEANUP;
1301		goto out;
1302	} else if (pkt->mask & RXE_READ_MASK) {
1303		struct resp_res *res;
1304
1305		res = find_resource(qp, pkt->psn);
1306		if (!res) {
1307			/* Resource not found. Class D error.  Drop the
1308			 * request.
1309			 */
1310			rc = RESPST_CLEANUP;
1311			goto out;
1312		} else {
1313			/* Ensure this new request is the same as the previous
1314			 * one or a subset of it.
1315			 */
1316			u64 iova = reth_va(pkt);
1317			u32 resid = reth_len(pkt);
1318
1319			if (iova < res->read.va_org ||
1320			    resid > res->read.length ||
1321			    (iova + resid) > (res->read.va_org +
1322					      res->read.length)) {
1323				rc = RESPST_CLEANUP;
1324				goto out;
1325			}
1326
1327			if (reth_rkey(pkt) != res->read.rkey) {
1328				rc = RESPST_CLEANUP;
1329				goto out;
1330			}
1331
1332			res->cur_psn = pkt->psn;
1333			res->state = (pkt->psn == res->first_psn) ?
1334					rdatm_res_state_new :
1335					rdatm_res_state_replay;
1336			res->replay = 1;
1337
1338			/* Reset the resource, except length. */
1339			res->read.va_org = iova;
1340			res->read.va = iova;
1341			res->read.resid = resid;
1342
1343			/* Replay the RDMA read reply. */
1344			qp->resp.res = res;
1345			rc = RESPST_READ_REPLY;
1346			goto out;
1347		}
1348	} else {
1349		struct resp_res *res;
1350
1351		/* Find the operation in our list of responder resources. */
1352		res = find_resource(qp, pkt->psn);
1353		if (res) {
1354			res->replay = 1;
1355			res->cur_psn = pkt->psn;
1356			qp->resp.res = res;
1357			rc = pkt->mask & RXE_ATOMIC_MASK ?
1358					RESPST_ATOMIC_REPLY :
1359					RESPST_ATOMIC_WRITE_REPLY;
1360			goto out;
1361		}
1362
1363		/* Resource not found. Class D error. Drop the request. */
1364		rc = RESPST_CLEANUP;
1365		goto out;
1366	}
1367out:
1368	return rc;
1369}
1370
 1371	/* Process a class A or C error. Both are treated the same in this implementation. */
1372static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
1373			      enum ib_wc_status status)
1374{
1375	qp->resp.aeth_syndrome	= syndrome;
1376	qp->resp.status		= status;
1377
1378	/* indicate that we should go through the ERROR state */
1379	qp->resp.goto_error	= 1;
1380}
1381
1382static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
1383{
1384	/* UC */
1385	if (qp->srq) {
1386		/* Class E */
1387		qp->resp.drop_msg = 1;
1388		if (qp->resp.wqe) {
1389			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
1390			return RESPST_COMPLETE;
1391		} else {
1392			return RESPST_CLEANUP;
1393		}
1394	} else {
1395		/* Class D1. This packet may be the start of a
1396		 * new message and could be valid. The previous
 1397		 * message is invalid and ignored. Reset the
 1398		 * recv wr to its original state.
1399		 */
1400		if (qp->resp.wqe) {
1401			qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
1402			qp->resp.wqe->dma.cur_sge = 0;
1403			qp->resp.wqe->dma.sge_offset = 0;
1404			qp->resp.opcode = -1;
1405		}
1406
1407		if (qp->resp.mr) {
1408			rxe_put(qp->resp.mr);
1409			qp->resp.mr = NULL;
1410		}
1411
1412		return RESPST_CLEANUP;
1413	}
1414}
1415
1416/* drain incoming request packet queue */
1417static void drain_req_pkts(struct rxe_qp *qp)
1418{
1419	struct sk_buff *skb;
1420
1421	while ((skb = skb_dequeue(&qp->req_pkts))) {
1422		rxe_put(qp);
1423		kfree_skb(skb);
1424		ib_device_put(qp->ibqp.device);
1425	}
1426}
1427
1428/* complete receive wqe with flush error */
1429static int flush_recv_wqe(struct rxe_qp *qp, struct rxe_recv_wqe *wqe)
1430{
1431	struct rxe_cqe cqe = {};
1432	struct ib_wc *wc = &cqe.ibwc;
1433	struct ib_uverbs_wc *uwc = &cqe.uibwc;
1434	int err;
1435
1436	if (qp->rcq->is_user) {
1437		uwc->wr_id = wqe->wr_id;
1438		uwc->status = IB_WC_WR_FLUSH_ERR;
1439		uwc->qp_num = qp_num(qp);
1440	} else {
1441		wc->wr_id = wqe->wr_id;
1442		wc->status = IB_WC_WR_FLUSH_ERR;
1443		wc->qp = &qp->ibqp;
1444	}
1445
1446	err = rxe_cq_post(qp->rcq, &cqe, 0);
1447	if (err)
1448		rxe_dbg_cq(qp->rcq, "post cq failed err = %d\n", err);
1449
1450	return err;
1451}
1452
 1453	/* drain and optionally complete the receive queue.
 1454	 * If unable to complete a wqe, stop completing and
 1455	 * just flush the remaining wqes.
 1456	 */
1457static void flush_recv_queue(struct rxe_qp *qp, bool notify)
1458{
1459	struct rxe_queue *q = qp->rq.queue;
1460	struct rxe_recv_wqe *wqe;
1461	int err;
1462
1463	if (qp->srq) {
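		/* wqes on an SRQ are not flushed here; just raise the
		 * last-WQE-reached event so the consumer knows this qp
		 * will not consume any more SRQ wqes
		 */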
1464		if (notify && qp->ibqp.event_handler) {
1465			struct ib_event ev;
1466
1467			ev.device = qp->ibqp.device;
1468			ev.element.qp = &qp->ibqp;
1469			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1470			qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1471		}
1472		return;
1473	}
1474
1475	/* recv queue not created. nothing to do. */
1476	if (!qp->rq.queue)
1477		return;
1478
1479	while ((wqe = queue_head(q, q->type))) {
1480		if (notify) {
1481			err = flush_recv_wqe(qp, wqe);
1482			if (err)
1483				notify = 0;
1484		}
1485		queue_advance_consumer(q, q->type);
1486	}
1487
1488	qp->resp.wqe = NULL;
1489}
1490
1491int rxe_receiver(struct rxe_qp *qp)
1492{
1493	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
1494	enum resp_states state;
1495	struct rxe_pkt_info *pkt = NULL;
1496	int ret;
1497	unsigned long flags;
1498
1499	spin_lock_irqsave(&qp->state_lock, flags);
1500	if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
1501			  qp_state(qp) == IB_QPS_RESET) {
1502		bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
1503
1504		drain_req_pkts(qp);
1505		flush_recv_queue(qp, notify);
1506		spin_unlock_irqrestore(&qp->state_lock, flags);
1507		goto exit;
1508	}
1509	spin_unlock_irqrestore(&qp->state_lock, flags);
1510
1511	qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
1512
1513	state = RESPST_GET_REQ;
1514
1515	while (1) {
1516		rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
1517		switch (state) {
1518		case RESPST_GET_REQ:
1519			state = get_req(qp, &pkt);
1520			break;
1521		case RESPST_CHK_PSN:
1522			state = check_psn(qp, pkt);
1523			break;
1524		case RESPST_CHK_OP_SEQ:
1525			state = check_op_seq(qp, pkt);
1526			break;
1527		case RESPST_CHK_OP_VALID:
1528			state = check_op_valid(qp, pkt);
1529			break;
1530		case RESPST_CHK_RESOURCE:
1531			state = check_resource(qp, pkt);
1532			break;
1533		case RESPST_CHK_LENGTH:
1534			state = rxe_resp_check_length(qp, pkt);
1535			break;
1536		case RESPST_CHK_RKEY:
1537			state = check_rkey(qp, pkt);
1538			break;
1539		case RESPST_EXECUTE:
1540			state = execute(qp, pkt);
1541			break;
1542		case RESPST_COMPLETE:
1543			state = do_complete(qp, pkt);
1544			break;
1545		case RESPST_READ_REPLY:
1546			state = read_reply(qp, pkt);
1547			break;
1548		case RESPST_ATOMIC_REPLY:
1549			state = atomic_reply(qp, pkt);
1550			break;
1551		case RESPST_ATOMIC_WRITE_REPLY:
1552			state = atomic_write_reply(qp, pkt);
1553			break;
1554		case RESPST_PROCESS_FLUSH:
1555			state = process_flush(qp, pkt);
1556			break;
1557		case RESPST_ACKNOWLEDGE:
1558			state = acknowledge(qp, pkt);
1559			break;
1560		case RESPST_CLEANUP:
1561			state = cleanup(qp, pkt);
1562			break;
1563		case RESPST_DUPLICATE_REQUEST:
1564			state = duplicate_request(qp, pkt);
1565			break;
1566		case RESPST_ERR_PSN_OUT_OF_SEQ:
1567			/* RC only - Class B. Drop packet. */
1568			send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
1569			state = RESPST_CLEANUP;
1570			break;
1571
1572		case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
1573		case RESPST_ERR_MISSING_OPCODE_FIRST:
1574		case RESPST_ERR_MISSING_OPCODE_LAST_C:
1575		case RESPST_ERR_UNSUPPORTED_OPCODE:
1576		case RESPST_ERR_MISALIGNED_ATOMIC:
1577			/* RC Only - Class C. */
1578			do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
1579					  IB_WC_REM_INV_REQ_ERR);
1580			state = RESPST_COMPLETE;
1581			break;
1582
1583		case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
1584			state = do_class_d1e_error(qp);
1585			break;
1586		case RESPST_ERR_RNR:
1587			if (qp_type(qp) == IB_QPT_RC) {
1588				rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
1589				/* RC - class B */
1590				send_ack(qp, AETH_RNR_NAK |
1591					 (~AETH_TYPE_MASK &
1592					 qp->attr.min_rnr_timer),
1593					 pkt->psn);
1594			} else {
1595				/* UD/UC - class D */
1596				qp->resp.drop_msg = 1;
1597			}
1598			state = RESPST_CLEANUP;
1599			break;
1600
1601		case RESPST_ERR_RKEY_VIOLATION:
1602			if (qp_type(qp) == IB_QPT_RC) {
1603				/* Class C */
1604				do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
1605						  IB_WC_REM_ACCESS_ERR);
1606				state = RESPST_COMPLETE;
1607			} else {
1608				qp->resp.drop_msg = 1;
1609				if (qp->srq) {
1610					/* UC/SRQ Class D */
1611					qp->resp.status = IB_WC_REM_ACCESS_ERR;
1612					state = RESPST_COMPLETE;
1613				} else {
1614					/* UC/non-SRQ Class E. */
1615					state = RESPST_CLEANUP;
1616				}
1617			}
1618			break;
1619
1620		case RESPST_ERR_INVALIDATE_RKEY:
1621			/* RC - Class J. */
1622			qp->resp.goto_error = 1;
1623			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
1624			state = RESPST_COMPLETE;
1625			break;
1626
1627		case RESPST_ERR_LENGTH:
1628			if (qp_type(qp) == IB_QPT_RC) {
1629				/* Class C */
1630				do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
1631						  IB_WC_REM_INV_REQ_ERR);
1632				state = RESPST_COMPLETE;
1633			} else if (qp->srq) {
1634				/* UC/UD - class E */
1635				qp->resp.status = IB_WC_REM_INV_REQ_ERR;
1636				state = RESPST_COMPLETE;
1637			} else {
1638				/* UC/UD - class D */
1639				qp->resp.drop_msg = 1;
1640				state = RESPST_CLEANUP;
1641			}
1642			break;
1643
1644		case RESPST_ERR_MALFORMED_WQE:
1645			/* All, Class A. */
1646			do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
1647					  IB_WC_LOC_QP_OP_ERR);
1648			state = RESPST_COMPLETE;
1649			break;
1650
1651		case RESPST_ERR_CQ_OVERFLOW:
1652			/* All - Class G */
1653			state = RESPST_ERROR;
1654			break;
1655
1656		case RESPST_DONE:
1657			if (qp->resp.goto_error) {
1658				state = RESPST_ERROR;
1659				break;
1660			}
1661
1662			goto done;
1663
1664		case RESPST_EXIT:
1665			if (qp->resp.goto_error) {
1666				state = RESPST_ERROR;
1667				break;
1668			}
1669
1670			goto exit;
1671
1672		case RESPST_ERROR:
1673			qp->resp.goto_error = 0;
1674			rxe_dbg_qp(qp, "moved to error state\n");
1675			rxe_qp_error(qp);
1676			goto exit;
1677
1678		default:
1679			WARN_ON_ONCE(1);
1680		}
1681	}
1682
 1683	/* A non-zero return value will cause rxe_do_task to
 1684	 * exit its loop and end the work item. A zero return
 1685	 * will continue looping and return to rxe_receiver.
 1686	 */
1687done:
1688	ret = 0;
1689	goto out;
1690exit:
1691	ret = -EAGAIN;
1692out:
1693	return ret;
1694}