   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/*
   3 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
   4 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
   5 */
   6
   7#include <linux/skbuff.h>
   8
   9#include "rxe.h"
  10#include "rxe_loc.h"
  11#include "rxe_queue.h"
  12
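/*
 * Responder state machine states. rxe_responder() below drives a request
 * packet through these states: the CHK_* states validate the packet, the
 * EXECUTE/*_REPLY/PROCESS_FLUSH states carry out the operation, COMPLETE
 * posts a work completion, ACKNOWLEDGE and CLEANUP send the response and
 * free the packet, and the ERR_* states map protocol errors onto NAKs and
 * completion errors.
 */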
  13enum resp_states {
  14	RESPST_NONE,
  15	RESPST_GET_REQ,
  16	RESPST_CHK_PSN,
  17	RESPST_CHK_OP_SEQ,
  18	RESPST_CHK_OP_VALID,
  19	RESPST_CHK_RESOURCE,
  20	RESPST_CHK_LENGTH,
  21	RESPST_CHK_RKEY,
  22	RESPST_EXECUTE,
  23	RESPST_READ_REPLY,
  24	RESPST_ATOMIC_REPLY,
  25	RESPST_ATOMIC_WRITE_REPLY,
  26	RESPST_PROCESS_FLUSH,
  27	RESPST_COMPLETE,
  28	RESPST_ACKNOWLEDGE,
  29	RESPST_CLEANUP,
  30	RESPST_DUPLICATE_REQUEST,
  31	RESPST_ERR_MALFORMED_WQE,
  32	RESPST_ERR_UNSUPPORTED_OPCODE,
  33	RESPST_ERR_MISALIGNED_ATOMIC,
  34	RESPST_ERR_PSN_OUT_OF_SEQ,
  35	RESPST_ERR_MISSING_OPCODE_FIRST,
  36	RESPST_ERR_MISSING_OPCODE_LAST_C,
  37	RESPST_ERR_MISSING_OPCODE_LAST_D1E,
  38	RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
  39	RESPST_ERR_RNR,
  40	RESPST_ERR_RKEY_VIOLATION,
  41	RESPST_ERR_INVALIDATE_RKEY,
  42	RESPST_ERR_LENGTH,
  43	RESPST_ERR_CQ_OVERFLOW,
  44	RESPST_ERROR,
  45	RESPST_RESET,
  46	RESPST_DONE,
  47	RESPST_EXIT,
  48};
  49
  50static char *resp_state_name[] = {
  51	[RESPST_NONE]				= "NONE",
  52	[RESPST_GET_REQ]			= "GET_REQ",
  53	[RESPST_CHK_PSN]			= "CHK_PSN",
  54	[RESPST_CHK_OP_SEQ]			= "CHK_OP_SEQ",
  55	[RESPST_CHK_OP_VALID]			= "CHK_OP_VALID",
  56	[RESPST_CHK_RESOURCE]			= "CHK_RESOURCE",
  57	[RESPST_CHK_LENGTH]			= "CHK_LENGTH",
  58	[RESPST_CHK_RKEY]			= "CHK_RKEY",
  59	[RESPST_EXECUTE]			= "EXECUTE",
  60	[RESPST_READ_REPLY]			= "READ_REPLY",
  61	[RESPST_ATOMIC_REPLY]			= "ATOMIC_REPLY",
  62	[RESPST_ATOMIC_WRITE_REPLY]		= "ATOMIC_WRITE_REPLY",
  63	[RESPST_PROCESS_FLUSH]			= "PROCESS_FLUSH",
  64	[RESPST_COMPLETE]			= "COMPLETE",
  65	[RESPST_ACKNOWLEDGE]			= "ACKNOWLEDGE",
  66	[RESPST_CLEANUP]			= "CLEANUP",
  67	[RESPST_DUPLICATE_REQUEST]		= "DUPLICATE_REQUEST",
  68	[RESPST_ERR_MALFORMED_WQE]		= "ERR_MALFORMED_WQE",
  69	[RESPST_ERR_UNSUPPORTED_OPCODE]		= "ERR_UNSUPPORTED_OPCODE",
  70	[RESPST_ERR_MISALIGNED_ATOMIC]		= "ERR_MISALIGNED_ATOMIC",
  71	[RESPST_ERR_PSN_OUT_OF_SEQ]		= "ERR_PSN_OUT_OF_SEQ",
  72	[RESPST_ERR_MISSING_OPCODE_FIRST]	= "ERR_MISSING_OPCODE_FIRST",
  73	[RESPST_ERR_MISSING_OPCODE_LAST_C]	= "ERR_MISSING_OPCODE_LAST_C",
  74	[RESPST_ERR_MISSING_OPCODE_LAST_D1E]	= "ERR_MISSING_OPCODE_LAST_D1E",
  75	[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ]	= "ERR_TOO_MANY_RDMA_ATM_REQ",
  76	[RESPST_ERR_RNR]			= "ERR_RNR",
  77	[RESPST_ERR_RKEY_VIOLATION]		= "ERR_RKEY_VIOLATION",
  78	[RESPST_ERR_INVALIDATE_RKEY]		= "ERR_INVALIDATE_RKEY",
  79	[RESPST_ERR_LENGTH]			= "ERR_LENGTH",
  80	[RESPST_ERR_CQ_OVERFLOW]		= "ERR_CQ_OVERFLOW",
  81	[RESPST_ERROR]				= "ERROR",
  82	[RESPST_RESET]				= "RESET",
  83	[RESPST_DONE]				= "DONE",
  84	[RESPST_EXIT]				= "EXIT",
  85};
  86
  87/* rxe_recv calls here to add a request packet to the input queue */
  88void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
  89{
  90	int must_sched;
  91	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
  92
  93	skb_queue_tail(&qp->req_pkts, skb);
  94
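	/* defer to the responder task if this is an RDMA READ request or
	 * packets are already backed up; otherwise run the responder
	 * directly in the caller's context
	 */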
  95	must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
  96			(skb_queue_len(&qp->req_pkts) > 1);
  97
  98	if (must_sched)
  99		rxe_sched_task(&qp->resp.task);
 100	else
 101		rxe_run_task(&qp->resp.task);
 102}
 103
 104static inline enum resp_states get_req(struct rxe_qp *qp,
 105				       struct rxe_pkt_info **pkt_p)
 106{
 107	struct sk_buff *skb;
 108
 109	if (qp->resp.state == QP_STATE_ERROR) {
 110		while ((skb = skb_dequeue(&qp->req_pkts))) {
 111			rxe_put(qp);
 112			kfree_skb(skb);
 113			ib_device_put(qp->ibqp.device);
 114		}
 115
 116		/* go drain recv wr queue */
 117		return RESPST_CHK_RESOURCE;
 118	}
 119
 120	skb = skb_peek(&qp->req_pkts);
 121	if (!skb)
 122		return RESPST_EXIT;
 123
 124	*pkt_p = SKB_TO_PKT(skb);
 125
 126	return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN;
 127}
 128
 129static enum resp_states check_psn(struct rxe_qp *qp,
 130				  struct rxe_pkt_info *pkt)
 131{
 132	int diff = psn_compare(pkt->psn, qp->resp.psn);
 133	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 134
 135	switch (qp_type(qp)) {
 136	case IB_QPT_RC:
 137		if (diff > 0) {
 138			if (qp->resp.sent_psn_nak)
 139				return RESPST_CLEANUP;
 140
 141			qp->resp.sent_psn_nak = 1;
 142			rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ);
 143			return RESPST_ERR_PSN_OUT_OF_SEQ;
 144
 145		} else if (diff < 0) {
 146			rxe_counter_inc(rxe, RXE_CNT_DUP_REQ);
 147			return RESPST_DUPLICATE_REQUEST;
 148		}
 149
 150		if (qp->resp.sent_psn_nak)
 151			qp->resp.sent_psn_nak = 0;
 152
 153		break;
 154
 155	case IB_QPT_UC:
 156		if (qp->resp.drop_msg || diff != 0) {
 157			if (pkt->mask & RXE_START_MASK) {
 158				qp->resp.drop_msg = 0;
 159				return RESPST_CHK_OP_SEQ;
 160			}
 161
 162			qp->resp.drop_msg = 1;
 163			return RESPST_CLEANUP;
 164		}
 165		break;
 166	default:
 167		break;
 168	}
 169
 170	return RESPST_CHK_OP_SEQ;
 171}
 172
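/* enforce the first/middle/last opcode sequencing rules for multi-packet
 * send and RDMA write messages on RC and UC QPs
 */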
 173static enum resp_states check_op_seq(struct rxe_qp *qp,
 174				     struct rxe_pkt_info *pkt)
 175{
 176	switch (qp_type(qp)) {
 177	case IB_QPT_RC:
 178		switch (qp->resp.opcode) {
 179		case IB_OPCODE_RC_SEND_FIRST:
 180		case IB_OPCODE_RC_SEND_MIDDLE:
 181			switch (pkt->opcode) {
 182			case IB_OPCODE_RC_SEND_MIDDLE:
 183			case IB_OPCODE_RC_SEND_LAST:
 184			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
 185			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
 186				return RESPST_CHK_OP_VALID;
 187			default:
 188				return RESPST_ERR_MISSING_OPCODE_LAST_C;
 189			}
 190
 191		case IB_OPCODE_RC_RDMA_WRITE_FIRST:
 192		case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
 193			switch (pkt->opcode) {
 194			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
 195			case IB_OPCODE_RC_RDMA_WRITE_LAST:
 196			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
 197				return RESPST_CHK_OP_VALID;
 198			default:
 199				return RESPST_ERR_MISSING_OPCODE_LAST_C;
 200			}
 201
 202		default:
 203			switch (pkt->opcode) {
 204			case IB_OPCODE_RC_SEND_MIDDLE:
 205			case IB_OPCODE_RC_SEND_LAST:
 206			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
 207			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
 208			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
 209			case IB_OPCODE_RC_RDMA_WRITE_LAST:
 210			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
 211				return RESPST_ERR_MISSING_OPCODE_FIRST;
 212			default:
 213				return RESPST_CHK_OP_VALID;
 214			}
 215		}
 216		break;
 217
 218	case IB_QPT_UC:
 219		switch (qp->resp.opcode) {
 220		case IB_OPCODE_UC_SEND_FIRST:
 221		case IB_OPCODE_UC_SEND_MIDDLE:
 222			switch (pkt->opcode) {
 223			case IB_OPCODE_UC_SEND_MIDDLE:
 224			case IB_OPCODE_UC_SEND_LAST:
 225			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
 226				return RESPST_CHK_OP_VALID;
 227			default:
 228				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
 229			}
 230
 231		case IB_OPCODE_UC_RDMA_WRITE_FIRST:
 232		case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
 233			switch (pkt->opcode) {
 234			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
 235			case IB_OPCODE_UC_RDMA_WRITE_LAST:
 236			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
 237				return RESPST_CHK_OP_VALID;
 238			default:
 239				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
 240			}
 241
 242		default:
 243			switch (pkt->opcode) {
 244			case IB_OPCODE_UC_SEND_MIDDLE:
 245			case IB_OPCODE_UC_SEND_LAST:
 246			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
 247			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
 248			case IB_OPCODE_UC_RDMA_WRITE_LAST:
 249			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
 250				qp->resp.drop_msg = 1;
 251				return RESPST_CLEANUP;
 252			default:
 253				return RESPST_CHK_OP_VALID;
 254			}
 255		}
 256		break;
 257
 258	default:
 259		return RESPST_CHK_OP_VALID;
 260	}
 261}
 262
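/* check that the QP's qp_access_flags permit the class of operation
 * (read, write/atomic write, atomic, flush) carried by this packet
 */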
 263static bool check_qp_attr_access(struct rxe_qp *qp,
 264				 struct rxe_pkt_info *pkt)
 265{
 266	if (((pkt->mask & RXE_READ_MASK) &&
 267	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
 268	    ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
 269	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
 270	    ((pkt->mask & RXE_ATOMIC_MASK) &&
 271	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
 272		return false;
 273
 274	if (pkt->mask & RXE_FLUSH_MASK) {
 275		u32 flush_type = feth_plt(pkt);
 276
 277		if ((flush_type & IB_FLUSH_GLOBAL &&
 278		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
 279		    (flush_type & IB_FLUSH_PERSISTENT &&
 280		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
 281			return false;
 282	}
 283
 284	return true;
 285}
 286
 287static enum resp_states check_op_valid(struct rxe_qp *qp,
 288				       struct rxe_pkt_info *pkt)
 289{
 290	switch (qp_type(qp)) {
 291	case IB_QPT_RC:
 292		if (!check_qp_attr_access(qp, pkt))
 293			return RESPST_ERR_UNSUPPORTED_OPCODE;
 294
 295		break;
 296
 297	case IB_QPT_UC:
 298		if ((pkt->mask & RXE_WRITE_MASK) &&
 299		    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
 300			qp->resp.drop_msg = 1;
 301			return RESPST_CLEANUP;
 302		}
 303
 304		break;
 305
 306	case IB_QPT_UD:
 307	case IB_QPT_GSI:
 308		break;
 309
 310	default:
 311		WARN_ON_ONCE(1);
 312		break;
 313	}
 314
 315	return RESPST_CHK_RESOURCE;
 316}
 317
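/* copy the next receive WQE off the shared receive queue into the QP so
 * it stays valid after the SRQ consumer index advances; raise the SRQ
 * limit event if the queue has dropped below the armed limit
 */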
 318static enum resp_states get_srq_wqe(struct rxe_qp *qp)
 319{
 320	struct rxe_srq *srq = qp->srq;
 321	struct rxe_queue *q = srq->rq.queue;
 322	struct rxe_recv_wqe *wqe;
 323	struct ib_event ev;
 324	unsigned int count;
 325	size_t size;
 326	unsigned long flags;
 327
 328	if (srq->error)
 329		return RESPST_ERR_RNR;
 330
 331	spin_lock_irqsave(&srq->rq.consumer_lock, flags);
 332
 333	wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
 334	if (!wqe) {
 335		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
 336		return RESPST_ERR_RNR;
 337	}
 338
 339	/* don't trust user space data */
 340	if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
 341		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
 342		rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
 343		return RESPST_ERR_MALFORMED_WQE;
 344	}
 345	size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge);
 346	memcpy(&qp->resp.srq_wqe, wqe, size);
 347
 348	qp->resp.wqe = &qp->resp.srq_wqe.wqe;
 349	queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT);
 350	count = queue_count(q, QUEUE_TYPE_FROM_CLIENT);
 351
 352	if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) {
 353		srq->limit = 0;
 354		goto event;
 355	}
 356
 357	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
 358	return RESPST_CHK_LENGTH;
 359
 360event:
 361	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
 362	ev.device = qp->ibqp.device;
 363	ev.element.srq = qp->ibqp.srq;
 364	ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
 365	srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
 366	return RESPST_CHK_LENGTH;
 367}
 368
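/* make sure a receive WQE or responder resource is available for this
 * packet; in the error state flush any outstanding receive WQEs instead
 */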
 369static enum resp_states check_resource(struct rxe_qp *qp,
 370				       struct rxe_pkt_info *pkt)
 371{
 372	struct rxe_srq *srq = qp->srq;
 373
 374	if (qp->resp.state == QP_STATE_ERROR) {
 375		if (qp->resp.wqe) {
 376			qp->resp.status = IB_WC_WR_FLUSH_ERR;
 377			return RESPST_COMPLETE;
 378		} else if (!srq) {
 379			qp->resp.wqe = queue_head(qp->rq.queue,
 380					QUEUE_TYPE_FROM_CLIENT);
 381			if (qp->resp.wqe) {
 382				qp->resp.status = IB_WC_WR_FLUSH_ERR;
 383				return RESPST_COMPLETE;
 384			} else {
 385				return RESPST_EXIT;
 386			}
 387		} else {
 388			return RESPST_EXIT;
 389		}
 390	}
 391
 392	if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
 393		/* it is the requester's job not to send
 394		 * too many read/atomic ops, we just
 395		 * recycle the responder resource queue
 396		 */
 397		if (likely(qp->attr.max_dest_rd_atomic > 0))
 398			return RESPST_CHK_LENGTH;
 399		else
 400			return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
 401	}
 402
 403	if (pkt->mask & RXE_RWR_MASK) {
 404		if (srq)
 405			return get_srq_wqe(qp);
 406
 407		qp->resp.wqe = queue_head(qp->rq.queue,
 408				QUEUE_TYPE_FROM_CLIENT);
 409		return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
 410	}
 411
 412	return RESPST_CHK_LENGTH;
 413}
 414
 415static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
 416					      struct rxe_pkt_info *pkt)
 417{
 418	/*
 419	 * See IBA C9-92
 420	 * For UD QPs we only check if the packet will fit in the
 421	 * receive buffer later. For RDMA operations additional
 422	 * length checks are performed in check_rkey.
 423	 */
 424	if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
 425					     (qp_type(qp) == IB_QPT_UC))) {
 426		unsigned int mtu = qp->mtu;
 427		unsigned int payload = payload_size(pkt);
 428
 429		if ((pkt->mask & RXE_START_MASK) &&
 430		    (pkt->mask & RXE_END_MASK)) {
 431			if (unlikely(payload > mtu)) {
 432				rxe_dbg_qp(qp, "only packet too long\n");
 433				return RESPST_ERR_LENGTH;
 434			}
 435		} else if ((pkt->mask & RXE_START_MASK) ||
 436			   (pkt->mask & RXE_MIDDLE_MASK)) {
 437			if (unlikely(payload != mtu)) {
 438				rxe_dbg_qp(qp, "first or middle packet not mtu\n");
 439				return RESPST_ERR_LENGTH;
 440			}
 441		} else if (pkt->mask & RXE_END_MASK) {
 442			if (unlikely((payload == 0) || (payload > mtu))) {
 443				rxe_dbg_qp(qp, "last packet zero or too long\n");
 444				return RESPST_ERR_LENGTH;
 445			}
 446		}
 447	}
 448
 449	/* See IBA C9-94 */
 450	if (pkt->mask & RXE_RETH_MASK) {
 451		if (reth_len(pkt) > (1U << 31)) {
 452			rxe_dbg_qp(qp, "dma length too long\n");
 453			return RESPST_ERR_LENGTH;
 454		}
 455	}
 456
 457	return RESPST_CHK_RKEY;
 458}
 459
 460static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 461{
 462	qp->resp.va = reth_va(pkt);
 463	qp->resp.offset = 0;
 464	qp->resp.rkey = reth_rkey(pkt);
 465	qp->resp.resid = reth_len(pkt);
 466	qp->resp.length = reth_len(pkt);
 467}
 468
 469static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 470{
 471	qp->resp.va = atmeth_va(pkt);
 472	qp->resp.offset = 0;
 473	qp->resp.rkey = atmeth_rkey(pkt);
 474	qp->resp.resid = sizeof(u64);
 475}
 476
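/* resolve the rkey in the request to an MR, directly or through an MW,
 * check access rights and the address range, and save the MR in
 * qp->resp.mr for the execute step
 */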
 477static enum resp_states check_rkey(struct rxe_qp *qp,
 478				   struct rxe_pkt_info *pkt)
 479{
 480	struct rxe_mr *mr = NULL;
 481	struct rxe_mw *mw = NULL;
 482	u64 va;
 483	u32 rkey;
 484	u32 resid;
 485	u32 pktlen;
 486	int mtu = qp->mtu;
 487	enum resp_states state;
 488	int access = 0;
 489
 490	if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
 491		if (pkt->mask & RXE_RETH_MASK)
 492			qp_resp_from_reth(qp, pkt);
 493
 494		access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
 495						     : IB_ACCESS_REMOTE_WRITE;
 496	} else if (pkt->mask & RXE_FLUSH_MASK) {
 497		u32 flush_type = feth_plt(pkt);
 498
 499		if (pkt->mask & RXE_RETH_MASK)
 500			qp_resp_from_reth(qp, pkt);
 501
 502		if (flush_type & IB_FLUSH_GLOBAL)
 503			access |= IB_ACCESS_FLUSH_GLOBAL;
 504		if (flush_type & IB_FLUSH_PERSISTENT)
 505			access |= IB_ACCESS_FLUSH_PERSISTENT;
 506	} else if (pkt->mask & RXE_ATOMIC_MASK) {
 507		qp_resp_from_atmeth(qp, pkt);
 508		access = IB_ACCESS_REMOTE_ATOMIC;
 509	} else {
 510		return RESPST_EXECUTE;
 511	}
 512
 513	/* A zero-byte op is not required to set an addr or rkey. See C9-88 */
 514	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
 515	    (pkt->mask & RXE_RETH_MASK) &&
 516	    reth_len(pkt) == 0) {
 517		return RESPST_EXECUTE;
 518	}
 519
 520	va	= qp->resp.va;
 521	rkey	= qp->resp.rkey;
 522	resid	= qp->resp.resid;
 523	pktlen	= payload_size(pkt);
 524
 525	if (rkey_is_mw(rkey)) {
 526		mw = rxe_lookup_mw(qp, access, rkey);
 527		if (!mw) {
 528			rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
 529			state = RESPST_ERR_RKEY_VIOLATION;
 530			goto err;
 531		}
 532
 533		mr = mw->mr;
 534		if (!mr) {
 535			rxe_dbg_qp(qp, "MW doesn't have an MR\n");
 536			state = RESPST_ERR_RKEY_VIOLATION;
 537			goto err;
 538		}
 539
 540		if (mw->access & IB_ZERO_BASED)
 541			qp->resp.offset = mw->addr;
 542
 543		rxe_put(mw);
 544		rxe_get(mr);
 545	} else {
 546		mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
 547		if (!mr) {
 548			rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
 549			state = RESPST_ERR_RKEY_VIOLATION;
 550			goto err;
 551		}
 552	}
 553
 554	if (pkt->mask & RXE_FLUSH_MASK) {
 555		/* a FLUSH MR request is not required to set va or resid;
 556		 * no need to check the range since we flush the whole MR
 557		 */
 558		if (feth_sel(pkt) == IB_FLUSH_MR)
 559			goto skip_check_range;
 560	}
 561
 562	if (mr_check_range(mr, va + qp->resp.offset, resid)) {
 563		state = RESPST_ERR_RKEY_VIOLATION;
 564		goto err;
 565	}
 566
 567skip_check_range:
 568	if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
 569		if (resid > mtu) {
 570			if (pktlen != mtu || bth_pad(pkt)) {
 571				state = RESPST_ERR_LENGTH;
 572				goto err;
 573			}
 574		} else {
 575			if (pktlen != resid) {
 576				state = RESPST_ERR_LENGTH;
 577				goto err;
 578			}
 579			if ((bth_pad(pkt) != (0x3 & (-resid)))) {
 580				/* a pad count mismatch is not exactly a
 581				 * length error but nothing else fits.
 582				 */
 583				state = RESPST_ERR_LENGTH;
 584				goto err;
 585			}
 586		}
 587	}
 588
 589	WARN_ON_ONCE(qp->resp.mr);
 590
 591	qp->resp.mr = mr;
 592	return RESPST_EXECUTE;
 593
 594err:
 595	if (mr)
 596		rxe_put(mr);
 597	if (mw)
 598		rxe_put(mw);
 599
 600	return state;
 601}
 602
 603static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
 604				     int data_len)
 605{
 606	int err;
 607
 608	err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
 609			data_addr, data_len, RXE_TO_MR_OBJ);
 610	if (unlikely(err))
 611		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
 612					: RESPST_ERR_MALFORMED_WQE;
 613
 614	return RESPST_NONE;
 615}
 616
 617static enum resp_states write_data_in(struct rxe_qp *qp,
 618				      struct rxe_pkt_info *pkt)
 619{
 620	enum resp_states rc = RESPST_NONE;
 621	int	err;
 622	int data_len = payload_size(pkt);
 623
 624	err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
 625			  payload_addr(pkt), data_len, RXE_TO_MR_OBJ);
 626	if (err) {
 627		rc = RESPST_ERR_RKEY_VIOLATION;
 628		goto out;
 629	}
 630
 631	qp->resp.va += data_len;
 632	qp->resp.resid -= data_len;
 633
 634out:
 635	return rc;
 636}
 637
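/* recycle a slot in the responder resource ring and initialize it for a
 * read, atomic, atomic write or flush request so a duplicate of the
 * request can be replied to later
 */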
 638static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
 639					struct rxe_pkt_info *pkt,
 640					int type)
 641{
 642	struct resp_res *res;
 643	u32 pkts;
 644
 645	res = &qp->resp.resources[qp->resp.res_head];
 646	rxe_advance_resp_resource(qp);
 647	free_rd_atomic_resource(res);
 648
 649	res->type = type;
 650	res->replay = 0;
 651
 652	switch (type) {
 653	case RXE_READ_MASK:
 654		res->read.va = qp->resp.va + qp->resp.offset;
 655		res->read.va_org = qp->resp.va + qp->resp.offset;
 656		res->read.resid = qp->resp.resid;
 657		res->read.length = qp->resp.resid;
 658		res->read.rkey = qp->resp.rkey;
 659
 660		pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1);
 661		res->first_psn = pkt->psn;
 662		res->cur_psn = pkt->psn;
 663		res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;
 664
 665		res->state = rdatm_res_state_new;
 666		break;
 667	case RXE_ATOMIC_MASK:
 668	case RXE_ATOMIC_WRITE_MASK:
 669		res->first_psn = pkt->psn;
 670		res->last_psn = pkt->psn;
 671		res->cur_psn = pkt->psn;
 672		break;
 673	case RXE_FLUSH_MASK:
 674		res->flush.va = qp->resp.va + qp->resp.offset;
 675		res->flush.length = qp->resp.length;
 676		res->flush.type = feth_plt(pkt);
 677		res->flush.level = feth_sel(pkt);
 678	}
 679
 680	return res;
 681}
 682
 683static enum resp_states process_flush(struct rxe_qp *qp,
 684				       struct rxe_pkt_info *pkt)
 685{
 686	u64 length, start;
 687	struct rxe_mr *mr = qp->resp.mr;
 688	struct resp_res *res = qp->resp.res;
 689
 690	/* oA19-14, oA19-15 */
 691	if (res && res->replay)
 692		return RESPST_ACKNOWLEDGE;
 693	else if (!res) {
 694		res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
 695		qp->resp.res = res;
 696	}
 697
 698	if (res->flush.level == IB_FLUSH_RANGE) {
 699		start = res->flush.va;
 700		length = res->flush.length;
 701	} else { /* level == IB_FLUSH_MR */
 702		start = mr->ibmr.iova;
 703		length = mr->ibmr.length;
 704	}
 705
 706	if (res->flush.type & IB_FLUSH_PERSISTENT) {
 707		if (rxe_flush_pmem_iova(mr, start, length))
 708			return RESPST_ERR_RKEY_VIOLATION;
 709		/* Make data persistent. */
 710		wmb();
 711	} else if (res->flush.type & IB_FLUSH_GLOBAL) {
 712		/* Make data globally visible. */
 713		wmb();
 714	}
 715
 716	qp->resp.msn++;
 717
 718	/* next expected psn, read handles this separately */
 719	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
 720	qp->resp.ack_psn = qp->resp.psn;
 721
 722	qp->resp.opcode = pkt->opcode;
 723	qp->resp.status = IB_WC_SUCCESS;
 724
 725	return RESPST_ACKNOWLEDGE;
 726}
 727
 728/* Guarantee atomicity of atomic operations at the machine level. */
 729static DEFINE_SPINLOCK(atomic_ops_lock);
 730
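/* perform a compare & swap or fetch & add on the target memory under
 * atomic_ops_lock and save the original value for the atomic ack
 */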
 731static enum resp_states atomic_reply(struct rxe_qp *qp,
 732					 struct rxe_pkt_info *pkt)
 733{
 734	u64 *vaddr;
 735	enum resp_states ret;
 736	struct rxe_mr *mr = qp->resp.mr;
 737	struct resp_res *res = qp->resp.res;
 738	u64 value;
 739
 740	if (!res) {
 741		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK);
 742		qp->resp.res = res;
 743	}
 744
 745	if (!res->replay) {
 746		if (mr->state != RXE_MR_STATE_VALID) {
 747			ret = RESPST_ERR_RKEY_VIOLATION;
 748			goto out;
 749		}
 750
 751		vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset,
 752					sizeof(u64));
 753
 754		/* check vaddr is 8-byte aligned. */
 755		if (!vaddr || (uintptr_t)vaddr & 7) {
 756			ret = RESPST_ERR_MISALIGNED_ATOMIC;
 757			goto out;
 758		}
 759
 760		spin_lock_bh(&atomic_ops_lock);
 761		res->atomic.orig_val = value = *vaddr;
 762
 763		if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP) {
 764			if (value == atmeth_comp(pkt))
 765				value = atmeth_swap_add(pkt);
 766		} else {
 767			value += atmeth_swap_add(pkt);
 768		}
 769
 770		*vaddr = value;
 771		spin_unlock_bh(&atomic_ops_lock);
 772
 773		qp->resp.msn++;
 774
 775		/* next expected psn, read handles this separately */
 776		qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
 777		qp->resp.ack_psn = qp->resp.psn;
 778
 779		qp->resp.opcode = pkt->opcode;
 780		qp->resp.status = IB_WC_SUCCESS;
 781	}
 782
 783	ret = RESPST_ACKNOWLEDGE;
 784out:
 785	return ret;
 786}
 787
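/* ATOMIC WRITE stores an 8-byte payload into the MR with a single release
 * store; this is only done natively on 64-bit machines, hence the
 * CONFIG_64BIT guard below
 */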
 788#ifdef CONFIG_64BIT
 789static enum resp_states do_atomic_write(struct rxe_qp *qp,
 790					struct rxe_pkt_info *pkt)
 791{
 792	struct rxe_mr *mr = qp->resp.mr;
 793	int payload = payload_size(pkt);
 794	u64 src, *dst;
 795
 796	if (mr->state != RXE_MR_STATE_VALID)
 797		return RESPST_ERR_RKEY_VIOLATION;
 798
 799	memcpy(&src, payload_addr(pkt), payload);
 800
 801	dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
 802	/* check dst is 8-byte aligned. */
 803	if (!dst || (uintptr_t)dst & 7)
 804		return RESPST_ERR_MISALIGNED_ATOMIC;
 805
 806	/* Do atomic write after all prior operations have completed */
 807	smp_store_release(dst, src);
 808
 809	/* decrease resp.resid to zero */
 810	qp->resp.resid = 0;
 811
 812	qp->resp.msn++;
 813
 814	/* next expected psn, read handles this separately */
 815	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
 816	qp->resp.ack_psn = qp->resp.psn;
 817
 818	qp->resp.opcode = pkt->opcode;
 819	qp->resp.status = IB_WC_SUCCESS;
 820	return RESPST_ACKNOWLEDGE;
 821}
 822#else
 823static enum resp_states do_atomic_write(struct rxe_qp *qp,
 824					struct rxe_pkt_info *pkt)
 825{
 826	return RESPST_ERR_UNSUPPORTED_OPCODE;
 827}
 828#endif /* CONFIG_64BIT */
 829
 830static enum resp_states atomic_write_reply(struct rxe_qp *qp,
 831					   struct rxe_pkt_info *pkt)
 832{
 833	struct resp_res *res = qp->resp.res;
 834
 835	if (!res) {
 836		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
 837		qp->resp.res = res;
 838	}
 839
 840	if (res->replay)
 841		return RESPST_ACKNOWLEDGE;
 842	return do_atomic_write(qp, pkt);
 843}
 844
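/* build an acknowledge, atomic ack or read response packet addressed back
 * to the requester; the AETH and ATMACK headers are filled in when the
 * chosen opcode carries them
 */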
 845static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 846					  struct rxe_pkt_info *ack,
 847					  int opcode,
 848					  int payload,
 849					  u32 psn,
 850					  u8 syndrome)
 851{
 852	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 853	struct sk_buff *skb;
 854	int paylen;
 855	int pad;
 856	int err;
 857
 858	/*
 859	 * allocate packet
 860	 */
 861	pad = (-payload) & 0x3;
 862	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
 863
 864	skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
 865	if (!skb)
 866		return NULL;
 867
 868	ack->qp = qp;
 869	ack->opcode = opcode;
 870	ack->mask = rxe_opcode[opcode].mask;
 871	ack->paylen = paylen;
 872	ack->psn = psn;
 873
 874	bth_init(ack, opcode, 0, 0, pad, IB_DEFAULT_PKEY_FULL,
 875		 qp->attr.dest_qp_num, 0, psn);
 876
 877	if (ack->mask & RXE_AETH_MASK) {
 878		aeth_set_syn(ack, syndrome);
 879		aeth_set_msn(ack, qp->resp.msn);
 880	}
 881
 882	if (ack->mask & RXE_ATMACK_MASK)
 883		atmack_set_orig(ack, qp->resp.res->atomic.orig_val);
 884
 885	err = rxe_prepare(&qp->pri_av, ack, skb);
 886	if (err) {
 887		kfree_skb(skb);
 888		return NULL;
 889	}
 890
 891	return skb;
 892}
 893
 894/**
 895 * rxe_recheck_mr - revalidate MR from rkey and get a reference
 896 * @qp: the qp
 897 * @rkey: the rkey
 898 *
 899 * This code allows the MR to be invalidated or deregistered, or the
 900 * MW, if one was used, to be invalidated or deallocated, while the
 901 * read is in progress. It is assumed that the access permissions, if
 902 * originally good, are still OK and that the mappings are unchanged.
 903 *
 904 * TODO: If someone reregisters an MR to change its size or
 905 * access permissions during the processing of an RDMA read
 906 * we should kill the responder resource and complete the
 907 * operation with an error.
 908 *
 909 * Return: mr on success else NULL
 910 */
 911static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
 912{
 913	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 914	struct rxe_mr *mr;
 915	struct rxe_mw *mw;
 916
 917	if (rkey_is_mw(rkey)) {
 918		mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
 919		if (!mw)
 920			return NULL;
 921
 922		mr = mw->mr;
 923		if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
 924		    !mr || mr->state != RXE_MR_STATE_VALID) {
 925			rxe_put(mw);
 926			return NULL;
 927		}
 928
 929		rxe_get(mr);
 930		rxe_put(mw);
 931
 932		return mr;
 933	}
 934
 935	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
 936	if (!mr)
 937		return NULL;
 938
 939	if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
 940		rxe_put(mr);
 941		return NULL;
 942	}
 943
 944	return mr;
 945}
 946
 947/* RDMA read response. If res is not NULL, then we have a current RDMA request
 948 * being processed or replayed.
 949 */
 950static enum resp_states read_reply(struct rxe_qp *qp,
 951				   struct rxe_pkt_info *req_pkt)
 952{
 953	struct rxe_pkt_info ack_pkt;
 954	struct sk_buff *skb;
 955	int mtu = qp->mtu;
 956	enum resp_states state;
 957	int payload;
 958	int opcode;
 959	int err;
 960	struct resp_res *res = qp->resp.res;
 961	struct rxe_mr *mr;
 962
 963	if (!res) {
 964		res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
 965		qp->resp.res = res;
 966	}
 967
 968	if (res->state == rdatm_res_state_new) {
 969		if (!res->replay) {
 970			mr = qp->resp.mr;
 971			qp->resp.mr = NULL;
 972		} else {
 973			mr = rxe_recheck_mr(qp, res->read.rkey);
 974			if (!mr)
 975				return RESPST_ERR_RKEY_VIOLATION;
 976		}
 977
 978		if (res->read.resid <= mtu)
 979			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
 980		else
 981			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
 982	} else {
 983		mr = rxe_recheck_mr(qp, res->read.rkey);
 984		if (!mr)
 985			return RESPST_ERR_RKEY_VIOLATION;
 986
 987		if (res->read.resid > mtu)
 988			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
 989		else
 990			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
 991	}
 992
 993	res->state = rdatm_res_state_next;
 994
 995	payload = min_t(int, res->read.resid, mtu);
 996
 997	skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
 998				 res->cur_psn, AETH_ACK_UNLIMITED);
 999	if (!skb) {
1000		if (mr)
1001			rxe_put(mr);
1002		return RESPST_ERR_RNR;
1003	}
1004
1005	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
1006			  payload, RXE_FROM_MR_OBJ);
1007	if (mr)
1008		rxe_put(mr);
1009	if (err) {
1010		kfree_skb(skb);
1011		return RESPST_ERR_RKEY_VIOLATION;
1012	}
1013
1014	if (bth_pad(&ack_pkt)) {
1015		u8 *pad = payload_addr(&ack_pkt) + payload;
1016
1017		memset(pad, 0, bth_pad(&ack_pkt));
1018	}
1019
1020	err = rxe_xmit_packet(qp, &ack_pkt, skb);
1021	if (err)
1022		return RESPST_ERR_RNR;
1023
1024	res->read.va += payload;
1025	res->read.resid -= payload;
1026	res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;
1027
1028	if (res->read.resid > 0) {
1029		state = RESPST_DONE;
1030	} else {
1031		qp->resp.res = NULL;
1032		if (!res->replay)
1033			qp->resp.opcode = -1;
1034		if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
1035			qp->resp.psn = res->cur_psn;
1036		state = RESPST_CLEANUP;
1037	}
1038
1039	return state;
1040}
1041
1042static int invalidate_rkey(struct rxe_qp *qp, u32 rkey)
1043{
1044	if (rkey_is_mw(rkey))
1045		return rxe_invalidate_mw(qp, rkey);
1046	else
1047		return rxe_invalidate_mr(qp, rkey);
1048}
1049
1050/* Executes a new request. A retried request never reaches this function
1051 * (sends and writes are discarded, and reads and atomics are retried elsewhere).
1052 */
1053static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
1054{
1055	enum resp_states err;
1056	struct sk_buff *skb = PKT_TO_SKB(pkt);
1057	union rdma_network_hdr hdr;
1058
1059	if (pkt->mask & RXE_SEND_MASK) {
1060		if (qp_type(qp) == IB_QPT_UD ||
1061		    qp_type(qp) == IB_QPT_GSI) {
1062			if (skb->protocol == htons(ETH_P_IP)) {
1063				memset(&hdr.reserved, 0,
1064						sizeof(hdr.reserved));
1065				memcpy(&hdr.roce4grh, ip_hdr(skb),
1066						sizeof(hdr.roce4grh));
1067				err = send_data_in(qp, &hdr, sizeof(hdr));
1068			} else {
1069				err = send_data_in(qp, ipv6_hdr(skb),
1070						sizeof(hdr));
1071			}
1072			if (err)
1073				return err;
1074		}
1075		err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
1076		if (err)
1077			return err;
1078	} else if (pkt->mask & RXE_WRITE_MASK) {
1079		err = write_data_in(qp, pkt);
1080		if (err)
1081			return err;
1082	} else if (pkt->mask & RXE_READ_MASK) {
1083		/* For RDMA Read we can increment the msn now. See C9-148. */
1084		qp->resp.msn++;
1085		return RESPST_READ_REPLY;
1086	} else if (pkt->mask & RXE_ATOMIC_MASK) {
1087		return RESPST_ATOMIC_REPLY;
1088	} else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
1089		return RESPST_ATOMIC_WRITE_REPLY;
1090	} else if (pkt->mask & RXE_FLUSH_MASK) {
1091		return RESPST_PROCESS_FLUSH;
1092	} else {
1093		/* Unreachable */
1094		WARN_ON_ONCE(1);
1095	}
1096
1097	if (pkt->mask & RXE_IETH_MASK) {
1098		u32 rkey = ieth_rkey(pkt);
1099
1100		err = invalidate_rkey(qp, rkey);
1101		if (err)
1102			return RESPST_ERR_INVALIDATE_RKEY;
1103	}
1104
1105	if (pkt->mask & RXE_END_MASK)
1106		/* We successfully processed this new request. */
1107		qp->resp.msn++;
1108
1109	/* next expected psn, read handles this separately */
1110	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
1111	qp->resp.ack_psn = qp->resp.psn;
1112
1113	qp->resp.opcode = pkt->opcode;
1114	qp->resp.status = IB_WC_SUCCESS;
1115
1116	if (pkt->mask & RXE_COMP_MASK)
1117		return RESPST_COMPLETE;
1118	else if (qp_type(qp) == IB_QPT_RC)
1119		return RESPST_ACKNOWLEDGE;
1120	else
1121		return RESPST_CLEANUP;
1122}
1123
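/* post a receive completion to the QP's receive CQ, mapping packet headers
 * (immediate data, invalidate rkey, network header type, vlan) into work
 * completion fields for kernel or user space consumers
 */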
1124static enum resp_states do_complete(struct rxe_qp *qp,
1125				    struct rxe_pkt_info *pkt)
1126{
1127	struct rxe_cqe cqe;
1128	struct ib_wc *wc = &cqe.ibwc;
1129	struct ib_uverbs_wc *uwc = &cqe.uibwc;
1130	struct rxe_recv_wqe *wqe = qp->resp.wqe;
1131	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
1132
1133	if (!wqe)
1134		goto finish;
1135
1136	memset(&cqe, 0, sizeof(cqe));
1137
1138	if (qp->rcq->is_user) {
1139		uwc->status		= qp->resp.status;
1140		uwc->qp_num		= qp->ibqp.qp_num;
1141		uwc->wr_id		= wqe->wr_id;
1142	} else {
1143		wc->status		= qp->resp.status;
1144		wc->qp			= &qp->ibqp;
1145		wc->wr_id		= wqe->wr_id;
1146	}
1147
1148	if (wc->status == IB_WC_SUCCESS) {
1149		rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
1150		wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
1151				pkt->mask & RXE_WRITE_MASK) ?
1152					IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
1153		wc->byte_len = (pkt->mask & RXE_IMMDT_MASK &&
1154				pkt->mask & RXE_WRITE_MASK) ?
1155					qp->resp.length : wqe->dma.length - wqe->dma.resid;
1156
1157		/* fields after byte_len are different between kernel and user
1158		 * space
1159		 */
1160		if (qp->rcq->is_user) {
1161			uwc->wc_flags = IB_WC_GRH;
1162
1163			if (pkt->mask & RXE_IMMDT_MASK) {
1164				uwc->wc_flags |= IB_WC_WITH_IMM;
1165				uwc->ex.imm_data = immdt_imm(pkt);
1166			}
1167
1168			if (pkt->mask & RXE_IETH_MASK) {
1169				uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
1170				uwc->ex.invalidate_rkey = ieth_rkey(pkt);
1171			}
1172
1173			if (pkt->mask & RXE_DETH_MASK)
1174				uwc->src_qp = deth_sqp(pkt);
1175
1176			uwc->port_num		= qp->attr.port_num;
1177		} else {
1178			struct sk_buff *skb = PKT_TO_SKB(pkt);
1179
1180			wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE;
1181			if (skb->protocol == htons(ETH_P_IP))
1182				wc->network_hdr_type = RDMA_NETWORK_IPV4;
1183			else
1184				wc->network_hdr_type = RDMA_NETWORK_IPV6;
1185
1186			if (is_vlan_dev(skb->dev)) {
1187				wc->wc_flags |= IB_WC_WITH_VLAN;
1188				wc->vlan_id = vlan_dev_vlan_id(skb->dev);
1189			}
1190
1191			if (pkt->mask & RXE_IMMDT_MASK) {
1192				wc->wc_flags |= IB_WC_WITH_IMM;
1193				wc->ex.imm_data = immdt_imm(pkt);
1194			}
1195
1196			if (pkt->mask & RXE_IETH_MASK) {
1197				wc->wc_flags |= IB_WC_WITH_INVALIDATE;
1198				wc->ex.invalidate_rkey = ieth_rkey(pkt);
1199			}
1200
1201			if (pkt->mask & RXE_DETH_MASK)
1202				wc->src_qp = deth_sqp(pkt);
1203
1204			wc->port_num		= qp->attr.port_num;
1205		}
1206	}
1207
1208	/* have copy for srq and reference for !srq */
1209	if (!qp->srq)
1210		queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT);
1211
1212	qp->resp.wqe = NULL;
1213
1214	if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1))
1215		return RESPST_ERR_CQ_OVERFLOW;
1216
1217finish:
1218	if (unlikely(qp->resp.state == QP_STATE_ERROR))
1219		return RESPST_CHK_RESOURCE;
1220	if (unlikely(!pkt))
1221		return RESPST_DONE;
1222	if (qp_type(qp) == IB_QPT_RC)
1223		return RESPST_ACKNOWLEDGE;
1224	else
1225		return RESPST_CLEANUP;
1226}
1227
1228
1229static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
1230				  int opcode, const char *msg)
1231{
1232	int err;
1233	struct rxe_pkt_info ack_pkt;
1234	struct sk_buff *skb;
1235
1236	skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome);
1237	if (!skb)
1238		return -ENOMEM;
1239
1240	err = rxe_xmit_packet(qp, &ack_pkt, skb);
1241	if (err)
1242		rxe_dbg_qp(qp, "Failed sending %s\n", msg);
1243
1244	return err;
1245}
1246
1247static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
1248{
1249	return send_common_ack(qp, syndrome, psn,
1250			IB_OPCODE_RC_ACKNOWLEDGE, "ACK");
1251}
1252
1253static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
1254{
1255	int ret = send_common_ack(qp, syndrome, psn,
1256			IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, "ATOMIC ACK");
1257
1258	/* have to clear this since it is used to trigger
1259	 * long read replies
1260	 */
1261	qp->resp.res = NULL;
1262	return ret;
1263}
1264
1265static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
1266{
1267	int ret = send_common_ack(qp, syndrome, psn,
1268			IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
1269			"RDMA READ response of length zero ACK");
1270
1271	/* have to clear this since it is used to trigger
1272	 * long read replies
1273	 */
1274	qp->resp.res = NULL;
1275	return ret;
1276}
1277
1278static enum resp_states acknowledge(struct rxe_qp *qp,
1279				    struct rxe_pkt_info *pkt)
1280{
1281	if (qp_type(qp) != IB_QPT_RC)
1282		return RESPST_CLEANUP;
1283
1284	if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
1285		send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
1286	else if (pkt->mask & RXE_ATOMIC_MASK)
1287		send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
1288	else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
1289		send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
1290	else if (bth_ack(pkt))
1291		send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
1292
1293	return RESPST_CLEANUP;
1294}
1295
1296static enum resp_states cleanup(struct rxe_qp *qp,
1297				struct rxe_pkt_info *pkt)
1298{
1299	struct sk_buff *skb;
1300
1301	if (pkt) {
1302		skb = skb_dequeue(&qp->req_pkts);
1303		rxe_put(qp);
1304		kfree_skb(skb);
1305		ib_device_put(qp->ibqp.device);
1306	}
1307
1308	if (qp->resp.mr) {
1309		rxe_put(qp->resp.mr);
1310		qp->resp.mr = NULL;
1311	}
1312
1313	return RESPST_DONE;
1314}
1315
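/* find the saved responder resource whose PSN range covers psn */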
1316static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
1317{
1318	int i;
1319
1320	for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
1321		struct resp_res *res = &qp->resp.resources[i];
1322
1323		if (res->type == 0)
1324			continue;
1325
1326		if (psn_compare(psn, res->first_psn) >= 0 &&
1327		    psn_compare(psn, res->last_psn) <= 0) {
1328			return res;
1329		}
1330	}
1331
1332	return NULL;
1333}
1334
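/* handle a request that has already been executed: re-ack sends and
 * writes, replay reads, atomics and flushes from the matching responder
 * resource, and silently drop anything that can no longer be matched
 */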
1335static enum resp_states duplicate_request(struct rxe_qp *qp,
1336					  struct rxe_pkt_info *pkt)
1337{
1338	enum resp_states rc;
1339	u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;
1340
1341	if (pkt->mask & RXE_SEND_MASK ||
1342	    pkt->mask & RXE_WRITE_MASK) {
1343		/* SEND or WRITE. Ack again and cleanup. C9-105. */
1344		send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
1345		return RESPST_CLEANUP;
1346	} else if (pkt->mask & RXE_FLUSH_MASK) {
1347		struct resp_res *res;
1348
1349		/* Find the operation in our list of responder resources. */
1350		res = find_resource(qp, pkt->psn);
1351		if (res) {
1352			res->replay = 1;
1353			res->cur_psn = pkt->psn;
1354			qp->resp.res = res;
1355			rc = RESPST_PROCESS_FLUSH;
1356			goto out;
1357		}
1358
1359		/* Resource not found. Class D error. Drop the request. */
1360		rc = RESPST_CLEANUP;
1361		goto out;
1362	} else if (pkt->mask & RXE_READ_MASK) {
1363		struct resp_res *res;
1364
1365		res = find_resource(qp, pkt->psn);
1366		if (!res) {
1367			/* Resource not found. Class D error.  Drop the
1368			 * request.
1369			 */
1370			rc = RESPST_CLEANUP;
1371			goto out;
1372		} else {
1373			/* Ensure this new request is the same as the previous
1374			 * one or a subset of it.
1375			 */
1376			u64 iova = reth_va(pkt);
1377			u32 resid = reth_len(pkt);
1378
1379			if (iova < res->read.va_org ||
1380			    resid > res->read.length ||
1381			    (iova + resid) > (res->read.va_org +
1382					      res->read.length)) {
1383				rc = RESPST_CLEANUP;
1384				goto out;
1385			}
1386
1387			if (reth_rkey(pkt) != res->read.rkey) {
1388				rc = RESPST_CLEANUP;
1389				goto out;
1390			}
1391
1392			res->cur_psn = pkt->psn;
1393			res->state = (pkt->psn == res->first_psn) ?
1394					rdatm_res_state_new :
1395					rdatm_res_state_replay;
1396			res->replay = 1;
1397
1398			/* Reset the resource, except length. */
1399			res->read.va_org = iova;
1400			res->read.va = iova;
1401			res->read.resid = resid;
1402
1403			/* Replay the RDMA read reply. */
1404			qp->resp.res = res;
1405			rc = RESPST_READ_REPLY;
1406			goto out;
1407		}
1408	} else {
1409		struct resp_res *res;
1410
1411		/* Find the operation in our list of responder resources. */
1412		res = find_resource(qp, pkt->psn);
1413		if (res) {
1414			res->replay = 1;
1415			res->cur_psn = pkt->psn;
1416			qp->resp.res = res;
1417			rc = pkt->mask & RXE_ATOMIC_MASK ?
1418					RESPST_ATOMIC_REPLY :
1419					RESPST_ATOMIC_WRITE_REPLY;
1420			goto out;
1421		}
1422
1423		/* Resource not found. Class D error. Drop the request. */
1424		rc = RESPST_CLEANUP;
1425		goto out;
1426	}
1427out:
1428	return rc;
1429}
1430
1431/* Process a class A or C error. Both are treated the same in this implementation. */
1432static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
1433			      enum ib_wc_status status)
1434{
1435	qp->resp.aeth_syndrome	= syndrome;
1436	qp->resp.status		= status;
1437
1438	/* indicate that we should go through the ERROR state */
1439	qp->resp.goto_error	= 1;
1440}
1441
1442static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
1443{
1444	/* UC */
1445	if (qp->srq) {
1446		/* Class E */
1447		qp->resp.drop_msg = 1;
1448		if (qp->resp.wqe) {
1449			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
1450			return RESPST_COMPLETE;
1451		} else {
1452			return RESPST_CLEANUP;
1453		}
1454	} else {
1455		/* Class D1. This packet may be the start of a
1456		 * new message and could be valid. The previous
1457		 * message is invalid and ignored. Reset the
1458		 * recv wr to its original state.
1459		 */
1460		if (qp->resp.wqe) {
1461			qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
1462			qp->resp.wqe->dma.cur_sge = 0;
1463			qp->resp.wqe->dma.sge_offset = 0;
1464			qp->resp.opcode = -1;
1465		}
1466
1467		if (qp->resp.mr) {
1468			rxe_put(qp->resp.mr);
1469			qp->resp.mr = NULL;
1470		}
1471
1472		return RESPST_CLEANUP;
1473	}
1474}
1475
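/* drop all queued request packets and, when not notifying, consume any
 * receive WQEs still posted on a non-SRQ receive queue
 */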
1476static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
1477{
1478	struct sk_buff *skb;
1479	struct rxe_queue *q = qp->rq.queue;
1480
1481	while ((skb = skb_dequeue(&qp->req_pkts))) {
1482		rxe_put(qp);
1483		kfree_skb(skb);
1484		ib_device_put(qp->ibqp.device);
1485	}
1486
1487	if (notify)
1488		return;
1489
1490	while (!qp->srq && q && queue_head(q, q->type))
1491		queue_advance_consumer(q, q->type);
1492}
1493
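/* top level responder work function, run from the QP's responder task;
 * steps the state machine until a request completes (RESPST_DONE) or
 * there is no more work to do (RESPST_EXIT)
 */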
1494int rxe_responder(void *arg)
1495{
1496	struct rxe_qp *qp = (struct rxe_qp *)arg;
1497	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
1498	enum resp_states state;
1499	struct rxe_pkt_info *pkt = NULL;
1500	int ret;
1501
1502	if (!rxe_get(qp))
1503		return -EAGAIN;
1504
1505	qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
1506
1507	if (!qp->valid)
1508		goto exit;
1509
1510	switch (qp->resp.state) {
1511	case QP_STATE_RESET:
1512		state = RESPST_RESET;
1513		break;
1514
1515	default:
1516		state = RESPST_GET_REQ;
1517		break;
1518	}
1519
1520	while (1) {
1521		rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
1522		switch (state) {
1523		case RESPST_GET_REQ:
1524			state = get_req(qp, &pkt);
1525			break;
1526		case RESPST_CHK_PSN:
1527			state = check_psn(qp, pkt);
1528			break;
1529		case RESPST_CHK_OP_SEQ:
1530			state = check_op_seq(qp, pkt);
1531			break;
1532		case RESPST_CHK_OP_VALID:
1533			state = check_op_valid(qp, pkt);
1534			break;
1535		case RESPST_CHK_RESOURCE:
1536			state = check_resource(qp, pkt);
1537			break;
1538		case RESPST_CHK_LENGTH:
1539			state = rxe_resp_check_length(qp, pkt);
1540			break;
1541		case RESPST_CHK_RKEY:
1542			state = check_rkey(qp, pkt);
1543			break;
1544		case RESPST_EXECUTE:
1545			state = execute(qp, pkt);
1546			break;
1547		case RESPST_COMPLETE:
1548			state = do_complete(qp, pkt);
1549			break;
1550		case RESPST_READ_REPLY:
1551			state = read_reply(qp, pkt);
1552			break;
1553		case RESPST_ATOMIC_REPLY:
1554			state = atomic_reply(qp, pkt);
1555			break;
1556		case RESPST_ATOMIC_WRITE_REPLY:
1557			state = atomic_write_reply(qp, pkt);
1558			break;
1559		case RESPST_PROCESS_FLUSH:
1560			state = process_flush(qp, pkt);
1561			break;
1562		case RESPST_ACKNOWLEDGE:
1563			state = acknowledge(qp, pkt);
1564			break;
1565		case RESPST_CLEANUP:
1566			state = cleanup(qp, pkt);
1567			break;
1568		case RESPST_DUPLICATE_REQUEST:
1569			state = duplicate_request(qp, pkt);
1570			break;
1571		case RESPST_ERR_PSN_OUT_OF_SEQ:
1572			/* RC only - Class B. Drop packet. */
1573			send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
1574			state = RESPST_CLEANUP;
1575			break;
1576
1577		case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
1578		case RESPST_ERR_MISSING_OPCODE_FIRST:
1579		case RESPST_ERR_MISSING_OPCODE_LAST_C:
1580		case RESPST_ERR_UNSUPPORTED_OPCODE:
1581		case RESPST_ERR_MISALIGNED_ATOMIC:
1582			/* RC Only - Class C. */
1583			do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
1584					  IB_WC_REM_INV_REQ_ERR);
1585			state = RESPST_COMPLETE;
1586			break;
1587
1588		case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
1589			state = do_class_d1e_error(qp);
1590			break;
1591		case RESPST_ERR_RNR:
1592			if (qp_type(qp) == IB_QPT_RC) {
1593				rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
1594				/* RC - class B */
1595				send_ack(qp, AETH_RNR_NAK |
1596					 (~AETH_TYPE_MASK &
1597					 qp->attr.min_rnr_timer),
1598					 pkt->psn);
1599			} else {
1600				/* UD/UC - class D */
1601				qp->resp.drop_msg = 1;
1602			}
1603			state = RESPST_CLEANUP;
1604			break;
1605
1606		case RESPST_ERR_RKEY_VIOLATION:
1607			if (qp_type(qp) == IB_QPT_RC) {
1608				/* Class C */
1609				do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
1610						  IB_WC_REM_ACCESS_ERR);
1611				state = RESPST_COMPLETE;
1612			} else {
1613				qp->resp.drop_msg = 1;
1614				if (qp->srq) {
1615					/* UC/SRQ Class D */
1616					qp->resp.status = IB_WC_REM_ACCESS_ERR;
1617					state = RESPST_COMPLETE;
1618				} else {
1619					/* UC/non-SRQ Class E. */
1620					state = RESPST_CLEANUP;
1621				}
1622			}
1623			break;
1624
1625		case RESPST_ERR_INVALIDATE_RKEY:
1626			/* RC - Class J. */
1627			qp->resp.goto_error = 1;
1628			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
1629			state = RESPST_COMPLETE;
1630			break;
1631
1632		case RESPST_ERR_LENGTH:
1633			if (qp_type(qp) == IB_QPT_RC) {
1634				/* Class C */
1635				do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
1636						  IB_WC_REM_INV_REQ_ERR);
1637				state = RESPST_COMPLETE;
1638			} else if (qp->srq) {
1639				/* UC/UD - class E */
1640				qp->resp.status = IB_WC_REM_INV_REQ_ERR;
1641				state = RESPST_COMPLETE;
1642			} else {
1643				/* UC/UD - class D */
1644				qp->resp.drop_msg = 1;
1645				state = RESPST_CLEANUP;
1646			}
1647			break;
1648
1649		case RESPST_ERR_MALFORMED_WQE:
1650			/* All, Class A. */
1651			do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
1652					  IB_WC_LOC_QP_OP_ERR);
1653			state = RESPST_COMPLETE;
1654			break;
1655
1656		case RESPST_ERR_CQ_OVERFLOW:
1657			/* All - Class G */
1658			state = RESPST_ERROR;
1659			break;
1660
1661		case RESPST_DONE:
1662			if (qp->resp.goto_error) {
1663				state = RESPST_ERROR;
1664				break;
1665			}
1666
1667			goto done;
1668
1669		case RESPST_EXIT:
1670			if (qp->resp.goto_error) {
1671				state = RESPST_ERROR;
1672				break;
1673			}
1674
1675			goto exit;
1676
1677		case RESPST_RESET:
1678			rxe_drain_req_pkts(qp, false);
1679			qp->resp.wqe = NULL;
1680			goto exit;
1681
1682		case RESPST_ERROR:
1683			qp->resp.goto_error = 0;
1684			rxe_dbg_qp(qp, "moved to error state\n");
1685			rxe_qp_error(qp);
1686			goto exit;
1687
1688		default:
1689			WARN_ON_ONCE(1);
1690		}
1691	}
1692
1693	/* A non-zero return value will cause rxe_do_task to
1694	 * exit its loop and end the tasklet. A zero return
1695	 * will continue looping and return to rxe_responder
1696	 */
1697done:
1698	ret = 0;
1699	goto out;
1700exit:
1701	ret = -EAGAIN;
1702out:
1703	rxe_put(qp);
1704	return ret;
1705}