Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
   1// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
   2
   3/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
   4/* Copyright (c) 2008-2019, IBM Corporation */
   5
   6#include <linux/errno.h>
   7#include <linux/types.h>
   8#include <linux/net.h>
   9#include <linux/scatterlist.h>
  10#include <linux/highmem.h>
  11
  12#include <rdma/iw_cm.h>
  13#include <rdma/ib_verbs.h>
  14
  15#include "siw.h"
  16#include "siw_verbs.h"
  17#include "siw_mem.h"
  18
  19/*
  20 * siw_rx_umem()
  21 *
  22 * Receive data of @len into target referenced by @dest_addr.
  23 *
  24 * @srx:	Receive Context
  25 * @umem:	siw representation of target memory
  26 * @dest_addr:	user virtual address
  27 * @len:	number of bytes to place
  28 */
  29static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,
  30		       u64 dest_addr, int len)
  31{
  32	int copied = 0;
  33
  34	while (len) {
  35		struct page *p;
  36		int pg_off, bytes, rv;
  37		void *dest;
  38
  39		p = siw_get_upage(umem, dest_addr);
  40		if (unlikely(!p)) {
  41			pr_warn("siw: %s: [QP %u]: bogus addr: %pK, %pK\n",
  42				__func__, qp_id(rx_qp(srx)),
  43				(void *)(uintptr_t)dest_addr,
  44				(void *)(uintptr_t)umem->fp_addr);
  45			/* siw internal error */
  46			srx->skb_copied += copied;
  47			srx->skb_new -= copied;
  48
  49			return -EFAULT;
  50		}
  51		pg_off = dest_addr & ~PAGE_MASK;
  52		bytes = min(len, (int)PAGE_SIZE - pg_off);
  53
  54		siw_dbg_qp(rx_qp(srx), "page %pK, bytes=%u\n", p, bytes);
  55
  56		dest = kmap_atomic(p);
  57		rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off,
  58				   bytes);
  59
  60		if (unlikely(rv)) {
  61			kunmap_atomic(dest);
  62			srx->skb_copied += copied;
  63			srx->skb_new -= copied;
  64
  65			pr_warn("siw: [QP %u]: %s, len %d, page %p, rv %d\n",
  66				qp_id(rx_qp(srx)), __func__, len, p, rv);
  67
  68			return -EFAULT;
  69		}
  70		if (srx->mpa_crc_hd) {
  71			if (rdma_is_kernel_res(&rx_qp(srx)->base_qp.res)) {
  72				crypto_shash_update(srx->mpa_crc_hd,
  73					(u8 *)(dest + pg_off), bytes);
  74				kunmap_atomic(dest);
  75			} else {
  76				kunmap_atomic(dest);
  77				/*
  78				 * Do CRC on original, not target buffer.
  79				 * Some user land applications may
  80				 * concurrently write the target buffer,
  81				 * which would yield a broken CRC.
  82				 * Walking the skb twice is very ineffcient.
  83				 * Folding the CRC into skb_copy_bits()
  84				 * would be much better, but is currently
  85				 * not supported.
  86				 */
  87				siw_crc_skb(srx, bytes);
  88			}
  89		} else {
  90			kunmap_atomic(dest);
  91		}
  92		srx->skb_offset += bytes;
  93		copied += bytes;
  94		len -= bytes;
  95		dest_addr += bytes;
  96		pg_off = 0;
  97	}
  98	srx->skb_copied += copied;
  99	srx->skb_new -= copied;
 100
 101	return copied;
 102}
 103
 104static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len)
 105{
 106	int rv;
 107
 108	siw_dbg_qp(rx_qp(srx), "kva: 0x%pK, len: %u\n", kva, len);
 109
 110	rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len);
 111	if (unlikely(rv)) {
 112		pr_warn("siw: [QP %u]: %s, len %d, kva 0x%pK, rv %d\n",
 113			qp_id(rx_qp(srx)), __func__, len, kva, rv);
 114
 115		return rv;
 116	}
 117	if (srx->mpa_crc_hd)
 118		crypto_shash_update(srx->mpa_crc_hd, (u8 *)kva, len);
 119
 120	srx->skb_offset += len;
 121	srx->skb_copied += len;
 122	srx->skb_new -= len;
 123
 124	return len;
 125}
 126
 127static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx,
 128		      struct siw_mem *mem, u64 addr, int len)
 129{
 130	struct siw_pbl *pbl = mem->pbl;
 131	u64 offset = addr - mem->va;
 132	int copied = 0;
 133
 134	while (len) {
 135		int bytes;
 136		dma_addr_t buf_addr =
 137			siw_pbl_get_buffer(pbl, offset, &bytes, pbl_idx);
 138		if (!buf_addr)
 139			break;
 140
 141		bytes = min(bytes, len);
 142		if (siw_rx_kva(srx, (void *)(uintptr_t)buf_addr, bytes) ==
 143		    bytes) {
 144			copied += bytes;
 145			offset += bytes;
 146			len -= bytes;
 147		} else {
 148			break;
 149		}
 150	}
 151	return copied;
 152}
 153
 154/*
 155 * siw_rresp_check_ntoh()
 156 *
 157 * Check incoming RRESP fragment header against expected
 158 * header values and update expected values for potential next
 159 * fragment.
 160 *
 161 * NOTE: This function must be called only if a RRESP DDP segment
 162 *       starts but not for fragmented consecutive pieces of an
 163 *       already started DDP segment.
 164 */
 165static int siw_rresp_check_ntoh(struct siw_rx_stream *srx,
 166				struct siw_rx_fpdu *frx)
 167{
 168	struct iwarp_rdma_rresp *rresp = &srx->hdr.rresp;
 169	struct siw_wqe *wqe = &frx->wqe_active;
 170	enum ddp_ecode ecode;
 171
 172	u32 sink_stag = be32_to_cpu(rresp->sink_stag);
 173	u64 sink_to = be64_to_cpu(rresp->sink_to);
 174
 175	if (frx->first_ddp_seg) {
 176		srx->ddp_stag = wqe->sqe.sge[0].lkey;
 177		srx->ddp_to = wqe->sqe.sge[0].laddr;
 178		frx->pbl_idx = 0;
 179	}
 180	/* Below checks extend beyond the semantics of DDP, and
 181	 * into RDMAP:
 182	 * We check if the read response matches exactly the
 183	 * read request which was send to the remote peer to
 184	 * trigger this read response. RFC5040/5041 do not
 185	 * always have a proper error code for the detected
 186	 * error cases. We choose 'base or bounds error' for
 187	 * cases where the inbound STag is valid, but offset
 188	 * or length do not match our response receive state.
 189	 */
 190	if (unlikely(srx->ddp_stag != sink_stag)) {
 191		pr_warn("siw: [QP %u]: rresp stag: %08x != %08x\n",
 192			qp_id(rx_qp(srx)), sink_stag, srx->ddp_stag);
 193		ecode = DDP_ECODE_T_INVALID_STAG;
 194		goto error;
 195	}
 196	if (unlikely(srx->ddp_to != sink_to)) {
 197		pr_warn("siw: [QP %u]: rresp off: %016llx != %016llx\n",
 198			qp_id(rx_qp(srx)), (unsigned long long)sink_to,
 199			(unsigned long long)srx->ddp_to);
 200		ecode = DDP_ECODE_T_BASE_BOUNDS;
 201		goto error;
 202	}
 203	if (unlikely(!frx->more_ddp_segs &&
 204		     (wqe->processed + srx->fpdu_part_rem != wqe->bytes))) {
 205		pr_warn("siw: [QP %u]: rresp len: %d != %d\n",
 206			qp_id(rx_qp(srx)),
 207			wqe->processed + srx->fpdu_part_rem, wqe->bytes);
 208		ecode = DDP_ECODE_T_BASE_BOUNDS;
 209		goto error;
 210	}
 211	return 0;
 212error:
 213	siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
 214			   DDP_ETYPE_TAGGED_BUF, ecode, 0);
 215	return -EINVAL;
 216}
 217
 218/*
 219 * siw_write_check_ntoh()
 220 *
 221 * Check incoming WRITE fragment header against expected
 222 * header values and update expected values for potential next
 223 * fragment
 224 *
 225 * NOTE: This function must be called only if a WRITE DDP segment
 226 *       starts but not for fragmented consecutive pieces of an
 227 *       already started DDP segment.
 228 */
 229static int siw_write_check_ntoh(struct siw_rx_stream *srx,
 230				struct siw_rx_fpdu *frx)
 231{
 232	struct iwarp_rdma_write *write = &srx->hdr.rwrite;
 233	enum ddp_ecode ecode;
 234
 235	u32 sink_stag = be32_to_cpu(write->sink_stag);
 236	u64 sink_to = be64_to_cpu(write->sink_to);
 237
 238	if (frx->first_ddp_seg) {
 239		srx->ddp_stag = sink_stag;
 240		srx->ddp_to = sink_to;
 241		frx->pbl_idx = 0;
 242	} else {
 243		if (unlikely(srx->ddp_stag != sink_stag)) {
 244			pr_warn("siw: [QP %u]: write stag: %08x != %08x\n",
 245				qp_id(rx_qp(srx)), sink_stag,
 246				srx->ddp_stag);
 247			ecode = DDP_ECODE_T_INVALID_STAG;
 248			goto error;
 249		}
 250		if (unlikely(srx->ddp_to != sink_to)) {
 251			pr_warn("siw: [QP %u]: write off: %016llx != %016llx\n",
 252				qp_id(rx_qp(srx)),
 253				(unsigned long long)sink_to,
 254				(unsigned long long)srx->ddp_to);
 255			ecode = DDP_ECODE_T_BASE_BOUNDS;
 256			goto error;
 257		}
 258	}
 259	return 0;
 260error:
 261	siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
 262			   DDP_ETYPE_TAGGED_BUF, ecode, 0);
 263	return -EINVAL;
 264}
 265
 266/*
 267 * siw_send_check_ntoh()
 268 *
 269 * Check incoming SEND fragment header against expected
 270 * header values and update expected MSN if no next
 271 * fragment expected
 272 *
 273 * NOTE: This function must be called only if a SEND DDP segment
 274 *       starts but not for fragmented consecutive pieces of an
 275 *       already started DDP segment.
 276 */
 277static int siw_send_check_ntoh(struct siw_rx_stream *srx,
 278			       struct siw_rx_fpdu *frx)
 279{
 280	struct iwarp_send_inv *send = &srx->hdr.send_inv;
 281	struct siw_wqe *wqe = &frx->wqe_active;
 282	enum ddp_ecode ecode;
 283
 284	u32 ddp_msn = be32_to_cpu(send->ddp_msn);
 285	u32 ddp_mo = be32_to_cpu(send->ddp_mo);
 286	u32 ddp_qn = be32_to_cpu(send->ddp_qn);
 287
 288	if (unlikely(ddp_qn != RDMAP_UNTAGGED_QN_SEND)) {
 289		pr_warn("siw: [QP %u]: invalid ddp qn %d for send\n",
 290			qp_id(rx_qp(srx)), ddp_qn);
 291		ecode = DDP_ECODE_UT_INVALID_QN;
 292		goto error;
 293	}
 294	if (unlikely(ddp_msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND])) {
 295		pr_warn("siw: [QP %u]: send msn: %u != %u\n",
 296			qp_id(rx_qp(srx)), ddp_msn,
 297			srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]);
 298		ecode = DDP_ECODE_UT_INVALID_MSN_RANGE;
 299		goto error;
 300	}
 301	if (unlikely(ddp_mo != wqe->processed)) {
 302		pr_warn("siw: [QP %u], send mo: %u != %u\n",
 303			qp_id(rx_qp(srx)), ddp_mo, wqe->processed);
 304		ecode = DDP_ECODE_UT_INVALID_MO;
 305		goto error;
 306	}
 307	if (frx->first_ddp_seg) {
 308		/* initialize user memory write position */
 309		frx->sge_idx = 0;
 310		frx->sge_off = 0;
 311		frx->pbl_idx = 0;
 312
 313		/* only valid for SEND_INV and SEND_SE_INV operations */
 314		srx->inval_stag = be32_to_cpu(send->inval_stag);
 315	}
 316	if (unlikely(wqe->bytes < wqe->processed + srx->fpdu_part_rem)) {
 317		siw_dbg_qp(rx_qp(srx), "receive space short: %d - %d < %d\n",
 318			   wqe->bytes, wqe->processed, srx->fpdu_part_rem);
 319		wqe->wc_status = SIW_WC_LOC_LEN_ERR;
 320		ecode = DDP_ECODE_UT_INVALID_MSN_NOBUF;
 321		goto error;
 322	}
 323	return 0;
 324error:
 325	siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
 326			   DDP_ETYPE_UNTAGGED_BUF, ecode, 0);
 327	return -EINVAL;
 328}
 329
 330static struct siw_wqe *siw_rqe_get(struct siw_qp *qp)
 331{
 332	struct siw_rqe *rqe;
 333	struct siw_srq *srq;
 334	struct siw_wqe *wqe = NULL;
 335	bool srq_event = false;
 336	unsigned long flags;
 337
 338	srq = qp->srq;
 339	if (srq) {
 340		spin_lock_irqsave(&srq->lock, flags);
 341		if (unlikely(!srq->num_rqe))
 342			goto out;
 343
 344		rqe = &srq->recvq[srq->rq_get % srq->num_rqe];
 345	} else {
 346		if (unlikely(!qp->recvq))
 347			goto out;
 348
 349		rqe = &qp->recvq[qp->rq_get % qp->attrs.rq_size];
 350	}
 351	if (likely(rqe->flags == SIW_WQE_VALID)) {
 352		int num_sge = rqe->num_sge;
 353
 354		if (likely(num_sge <= SIW_MAX_SGE)) {
 355			int i = 0;
 356
 357			wqe = rx_wqe(&qp->rx_untagged);
 358			rx_type(wqe) = SIW_OP_RECEIVE;
 359			wqe->wr_status = SIW_WR_INPROGRESS;
 360			wqe->bytes = 0;
 361			wqe->processed = 0;
 362
 363			wqe->rqe.id = rqe->id;
 364			wqe->rqe.num_sge = num_sge;
 365
 366			while (i < num_sge) {
 367				wqe->rqe.sge[i].laddr = rqe->sge[i].laddr;
 368				wqe->rqe.sge[i].lkey = rqe->sge[i].lkey;
 369				wqe->rqe.sge[i].length = rqe->sge[i].length;
 370				wqe->bytes += wqe->rqe.sge[i].length;
 371				wqe->mem[i] = NULL;
 372				i++;
 373			}
 374			/* can be re-used by appl */
 375			smp_store_mb(rqe->flags, 0);
 376		} else {
 377			siw_dbg_qp(qp, "too many sge's: %d\n", rqe->num_sge);
 378			if (srq)
 379				spin_unlock_irqrestore(&srq->lock, flags);
 380			return NULL;
 381		}
 382		if (!srq) {
 383			qp->rq_get++;
 384		} else {
 385			if (srq->armed) {
 386				/* Test SRQ limit */
 387				u32 off = (srq->rq_get + srq->limit) %
 388					  srq->num_rqe;
 389				struct siw_rqe *rqe2 = &srq->recvq[off];
 390
 391				if (!(rqe2->flags & SIW_WQE_VALID)) {
 392					srq->armed = false;
 393					srq_event = true;
 394				}
 395			}
 396			srq->rq_get++;
 397		}
 398	}
 399out:
 400	if (srq) {
 401		spin_unlock_irqrestore(&srq->lock, flags);
 402		if (srq_event)
 403			siw_srq_event(srq, IB_EVENT_SRQ_LIMIT_REACHED);
 404	}
 405	return wqe;
 406}
 407
 408/*
 409 * siw_proc_send:
 410 *
 411 * Process one incoming SEND and place data into memory referenced by
 412 * receive wqe.
 413 *
 414 * Function supports partially received sends (suspending/resuming
 415 * current receive wqe processing)
 416 *
 417 * return value:
 418 *	0:       reached the end of a DDP segment
 419 *	-EAGAIN: to be called again to finish the DDP segment
 420 */
 421int siw_proc_send(struct siw_qp *qp)
 422{
 423	struct siw_rx_stream *srx = &qp->rx_stream;
 424	struct siw_rx_fpdu *frx = &qp->rx_untagged;
 425	struct siw_wqe *wqe;
 426	u32 data_bytes; /* all data bytes available */
 427	u32 rcvd_bytes; /* sum of data bytes rcvd */
 428	int rv = 0;
 429
 430	if (frx->first_ddp_seg) {
 431		wqe = siw_rqe_get(qp);
 432		if (unlikely(!wqe)) {
 433			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 434					   DDP_ETYPE_UNTAGGED_BUF,
 435					   DDP_ECODE_UT_INVALID_MSN_NOBUF, 0);
 436			return -ENOENT;
 437		}
 438	} else {
 439		wqe = rx_wqe(frx);
 440	}
 441	if (srx->state == SIW_GET_DATA_START) {
 442		rv = siw_send_check_ntoh(srx, frx);
 443		if (unlikely(rv)) {
 444			siw_qp_event(qp, IB_EVENT_QP_FATAL);
 445			return rv;
 446		}
 447		if (!srx->fpdu_part_rem) /* zero length SEND */
 448			return 0;
 449	}
 450	data_bytes = min(srx->fpdu_part_rem, srx->skb_new);
 451	rcvd_bytes = 0;
 452
 453	/* A zero length SEND will skip below loop */
 454	while (data_bytes) {
 455		struct ib_pd *pd;
 456		struct siw_mem **mem, *mem_p;
 457		struct siw_sge *sge;
 458		u32 sge_bytes; /* data bytes avail for SGE */
 459
 460		sge = &wqe->rqe.sge[frx->sge_idx];
 461
 462		if (!sge->length) {
 463			/* just skip empty sge's */
 464			frx->sge_idx++;
 465			frx->sge_off = 0;
 466			frx->pbl_idx = 0;
 467			continue;
 468		}
 469		sge_bytes = min(data_bytes, sge->length - frx->sge_off);
 470		mem = &wqe->mem[frx->sge_idx];
 471
 472		/*
 473		 * check with QP's PD if no SRQ present, SRQ's PD otherwise
 474		 */
 475		pd = qp->srq == NULL ? qp->pd : qp->srq->base_srq.pd;
 476
 477		rv = siw_check_sge(pd, sge, mem, IB_ACCESS_LOCAL_WRITE,
 478				   frx->sge_off, sge_bytes);
 479		if (unlikely(rv)) {
 480			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 481					   DDP_ETYPE_CATASTROPHIC,
 482					   DDP_ECODE_CATASTROPHIC, 0);
 483
 484			siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
 485			break;
 486		}
 487		mem_p = *mem;
 488		if (mem_p->mem_obj == NULL)
 489			rv = siw_rx_kva(srx,
 490				(void *)(uintptr_t)(sge->laddr + frx->sge_off),
 491				sge_bytes);
 492		else if (!mem_p->is_pbl)
 493			rv = siw_rx_umem(srx, mem_p->umem,
 494					 sge->laddr + frx->sge_off, sge_bytes);
 495		else
 496			rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
 497					sge->laddr + frx->sge_off, sge_bytes);
 498
 499		if (unlikely(rv != sge_bytes)) {
 500			wqe->processed += rcvd_bytes;
 501
 502			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 503					   DDP_ETYPE_CATASTROPHIC,
 504					   DDP_ECODE_CATASTROPHIC, 0);
 505			return -EINVAL;
 506		}
 507		frx->sge_off += rv;
 508
 509		if (frx->sge_off == sge->length) {
 510			frx->sge_idx++;
 511			frx->sge_off = 0;
 512			frx->pbl_idx = 0;
 513		}
 514		data_bytes -= rv;
 515		rcvd_bytes += rv;
 516
 517		srx->fpdu_part_rem -= rv;
 518		srx->fpdu_part_rcvd += rv;
 519	}
 520	wqe->processed += rcvd_bytes;
 521
 522	if (!srx->fpdu_part_rem)
 523		return 0;
 524
 525	return (rv < 0) ? rv : -EAGAIN;
 526}
 527
 528/*
 529 * siw_proc_write:
 530 *
 531 * Place incoming WRITE after referencing and checking target buffer
 532
 533 * Function supports partially received WRITEs (suspending/resuming
 534 * current receive processing)
 535 *
 536 * return value:
 537 *	0:       reached the end of a DDP segment
 538 *	-EAGAIN: to be called again to finish the DDP segment
 539 */
 540int siw_proc_write(struct siw_qp *qp)
 541{
 542	struct siw_rx_stream *srx = &qp->rx_stream;
 543	struct siw_rx_fpdu *frx = &qp->rx_tagged;
 544	struct siw_mem *mem;
 545	int bytes, rv;
 546
 547	if (srx->state == SIW_GET_DATA_START) {
 548		if (!srx->fpdu_part_rem) /* zero length WRITE */
 549			return 0;
 550
 551		rv = siw_write_check_ntoh(srx, frx);
 552		if (unlikely(rv)) {
 553			siw_qp_event(qp, IB_EVENT_QP_FATAL);
 554			return rv;
 555		}
 556	}
 557	bytes = min(srx->fpdu_part_rem, srx->skb_new);
 558
 559	if (frx->first_ddp_seg) {
 560		struct siw_wqe *wqe = rx_wqe(frx);
 561
 562		rx_mem(frx) = siw_mem_id2obj(qp->sdev, srx->ddp_stag >> 8);
 563		if (unlikely(!rx_mem(frx))) {
 564			siw_dbg_qp(qp,
 565				   "sink stag not found/invalid, stag 0x%08x\n",
 566				   srx->ddp_stag);
 567
 568			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 569					   DDP_ETYPE_TAGGED_BUF,
 570					   DDP_ECODE_T_INVALID_STAG, 0);
 571			return -EINVAL;
 572		}
 573		wqe->rqe.num_sge = 1;
 574		rx_type(wqe) = SIW_OP_WRITE;
 575		wqe->wr_status = SIW_WR_INPROGRESS;
 576	}
 577	mem = rx_mem(frx);
 578
 579	/*
 580	 * Check if application re-registered memory with different
 581	 * key field of STag.
 582	 */
 583	if (unlikely(mem->stag != srx->ddp_stag)) {
 584		siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 585				   DDP_ETYPE_TAGGED_BUF,
 586				   DDP_ECODE_T_INVALID_STAG, 0);
 587		return -EINVAL;
 588	}
 589	rv = siw_check_mem(qp->pd, mem, srx->ddp_to + srx->fpdu_part_rcvd,
 590			   IB_ACCESS_REMOTE_WRITE, bytes);
 591	if (unlikely(rv)) {
 592		siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 593				   DDP_ETYPE_TAGGED_BUF, siw_tagged_error(-rv),
 594				   0);
 595
 596		siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
 597
 598		return -EINVAL;
 599	}
 600
 601	if (mem->mem_obj == NULL)
 602		rv = siw_rx_kva(srx,
 603			(void *)(uintptr_t)(srx->ddp_to + srx->fpdu_part_rcvd),
 604			bytes);
 605	else if (!mem->is_pbl)
 606		rv = siw_rx_umem(srx, mem->umem,
 607				 srx->ddp_to + srx->fpdu_part_rcvd, bytes);
 608	else
 609		rv = siw_rx_pbl(srx, &frx->pbl_idx, mem,
 610				srx->ddp_to + srx->fpdu_part_rcvd, bytes);
 611
 612	if (unlikely(rv != bytes)) {
 613		siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 614				   DDP_ETYPE_CATASTROPHIC,
 615				   DDP_ECODE_CATASTROPHIC, 0);
 616		return -EINVAL;
 617	}
 618	srx->fpdu_part_rem -= rv;
 619	srx->fpdu_part_rcvd += rv;
 620
 621	if (!srx->fpdu_part_rem) {
 622		srx->ddp_to += srx->fpdu_part_rcvd;
 623		return 0;
 624	}
 625	return -EAGAIN;
 626}
 627
 628/*
 629 * Inbound RREQ's cannot carry user data.
 630 */
 631int siw_proc_rreq(struct siw_qp *qp)
 632{
 633	struct siw_rx_stream *srx = &qp->rx_stream;
 634
 635	if (!srx->fpdu_part_rem)
 636		return 0;
 637
 638	pr_warn("siw: [QP %u]: rreq with mpa len %d\n", qp_id(qp),
 639		be16_to_cpu(srx->hdr.ctrl.mpa_len));
 640
 641	return -EPROTO;
 642}
 643
 644/*
 645 * siw_init_rresp:
 646 *
 647 * Process inbound RDMA READ REQ. Produce a pseudo READ RESPONSE WQE.
 648 * Put it at the tail of the IRQ, if there is another WQE currently in
 649 * transmit processing. If not, make it the current WQE to be processed
 650 * and schedule transmit processing.
 651 *
 652 * Can be called from softirq context and from process
 653 * context (RREAD socket loopback case!)
 654 *
 655 * return value:
 656 *	0:      success,
 657 *		failure code otherwise
 658 */
 659
 660static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx)
 661{
 662	struct siw_wqe *tx_work = tx_wqe(qp);
 663	struct siw_sqe *resp;
 664
 665	uint64_t raddr = be64_to_cpu(srx->hdr.rreq.sink_to),
 666		 laddr = be64_to_cpu(srx->hdr.rreq.source_to);
 667	uint32_t length = be32_to_cpu(srx->hdr.rreq.read_size),
 668		 lkey = be32_to_cpu(srx->hdr.rreq.source_stag),
 669		 rkey = be32_to_cpu(srx->hdr.rreq.sink_stag),
 670		 msn = be32_to_cpu(srx->hdr.rreq.ddp_msn);
 671
 672	int run_sq = 1, rv = 0;
 673	unsigned long flags;
 674
 675	if (unlikely(msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ])) {
 676		siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 677				   DDP_ETYPE_UNTAGGED_BUF,
 678				   DDP_ECODE_UT_INVALID_MSN_RANGE, 0);
 679		return -EPROTO;
 680	}
 681	spin_lock_irqsave(&qp->sq_lock, flags);
 682
 683	if (unlikely(!qp->attrs.irq_size)) {
 684		run_sq = 0;
 685		goto error_irq;
 686	}
 687	if (tx_work->wr_status == SIW_WR_IDLE) {
 688		/*
 689		 * immediately schedule READ response w/o
 690		 * consuming IRQ entry: IRQ must be empty.
 691		 */
 692		tx_work->processed = 0;
 693		tx_work->mem[0] = NULL;
 694		tx_work->wr_status = SIW_WR_QUEUED;
 695		resp = &tx_work->sqe;
 696	} else {
 697		resp = irq_alloc_free(qp);
 698		run_sq = 0;
 699	}
 700	if (likely(resp)) {
 701		resp->opcode = SIW_OP_READ_RESPONSE;
 702
 703		resp->sge[0].length = length;
 704		resp->sge[0].laddr = laddr;
 705		resp->sge[0].lkey = lkey;
 706
 707		/* Keep aside message sequence number for potential
 708		 * error reporting during Read Response generation.
 709		 */
 710		resp->sge[1].length = msn;
 711
 712		resp->raddr = raddr;
 713		resp->rkey = rkey;
 714		resp->num_sge = length ? 1 : 0;
 715
 716		/* RRESP now valid as current TX wqe or placed into IRQ */
 717		smp_store_mb(resp->flags, SIW_WQE_VALID);
 718	} else {
 719error_irq:
 720		pr_warn("siw: [QP %u]: IRQ exceeded or null, size %d\n",
 721			qp_id(qp), qp->attrs.irq_size);
 722
 723		siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
 724				   RDMAP_ETYPE_REMOTE_OPERATION,
 725				   RDMAP_ECODE_CATASTROPHIC_STREAM, 0);
 726		rv = -EPROTO;
 727	}
 728
 729	spin_unlock_irqrestore(&qp->sq_lock, flags);
 730
 731	if (run_sq)
 732		rv = siw_sq_start(qp);
 733
 734	return rv;
 735}
 736
 737/*
 738 * Only called at start of Read.Resonse processing.
 739 * Transfer pending Read from tip of ORQ into currrent rx wqe,
 740 * but keep ORQ entry valid until Read.Response processing done.
 741 * No Queue locking needed.
 742 */
 743static int siw_orqe_start_rx(struct siw_qp *qp)
 744{
 745	struct siw_sqe *orqe;
 746	struct siw_wqe *wqe = NULL;
 747
 748	if (unlikely(!qp->attrs.orq_size))
 749		return -EPROTO;
 750
 751	/* make sure ORQ indices are current */
 752	smp_mb();
 753
 754	orqe = orq_get_current(qp);
 755	if (READ_ONCE(orqe->flags) & SIW_WQE_VALID) {
 756		/* RRESP is a TAGGED RDMAP operation */
 757		wqe = rx_wqe(&qp->rx_tagged);
 758		wqe->sqe.id = orqe->id;
 759		wqe->sqe.opcode = orqe->opcode;
 760		wqe->sqe.sge[0].laddr = orqe->sge[0].laddr;
 761		wqe->sqe.sge[0].lkey = orqe->sge[0].lkey;
 762		wqe->sqe.sge[0].length = orqe->sge[0].length;
 763		wqe->sqe.flags = orqe->flags;
 764		wqe->sqe.num_sge = 1;
 765		wqe->bytes = orqe->sge[0].length;
 766		wqe->processed = 0;
 767		wqe->mem[0] = NULL;
 768		/* make sure WQE is completely written before valid */
 769		smp_wmb();
 770		wqe->wr_status = SIW_WR_INPROGRESS;
 771
 772		return 0;
 773	}
 774	return -EPROTO;
 775}
 776
 777/*
 778 * siw_proc_rresp:
 779 *
 780 * Place incoming RRESP data into memory referenced by RREQ WQE
 781 * which is at the tip of the ORQ
 782 *
 783 * Function supports partially received RRESP's (suspending/resuming
 784 * current receive processing)
 785 */
 786int siw_proc_rresp(struct siw_qp *qp)
 787{
 788	struct siw_rx_stream *srx = &qp->rx_stream;
 789	struct siw_rx_fpdu *frx = &qp->rx_tagged;
 790	struct siw_wqe *wqe = rx_wqe(frx);
 791	struct siw_mem **mem, *mem_p;
 792	struct siw_sge *sge;
 793	int bytes, rv;
 794
 795	if (frx->first_ddp_seg) {
 796		if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
 797			pr_warn("siw: [QP %u]: proc RRESP: status %d, op %d\n",
 798				qp_id(qp), wqe->wr_status, wqe->sqe.opcode);
 799			rv = -EPROTO;
 800			goto error_term;
 801		}
 802		/*
 803		 * fetch pending RREQ from orq
 804		 */
 805		rv = siw_orqe_start_rx(qp);
 806		if (rv) {
 807			pr_warn("siw: [QP %u]: ORQ empty, size %d\n",
 808				qp_id(qp), qp->attrs.orq_size);
 809			goto error_term;
 810		}
 811		rv = siw_rresp_check_ntoh(srx, frx);
 812		if (unlikely(rv)) {
 813			siw_qp_event(qp, IB_EVENT_QP_FATAL);
 814			return rv;
 815		}
 816	} else {
 817		if (unlikely(wqe->wr_status != SIW_WR_INPROGRESS)) {
 818			pr_warn("siw: [QP %u]: resume RRESP: status %d\n",
 819				qp_id(qp), wqe->wr_status);
 820			rv = -EPROTO;
 821			goto error_term;
 822		}
 823	}
 824	if (!srx->fpdu_part_rem) /* zero length RRESPONSE */
 825		return 0;
 826
 827	sge = wqe->sqe.sge; /* there is only one */
 828	mem = &wqe->mem[0];
 829
 830	if (!(*mem)) {
 831		/*
 832		 * check target memory which resolves memory on first fragment
 833		 */
 834		rv = siw_check_sge(qp->pd, sge, mem, IB_ACCESS_LOCAL_WRITE, 0,
 835				   wqe->bytes);
 836		if (unlikely(rv)) {
 837			siw_dbg_qp(qp, "target mem check: %d\n", rv);
 838			wqe->wc_status = SIW_WC_LOC_PROT_ERR;
 839
 840			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
 841					   DDP_ETYPE_TAGGED_BUF,
 842					   siw_tagged_error(-rv), 0);
 843
 844			siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
 845
 846			return -EINVAL;
 847		}
 848	}
 849	mem_p = *mem;
 850
 851	bytes = min(srx->fpdu_part_rem, srx->skb_new);
 852
 853	if (mem_p->mem_obj == NULL)
 854		rv = siw_rx_kva(srx,
 855			(void *)(uintptr_t)(sge->laddr + wqe->processed),
 856			bytes);
 857	else if (!mem_p->is_pbl)
 858		rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed,
 859				 bytes);
 860	else
 861		rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
 862				sge->laddr + wqe->processed, bytes);
 863	if (rv != bytes) {
 864		wqe->wc_status = SIW_WC_GENERAL_ERR;
 865		rv = -EINVAL;
 866		goto error_term;
 867	}
 868	srx->fpdu_part_rem -= rv;
 869	srx->fpdu_part_rcvd += rv;
 870	wqe->processed += rv;
 871
 872	if (!srx->fpdu_part_rem) {
 873		srx->ddp_to += srx->fpdu_part_rcvd;
 874		return 0;
 875	}
 876	return -EAGAIN;
 877
 878error_term:
 879	siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, DDP_ETYPE_CATASTROPHIC,
 880			   DDP_ECODE_CATASTROPHIC, 0);
 881	return rv;
 882}
 883
 884int siw_proc_terminate(struct siw_qp *qp)
 885{
 886	struct siw_rx_stream *srx = &qp->rx_stream;
 887	struct sk_buff *skb = srx->skb;
 888	struct iwarp_terminate *term = &srx->hdr.terminate;
 889	union iwarp_hdr term_info;
 890	u8 *infop = (u8 *)&term_info;
 891	enum rdma_opcode op;
 892	u16 to_copy = sizeof(struct iwarp_ctrl);
 893
 894	pr_warn("siw: got TERMINATE. layer %d, type %d, code %d\n",
 895		__rdmap_term_layer(term), __rdmap_term_etype(term),
 896		__rdmap_term_ecode(term));
 897
 898	if (be32_to_cpu(term->ddp_qn) != RDMAP_UNTAGGED_QN_TERMINATE ||
 899	    be32_to_cpu(term->ddp_msn) !=
 900		    qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] ||
 901	    be32_to_cpu(term->ddp_mo) != 0) {
 902		pr_warn("siw: rx bogus TERM [QN x%08x, MSN x%08x, MO x%08x]\n",
 903			be32_to_cpu(term->ddp_qn), be32_to_cpu(term->ddp_msn),
 904			be32_to_cpu(term->ddp_mo));
 905		return -ECONNRESET;
 906	}
 907	/*
 908	 * Receive remaining pieces of TERM if indicated
 909	 */
 910	if (!term->flag_m)
 911		return -ECONNRESET;
 912
 913	/* Do not take the effort to reassemble a network fragmented
 914	 * TERM message
 915	 */
 916	if (srx->skb_new < sizeof(struct iwarp_ctrl_tagged))
 917		return -ECONNRESET;
 918
 919	memset(infop, 0, sizeof(term_info));
 920
 921	skb_copy_bits(skb, srx->skb_offset, infop, to_copy);
 922
 923	op = __rdmap_get_opcode(&term_info.ctrl);
 924	if (op >= RDMAP_TERMINATE)
 925		goto out;
 926
 927	infop += to_copy;
 928	srx->skb_offset += to_copy;
 929	srx->skb_new -= to_copy;
 930	srx->skb_copied += to_copy;
 931	srx->fpdu_part_rcvd += to_copy;
 932	srx->fpdu_part_rem -= to_copy;
 933
 934	to_copy = iwarp_pktinfo[op].hdr_len - to_copy;
 935
 936	/* Again, no network fragmented TERM's */
 937	if (to_copy + MPA_CRC_SIZE > srx->skb_new)
 938		return -ECONNRESET;
 939
 940	skb_copy_bits(skb, srx->skb_offset, infop, to_copy);
 941
 942	if (term->flag_r) {
 943		siw_dbg_qp(qp, "TERM reports RDMAP hdr type %u, len %u (%s)\n",
 944			   op, be16_to_cpu(term_info.ctrl.mpa_len),
 945			   term->flag_m ? "valid" : "invalid");
 946	} else if (term->flag_d) {
 947		siw_dbg_qp(qp, "TERM reports DDP hdr type %u, len %u (%s)\n",
 948			   op, be16_to_cpu(term_info.ctrl.mpa_len),
 949			   term->flag_m ? "valid" : "invalid");
 950	}
 951out:
 952	srx->skb_new -= to_copy;
 953	srx->skb_offset += to_copy;
 954	srx->skb_copied += to_copy;
 955	srx->fpdu_part_rcvd += to_copy;
 956	srx->fpdu_part_rem -= to_copy;
 957
 958	return -ECONNRESET;
 959}
 960
 961static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx)
 962{
 963	struct sk_buff *skb = srx->skb;
 964	int avail = min(srx->skb_new, srx->fpdu_part_rem);
 965	u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad;
 966	__wsum crc_in, crc_own = 0;
 967
 968	siw_dbg_qp(qp, "expected %d, available %d, pad %u\n",
 969		   srx->fpdu_part_rem, srx->skb_new, srx->pad);
 970
 971	skb_copy_bits(skb, srx->skb_offset, tbuf, avail);
 972
 973	srx->skb_new -= avail;
 974	srx->skb_offset += avail;
 975	srx->skb_copied += avail;
 976	srx->fpdu_part_rem -= avail;
 977
 978	if (srx->fpdu_part_rem)
 979		return -EAGAIN;
 980
 981	if (!srx->mpa_crc_hd)
 982		return 0;
 983
 984	if (srx->pad)
 985		crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad);
 986	/*
 987	 * CRC32 is computed, transmitted and received directly in NBO,
 988	 * so there's never a reason to convert byte order.
 989	 */
 990	crypto_shash_final(srx->mpa_crc_hd, (u8 *)&crc_own);
 991	crc_in = (__force __wsum)srx->trailer.crc;
 992
 993	if (unlikely(crc_in != crc_own)) {
 994		pr_warn("siw: crc error. in: %08x, own %08x, op %u\n",
 995			crc_in, crc_own, qp->rx_stream.rdmap_op);
 996
 997		siw_init_terminate(qp, TERM_ERROR_LAYER_LLP,
 998				   LLP_ETYPE_MPA,
 999				   LLP_ECODE_RECEIVED_CRC, 0);
1000		return -EINVAL;
1001	}
1002	return 0;
1003}
1004
/*
 * Number of header bytes siw_get_hdr() collects before it starts to
 * parse the header: the size of the tagged DDP control part.
 */
#define MIN_DDP_HDR (sizeof(struct iwarp_ctrl_tagged))
1006
1007static int siw_get_hdr(struct siw_rx_stream *srx)
1008{
1009	struct sk_buff *skb = srx->skb;
1010	struct siw_qp *qp = rx_qp(srx);
1011	struct iwarp_ctrl *c_hdr = &srx->hdr.ctrl;
1012	struct siw_rx_fpdu *frx;
1013	u8 opcode;
1014	int bytes;
1015
1016	if (srx->fpdu_part_rcvd < MIN_DDP_HDR) {
1017		/*
1018		 * copy a mimimum sized (tagged) DDP frame control part
1019		 */
1020		bytes = min_t(int, srx->skb_new,
1021			      MIN_DDP_HDR - srx->fpdu_part_rcvd);
1022
1023		skb_copy_bits(skb, srx->skb_offset,
1024			      (char *)c_hdr + srx->fpdu_part_rcvd, bytes);
1025
1026		srx->fpdu_part_rcvd += bytes;
1027
1028		srx->skb_new -= bytes;
1029		srx->skb_offset += bytes;
1030		srx->skb_copied += bytes;
1031
1032		if (srx->fpdu_part_rcvd < MIN_DDP_HDR)
1033			return -EAGAIN;
1034
1035		if (unlikely(__ddp_get_version(c_hdr) != DDP_VERSION)) {
1036			enum ddp_etype etype;
1037			enum ddp_ecode ecode;
1038
1039			pr_warn("siw: received ddp version unsupported %d\n",
1040				__ddp_get_version(c_hdr));
1041
1042			if (c_hdr->ddp_rdmap_ctrl & DDP_FLAG_TAGGED) {
1043				etype = DDP_ETYPE_TAGGED_BUF;
1044				ecode = DDP_ECODE_T_VERSION;
1045			} else {
1046				etype = DDP_ETYPE_UNTAGGED_BUF;
1047				ecode = DDP_ECODE_UT_VERSION;
1048			}
1049			siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
1050					   etype, ecode, 0);
1051			return -EINVAL;
1052		}
1053		if (unlikely(__rdmap_get_version(c_hdr) != RDMAP_VERSION)) {
1054			pr_warn("siw: received rdmap version unsupported %d\n",
1055				__rdmap_get_version(c_hdr));
1056
1057			siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
1058					   RDMAP_ETYPE_REMOTE_OPERATION,
1059					   RDMAP_ECODE_VERSION, 0);
1060			return -EINVAL;
1061		}
1062		opcode = __rdmap_get_opcode(c_hdr);
1063
1064		if (opcode > RDMAP_TERMINATE) {
1065			pr_warn("siw: received unknown packet type %u\n",
1066				opcode);
1067
1068			siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
1069					   RDMAP_ETYPE_REMOTE_OPERATION,
1070					   RDMAP_ECODE_OPCODE, 0);
1071			return -EINVAL;
1072		}
1073		siw_dbg_qp(rx_qp(srx), "new header, opcode %u\n", opcode);
1074	} else {
1075		opcode = __rdmap_get_opcode(c_hdr);
1076	}
1077	set_rx_fpdu_context(qp, opcode);
1078	frx = qp->rx_fpdu;
1079
1080	/*
1081	 * Figure out len of current hdr: variable length of
1082	 * iwarp hdr may force us to copy hdr information in
1083	 * two steps. Only tagged DDP messages are already
1084	 * completely received.
1085	 */
1086	if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) {
1087		int hdrlen = iwarp_pktinfo[opcode].hdr_len;
1088
1089		bytes = min_t(int, hdrlen - MIN_DDP_HDR, srx->skb_new);
1090
1091		skb_copy_bits(skb, srx->skb_offset,
1092			      (char *)c_hdr + srx->fpdu_part_rcvd, bytes);
1093
1094		srx->fpdu_part_rcvd += bytes;
1095
1096		srx->skb_new -= bytes;
1097		srx->skb_offset += bytes;
1098		srx->skb_copied += bytes;
1099
1100		if (srx->fpdu_part_rcvd < hdrlen)
1101			return -EAGAIN;
1102	}
1103
1104	/*
1105	 * DDP/RDMAP header receive completed. Check if the current
1106	 * DDP segment starts a new RDMAP message or continues a previously
1107	 * started RDMAP message.
1108	 *
1109	 * Alternating reception of DDP segments (or FPDUs) from incomplete
1110	 * tagged and untagged RDMAP messages is supported, as long as
1111	 * the current tagged or untagged message gets eventually completed
1112	 * w/o intersection from another message of the same type
1113	 * (tagged/untagged). E.g., a WRITE can get intersected by a SEND,
1114	 * but not by a READ RESPONSE etc.
1115	 */
1116	if (srx->mpa_crc_hd) {
1117		/*
1118		 * Restart CRC computation
1119		 */
1120		crypto_shash_init(srx->mpa_crc_hd);
1121		crypto_shash_update(srx->mpa_crc_hd, (u8 *)c_hdr,
1122				    srx->fpdu_part_rcvd);
1123	}
1124	if (frx->more_ddp_segs) {
1125		frx->first_ddp_seg = 0;
1126		if (frx->prev_rdmap_op != opcode) {
1127			pr_warn("siw: packet intersection: %u : %u\n",
1128				frx->prev_rdmap_op, opcode);
1129			/*
1130			 * The last inbound RDMA operation of same type
1131			 * (tagged or untagged) is left unfinished.
1132			 * To complete it in error, make it the current
1133			 * operation again, even with the header already
1134			 * overwritten. For error handling, only the opcode
1135			 * and current rx context are relevant.
1136			 */
1137			set_rx_fpdu_context(qp, frx->prev_rdmap_op);
1138			__rdmap_set_opcode(c_hdr, frx->prev_rdmap_op);
1139			return -EPROTO;
1140		}
1141	} else {
1142		frx->prev_rdmap_op = opcode;
1143		frx->first_ddp_seg = 1;
1144	}
1145	frx->more_ddp_segs = c_hdr->ddp_rdmap_ctrl & DDP_FLAG_LAST ? 0 : 1;
1146
1147	return 0;
1148}
1149
1150static int siw_check_tx_fence(struct siw_qp *qp)
1151{
1152	struct siw_wqe *tx_waiting = tx_wqe(qp);
1153	struct siw_sqe *rreq;
1154	int resume_tx = 0, rv = 0;
1155	unsigned long flags;
1156
1157	spin_lock_irqsave(&qp->orq_lock, flags);
1158
1159	/* free current orq entry */
1160	rreq = orq_get_current(qp);
1161	WRITE_ONCE(rreq->flags, 0);
1162
1163	qp->orq_get++;
1164
1165	if (qp->tx_ctx.orq_fence) {
1166		if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) {
1167			pr_warn("siw: [QP %u]: fence resume: bad status %d\n",
1168				qp_id(qp), tx_waiting->wr_status);
1169			rv = -EPROTO;
1170			goto out;
1171		}
1172		/* resume SQ processing, if possible */
1173		if (tx_waiting->sqe.opcode == SIW_OP_READ ||
1174		    tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
1175
1176			/* SQ processing was stopped because of a full ORQ */
1177			rreq = orq_get_free(qp);
1178			if (unlikely(!rreq)) {
1179				pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp));
1180				rv = -EPROTO;
1181				goto out;
1182			}
1183			siw_read_to_orq(rreq, &tx_waiting->sqe);
1184
1185			qp->orq_put++;
1186			qp->tx_ctx.orq_fence = 0;
1187			resume_tx = 1;
1188
1189		} else if (siw_orq_empty(qp)) {
1190			/*
1191			 * SQ processing was stopped by fenced work request.
1192			 * Resume since all previous Read's are now completed.
1193			 */
1194			qp->tx_ctx.orq_fence = 0;
1195			resume_tx = 1;
1196		}
1197	}
1198out:
1199	spin_unlock_irqrestore(&qp->orq_lock, flags);
1200
1201	if (resume_tx)
1202		rv = siw_sq_start(qp);
1203
1204	return rv;
1205}
1206
1207/*
1208 * siw_rdmap_complete()
1209 *
1210 * Complete processing of an RDMA message after receiving all
1211 * DDP segmens or ABort processing after encountering error case.
1212 *
1213 *   o SENDs + RRESPs will need for completion,
1214 *   o RREQs need for  READ RESPONSE initialization
1215 *   o WRITEs need memory dereferencing
1216 *
1217 * TODO: Failed WRITEs need local error to be surfaced.
1218 */
1219static int siw_rdmap_complete(struct siw_qp *qp, int error)
1220{
1221	struct siw_rx_stream *srx = &qp->rx_stream;
1222	struct siw_wqe *wqe = rx_wqe(qp->rx_fpdu);
1223	enum siw_wc_status wc_status = wqe->wc_status;
1224	u8 opcode = __rdmap_get_opcode(&srx->hdr.ctrl);
1225	int rv = 0;
1226
1227	switch (opcode) {
1228	case RDMAP_SEND_SE:
1229	case RDMAP_SEND_SE_INVAL:
1230		wqe->rqe.flags |= SIW_WQE_SOLICITED;
1231		fallthrough;
1232
1233	case RDMAP_SEND:
1234	case RDMAP_SEND_INVAL:
1235		if (wqe->wr_status == SIW_WR_IDLE)
1236			break;
1237
1238		srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]++;
1239
1240		if (error != 0 && wc_status == SIW_WC_SUCCESS)
1241			wc_status = SIW_WC_GENERAL_ERR;
1242		/*
1243		 * Handle STag invalidation request
1244		 */
1245		if (wc_status == SIW_WC_SUCCESS &&
1246		    (opcode == RDMAP_SEND_INVAL ||
1247		     opcode == RDMAP_SEND_SE_INVAL)) {
1248			rv = siw_invalidate_stag(qp->pd, srx->inval_stag);
1249			if (rv) {
1250				siw_init_terminate(
1251					qp, TERM_ERROR_LAYER_RDMAP,
1252					rv == -EACCES ?
1253						RDMAP_ETYPE_REMOTE_PROTECTION :
1254						RDMAP_ETYPE_REMOTE_OPERATION,
1255					RDMAP_ECODE_CANNOT_INVALIDATE, 0);
1256
1257				wc_status = SIW_WC_REM_INV_REQ_ERR;
1258			}
1259			rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
1260					      rv ? 0 : srx->inval_stag,
1261					      wc_status);
1262		} else {
1263			rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
1264					      0, wc_status);
1265		}
1266		siw_wqe_put_mem(wqe, SIW_OP_RECEIVE);
1267		break;
1268
1269	case RDMAP_RDMA_READ_RESP:
1270		if (wqe->wr_status == SIW_WR_IDLE)
1271			break;
1272
1273		if (error != 0) {
1274			if ((srx->state == SIW_GET_HDR &&
1275			     qp->rx_fpdu->first_ddp_seg) || error == -ENODATA)
1276				/* possible RREQ in ORQ left untouched */
1277				break;
1278
1279			if (wc_status == SIW_WC_SUCCESS)
1280				wc_status = SIW_WC_GENERAL_ERR;
1281		} else if (rdma_is_kernel_res(&qp->base_qp.res) &&
1282			   rx_type(wqe) == SIW_OP_READ_LOCAL_INV) {
1283			/*
1284			 * Handle any STag invalidation request
1285			 */
1286			rv = siw_invalidate_stag(qp->pd, wqe->sqe.sge[0].lkey);
1287			if (rv) {
1288				siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
1289						   RDMAP_ETYPE_CATASTROPHIC,
1290						   RDMAP_ECODE_UNSPECIFIED, 0);
1291
1292				if (wc_status == SIW_WC_SUCCESS) {
1293					wc_status = SIW_WC_GENERAL_ERR;
1294					error = rv;
1295				}
1296			}
1297		}
1298		/*
1299		 * All errors turn the wqe into signalled.
1300		 */
1301		if ((wqe->sqe.flags & SIW_WQE_SIGNALLED) || error != 0)
1302			rv = siw_sqe_complete(qp, &wqe->sqe, wqe->processed,
1303					      wc_status);
1304		siw_wqe_put_mem(wqe, SIW_OP_READ);
1305
1306		if (!error) {
1307			rv = siw_check_tx_fence(qp);
1308		} else {
1309			/* Disable current ORQ element */
1310			if (qp->attrs.orq_size)
1311				WRITE_ONCE(orq_get_current(qp)->flags, 0);
1312		}
1313		break;
1314
1315	case RDMAP_RDMA_READ_REQ:
1316		if (!error) {
1317			rv = siw_init_rresp(qp, srx);
1318			srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]++;
1319		}
1320		break;
1321
1322	case RDMAP_RDMA_WRITE:
1323		if (wqe->wr_status == SIW_WR_IDLE)
1324			break;
1325
1326		/*
1327		 * Free References from memory object if
1328		 * attached to receive context (inbound WRITE).
1329		 * While a zero-length WRITE is allowed,
1330		 * no memory reference got created.
1331		 */
1332		if (rx_mem(&qp->rx_tagged)) {
1333			siw_mem_put(rx_mem(&qp->rx_tagged));
1334			rx_mem(&qp->rx_tagged) = NULL;
1335		}
1336		break;
1337
1338	default:
1339		break;
1340	}
1341	wqe->wr_status = SIW_WR_IDLE;
1342
1343	return rv;
1344}
1345
1346/*
1347 * siw_tcp_rx_data()
1348 *
1349 * Main routine to consume inbound TCP payload
1350 *
1351 * @rd_desc:	read descriptor
1352 * @skb:	socket buffer
1353 * @off:	offset in skb
1354 * @len:	skb->len - offset : payload in skb
1355 */
1356int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb,
1357		    unsigned int off, size_t len)
1358{
1359	struct siw_qp *qp = rd_desc->arg.data;
1360	struct siw_rx_stream *srx = &qp->rx_stream;
1361	int rv;
1362
1363	srx->skb = skb;
1364	srx->skb_new = skb->len - off;
1365	srx->skb_offset = off;
1366	srx->skb_copied = 0;
1367
1368	siw_dbg_qp(qp, "new data, len %d\n", srx->skb_new);
1369
1370	while (srx->skb_new) {
1371		int run_completion = 1;
1372
1373		if (unlikely(srx->rx_suspend)) {
1374			/* Do not process any more data */
1375			srx->skb_copied += srx->skb_new;
1376			break;
1377		}
1378		switch (srx->state) {
1379		case SIW_GET_HDR:
1380			rv = siw_get_hdr(srx);
1381			if (!rv) {
1382				srx->fpdu_part_rem =
1383					be16_to_cpu(srx->hdr.ctrl.mpa_len) -
1384					srx->fpdu_part_rcvd + MPA_HDR_SIZE;
1385
1386				if (srx->fpdu_part_rem)
1387					srx->pad = -srx->fpdu_part_rem & 0x3;
1388				else
1389					srx->pad = 0;
1390
1391				srx->state = SIW_GET_DATA_START;
1392				srx->fpdu_part_rcvd = 0;
1393			}
1394			break;
1395
1396		case SIW_GET_DATA_MORE:
1397			/*
1398			 * Another data fragment of the same DDP segment.
1399			 * Setting first_ddp_seg = 0 avoids repeating
1400			 * initializations that shall occur only once per
1401			 * DDP segment.
1402			 */
1403			qp->rx_fpdu->first_ddp_seg = 0;
1404			fallthrough;
1405
1406		case SIW_GET_DATA_START:
1407			/*
1408			 * Headers will be checked by the opcode-specific
1409			 * data receive function below.
1410			 */
1411			rv = iwarp_pktinfo[qp->rx_stream.rdmap_op].rx_data(qp);
1412			if (!rv) {
1413				int mpa_len =
1414					be16_to_cpu(srx->hdr.ctrl.mpa_len)
1415					+ MPA_HDR_SIZE;
1416
1417				srx->fpdu_part_rem = (-mpa_len & 0x3)
1418						      + MPA_CRC_SIZE;
1419				srx->fpdu_part_rcvd = 0;
1420				srx->state = SIW_GET_TRAILER;
1421			} else {
1422				if (unlikely(rv == -ECONNRESET))
1423					run_completion = 0;
1424				else
1425					srx->state = SIW_GET_DATA_MORE;
1426			}
1427			break;
1428
1429		case SIW_GET_TRAILER:
1430			/*
1431			 * read CRC + any padding
1432			 */
1433			rv = siw_get_trailer(qp, srx);
1434			if (likely(!rv)) {
1435				/*
1436				 * FPDU completed.
1437				 * complete RDMAP message if last fragment
1438				 */
1439				srx->state = SIW_GET_HDR;
1440				srx->fpdu_part_rcvd = 0;
1441
1442				if (!(srx->hdr.ctrl.ddp_rdmap_ctrl &
1443				      DDP_FLAG_LAST))
1444					/* more frags */
1445					break;
1446
1447				rv = siw_rdmap_complete(qp, 0);
1448				run_completion = 0;
1449			}
1450			break;
1451
1452		default:
1453			pr_warn("QP[%u]: RX out of state\n", qp_id(qp));
1454			rv = -EPROTO;
1455			run_completion = 0;
1456		}
1457		if (unlikely(rv != 0 && rv != -EAGAIN)) {
1458			if ((srx->state > SIW_GET_HDR ||
1459			     qp->rx_fpdu->more_ddp_segs) && run_completion)
1460				siw_rdmap_complete(qp, rv);
1461
1462			siw_dbg_qp(qp, "rx error %d, rx state %d\n", rv,
1463				   srx->state);
1464
1465			siw_qp_cm_drop(qp, 1);
1466
1467			break;
1468		}
1469		if (rv) {
1470			siw_dbg_qp(qp, "fpdu fragment, state %d, missing %d\n",
1471				   srx->state, srx->fpdu_part_rem);
1472			break;
1473		}
1474	}
1475	return srx->skb_copied;
1476}