/* QLogic qedr NIC Driver
 * Copyright (c) 2015-2016  QLogic Corporation
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/dma-mapping.h>
#include <linux/crc32.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/udp.h>
#include <linux/iommu.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <rdma/uverbs_ioctl.h>

#include <linux/qed/common_hsi.h>
#include "qedr_hsi_rdma.h"
#include <linux/qed/qed_if.h>
#include "qedr.h"
#include "verbs.h"
#include <rdma/qedr-abi.h>
#include "qedr_roce_cm.h"
#include "qedr_iw_cm.h"

#define QEDR_SRQ_WQE_ELEM_SIZE	sizeof(union rdma_srq_elm)
#define	RDMA_MAX_SGE_PER_SRQ	(4)
#define RDMA_MAX_SRQ_WQE_SIZE	(RDMA_MAX_SGE_PER_SRQ + 1)

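/* Convert a PWM doorbell offset into an offset within the doorbell address
 * space: user space adds it to the DPI base, the kernel adds it to the
 * doorbell BAR address.
 */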
#define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)

enum {
	QEDR_USER_MMAP_IO_WC = 0,
	QEDR_USER_MMAP_PHYS_PAGE,
};

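/* Clamp the response length to udata->outlen so that older user libraries,
 * which expect a shorter response structure, keep working.
 */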
static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
					size_t len)
{
	size_t min_len = min_t(size_t, len, udata->outlen);

	return ib_copy_to_udata(udata, src, min_len);
}

int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
		return -EINVAL;

	*pkey = QEDR_ROCE_PKEY_DEFAULT;
	return 0;
}

int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
		      int index, union ib_gid *sgid)
{
	struct qedr_dev *dev = get_qedr_dev(ibdev);

	memset(sgid->raw, 0, sizeof(sgid->raw));
	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);

	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
		 sgid->global.interface_id, sgid->global.subnet_prefix);

	return 0;
}

int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
{
	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
	struct qedr_device_attr *qattr = &dev->attr;
	struct qedr_srq *srq = get_qedr_srq(ibsrq);

	srq_attr->srq_limit = srq->srq_limit;
	srq_attr->max_wr = qattr->max_srq_wr;
	srq_attr->max_sge = qattr->max_sge;

	return 0;
}

int qedr_query_device(struct ib_device *ibdev,
		      struct ib_device_attr *attr, struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qedr_device_attr *qattr = &dev->attr;

	if (!dev->rdma_ctx) {
		DP_ERR(dev,
		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
		       dev->rdma_ctx);
		return -EINVAL;
	}

	memset(attr, 0, sizeof(*attr));

	attr->fw_ver = qattr->fw_ver;
	attr->sys_image_guid = qattr->sys_image_guid;
	attr->max_mr_size = qattr->max_mr_size;
	attr->page_size_cap = qattr->page_size_caps;
	attr->vendor_id = qattr->vendor_id;
	attr->vendor_part_id = qattr->vendor_part_id;
	attr->hw_ver = qattr->hw_ver;
	attr->max_qp = qattr->max_qp;
	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
	    IB_DEVICE_RC_RNR_NAK_GEN |
	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;

	attr->max_send_sge = qattr->max_sge;
	attr->max_recv_sge = qattr->max_sge;
	attr->max_sge_rd = qattr->max_sge;
	attr->max_cq = qattr->max_cq;
	attr->max_cqe = qattr->max_cqe;
	attr->max_mr = qattr->max_mr;
	attr->max_mw = qattr->max_mw;
	attr->max_pd = qattr->max_pd;
	attr->atomic_cap = dev->atomic_cap;
	attr->max_qp_init_rd_atom =
	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
	attr->max_qp_rd_atom =
	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
		attr->max_qp_init_rd_atom);

	attr->max_srq = qattr->max_srq;
	attr->max_srq_sge = qattr->max_srq_sge;
	attr->max_srq_wr = qattr->max_srq_wr;

	attr->local_ca_ack_delay = qattr->dev_ack_delay;
	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
	attr->max_ah = qattr->max_ah;

	return 0;
}

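/* Map the Ethernet link speed (in Mbps) reported by qed onto the closest
 * IB speed/width pair advertised through query_port.
 */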
static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
					    u8 *ib_width)
{
	switch (speed) {
	case 1000:
		*ib_speed = IB_SPEED_SDR;
		*ib_width = IB_WIDTH_1X;
		break;
	case 10000:
		*ib_speed = IB_SPEED_QDR;
		*ib_width = IB_WIDTH_1X;
		break;

	case 20000:
		*ib_speed = IB_SPEED_DDR;
		*ib_width = IB_WIDTH_4X;
		break;

	case 25000:
		*ib_speed = IB_SPEED_EDR;
		*ib_width = IB_WIDTH_1X;
		break;

	case 40000:
		*ib_speed = IB_SPEED_QDR;
		*ib_width = IB_WIDTH_4X;
		break;

	case 50000:
		*ib_speed = IB_SPEED_HDR;
		*ib_width = IB_WIDTH_1X;
		break;

	case 100000:
		*ib_speed = IB_SPEED_EDR;
		*ib_width = IB_WIDTH_4X;
		break;

	default:
		/* Unsupported */
		*ib_speed = IB_SPEED_SDR;
		*ib_width = IB_WIDTH_1X;
	}
}

int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
{
	struct qedr_dev *dev;
	struct qed_rdma_port *rdma_port;

	dev = get_qedr_dev(ibdev);

	if (!dev->rdma_ctx) {
		DP_ERR(dev, "rdma_ctx is NULL\n");
		return -EINVAL;
	}

	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);

	/* *attr is zeroed by the caller, so avoid zeroing it here. */
	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
		attr->state = IB_PORT_ACTIVE;
		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
	} else {
		attr->state = IB_PORT_DOWN;
		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
	}
	attr->max_mtu = IB_MTU_4096;
	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
	attr->lid = 0;
	attr->lmc = 0;
	attr->sm_lid = 0;
	attr->sm_sl = 0;
	attr->ip_gids = true;
	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
		attr->gid_tbl_len = 1;
	} else {
		attr->gid_tbl_len = QEDR_MAX_SGID;
		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
	}
	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
	attr->qkey_viol_cntr = 0;
	get_link_speed_and_width(rdma_port->link_speed,
				 &attr->active_speed, &attr->active_width);
	attr->max_msg_sz = rdma_port->max_msg_size;
	attr->max_vl_num = 4;

	return 0;
}

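/* Allocate a ucontext: reserve a DPI (doorbell page) for the new user
 * context, expose it through an mmap entry, and report the device limits
 * and supported DPM flags back to the user library.
 */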
int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
{
	struct ib_device *ibdev = uctx->device;
	int rc;
	struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
	struct qedr_alloc_ucontext_resp uresp = {};
	struct qedr_alloc_ucontext_req ureq = {};
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qed_rdma_add_user_out_params oparams;
	struct qedr_user_mmap_entry *entry;

	if (!udata)
		return -EFAULT;

	if (udata->inlen) {
		rc = ib_copy_from_udata(&ureq, udata,
					min(sizeof(ureq), udata->inlen));
		if (rc) {
			DP_ERR(dev, "Problem copying data from user space\n");
			return -EFAULT;
		}
		ctx->edpm_mode = !!(ureq.context_flags &
				    QEDR_ALLOC_UCTX_EDPM_MODE);
		ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC);
	}

	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
	if (rc) {
		DP_ERR(dev,
		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size, or closing unnecessary RoCE applications. To increase the number of DPIs, consult the qedr readme\n",
		       rc);
		return rc;
	}

	ctx->dpi = oparams.dpi;
	ctx->dpi_addr = oparams.dpi_addr;
	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
	ctx->dpi_size = oparams.dpi_size;
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry) {
		rc = -ENOMEM;
		goto err;
	}

	entry->io_address = ctx->dpi_phys_addr;
	entry->length = ctx->dpi_size;
	entry->mmap_flag = QEDR_USER_MMAP_IO_WC;
	entry->dpi = ctx->dpi;
	entry->dev = dev;
	rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry,
					 ctx->dpi_size);
	if (rc) {
		kfree(entry);
		goto err;
	}
	ctx->db_mmap_entry = &entry->rdma_entry;

	if (!dev->user_dpm_enabled)
		uresp.dpm_flags = 0;
	else if (rdma_protocol_iwarp(&dev->ibdev, 1))
		uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY;
	else
		uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED |
				  QEDR_DPM_TYPE_ROCE_LEGACY |
				  QEDR_DPM_TYPE_ROCE_EDPM_MODE;

	if (ureq.context_flags & QEDR_SUPPORT_DPM_SIZES) {
		uresp.dpm_flags |= QEDR_DPM_SIZES_SET;
		uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE;
		uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE;
		uresp.edpm_limit_size = QEDR_EDPM_MAX_SIZE;
	}

	uresp.wids_enabled = 1;
	uresp.wid_count = oparams.wid_count;
	uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry);
	uresp.db_size = ctx->dpi_size;
	uresp.max_send_wr = dev->attr.max_sqe;
	uresp.max_recv_wr = dev->attr.max_rqe;
	uresp.max_srq_wr = dev->attr.max_srq_wr;
	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
	uresp.max_cqes = QEDR_MAX_CQES;

	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (rc)
		goto err;

	ctx->dev = dev;

	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
		 &ctx->ibucontext);
	return 0;

err:
	if (!ctx->db_mmap_entry)
		dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi);
	else
		rdma_user_mmap_entry_remove(ctx->db_mmap_entry);

	return rc;
}

void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
{
	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);

	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
		 uctx);

	rdma_user_mmap_entry_remove(uctx->db_mmap_entry);
}

void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
	struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry);
	struct qedr_dev *dev = entry->dev;

	if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE)
		free_page((unsigned long)entry->address);
	else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC)
		dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi);

	kfree(entry);
}

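/* mmap handler: look up the entry for the given page offset and either
 * write-combine map the doorbell window (QEDR_USER_MMAP_IO_WC) or insert
 * the doorbell-recovery page (QEDR_USER_MMAP_PHYS_PAGE) into user space.
 */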
int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
{
	struct ib_device *dev = ucontext->device;
	size_t length = vma->vm_end - vma->vm_start;
	struct rdma_user_mmap_entry *rdma_entry;
	struct qedr_user_mmap_entry *entry;
	int rc = 0;
	u64 pfn;

	ibdev_dbg(dev,
		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);

	rdma_entry = rdma_user_mmap_entry_get(ucontext, vma);
	if (!rdma_entry) {
		ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n",
			  vma->vm_pgoff);
		return -EINVAL;
	}
	entry = get_qedr_mmap_entry(rdma_entry);
	ibdev_dbg(dev,
		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
		  entry->io_address, length, entry->mmap_flag);

	switch (entry->mmap_flag) {
	case QEDR_USER_MMAP_IO_WC:
		pfn = entry->io_address >> PAGE_SHIFT;
		rc = rdma_user_mmap_io(ucontext, vma, pfn, length,
				       pgprot_writecombine(vma->vm_page_prot),
				       rdma_entry);
		break;
	case QEDR_USER_MMAP_PHYS_PAGE:
		rc = vm_insert_page(vma, vma->vm_start,
				    virt_to_page(entry->address));
		break;
	default:
		rc = -EINVAL;
	}

	if (rc)
		ibdev_dbg(dev,
			  "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
			  entry->io_address, length, entry->mmap_flag, rc);

	rdma_user_mmap_entry_put(rdma_entry);
	return rc;
}

int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct ib_device *ibdev = ibpd->device;
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qedr_pd *pd = get_qedr_pd(ibpd);
	u16 pd_id;
	int rc;

	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
		 udata ? "User Lib" : "Kernel");

	if (!dev->rdma_ctx) {
		DP_ERR(dev, "invalid RDMA context\n");
		return -EINVAL;
	}

	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
	if (rc)
		return rc;

	pd->pd_id = pd_id;

	if (udata) {
		struct qedr_alloc_pd_uresp uresp = {
			.pd_id = pd_id,
		};
		struct qedr_ucontext *context = rdma_udata_to_drv_context(
			udata, struct qedr_ucontext, ibucontext);

		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
		if (rc) {
			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
			return rc;
		}

		pd->uctx = context;
		pd->uctx->pd = pd;
	}

	return 0;
}

void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
	struct qedr_pd *pd = get_qedr_pd(ibpd);

	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
}

static void qedr_free_pbl(struct qedr_dev *dev,
			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
{
	struct pci_dev *pdev = dev->pdev;
	int i;

	for (i = 0; i < pbl_info->num_pbls; i++) {
		if (!pbl[i].va)
			continue;
		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
				  pbl[i].va, pbl[i].pa);
	}

	kfree(pbl);
}

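/* A firmware PBL page holds pbl_size / sizeof(u64) page-buffer entries
 * (PBEs); a two-layer table squares that capacity, since every entry of
 * the first-layer page points to a full second-layer page.
 */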
#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)

#define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)

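/* Allocate num_pbls DMA-coherent PBL pages. For a two-layer table, the
 * first page is then filled with the physical addresses of all the
 * second-layer pages.
 */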
static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
					   struct qedr_pbl_info *pbl_info,
					   gfp_t flags)
{
	struct pci_dev *pdev = dev->pdev;
	struct qedr_pbl *pbl_table;
	dma_addr_t *pbl_main_tbl;
	dma_addr_t pa;
	void *va;
	int i;

	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
	if (!pbl_table)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < pbl_info->num_pbls; i++) {
		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, &pa,
					flags);
		if (!va)
			goto err;

		pbl_table[i].va = va;
		pbl_table[i].pa = pa;
	}

	/* Two-layer PBLs: if we have more than one PBL, we need to initialize
	 * the first one with physical pointers to all of the rest.
	 */
	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
	for (i = 0; i < pbl_info->num_pbls - 1; i++)
		pbl_main_tbl[i] = pbl_table[i + 1].pa;

	return pbl_table;

err:
	/* qedr_free_pbl() frees every entry with a non-NULL va and then the
	 * table itself; entries past the failed allocation were zeroed by
	 * kcalloc(), so freeing the populated ones here as well would be a
	 * double free.
	 */
	qedr_free_pbl(dev, pbl_info, pbl_table);

	return ERR_PTR(-ENOMEM);
}

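/* Decide between a one- and a two-layer PBL for num_pbes page entries and
 * compute the resulting PBL page size and page count.
 */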
static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
				struct qedr_pbl_info *pbl_info,
				u32 num_pbes, int two_layer_capable)
{
	u32 pbl_capacity;
	u32 pbl_size;
	u32 num_pbls;

	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
		if (num_pbes > MAX_PBES_TWO_LAYER) {
			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
			       num_pbes);
			return -EINVAL;
		}

		/* calculate required pbl page size */
		pbl_size = MIN_FW_PBL_PAGE_SIZE;
		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
			       NUM_PBES_ON_PAGE(pbl_size);

		while (pbl_capacity < num_pbes) {
			pbl_size *= 2;
			pbl_capacity = pbl_size / sizeof(u64);
			pbl_capacity = pbl_capacity * pbl_capacity;
		}

		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
		num_pbls++;	/* One for layer 0 (it points to the PBLs) */
		pbl_info->two_layered = true;
	} else {
		/* One-layered PBL */
		num_pbls = 1;
		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
				 roundup_pow_of_two((num_pbes * sizeof(u64))));
		pbl_info->two_layered = false;
	}

	pbl_info->num_pbls = num_pbls;
	pbl_info->pbl_size = pbl_size;
	pbl_info->num_pbes = num_pbes;

	DP_DEBUG(dev, QEDR_MSG_MR,
		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);

	return 0;
}

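/* Walk the umem scatterlist and write the DMA address of every FW-sized
 * page into the PBE array. When the system page size is larger than the
 * FW page size, each system page produces several consecutive PBEs.
 */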
static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
			       struct qedr_pbl *pbl,
			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
{
	int pbe_cnt, total_num_pbes = 0;
	u32 fw_pg_cnt, fw_pg_per_umem_pg;
	struct qedr_pbl *pbl_tbl;
	struct sg_dma_page_iter sg_iter;
	struct regpair *pbe;
	u64 pg_addr;

	if (!pbl_info->num_pbes)
		return;

	/* If we have a two-layered PBL, the first PBL points to the rest of
	 * the PBLs, and the first data entry lies in the second PBL of the
	 * table.
	 */
	if (pbl_info->two_layered)
		pbl_tbl = &pbl[1];
	else
		pbl_tbl = pbl;

	pbe = (struct regpair *)pbl_tbl->va;
	if (!pbe) {
		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
		return;
	}

	pbe_cnt = 0;

	fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift);

	for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
		pg_addr = sg_page_iter_dma_address(&sg_iter);
		for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
			pbe->lo = cpu_to_le32(pg_addr);
			pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));

			pg_addr += BIT(pg_shift);
			pbe_cnt++;
			total_num_pbes++;
			pbe++;

			if (total_num_pbes == pbl_info->num_pbes)
				return;

			/* If the given PBL is full of PBEs, move to the
			 * next PBL.
			 */
			if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
				pbl_tbl++;
				pbe = (struct regpair *)pbl_tbl->va;
				pbe_cnt = 0;
			}

			fw_pg_cnt++;
		}
	}
}

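/* Register a doorbell address and its shadow data with the qed doorbell
 * recovery mechanism so the doorbell can be re-issued if it gets dropped.
 * Skipped when the user library did not allocate recovery data
 * (db_data == NULL).
 */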
static int qedr_db_recovery_add(struct qedr_dev *dev,
				void __iomem *db_addr,
				void *db_data,
				enum qed_db_rec_width db_width,
				enum qed_db_rec_space db_space)
{
	if (!db_data) {
		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
		return 0;
	}

	return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data,
						 db_width, db_space);
}

static void qedr_db_recovery_del(struct qedr_dev *dev,
				 void __iomem *db_addr,
				 void *db_data)
{
	if (!db_data) {
		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
		return;
	}

	/* Ignore the return code; there is not much we can do about it, and
	 * the error is logged inside the call.
	 */
	dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data);
}

static int qedr_copy_cq_uresp(struct qedr_dev *dev,
			      struct qedr_cq *cq, struct ib_udata *udata,
			      u32 db_offset)
{
	struct qedr_create_cq_uresp uresp;
	int rc;

	memset(&uresp, 0, sizeof(uresp));

	uresp.db_offset = db_offset;
	uresp.icid = cq->icid;
	if (cq->q.db_mmap_entry)
		uresp.db_rec_addr =
			rdma_user_mmap_get_offset(cq->q.db_mmap_entry);

	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (rc)
		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);

	return rc;
}

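/* Advance the CQ consumer to the next CQE; flip the expected toggle bit
 * when the chain wraps past the last element.
 */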
static void consume_cqe(struct qedr_cq *cq)
{
	if (cq->latest_cqe == cq->toggle_cqe)
		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;

	cq->latest_cqe = qed_chain_consume(&cq->pbl);
}

static inline int qedr_align_cq_entries(int entries)
{
	u64 size, aligned_size;

	/* We allocate an extra entry that we don't report to the FW. */
	size = (entries + 1) * QEDR_CQE_SIZE;
	aligned_size = ALIGN(size, PAGE_SIZE);

	return aligned_size / QEDR_CQE_SIZE;
}

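/* Allocate a zeroed kernel page to hold the doorbell shadow data for
 * recovery and expose it to user space through an mmap entry.
 */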
static int qedr_init_user_db_rec(struct ib_udata *udata,
				 struct qedr_dev *dev, struct qedr_userq *q,
				 bool requires_db_rec)
{
	struct qedr_ucontext *uctx =
		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
					  ibucontext);
	struct qedr_user_mmap_entry *entry;
	int rc;

	/* Abort for a non-doorbell user queue (SRQ) or a lib that doesn't
	 * support doorbell recovery.
	 */
	if (requires_db_rec == 0 || !uctx->db_rec)
		return 0;

	/* Allocate a page for doorbell recovery and add it to the mmap
	 * entries.
	 */
	q->db_rec_data = (void *)get_zeroed_page(GFP_USER);
	if (!q->db_rec_data) {
		DP_ERR(dev, "get_zeroed_page failed\n");
		return -ENOMEM;
	}

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		goto err_free_db_data;

	entry->address = q->db_rec_data;
	entry->length = PAGE_SIZE;
	entry->mmap_flag = QEDR_USER_MMAP_PHYS_PAGE;
	rc = rdma_user_mmap_entry_insert(&uctx->ibucontext,
					 &entry->rdma_entry,
					 PAGE_SIZE);
	if (rc)
		goto err_free_entry;

	q->db_mmap_entry = &entry->rdma_entry;

	return 0;

err_free_entry:
	kfree(entry);

err_free_db_data:
	free_page((unsigned long)q->db_rec_data);
	q->db_rec_data = NULL;
	return -ENOMEM;
}

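/* Pin a user-space queue buffer, build its PBL and, when requested, attach
 * a doorbell-recovery page. For iWARP (alloc_and_init == 0) only a PBL
 * table header is allocated here; it is filled in later from the qed
 * QP-creation output (see qedr_iwarp_populate_user_qp()).
 */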
static inline int qedr_init_user_queue(struct ib_udata *udata,
				       struct qedr_dev *dev,
				       struct qedr_userq *q, u64 buf_addr,
				       size_t buf_len, bool requires_db_rec,
				       int access,
				       int alloc_and_init)
{
	u32 fw_pages;
	int rc;

	q->buf_addr = buf_addr;
	q->buf_len = buf_len;
	q->umem = ib_umem_get(&dev->ibdev, q->buf_addr, q->buf_len, access);
	if (IS_ERR(q->umem)) {
		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
		       PTR_ERR(q->umem));
		return PTR_ERR(q->umem);
	}

	fw_pages = ib_umem_page_count(q->umem) <<
	    (PAGE_SHIFT - FW_PAGE_SHIFT);

	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
	if (rc)
		goto err0;

	if (alloc_and_init) {
		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
		if (IS_ERR(q->pbl_tbl)) {
			rc = PTR_ERR(q->pbl_tbl);
			goto err0;
		}
		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
				   FW_PAGE_SHIFT);
	} else {
		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
		if (!q->pbl_tbl) {
			rc = -ENOMEM;
			goto err0;
		}
	}

	/* mmap the user address used to store doorbell data for recovery */
	return qedr_init_user_db_rec(udata, dev, q, requires_db_rec);

err0:
	ib_umem_release(q->umem);
	q->umem = NULL;

	return rc;
}

static inline void qedr_init_cq_params(struct qedr_cq *cq,
				       struct qedr_ucontext *ctx,
				       struct qedr_dev *dev, int vector,
				       int chain_entries, int page_cnt,
				       u64 pbl_ptr,
				       struct qed_rdma_create_cq_in_params
				       *params)
{
	memset(params, 0, sizeof(*params));
	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
	params->cnq_id = vector;
	params->cq_size = chain_entries - 1;
	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
	params->pbl_num_pages = page_cnt;
	params->pbl_ptr = pbl_ptr;
	params->pbl_two_level = 0;
}

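/* Ring the CQ doorbell: a single 64-bit write carries both the aggregation
 * flags and the consumer value.
 */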
static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
{
	cq->db.data.agg_flags = flags;
	cq->db.data.value = cpu_to_le32(cons);
	writeq(cq->db.raw, cq->db_addr);
}

int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct qedr_cq *cq = get_qedr_cq(ibcq);
	unsigned long sflags;
	struct qedr_dev *dev;

	dev = get_qedr_dev(ibcq->device);

	if (cq->destroyed) {
		DP_ERR(dev,
		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
		       cq, cq->icid);
		return -EINVAL;
	}

	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
		return 0;

	spin_lock_irqsave(&cq->cq_lock, sflags);

	cq->arm_flags = 0;

	if (flags & IB_CQ_SOLICITED)
		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;

	if (flags & IB_CQ_NEXT_COMP)
		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;

	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);

	spin_unlock_irqrestore(&cq->cq_lock, sflags);

	return 0;
}

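/* Create a CQ. A user CQ pins the queue buffer supplied by the library and
 * builds a PBL for it; a kernel CQ allocates a qed PBL chain instead. In
 * both cases the CQ is then created in the FW and its doorbell registered
 * for recovery.
 */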
int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		   struct ib_udata *udata)
{
	struct ib_device *ibdev = ibcq->device;
	struct qedr_ucontext *ctx = rdma_udata_to_drv_context(
		udata, struct qedr_ucontext, ibucontext);
	struct qed_rdma_destroy_cq_out_params destroy_oparams;
	struct qed_rdma_destroy_cq_in_params destroy_iparams;
	struct qed_chain_init_params chain_params = {
		.mode		= QED_CHAIN_MODE_PBL,
		.intended_use	= QED_CHAIN_USE_TO_CONSUME,
		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
		.elem_size	= sizeof(union rdma_cqe),
	};
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qed_rdma_create_cq_in_params params;
	struct qedr_create_cq_ureq ureq = {};
	int vector = attr->comp_vector;
	int entries = attr->cqe;
	struct qedr_cq *cq = get_qedr_cq(ibcq);
	int chain_entries;
	u32 db_offset;
	int page_cnt;
	u64 pbl_ptr;
	u16 icid;
	int rc;

	DP_DEBUG(dev, QEDR_MSG_INIT,
		 "create_cq: called from %s. entries=%d, vector=%d\n",
		 udata ? "User Lib" : "Kernel", entries, vector);

	if (entries > QEDR_MAX_CQES) {
		DP_ERR(dev,
		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
		       entries, QEDR_MAX_CQES);
		return -EINVAL;
	}

	chain_entries = qedr_align_cq_entries(entries);
	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
	chain_params.num_elems = chain_entries;

	/* Calculate the db offset: user space adds the DPI base, the kernel
	 * adds the doorbell BAR address.
	 */
	db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);

	if (udata) {
		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
							 udata->inlen))) {
			DP_ERR(dev,
			       "create cq: problem copying data from user space\n");
			goto err0;
		}

		if (!ureq.len) {
			DP_ERR(dev,
			       "create cq: cannot create a cq with 0 entries\n");
			goto err0;
		}

		cq->cq_type = QEDR_CQ_TYPE_USER;

		rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
					  ureq.len, true, IB_ACCESS_LOCAL_WRITE,
					  1);
		if (rc)
			goto err0;

		pbl_ptr = cq->q.pbl_tbl->pa;
		page_cnt = cq->q.pbl_info.num_pbes;

		cq->ibcq.cqe = chain_entries;
		cq->q.db_addr = ctx->dpi_addr + db_offset;
	} else {
		cq->cq_type = QEDR_CQ_TYPE_KERNEL;

		rc = dev->ops->common->chain_alloc(dev->cdev, &cq->pbl,
						   &chain_params);
		if (rc)
			goto err0;

		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
		cq->ibcq.cqe = cq->pbl.capacity;
	}

	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
			    pbl_ptr, &params);

	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
	if (rc)
		goto err1;

	cq->icid = icid;
	cq->sig = QEDR_CQ_MAGIC_NUMBER;
	spin_lock_init(&cq->cq_lock);

	if (udata) {
		rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset);
		if (rc)
			goto err2;

		rc = qedr_db_recovery_add(dev, cq->q.db_addr,
					  &cq->q.db_rec_data->db_data,
					  DB_REC_WIDTH_64B,
					  DB_REC_USER);
		if (rc)
			goto err2;

	} else {
		/* Generate the doorbell address. */
		cq->db.data.icid = cq->icid;
		cq->db_addr = dev->db_addr + db_offset;
		cq->db.data.params = DB_AGG_CMD_SET <<
		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;

		/* Point to the very last element; once we pass it we will
		 * toggle.
		 */
		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
		cq->latest_cqe = NULL;
		consume_cqe(cq);
		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);

		rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data,
					  DB_REC_WIDTH_64B, DB_REC_KERNEL);
		if (rc)
			goto err2;
	}

	DP_DEBUG(dev, QEDR_MSG_CQ,
		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
		 cq->icid, cq, params.cq_size);

	return 0;

err2:
	destroy_iparams.icid = cq->icid;
	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
				  &destroy_oparams);
err1:
	if (udata) {
		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
		ib_umem_release(cq->q.umem);
		if (cq->q.db_mmap_entry)
			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
	} else {
		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
	}
err0:
	return -EINVAL;
}

int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
	struct qedr_cq *cq = get_qedr_cq(ibcq);

	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);

	return 0;
}

#define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
#define QEDR_DESTROY_CQ_ITER_DURATION		(10)

void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
	struct qed_rdma_destroy_cq_out_params oparams;
	struct qed_rdma_destroy_cq_in_params iparams;
	struct qedr_cq *cq = get_qedr_cq(ibcq);
	int iter;

	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);

	cq->destroyed = 1;

	/* GSI CQs are handled by the driver, so they don't exist in the FW */
	if (cq->cq_type == QEDR_CQ_TYPE_GSI) {
		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
		return;
	}

	iparams.icid = cq->icid;
	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
	dev->ops->common->chain_free(dev->cdev, &cq->pbl);

	if (udata) {
		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
		ib_umem_release(cq->q.umem);

		if (cq->q.db_rec_data) {
			qedr_db_recovery_del(dev, cq->q.db_addr,
					     &cq->q.db_rec_data->db_data);
			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
		}
	} else {
		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
	}

	/* We don't want the IRQ handler to handle a non-existing CQ, so we
	 * wait until all CNQ interrupts, if any, are received. This will
	 * always happen and will always happen very fast. If not, then a
	 * serious error has occurred; that is why we can use a long delay.
	 * We spin for a short time so we don't lose time on context switching
	 * in case all the completions are handled within that span. Otherwise
	 * we sleep for a while and check again. Since the CNQ may be
	 * associated with (only) the current CPU, we use msleep to allow the
	 * current CPU to be freed.
	 * The CNQ notification count is increased in qedr_irq_handler().
	 */
	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
		iter--;
	}

	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
		iter--;
	}

	/* Note that we don't need explicit code to wait for the completion
	 * of the event handler because it is invoked from the EQ. Since the
	 * destroy-CQ ramrod has also been received on the EQ, we can be
	 * certain that there's no event handler in process.
	 */
}

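/* Extract the source/destination GIDs, VLAN and RoCE mode (v1, v2/IPv4 or
 * v2/IPv6) from the AH attribute's GID entry and fill them into the qed
 * modify-QP parameters. The GID dwords are converted to host byte order.
 */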
static inline int get_gid_info_from_table(struct ib_qp *ibqp,
					  struct ib_qp_attr *attr,
					  int attr_mask,
					  struct qed_rdma_modify_qp_in_params
					  *qp_params)
{
	const struct ib_gid_attr *gid_attr;
	enum rdma_network_type nw_type;
	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
	u32 ipv4_addr;
	int ret;
	int i;

	gid_attr = grh->sgid_attr;
	ret = rdma_read_gid_l2_fields(gid_attr, &qp_params->vlan_id, NULL);
	if (ret)
		return ret;

	nw_type = rdma_gid_attr_network_type(gid_attr);
	switch (nw_type) {
	case RDMA_NETWORK_IPV6:
		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
		       sizeof(qp_params->sgid));
		memcpy(&qp_params->dgid.bytes[0],
		       &grh->dgid,
		       sizeof(qp_params->dgid));
		qp_params->roce_mode = ROCE_V2_IPV6;
		SET_FIELD(qp_params->modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
		break;
	case RDMA_NETWORK_IB:
		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
		       sizeof(qp_params->sgid));
		memcpy(&qp_params->dgid.bytes[0],
		       &grh->dgid,
		       sizeof(qp_params->dgid));
		qp_params->roce_mode = ROCE_V1;
		break;
	case RDMA_NETWORK_IPV4:
		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
		ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
		qp_params->sgid.ipv4_addr = ipv4_addr;
		ipv4_addr =
		    qedr_get_ipv4_from_gid(grh->dgid.raw);
		qp_params->dgid.ipv4_addr = ipv4_addr;
		SET_FIELD(qp_params->modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
		qp_params->roce_mode = ROCE_V2_IPV4;
		break;
	}

	for (i = 0; i < 4; i++) {
		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
	}

	if (qp_params->vlan_id >= VLAN_CFI_MASK)
		qp_params->vlan_id = 0;

	return 0;
}

static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
			       struct ib_qp_init_attr *attrs,
			       struct ib_udata *udata)
{
	struct qedr_device_attr *qattr = &dev->attr;

	/* QP0... attrs->qp_type == IB_QPT_GSI */
	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
		DP_DEBUG(dev, QEDR_MSG_QP,
			 "create qp: unsupported qp type=0x%x requested\n",
			 attrs->qp_type);
		return -EOPNOTSUPP;
	}

	if (attrs->cap.max_send_wr > qattr->max_sqe) {
		DP_ERR(dev,
		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
		       attrs->cap.max_send_wr, qattr->max_sqe);
		return -EINVAL;
	}

	if (attrs->cap.max_inline_data > qattr->max_inline) {
		DP_ERR(dev,
		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
		       attrs->cap.max_inline_data, qattr->max_inline);
		return -EINVAL;
	}

	if (attrs->cap.max_send_sge > qattr->max_sge) {
		DP_ERR(dev,
		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
		       attrs->cap.max_send_sge, qattr->max_sge);
		return -EINVAL;
	}

	if (attrs->cap.max_recv_sge > qattr->max_sge) {
		DP_ERR(dev,
		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
		       attrs->cap.max_recv_sge, qattr->max_sge);
		return -EINVAL;
	}

	/* Unprivileged user space cannot create a special QP */
	if (udata && attrs->qp_type == IB_QPT_GSI) {
		DP_ERR(dev,
		       "create qp: userspace can't create special QPs of type=0x%x\n",
		       attrs->qp_type);
		return -EINVAL;
	}

	return 0;
}

static int qedr_copy_srq_uresp(struct qedr_dev *dev,
			       struct qedr_srq *srq, struct ib_udata *udata)
{
	struct qedr_create_srq_uresp uresp = {};
	int rc;

	uresp.srq_id = srq->srq_id;

	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (rc)
		DP_ERR(dev, "create srq: problem copying data to user space\n");

	return rc;
}

static void qedr_copy_rq_uresp(struct qedr_dev *dev,
			       struct qedr_create_qp_uresp *uresp,
			       struct qedr_qp *qp)
{
	/* iWARP requires two doorbells per RQ. */
	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
		uresp->rq_db_offset =
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
	} else {
		uresp->rq_db_offset =
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
	}

	uresp->rq_icid = qp->icid;
	if (qp->urq.db_mmap_entry)
		uresp->rq_db_rec_addr =
			rdma_user_mmap_get_offset(qp->urq.db_mmap_entry);
}

static void qedr_copy_sq_uresp(struct qedr_dev *dev,
			       struct qedr_create_qp_uresp *uresp,
			       struct qedr_qp *qp)
{
	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);

	/* iWARP uses the same cid for the rq and the sq */
	if (rdma_protocol_iwarp(&dev->ibdev, 1))
		uresp->sq_icid = qp->icid;
	else
		uresp->sq_icid = qp->icid + 1;

	if (qp->usq.db_mmap_entry)
		uresp->sq_db_rec_addr =
			rdma_user_mmap_get_offset(qp->usq.db_mmap_entry);
}

static int qedr_copy_qp_uresp(struct qedr_dev *dev,
			      struct qedr_qp *qp, struct ib_udata *udata,
			      struct qedr_create_qp_uresp *uresp)
{
	int rc;

	memset(uresp, 0, sizeof(*uresp));
	qedr_copy_sq_uresp(dev, uresp, qp);
	qedr_copy_rq_uresp(dev, uresp, qp);

	uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
	uresp->qp_id = qp->qp_id;

	rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp));
	if (rc)
		DP_ERR(dev,
		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
		       qp->icid);

	return rc;
}

static void qedr_set_common_qp_params(struct qedr_dev *dev,
				      struct qedr_qp *qp,
				      struct qedr_pd *pd,
				      struct ib_qp_init_attr *attrs)
{
	spin_lock_init(&qp->q_lock);
	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
		kref_init(&qp->refcnt);
		init_completion(&qp->iwarp_cm_comp);
	}
	qp->pd = pd;
	qp->qp_type = attrs->qp_type;
	qp->max_inline_data = attrs->cap.max_inline_data;
	qp->sq.max_sges = attrs->cap.max_send_sge;
	qp->state = QED_ROCE_QP_STATE_RESET;
	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
	qp->sq_cq = get_qedr_cq(attrs->send_cq);
	qp->dev = dev;

	if (attrs->srq) {
		qp->srq = get_qedr_srq(attrs->srq);
	} else {
		qp->rq_cq = get_qedr_cq(attrs->recv_cq);
		qp->rq.max_sges = attrs->cap.max_recv_sge;
		DP_DEBUG(dev, QEDR_MSG_QP,
			 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
			 qp->rq.max_sges, qp->rq_cq->icid);
	}

	DP_DEBUG(dev, QEDR_MSG_QP,
		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
		 pd->pd_id, qp->qp_type, qp->max_inline_data,
		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
	DP_DEBUG(dev, QEDR_MSG_QP,
		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
		 qp->sq.max_sges, qp->sq_cq->icid);
}

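/* Set the kernel-QP RoCE doorbell addresses (the SQ uses icid + 1, the RQ
 * uses icid) and register them with doorbell recovery.
 */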
static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
	int rc;

	qp->sq.db = dev->db_addr +
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
	qp->sq.db_data.data.icid = qp->icid + 1;
	rc = qedr_db_recovery_add(dev, qp->sq.db,
				  &qp->sq.db_data,
				  DB_REC_WIDTH_32B,
				  DB_REC_KERNEL);
	if (rc)
		return rc;

	if (!qp->srq) {
		qp->rq.db = dev->db_addr +
			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
		qp->rq.db_data.data.icid = qp->icid;

		rc = qedr_db_recovery_add(dev, qp->rq.db,
					  &qp->rq.db_data,
					  DB_REC_WIDTH_32B,
					  DB_REC_KERNEL);
		if (rc)
			qedr_db_recovery_del(dev, qp->sq.db,
					     &qp->sq.db_data);
	}

	return rc;
}

static int qedr_check_srq_params(struct qedr_dev *dev,
				 struct ib_srq_init_attr *attrs,
				 struct ib_udata *udata)
{
	struct qedr_device_attr *qattr = &dev->attr;

	if (attrs->attr.max_wr > qattr->max_srq_wr) {
		DP_ERR(dev,
		       "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
		       attrs->attr.max_wr, qattr->max_srq_wr);
		return -EINVAL;
	}

	if (attrs->attr.max_sge > qattr->max_sge) {
		DP_ERR(dev,
		       "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
		       attrs->attr.max_sge, qattr->max_sge);
		return -EINVAL;
	}

	return 0;
}

static void qedr_free_srq_user_params(struct qedr_srq *srq)
{
	qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
	ib_umem_release(srq->usrq.umem);
	ib_umem_release(srq->prod_umem);
}

static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
{
	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
	struct qedr_dev *dev = srq->dev;

	dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);

	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
			  hw_srq->virt_prod_pair_addr,
			  hw_srq->phy_prod_pair_addr);
}

static int qedr_init_srq_user_params(struct ib_udata *udata,
				     struct qedr_srq *srq,
				     struct qedr_create_srq_ureq *ureq,
				     int access)
{
	struct scatterlist *sg;
	int rc;

	rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
				  ureq->srq_len, false, access, 1);
	if (rc)
		return rc;

	srq->prod_umem = ib_umem_get(srq->ibsrq.device, ureq->prod_pair_addr,
				     sizeof(struct rdma_srq_producers), access);
	if (IS_ERR(srq->prod_umem)) {
		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
		ib_umem_release(srq->usrq.umem);
		DP_ERR(srq->dev,
		       "create srq: failed ib_umem_get for producer, got %ld\n",
		       PTR_ERR(srq->prod_umem));
		return PTR_ERR(srq->prod_umem);
	}

	sg = srq->prod_umem->sg_head.sgl;
	srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);

	return 0;
}

static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
					struct qedr_dev *dev,
					struct ib_srq_init_attr *init_attr)
{
	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
	struct qed_chain_init_params params = {
		.mode		= QED_CHAIN_MODE_PBL,
		.intended_use	= QED_CHAIN_USE_TO_CONSUME_PRODUCE,
		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
		.elem_size	= QEDR_SRQ_WQE_ELEM_SIZE,
	};
	dma_addr_t phy_prod_pair_addr;
	u32 num_elems;
	void *va;
	int rc;

	va = dma_alloc_coherent(&dev->pdev->dev,
				sizeof(struct rdma_srq_producers),
				&phy_prod_pair_addr, GFP_KERNEL);
	if (!va) {
		DP_ERR(dev,
		       "create srq: failed to allocate dma memory for producer\n");
		return -ENOMEM;
	}

	hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
	hw_srq->virt_prod_pair_addr = va;

	num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
	params.num_elems = num_elems;

	rc = dev->ops->common->chain_alloc(dev->cdev, &hw_srq->pbl, &params);
	if (rc)
		goto err0;

	hw_srq->num_elems = num_elems;

	return 0;

err0:
	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
			  va, phy_prod_pair_addr);
	return rc;
}

int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
		    struct ib_udata *udata)
{
	struct qed_rdma_destroy_srq_in_params destroy_in_params;
	struct qed_rdma_create_srq_in_params in_params = {};
	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
	struct qed_rdma_create_srq_out_params out_params;
	struct qedr_pd *pd = get_qedr_pd(ibsrq->pd);
	struct qedr_create_srq_ureq ureq = {};
	u64 pbl_base_addr, phy_prod_pair_addr;
	struct qedr_srq_hwq_info *hw_srq;
	u32 page_cnt, page_size;
	struct qedr_srq *srq = get_qedr_srq(ibsrq);
	int rc = 0;

	DP_DEBUG(dev, QEDR_MSG_QP,
		 "create SRQ called from %s (pd %p)\n",
		 (udata) ? "User lib" : "kernel", pd);

	rc = qedr_check_srq_params(dev, init_attr, udata);
	if (rc)
		return -EINVAL;

	srq->dev = dev;
	hw_srq = &srq->hw_srq;
	spin_lock_init(&srq->lock);

	hw_srq->max_wr = init_attr->attr.max_wr;
	hw_srq->max_sges = init_attr->attr.max_sge;

	if (udata) {
		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
							 udata->inlen))) {
			DP_ERR(dev,
			       "create srq: problem copying data from user space\n");
			goto err0;
		}

		rc = qedr_init_srq_user_params(udata, srq, &ureq, 0);
		if (rc)
			goto err0;

		page_cnt = srq->usrq.pbl_info.num_pbes;
		pbl_base_addr = srq->usrq.pbl_tbl->pa;
		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
		page_size = PAGE_SIZE;
	} else {
		struct qed_chain *pbl;

		rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
		if (rc)
			goto err0;

		pbl = &hw_srq->pbl;
		page_cnt = qed_chain_get_page_cnt(pbl);
		pbl_base_addr = qed_chain_get_pbl_phys(pbl);
		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
		page_size = QED_CHAIN_PAGE_SIZE;
	}

	in_params.pd_id = pd->pd_id;
	in_params.pbl_base_addr = pbl_base_addr;
	in_params.prod_pair_addr = phy_prod_pair_addr;
	in_params.num_pages = page_cnt;
	in_params.page_size = page_size;

	rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
	if (rc)
		goto err1;

	srq->srq_id = out_params.srq_id;

	if (udata) {
		rc = qedr_copy_srq_uresp(dev, srq, udata);
		if (rc)
			goto err2;
	}

	rc = xa_insert_irq(&dev->srqs, srq->srq_id, srq, GFP_KERNEL);
	if (rc)
		goto err2;

	DP_DEBUG(dev, QEDR_MSG_SRQ,
		 "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
	return 0;

err2:
	destroy_in_params.srq_id = srq->srq_id;

	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
err1:
	if (udata)
		qedr_free_srq_user_params(srq);
	else
		qedr_free_srq_kernel_params(srq);
err0:
	return -EFAULT;
}

void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
	struct qed_rdma_destroy_srq_in_params in_params = {};
	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
	struct qedr_srq *srq = get_qedr_srq(ibsrq);

	xa_erase_irq(&dev->srqs, srq->srq_id);
	in_params.srq_id = srq->srq_id;
	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);

	if (ibsrq->uobject)
		qedr_free_srq_user_params(srq);
	else
		qedr_free_srq_kernel_params(srq);

	DP_DEBUG(dev, QEDR_MSG_SRQ,
		 "destroy srq: destroyed srq with srq_id=0x%0x\n",
		 srq->srq_id);
}

int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
{
	struct qed_rdma_modify_srq_in_params in_params = {};
	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
	struct qedr_srq *srq = get_qedr_srq(ibsrq);
	int rc;

	if (attr_mask & IB_SRQ_MAX_WR) {
		DP_ERR(dev,
		       "modify srq: invalid attribute mask=0x%x specified for %p\n",
		       attr_mask, srq);
		return -EINVAL;
	}

	if (attr_mask & IB_SRQ_LIMIT) {
		if (attr->srq_limit >= srq->hw_srq.max_wr) {
			DP_ERR(dev,
			       "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
			       attr->srq_limit, srq->hw_srq.max_wr);
			return -EINVAL;
		}

		in_params.srq_id = srq->srq_id;
		in_params.wqe_limit = attr->srq_limit;
		rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
		if (rc)
			return rc;
	}

	srq->srq_limit = attr->srq_limit;

	DP_DEBUG(dev, QEDR_MSG_SRQ,
		 "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);

	return 0;
}

static inline void
qedr_init_common_qp_in_params(struct qedr_dev *dev,
			      struct qedr_pd *pd,
			      struct qedr_qp *qp,
			      struct ib_qp_init_attr *attrs,
			      bool fmr_and_reserved_lkey,
			      struct qed_rdma_create_qp_in_params *params)
{
	/* QP handle to be written in an async event */
	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);

	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
	params->pd = pd->pd_id;
	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
	params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
	params->stats_queue = 0;

	if (qp->srq) {
		params->srq_id = qp->srq->srq_id;
		params->use_srq = true;
	} else {
		params->srq_id = 0;
		params->use_srq = false;
	}
}

static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
{
	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
		 "qp=%p. "
		 "sq_addr=0x%llx, "
		 "sq_len=%zd, "
		 "rq_addr=0x%llx, "
		 "rq_len=%zd"
		 "\n",
		 qp,
		 qp->usq.buf_addr,
		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
}

static inline void
qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
			    struct qedr_qp *qp,
			    struct qed_rdma_create_qp_out_params *out_params)
{
	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;

	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
	if (!qp->srq) {
		qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
		qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
	}

	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
}

static void qedr_cleanup_user(struct qedr_dev *dev,
			      struct qedr_ucontext *ctx,
			      struct qedr_qp *qp)
{
	ib_umem_release(qp->usq.umem);
	qp->usq.umem = NULL;

	ib_umem_release(qp->urq.umem);
	qp->urq.umem = NULL;

	if (rdma_protocol_roce(&dev->ibdev, 1)) {
		qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
		qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
	} else {
		kfree(qp->usq.pbl_tbl);
		kfree(qp->urq.pbl_tbl);
	}

	if (qp->usq.db_rec_data) {
		qedr_db_recovery_del(dev, qp->usq.db_addr,
				     &qp->usq.db_rec_data->db_data);
		rdma_user_mmap_entry_remove(qp->usq.db_mmap_entry);
	}

	if (qp->urq.db_rec_data) {
		qedr_db_recovery_del(dev, qp->urq.db_addr,
				     &qp->urq.db_rec_data->db_data);
		rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry);
	}

	if (rdma_protocol_iwarp(&dev->ibdev, 1))
		qedr_db_recovery_del(dev, qp->urq.db_rec_db2_addr,
				     &qp->urq.db_rec_db2_data);
}

1751static int qedr_create_user_qp(struct qedr_dev *dev,
1752			       struct qedr_qp *qp,
1753			       struct ib_pd *ibpd,
1754			       struct ib_udata *udata,
1755			       struct ib_qp_init_attr *attrs)
1756{
1757	struct qed_rdma_create_qp_in_params in_params;
1758	struct qed_rdma_create_qp_out_params out_params;
1759	struct qedr_pd *pd = get_qedr_pd(ibpd);
1760	struct qedr_create_qp_uresp uresp;
1761	struct qedr_ucontext *ctx = pd ? pd->uctx : NULL;
1762	struct qedr_create_qp_ureq ureq;
1763	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1764	int rc = -EINVAL;
1765
1766	qp->create_type = QEDR_QP_CREATE_USER;
1767	memset(&ureq, 0, sizeof(ureq));
1768	rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen));
1769	if (rc) {
1770		DP_ERR(dev, "Problem copying data from user space\n");
1771		return rc;
1772	}
1773
1774	/* SQ - read access only (0) */
1775	rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
1776				  ureq.sq_len, true, 0, alloc_and_init);
1777	if (rc)
1778		return rc;
1779
1780	if (!qp->srq) {
1781		/* RQ - read access only (0) */
1782		rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
1783					  ureq.rq_len, true, 0, alloc_and_init);
1784		if (rc)
1785			return rc;
1786	}
1787
1788	memset(&in_params, 0, sizeof(in_params));
1789	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1790	in_params.qp_handle_lo = ureq.qp_handle_lo;
1791	in_params.qp_handle_hi = ureq.qp_handle_hi;
1792	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1793	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1794	if (!qp->srq) {
1795		in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1796		in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1797	}
1798
1799	if (ctx)
1800		SET_FIELD(in_params.flags, QED_ROCE_EDPM_MODE, ctx->edpm_mode);
1801
1802	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1803					      &in_params, &out_params);
1804
1805	if (!qp->qed_qp) {
1806		rc = -ENOMEM;
1807		goto err1;
1808	}
1809
1810	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1811		qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1812
1813	qp->qp_id = out_params.qp_id;
1814	qp->icid = out_params.icid;
1815
1816	rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
1817	if (rc)
1818		goto err;
1819
1820	/* db offset was calculated in copy_qp_uresp, now set in the user q */
1821	ctx = pd->uctx;
1822	qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
1823	qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
1824
1825	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1826		qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset;
1827
1828		/* Calculate the db_rec_db2 data here since it is constant,
1829		 * so there is no need to reflect it from user space.
1830		 */
1831		qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid);
1832		qp->urq.db_rec_db2_data.data.value =
1833			cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD);
1834	}
1835
1836	rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
1837				  &qp->usq.db_rec_data->db_data,
1838				  DB_REC_WIDTH_32B,
1839				  DB_REC_USER);
1840	if (rc)
1841		goto err;
1842
1843	rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
1844				  &qp->urq.db_rec_data->db_data,
1845				  DB_REC_WIDTH_32B,
1846				  DB_REC_USER);
1847	if (rc)
1848		goto err;
1849
1850	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1851		rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr,
1852					  &qp->urq.db_rec_db2_data,
1853					  DB_REC_WIDTH_32B,
1854					  DB_REC_USER);
1855		if (rc)
1856			goto err;
1857	}
1858	qedr_qp_user_print(dev, qp);
1859
1860	return rc;
1861err:
1862	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1863	if (rc)
1864		DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
1865
1866err1:
1867	qedr_cleanup_user(dev, ctx, qp);
1868	return rc;
1869}
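/*
 * Editor's sketch (not part of the driver): the clamped-copy pattern used
 * above (ib_copy_from_udata() with min(sizeof(ureq), udata->inlen), and
 * qedr_ib_copy_to_udata() on the response side). Copying only the common
 * prefix of the kernel and user struct sizes lets an older user library
 * and a newer kernel interoperate: fields unknown to one side are simply
 * not transferred. All names below are hypothetical.
 */
#include <stddef.h>
#include <string.h>

struct abi_v1 { unsigned int a; };                 /* older user library */
struct abi_v2 { unsigned int a; unsigned int b; }; /* newer kernel */

static size_t clamped_copy(void *dst, size_t dst_len,
			   const void *src, size_t src_len)
{
	size_t n = dst_len < src_len ? dst_len : src_len;

	memcpy(dst, src, n);	/* only the common prefix is transferred */
	return n;
}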
1870
1871static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1872{
1873	int rc;
1874
1875	qp->sq.db = dev->db_addr +
1876	    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1877	qp->sq.db_data.data.icid = qp->icid;
1878
1879	rc = qedr_db_recovery_add(dev, qp->sq.db,
1880				  &qp->sq.db_data,
1881				  DB_REC_WIDTH_32B,
1882				  DB_REC_KERNEL);
1883	if (rc)
1884		return rc;
1885
1886	qp->rq.db = dev->db_addr +
1887		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1888	qp->rq.db_data.data.icid = qp->icid;
1889	qp->rq.iwarp_db2 = dev->db_addr +
1890			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1891	qp->rq.iwarp_db2_data.data.icid = qp->icid;
1892	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
1893
1894	rc = qedr_db_recovery_add(dev, qp->rq.db,
1895				  &qp->rq.db_data,
1896				  DB_REC_WIDTH_32B,
1897				  DB_REC_KERNEL);
1898	if (rc)
1899		return rc;
1900
1901	rc = qedr_db_recovery_add(dev, qp->rq.iwarp_db2,
1902				  &qp->rq.iwarp_db2_data,
1903				  DB_REC_WIDTH_32B,
1904				  DB_REC_KERNEL);
1905	return rc;
1906}
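/*
 * Editor's sketch (not part of the driver, replay semantics are an
 * assumption): the qedr_db_recovery_add() calls above pair each doorbell
 * address with the shadow value last written to it, so that after a
 * doorbell drop the driver can walk its list and re-ring every doorbell.
 * A hypothetical minimal model:
 */
struct db_rec_entry {
	void __iomem *db_addr;	/* doorbell register */
	u32 *db_data;		/* shadow of the last value written */
};

static void db_recovery_replay(struct db_rec_entry *e, int n)
{
	int i;

	for (i = 0; i < n; i++)
		writel(*e[i].db_data, e[i].db_addr);
}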
1907
1908static int
1909qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1910			   struct qedr_qp *qp,
1911			   struct qed_rdma_create_qp_in_params *in_params,
1912			   u32 n_sq_elems, u32 n_rq_elems)
1913{
1914	struct qed_rdma_create_qp_out_params out_params;
1915	struct qed_chain_init_params params = {
1916		.mode		= QED_CHAIN_MODE_PBL,
1917		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
1918	};
1919	int rc;
1920
1921	params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
1922	params.num_elems = n_sq_elems;
1923	params.elem_size = QEDR_SQE_ELEMENT_SIZE;
1924
1925	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
1926	if (rc)
1927		return rc;
1928
1929	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1930	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1931
1932	params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
1933	params.num_elems = n_rq_elems;
1934	params.elem_size = QEDR_RQE_ELEMENT_SIZE;
1935
1936	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
1937	if (rc)
1938		return rc;
1939
1940	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1941	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1942
1943	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1944					      in_params, &out_params);
1945
1946	if (!qp->qed_qp)
1947		return -EINVAL;
1948
1949	qp->qp_id = out_params.qp_id;
1950	qp->icid = out_params.icid;
1951
1952	return qedr_set_roce_db_info(dev, qp);
1953}
1954
1955static int
1956qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
1957			    struct qedr_qp *qp,
1958			    struct qed_rdma_create_qp_in_params *in_params,
1959			    u32 n_sq_elems, u32 n_rq_elems)
1960{
1961	struct qed_rdma_create_qp_out_params out_params;
1962	struct qed_chain_init_params params = {
1963		.mode		= QED_CHAIN_MODE_PBL,
1964		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
1965	};
1966	int rc;
1967
1968	in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
1969						     QEDR_SQE_ELEMENT_SIZE,
1970						     QED_CHAIN_PAGE_SIZE,
1971						     QED_CHAIN_MODE_PBL);
1972	in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
1973						     QEDR_RQE_ELEMENT_SIZE,
1974						     QED_CHAIN_PAGE_SIZE,
1975						     QED_CHAIN_MODE_PBL);
1976
1977	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1978					      in_params, &out_params);
1979
1980	if (!qp->qed_qp)
1981		return -EINVAL;
1982
1983	/* Now we allocate the chain */
1984
1985	params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
1986	params.num_elems = n_sq_elems;
1987	params.elem_size = QEDR_SQE_ELEMENT_SIZE;
1988	params.ext_pbl_virt = out_params.sq_pbl_virt;
1989	params.ext_pbl_phys = out_params.sq_pbl_phys;
1990
1991	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
1992	if (rc)
1993		goto err;
1994
1995	params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
1996	params.num_elems = n_rq_elems;
1997	params.elem_size = QEDR_RQE_ELEMENT_SIZE;
1998	params.ext_pbl_virt = out_params.rq_pbl_virt;
1999	params.ext_pbl_phys = out_params.rq_pbl_phys;
2000
2001	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
2002	if (rc)
2003		goto err;
2004
2005	qp->qp_id = out_params.qp_id;
2006	qp->icid = out_params.icid;
2007
2008	return qedr_set_iwarp_db_info(dev, qp);
2009
2010err:
2011	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2012
2013	return rc;
2014}
2015
2016static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
2017{
2018	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
2019	kfree(qp->wqe_wr_id);
2020
2021	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
2022	kfree(qp->rqe_wr_id);
2023
2024	/* The GSI QP is not registered with the db mechanism, so nothing to delete */
2025	if (qp->qp_type == IB_QPT_GSI)
2026		return;
2027
2028	qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
2029
2030	if (!qp->srq) {
2031		qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data);
2032
2033		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2034			qedr_db_recovery_del(dev, qp->rq.iwarp_db2,
2035					     &qp->rq.iwarp_db2_data);
2036	}
2037}
2038
2039static int qedr_create_kernel_qp(struct qedr_dev *dev,
2040				 struct qedr_qp *qp,
2041				 struct ib_pd *ibpd,
2042				 struct ib_qp_init_attr *attrs)
2043{
2044	struct qed_rdma_create_qp_in_params in_params;
2045	struct qedr_pd *pd = get_qedr_pd(ibpd);
2046	int rc = -EINVAL;
2047	u32 n_rq_elems;
2048	u32 n_sq_elems;
2049	u32 n_sq_entries;
2050
2051	memset(&in_params, 0, sizeof(in_params));
2052	qp->create_type = QEDR_QP_CREATE_KERNEL;
2053
2054	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
2055	 * the ring. The ring should allow at least a single WR, even if the
2056	 * user requested none, due to allocation issues.
2057	 * We should add an extra WR since the prod and cons indices of
2058	 * wqe_wr_id are managed in such a way that the WQ is considered full
2059	 * when (prod+1)%max_wr==cons. We currently don't do that because we
2060	 * double the number of entries due to an iSER issue that pushes far
2061	 * more WRs than indicated. If we declined its ib_post_send() we would
2062	 * get error prints in dmesg that we'd like to avoid.
2063	 */
2064	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
2065			      dev->attr.max_sqe);
2066
2067	qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
2068				GFP_KERNEL);
2069	if (!qp->wqe_wr_id) {
2070		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
2071		return -ENOMEM;
2072	}
2073
2074	/* QP handle to be written in CQE */
2075	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
2076	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
2077
2078	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
2079	 * the ring. The ring should allow at least a single WR, even if the
2080	 * user requested none, due to allocation issues.
2081	 */
2082	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
2083
2084	/* Allocate driver internal RQ array */
2085	qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
2086				GFP_KERNEL);
2087	if (!qp->rqe_wr_id) {
2088		DP_ERR(dev,
2089		       "create qp: failed RQ shadow memory allocation\n");
2090		kfree(qp->wqe_wr_id);
2091		return -ENOMEM;
2092	}
2093
2094	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
2095
2096	n_sq_entries = attrs->cap.max_send_wr;
2097	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
2098	n_sq_entries = max_t(u32, n_sq_entries, 1);
2099	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2100
2101	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
2102
2103	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2104		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
2105						 n_sq_elems, n_rq_elems);
2106	else
2107		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
2108						n_sq_elems, n_rq_elems);
2109	if (rc)
2110		qedr_cleanup_kernel(dev, qp);
2111
2112	return rc;
2113}
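/*
 * Editor's sketch (not part of the driver): how the kernel-QP sizes above
 * relate. The shadow array qp->wqe_wr_id is sized from max_send_wr scaled
 * by dev->wq_multiplier (the iSER workaround described above), while the
 * HW chain is sized from the unscaled WR count times elements-per-WQE.
 * All values here are hypothetical stand-ins.
 */
static unsigned int example_sq_elems(unsigned int max_send_wr,
				     unsigned int max_sqe,
				     unsigned int elems_per_sqe)
{
	unsigned int n = max_send_wr;

	if (n > max_sqe)		/* clamp to device capability */
		n = max_sqe;
	if (n < 1)			/* always allow at least one WR */
		n = 1;
	return n * elems_per_sqe;	/* ring elements to allocate */
}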
2114
2115struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
2116			     struct ib_qp_init_attr *attrs,
2117			     struct ib_udata *udata)
2118{
2119	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2120	struct qedr_pd *pd = get_qedr_pd(ibpd);
2121	struct qedr_qp *qp;
2122	struct ib_qp *ibqp;
2123	int rc = 0;
2124
2125	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
2126		 udata ? "user library" : "kernel", pd);
2127
2128	rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
2129	if (rc)
2130		return ERR_PTR(rc);
2131
2132	DP_DEBUG(dev, QEDR_MSG_QP,
2133		 "create qp: called from %s, event_handler=%p, pd=%p, sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
2134		 udata ? "user library" : "kernel", attrs->event_handler, pd,
2135		 get_qedr_cq(attrs->send_cq),
2136		 get_qedr_cq(attrs->send_cq)->icid,
2137		 get_qedr_cq(attrs->recv_cq),
2138		 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
2139
2140	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
2141	if (!qp) {
2142		DP_ERR(dev, "create qp: failed allocating memory\n");
2143		return ERR_PTR(-ENOMEM);
2144	}
2145
2146	qedr_set_common_qp_params(dev, qp, pd, attrs);
2147
2148	if (attrs->qp_type == IB_QPT_GSI) {
2149		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
2150		if (IS_ERR(ibqp))
2151			kfree(qp);
2152		return ibqp;
2153	}
2154
2155	if (udata)
2156		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
2157	else
2158		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
2159
2160	if (rc)
2161		goto err;
2162
2163	qp->ibqp.qp_num = qp->qp_id;
2164
2165	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
2166		rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
2167		if (rc)
2168			goto err;
2169	}
2170
2171	return &qp->ibqp;
2172
2173err:
2174	kfree(qp);
2175
2176	return ERR_PTR(-EFAULT);
2177}
2178
2179static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
2180{
2181	switch (qp_state) {
2182	case QED_ROCE_QP_STATE_RESET:
2183		return IB_QPS_RESET;
2184	case QED_ROCE_QP_STATE_INIT:
2185		return IB_QPS_INIT;
2186	case QED_ROCE_QP_STATE_RTR:
2187		return IB_QPS_RTR;
2188	case QED_ROCE_QP_STATE_RTS:
2189		return IB_QPS_RTS;
2190	case QED_ROCE_QP_STATE_SQD:
2191		return IB_QPS_SQD;
2192	case QED_ROCE_QP_STATE_ERR:
2193		return IB_QPS_ERR;
2194	case QED_ROCE_QP_STATE_SQE:
2195		return IB_QPS_SQE;
2196	}
2197	return IB_QPS_ERR;
2198}
2199
2200static enum qed_roce_qp_state qedr_get_state_from_ibqp(
2201					enum ib_qp_state qp_state)
2202{
2203	switch (qp_state) {
2204	case IB_QPS_RESET:
2205		return QED_ROCE_QP_STATE_RESET;
2206	case IB_QPS_INIT:
2207		return QED_ROCE_QP_STATE_INIT;
2208	case IB_QPS_RTR:
2209		return QED_ROCE_QP_STATE_RTR;
2210	case IB_QPS_RTS:
2211		return QED_ROCE_QP_STATE_RTS;
2212	case IB_QPS_SQD:
2213		return QED_ROCE_QP_STATE_SQD;
2214	case IB_QPS_ERR:
2215		return QED_ROCE_QP_STATE_ERR;
2216	default:
2217		return QED_ROCE_QP_STATE_ERR;
2218	}
2219}
2220
2221static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
2222{
2223	qed_chain_reset(&qph->pbl);
2224	qph->prod = 0;
2225	qph->cons = 0;
2226	qph->wqe_cons = 0;
2227	qph->db_data.data.value = cpu_to_le16(0);
2228}
2229
2230static int qedr_update_qp_state(struct qedr_dev *dev,
2231				struct qedr_qp *qp,
2232				enum qed_roce_qp_state cur_state,
2233				enum qed_roce_qp_state new_state)
2234{
2235	int status = 0;
2236
2237	if (new_state == cur_state)
2238		return 0;
2239
2240	switch (cur_state) {
2241	case QED_ROCE_QP_STATE_RESET:
2242		switch (new_state) {
2243		case QED_ROCE_QP_STATE_INIT:
2244			qp->prev_wqe_size = 0;
2245			qedr_reset_qp_hwq_info(&qp->sq);
2246			qedr_reset_qp_hwq_info(&qp->rq);
2247			break;
2248		default:
2249			status = -EINVAL;
2250			break;
2251		}
2252		break;
2253	case QED_ROCE_QP_STATE_INIT:
2254		switch (new_state) {
2255		case QED_ROCE_QP_STATE_RTR:
2256			/* Update doorbell (in case post_recv was
2257			 * done before move to RTR)
2258			 */
2259
2260			if (rdma_protocol_roce(&dev->ibdev, 1))
2261				writel(qp->rq.db_data.raw, qp->rq.db);
2263			break;
2264		case QED_ROCE_QP_STATE_ERR:
2265			break;
2266		default:
2267			/* Invalid state change. */
2268			status = -EINVAL;
2269			break;
2270		}
2271		break;
2272	case QED_ROCE_QP_STATE_RTR:
2273		/* RTR->XXX */
2274		switch (new_state) {
2275		case QED_ROCE_QP_STATE_RTS:
2276			break;
2277		case QED_ROCE_QP_STATE_ERR:
2278			break;
2279		default:
2280			/* Invalid state change. */
2281			status = -EINVAL;
2282			break;
2283		}
2284		break;
2285	case QED_ROCE_QP_STATE_RTS:
2286		/* RTS->XXX */
2287		switch (new_state) {
2288		case QED_ROCE_QP_STATE_SQD:
2289			break;
2290		case QED_ROCE_QP_STATE_ERR:
2291			break;
2292		default:
2293			/* Invalid state change. */
2294			status = -EINVAL;
2295			break;
2296		}
2297		break;
2298	case QED_ROCE_QP_STATE_SQD:
2299		/* SQD->XXX */
2300		switch (new_state) {
2301		case QED_ROCE_QP_STATE_RTS:
2302		case QED_ROCE_QP_STATE_ERR:
2303			break;
2304		default:
2305			/* Invalid state change. */
2306			status = -EINVAL;
2307			break;
2308		}
2309		break;
2310	case QED_ROCE_QP_STATE_ERR:
2311		/* ERR->XXX */
2312		switch (new_state) {
2313		case QED_ROCE_QP_STATE_RESET:
2314			if ((qp->rq.prod != qp->rq.cons) ||
2315			    (qp->sq.prod != qp->sq.cons)) {
2316				DP_NOTICE(dev,
2317					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
2318					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
2319					  qp->sq.cons);
2320				status = -EINVAL;
2321			}
2322			break;
2323		default:
2324			status = -EINVAL;
2325			break;
2326		}
2327		break;
2328	default:
2329		status = -EINVAL;
2330		break;
2331	}
2332
2333	return status;
2334}
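/*
 * Editor's sketch (not part of the driver): the nested switch above is a
 * transition table in disguise. The same validity rules could be written
 * as a bitmap of allowed next-states per current state; state numbering
 * here is hypothetical, and the ERR->RESET arc additionally requires both
 * rings to be empty, as the code above checks.
 */
enum { ST_RESET, ST_INIT, ST_RTR, ST_RTS, ST_SQD, ST_ERR, ST_NUM };

static const unsigned int allowed_next[ST_NUM] = {
	[ST_RESET] = 1u << ST_INIT,
	[ST_INIT]  = (1u << ST_RTR) | (1u << ST_ERR),
	[ST_RTR]   = (1u << ST_RTS) | (1u << ST_ERR),
	[ST_RTS]   = (1u << ST_SQD) | (1u << ST_ERR),
	[ST_SQD]   = (1u << ST_RTS) | (1u << ST_ERR),
	[ST_ERR]   = 1u << ST_RESET,	/* only if both rings are empty */
};

static int transition_is_valid(int cur, int next)
{
	/* A no-op transition is always accepted, as in the code above. */
	return cur == next || (allowed_next[cur] & (1u << next));
}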
2335
2336int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2337		   int attr_mask, struct ib_udata *udata)
2338{
2339	struct qedr_qp *qp = get_qedr_qp(ibqp);
2340	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
2341	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
2342	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
2343	enum ib_qp_state old_qp_state, new_qp_state;
2344	enum qed_roce_qp_state cur_state;
2345	int rc = 0;
2346
2347	DP_DEBUG(dev, QEDR_MSG_QP,
2348		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
2349		 attr->qp_state);
2350
2351	old_qp_state = qedr_get_ibqp_state(qp->state);
2352	if (attr_mask & IB_QP_STATE)
2353		new_qp_state = attr->qp_state;
2354	else
2355		new_qp_state = old_qp_state;
2356
2357	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2358		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
2359					ibqp->qp_type, attr_mask)) {
2360			DP_ERR(dev,
2361			       "modify qp: invalid attribute mask=0x%x specified for\n"
2362			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
2363			       attr_mask, qp->qp_id, ibqp->qp_type,
2364			       old_qp_state, new_qp_state);
2365			rc = -EINVAL;
2366			goto err;
2367		}
2368	}
2369
2370	/* Translate the masks... */
2371	if (attr_mask & IB_QP_STATE) {
2372		SET_FIELD(qp_params.modify_flags,
2373			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
2374		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
2375	}
2376
2377	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
2378		qp_params.sqd_async = true;
2379
2380	if (attr_mask & IB_QP_PKEY_INDEX) {
2381		SET_FIELD(qp_params.modify_flags,
2382			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
2383		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
2384			rc = -EINVAL;
2385			goto err;
2386		}
2387
2388		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
2389	}
2390
2391	if (attr_mask & IB_QP_QKEY)
2392		qp->qkey = attr->qkey;
2393
2394	if (attr_mask & IB_QP_ACCESS_FLAGS) {
2395		SET_FIELD(qp_params.modify_flags,
2396			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
2397		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
2398						  IB_ACCESS_REMOTE_READ;
2399		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
2400						   IB_ACCESS_REMOTE_WRITE;
2401		qp_params.incoming_atomic_en = attr->qp_access_flags &
2402					       IB_ACCESS_REMOTE_ATOMIC;
2403	}
2404
2405	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
2406		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2407			return -EINVAL;
2408
2409		if (attr_mask & IB_QP_PATH_MTU) {
2410			if (attr->path_mtu < IB_MTU_256 ||
2411			    attr->path_mtu > IB_MTU_4096) {
2412				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
2413				rc = -EINVAL;
2414				goto err;
2415			}
2416			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
2417				      ib_mtu_enum_to_int(iboe_get_mtu
2418							 (dev->ndev->mtu)));
2419		}
2420
2421		if (!qp->mtu) {
2422			qp->mtu =
2423			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2424			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
2425		}
2426
2427		SET_FIELD(qp_params.modify_flags,
2428			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
2429
2430		qp_params.traffic_class_tos = grh->traffic_class;
2431		qp_params.flow_label = grh->flow_label;
2432		qp_params.hop_limit_ttl = grh->hop_limit;
2433
2434		qp->sgid_idx = grh->sgid_index;
2435
2436		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
2437		if (rc) {
2438			DP_ERR(dev,
2439			       "modify qp: problems with GID index %d (rc=%d)\n",
2440			       grh->sgid_index, rc);
2441			return rc;
2442		}
2443
2444		rc = qedr_get_dmac(dev, &attr->ah_attr,
2445				   qp_params.remote_mac_addr);
2446		if (rc)
2447			return rc;
2448
2449		qp_params.use_local_mac = true;
2450		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2451
2452		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2453			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2454			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2455		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2456			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2457			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2458		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2459			 qp_params.remote_mac_addr);
2460
2461		qp_params.mtu = qp->mtu;
2462		qp_params.lb_indication = false;
2463	}
2464
2465	if (!qp_params.mtu) {
2466		/* Stay with current MTU */
2467		if (qp->mtu)
2468			qp_params.mtu = qp->mtu;
2469		else
2470			qp_params.mtu =
2471			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2472	}
2473
2474	if (attr_mask & IB_QP_TIMEOUT) {
2475		SET_FIELD(qp_params.modify_flags,
2476			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2477
2478		/* The received timeout value is an exponent used like this:
2479		 *    "12.7.34 LOCAL ACK TIMEOUT
2480		 *    Value representing the transport (ACK) timeout for use by
2481		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2482		 * The FW expects timeout in msec so we need to divide the usec
2483		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2484		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2485		 * A value of zero means infinite, so we use 'max_t' to make
2486		 * sure that sub-1 msec values are configured as 1 msec.
2487		 */
2488		if (attr->timeout)
2489			qp_params.ack_timeout =
2490					1 << max_t(int, attr->timeout - 8, 0);
2491		else
2492			qp_params.ack_timeout = 0;
2493	}
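	/*
	 * Worked example (editor's note): attr->timeout = 14 means
	 * 4.096 usec * 2^14 ~= 67.1 msec on the wire; the code above
	 * programs 1 << (14 - 8) = 64 msec into the FW, matching the
	 * 2^(timeout - 8) approximation. For timeout <= 8 the max_t()
	 * clamps the shift to 0, i.e. 1 msec.
	 */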
2494
2495	if (attr_mask & IB_QP_RETRY_CNT) {
2496		SET_FIELD(qp_params.modify_flags,
2497			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2498		qp_params.retry_cnt = attr->retry_cnt;
2499	}
2500
2501	if (attr_mask & IB_QP_RNR_RETRY) {
2502		SET_FIELD(qp_params.modify_flags,
2503			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2504		qp_params.rnr_retry_cnt = attr->rnr_retry;
2505	}
2506
2507	if (attr_mask & IB_QP_RQ_PSN) {
2508		SET_FIELD(qp_params.modify_flags,
2509			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2510		qp_params.rq_psn = attr->rq_psn;
2511		qp->rq_psn = attr->rq_psn;
2512	}
2513
2514	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2515		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2516			rc = -EINVAL;
2517			DP_ERR(dev,
2518			       "unsupported max_rd_atomic=%d, supported=%d\n",
2519			       attr->max_rd_atomic,
2520			       dev->attr.max_qp_req_rd_atomic_resc);
2521			goto err;
2522		}
2523
2524		SET_FIELD(qp_params.modify_flags,
2525			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2526		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2527	}
2528
2529	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2530		SET_FIELD(qp_params.modify_flags,
2531			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2532		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2533	}
2534
2535	if (attr_mask & IB_QP_SQ_PSN) {
2536		SET_FIELD(qp_params.modify_flags,
2537			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2538		qp_params.sq_psn = attr->sq_psn;
2539		qp->sq_psn = attr->sq_psn;
2540	}
2541
2542	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2543		if (attr->max_dest_rd_atomic >
2544		    dev->attr.max_qp_resp_rd_atomic_resc) {
2545			DP_ERR(dev,
2546			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2547			       attr->max_dest_rd_atomic,
2548			       dev->attr.max_qp_resp_rd_atomic_resc);
2549
2550			rc = -EINVAL;
2551			goto err;
2552		}
2553
2554		SET_FIELD(qp_params.modify_flags,
2555			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2556		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2557	}
2558
2559	if (attr_mask & IB_QP_DEST_QPN) {
2560		SET_FIELD(qp_params.modify_flags,
2561			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2562
2563		qp_params.dest_qp = attr->dest_qp_num;
2564		qp->dest_qp_num = attr->dest_qp_num;
2565	}
2566
2567	cur_state = qp->state;
2568
2569	/* Update the QP state before the actual ramrod to prevent a race with
2570	 * fast path. Modifying the QP state to error will cause the device to
2571	 * flush the CQEs, and flushed CQEs seen while polling would be
2572	 * treated as a problem if the QP were not already in the error state.
2573	 */
2574	if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2575	    !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2576		qp->state = QED_ROCE_QP_STATE_ERR;
2577
2578	if (qp->qp_type != IB_QPT_GSI)
2579		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2580					      qp->qed_qp, &qp_params);
2581
2582	if (attr_mask & IB_QP_STATE) {
2583		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2584			rc = qedr_update_qp_state(dev, qp, cur_state,
2585						  qp_params.new_state);
2586		qp->state = qp_params.new_state;
2587	}
2588
2589err:
2590	return rc;
2591}
2592
2593static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2594{
2595	int ib_qp_acc_flags = 0;
2596
2597	if (params->incoming_rdma_write_en)
2598		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2599	if (params->incoming_rdma_read_en)
2600		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2601	if (params->incoming_atomic_en)
2602		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2603	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2604	return ib_qp_acc_flags;
2605}
2606
2607int qedr_query_qp(struct ib_qp *ibqp,
2608		  struct ib_qp_attr *qp_attr,
2609		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2610{
2611	struct qed_rdma_query_qp_out_params params;
2612	struct qedr_qp *qp = get_qedr_qp(ibqp);
2613	struct qedr_dev *dev = qp->dev;
2614	int rc = 0;
2615
2616	memset(&params, 0, sizeof(params));
2617
2618	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2619	if (rc)
2620		goto err;
2621
2622	memset(qp_attr, 0, sizeof(*qp_attr));
2623	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2624
2625	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2626	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2627	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2628	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2629	qp_attr->rq_psn = params.rq_psn;
2630	qp_attr->sq_psn = params.sq_psn;
2631	qp_attr->dest_qp_num = params.dest_qp;
2632
2633	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2634
2635	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2636	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2637	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2638	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2639	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2640	qp_init_attr->cap = qp_attr->cap;
2641
2642	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2643	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2644			params.flow_label, qp->sgid_idx,
2645			params.hop_limit_ttl, params.traffic_class_tos);
2646	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2647	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2648	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2649	qp_attr->timeout = params.timeout;
2650	qp_attr->rnr_retry = params.rnr_retry;
2651	qp_attr->retry_cnt = params.retry_cnt;
2652	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2653	qp_attr->pkey_index = params.pkey_index;
2654	qp_attr->port_num = 1;
2655	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2656	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2657	qp_attr->alt_pkey_index = 0;
2658	qp_attr->alt_port_num = 0;
2659	qp_attr->alt_timeout = 0;
2660	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2661
2662	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2663	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2664	qp_attr->max_rd_atomic = params.max_rd_atomic;
2665	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2666
2667	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2668		 qp_attr->cap.max_inline_data);
2669
2670err:
2671	return rc;
2672}
2673
2674static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
2675				  struct ib_udata *udata)
2676{
2677	struct qedr_ucontext *ctx =
2678		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
2679					  ibucontext);
2680	int rc;
2681
2682	if (qp->qp_type != IB_QPT_GSI) {
2683		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2684		if (rc)
2685			return rc;
2686	}
2687
2688	if (qp->create_type == QEDR_QP_CREATE_USER)
2689		qedr_cleanup_user(dev, ctx, qp);
2690	else
2691		qedr_cleanup_kernel(dev, qp);
2692
2693	return 0;
2694}
2695
2696int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
2697{
2698	struct qedr_qp *qp = get_qedr_qp(ibqp);
2699	struct qedr_dev *dev = qp->dev;
2700	struct ib_qp_attr attr;
2701	int attr_mask = 0;
2702
2703	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2704		 qp, qp->qp_type);
2705
2706	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2707		if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2708		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2709		    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2710
2711			attr.qp_state = IB_QPS_ERR;
2712			attr_mask |= IB_QP_STATE;
2713
2714			/* Change the QP state to ERROR */
2715			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2716		}
2717	} else {
2718		/* If connection establishment has started, the WAIT_FOR_CONNECT
2719		 * bit will be on and we need to wait for the establishment
2720		 * to complete before destroying the qp.
2721		 */
2722		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
2723				     &qp->iwarp_cm_flags))
2724			wait_for_completion(&qp->iwarp_cm_comp);
2725
2726		/* If graceful disconnect started, the WAIT_FOR_DISCONNECT
2727		 * bit will be on, and we need to wait for the disconnect to
2728		 * complete before continuing. We can use the same completion,
2729		 * iwarp_cm_comp, since this is the only place that waits for
2730		 * this completion and it is sequential. In addition,
2731		 * disconnect can't occur before the connection is fully
2732		 * established, therefore if WAIT_FOR_DISCONNECT is on it
2733		 * means WAIT_FOR_CONNECT is also on and the completion for
2734		 * CONNECT already occurred.
2735		 */
2736		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
2737				     &qp->iwarp_cm_flags))
2738			wait_for_completion(&qp->iwarp_cm_comp);
2739	}
2740
2741	if (qp->qp_type == IB_QPT_GSI)
2742		qedr_destroy_gsi_qp(dev);
2743
2744	/* We need to remove the entry from the xarray before we release the
2745	 * qp_id, to avoid a race where the qp_id is reallocated and then
2746	 * fails on xa_insert.
2747	 */
2748	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2749		xa_erase(&dev->qps, qp->qp_id);
2750
2751	qedr_free_qp_resources(dev, qp, udata);
2752
2753	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2754		qedr_iw_qp_rem_ref(&qp->ibqp);
2755
2756	return 0;
2757}
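/*
 * Editor's sketch (not part of the driver): the destroy path above uses
 * test_and_set_bit() as a one-shot rendezvous. Whichever side gets there
 * first owns the bit; if destroy finds it already set, a CM operation is
 * (or was) in flight and destroy must wait for its completion. A minimal
 * model of the idiom, with hypothetical names:
 */
#include <linux/bitops.h>
#include <linux/completion.h>

struct cm_sync {
	unsigned long flags;		/* bit 0: CM operation started */
	struct completion done;		/* signalled when it finishes */
};

static void destroy_side(struct cm_sync *s)
{
	/* test_and_set_bit() returns the old value: nonzero means the
	 * CM side started first, so wait for it to signal completion.
	 */
	if (test_and_set_bit(0, &s->flags))
		wait_for_completion(&s->done);
}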
2758
2759int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
2760		   struct ib_udata *udata)
2761{
2762	struct qedr_ah *ah = get_qedr_ah(ibah);
2763
2764	rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr);
2765
2766	return 0;
2767}
2768
2769void qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
2770{
2771	struct qedr_ah *ah = get_qedr_ah(ibah);
2772
2773	rdma_destroy_ah_attr(&ah->attr);
2774}
2775
2776static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2777{
2778	struct qedr_pbl *pbl, *tmp;
2779
2780	if (info->pbl_table)
2781		list_add_tail(&info->pbl_table->list_entry,
2782			      &info->free_pbl_list);
2783
2784	if (!list_empty(&info->inuse_pbl_list))
2785		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2786
2787	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2788		list_del(&pbl->list_entry);
2789		qedr_free_pbl(dev, &info->pbl_info, pbl);
2790	}
2791}
2792
2793static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2794			size_t page_list_len, bool two_layered)
2795{
2796	struct qedr_pbl *tmp;
2797	int rc;
2798
2799	INIT_LIST_HEAD(&info->free_pbl_list);
2800	INIT_LIST_HEAD(&info->inuse_pbl_list);
2801
2802	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2803				  page_list_len, two_layered);
2804	if (rc)
2805		goto done;
2806
2807	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2808	if (IS_ERR(info->pbl_table)) {
2809		rc = PTR_ERR(info->pbl_table);
2810		goto done;
2811	}
2812
2813	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2814		 &info->pbl_table->pa);
2815
2816	/* In the usual case we use 2 PBLs, so we add one to the free
2817	 * list and allocate another one.
2818	 */
2819	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2820	if (IS_ERR(tmp)) {
2821		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2822		goto done;
2823	}
2824
2825	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2826
2827	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2828
2829done:
2830	if (rc)
2831		free_mr_info(dev, info);
2832
2833	return rc;
2834}
2835
2836struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2837			       u64 usr_addr, int acc, struct ib_udata *udata)
2838{
2839	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2840	struct qedr_mr *mr;
2841	struct qedr_pd *pd;
2842	int rc = -ENOMEM;
2843
2844	pd = get_qedr_pd(ibpd);
2845	DP_DEBUG(dev, QEDR_MSG_MR,
2846		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2847		 pd->pd_id, start, len, usr_addr, acc);
2848
2849	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2850		return ERR_PTR(-EINVAL);
2851
2852	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2853	if (!mr)
2854		return ERR_PTR(rc);
2855
2856	mr->type = QEDR_MR_USER;
2857
2858	mr->umem = ib_umem_get(ibpd->device, start, len, acc);
2859	if (IS_ERR(mr->umem)) {
2860		rc = -EFAULT;
2861		goto err0;
2862	}
2863
2864	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2865	if (rc)
2866		goto err1;
2867
2868	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2869			   &mr->info.pbl_info, PAGE_SHIFT);
2870
2871	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2872	if (rc) {
2873		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2874		goto err1;
2875	}
2876
2877	/* Index only, 18 bit long, lkey = itid << 8 | key */
2878	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2879	mr->hw_mr.key = 0;
2880	mr->hw_mr.pd = pd->pd_id;
2881	mr->hw_mr.local_read = 1;
2882	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2883	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2884	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2885	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2886	mr->hw_mr.mw_bind = false;
2887	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2888	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2889	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2890	mr->hw_mr.page_size_log = PAGE_SHIFT;
2891	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2892	mr->hw_mr.length = len;
2893	mr->hw_mr.vaddr = usr_addr;
2894	mr->hw_mr.zbva = false;
2895	mr->hw_mr.phy_mr = false;
2896	mr->hw_mr.dma_mr = false;
2897
2898	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2899	if (rc) {
2900		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2901		goto err2;
2902	}
2903
2904	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2905	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2906	    mr->hw_mr.remote_atomic)
2907		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2908
2909	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2910		 mr->ibmr.lkey);
2911	return &mr->ibmr;
2912
2913err2:
2914	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2915err1:
2916	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2917err0:
2918	kfree(mr);
2919	return ERR_PTR(rc);
2920}
2921
2922int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
2923{
2924	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2925	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2926	int rc = 0;
2927
2928	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2929	if (rc)
2930		return rc;
2931
2932	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2933
2934	if (mr->type != QEDR_MR_DMA)
2935		free_mr_info(dev, &mr->info);
2936
2937	/* It could be user-registered memory. */
2938	ib_umem_release(mr->umem);
2939
2940	kfree(mr);
2941
2942	return rc;
2943}
2944
2945static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2946				       int max_page_list_len)
2947{
2948	struct qedr_pd *pd = get_qedr_pd(ibpd);
2949	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2950	struct qedr_mr *mr;
2951	int rc = -ENOMEM;
2952
2953	DP_DEBUG(dev, QEDR_MSG_MR,
2954		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2955		 max_page_list_len);
2956
2957	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2958	if (!mr)
2959		return ERR_PTR(rc);
2960
2961	mr->dev = dev;
2962	mr->type = QEDR_MR_FRMR;
2963
2964	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2965	if (rc)
2966		goto err0;
2967
2968	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2969	if (rc) {
2970		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2971		goto err0;
2972	}
2973
2974	/* Index only, 18 bit long, lkey = itid << 8 | key */
2975	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2976	mr->hw_mr.key = 0;
2977	mr->hw_mr.pd = pd->pd_id;
2978	mr->hw_mr.local_read = 1;
2979	mr->hw_mr.local_write = 0;
2980	mr->hw_mr.remote_read = 0;
2981	mr->hw_mr.remote_write = 0;
2982	mr->hw_mr.remote_atomic = 0;
2983	mr->hw_mr.mw_bind = false;
2984	mr->hw_mr.pbl_ptr = 0;
2985	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2986	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2987	mr->hw_mr.fbo = 0;
2988	mr->hw_mr.length = 0;
2989	mr->hw_mr.vaddr = 0;
2990	mr->hw_mr.zbva = false;
2991	mr->hw_mr.phy_mr = true;
2992	mr->hw_mr.dma_mr = false;
2993
2994	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2995	if (rc) {
2996		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2997		goto err1;
2998	}
2999
3000	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3001	mr->ibmr.rkey = mr->ibmr.lkey;
3002
3003	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
3004	return mr;
3005
3006err1:
3007	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3008err0:
3009	kfree(mr);
3010	return ERR_PTR(rc);
3011}
3012
3013struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
3014			    u32 max_num_sg)
3015{
3016	struct qedr_mr *mr;
3017
3018	if (mr_type != IB_MR_TYPE_MEM_REG)
3019		return ERR_PTR(-EINVAL);
3020
3021	mr = __qedr_alloc_mr(ibpd, max_num_sg);
3022
3023	if (IS_ERR(mr))
3024		return ERR_PTR(-EINVAL);
3025
3026	return &mr->ibmr;
3027}
3028
3029static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
3030{
3031	struct qedr_mr *mr = get_qedr_mr(ibmr);
3032	struct qedr_pbl *pbl_table;
3033	struct regpair *pbe;
3034	u32 pbes_in_page;
3035
3036	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
3037		DP_ERR(mr->dev, "qedr_set_page failed: PBL is full (npages=%d)\n", mr->npages);
3038		return -ENOMEM;
3039	}
3040
3041	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
3042		 mr->npages, addr);
3043
3044	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
3045	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
3046	pbe = (struct regpair *)pbl_table->va;
3047	pbe +=  mr->npages % pbes_in_page;
3048	pbe->lo = cpu_to_le32((u32)addr);
3049	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
3050
3051	mr->npages++;
3052
3053	return 0;
3054}
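/*
 * Editor's note (worked example, not part of the driver): with a
 * hypothetical pbl_size of 4096, pbes_in_page = 4096 / 8 = 512. For
 * mr->npages = 700 the PBE lands in pbl_table[700 / 512] = pbl_table[1]
 * at slot 700 % 512 = 188; the second-level tables fill linearly, 512
 * page addresses per 4K table.
 */
static inline void example_pbe_slot(unsigned int npages, unsigned int pbl_size,
				    unsigned int *table, unsigned int *slot)
{
	unsigned int pbes_in_page = pbl_size / sizeof(unsigned long long);

	*table = npages / pbes_in_page;	/* which second-level table */
	*slot  = npages % pbes_in_page;	/* index within that table */
}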
3055
3056static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
3057{
3058	int work = info->completed - info->completed_handled - 1;
3059
3060	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
3061	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
3062		struct qedr_pbl *pbl;
3063
3064		/* Free all the page lists that can be freed (all the ones
3065		 * that were invalidated), under the assumption that if an
3066		 * FMR completed successfully, any invalidate operation
3067		 * posted before it has completed as well.
3068		 */
3069		pbl = list_first_entry(&info->inuse_pbl_list,
3070				       struct qedr_pbl, list_entry);
3071		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
3072		info->completed_handled++;
3073	}
3074}
3075
3076int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
3077		   int sg_nents, unsigned int *sg_offset)
3078{
3079	struct qedr_mr *mr = get_qedr_mr(ibmr);
3080
3081	mr->npages = 0;
3082
3083	handle_completed_mrs(mr->dev, &mr->info);
3084	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
3085}
3086
3087struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
3088{
3089	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
3090	struct qedr_pd *pd = get_qedr_pd(ibpd);
3091	struct qedr_mr *mr;
3092	int rc;
3093
3094	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
3095	if (!mr)
3096		return ERR_PTR(-ENOMEM);
3097
3098	mr->type = QEDR_MR_DMA;
3099
3100	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
3101	if (rc) {
3102		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
3103		goto err1;
3104	}
3105
3106	/* index only, 18 bit long, lkey = itid << 8 | key */
3107	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
3108	mr->hw_mr.pd = pd->pd_id;
3109	mr->hw_mr.local_read = 1;
3110	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
3111	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
3112	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
3113	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
3114	mr->hw_mr.dma_mr = true;
3115
3116	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
3117	if (rc) {
3118		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3119		goto err2;
3120	}
3121
3122	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3123	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
3124	    mr->hw_mr.remote_atomic)
3125		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3126
3127	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
3128	return &mr->ibmr;
3129
3130err2:
3131	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3132err1:
3133	kfree(mr);
3134	return ERR_PTR(rc);
3135}
3136
3137static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
3138{
3139	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
3140}
3141
3142static int sge_data_len(struct ib_sge *sg_list, int num_sge)
3143{
3144	int i, len = 0;
3145
3146	for (i = 0; i < num_sge; i++)
3147		len += sg_list[i].length;
3148
3149	return len;
3150}
3151
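/*
 * Byte-swap each 64-bit word of one WQE segment (editor's note):
 * cpu_to_le64() is a no-op on little-endian hosts and a swab64 on
 * big-endian ones, while cpu_to_be64() is the opposite, so the
 * composition below is an unconditional 64-bit byte swap on either
 * host - presumably the device consumes the inline payload in
 * big-endian order within its little-endian descriptors.
 */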
3152static void swap_wqe_data64(u64 *p)
3153{
3154	int i;
3155
3156	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
3157		*p = cpu_to_be64(cpu_to_le64(*p));
3158}
3159
3160static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
3161				       struct qedr_qp *qp, u8 *wqe_size,
3162				       const struct ib_send_wr *wr,
3163				       const struct ib_send_wr **bad_wr,
3164				       u8 *bits, u8 bit)
3165{
3166	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
3167	char *seg_prt, *wqe;
3168	int i, seg_siz;
3169
3170	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
3171		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
3172		*bad_wr = wr;
3173		return 0;
3174	}
3175
3176	if (!data_size)
3177		return data_size;
3178
3179	*bits |= bit;
3180
3181	seg_prt = NULL;
3182	wqe = NULL;
3183	seg_siz = 0;
3184
3185	/* Copy data inline */
3186	for (i = 0; i < wr->num_sge; i++) {
3187		u32 len = wr->sg_list[i].length;
3188		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
3189
3190		while (len > 0) {
3191			u32 cur;
3192
3193			/* New segment required */
3194			if (!seg_siz) {
3195				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
3196				seg_prt = wqe;
3197				seg_siz = sizeof(struct rdma_sq_common_wqe);
3198				(*wqe_size)++;
3199			}
3200
3201			/* Calculate currently allowed length */
3202			cur = min_t(u32, len, seg_siz);
3203			memcpy(seg_prt, src, cur);
3204
3205			/* Update segment variables */
3206			seg_prt += cur;
3207			seg_siz -= cur;
3208
3209			/* Update sge variables */
3210			src += cur;
3211			len -= cur;
3212
3213			/* Swap fully-completed segments */
3214			if (!seg_siz)
3215				swap_wqe_data64((u64 *)wqe);
3216		}
3217	}
3218
3219	/* Swap the last, partially completed segment */
3220	if (seg_siz)
3221		swap_wqe_data64((u64 *)wqe);
3222
3223	return data_size;
3224}
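/*
 * Editor's sketch (not part of the driver): the inline copy above is a
 * gather of SGEs into fixed-size ring elements. A minimal userspace
 * model, with SEG_SIZE standing in for sizeof(struct rdma_sq_common_wqe)
 * and the names being hypothetical:
 */
#include <stddef.h>
#include <string.h>

#define SEG_SIZE 16	/* hypothetical element size */

struct sge { const char *addr; size_t len; };

/* Returns the number of ring elements consumed. */
static size_t gather_inline(char *ring, const struct sge *sgl, int num_sge)
{
	size_t seg_left = 0, used = 0;
	char *dst = ring;
	int i;

	for (i = 0; i < num_sge; i++) {
		const char *src = sgl[i].addr;
		size_t len = sgl[i].len;

		while (len) {
			size_t cur;

			if (!seg_left) {		/* new element needed */
				dst = ring + used * SEG_SIZE;
				seg_left = SEG_SIZE;
				used++;
			}
			cur = len < seg_left ? len : seg_left;
			memcpy(dst, src, cur);		/* fill the element */
			dst += cur;
			seg_left -= cur;
			src += cur;
			len -= cur;
		}
	}
	return used;
}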
3225
3226#define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
3227	do {							\
3228		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3229		(sge)->length = cpu_to_le32(vlength);		\
3230		(sge)->flags = cpu_to_le32(vflags);		\
3231	} while (0)
3232
3233#define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
3234	do {							\
3235		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
3236		(hdr)->num_sges = num_sge;			\
3237	} while (0)
3238
3239#define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
3240	do {							\
3241		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3242		(sge)->length = cpu_to_le32(vlength);		\
3243		(sge)->l_key = cpu_to_le32(vlkey);		\
3244	} while (0)
3245
3246static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
3247				const struct ib_send_wr *wr)
3248{
3249	u32 data_size = 0;
3250	int i;
3251
3252	for (i = 0; i < wr->num_sge; i++) {
3253		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
3254
3255		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
3256		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
3257		sge->length = cpu_to_le32(wr->sg_list[i].length);
3258		data_size += wr->sg_list[i].length;
3259	}
3260
3261	if (wqe_size)
3262		*wqe_size += wr->num_sge;
3263
3264	return data_size;
3265}
3266
3267static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
3268				     struct qedr_qp *qp,
3269				     struct rdma_sq_rdma_wqe_1st *rwqe,
3270				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
3271				     const struct ib_send_wr *wr,
3272				     const struct ib_send_wr **bad_wr)
3273{
3274	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
3275	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
3276
3277	if (wr->send_flags & IB_SEND_INLINE &&
3278	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
3279	     wr->opcode == IB_WR_RDMA_WRITE)) {
3280		u8 flags = 0;
3281
3282		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
3283		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
3284						   bad_wr, &rwqe->flags, flags);
3285	}
3286
3287	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
3288}
3289
3290static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
3291				     struct qedr_qp *qp,
3292				     struct rdma_sq_send_wqe_1st *swqe,
3293				     struct rdma_sq_send_wqe_2st *swqe2,
3294				     const struct ib_send_wr *wr,
3295				     const struct ib_send_wr **bad_wr)
3296{
3297	memset(swqe2, 0, sizeof(*swqe2));
3298	if (wr->send_flags & IB_SEND_INLINE) {
3299		u8 flags = 0;
3300
3301		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
3302		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
3303						   bad_wr, &swqe->flags, flags);
3304	}
3305
3306	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
3307}
3308
3309static int qedr_prepare_reg(struct qedr_qp *qp,
3310			    struct rdma_sq_fmr_wqe_1st *fwqe1,
3311			    const struct ib_reg_wr *wr)
3312{
3313	struct qedr_mr *mr = get_qedr_mr(wr->mr);
3314	struct rdma_sq_fmr_wqe_2nd *fwqe2;
3315
3316	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
3317	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
3318	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
3319	fwqe1->l_key = wr->key;
3320
3321	fwqe2->access_ctrl = 0;
3322
3323	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
3324		   !!(wr->access & IB_ACCESS_REMOTE_READ));
3325	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
3326		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
3327	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
3328		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
3329	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
3330	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
3331		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
3332	fwqe2->fmr_ctrl = 0;
3333
3334	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
3335		   ilog2(mr->ibmr.page_size) - 12);
3336
3337	fwqe2->length_hi = 0;
3338	fwqe2->length_lo = mr->ibmr.length;
3339	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
3340	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
3341
3342	qp->wqe_wr_id[qp->sq.prod].mr = mr;
3343
3344	return 0;
3345}
3346
3347static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
3348{
3349	switch (opcode) {
3350	case IB_WR_RDMA_WRITE:
3351	case IB_WR_RDMA_WRITE_WITH_IMM:
3352		return IB_WC_RDMA_WRITE;
3353	case IB_WR_SEND_WITH_IMM:
3354	case IB_WR_SEND:
3355	case IB_WR_SEND_WITH_INV:
3356		return IB_WC_SEND;
3357	case IB_WR_RDMA_READ:
3358	case IB_WR_RDMA_READ_WITH_INV:
3359		return IB_WC_RDMA_READ;
3360	case IB_WR_ATOMIC_CMP_AND_SWP:
3361		return IB_WC_COMP_SWAP;
3362	case IB_WR_ATOMIC_FETCH_AND_ADD:
3363		return IB_WC_FETCH_ADD;
3364	case IB_WR_REG_MR:
3365		return IB_WC_REG_MR;
3366	case IB_WR_LOCAL_INV:
3367		return IB_WC_LOCAL_INV;
3368	default:
3369		return IB_WC_SEND;
3370	}
3371}
3372
3373static inline bool qedr_can_post_send(struct qedr_qp *qp,
3374				      const struct ib_send_wr *wr)
3375{
3376	int wq_is_full, err_wr, pbl_is_full;
3377	struct qedr_dev *dev = qp->dev;
3378
3379	/* prevent SQ overflow and/or processing of a bad WR */
3380	err_wr = wr->num_sge > qp->sq.max_sges;
3381	wq_is_full = qedr_wq_is_full(&qp->sq);
3382	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
3383		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
3384	if (wq_is_full || err_wr || pbl_is_full) {
3385		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
3386			DP_ERR(dev,
3387			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
3388			       qp);
3389			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
3390		}
3391
3392		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
3393			DP_ERR(dev,
3394			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
3395			       qp);
3396			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
3397		}
3398
3399		if (pbl_is_full &&
3400		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
3401			DP_ERR(dev,
3402			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
3403			       qp);
3404			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
3405		}
3406		return false;
3407	}
3408	return true;
3409}
3410
3411static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3412			    const struct ib_send_wr **bad_wr)
3413{
3414	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3415	struct qedr_qp *qp = get_qedr_qp(ibqp);
3416	struct rdma_sq_atomic_wqe_1st *awqe1;
3417	struct rdma_sq_atomic_wqe_2nd *awqe2;
3418	struct rdma_sq_atomic_wqe_3rd *awqe3;
3419	struct rdma_sq_send_wqe_2st *swqe2;
3420	struct rdma_sq_local_inv_wqe *iwqe;
3421	struct rdma_sq_rdma_wqe_2nd *rwqe2;
3422	struct rdma_sq_send_wqe_1st *swqe;
3423	struct rdma_sq_rdma_wqe_1st *rwqe;
3424	struct rdma_sq_fmr_wqe_1st *fwqe1;
3425	struct rdma_sq_common_wqe *wqe;
3426	u32 length;
3427	int rc = 0;
3428	bool comp;
3429
3430	if (!qedr_can_post_send(qp, wr)) {
3431		*bad_wr = wr;
3432		return -ENOMEM;
3433	}
3434
3435	wqe = qed_chain_produce(&qp->sq.pbl);
3436	qp->wqe_wr_id[qp->sq.prod].signaled =
3437		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
3438
3439	wqe->flags = 0;
3440	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
3441		   !!(wr->send_flags & IB_SEND_SOLICITED));
3442	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
3443	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
3444	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
3445		   !!(wr->send_flags & IB_SEND_FENCE));
3446	wqe->prev_wqe_size = qp->prev_wqe_size;
3447
3448	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
3449
3450	switch (wr->opcode) {
3451	case IB_WR_SEND_WITH_IMM:
3452		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3453			rc = -EINVAL;
3454			*bad_wr = wr;
3455			break;
3456		}
3457		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3458		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3459		swqe->wqe_size = 2;
3460		swqe2 = qed_chain_produce(&qp->sq.pbl);
3461
3462		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3463		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3464						   wr, bad_wr);
3465		swqe->length = cpu_to_le32(length);
3466		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3467		qp->prev_wqe_size = swqe->wqe_size;
3468		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3469		break;
3470	case IB_WR_SEND:
3471		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3472		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3473
3474		swqe->wqe_size = 2;
3475		swqe2 = qed_chain_produce(&qp->sq.pbl);
3476		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3477						   wr, bad_wr);
3478		swqe->length = cpu_to_le32(length);
3479		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3480		qp->prev_wqe_size = swqe->wqe_size;
3481		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3482		break;
3483	case IB_WR_SEND_WITH_INV:
3484		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3485		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3486		swqe2 = qed_chain_produce(&qp->sq.pbl);
3487		swqe->wqe_size = 2;
3488		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3489		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3490						   wr, bad_wr);
3491		swqe->length = cpu_to_le32(length);
3492		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3493		qp->prev_wqe_size = swqe->wqe_size;
3494		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3495		break;
3496
3497	case IB_WR_RDMA_WRITE_WITH_IMM:
3498		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3499			rc = -EINVAL;
3500			*bad_wr = wr;
3501			break;
3502		}
3503		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3504		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3505
3506		rwqe->wqe_size = 2;
3507		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3508		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3509		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3510						   wr, bad_wr);
3511		rwqe->length = cpu_to_le32(length);
3512		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3513		qp->prev_wqe_size = rwqe->wqe_size;
3514		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3515		break;
3516	case IB_WR_RDMA_WRITE:
3517		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3518		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3519
3520		rwqe->wqe_size = 2;
3521		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3522		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3523						   wr, bad_wr);
3524		rwqe->length = cpu_to_le32(length);
3525		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3526		qp->prev_wqe_size = rwqe->wqe_size;
3527		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3528		break;
3529	case IB_WR_RDMA_READ_WITH_INV:
3530		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3531		fallthrough;	/* handled identically to RDMA READ */
3532
3533	case IB_WR_RDMA_READ:
3534		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3535		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3536
3537		rwqe->wqe_size = 2;
3538		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3539		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3540						   wr, bad_wr);
3541		rwqe->length = cpu_to_le32(length);
3542		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3543		qp->prev_wqe_size = rwqe->wqe_size;
3544		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3545		break;
3546
3547	case IB_WR_ATOMIC_CMP_AND_SWP:
3548	case IB_WR_ATOMIC_FETCH_AND_ADD:
3549		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3550		awqe1->wqe_size = 4;
3551
3552		awqe2 = qed_chain_produce(&qp->sq.pbl);
3553		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3554		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3555
3556		awqe3 = qed_chain_produce(&qp->sq.pbl);
3557
3558		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3559			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3560			DMA_REGPAIR_LE(awqe3->swap_data,
3561				       atomic_wr(wr)->compare_add);
3562		} else {
3563			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3564			DMA_REGPAIR_LE(awqe3->swap_data,
3565				       atomic_wr(wr)->swap);
3566			DMA_REGPAIR_LE(awqe3->cmp_data,
3567				       atomic_wr(wr)->compare_add);
3568		}
3569
3570		qedr_prepare_sq_sges(qp, NULL, wr);
3571
3572		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3573		qp->prev_wqe_size = awqe1->wqe_size;
3574		break;
3575
3576	case IB_WR_LOCAL_INV:
3577		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3578		iwqe->wqe_size = 1;
3579
3580		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3581		iwqe->inv_l_key = wr->ex.invalidate_rkey;
3582		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3583		qp->prev_wqe_size = iwqe->wqe_size;
3584		break;
3585	case IB_WR_REG_MR:
3586		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3587		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3588		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3589		fwqe1->wqe_size = 2;
3590
3591		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3592		if (rc) {
3593			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3594			*bad_wr = wr;
3595			break;
3596		}
3597
3598		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3599		qp->prev_wqe_size = fwqe1->wqe_size;
3600		break;
3601	default:
3602		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3603		rc = -EINVAL;
3604		*bad_wr = wr;
3605		break;
3606	}
3607
3608	if (*bad_wr) {
3609		u16 value;
3610
3611		/* Restore prod to its position before
3612		 * this WR was processed
3613		 */
3614		value = le16_to_cpu(qp->sq.db_data.data.value);
3615		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3616
3617		/* Restore prev_wqe_size */
3618		qp->prev_wqe_size = wqe->prev_wqe_size;
3619		rc = -EINVAL;
3620		DP_ERR(dev, "POST SEND FAILED\n");
3621	}
3622
3623	return rc;
3624}
3625
3626int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3627		   const struct ib_send_wr **bad_wr)
3628{
3629	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3630	struct qedr_qp *qp = get_qedr_qp(ibqp);
3631	unsigned long flags;
3632	int rc = 0;
3633
3634	*bad_wr = NULL;
3635
3636	if (qp->qp_type == IB_QPT_GSI)
3637		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3638
3639	spin_lock_irqsave(&qp->q_lock, flags);
3640
3641	if (rdma_protocol_roce(&dev->ibdev, 1)) {
3642		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3643		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3644		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3645			spin_unlock_irqrestore(&qp->q_lock, flags);
3646			*bad_wr = wr;
3647			DP_DEBUG(dev, QEDR_MSG_CQ,
3648				 "QP in wrong state! QP icid=0x%x state %d\n",
3649				 qp->icid, qp->state);
3650			return -EINVAL;
3651		}
3652	}
3653
3654	while (wr) {
3655		rc = __qedr_post_send(ibqp, wr, bad_wr);
3656		if (rc)
3657			break;
3658
3659		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3660
3661		qedr_inc_sw_prod(&qp->sq);
3662
3663		qp->sq.db_data.data.value++;
3664
3665		wr = wr->next;
3666	}
3667
3668	/* Trigger doorbell.
3669	 * If the first WR failed, the doorbell is rung in vain. However,
3670	 * this is not harmful (as long as the producer value is
3671	 * unchanged); for performance reasons we avoid checking for this
3672	 * redundant doorbell.
3673	 *
3674	 * qp->wqe_wr_id is accessed during qedr_poll_cq; as soon as we
3675	 * ring the doorbell, we could get a completion for this WR, so we
3676	 * must make sure that the memory is updated before the doorbell
3677	 * is written.
3678	 * During qedr_poll_cq, rmb() is called before accessing the CQE;
3679	 * this pairs with the smp_wmb() below.
3680	 */
3681	smp_wmb();
3682	writel(qp->sq.db_data.raw, qp->sq.db);
3683
3684	spin_unlock_irqrestore(&qp->q_lock, flags);
3685
3686	return rc;
3687}
3688
3689static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
3690{
3691	u32 used;
3692
3693	/* Calculate the number of elements in use from the producer and
3694	 * consumer counts, and subtract it from the maximum number of
3695	 * work requests supported to get the number of elements left.
3696	 */
3697	used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt);
3698
3699	return hw_srq->max_wr - used;
3700}
3701
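/* Post receive WRs to an SRQ. For each WR a header element and one
 * element per SGE are produced on the SRQ PBL chain; the SGE and WQE
 * producer indices are then published to the FW through the producer
 * pair in host memory, with dma_wmb() enforcing the update order.
 */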
3702int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
3703		       const struct ib_recv_wr **bad_wr)
3704{
3705	struct qedr_srq *srq = get_qedr_srq(ibsrq);
3706	struct qedr_srq_hwq_info *hw_srq;
3707	struct qedr_dev *dev = srq->dev;
3708	struct qed_chain *pbl;
3709	unsigned long flags;
3710	int status = 0;
3711	u32 num_sge;
3712
3713	spin_lock_irqsave(&srq->lock, flags);
3714
3715	hw_srq = &srq->hw_srq;
3716	pbl = &srq->hw_srq.pbl;
3717	while (wr) {
3718		struct rdma_srq_wqe_header *hdr;
3719		int i;
3720
3721		if (!qedr_srq_elem_left(hw_srq) ||
3722		    wr->num_sge > srq->hw_srq.max_sges) {
3723			DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n",
3724			       hw_srq->wr_prod_cnt,
3725			       atomic_read(&hw_srq->wr_cons_cnt),
3726			       wr->num_sge, srq->hw_srq.max_sges);
3727			status = -ENOMEM;
3728			*bad_wr = wr;
3729			break;
3730		}
3731
3732		hdr = qed_chain_produce(pbl);
3733		num_sge = wr->num_sge;
3734		/* Set the number of SGEs and the work request id in the header */
3735		SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
3736
3737		srq->hw_srq.wr_prod_cnt++;
3738		hw_srq->wqe_prod++;
3739		hw_srq->sge_prod++;
3740
3741		DP_DEBUG(dev, QEDR_MSG_SRQ,
3742			 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
3743			 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
3744
3745		for (i = 0; i < wr->num_sge; i++) {
3746			struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
3747
3748			/* Set SGE length, lkey and address */
3749			SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
3750				    wr->sg_list[i].length, wr->sg_list[i].lkey);
3751
3752			DP_DEBUG(dev, QEDR_MSG_SRQ,
3753				 "[%d]: len %d key %x addr %x:%x\n",
3754				 i, srq_sge->length, srq_sge->l_key,
3755				 srq_sge->addr.hi, srq_sge->addr.lo);
3756			hw_srq->sge_prod++;
3757		}
3758
3759		/* Update WQE and SGE information before
3760		 * updating producer.
3761		 */
3762		dma_wmb();
3763
3764		/* The SRQ producer is 8 bytes: the SGE producer index is
3765		 * updated in the first 4 bytes and the WQE producer in the
3766		 * next 4 bytes.
3767		 */
3768		srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod;
3769		/* Make sure sge producer is updated first */
3770		dma_wmb();
3771		srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod;
3772
3773		wr = wr->next;
3774	}
3775
3776	DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
3777		 qed_chain_get_elem_left(pbl));
3778	spin_unlock_irqrestore(&srq->lock, flags);
3779
3780	return status;
3781}
3782
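/* Post receive WRs to the RQ of a QP. GSI QPs are diverted to
 * qedr_gsi_post_recv(); for all others each SGE is written to the RQ
 * PBL chain and the RQ doorbell is rung per WR (with a second
 * doorbell on iWARP).
 */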
3783int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3784		   const struct ib_recv_wr **bad_wr)
3785{
3786	struct qedr_qp *qp = get_qedr_qp(ibqp);
3787	struct qedr_dev *dev = qp->dev;
3788	unsigned long flags;
3789	int status = 0;
3790
3791	if (qp->qp_type == IB_QPT_GSI)
3792		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3793
3794	spin_lock_irqsave(&qp->q_lock, flags);
3795
3796	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3797		spin_unlock_irqrestore(&qp->q_lock, flags);
3798		*bad_wr = wr;
3799		return -EINVAL;
3800	}
3801
3802	while (wr) {
3803		int i;
3804
3805		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3806		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3807		    wr->num_sge > qp->rq.max_sges) {
3808			DP_ERR(dev, "Can't post WR (%d < %d) || (%d > %d)\n",
3809			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3810			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3811			       qp->rq.max_sges);
3812			status = -ENOMEM;
3813			*bad_wr = wr;
3814			break;
3815		}
3816		for (i = 0; i < wr->num_sge; i++) {
3817			u32 flags = 0;
3818			struct rdma_rq_sge *rqe =
3819			    qed_chain_produce(&qp->rq.pbl);
3820
3821			/* The first SGE must include the number
3822			 * of SGEs in the list.
3823			 */
3824			if (!i)
3825				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3826					  wr->num_sge);
3827
3828			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
3829				  wr->sg_list[i].lkey);
3830
3831			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3832				   wr->sg_list[i].length, flags);
3833		}
3834
3835		/* Special case of no SGEs. The FW requires 1-4 SGEs, so in
3836		 * this case we post one SGE with length zero; this is needed
3837		 * because an RDMA WRITE with immediate consumes an RQ element.
3838		 */
3839		if (!wr->num_sge) {
3840			u32 flags = 0;
3841			struct rdma_rq_sge *rqe =
3842			    qed_chain_produce(&qp->rq.pbl);
3843
3844			/* The first SGE must include the number
3845			 * of SGEs in the list.
3846			 */
3847			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
3848			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3849
3850			RQ_SGE_SET(rqe, 0, 0, flags);
3851			i = 1;
3852		}
3853
3854		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3855		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3856
3857		qedr_inc_sw_prod(&qp->rq);
3858
3859		/* qp->rqe_wr_id is accessed during qedr_poll_cq; as soon as
3860		 * we ring the doorbell, we could get a completion for this
3861		 * WR, therefore we need to make sure that the memory is
3862		 * updated before ringing the doorbell.
3863		 * During qedr_poll_cq, rmb() is called before accessing the
3864		 * CQE; this pairs with the smp_wmb() below.
3865		 */
3866		smp_wmb();
3867
3868		qp->rq.db_data.data.value++;
3869
3870		writel(qp->rq.db_data.raw, qp->rq.db);
3871
3872		if (rdma_protocol_iwarp(&dev->ibdev, 1))
3873			writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3875
3876		wr = wr->next;
3877	}
3878
3879	spin_unlock_irqrestore(&qp->q_lock, flags);
3880
3881	return status;
3882}
3883
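/* A CQE needs processing only when its toggle bit matches the CQ's
 * current pbl_toggle value (the usual scheme for detecting newly
 * written entries as the chain wraps).
 */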
3884static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3885{
3886	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3887
3888	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3889		cq->pbl_toggle;
3890}
3891
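/* Recover the driver's qedr_qp pointer from the 64-bit QP handle
 * that the FW echoes back in qp_handle.hi/lo.
 */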
3892static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3893{
3894	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3895	struct qedr_qp *qp;
3896
3897	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3898						   resp_cqe->qp_handle.lo,
3899						   u64);
3900	return qp;
3901}
3902
3903static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3904{
3905	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3906
3907	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3908}
3909
3910/* Return latest CQE (needs processing) */
3911static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3912{
3913	return cq->latest_cqe;
3914}
3915
3916/* For FMR we need to increment the "completed" counter used by the
3917 * FMR algorithm to determine whether a PBL can be freed or not.
3918 * This must be done whether or not the work request was signaled. For
3919 * this purpose we call this function from the condition that checks
3920 * whether a WR should be skipped, to make sure we don't miss it
3921 * (possibly this FMR operation was not signaled).
3922 */
3923static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3924{
3925	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3926		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3927}
3928
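/* Consume SQ completions up to hw_cons, filling at most num_entries
 * work completions with the given status. Unsignaled WRs are skipped
 * unless @force is set (as in the flush path). Returns the number of
 * WCs actually filled.
 */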
3929static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3930		       struct qedr_cq *cq, int num_entries,
3931		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3932		       int force)
3933{
3934	u16 cnt = 0;
3935
3936	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3937		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3938			qedr_chk_if_fmr(qp);
3939			/* skip WC */
3940			goto next_cqe;
3941		}
3942
3943		/* fill WC */
3944		wc->status = status;
3945		wc->vendor_err = 0;
3946		wc->wc_flags = 0;
3947		wc->src_qp = qp->id;
3948		wc->qp = &qp->ibqp;
3949
3950		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3951		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3952
3953		switch (wc->opcode) {
3954		case IB_WC_RDMA_WRITE:
3955			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3956			break;
3957		case IB_WC_COMP_SWAP:
3958		case IB_WC_FETCH_ADD:
3959			wc->byte_len = 8;
3960			break;
3961		case IB_WC_REG_MR:
3962			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3963			break;
3964		case IB_WC_RDMA_READ:
3965		case IB_WC_SEND:
3966			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3967			break;
3968		default:
3969			break;
3970		}
3971
3972		num_entries--;
3973		wc++;
3974		cnt++;
3975next_cqe:
3976		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3977			qed_chain_consume(&qp->sq.pbl);
3978		qedr_inc_sw_cons(&qp->sq);
3979	}
3980
3981	return cnt;
3982}
3983
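/* Handle a requester CQE. On success or flush, completions are
 * drained up to the CQE's sq_cons. For other error statuses, all WRs
 * before the consumer complete successfully and one extra WC is
 * filled with the error translated to an ib_wc_status.
 */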
3984static int qedr_poll_cq_req(struct qedr_dev *dev,
3985			    struct qedr_qp *qp, struct qedr_cq *cq,
3986			    int num_entries, struct ib_wc *wc,
3987			    struct rdma_cqe_requester *req)
3988{
3989	int cnt = 0;
3990
3991	switch (req->status) {
3992	case RDMA_CQE_REQ_STS_OK:
3993		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3994				  IB_WC_SUCCESS, 0);
3995		break;
3996	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3997		if (qp->state != QED_ROCE_QP_STATE_ERR)
3998			DP_DEBUG(dev, QEDR_MSG_CQ,
3999				 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4000				 cq->icid, qp->icid);
4001		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
4002				  IB_WC_WR_FLUSH_ERR, 1);
4003		break;
4004	default:
4005		/* process all WQEs before the consumer */
4006		qp->state = QED_ROCE_QP_STATE_ERR;
4007		cnt = process_req(dev, qp, cq, num_entries, wc,
4008				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
4009		wc += cnt;
4010		/* if we have extra WC fill it with actual error info */
4011		if (cnt < num_entries) {
4012			enum ib_wc_status wc_status;
4013
4014			switch (req->status) {
4015			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
4016				DP_ERR(dev,
4017				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4018				       cq->icid, qp->icid);
4019				wc_status = IB_WC_BAD_RESP_ERR;
4020				break;
4021			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
4022				DP_ERR(dev,
4023				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4024				       cq->icid, qp->icid);
4025				wc_status = IB_WC_LOC_LEN_ERR;
4026				break;
4027			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
4028				DP_ERR(dev,
4029				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4030				       cq->icid, qp->icid);
4031				wc_status = IB_WC_LOC_QP_OP_ERR;
4032				break;
4033			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
4034				DP_ERR(dev,
4035				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4036				       cq->icid, qp->icid);
4037				wc_status = IB_WC_LOC_PROT_ERR;
4038				break;
4039			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
4040				DP_ERR(dev,
4041				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4042				       cq->icid, qp->icid);
4043				wc_status = IB_WC_MW_BIND_ERR;
4044				break;
4045			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
4046				DP_ERR(dev,
4047				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4048				       cq->icid, qp->icid);
4049				wc_status = IB_WC_REM_INV_REQ_ERR;
4050				break;
4051			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
4052				DP_ERR(dev,
4053				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4054				       cq->icid, qp->icid);
4055				wc_status = IB_WC_REM_ACCESS_ERR;
4056				break;
4057			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
4058				DP_ERR(dev,
4059				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4060				       cq->icid, qp->icid);
4061				wc_status = IB_WC_REM_OP_ERR;
4062				break;
4063			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
4064				DP_ERR(dev,
4065				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4066				       cq->icid, qp->icid);
4067				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
4068				break;
4069			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
4070				DP_ERR(dev,
4071				       "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4072				       cq->icid, qp->icid);
4073				wc_status = IB_WC_RETRY_EXC_ERR;
4074				break;
4075			default:
4076				DP_ERR(dev,
4077				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4078				       cq->icid, qp->icid);
4079				wc_status = IB_WC_GENERAL_ERR;
4080			}
4081			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
4082					   wc_status, 1);
4083		}
4084	}
4085
4086	return cnt;
4087}
4088
4089static inline int qedr_cqe_resp_status_to_ib(u8 status)
4090{
4091	switch (status) {
4092	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
4093		return IB_WC_LOC_ACCESS_ERR;
4094	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
4095		return IB_WC_LOC_LEN_ERR;
4096	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
4097		return IB_WC_LOC_QP_OP_ERR;
4098	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
4099		return IB_WC_LOC_PROT_ERR;
4100	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
4101		return IB_WC_MW_BIND_ERR;
4102	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
4103		return IB_WC_REM_INV_RD_REQ_ERR;
4104	case RDMA_CQE_RESP_STS_OK:
4105		return IB_WC_SUCCESS;
4106	default:
4107		return IB_WC_GENERAL_ERR;
4108	}
4109}
4110
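/* Fill a successful responder WC from the CQE flags. IMM and INV are
 * mutually exclusive, and RDMA (write-with-immediate) is only valid
 * together with IMM; any other combination is rejected with -EINVAL.
 */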
4111static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
4112					  struct ib_wc *wc)
4113{
4114	wc->status = IB_WC_SUCCESS;
4115	wc->byte_len = le32_to_cpu(resp->length);
4116
4117	if (resp->flags & QEDR_RESP_IMM) {
4118		wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
4119		wc->wc_flags |= IB_WC_WITH_IMM;
4120
4121		if (resp->flags & QEDR_RESP_RDMA)
4122			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
4123
4124		if (resp->flags & QEDR_RESP_INV)
4125			return -EINVAL;
4126
4127	} else if (resp->flags & QEDR_RESP_INV) {
4128		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
4129		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
4130
4131		if (resp->flags & QEDR_RESP_RDMA)
4132			return -EINVAL;
4133
4134	} else if (resp->flags & QEDR_RESP_RDMA) {
4135		return -EINVAL;
4136	}
4137
4138	return 0;
4139}
4140
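/* Common WC fill for responder CQEs, shared by the SRQ and RQ paths. */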
4141static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4142			       struct qedr_cq *cq, struct ib_wc *wc,
4143			       struct rdma_cqe_responder *resp, u64 wr_id)
4144{
4145	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
4146	wc->opcode = IB_WC_RECV;
4147	wc->wc_flags = 0;
4148
4149	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
4150		if (qedr_set_ok_cqe_resp_wc(resp, wc))
4151			DP_ERR(dev,
4152			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
4153			       cq, cq->icid, resp->flags);
4154
4155	} else {
4156		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
4157		if (wc->status == IB_WC_GENERAL_ERR)
4158			DP_ERR(dev,
4159			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
4160			       cq, cq->icid, resp->status);
4161	}
4162
4163	/* Fill the rest of the WC */
4164	wc->vendor_err = 0;
4165	wc->src_qp = qp->id;
4166	wc->qp = &qp->ibqp;
4167	wc->wr_id = wr_id;
4168}
4169
4170static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4171				struct qedr_cq *cq, struct ib_wc *wc,
4172				struct rdma_cqe_responder *resp)
4173{
4174	struct qedr_srq *srq = qp->srq;
4175	u64 wr_id;
4176
4177	wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
4178			 le32_to_cpu(resp->srq_wr_id.lo), u64);
4179
4180	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4181		wc->status = IB_WC_WR_FLUSH_ERR;
4182		wc->vendor_err = 0;
4183		wc->wr_id = wr_id;
4184		wc->byte_len = 0;
4185		wc->src_qp = qp->id;
4186		wc->qp = &qp->ibqp;
4188	} else {
4189		__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4190	}
4191	atomic_inc(&srq->hw_srq.wr_cons_cnt);
4192
4193	return 1;
4194}
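
/* RQ variant: the wr_id comes from the software RQ ring, and the
 * consumed RQ elements are released from the PBL chain.
 */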
4195static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4196			    struct qedr_cq *cq, struct ib_wc *wc,
4197			    struct rdma_cqe_responder *resp)
4198{
4199	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4200
4201	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4202
4203	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4204		qed_chain_consume(&qp->rq.pbl);
4205	qedr_inc_sw_cons(&qp->rq);
4206
4207	return 1;
4208}
4209
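/* Flush the RQ: complete everything up to hw_cons with
 * IB_WC_WR_FLUSH_ERR, filling at most num_entries WCs.
 */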
4210static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
4211			      int num_entries, struct ib_wc *wc, u16 hw_cons)
4212{
4213	u16 cnt = 0;
4214
4215	while (num_entries && qp->rq.wqe_cons != hw_cons) {
4216		/* fill WC */
4217		wc->status = IB_WC_WR_FLUSH_ERR;
4218		wc->vendor_err = 0;
4219		wc->wc_flags = 0;
4220		wc->src_qp = qp->id;
4221		wc->byte_len = 0;
4222		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4223		wc->qp = &qp->ibqp;
4224		num_entries--;
4225		wc++;
4226		cnt++;
4227		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4228			qed_chain_consume(&qp->rq.pbl);
4229		qedr_inc_sw_cons(&qp->rq);
4230	}
4231
4232	return cnt;
4233}
4234
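/* The CQE is consumed only once the software RQ consumer has caught
 * up with the consumer index reported in it; the requester variant
 * below applies the same rule to the SQ.
 */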
4235static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4236				 struct rdma_cqe_responder *resp, int *update)
4237{
4238	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
4239		consume_cqe(cq);
4240		*update |= 1;
4241	}
4242}
4243
4244static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4245				 struct qedr_cq *cq, int num_entries,
4246				 struct ib_wc *wc,
4247				 struct rdma_cqe_responder *resp)
4248{
4249	int cnt;
4250
4251	cnt = process_resp_one_srq(dev, qp, cq, wc, resp);
4252	consume_cqe(cq);
4253
4254	return cnt;
4255}
4256
4257static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
4258			     struct qedr_cq *cq, int num_entries,
4259			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
4260			     int *update)
4261{
4262	int cnt;
4263
4264	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4265		cnt = process_resp_flush(qp, cq, num_entries, wc,
4266					 resp->rq_cons_or_srq_id);
4267		try_consume_resp_cqe(cq, qp, resp, update);
4268	} else {
4269		cnt = process_resp_one(dev, qp, cq, wc, resp);
4270		consume_cqe(cq);
4271		*update |= 1;
4272	}
4273
4274	return cnt;
4275}
4276
4277static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4278				struct rdma_cqe_requester *req, int *update)
4279{
4280	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
4281		consume_cqe(cq);
4282		*update |= 1;
4283	}
4284}
4285
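/* Poll up to num_entries completions from the CQ into @wc. A
 * hypothetical caller sketch (illustrative only; handle_completion()
 * is not part of this driver):
 *
 *	struct ib_wc wc[16];
 *	int i, n;
 *
 *	n = ib_poll_cq(ibcq, ARRAY_SIZE(wc), wc);
 *	for (i = 0; i < n; i++)
 *		handle_completion(&wc[i]);
 *
 * ib_poll_cq() dispatches here for qedr devices; GSI CQs are handled
 * by qedr_gsi_poll_cq() instead.
 */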
4286int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
4287{
4288	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
4289	struct qedr_cq *cq = get_qedr_cq(ibcq);
4290	union rdma_cqe *cqe;
4291	u32 old_cons, new_cons;
4292	unsigned long flags;
4293	int update = 0;
4294	int done = 0;
4295
4296	if (cq->destroyed) {
4297		DP_ERR(dev,
4298		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
4299		       cq, cq->icid);
4300		return 0;
4301	}
4302
4303	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
4304		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
4305
4306	spin_lock_irqsave(&cq->cq_lock, flags);
4307	cqe = cq->latest_cqe;
4308	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4309	while (num_entries && is_valid_cqe(cq, cqe)) {
4310		struct qedr_qp *qp;
4311		int cnt = 0;
4312
4313		/* prevent speculative reads of any field of CQE */
4314		rmb();
4315
4316		qp = cqe_get_qp(cqe);
4317		if (!qp) {
4318			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
4319			break;
4320		}
4321
4322		wc->qp = &qp->ibqp;
4323
4324		switch (cqe_get_type(cqe)) {
4325		case RDMA_CQE_TYPE_REQUESTER:
4326			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
4327					       &cqe->req);
4328			try_consume_req_cqe(cq, qp, &cqe->req, &update);
4329			break;
4330		case RDMA_CQE_TYPE_RESPONDER_RQ:
4331			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
4332						&cqe->resp, &update);
4333			break;
4334		case RDMA_CQE_TYPE_RESPONDER_SRQ:
4335			cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
4336						    wc, &cqe->resp);
4337			update = 1;
4338			break;
4339		case RDMA_CQE_TYPE_INVALID:
4340		default:
4341			DP_ERR(dev, "Error: invalid CQE type = %d\n",
4342			       cqe_get_type(cqe));
4343		}
4344		num_entries -= cnt;
4345		wc += cnt;
4346		done += cnt;
4347
4348		cqe = get_cqe(cq);
4349	}
4350	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4351
4352	cq->cq_cons += new_cons - old_cons;
4353
4354	if (update)
4355		/* The doorbell notifies about the latest VALID entry,
4356		 * but the chain already points to the next INVALID one.
4357		 */
4358		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
4359
4360	spin_unlock_irqrestore(&cq->cq_lock, flags);
4361	return done;
4362}
4363
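/* MAD processing stub: qedr does not implement MAD processing and
 * unconditionally reports IB_MAD_RESULT_SUCCESS.
 */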
4364int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
4365		     u8 port_num, const struct ib_wc *in_wc,
4366		     const struct ib_grh *in_grh, const struct ib_mad *in,
4367		     struct ib_mad *out_mad, size_t *out_mad_size,
4368		     u16 *out_mad_pkey_index)
4369{
4370	return IB_MAD_RESULT_SUCCESS;
4371}