/* QLogic qedr NIC Driver
 * Copyright (c) 2015-2016  QLogic Corporation
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/dma-mapping.h>
#include <linux/crc32.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/udp.h>
#include <linux/iommu.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>

#include <linux/qed/common_hsi.h>
#include "qedr_hsi_rdma.h"
#include <linux/qed/qed_if.h>
#include "qedr.h"
#include "verbs.h"
#include <rdma/qedr-abi.h>
#include "qedr_roce_cm.h"

#define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)

static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
					size_t len)
{
	size_t min_len = min_t(size_t, len, udata->outlen);

	return ib_copy_to_udata(udata, src, min_len);
}

int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
	if (index > QEDR_ROCE_PKEY_TABLE_LEN)
		return -EINVAL;

	*pkey = QEDR_ROCE_PKEY_DEFAULT;
	return 0;
}

int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
		      int index, union ib_gid *sgid)
{
	struct qedr_dev *dev = get_qedr_dev(ibdev);

	memset(sgid->raw, 0, sizeof(sgid->raw));
	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);

	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
		 sgid->global.interface_id, sgid->global.subnet_prefix);

	return 0;
}

int qedr_query_device(struct ib_device *ibdev,
		      struct ib_device_attr *attr, struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qedr_device_attr *qattr = &dev->attr;

	if (!dev->rdma_ctx) {
		DP_ERR(dev,
		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
		       dev->rdma_ctx);
		return -EINVAL;
	}

	memset(attr, 0, sizeof(*attr));

	attr->fw_ver = qattr->fw_ver;
	attr->sys_image_guid = qattr->sys_image_guid;
	attr->max_mr_size = qattr->max_mr_size;
	attr->page_size_cap = qattr->page_size_caps;
	attr->vendor_id = qattr->vendor_id;
	attr->vendor_part_id = qattr->vendor_part_id;
	attr->hw_ver = qattr->hw_ver;
	attr->max_qp = qattr->max_qp;
	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
	    IB_DEVICE_RC_RNR_NAK_GEN |
	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;

	attr->max_sge = qattr->max_sge;
	attr->max_sge_rd = qattr->max_sge;
	attr->max_cq = qattr->max_cq;
	attr->max_cqe = qattr->max_cqe;
	attr->max_mr = qattr->max_mr;
	attr->max_mw = qattr->max_mw;
	attr->max_pd = qattr->max_pd;
	attr->atomic_cap = dev->atomic_cap;
	attr->max_fmr = qattr->max_fmr;
	attr->max_map_per_fmr = 16;
	attr->max_qp_init_rd_atom =
	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
	attr->max_qp_rd_atom =
	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
		attr->max_qp_init_rd_atom);

	attr->max_srq = qattr->max_srq;
	attr->max_srq_sge = qattr->max_srq_sge;
	attr->max_srq_wr = qattr->max_srq_wr;

	attr->local_ca_ack_delay = qattr->dev_ack_delay;
	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
	attr->max_ah = qattr->max_ah;

	return 0;
}

#define QEDR_SPEED_SDR		(1)
#define QEDR_SPEED_DDR		(2)
#define QEDR_SPEED_QDR		(4)
#define QEDR_SPEED_FDR10	(8)
#define QEDR_SPEED_FDR		(16)
#define QEDR_SPEED_EDR		(32)

static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
					    u8 *ib_width)
{
	switch (speed) {
	case 1000:
		*ib_speed = QEDR_SPEED_SDR;
		*ib_width = IB_WIDTH_1X;
		break;
	case 10000:
		*ib_speed = QEDR_SPEED_QDR;
		*ib_width = IB_WIDTH_1X;
		break;

	case 20000:
		*ib_speed = QEDR_SPEED_DDR;
		*ib_width = IB_WIDTH_4X;
		break;

	case 25000:
		*ib_speed = QEDR_SPEED_EDR;
		*ib_width = IB_WIDTH_1X;
		break;

	case 40000:
		*ib_speed = QEDR_SPEED_QDR;
		*ib_width = IB_WIDTH_4X;
		break;

	case 50000:
		*ib_speed = QEDR_SPEED_QDR;
		*ib_width = IB_WIDTH_4X;
		break;

	case 100000:
		*ib_speed = QEDR_SPEED_EDR;
		*ib_width = IB_WIDTH_4X;
		break;

	default:
		/* Unsupported */
		*ib_speed = QEDR_SPEED_SDR;
		*ib_width = IB_WIDTH_1X;
	}
}

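/* Illustrative note (not part of the driver): per the mapping above, a
 * 100G link (speed == 100000) is reported as QEDR_SPEED_EDR (~25 Gb/s
 * per lane) at IB_WIDTH_4X, i.e. 4 x 25 Gb/s, and a 25G link as the
 * same per-lane speed at IB_WIDTH_1X. Speeds with no exact lane
 * decomposition, e.g. 50000, are approximated (here as QDR x 4X).
 */
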
int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
{
	struct qedr_dev *dev;
	struct qed_rdma_port *rdma_port;

	dev = get_qedr_dev(ibdev);
	if (port > 1) {
		DP_ERR(dev, "invalid_port=0x%x\n", port);
		return -EINVAL;
	}

	if (!dev->rdma_ctx) {
		DP_ERR(dev, "rdma_ctx is NULL\n");
		return -EINVAL;
	}

	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);

	/* *attr is zeroed by the caller, so avoid zeroing it again here */
	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
		attr->state = IB_PORT_ACTIVE;
		attr->phys_state = 5;
	} else {
		attr->state = IB_PORT_DOWN;
		attr->phys_state = 3;
	}
	attr->max_mtu = IB_MTU_4096;
	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
	attr->lid = 0;
	attr->lmc = 0;
	attr->sm_lid = 0;
	attr->sm_sl = 0;
	attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
		attr->gid_tbl_len = 1;
		attr->pkey_tbl_len = 1;
	} else {
		attr->gid_tbl_len = QEDR_MAX_SGID;
		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
	}
	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
	attr->qkey_viol_cntr = 0;
	get_link_speed_and_width(rdma_port->link_speed,
				 &attr->active_speed, &attr->active_width);
	attr->max_msg_sz = rdma_port->max_msg_size;
	attr->max_vl_num = 4;

	return 0;
}

int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
		     struct ib_port_modify *props)
{
	struct qedr_dev *dev;

	dev = get_qedr_dev(ibdev);
	if (port > 1) {
		DP_ERR(dev, "invalid_port=0x%x\n", port);
		return -EINVAL;
	}

	return 0;
}

static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
			 unsigned long len)
{
	struct qedr_mm *mm;

	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
	if (!mm)
		return -ENOMEM;

	mm->key.phy_addr = phy_addr;
	/* This function might be called with a length which is not a multiple
	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
	 * forces this granularity by increasing the requested size if needed.
	 * When qedr_mmap is called, it will search the list with the updated
	 * length as a key. To prevent search failures, the length is rounded up
	 * in advance to PAGE_SIZE.
	 */
	mm->key.len = roundup(len, PAGE_SIZE);
	INIT_LIST_HEAD(&mm->entry);

	mutex_lock(&uctx->mm_list_lock);
	list_add(&mm->entry, &uctx->mm_head);
	mutex_unlock(&uctx->mm_list_lock);

	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
		 (unsigned long long)mm->key.phy_addr,
		 (unsigned long)mm->key.len, uctx);

	return 0;
}

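/* Worked example of the rounding in qedr_add_mmap() (assuming
 * PAGE_SIZE == 4096): registering a doorbell region with len == 100
 * stores key.len == 4096. When user space later mmap()s the region the
 * kernel rounds the request up to a whole page, so qedr_mmap() searches
 * with len == 4096 and the lookup below succeeds.
 */
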
static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
			     unsigned long len)
{
	bool found = false;
	struct qedr_mm *mm;

	mutex_lock(&uctx->mm_list_lock);
	list_for_each_entry(mm, &uctx->mm_head, entry) {
		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
			continue;

		found = true;
		break;
	}
	mutex_unlock(&uctx->mm_list_lock);
	/* Don't dereference the list cursor here: after an unsuccessful walk
	 * it doesn't point at a valid entry, so log the search keys instead.
	 */
	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
		 phy_addr, len, uctx, found);

	return found;
}

struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
					struct ib_udata *udata)
{
	int rc;
	struct qedr_ucontext *ctx;
	struct qedr_alloc_ucontext_resp uresp;
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qed_rdma_add_user_out_params oparams;

	if (!udata)
		return ERR_PTR(-EFAULT);

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
	if (rc) {
		DP_ERR(dev,
		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size, or closing unnecessary RoCE applications. To increase the number of DPIs, consult the qedr readme\n",
		       rc);
		goto err;
	}

	ctx->dpi = oparams.dpi;
	ctx->dpi_addr = oparams.dpi_addr;
	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
	ctx->dpi_size = oparams.dpi_size;
	INIT_LIST_HEAD(&ctx->mm_head);
	mutex_init(&ctx->mm_list_lock);

	memset(&uresp, 0, sizeof(uresp));

	uresp.dpm_enabled = dev->user_dpm_enabled;
	uresp.wids_enabled = 1;
	uresp.wid_count = oparams.wid_count;
	uresp.db_pa = ctx->dpi_phys_addr;
	uresp.db_size = ctx->dpi_size;
	uresp.max_send_wr = dev->attr.max_sqe;
	uresp.max_recv_wr = dev->attr.max_rqe;
	uresp.max_srq_wr = dev->attr.max_srq_wr;
	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
	uresp.max_cqes = QEDR_MAX_CQES;

	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (rc)
		goto err;

	ctx->dev = dev;

	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
	if (rc)
		goto err;

	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
		 &ctx->ibucontext);
	return &ctx->ibucontext;

err:
	kfree(ctx);
	return ERR_PTR(rc);
}

int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
{
	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
	struct qedr_mm *mm, *tmp;
	int status = 0;

	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
		 uctx);
	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);

	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
			 mm->key.phy_addr, mm->key.len, uctx);
		list_del(&mm->entry);
		kfree(mm);
	}

	kfree(uctx);
	return status;
}

int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
	struct qedr_dev *dev = get_qedr_dev(context->device);
	unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT;
	unsigned long len = (vma->vm_end - vma->vm_start);
	unsigned long dpi_start;

	dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size);

	DP_DEBUG(dev, QEDR_MSG_INIT,
		 "mmap invoked with vm_start=0x%pK, vm_end=0x%pK, vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n",
		 (void *)vma->vm_start, (void *)vma->vm_end,
		 (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size);

	if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
		DP_ERR(dev,
		       "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n",
		       (void *)vma->vm_start, (void *)vma->vm_end);
		return -EINVAL;
	}

	if (!qedr_search_mmap(ucontext, phys_addr, len)) {
		DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n",
		       vma->vm_pgoff);
		return -EINVAL;
	}

	if (phys_addr < dpi_start ||
	    ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) {
		DP_ERR(dev,
		       "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n",
		       (void *)phys_addr, (void *)dpi_start,
		       ucontext->dpi_size);
		return -EINVAL;
	}

	if (vma->vm_flags & VM_READ) {
		DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n");
		return -EINVAL;
	}

	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
	return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
				  vma->vm_page_prot);
}

struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
			    struct ib_ucontext *context, struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qedr_pd *pd;
	u16 pd_id;
	int rc;

	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
		 (udata && context) ? "User Lib" : "Kernel");

	if (!dev->rdma_ctx) {
		DP_ERR(dev, "invalid RDMA context\n");
		return ERR_PTR(-EINVAL);
	}

	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
	if (!pd)
		return ERR_PTR(-ENOMEM);

	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
	if (rc)
		goto err;

	pd->pd_id = pd_id;

	if (udata && context) {
		struct qedr_alloc_pd_uresp uresp = {
			.pd_id = pd_id,
		};

		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
		if (rc) {
			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
			goto err;
		}

		pd->uctx = get_qedr_ucontext(context);
		pd->uctx->pd = pd;
	}

	return &pd->ibpd;

err:
	kfree(pd);
	return ERR_PTR(rc);
}

int qedr_dealloc_pd(struct ib_pd *ibpd)
{
	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
	struct qedr_pd *pd = get_qedr_pd(ibpd);

	if (!pd) {
		pr_err("Invalid PD received in dealloc_pd\n");
		return -EINVAL;
	}

	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);

	kfree(pd);

	return 0;
}

static void qedr_free_pbl(struct qedr_dev *dev,
			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
{
	struct pci_dev *pdev = dev->pdev;
	int i;

	for (i = 0; i < pbl_info->num_pbls; i++) {
		if (!pbl[i].va)
			continue;
		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
				  pbl[i].va, pbl[i].pa);
	}

	kfree(pbl);
}

#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)

#define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)

static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
					   struct qedr_pbl_info *pbl_info,
					   gfp_t flags)
{
	struct pci_dev *pdev = dev->pdev;
	struct qedr_pbl *pbl_table;
	dma_addr_t *pbl_main_tbl;
	dma_addr_t pa;
	void *va;
	int i;

	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
	if (!pbl_table)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < pbl_info->num_pbls; i++) {
		va = dma_zalloc_coherent(&pdev->dev, pbl_info->pbl_size,
					 &pa, flags);
		if (!va)
			goto err;

		pbl_table[i].va = va;
		pbl_table[i].pa = pa;
	}

	/* Two-Layer PBLs, if we have more than one pbl we need to initialize
	 * the first one with physical pointers to all of the rest
	 */
	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
	for (i = 0; i < pbl_info->num_pbls - 1; i++)
		pbl_main_tbl[i] = pbl_table[i + 1].pa;

	return pbl_table;

err:
	/* The PBL pages allocated before the failure are released by
	 * qedr_free_pbl() below (their va pointers are still set, while
	 * the never-allocated entries are NULL from kcalloc); freeing them
	 * here as well would be a double free.
	 */
	qedr_free_pbl(dev, pbl_info, pbl_table);

	return ERR_PTR(-ENOMEM);
}

static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
				struct qedr_pbl_info *pbl_info,
				u32 num_pbes, int two_layer_capable)
{
	u32 pbl_capacity;
	u32 pbl_size;
	u32 num_pbls;

	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
		if (num_pbes > MAX_PBES_TWO_LAYER) {
			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
			       num_pbes);
			return -EINVAL;
		}

		/* calculate required pbl page size */
		pbl_size = MIN_FW_PBL_PAGE_SIZE;
		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
			       NUM_PBES_ON_PAGE(pbl_size);

		while (pbl_capacity < num_pbes) {
			pbl_size *= 2;
			pbl_capacity = pbl_size / sizeof(u64);
			pbl_capacity = pbl_capacity * pbl_capacity;
		}

		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
		num_pbls++;	/* One extra for layer 0, which points to the pbls */
		pbl_info->two_layered = true;
	} else {
		/* One layered PBL */
		num_pbls = 1;
		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
				 roundup_pow_of_two((num_pbes * sizeof(u64))));
		pbl_info->two_layered = false;
	}

	pbl_info->num_pbls = num_pbls;
	pbl_info->pbl_size = pbl_size;
	pbl_info->num_pbes = num_pbes;

	DP_DEBUG(dev, QEDR_MSG_MR,
		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);

	return 0;
}

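/* Worked example of the sizing above: a 4 KB PBL page holds
 * 4096 / sizeof(u64) == 512 PBEs, so a two-layer table built from
 * MIN_FW_PBL_PAGE_SIZE pages covers 512 * 512 == 262144 data pages
 * (1 GB of memory at a 4 KB FW page size). Only when num_pbes exceeds
 * that does the loop double pbl_size, and MAX_PBES_TWO_LAYER caps the
 * scheme at 8192 * 8192 PBEs with 64 KB PBL pages.
 */
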
static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
			       struct qedr_pbl *pbl,
			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
{
	int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
	u32 fw_pg_cnt, fw_pg_per_umem_pg;
	struct qedr_pbl *pbl_tbl;
	struct scatterlist *sg;
	struct regpair *pbe;
	u64 pg_addr;
	int entry;

	if (!pbl_info->num_pbes)
		return;

	/* If we have a two layered pbl, the first pbl points to the rest
	 * of the pbls and the first entry lays on the second pbl in the table
	 */
	if (pbl_info->two_layered)
		pbl_tbl = &pbl[1];
	else
		pbl_tbl = pbl;

	pbe = (struct regpair *)pbl_tbl->va;
	if (!pbe) {
		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
		return;
	}

	pbe_cnt = 0;

	shift = umem->page_shift;

	fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		pages = sg_dma_len(sg) >> shift;
		pg_addr = sg_dma_address(sg);
		for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
			for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
				pbe->lo = cpu_to_le32(pg_addr);
				pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));

				pg_addr += BIT(pg_shift);
				pbe_cnt++;
				total_num_pbes++;
				pbe++;

				if (total_num_pbes == pbl_info->num_pbes)
					return;

				/* If the given pbl is full storing the pbes,
				 * move to next pbl.
				 */
				if (pbe_cnt ==
				    (pbl_info->pbl_size / sizeof(u64))) {
					pbl_tbl++;
					pbe = (struct regpair *)pbl_tbl->va;
					pbe_cnt = 0;
				}

				fw_pg_cnt++;
			}
		}
	}
}

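/* Worked example for fw_pg_per_umem_pg above (assuming pg_shift ==
 * FW_PAGE_SHIFT == 12, i.e. 4 KB FW pages): a umem mapped with 4 KB
 * pages (page_shift == 12) gives BIT(12 - 12) == 1, one PBE per umem
 * page. A umem backed by 2 MB huge pages (page_shift == 21) would give
 * BIT(21 - 12) == 512 consecutive 4 KB FW pages, and thus 512 PBEs,
 * per umem page.
 */
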
static int qedr_copy_cq_uresp(struct qedr_dev *dev,
			      struct qedr_cq *cq, struct ib_udata *udata)
{
	struct qedr_create_cq_uresp uresp;
	int rc;

	memset(&uresp, 0, sizeof(uresp));

	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
	uresp.icid = cq->icid;

	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (rc)
		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);

	return rc;
}

static void consume_cqe(struct qedr_cq *cq)
{
	if (cq->latest_cqe == cq->toggle_cqe)
		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;

	cq->latest_cqe = qed_chain_consume(&cq->pbl);
}

static inline int qedr_align_cq_entries(int entries)
{
	u64 size, aligned_size;

	/* We allocate an extra entry that we don't report to the FW. */
	size = (entries + 1) * QEDR_CQE_SIZE;
	aligned_size = ALIGN(size, PAGE_SIZE);

	return aligned_size / QEDR_CQE_SIZE;
}

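/* Worked example of the alignment above (assuming QEDR_CQE_SIZE == 32
 * and PAGE_SIZE == 4096; the CQE size is an assumption for
 * illustration): a request for 100 entries gives
 * size == 101 * 32 == 3232 bytes, ALIGN() rounds that up to 4096, and
 * 4096 / 32 == 128 entries are returned -- a full page's worth,
 * including the extra entry hidden from the FW.
 */
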
static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
				       struct qedr_dev *dev,
				       struct qedr_userq *q,
				       u64 buf_addr, size_t buf_len,
				       int access, int dmasync,
				       int alloc_and_init)
{
	u32 fw_pages;
	int rc;

	q->buf_addr = buf_addr;
	q->buf_len = buf_len;
	q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
	if (IS_ERR(q->umem)) {
		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
		       PTR_ERR(q->umem));
		return PTR_ERR(q->umem);
	}

	fw_pages = ib_umem_page_count(q->umem) <<
	    (q->umem->page_shift - FW_PAGE_SHIFT);

	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
	if (rc)
		goto err0;

	if (alloc_and_init) {
		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
		if (IS_ERR(q->pbl_tbl)) {
			rc = PTR_ERR(q->pbl_tbl);
			goto err0;
		}
		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
				   FW_PAGE_SHIFT);
	} else {
		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
		if (!q->pbl_tbl) {
			rc = -ENOMEM;
			goto err0;
		}
	}

	return 0;

err0:
	ib_umem_release(q->umem);
	q->umem = NULL;

	return rc;
}

static inline void qedr_init_cq_params(struct qedr_cq *cq,
				       struct qedr_ucontext *ctx,
				       struct qedr_dev *dev, int vector,
				       int chain_entries, int page_cnt,
				       u64 pbl_ptr,
				       struct qed_rdma_create_cq_in_params
				       *params)
{
	memset(params, 0, sizeof(*params));
	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
	params->cnq_id = vector;
	params->cq_size = chain_entries - 1;
	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
	params->pbl_num_pages = page_cnt;
	params->pbl_ptr = pbl_ptr;
	params->pbl_two_level = 0;
}

static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
{
	cq->db.data.agg_flags = flags;
	cq->db.data.value = cpu_to_le32(cons);
	writeq(cq->db.raw, cq->db_addr);

	/* Make sure write would stick */
	mmiowb();
}

int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct qedr_cq *cq = get_qedr_cq(ibcq);
	unsigned long sflags;
	struct qedr_dev *dev;

	dev = get_qedr_dev(ibcq->device);

	if (cq->destroyed) {
		DP_ERR(dev,
		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
		       cq, cq->icid);
		return -EINVAL;
	}

	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
		return 0;

	spin_lock_irqsave(&cq->cq_lock, sflags);

	cq->arm_flags = 0;

	if (flags & IB_CQ_SOLICITED)
		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;

	if (flags & IB_CQ_NEXT_COMP)
		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;

	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);

	spin_unlock_irqrestore(&cq->cq_lock, sflags);

	return 0;
}

struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
			     const struct ib_cq_init_attr *attr,
			     struct ib_ucontext *ib_ctx, struct ib_udata *udata)
{
	struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
	struct qed_rdma_destroy_cq_out_params destroy_oparams;
	struct qed_rdma_destroy_cq_in_params destroy_iparams;
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qed_rdma_create_cq_in_params params;
	struct qedr_create_cq_ureq ureq;
	int vector = attr->comp_vector;
	int entries = attr->cqe;
	struct qedr_cq *cq;
	int chain_entries;
	int page_cnt;
	u64 pbl_ptr;
	u16 icid;
	int rc;

	DP_DEBUG(dev, QEDR_MSG_INIT,
		 "create_cq: called from %s. entries=%d, vector=%d\n",
		 udata ? "User Lib" : "Kernel", entries, vector);

	if (entries > QEDR_MAX_CQES) {
		DP_ERR(dev,
		       "create cq: the number of entries %d is too high. Must be equal to or below %d.\n",
		       entries, QEDR_MAX_CQES);
		return ERR_PTR(-EINVAL);
	}

	chain_entries = qedr_align_cq_entries(entries);
	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return ERR_PTR(-ENOMEM);

	if (udata) {
		memset(&ureq, 0, sizeof(ureq));
		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
			DP_ERR(dev,
			       "create cq: problem copying data from user space\n");
			goto err0;
		}

		if (!ureq.len) {
			DP_ERR(dev,
			       "create cq: cannot create a cq with 0 entries\n");
			goto err0;
		}

		cq->cq_type = QEDR_CQ_TYPE_USER;

		rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
					  ureq.len, IB_ACCESS_LOCAL_WRITE,
					  1, 1);
		if (rc)
			goto err0;

		pbl_ptr = cq->q.pbl_tbl->pa;
		page_cnt = cq->q.pbl_info.num_pbes;

		cq->ibcq.cqe = chain_entries;
	} else {
		cq->cq_type = QEDR_CQ_TYPE_KERNEL;

		rc = dev->ops->common->chain_alloc(dev->cdev,
						   QED_CHAIN_USE_TO_CONSUME,
						   QED_CHAIN_MODE_PBL,
						   QED_CHAIN_CNT_TYPE_U32,
						   chain_entries,
						   sizeof(union rdma_cqe),
						   &cq->pbl, NULL);
		if (rc)
			goto err1;

		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
		cq->ibcq.cqe = cq->pbl.capacity;
	}

	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
			    pbl_ptr, &params);

	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
	if (rc)
		goto err2;

	cq->icid = icid;
	cq->sig = QEDR_CQ_MAGIC_NUMBER;
	spin_lock_init(&cq->cq_lock);

	if (ib_ctx) {
		rc = qedr_copy_cq_uresp(dev, cq, udata);
		if (rc)
			goto err3;
	} else {
		/* Generate doorbell address. */
		cq->db_addr = dev->db_addr +
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
		cq->db.data.icid = cq->icid;
		cq->db.data.params = DB_AGG_CMD_SET <<
		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;

		/* Point to the very last element; once we pass it, toggle. */
		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
		cq->latest_cqe = NULL;
		consume_cqe(cq);
		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
	}

	DP_DEBUG(dev, QEDR_MSG_CQ,
		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
		 cq->icid, cq, params.cq_size);

	return &cq->ibcq;

err3:
	destroy_iparams.icid = cq->icid;
	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
				  &destroy_oparams);
err2:
	if (udata)
		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
	else
		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
err1:
	if (udata)
		ib_umem_release(cq->q.umem);
err0:
	kfree(cq);
	return ERR_PTR(-EINVAL);
}

int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
	struct qedr_cq *cq = get_qedr_cq(ibcq);

	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);

	return 0;
}

#define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
#define QEDR_DESTROY_CQ_ITER_DURATION		(10)

int qedr_destroy_cq(struct ib_cq *ibcq)
{
	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
	struct qed_rdma_destroy_cq_out_params oparams;
	struct qed_rdma_destroy_cq_in_params iparams;
	struct qedr_cq *cq = get_qedr_cq(ibcq);
	int iter;
	int rc;

	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);

	cq->destroyed = 1;

	/* GSI CQs are handled by the driver, so they don't exist in the FW */
	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
		goto done;

	iparams.icid = cq->icid;
	rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
	if (rc)
		return rc;

	dev->ops->common->chain_free(dev->cdev, &cq->pbl);

	if (ibcq->uobject && ibcq->uobject->context) {
		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
		ib_umem_release(cq->q.umem);
	}

	/* We don't want the IRQ handler to handle a non-existing CQ, so we
	 * wait until all CNQ interrupts, if any, are received. This will
	 * always happen, and happen very fast; if not, a serious error has
	 * occurred, which is why we can afford a long delay.
	 * We spin for a short time so we don't lose time on context switching
	 * in case all the completions are handled within that span. Otherwise
	 * we sleep for a while and check again. Since the CNQ may be
	 * associated with (only) the current CPU, we use msleep to allow the
	 * current CPU to be freed.
	 * The CNQ notification count is increased in qedr_irq_handler().
	 */
	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
		iter--;
	}

	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
		iter--;
	}

	if (oparams.num_cq_notif != cq->cnq_notif)
		goto err;

	/* Note that we don't need explicit code to wait for the completion
	 * of the event handler because it is invoked from the EQ. Since the
	 * destroy CQ ramrod has also been received on the EQ, we can be
	 * certain that no event handler is still in progress.
	 */
done:
	cq->sig = ~cq->sig;

	kfree(cq);

	return 0;

err:
	DP_ERR(dev,
	       "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
	       cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);

	return -EINVAL;
}

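/* The wait budget implied by the two loops in qedr_destroy_cq(): first
 * a busy-wait of QEDR_DESTROY_CQ_MAX_ITERATIONS *
 * QEDR_DESTROY_CQ_ITER_DURATION == 10 * 10 us == 100 us, then a
 * sleeping wait of 10 * 10 ms == 100 ms, so a missing CNQ notification
 * is reported as an error after roughly 100 ms in the worst case.
 */
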
static inline int get_gid_info_from_table(struct ib_qp *ibqp,
					  struct ib_qp_attr *attr,
					  int attr_mask,
					  struct qed_rdma_modify_qp_in_params
					  *qp_params)
{
	enum rdma_network_type nw_type;
	struct ib_gid_attr gid_attr;
	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
	union ib_gid gid;
	u32 ipv4_addr;
	int rc = 0;
	int i;

	rc = ib_get_cached_gid(ibqp->device,
			       rdma_ah_get_port_num(&attr->ah_attr),
			       grh->sgid_index, &gid, &gid_attr);
	if (rc)
		return rc;

	qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);

	dev_put(gid_attr.ndev);
	nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
	switch (nw_type) {
	case RDMA_NETWORK_IPV6:
		memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
		       sizeof(qp_params->sgid));
		memcpy(&qp_params->dgid.bytes[0],
		       &grh->dgid,
		       sizeof(qp_params->dgid));
		qp_params->roce_mode = ROCE_V2_IPV6;
		SET_FIELD(qp_params->modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
		break;
	case RDMA_NETWORK_IB:
		memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
		       sizeof(qp_params->sgid));
		memcpy(&qp_params->dgid.bytes[0],
		       &grh->dgid,
		       sizeof(qp_params->dgid));
		qp_params->roce_mode = ROCE_V1;
		break;
	case RDMA_NETWORK_IPV4:
		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
		ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
		qp_params->sgid.ipv4_addr = ipv4_addr;
		ipv4_addr =
		    qedr_get_ipv4_from_gid(grh->dgid.raw);
		qp_params->dgid.ipv4_addr = ipv4_addr;
		SET_FIELD(qp_params->modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
		qp_params->roce_mode = ROCE_V2_IPV4;
		break;
	}

	for (i = 0; i < 4; i++) {
		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
	}

	if (qp_params->vlan_id >= VLAN_CFI_MASK)
		qp_params->vlan_id = 0;

	return 0;
}

static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
			       struct ib_qp_init_attr *attrs)
{
	struct qedr_device_attr *qattr = &dev->attr;

	/* QP0... attrs->qp_type == IB_QPT_GSI */
	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
		DP_DEBUG(dev, QEDR_MSG_QP,
			 "create qp: unsupported qp type=0x%x requested\n",
			 attrs->qp_type);
		return -EINVAL;
	}

	if (attrs->cap.max_send_wr > qattr->max_sqe) {
		DP_ERR(dev,
		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
		       attrs->cap.max_send_wr, qattr->max_sqe);
		return -EINVAL;
	}

	if (attrs->cap.max_inline_data > qattr->max_inline) {
		DP_ERR(dev,
		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
		       attrs->cap.max_inline_data, qattr->max_inline);
		return -EINVAL;
	}

	if (attrs->cap.max_send_sge > qattr->max_sge) {
		DP_ERR(dev,
		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
		       attrs->cap.max_send_sge, qattr->max_sge);
		return -EINVAL;
	}

	if (attrs->cap.max_recv_sge > qattr->max_sge) {
		DP_ERR(dev,
		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
		       attrs->cap.max_recv_sge, qattr->max_sge);
		return -EINVAL;
	}

	/* Unprivileged user space cannot create special QP */
	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
		DP_ERR(dev,
		       "create qp: userspace can't create special QPs of type=0x%x\n",
		       attrs->qp_type);
		return -EINVAL;
	}

	return 0;
}

static void qedr_copy_rq_uresp(struct qedr_dev *dev,
			       struct qedr_create_qp_uresp *uresp,
			       struct qedr_qp *qp)
{
	/* iWARP requires two doorbells per RQ. */
	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
		uresp->rq_db_offset =
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
	} else {
		uresp->rq_db_offset =
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
	}

	uresp->rq_icid = qp->icid;
}

static void qedr_copy_sq_uresp(struct qedr_dev *dev,
			       struct qedr_create_qp_uresp *uresp,
			       struct qedr_qp *qp)
{
	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);

	/* iWARP uses the same cid for rq and sq */
	if (rdma_protocol_iwarp(&dev->ibdev, 1))
		uresp->sq_icid = qp->icid;
	else
		uresp->sq_icid = qp->icid + 1;
}

static int qedr_copy_qp_uresp(struct qedr_dev *dev,
			      struct qedr_qp *qp, struct ib_udata *udata)
{
	struct qedr_create_qp_uresp uresp;
	int rc;

	memset(&uresp, 0, sizeof(uresp));
	qedr_copy_sq_uresp(dev, &uresp, qp);
	qedr_copy_rq_uresp(dev, &uresp, qp);

	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
	uresp.qp_id = qp->qp_id;

	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (rc)
		DP_ERR(dev,
		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
		       qp->icid);

	return rc;
}

static void qedr_set_common_qp_params(struct qedr_dev *dev,
				      struct qedr_qp *qp,
				      struct qedr_pd *pd,
				      struct ib_qp_init_attr *attrs)
{
	spin_lock_init(&qp->q_lock);
	atomic_set(&qp->refcnt, 1);
	qp->pd = pd;
	qp->qp_type = attrs->qp_type;
	qp->max_inline_data = attrs->cap.max_inline_data;
	qp->sq.max_sges = attrs->cap.max_send_sge;
	qp->state = QED_ROCE_QP_STATE_RESET;
	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
	qp->sq_cq = get_qedr_cq(attrs->send_cq);
	qp->rq_cq = get_qedr_cq(attrs->recv_cq);
	qp->dev = dev;
	qp->rq.max_sges = attrs->cap.max_recv_sge;

	DP_DEBUG(dev, QEDR_MSG_QP,
		 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
		 qp->rq.max_sges, qp->rq_cq->icid);
	DP_DEBUG(dev, QEDR_MSG_QP,
		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
		 pd->pd_id, qp->qp_type, qp->max_inline_data,
		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
	DP_DEBUG(dev, QEDR_MSG_QP,
		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
		 qp->sq.max_sges, qp->sq_cq->icid);
}

static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
	qp->sq.db = dev->db_addr +
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
	qp->sq.db_data.data.icid = qp->icid + 1;
	qp->rq.db = dev->db_addr +
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
	qp->rq.db_data.data.icid = qp->icid;
}

static inline void
qedr_init_common_qp_in_params(struct qedr_dev *dev,
			      struct qedr_pd *pd,
			      struct qedr_qp *qp,
			      struct ib_qp_init_attr *attrs,
			      bool fmr_and_reserved_lkey,
			      struct qed_rdma_create_qp_in_params *params)
{
	/* QP handle to be written in an async event */
	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);

	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
	params->pd = pd->pd_id;
	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
	params->stats_queue = 0;
	params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
	params->srq_id = 0;
	params->use_srq = false;
}

static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
{
	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
		 "qp=%p. "
		 "sq_addr=0x%llx, "
		 "sq_len=%zd, "
		 "rq_addr=0x%llx, "
		 "rq_len=%zd"
		 "\n",
		 qp,
		 qp->usq.buf_addr,
		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
}

static int qedr_idr_add(struct qedr_dev *dev, void *ptr, u32 id)
{
	int rc;

	if (!rdma_protocol_iwarp(&dev->ibdev, 1))
		return 0;

	idr_preload(GFP_KERNEL);
	spin_lock_irq(&dev->idr_lock);

	rc = idr_alloc(&dev->qpidr, ptr, id, id + 1, GFP_ATOMIC);

	spin_unlock_irq(&dev->idr_lock);
	idr_preload_end();

	return rc < 0 ? rc : 0;
}

static void qedr_idr_remove(struct qedr_dev *dev, u32 id)
{
	if (!rdma_protocol_iwarp(&dev->ibdev, 1))
		return;

	spin_lock_irq(&dev->idr_lock);
	idr_remove(&dev->qpidr, id);
	spin_unlock_irq(&dev->idr_lock);
}

static inline void
qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
			    struct qedr_qp *qp,
			    struct qed_rdma_create_qp_out_params *out_params)
{
	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;

	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
			   &qp->usq.pbl_info, FW_PAGE_SHIFT);

	qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
	qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;

	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
}

static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
{
	if (qp->usq.umem)
		ib_umem_release(qp->usq.umem);
	qp->usq.umem = NULL;

	if (qp->urq.umem)
		ib_umem_release(qp->urq.umem);
	qp->urq.umem = NULL;
}

static int qedr_create_user_qp(struct qedr_dev *dev,
			       struct qedr_qp *qp,
			       struct ib_pd *ibpd,
			       struct ib_udata *udata,
			       struct ib_qp_init_attr *attrs)
{
	struct qed_rdma_create_qp_in_params in_params;
	struct qed_rdma_create_qp_out_params out_params;
	struct qedr_pd *pd = get_qedr_pd(ibpd);
	struct ib_ucontext *ib_ctx = NULL;
	struct qedr_create_qp_ureq ureq;
	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
	int rc = -EINVAL;

	ib_ctx = ibpd->uobject->context;

	memset(&ureq, 0, sizeof(ureq));
	rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
	if (rc) {
		DP_ERR(dev, "Problem copying data from user space\n");
		return rc;
	}

	/* SQ - read access only (0), dma sync not required (0) */
	rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
				  ureq.sq_len, 0, 0, alloc_and_init);
	if (rc)
		return rc;

	/* RQ - read access only (0), dma sync not required (0) */
	rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
				  ureq.rq_len, 0, 0, alloc_and_init);
	if (rc)
		return rc;

	memset(&in_params, 0, sizeof(in_params));
	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
	in_params.qp_handle_lo = ureq.qp_handle_lo;
	in_params.qp_handle_hi = ureq.qp_handle_hi;
	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
	in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
	in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;

	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
					      &in_params, &out_params);

	if (!qp->qed_qp) {
		rc = -ENOMEM;
		goto err1;
	}

	if (rdma_protocol_iwarp(&dev->ibdev, 1))
		qedr_iwarp_populate_user_qp(dev, qp, &out_params);

	qp->qp_id = out_params.qp_id;
	qp->icid = out_params.icid;

	rc = qedr_copy_qp_uresp(dev, qp, udata);
	if (rc)
		goto err;

	qedr_qp_user_print(dev, qp);

	return 0;
err:
	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
	if (rc)
		DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);

err1:
	qedr_cleanup_user(dev, qp);
	return rc;
}

static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
	qp->sq.db = dev->db_addr +
	    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
	qp->sq.db_data.data.icid = qp->icid;

	qp->rq.db = dev->db_addr +
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
	qp->rq.db_data.data.icid = qp->icid;
	qp->rq.iwarp_db2 = dev->db_addr +
			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
	qp->rq.iwarp_db2_data.data.icid = qp->icid;
	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
}

static int
qedr_roce_create_kernel_qp(struct qedr_dev *dev,
			   struct qedr_qp *qp,
			   struct qed_rdma_create_qp_in_params *in_params,
			   u32 n_sq_elems, u32 n_rq_elems)
{
	struct qed_rdma_create_qp_out_params out_params;
	int rc;

	rc = dev->ops->common->chain_alloc(dev->cdev,
					   QED_CHAIN_USE_TO_PRODUCE,
					   QED_CHAIN_MODE_PBL,
					   QED_CHAIN_CNT_TYPE_U32,
					   n_sq_elems,
					   QEDR_SQE_ELEMENT_SIZE,
					   &qp->sq.pbl, NULL);

	if (rc)
		return rc;

	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);

	rc = dev->ops->common->chain_alloc(dev->cdev,
					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
					   QED_CHAIN_MODE_PBL,
					   QED_CHAIN_CNT_TYPE_U32,
					   n_rq_elems,
					   QEDR_RQE_ELEMENT_SIZE,
					   &qp->rq.pbl, NULL);
	if (rc)
		return rc;

	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);

	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
					      in_params, &out_params);

	if (!qp->qed_qp)
		return -EINVAL;

	qp->qp_id = out_params.qp_id;
	qp->icid = out_params.icid;

	qedr_set_roce_db_info(dev, qp);
	return rc;
}

static int
qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
			    struct qedr_qp *qp,
			    struct qed_rdma_create_qp_in_params *in_params,
			    u32 n_sq_elems, u32 n_rq_elems)
{
	struct qed_rdma_create_qp_out_params out_params;
	struct qed_chain_ext_pbl ext_pbl;
	int rc;

	in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
						     QEDR_SQE_ELEMENT_SIZE,
						     QED_CHAIN_MODE_PBL);
	in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
						     QEDR_RQE_ELEMENT_SIZE,
						     QED_CHAIN_MODE_PBL);

	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
					      in_params, &out_params);

	if (!qp->qed_qp)
		return -EINVAL;

	/* Now we allocate the chain */
	ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
	ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;

	rc = dev->ops->common->chain_alloc(dev->cdev,
					   QED_CHAIN_USE_TO_PRODUCE,
					   QED_CHAIN_MODE_PBL,
					   QED_CHAIN_CNT_TYPE_U32,
					   n_sq_elems,
					   QEDR_SQE_ELEMENT_SIZE,
					   &qp->sq.pbl, &ext_pbl);

	if (rc)
		goto err;

	ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
	ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;

	rc = dev->ops->common->chain_alloc(dev->cdev,
					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
					   QED_CHAIN_MODE_PBL,
					   QED_CHAIN_CNT_TYPE_U32,
					   n_rq_elems,
					   QEDR_RQE_ELEMENT_SIZE,
					   &qp->rq.pbl, &ext_pbl);

	if (rc)
		goto err;

	qp->qp_id = out_params.qp_id;
	qp->icid = out_params.icid;

	qedr_set_iwarp_db_info(dev, qp);
	return rc;

err:
	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);

	return rc;
}

static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
{
	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
	kfree(qp->wqe_wr_id);

	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
	kfree(qp->rqe_wr_id);
}

static int qedr_create_kernel_qp(struct qedr_dev *dev,
				 struct qedr_qp *qp,
				 struct ib_pd *ibpd,
				 struct ib_qp_init_attr *attrs)
{
	struct qed_rdma_create_qp_in_params in_params;
	struct qedr_pd *pd = get_qedr_pd(ibpd);
	int rc = -EINVAL;
	u32 n_rq_elems;
	u32 n_sq_elems;
	u32 n_sq_entries;

	memset(&in_params, 0, sizeof(in_params));

	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
	 * the ring. The ring should allow at least a single WR, even if the
	 * user requested none, due to allocation issues.
	 * We should add an extra WR since the prod and cons indices of
	 * wqe_wr_id are managed in such a way that the WQ is considered full
	 * when (prod+1)%max_wr==cons. We currently don't do that because we
	 * double the number of entries due to an iSER issue that pushes far more
	 * WRs than indicated. If we decline its ib_post_send() then we get
	 * error prints in the dmesg we'd like to avoid.
	 */
	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
			      dev->attr.max_sqe);

	qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id),
				GFP_KERNEL);
	if (!qp->wqe_wr_id) {
		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
		return -ENOMEM;
	}

	/* QP handle to be written in CQE */
	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);

	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
	 * the ring. The ring should allow at least a single WR, even if the
	 * user requested none, due to allocation issues.
	 */
	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);

	/* Allocate driver internal RQ array */
	qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id),
				GFP_KERNEL);
	if (!qp->rqe_wr_id) {
		DP_ERR(dev,
		       "create qp: failed RQ shadow memory allocation\n");
		kfree(qp->wqe_wr_id);
		return -ENOMEM;
	}

	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);

	n_sq_entries = attrs->cap.max_send_wr;
	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
	n_sq_entries = max_t(u32, n_sq_entries, 1);
	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;

	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;

	if (rdma_protocol_iwarp(&dev->ibdev, 1))
		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
						 n_sq_elems, n_rq_elems);
	else
		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
						n_sq_elems, n_rq_elems);
	if (rc)
		qedr_cleanup_kernel(dev, qp);

	return rc;
}

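/* Worked example of the SQ sizing in qedr_create_kernel_qp() (assuming
 * dev->wq_multiplier == 2 and QEDR_MAX_SQE_ELEMENTS_PER_SQE == 4; both
 * values are assumptions for illustration): a caller asking for
 * max_send_wr == 64 gets qp->sq.max_wr == 128 shadow entries, while the
 * chain itself is sized from the undoubled request:
 * n_sq_elems == 64 * 4 == 256 elements.
 */
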
1665struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1666			     struct ib_qp_init_attr *attrs,
1667			     struct ib_udata *udata)
1668{
1669	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1670	struct qedr_pd *pd = get_qedr_pd(ibpd);
1671	struct qedr_qp *qp;
1672	struct ib_qp *ibqp;
1673	int rc = 0;
1674
1675	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1676		 udata ? "user library" : "kernel", pd);
1677
1678	rc = qedr_check_qp_attrs(ibpd, dev, attrs);
1679	if (rc)
1680		return ERR_PTR(rc);
1681
1682	if (attrs->srq)
1683		return ERR_PTR(-EINVAL);
1684
1685	DP_DEBUG(dev, QEDR_MSG_QP,
1686		 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1687		 udata ? "user library" : "kernel", attrs->event_handler, pd,
1688		 get_qedr_cq(attrs->send_cq),
1689		 get_qedr_cq(attrs->send_cq)->icid,
1690		 get_qedr_cq(attrs->recv_cq),
1691		 get_qedr_cq(attrs->recv_cq)->icid);
1692
1693	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1694	if (!qp) {
1695		DP_ERR(dev, "create qp: failed allocating memory\n");
1696		return ERR_PTR(-ENOMEM);
1697	}
1698
1699	qedr_set_common_qp_params(dev, qp, pd, attrs);
1700
1701	if (attrs->qp_type == IB_QPT_GSI) {
1702		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1703		if (IS_ERR(ibqp))
1704			kfree(qp);
1705		return ibqp;
1706	}
1707
1708	if (udata)
1709		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1710	else
1711		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1712
1713	if (rc)
1714		goto err;
1715
1716	qp->ibqp.qp_num = qp->qp_id;
1717
1718	rc = qedr_idr_add(dev, qp, qp->qp_id);
1719	if (rc)
1720		goto err;
1721
1722	return &qp->ibqp;
1723
1724err:
1725	kfree(qp);
1726
1727	return ERR_PTR(-EFAULT);
1728}
1729
1730static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
1731{
1732	switch (qp_state) {
1733	case QED_ROCE_QP_STATE_RESET:
1734		return IB_QPS_RESET;
1735	case QED_ROCE_QP_STATE_INIT:
1736		return IB_QPS_INIT;
1737	case QED_ROCE_QP_STATE_RTR:
1738		return IB_QPS_RTR;
1739	case QED_ROCE_QP_STATE_RTS:
1740		return IB_QPS_RTS;
1741	case QED_ROCE_QP_STATE_SQD:
1742		return IB_QPS_SQD;
1743	case QED_ROCE_QP_STATE_ERR:
1744		return IB_QPS_ERR;
1745	case QED_ROCE_QP_STATE_SQE:
1746		return IB_QPS_SQE;
1747	}
1748	return IB_QPS_ERR;
1749}
1750
1751static enum qed_roce_qp_state qedr_get_state_from_ibqp(
1752					enum ib_qp_state qp_state)
1753{
1754	switch (qp_state) {
1755	case IB_QPS_RESET:
1756		return QED_ROCE_QP_STATE_RESET;
1757	case IB_QPS_INIT:
1758		return QED_ROCE_QP_STATE_INIT;
1759	case IB_QPS_RTR:
1760		return QED_ROCE_QP_STATE_RTR;
1761	case IB_QPS_RTS:
1762		return QED_ROCE_QP_STATE_RTS;
1763	case IB_QPS_SQD:
1764		return QED_ROCE_QP_STATE_SQD;
1765	case IB_QPS_ERR:
1766		return QED_ROCE_QP_STATE_ERR;
1767	default:
1768		return QED_ROCE_QP_STATE_ERR;
1769	}
1770}
1771
1772static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1773{
1774	qed_chain_reset(&qph->pbl);
1775	qph->prod = 0;
1776	qph->cons = 0;
1777	qph->wqe_cons = 0;
1778	qph->db_data.data.value = cpu_to_le16(0);
1779}
1780
1781static int qedr_update_qp_state(struct qedr_dev *dev,
1782				struct qedr_qp *qp,
1783				enum qed_roce_qp_state cur_state,
1784				enum qed_roce_qp_state new_state)
1785{
1786	int status = 0;
1787
1788	if (new_state == cur_state)
1789		return 0;
1790
1791	switch (cur_state) {
1792	case QED_ROCE_QP_STATE_RESET:
1793		switch (new_state) {
1794		case QED_ROCE_QP_STATE_INIT:
1795			qp->prev_wqe_size = 0;
1796			qedr_reset_qp_hwq_info(&qp->sq);
1797			qedr_reset_qp_hwq_info(&qp->rq);
1798			break;
1799		default:
1800			status = -EINVAL;
1801			break;
1802		};
1803		break;
1804	case QED_ROCE_QP_STATE_INIT:
1805		switch (new_state) {
1806		case QED_ROCE_QP_STATE_RTR:
1807			/* Update doorbell (in case post_recv was
1808			 * done before move to RTR)
1809			 */
1810
1811			if (rdma_protocol_roce(&dev->ibdev, 1)) {
1812				writel(qp->rq.db_data.raw, qp->rq.db);
1813				/* Make sure write takes effect */
1814				mmiowb();
1815			}
1816			break;
1817		case QED_ROCE_QP_STATE_ERR:
1818			break;
1819		default:
1820			/* Invalid state change. */
1821			status = -EINVAL;
1822			break;
1823		};
1824		break;
1825	case QED_ROCE_QP_STATE_RTR:
1826		/* RTR->XXX */
1827		switch (new_state) {
1828		case QED_ROCE_QP_STATE_RTS:
1829			break;
1830		case QED_ROCE_QP_STATE_ERR:
1831			break;
1832		default:
1833			/* Invalid state change. */
1834			status = -EINVAL;
1835			break;
1836		};
1837		break;
1838	case QED_ROCE_QP_STATE_RTS:
1839		/* RTS->XXX */
1840		switch (new_state) {
1841		case QED_ROCE_QP_STATE_SQD:
1842			break;
1843		case QED_ROCE_QP_STATE_ERR:
1844			break;
1845		default:
1846			/* Invalid state change. */
1847			status = -EINVAL;
1848			break;
1849		};
1850		break;
1851	case QED_ROCE_QP_STATE_SQD:
1852		/* SQD->XXX */
1853		switch (new_state) {
1854		case QED_ROCE_QP_STATE_RTS:
1855		case QED_ROCE_QP_STATE_ERR:
1856			break;
1857		default:
1858			/* Invalid state change. */
1859			status = -EINVAL;
1860			break;
1861		};
1862		break;
1863	case QED_ROCE_QP_STATE_ERR:
1864		/* ERR->XXX */
1865		switch (new_state) {
1866		case QED_ROCE_QP_STATE_RESET:
1867			if ((qp->rq.prod != qp->rq.cons) ||
1868			    (qp->sq.prod != qp->sq.cons)) {
1869				DP_NOTICE(dev,
1870					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
1871					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
1872					  qp->sq.cons);
1873				status = -EINVAL;
1874			}
1875			break;
1876		default:
1877			status = -EINVAL;
1878			break;
1879		}
1880		break;
1881	default:
1882		status = -EINVAL;
1883		break;
1884	}
1885
1886	return status;
1887}
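
/* Illustrative note: a typical verbs consumer drives the QP through the
 * transitions accepted above, e.g.:
 *
 *	qedr_update_qp_state(dev, qp, QED_ROCE_QP_STATE_RESET,
 *			     QED_ROCE_QP_STATE_INIT);
 *	qedr_update_qp_state(dev, qp, QED_ROCE_QP_STATE_INIT,
 *			     QED_ROCE_QP_STATE_RTR);
 *	qedr_update_qp_state(dev, qp, QED_ROCE_QP_STATE_RTR,
 *			     QED_ROCE_QP_STATE_RTS);
 *
 * INIT, RTR, RTS and SQD may additionally move to ERR; ERR can only return
 * to RESET, and only once both queues are empty. Skipping a step (e.g.
 * RESET -> RTS directly) yields -EINVAL.
 */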
1888
1889int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1890		   int attr_mask, struct ib_udata *udata)
1891{
1892	struct qedr_qp *qp = get_qedr_qp(ibqp);
1893	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
1894	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
1895	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1896	enum ib_qp_state old_qp_state, new_qp_state;
1897	enum qed_roce_qp_state cur_state;
1898	int rc = 0;
1899
1900	DP_DEBUG(dev, QEDR_MSG_QP,
1901		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
1902		 attr->qp_state);
1903
1904	old_qp_state = qedr_get_ibqp_state(qp->state);
1905	if (attr_mask & IB_QP_STATE)
1906		new_qp_state = attr->qp_state;
1907	else
1908		new_qp_state = old_qp_state;
1909
1910	if (rdma_protocol_roce(&dev->ibdev, 1)) {
1911		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
1912					ibqp->qp_type, attr_mask,
1913					IB_LINK_LAYER_ETHERNET)) {
1914			DP_ERR(dev,
1915			       "modify qp: invalid attribute mask=0x%x specified for\n"
1916			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
1917			       attr_mask, qp->qp_id, ibqp->qp_type,
1918			       old_qp_state, new_qp_state);
1919			rc = -EINVAL;
1920			goto err;
1921		}
1922	}
1923
1924	/* Translate the masks... */
1925	if (attr_mask & IB_QP_STATE) {
1926		SET_FIELD(qp_params.modify_flags,
1927			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
1928		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
1929	}
1930
1931	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
1932		qp_params.sqd_async = true;
1933
1934	if (attr_mask & IB_QP_PKEY_INDEX) {
1935		SET_FIELD(qp_params.modify_flags,
1936			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
1937		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
1938			rc = -EINVAL;
1939			goto err;
1940		}
1941
1942		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
1943	}
1944
1945	if (attr_mask & IB_QP_QKEY)
1946		qp->qkey = attr->qkey;
1947
1948	if (attr_mask & IB_QP_ACCESS_FLAGS) {
1949		SET_FIELD(qp_params.modify_flags,
1950			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
1951		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
1952						  IB_ACCESS_REMOTE_READ;
1953		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
1954						   IB_ACCESS_REMOTE_WRITE;
1955		qp_params.incoming_atomic_en = attr->qp_access_flags &
1956					       IB_ACCESS_REMOTE_ATOMIC;
1957	}
1958
1959	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
1960		if (attr_mask & IB_QP_PATH_MTU) {
1961			if (attr->path_mtu < IB_MTU_256 ||
1962			    attr->path_mtu > IB_MTU_4096) {
1963				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
1964				rc = -EINVAL;
1965				goto err;
1966			}
1967			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
1968				      ib_mtu_enum_to_int(iboe_get_mtu
1969							 (dev->ndev->mtu)));
1970		}
1971
1972		if (!qp->mtu) {
1973			qp->mtu =
1974			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1975			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
1976		}
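
		/* Worked example (illustrative): with attr->path_mtu ==
		 * IB_MTU_4096 but a netdev MTU of 1500 bytes,
		 * iboe_get_mtu(1500) yields IB_MTU_1024, so
		 * qp->mtu = min(4096, 1024) = 1024; the path MTU can never
		 * exceed what the Ethernet MTU minus the RoCE overhead can
		 * carry.
		 */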
1977
1978		SET_FIELD(qp_params.modify_flags,
1979			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
1980
1981		qp_params.traffic_class_tos = grh->traffic_class;
1982		qp_params.flow_label = grh->flow_label;
1983		qp_params.hop_limit_ttl = grh->hop_limit;
1984
1985		qp->sgid_idx = grh->sgid_index;
1986
1987		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
1988		if (rc) {
1989			DP_ERR(dev,
1990			       "modify qp: problems with GID index %d (rc=%d)\n",
1991			       grh->sgid_index, rc);
1992			return rc;
1993		}
1994
1995		rc = qedr_get_dmac(dev, &attr->ah_attr,
1996				   qp_params.remote_mac_addr);
1997		if (rc)
1998			return rc;
1999
2000		qp_params.use_local_mac = true;
2001		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2002
2003		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2004			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2005			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2006		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2007			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2008			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2009		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2010			 qp_params.remote_mac_addr);
2011
2012		qp_params.mtu = qp->mtu;
2013		qp_params.lb_indication = false;
2014	}
2015
2016	if (!qp_params.mtu) {
2017		/* Stay with current MTU */
2018		if (qp->mtu)
2019			qp_params.mtu = qp->mtu;
2020		else
2021			qp_params.mtu =
2022			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2023	}
2024
2025	if (attr_mask & IB_QP_TIMEOUT) {
2026		SET_FIELD(qp_params.modify_flags,
2027			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2028
2029		/* The received timeout value is an exponent used like this:
2030		 *    "12.7.34 LOCAL ACK TIMEOUT
2031		 *    Value representing the transport (ACK) timeout for use by
2032		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2033		 * The FW expects timeout in msec so we need to divide the usec
2034		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2035		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2036		 * The value of zero means infinite, so we encode it as 0. We use
2037		 * 'max_t' to ensure that sub-1 msec values are configured as 1 msec.
2038		 */
2039		if (attr->timeout)
2040			qp_params.ack_timeout =
2041					1 << max_t(int, attr->timeout - 8, 0);
2042		else
2043			qp_params.ack_timeout = 0;
2044	}
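
	/* Worked example (illustrative): attr->timeout == 14 encodes
	 * 4.096 * 2^14 usec ~= 67 msec; the approximation above gives
	 * 1 << (14 - 8) = 64 msec. For attr->timeout <= 8 the max_t()
	 * clamp yields 1 << 0 = 1 msec.
	 */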
2045
2046	if (attr_mask & IB_QP_RETRY_CNT) {
2047		SET_FIELD(qp_params.modify_flags,
2048			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2049		qp_params.retry_cnt = attr->retry_cnt;
2050	}
2051
2052	if (attr_mask & IB_QP_RNR_RETRY) {
2053		SET_FIELD(qp_params.modify_flags,
2054			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2055		qp_params.rnr_retry_cnt = attr->rnr_retry;
2056	}
2057
2058	if (attr_mask & IB_QP_RQ_PSN) {
2059		SET_FIELD(qp_params.modify_flags,
2060			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2061		qp_params.rq_psn = attr->rq_psn;
2062		qp->rq_psn = attr->rq_psn;
2063	}
2064
2065	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2066		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2067			rc = -EINVAL;
2068			DP_ERR(dev,
2069			       "unsupported max_rd_atomic=%d, supported=%d\n",
2070			       attr->max_rd_atomic,
2071			       dev->attr.max_qp_req_rd_atomic_resc);
2072			goto err;
2073		}
2074
2075		SET_FIELD(qp_params.modify_flags,
2076			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2077		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2078	}
2079
2080	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2081		SET_FIELD(qp_params.modify_flags,
2082			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2083		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2084	}
2085
2086	if (attr_mask & IB_QP_SQ_PSN) {
2087		SET_FIELD(qp_params.modify_flags,
2088			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2089		qp_params.sq_psn = attr->sq_psn;
2090		qp->sq_psn = attr->sq_psn;
2091	}
2092
2093	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2094		if (attr->max_dest_rd_atomic >
2095		    dev->attr.max_qp_resp_rd_atomic_resc) {
2096			DP_ERR(dev,
2097			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2098			       attr->max_dest_rd_atomic,
2099			       dev->attr.max_qp_resp_rd_atomic_resc);
2100
2101			rc = -EINVAL;
2102			goto err;
2103		}
2104
2105		SET_FIELD(qp_params.modify_flags,
2106			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2107		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2108	}
2109
2110	if (attr_mask & IB_QP_DEST_QPN) {
2111		SET_FIELD(qp_params.modify_flags,
2112			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2113
2114		qp_params.dest_qp = attr->dest_qp_num;
2115		qp->dest_qp_num = attr->dest_qp_num;
2116	}
2117
2118	cur_state = qp->state;
2119
2120	/* Update the QP state before the actual ramrod to prevent a race with
2121	 * the fast path. Modifying the QP state to error will cause the device
2122	 * to flush the CQEs; while polling, the flushed CQEs would be treated
2123	 * as a potential issue if the QP weren't in the error state.
2124	 */
2125	if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2126	    !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2127		qp->state = QED_ROCE_QP_STATE_ERR;
2128
2129	if (qp->qp_type != IB_QPT_GSI)
2130		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2131					      qp->qed_qp, &qp_params);
2132
2133	if (attr_mask & IB_QP_STATE) {
2134		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2135			rc = qedr_update_qp_state(dev, qp, cur_state,
2136						  qp_params.new_state);
2137		qp->state = qp_params.new_state;
2138	}
2139
2140err:
2141	return rc;
2142}
2143
2144static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2145{
2146	int ib_qp_acc_flags = 0;
2147
2148	if (params->incoming_rdma_write_en)
2149		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2150	if (params->incoming_rdma_read_en)
2151		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2152	if (params->incoming_atomic_en)
2153		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2154	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2155	return ib_qp_acc_flags;
2156}
2157
2158int qedr_query_qp(struct ib_qp *ibqp,
2159		  struct ib_qp_attr *qp_attr,
2160		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2161{
2162	struct qed_rdma_query_qp_out_params params;
2163	struct qedr_qp *qp = get_qedr_qp(ibqp);
2164	struct qedr_dev *dev = qp->dev;
2165	int rc = 0;
2166
2167	memset(&params, 0, sizeof(params));
2168
2169	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2170	if (rc)
2171		goto err;
2172
2173	memset(qp_attr, 0, sizeof(*qp_attr));
2174	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2175
2176	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2177	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2178	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2179	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2180	qp_attr->rq_psn = params.rq_psn;
2181	qp_attr->sq_psn = params.sq_psn;
2182	qp_attr->dest_qp_num = params.dest_qp;
2183
2184	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2185
2186	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2187	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2188	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2189	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2190	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2191	qp_init_attr->cap = qp_attr->cap;
2192
2193	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2194	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2195			params.flow_label, qp->sgid_idx,
2196			params.hop_limit_ttl, params.traffic_class_tos);
2197	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2198	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2199	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2200	qp_attr->timeout = params.timeout;
2201	qp_attr->rnr_retry = params.rnr_retry;
2202	qp_attr->retry_cnt = params.retry_cnt;
2203	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2204	qp_attr->pkey_index = params.pkey_index;
2205	qp_attr->port_num = 1;
2206	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2207	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2208	qp_attr->alt_pkey_index = 0;
2209	qp_attr->alt_port_num = 0;
2210	qp_attr->alt_timeout = 0;
2211	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2212
2213	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2214	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2215	qp_attr->max_rd_atomic = params.max_rd_atomic;
2216	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2217
2218	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2219		 qp_attr->cap.max_inline_data);
2220
2221err:
2222	return rc;
2223}
2224
2225static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
2226{
2227	int rc = 0;
2228
2229	if (qp->qp_type != IB_QPT_GSI) {
2230		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2231		if (rc)
2232			return rc;
2233	}
2234
2235	if (qp->ibqp.uobject && qp->ibqp.uobject->context)
2236		qedr_cleanup_user(dev, qp);
2237	else
2238		qedr_cleanup_kernel(dev, qp);
2239
2240	return 0;
2241}
2242
2243int qedr_destroy_qp(struct ib_qp *ibqp)
2244{
2245	struct qedr_qp *qp = get_qedr_qp(ibqp);
2246	struct qedr_dev *dev = qp->dev;
2247	struct ib_qp_attr attr;
2248	int attr_mask = 0;
2249	int rc = 0;
2250
2251	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2252		 qp, qp->qp_type);
2253
2254	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2255		if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2256		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2257		    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2258
2259			attr.qp_state = IB_QPS_ERR;
2260			attr_mask |= IB_QP_STATE;
2261
2262			/* Change the QP state to ERROR */
2263			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2264		}
2265	} else {
2266		/* Wait for the connect/accept to complete */
2267		if (qp->ep) {
2268			int wait_count = 1;
2269
2270			while (qp->ep->during_connect) {
2271				DP_DEBUG(dev, QEDR_MSG_QP,
2272					 "Still in connect/accept\n");
2273
2274				msleep(100);
2275				if (wait_count++ > 200) {
2276					DP_NOTICE(dev,
2277						  "connect/accept timed out\n");
2278					break;
2279				}
2280			}
2281		}
2282	}
2283
2284	if (qp->qp_type == IB_QPT_GSI)
2285		qedr_destroy_gsi_qp(dev);
2286
2287	qedr_free_qp_resources(dev, qp);
2288
2289	if (atomic_dec_and_test(&qp->refcnt)) {
2290		qedr_idr_remove(dev, qp->qp_id);
2291		kfree(qp);
2292	}
2293	return rc;
2294}
2295
2296struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
2297			     struct ib_udata *udata)
2298{
2299	struct qedr_ah *ah;
2300
2301	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
2302	if (!ah)
2303		return ERR_PTR(-ENOMEM);
2304
2305	ah->attr = *attr;
2306
2307	return &ah->ibah;
2308}
2309
2310int qedr_destroy_ah(struct ib_ah *ibah)
2311{
2312	struct qedr_ah *ah = get_qedr_ah(ibah);
2313
2314	kfree(ah);
2315	return 0;
2316}
2317
2318static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2319{
2320	struct qedr_pbl *pbl, *tmp;
2321
2322	if (info->pbl_table)
2323		list_add_tail(&info->pbl_table->list_entry,
2324			      &info->free_pbl_list);
2325
2326	if (!list_empty(&info->inuse_pbl_list))
2327		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2328
2329	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2330		list_del(&pbl->list_entry);
2331		qedr_free_pbl(dev, &info->pbl_info, pbl);
2332	}
2333}
2334
2335static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2336			size_t page_list_len, bool two_layered)
2337{
2338	struct qedr_pbl *tmp;
2339	int rc;
2340
2341	INIT_LIST_HEAD(&info->free_pbl_list);
2342	INIT_LIST_HEAD(&info->inuse_pbl_list);
2343
2344	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2345				  page_list_len, two_layered);
2346	if (rc)
2347		goto done;
2348
2349	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2350	if (IS_ERR(info->pbl_table)) {
2351		rc = PTR_ERR(info->pbl_table);
2352		goto done;
2353	}
2354
2355	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2356		 &info->pbl_table->pa);
2357
2358	/* In the usual case we use 2 PBLs, so we add one to the free
2359	 * list and allocate another one.
2360	 */
2361	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2362	if (IS_ERR(tmp)) {
2363		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2364		goto done;
2365	}
2366
2367	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2368
2369	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2370
2371done:
2372	if (rc)
2373		free_mr_info(dev, info);
2374
2375	return rc;
2376}
2377
2378struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2379			       u64 usr_addr, int acc, struct ib_udata *udata)
2380{
2381	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2382	struct qedr_mr *mr;
2383	struct qedr_pd *pd;
2384	int rc = -ENOMEM;
2385
2386	pd = get_qedr_pd(ibpd);
2387	DP_DEBUG(dev, QEDR_MSG_MR,
2388		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2389		 pd->pd_id, start, len, usr_addr, acc);
2390
2391	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2392		return ERR_PTR(-EINVAL);
2393
2394	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2395	if (!mr)
2396		return ERR_PTR(rc);
2397
2398	mr->type = QEDR_MR_USER;
2399
2400	mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
2401	if (IS_ERR(mr->umem)) {
2402		rc = -EFAULT;
2403		goto err0;
2404	}
2405
2406	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2407	if (rc)
2408		goto err1;
2409
2410	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2411			   &mr->info.pbl_info, mr->umem->page_shift);
2412
2413	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2414	if (rc) {
2415		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2416		goto err1;
2417	}
2418
2419	/* Index only, 18 bits long; lkey = itid << 8 | key */
2420	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2421	mr->hw_mr.key = 0;
2422	mr->hw_mr.pd = pd->pd_id;
2423	mr->hw_mr.local_read = 1;
2424	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2425	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2426	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2427	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2428	mr->hw_mr.mw_bind = false;
2429	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2430	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2431	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2432	mr->hw_mr.page_size_log = mr->umem->page_shift;
2433	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2434	mr->hw_mr.length = len;
2435	mr->hw_mr.vaddr = usr_addr;
2436	mr->hw_mr.zbva = false;
2437	mr->hw_mr.phy_mr = false;
2438	mr->hw_mr.dma_mr = false;
2439
2440	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2441	if (rc) {
2442		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2443		goto err2;
2444	}
2445
2446	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2447	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2448	    mr->hw_mr.remote_atomic)
2449		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
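
	/* Illustrative layout: with itid == 0x5 and key == 0x22, the lkey
	 * becomes (0x5 << 8) | 0x22 == 0x522 -- the low byte is the
	 * consumer-owned key and the upper bits are the HW TID index.
	 */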
2450
2451	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2452		 mr->ibmr.lkey);
2453	return &mr->ibmr;
2454
2455err2:
2456	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2457err1:
2458	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2459err0:
2460	kfree(mr);
2461	return ERR_PTR(rc);
2462}
2463
2464int qedr_dereg_mr(struct ib_mr *ib_mr)
2465{
2466	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2467	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2468	int rc = 0;
2469
2470	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2471	if (rc)
2472		return rc;
2473
2474	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2475
2476	if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2477		qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2478
2479	/* It could be user-registered memory. */
2480	if (mr->umem)
2481		ib_umem_release(mr->umem);
2482
2483	kfree(mr);
2484
2485	return rc;
2486}
2487
2488static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2489				       int max_page_list_len)
2490{
2491	struct qedr_pd *pd = get_qedr_pd(ibpd);
2492	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2493	struct qedr_mr *mr;
2494	int rc = -ENOMEM;
2495
2496	DP_DEBUG(dev, QEDR_MSG_MR,
2497		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2498		 max_page_list_len);
2499
2500	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2501	if (!mr)
2502		return ERR_PTR(rc);
2503
2504	mr->dev = dev;
2505	mr->type = QEDR_MR_FRMR;
2506
2507	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2508	if (rc)
2509		goto err0;
2510
2511	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2512	if (rc) {
2513		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2514		goto err0;
2515	}
2516
2517	/* Index only, 18 bits long; lkey = itid << 8 | key */
2518	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2519	mr->hw_mr.key = 0;
2520	mr->hw_mr.pd = pd->pd_id;
2521	mr->hw_mr.local_read = 1;
2522	mr->hw_mr.local_write = 0;
2523	mr->hw_mr.remote_read = 0;
2524	mr->hw_mr.remote_write = 0;
2525	mr->hw_mr.remote_atomic = 0;
2526	mr->hw_mr.mw_bind = false;
2527	mr->hw_mr.pbl_ptr = 0;
2528	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2529	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2530	mr->hw_mr.fbo = 0;
2531	mr->hw_mr.length = 0;
2532	mr->hw_mr.vaddr = 0;
2533	mr->hw_mr.zbva = false;
2534	mr->hw_mr.phy_mr = true;
2535	mr->hw_mr.dma_mr = false;
2536
2537	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2538	if (rc) {
2539		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2540		goto err1;
2541	}
2542
2543	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2544	mr->ibmr.rkey = mr->ibmr.lkey;
2545
2546	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2547	return mr;
2548
2549err1:
2550	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2551err0:
2552	kfree(mr);
2553	return ERR_PTR(rc);
2554}
2555
2556struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
2557			    enum ib_mr_type mr_type, u32 max_num_sg)
2558{
2559	struct qedr_mr *mr;
2560
2561	if (mr_type != IB_MR_TYPE_MEM_REG)
2562		return ERR_PTR(-EINVAL);
2563
2564	mr = __qedr_alloc_mr(ibpd, max_num_sg);
2565
2566	if (IS_ERR(mr))
2567		return ERR_PTR(-EINVAL);
2568
2569	return &mr->ibmr;
2570}
2571
2572static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2573{
2574	struct qedr_mr *mr = get_qedr_mr(ibmr);
2575	struct qedr_pbl *pbl_table;
2576	struct regpair *pbe;
2577	u32 pbes_in_page;
2578
2579	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2580		DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
2581		return -ENOMEM;
2582	}
2583
2584	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2585		 mr->npages, addr);
2586
2587	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2588	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2589	pbe = (struct regpair *)pbl_table->va;
2590	pbe +=  mr->npages % pbes_in_page;
2591	pbe->lo = cpu_to_le32((u32)addr);
2592	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2593
2594	mr->npages++;
2595
2596	return 0;
2597}
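
/* Illustrative note: assuming a pbl_size of 4096 bytes (the actual value
 * comes from qedr_prepare_pbl_tbl()), pbes_in_page is 4096 / 8 = 512, so
 * page number 700 lands in pbl_table[1] at entry 700 % 512 = 188.
 */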
2598
2599static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2600{
2601	int work = info->completed - info->completed_handled - 1;
2602
2603	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2604	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2605		struct qedr_pbl *pbl;
2606
2607		/* Free all the page lists that can be freed (all the ones
2608		 * that were invalidated), under the assumption that if an
2609		 * FMR completed successfully, then any invalidate operation
2610		 * posted before it has also ended.
2611		 */
2612		pbl = list_first_entry(&info->inuse_pbl_list,
2613				       struct qedr_pbl, list_entry);
2614		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2615		info->completed_handled++;
2616	}
2617}
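
/* Illustrative note: if info->completed == 5 and info->completed_handled == 2,
 * then work == 2, so up to two PBLs migrate from the in-use list back to the
 * free list; the "- 1" leaves the PBL of the most recent completion alone.
 */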
2618
2619int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2620		   int sg_nents, unsigned int *sg_offset)
2621{
2622	struct qedr_mr *mr = get_qedr_mr(ibmr);
2623
2624	mr->npages = 0;
2625
2626	handle_completed_mrs(mr->dev, &mr->info);
2627	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2628}
2629
2630struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2631{
2632	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2633	struct qedr_pd *pd = get_qedr_pd(ibpd);
2634	struct qedr_mr *mr;
2635	int rc;
2636
2637	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2638	if (!mr)
2639		return ERR_PTR(-ENOMEM);
2640
2641	mr->type = QEDR_MR_DMA;
2642
2643	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2644	if (rc) {
2645		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2646		goto err1;
2647	}
2648
2649	/* Index only, 18 bits long; lkey = itid << 8 | key */
2650	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2651	mr->hw_mr.pd = pd->pd_id;
2652	mr->hw_mr.local_read = 1;
2653	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2654	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2655	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2656	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2657	mr->hw_mr.dma_mr = true;
2658
2659	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2660	if (rc) {
2661		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2662		goto err2;
2663	}
2664
2665	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2666	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2667	    mr->hw_mr.remote_atomic)
2668		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2669
2670	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2671	return &mr->ibmr;
2672
2673err2:
2674	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2675err1:
2676	kfree(mr);
2677	return ERR_PTR(rc);
2678}
2679
2680static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
2681{
2682	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2683}
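
/* Illustrative note: with max_wr == 4, prod == 3 and cons == 0, the ring is
 * full because (3 + 1) % 4 == 0 == cons; one slot is always sacrificed to
 * distinguish "full" from "empty" (prod == cons).
 */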
2684
2685static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2686{
2687	int i, len = 0;
2688
2689	for (i = 0; i < num_sge; i++)
2690		len += sg_list[i].length;
2691
2692	return len;
2693}
2694
2695static void swap_wqe_data64(u64 *p)
2696{
2697	int i;
2698
2699	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2700		*p = cpu_to_be64(cpu_to_le64(*p));
2701}
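
/* Illustrative note: cpu_to_le64() is a no-op on little-endian hosts, so on
 * x86 this reduces to *p = cpu_to_be64(*p), byte-swapping each 16-byte WQE
 * element, two u64s at a time, into the big-endian layout the FW expects
 * for inline data.
 */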
2702
2703static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2704				       struct qedr_qp *qp, u8 *wqe_size,
2705				       struct ib_send_wr *wr,
2706				       struct ib_send_wr **bad_wr, u8 *bits,
2707				       u8 bit)
2708{
2709	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2710	char *seg_prt, *wqe;
2711	int i, seg_siz;
2712
2713	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2714		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2715		*bad_wr = wr;
2716		return 0;
2717	}
2718
2719	if (!data_size)
2720		return data_size;
2721
2722	*bits |= bit;
2723
2724	seg_prt = NULL;
2725	wqe = NULL;
2726	seg_siz = 0;
2727
2728	/* Copy data inline */
2729	for (i = 0; i < wr->num_sge; i++) {
2730		u32 len = wr->sg_list[i].length;
2731		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
2732
2733		while (len > 0) {
2734			u32 cur;
2735
2736			/* New segment required */
2737			if (!seg_siz) {
2738				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
2739				seg_prt = wqe;
2740				seg_siz = sizeof(struct rdma_sq_common_wqe);
2741				(*wqe_size)++;
2742			}
2743
2744			/* Calculate currently allowed length */
2745			cur = min_t(u32, len, seg_siz);
2746			memcpy(seg_prt, src, cur);
2747
2748			/* Update segment variables */
2749			seg_prt += cur;
2750			seg_siz -= cur;
2751
2752			/* Update sge variables */
2753			src += cur;
2754			len -= cur;
2755
2756			/* Swap fully-completed segments */
2757			if (!seg_siz)
2758				swap_wqe_data64((u64 *)wqe);
2759		}
2760	}
2761
2762	/* Swap the last, not fully completed, segment */
2763	if (seg_siz)
2764		swap_wqe_data64((u64 *)wqe);
2765
2766	return data_size;
2767}
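
/* Illustrative walk-through: assuming 16-byte WQE segments (seg_siz is seeded
 * with sizeof(struct rdma_sq_common_wqe)), a single 40-byte SGE is copied as
 * three segments of 16 + 16 + 8 bytes: *wqe_size grows by 3, the first two
 * segments are swapped as they fill inside the loop, and the final partial
 * segment is swapped by the trailing fixup above.
 */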
2768
2769#define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
2770	do {							\
2771		DMA_REGPAIR_LE(sge->addr, vaddr);		\
2772		(sge)->length = cpu_to_le32(vlength);		\
2773		(sge)->flags = cpu_to_le32(vflags);		\
2774	} while (0)
2775
2776#define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
2777	do {							\
2778		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
2779		(hdr)->num_sges = num_sge;			\
2780	} while (0)
2781
2782#define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
2783	do {							\
2784		DMA_REGPAIR_LE(sge->addr, vaddr);		\
2785		(sge)->length = cpu_to_le32(vlength);		\
2786		(sge)->l_key = cpu_to_le32(vlkey);		\
2787	} while (0)
2788
2789static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
2790				struct ib_send_wr *wr)
2791{
2792	u32 data_size = 0;
2793	int i;
2794
2795	for (i = 0; i < wr->num_sge; i++) {
2796		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
2797
2798		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
2799		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
2800		sge->length = cpu_to_le32(wr->sg_list[i].length);
2801		data_size += wr->sg_list[i].length;
2802	}
2803
2804	if (wqe_size)
2805		*wqe_size += wr->num_sge;
2806
2807	return data_size;
2808}
2809
2810static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
2811				     struct qedr_qp *qp,
2812				     struct rdma_sq_rdma_wqe_1st *rwqe,
2813				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
2814				     struct ib_send_wr *wr,
2815				     struct ib_send_wr **bad_wr)
2816{
2817	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
2818	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
2819
2820	if (wr->send_flags & IB_SEND_INLINE &&
2821	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2822	     wr->opcode == IB_WR_RDMA_WRITE)) {
2823		u8 flags = 0;
2824
2825		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
2826		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
2827						   bad_wr, &rwqe->flags, flags);
2828	}
2829
2830	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
2831}
2832
2833static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
2834				     struct qedr_qp *qp,
2835				     struct rdma_sq_send_wqe_1st *swqe,
2836				     struct rdma_sq_send_wqe_2st *swqe2,
2837				     struct ib_send_wr *wr,
2838				     struct ib_send_wr **bad_wr)
2839{
2840	memset(swqe2, 0, sizeof(*swqe2));
2841	if (wr->send_flags & IB_SEND_INLINE) {
2842		u8 flags = 0;
2843
2844		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
2845		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
2846						   bad_wr, &swqe->flags, flags);
2847	}
2848
2849	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
2850}
2851
2852static int qedr_prepare_reg(struct qedr_qp *qp,
2853			    struct rdma_sq_fmr_wqe_1st *fwqe1,
2854			    struct ib_reg_wr *wr)
2855{
2856	struct qedr_mr *mr = get_qedr_mr(wr->mr);
2857	struct rdma_sq_fmr_wqe_2nd *fwqe2;
2858
2859	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
2860	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
2861	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
2862	fwqe1->l_key = wr->key;
2863
2864	fwqe2->access_ctrl = 0;
2865
2866	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
2867		   !!(wr->access & IB_ACCESS_REMOTE_READ));
2868	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
2869		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
2870	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
2871		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
2872	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
2873	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
2874		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
2875	fwqe2->fmr_ctrl = 0;
2876
2877	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
2878		   ilog2(mr->ibmr.page_size) - 12);
2879
2880	fwqe2->length_hi = 0;
2881	fwqe2->length_lo = mr->ibmr.length;
2882	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
2883	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
2884
2885	qp->wqe_wr_id[qp->sq.prod].mr = mr;
2886
2887	return 0;
2888}
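
/* Illustrative note: the PAGE_SIZE_LOG field is biased by 12, so a 4 KiB MR
 * page size (ilog2(4096) == 12) is encoded as 0 and a 2 MiB page size as
 * ilog2(0x200000) - 12 == 9.
 */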
2889
2890static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
2891{
2892	switch (opcode) {
2893	case IB_WR_RDMA_WRITE:
2894	case IB_WR_RDMA_WRITE_WITH_IMM:
2895		return IB_WC_RDMA_WRITE;
2896	case IB_WR_SEND_WITH_IMM:
2897	case IB_WR_SEND:
2898	case IB_WR_SEND_WITH_INV:
2899		return IB_WC_SEND;
2900	case IB_WR_RDMA_READ:
2901	case IB_WR_RDMA_READ_WITH_INV:
2902		return IB_WC_RDMA_READ;
2903	case IB_WR_ATOMIC_CMP_AND_SWP:
2904		return IB_WC_COMP_SWAP;
2905	case IB_WR_ATOMIC_FETCH_AND_ADD:
2906		return IB_WC_FETCH_ADD;
2907	case IB_WR_REG_MR:
2908		return IB_WC_REG_MR;
2909	case IB_WR_LOCAL_INV:
2910		return IB_WC_LOCAL_INV;
2911	default:
2912		return IB_WC_SEND;
2913	}
2914}
2915
2916static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
2917{
2918	int wq_is_full, err_wr, pbl_is_full;
2919	struct qedr_dev *dev = qp->dev;
2920
2921	/* prevent SQ overflow and/or processing of a bad WR */
2922	err_wr = wr->num_sge > qp->sq.max_sges;
2923	wq_is_full = qedr_wq_is_full(&qp->sq);
2924	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
2925		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2926	if (wq_is_full || err_wr || pbl_is_full) {
2927		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
2928			DP_ERR(dev,
2929			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
2930			       qp);
2931			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
2932		}
2933
2934		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
2935			DP_ERR(dev,
2936			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
2937			       qp);
2938			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
2939		}
2940
2941		if (pbl_is_full &&
2942		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
2943			DP_ERR(dev,
2944			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
2945			       qp);
2946			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
2947		}
2948		return false;
2949	}
2950	return true;
2951}
2952
2953static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2954		     struct ib_send_wr **bad_wr)
2955{
2956	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2957	struct qedr_qp *qp = get_qedr_qp(ibqp);
2958	struct rdma_sq_atomic_wqe_1st *awqe1;
2959	struct rdma_sq_atomic_wqe_2nd *awqe2;
2960	struct rdma_sq_atomic_wqe_3rd *awqe3;
2961	struct rdma_sq_send_wqe_2st *swqe2;
2962	struct rdma_sq_local_inv_wqe *iwqe;
2963	struct rdma_sq_rdma_wqe_2nd *rwqe2;
2964	struct rdma_sq_send_wqe_1st *swqe;
2965	struct rdma_sq_rdma_wqe_1st *rwqe;
2966	struct rdma_sq_fmr_wqe_1st *fwqe1;
2967	struct rdma_sq_common_wqe *wqe;
2968	u32 length;
2969	int rc = 0;
2970	bool comp;
2971
2972	if (!qedr_can_post_send(qp, wr)) {
2973		*bad_wr = wr;
2974		return -ENOMEM;
2975	}
2976
2977	wqe = qed_chain_produce(&qp->sq.pbl);
2978	qp->wqe_wr_id[qp->sq.prod].signaled =
2979		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
2980
2981	wqe->flags = 0;
2982	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
2983		   !!(wr->send_flags & IB_SEND_SOLICITED));
2984	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
2985	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
2986	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
2987		   !!(wr->send_flags & IB_SEND_FENCE));
2988	wqe->prev_wqe_size = qp->prev_wqe_size;
2989
2990	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
2991
2992	switch (wr->opcode) {
2993	case IB_WR_SEND_WITH_IMM:
2994		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
2995			rc = -EINVAL;
2996			*bad_wr = wr;
2997			break;
2998		}
2999		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3000		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3001		swqe->wqe_size = 2;
3002		swqe2 = qed_chain_produce(&qp->sq.pbl);
3003
3004		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3005		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3006						   wr, bad_wr);
3007		swqe->length = cpu_to_le32(length);
3008		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3009		qp->prev_wqe_size = swqe->wqe_size;
3010		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3011		break;
3012	case IB_WR_SEND:
3013		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3014		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3015
3016		swqe->wqe_size = 2;
3017		swqe2 = qed_chain_produce(&qp->sq.pbl);
3018		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3019						   wr, bad_wr);
3020		swqe->length = cpu_to_le32(length);
3021		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3022		qp->prev_wqe_size = swqe->wqe_size;
3023		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3024		break;
3025	case IB_WR_SEND_WITH_INV:
3026		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3027		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3028		swqe2 = qed_chain_produce(&qp->sq.pbl);
3029		swqe->wqe_size = 2;
3030		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3031		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3032						   wr, bad_wr);
3033		swqe->length = cpu_to_le32(length);
3034		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3035		qp->prev_wqe_size = swqe->wqe_size;
3036		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3037		break;
3038
3039	case IB_WR_RDMA_WRITE_WITH_IMM:
3040		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3041			rc = -EINVAL;
3042			*bad_wr = wr;
3043			break;
3044		}
3045		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3046		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3047
3048		rwqe->wqe_size = 2;
3049		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3050		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3051		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3052						   wr, bad_wr);
3053		rwqe->length = cpu_to_le32(length);
3054		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3055		qp->prev_wqe_size = rwqe->wqe_size;
3056		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3057		break;
3058	case IB_WR_RDMA_WRITE:
3059		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3060		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3061
3062		rwqe->wqe_size = 2;
3063		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3064		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3065						   wr, bad_wr);
3066		rwqe->length = cpu_to_le32(length);
3067		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3068		qp->prev_wqe_size = rwqe->wqe_size;
3069		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3070		break;
3071	case IB_WR_RDMA_READ_WITH_INV:
3072		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3073		/* fallthrough -- handled identically to RDMA READ */
3074
3075	case IB_WR_RDMA_READ:
3076		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3077		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3078
3079		rwqe->wqe_size = 2;
3080		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3081		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3082						   wr, bad_wr);
3083		rwqe->length = cpu_to_le32(length);
3084		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3085		qp->prev_wqe_size = rwqe->wqe_size;
3086		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3087		break;
3088
3089	case IB_WR_ATOMIC_CMP_AND_SWP:
3090	case IB_WR_ATOMIC_FETCH_AND_ADD:
3091		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3092		awqe1->wqe_size = 4;
3093
3094		awqe2 = qed_chain_produce(&qp->sq.pbl);
3095		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3096		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3097
3098		awqe3 = qed_chain_produce(&qp->sq.pbl);
3099
3100		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3101			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3102			DMA_REGPAIR_LE(awqe3->swap_data,
3103				       atomic_wr(wr)->compare_add);
3104		} else {
3105			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3106			DMA_REGPAIR_LE(awqe3->swap_data,
3107				       atomic_wr(wr)->swap);
3108			DMA_REGPAIR_LE(awqe3->cmp_data,
3109				       atomic_wr(wr)->compare_add);
3110		}
3111
3112		qedr_prepare_sq_sges(qp, NULL, wr);
3113
3114		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3115		qp->prev_wqe_size = awqe1->wqe_size;
3116		break;
3117
3118	case IB_WR_LOCAL_INV:
3119		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3120		iwqe->wqe_size = 1;
3121
3122		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3123		iwqe->inv_l_key = wr->ex.invalidate_rkey;
3124		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3125		qp->prev_wqe_size = iwqe->wqe_size;
3126		break;
3127	case IB_WR_REG_MR:
3128		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3129		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3130		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3131		fwqe1->wqe_size = 2;
3132
3133		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3134		if (rc) {
3135			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3136			*bad_wr = wr;
3137			break;
3138		}
3139
3140		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3141		qp->prev_wqe_size = fwqe1->wqe_size;
3142		break;
3143	default:
3144		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3145		rc = -EINVAL;
3146		*bad_wr = wr;
3147		break;
3148	}
3149
3150	if (*bad_wr) {
3151		u16 value;
3152
3153		/* Restore prod to its position before
3154		 * this WR was processed
3155		 */
3156		value = le16_to_cpu(qp->sq.db_data.data.value);
3157		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3158
3159		/* Restore prev_wqe_size */
3160		qp->prev_wqe_size = wqe->prev_wqe_size;
3161		rc = -EINVAL;
3162		DP_ERR(dev, "POST SEND FAILED\n");
3163	}
3164
3165	return rc;
3166}
3167
3168int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3169		   struct ib_send_wr **bad_wr)
3170{
3171	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3172	struct qedr_qp *qp = get_qedr_qp(ibqp);
3173	unsigned long flags;
3174	int rc = 0;
3175
3176	*bad_wr = NULL;
3177
3178	if (qp->qp_type == IB_QPT_GSI)
3179		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3180
3181	spin_lock_irqsave(&qp->q_lock, flags);
3182
3183	if (rdma_protocol_roce(&dev->ibdev, 1)) {
3184		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3185		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3186		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3187			spin_unlock_irqrestore(&qp->q_lock, flags);
3188			*bad_wr = wr;
3189			DP_DEBUG(dev, QEDR_MSG_CQ,
3190				 "QP in wrong state! QP icid=0x%x state %d\n",
3191				 qp->icid, qp->state);
3192			return -EINVAL;
3193		}
3194	}
3195
3196	while (wr) {
3197		rc = __qedr_post_send(ibqp, wr, bad_wr);
3198		if (rc)
3199			break;
3200
3201		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3202
3203		qedr_inc_sw_prod(&qp->sq);
3204
3205		qp->sq.db_data.data.value++;
3206
3207		wr = wr->next;
3208	}
3209
3210	/* Trigger doorbell
3211	 * If there was a failure in the first WR then it will be triggered in
3212	 * vain. However, this is not harmful (as long as the producer value is
3213	 * unchanged). For performance reasons we avoid checking for this
3214	 * redundant doorbell.
3215	 *
3216	 * qp->wqe_wr_id is accessed during qedr_poll_cq, as
3217	 * soon as we give the doorbell, we could get a completion
3218	 * for this wr, therefore we need to make sure that the
3219	 * memory is updated before giving the doorbell.
3220	 * During qedr_poll_cq, rmb is called before accessing the
3221	 * cqe. This covers for the smp_rmb as well.
3222	 */
3223	smp_wmb();
3224	writel(qp->sq.db_data.raw, qp->sq.db);
3225
3226	/* Make sure write sticks */
3227	mmiowb();
3228
3229	spin_unlock_irqrestore(&qp->q_lock, flags);
3230
3231	return rc;
3232}
3233
3234int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
3235		   struct ib_recv_wr **bad_wr)
3236{
3237	struct qedr_qp *qp = get_qedr_qp(ibqp);
3238	struct qedr_dev *dev = qp->dev;
3239	unsigned long flags;
3240	int status = 0;
3241
3242	if (qp->qp_type == IB_QPT_GSI)
3243		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3244
3245	spin_lock_irqsave(&qp->q_lock, flags);
3246
3247	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3248		spin_unlock_irqrestore(&qp->q_lock, flags);
3249		*bad_wr = wr;
3250		return -EINVAL;
3251	}
3252
3253	while (wr) {
3254		int i;
3255
3256		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3257		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3258		    wr->num_sge > qp->rq.max_sges) {
3259			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3260			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3261			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3262			       qp->rq.max_sges);
3263			status = -ENOMEM;
3264			*bad_wr = wr;
3265			break;
3266		}
3267		for (i = 0; i < wr->num_sge; i++) {
3268			u32 flags = 0;
3269			struct rdma_rq_sge *rqe =
3270			    qed_chain_produce(&qp->rq.pbl);
3271
3272			/* First one must include the number
3273			 * of SGE in the list
3274			 */
3275			if (!i)
3276				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3277					  wr->num_sge);
3278
3279			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY,
3280				  wr->sg_list[i].lkey);
3281
3282			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3283				   wr->sg_list[i].length, flags);
3284		}
3285
3286		/* Special case of no SGEs. FW requires between 1-4 SGEs, so in
3287		 * this case we need to post 1 SGE with length zero. This is
3288		 * because an RDMA write with immediate consumes an RQ entry.
3289		 */
3290		if (!wr->num_sge) {
3291			u32 flags = 0;
3292			struct rdma_rq_sge *rqe =
3293			    qed_chain_produce(&qp->rq.pbl);
3294
3295			/* First one must include the number
3296			 * of SGE in the list
3297			 */
3298			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0);
3299			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3300
3301			RQ_SGE_SET(rqe, 0, 0, flags);
3302			i = 1;
3303		}
3304
3305		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3306		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3307
3308		qedr_inc_sw_prod(&qp->rq);
3309
3310		/* qp->rqe_wr_id is accessed during qedr_poll_cq, as
3311		 * soon as we give the doorbell, we could get a completion
3312		 * for this wr, therefore we need to make sure that the
3313		 * memory is updated before giving the doorbell.
3314		 * During qedr_poll_cq, rmb is called before accessing the
3315		 * cqe. This covers for the smp_rmb as well.
3316		 */
3317		smp_wmb();
3318
3319		qp->rq.db_data.data.value++;
3320
3321		writel(qp->rq.db_data.raw, qp->rq.db);
3322
3323		/* Make sure write sticks */
3324		mmiowb();
3325
3326		if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
3327			writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3328			mmiowb();	/* for second doorbell */
3329		}
3330
3331		wr = wr->next;
3332	}
3333
3334	spin_unlock_irqrestore(&qp->q_lock, flags);
3335
3336	return status;
3337}
3338
3339static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3340{
3341	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3342
3343	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3344		cq->pbl_toggle;
3345}
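
/* Illustrative note: the toggle bit implements a ring-ownership scheme.
 * A CQE is valid only while its toggle matches cq->pbl_toggle; when the
 * consumer wraps the ring, the expected toggle is flipped elsewhere, so
 * stale CQEs left over from the previous lap never validate.
 */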
3346
3347static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3348{
3349	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3350	struct qedr_qp *qp;
3351
3352	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3353						   resp_cqe->qp_handle.lo,
3354						   u64);
3355	return qp;
3356}
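
/* Illustrative note: qp_handle carries the qedr_qp kernel pointer, stored as
 * two 32-bit halves when the QP was created; HILO_GEN() reassembles it, e.g.
 * hi == 0xffff8880 and lo == 0x12345678 yield 0xffff888012345678.
 */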
3357
3358static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3359{
3360	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3361
3362	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3363}
3364
3365/* Return latest CQE (needs processing) */
3366static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3367{
3368	return cq->latest_cqe;
3369}
3370
3371/* For FMRs we need to increase the 'completed' counter used by the FMR
3372 * algorithm to determine whether a PBL can be freed or not.
3373 * We need to do this whether or not the work request was signaled. For this
3374 * purpose we call this function from the condition that checks if a WR
3375 * should be skipped, to make sure we don't miss it (possibly this FMR
3376 * operation was not signaled).
3377 */
3378static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3379{
3380	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3381		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3382}
3383
3384static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3385		       struct qedr_cq *cq, int num_entries,
3386		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3387		       int force)
3388{
3389	u16 cnt = 0;
3390
3391	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3392		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3393			qedr_chk_if_fmr(qp);
3394			/* skip WC */
3395			goto next_cqe;
3396		}
3397
3398		/* fill WC */
3399		wc->status = status;
3400		wc->vendor_err = 0;
3401		wc->wc_flags = 0;
3402		wc->src_qp = qp->id;
3403		wc->qp = &qp->ibqp;
3404
3405		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3406		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3407
3408		switch (wc->opcode) {
3409		case IB_WC_RDMA_WRITE:
3410			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3411			break;
3412		case IB_WC_COMP_SWAP:
3413		case IB_WC_FETCH_ADD:
3414			wc->byte_len = 8;
3415			break;
3416		case IB_WC_REG_MR:
3417			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3418			break;
3419		case IB_WC_RDMA_READ:
3420		case IB_WC_SEND:
3421			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3422			break;
3423		default:
3424			break;
3425		}
3426
3427		num_entries--;
3428		wc++;
3429		cnt++;
3430next_cqe:
3431		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3432			qed_chain_consume(&qp->sq.pbl);
3433		qedr_inc_sw_cons(&qp->sq);
3434	}
3435
3436	return cnt;
3437}
3438
3439static int qedr_poll_cq_req(struct qedr_dev *dev,
3440			    struct qedr_qp *qp, struct qedr_cq *cq,
3441			    int num_entries, struct ib_wc *wc,
3442			    struct rdma_cqe_requester *req)
3443{
3444	int cnt = 0;
3445
3446	switch (req->status) {
3447	case RDMA_CQE_REQ_STS_OK:
3448		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3449				  IB_WC_SUCCESS, 0);
3450		break;
3451	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3452		if (qp->state != QED_ROCE_QP_STATE_ERR)
3453			DP_DEBUG(dev, QEDR_MSG_CQ,
3454				 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3455				 cq->icid, qp->icid);
3456		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3457				  IB_WC_WR_FLUSH_ERR, 1);
3458		break;
3459	default:
3460		/* Process all WQEs before the consumer */
3461		qp->state = QED_ROCE_QP_STATE_ERR;
3462		cnt = process_req(dev, qp, cq, num_entries, wc,
3463				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
3464		wc += cnt;
3465		/* if we have extra WC fill it with actual error info */
3466		if (cnt < num_entries) {
3467			enum ib_wc_status wc_status;
3468
3469			switch (req->status) {
3470			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3471				DP_ERR(dev,
3472				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3473				       cq->icid, qp->icid);
3474				wc_status = IB_WC_BAD_RESP_ERR;
3475				break;
3476			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3477				DP_ERR(dev,
3478				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3479				       cq->icid, qp->icid);
3480				wc_status = IB_WC_LOC_LEN_ERR;
3481				break;
3482			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3483				DP_ERR(dev,
3484				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3485				       cq->icid, qp->icid);
3486				wc_status = IB_WC_LOC_QP_OP_ERR;
3487				break;
3488			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3489				DP_ERR(dev,
3490				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3491				       cq->icid, qp->icid);
3492				wc_status = IB_WC_LOC_PROT_ERR;
3493				break;
3494			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3495				DP_ERR(dev,
3496				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3497				       cq->icid, qp->icid);
3498				wc_status = IB_WC_MW_BIND_ERR;
3499				break;
3500			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
3501				DP_ERR(dev,
3502				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3503				       cq->icid, qp->icid);
3504				wc_status = IB_WC_REM_INV_REQ_ERR;
3505				break;
3506			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
3507				DP_ERR(dev,
3508				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3509				       cq->icid, qp->icid);
3510				wc_status = IB_WC_REM_ACCESS_ERR;
3511				break;
3512			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
3513				DP_ERR(dev,
3514				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3515				       cq->icid, qp->icid);
3516				wc_status = IB_WC_REM_OP_ERR;
3517				break;
3518			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
3519				DP_ERR(dev,
3520				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3521				       cq->icid, qp->icid);
3522				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
3523				break;
3524			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
3525				DP_ERR(dev,
3526				       "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3527				       cq->icid, qp->icid);
3528				wc_status = IB_WC_RETRY_EXC_ERR;
3529				break;
3530			default:
3531				DP_ERR(dev,
3532				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3533				       cq->icid, qp->icid);
3534				wc_status = IB_WC_GENERAL_ERR;
3535			}
3536			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
3537					   wc_status, 1);
3538		}
3539	}
3540
3541	return cnt;
3542}
3543
3544static inline int qedr_cqe_resp_status_to_ib(u8 status)
3545{
3546	switch (status) {
3547	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
3548		return IB_WC_LOC_ACCESS_ERR;
3549	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
3550		return IB_WC_LOC_LEN_ERR;
3551	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
3552		return IB_WC_LOC_QP_OP_ERR;
3553	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
3554		return IB_WC_LOC_PROT_ERR;
3555	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
3556		return IB_WC_MW_BIND_ERR;
3557	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
3558		return IB_WC_REM_INV_RD_REQ_ERR;
3559	case RDMA_CQE_RESP_STS_OK:
3560		return IB_WC_SUCCESS;
3561	default:
3562		return IB_WC_GENERAL_ERR;
3563	}
3564}
3565
3566static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
3567					  struct ib_wc *wc)
3568{
3569	wc->status = IB_WC_SUCCESS;
3570	wc->byte_len = le32_to_cpu(resp->length);
3571
3572	if (resp->flags & QEDR_RESP_IMM) {
3573		wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
3574		wc->wc_flags |= IB_WC_WITH_IMM;
3575
3576		if (resp->flags & QEDR_RESP_RDMA)
3577			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
3578
3579		if (resp->flags & QEDR_RESP_INV)
3580			return -EINVAL;
3581
3582	} else if (resp->flags & QEDR_RESP_INV) {
3583		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3584		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
3585
3586		if (resp->flags & QEDR_RESP_RDMA)
3587			return -EINVAL;
3588
3589	} else if (resp->flags & QEDR_RESP_RDMA) {
3590		return -EINVAL;
3591	}
3592
3593	return 0;
3594}
3595
3596static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3597			       struct qedr_cq *cq, struct ib_wc *wc,
3598			       struct rdma_cqe_responder *resp, u64 wr_id)
3599{
3600	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
3601	wc->opcode = IB_WC_RECV;
3602	wc->wc_flags = 0;
3603
3604	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
3605		if (qedr_set_ok_cqe_resp_wc(resp, wc))
3606			DP_ERR(dev,
3607			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
3608			       cq, cq->icid, resp->flags);
3609
3610	} else {
3611		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
3612		if (wc->status == IB_WC_GENERAL_ERR)
3613			DP_ERR(dev,
3614			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
3615			       cq, cq->icid, resp->status);
3616	}
3617
3618	/* Fill the rest of the WC */
3619	wc->vendor_err = 0;
3620	wc->src_qp = qp->id;
3621	wc->qp = &qp->ibqp;
3622	wc->wr_id = wr_id;
3623}
3624
3625static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3626			    struct qedr_cq *cq, struct ib_wc *wc,
3627			    struct rdma_cqe_responder *resp)
3628{
3629	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3630
3631	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
3632
3633	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3634		qed_chain_consume(&qp->rq.pbl);
3635	qedr_inc_sw_cons(&qp->rq);
3636
3637	return 1;
3638}
3639
static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
			      int num_entries, struct ib_wc *wc, u16 hw_cons)
{
	u16 cnt = 0;

	while (num_entries && qp->rq.wqe_cons != hw_cons) {
		/* fill WC */
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->vendor_err = 0;
		wc->wc_flags = 0;
		wc->src_qp = qp->id;
		wc->byte_len = 0;
		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
		wc->qp = &qp->ibqp;
		num_entries--;
		wc++;
		cnt++;
		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
			qed_chain_consume(&qp->rq.pbl);
		qedr_inc_sw_cons(&qp->rq);
	}

	return cnt;
}

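/* A single responder CQE can cover several RQ WQEs; it is consumed from the
 * CQ chain only once the software consumer index has caught up with the
 * index the CQE reported.
 */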
static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
				 struct rdma_cqe_responder *resp, int *update)
{
	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
		consume_cqe(cq);
		*update |= 1;
	}
}

static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
			     struct qedr_cq *cq, int num_entries,
			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
			     int *update)
{
	int cnt;

	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
		/* rq_cons_or_srq_id is little-endian in the CQE */
		cnt = process_resp_flush(qp, cq, num_entries, wc,
					 le16_to_cpu(resp->rq_cons_or_srq_id));
		try_consume_resp_cqe(cq, qp, resp, update);
	} else {
		cnt = process_resp_one(dev, qp, cq, wc, resp);
		consume_cqe(cq);
		*update |= 1;
	}

	return cnt;
}

static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
				struct rdma_cqe_requester *req, int *update)
{
	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
		consume_cqe(cq);
		*update |= 1;
	}
}

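/* Poll entry point for this device's CQs.  GSI CQs are handled entirely in
 * software by qedr_gsi_poll_cq().  For everything else we walk the CQE chain
 * under cq_lock, hand each CQE to the requester or responder handler, and
 * finally ring the consumer doorbell with the last index we consumed.
 */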
int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
	struct qedr_cq *cq = get_qedr_cq(ibcq);
	union rdma_cqe *cqe;
	u32 old_cons, new_cons;
	unsigned long flags;
	int update = 0;
	int done = 0;

	if (cq->destroyed) {
		DP_ERR(dev,
		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
		       cq, cq->icid);
		return 0;
	}

	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
		return qedr_gsi_poll_cq(ibcq, num_entries, wc);

	spin_lock_irqsave(&cq->cq_lock, flags);
	cqe = cq->latest_cqe;
	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
	while (num_entries && is_valid_cqe(cq, cqe)) {
		struct qedr_qp *qp;
		int cnt = 0;

		/* prevent speculative reads of any field of CQE */
		rmb();

		qp = cqe_get_qp(cqe);
		if (!qp) {
			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
			break;
		}

		wc->qp = &qp->ibqp;

		switch (cqe_get_type(cqe)) {
		case RDMA_CQE_TYPE_REQUESTER:
			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
					       &cqe->req);
			try_consume_req_cqe(cq, qp, &cqe->req, &update);
			break;
		case RDMA_CQE_TYPE_RESPONDER_RQ:
			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
						&cqe->resp, &update);
			break;
		case RDMA_CQE_TYPE_INVALID:
		default:
			DP_ERR(dev, "Error: invalid CQE type = %d\n",
			       cqe_get_type(cqe));
		}
		num_entries -= cnt;
		wc += cnt;
		done += cnt;

		cqe = get_cqe(cq);
	}
	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);

	cq->cq_cons += new_cons - old_cons;

	if (update)
		/* The doorbell notifies about the latest VALID entry,
		 * but the chain already points to the next INVALID one.
		 */
		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);

	spin_unlock_irqrestore(&cq->cq_lock, flags);
	return done;
}

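/* Editor's usage sketch (not part of the driver): consumers reach this poll
 * routine through the core verbs API rather than calling it directly.  A
 * minimal drain loop in a kernel ULP, assuming a CQ obtained from
 * ib_create_cq() and a hypothetical handle_wc() helper, might look like:
 *
 *	struct ib_wc wc[16];
 *	int i, n;
 *
 *	while ((n = ib_poll_cq(cq, ARRAY_SIZE(wc), wc)) > 0) {
 *		for (i = 0; i < n; i++)
 *			handle_wc(&wc[i]);
 *	}
 */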
/* qedr does not implement MAD processing; the incoming MAD header is only
 * logged for debug purposes and success is returned without producing a
 * response MAD.
 */
int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
		     u8 port_num,
		     const struct ib_wc *in_wc,
		     const struct ib_grh *in_grh,
		     const struct ib_mad_hdr *mad_hdr,
		     size_t in_mad_size, struct ib_mad_hdr *out_mad,
		     size_t *out_mad_size, u16 *out_mad_pkey_index)
{
	struct qedr_dev *dev = get_qedr_dev(ibdev);

	DP_DEBUG(dev, QEDR_MSG_GSI,
		 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
		 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
		 mad_hdr->class_specific, mad_hdr->class_version,
		 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
	return IB_MAD_RESULT_SUCCESS;
}