   1/*
   2 * Copyright(c) 2016 Intel Corporation.
   3 *
   4 * This file is provided under a dual BSD/GPLv2 license.  When using or
   5 * redistributing this file, you may do so under either license.
   6 *
   7 * GPL LICENSE SUMMARY
   8 *
   9 * This program is free software; you can redistribute it and/or modify
  10 * it under the terms of version 2 of the GNU General Public License as
  11 * published by the Free Software Foundation.
  12 *
  13 * This program is distributed in the hope that it will be useful, but
  14 * WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 * General Public License for more details.
  17 *
  18 * BSD LICENSE
  19 *
  20 * Redistribution and use in source and binary forms, with or without
  21 * modification, are permitted provided that the following conditions
  22 * are met:
  23 *
  24 *  - Redistributions of source code must retain the above copyright
  25 *    notice, this list of conditions and the following disclaimer.
  26 *  - Redistributions in binary form must reproduce the above copyright
  27 *    notice, this list of conditions and the following disclaimer in
  28 *    the documentation and/or other materials provided with the
  29 *    distribution.
  30 *  - Neither the name of Intel Corporation nor the names of its
  31 *    contributors may be used to endorse or promote products derived
  32 *    from this software without specific prior written permission.
  33 *
  34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45 *
  46 */
  47
  48#include <linux/hash.h>
  49#include <linux/bitops.h>
  50#include <linux/lockdep.h>
  51#include <linux/vmalloc.h>
  52#include <linux/slab.h>
  53#include <rdma/ib_verbs.h>
  54#include "qp.h"
  55#include "vt.h"
  56#include "trace.h"
  57
  58/*
  59 * Note that it is OK to post send work requests in the SQE and ERR
  60 * states; rvt_do_send() will process them and generate error
  61 * completions as per IB 1.2 C10-96.
  62 */
  63const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
  64	[IB_QPS_RESET] = 0,
  65	[IB_QPS_INIT] = RVT_POST_RECV_OK,
  66	[IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK,
  67	[IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
  68	    RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK |
  69	    RVT_PROCESS_NEXT_SEND_OK,
  70	[IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
  71	    RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK,
  72	[IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
  73	    RVT_POST_SEND_OK | RVT_FLUSH_SEND,
  74	[IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV |
  75	    RVT_POST_SEND_OK | RVT_FLUSH_SEND,
  76};
  77EXPORT_SYMBOL(ib_rvt_state_ops);
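/*
 * For example, rvt_post_send() further down in this file gates posting
 * with a simple mask test against this table:
 *
 *	if (!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))
 *		return -EINVAL;
 */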
  78
  79/*
  80 * Translate ib_wr_opcode into ib_wc_opcode.
  81 */
  82const enum ib_wc_opcode ib_rvt_wc_opcode[] = {
  83	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
  84	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
  85	[IB_WR_SEND] = IB_WC_SEND,
  86	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
  87	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
  88	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
  89	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
  90	[IB_WR_SEND_WITH_INV] = IB_WC_SEND,
  91	[IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
  92	[IB_WR_REG_MR] = IB_WC_REG_MR
  93};
  94EXPORT_SYMBOL(ib_rvt_wc_opcode);
  95
  96static void get_map_page(struct rvt_qpn_table *qpt,
  97			 struct rvt_qpn_map *map,
  98			 gfp_t gfp)
  99{
 100	unsigned long page = get_zeroed_page(gfp);
 101
 102	/*
 103	 * Free the page if someone raced with us installing it.
 104	 */
 105
 106	spin_lock(&qpt->lock);
 107	if (map->page)
 108		free_page(page);
 109	else
 110		map->page = (void *)page;
 111	spin_unlock(&qpt->lock);
 112}
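/*
 * Note: get_zeroed_page() can fail; both callers (init_qpn_table() and
 * alloc_qpn()) re-check map->page after this returns and treat a
 * still-NULL page as -ENOMEM.
 */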
 113
 114/**
 115 * init_qpn_table - initialize the QP number table for a device
 116 * @qpt: the QPN table
 117 */
 118static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt)
 119{
 120	u32 offset, i;
 121	struct rvt_qpn_map *map;
 122	int ret = 0;
 123
 124	if (!(rdi->dparms.qpn_res_end >= rdi->dparms.qpn_res_start))
 125		return -EINVAL;
 126
 127	spin_lock_init(&qpt->lock);
 128
 129	qpt->last = rdi->dparms.qpn_start;
 130	qpt->incr = rdi->dparms.qpn_inc << rdi->dparms.qos_shift;
 131
 132	/*
  133	 * Drivers may want some QPs beyond what is needed for verbs; let them
  134	 * use our qpn table rather than keeping a second one. Go ahead and
  135	 * mark the bitmaps for those QPs here. The reserved range must be
  136	 * *after* the range which verbs will pick from.
 137	 */
 138
 139	/* Figure out number of bit maps needed before reserved range */
 140	qpt->nmaps = rdi->dparms.qpn_res_start / RVT_BITS_PER_PAGE;
 141
 142	/* This should always be zero */
 143	offset = rdi->dparms.qpn_res_start & RVT_BITS_PER_PAGE_MASK;
 144
 145	/* Starting with the first reserved bit map */
 146	map = &qpt->map[qpt->nmaps];
 147
 148	rvt_pr_info(rdi, "Reserving QPNs from 0x%x to 0x%x for non-verbs use\n",
 149		    rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end);
 150	for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) {
 151		if (!map->page) {
 152			get_map_page(qpt, map, GFP_KERNEL);
 153			if (!map->page) {
 154				ret = -ENOMEM;
 155				break;
 156			}
 157		}
 158		set_bit(offset, map->page);
 159		offset++;
 160		if (offset == RVT_BITS_PER_PAGE) {
 161			/* next page */
 162			qpt->nmaps++;
 163			map++;
 164			offset = 0;
 165		}
 166	}
 167	return ret;
 168}
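/*
 * Worked example of the map/offset split above, assuming 4K pages so that
 * RVT_BITS_PER_PAGE == 32768: a qpn_res_start of 0x10000 (65536) gives
 * qpt->nmaps = 65536 / 32768 = 2 and offset = 65536 & 32767 = 0, i.e. the
 * reserved range starts at bit 0 of the third bitmap page.
 */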
 169
 170/**
 171 * free_qpn_table - free the QP number table for a device
 172 * @qpt: the QPN table
 173 */
 174static void free_qpn_table(struct rvt_qpn_table *qpt)
 175{
 176	int i;
 177
 178	for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
 179		free_page((unsigned long)qpt->map[i].page);
 180}
 181
 182/**
 183 * rvt_driver_qp_init - Init driver qp resources
  184 * @rdi: rvt dev structure
 185 *
 186 * Return: 0 on success
 187 */
 188int rvt_driver_qp_init(struct rvt_dev_info *rdi)
 189{
 190	int i;
 191	int ret = -ENOMEM;
 192
 193	if (!rdi->dparms.qp_table_size)
 194		return -EINVAL;
 195
 196	/*
 197	 * If driver is not doing any QP allocation then make sure it is
 198	 * providing the necessary QP functions.
 199	 */
 200	if (!rdi->driver_f.free_all_qps ||
 201	    !rdi->driver_f.qp_priv_alloc ||
 202	    !rdi->driver_f.qp_priv_free ||
 203	    !rdi->driver_f.notify_qp_reset)
 204		return -EINVAL;
 205
 206	/* allocate parent object */
 207	rdi->qp_dev = kzalloc_node(sizeof(*rdi->qp_dev), GFP_KERNEL,
 208				   rdi->dparms.node);
 209	if (!rdi->qp_dev)
 210		return -ENOMEM;
 211
 212	/* allocate hash table */
 213	rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size;
 214	rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size);
 215	rdi->qp_dev->qp_table =
 216		kmalloc_node(rdi->qp_dev->qp_table_size *
 217			     sizeof(*rdi->qp_dev->qp_table),
 218			     GFP_KERNEL, rdi->dparms.node);
 219	if (!rdi->qp_dev->qp_table)
 220		goto no_qp_table;
 221
 222	for (i = 0; i < rdi->qp_dev->qp_table_size; i++)
 223		RCU_INIT_POINTER(rdi->qp_dev->qp_table[i], NULL);
 224
 225	spin_lock_init(&rdi->qp_dev->qpt_lock);
 226
 227	/* initialize qpn map */
 228	if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table))
 229		goto fail_table;
 230
 231	spin_lock_init(&rdi->n_qps_lock);
 232
 233	return 0;
 234
 235fail_table:
 236	kfree(rdi->qp_dev->qp_table);
 237	free_qpn_table(&rdi->qp_dev->qpn_table);
 238
 239no_qp_table:
 240	kfree(rdi->qp_dev);
 241
 242	return ret;
 243}
 244
 245/**
  246 * rvt_free_all_qps - check for QPs still in use
  247 * @rdi: rvt device info structure
 248 *
 249 * There should not be any QPs still in use.
 250 * Free memory for table.
 251 */
 252static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi)
 253{
 254	unsigned long flags;
 255	struct rvt_qp *qp;
 256	unsigned n, qp_inuse = 0;
 257	spinlock_t *ql; /* work around too long line below */
 258
 259	if (rdi->driver_f.free_all_qps)
 260		qp_inuse = rdi->driver_f.free_all_qps(rdi);
 261
 262	qp_inuse += rvt_mcast_tree_empty(rdi);
 263
 264	if (!rdi->qp_dev)
 265		return qp_inuse;
 266
 267	ql = &rdi->qp_dev->qpt_lock;
 268	spin_lock_irqsave(ql, flags);
 269	for (n = 0; n < rdi->qp_dev->qp_table_size; n++) {
 270		qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n],
 271					       lockdep_is_held(ql));
 272		RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL);
 273
 274		for (; qp; qp = rcu_dereference_protected(qp->next,
 275							  lockdep_is_held(ql)))
 276			qp_inuse++;
 277	}
 278	spin_unlock_irqrestore(ql, flags);
 279	synchronize_rcu();
 280	return qp_inuse;
 281}
 282
 283/**
 284 * rvt_qp_exit - clean up qps on device exit
 285 * @rdi: rvt dev structure
 286 *
 287 * Check for qp leaks and free resources.
 288 */
 289void rvt_qp_exit(struct rvt_dev_info *rdi)
 290{
 291	u32 qps_inuse = rvt_free_all_qps(rdi);
 292
 293	if (qps_inuse)
 294		rvt_pr_err(rdi, "QP memory leak! %u still in use\n",
 295			   qps_inuse);
 296	if (!rdi->qp_dev)
 297		return;
 298
 299	kfree(rdi->qp_dev->qp_table);
 300	free_qpn_table(&rdi->qp_dev->qpn_table);
 301	kfree(rdi->qp_dev);
 302}
 303
 304static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
 305			      struct rvt_qpn_map *map, unsigned off)
 306{
 307	return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
 308}
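/*
 * mk_qpn() is the inverse of the qpn -> (map, offset) split used by
 * alloc_qpn() and free_qpn() below.
 */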
 309
 310/**
 311 * alloc_qpn - Allocate the next available qpn or zero/one for QP type
 312 *	       IB_QPT_SMI/IB_QPT_GSI
  313 * @rdi: rvt device info structure
  314 * @qpt: queue pair number table pointer
  315 * @port_num: IB port number, 1 based, comes from core
 316 *
 317 * Return: The queue pair number
 318 */
 319static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
 320		     enum ib_qp_type type, u8 port_num, gfp_t gfp)
 321{
 322	u32 i, offset, max_scan, qpn;
 323	struct rvt_qpn_map *map;
 324	u32 ret;
 325
 326	if (rdi->driver_f.alloc_qpn)
 327		return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num, gfp);
 328
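	/*
	 * QPN 0 is reserved for the SMI and QPN 1 for the GSI on each port.
	 * ret is 0 for IB_QPT_SMI and 1 for IB_QPT_GSI, and the per-port bit
	 * computed into n below guards against handing out the same special
	 * QPN twice.
	 */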
 329	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
 330		unsigned n;
 331
 332		ret = type == IB_QPT_GSI;
 333		n = 1 << (ret + 2 * (port_num - 1));
 334		spin_lock(&qpt->lock);
 335		if (qpt->flags & n)
 336			ret = -EINVAL;
 337		else
 338			qpt->flags |= n;
 339		spin_unlock(&qpt->lock);
 340		goto bail;
 341	}
 342
 343	qpn = qpt->last + qpt->incr;
 344	if (qpn >= RVT_QPN_MAX)
 345		qpn = qpt->incr | ((qpt->last & 1) ^ 1);
 346	/* offset carries bit 0 */
 347	offset = qpn & RVT_BITS_PER_PAGE_MASK;
 348	map = &qpt->map[qpn / RVT_BITS_PER_PAGE];
 349	max_scan = qpt->nmaps - !offset;
 350	for (i = 0;;) {
 351		if (unlikely(!map->page)) {
 352			get_map_page(qpt, map, gfp);
 353			if (unlikely(!map->page))
 354				break;
 355		}
 356		do {
 357			if (!test_and_set_bit(offset, map->page)) {
 358				qpt->last = qpn;
 359				ret = qpn;
 360				goto bail;
 361			}
 362			offset += qpt->incr;
 363			/*
 364			 * This qpn might be bogus if offset >= BITS_PER_PAGE.
 365			 * That is OK.   It gets re-assigned below
 366			 */
 367			qpn = mk_qpn(qpt, map, offset);
 368		} while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX);
 369		/*
 370		 * In order to keep the number of pages allocated to a
  371		 * minimum, we scan all the existing pages before increasing
 372		 * the size of the bitmap table.
 373		 */
 374		if (++i > max_scan) {
 375			if (qpt->nmaps == RVT_QPNMAP_ENTRIES)
 376				break;
 377			map = &qpt->map[qpt->nmaps++];
 378			/* start at incr with current bit 0 */
 379			offset = qpt->incr | (offset & 1);
 380		} else if (map < &qpt->map[qpt->nmaps]) {
 381			++map;
 382			/* start at incr with current bit 0 */
 383			offset = qpt->incr | (offset & 1);
 384		} else {
 385			map = &qpt->map[0];
 386			/* wrap to first map page, invert bit 0 */
 387			offset = qpt->incr | ((offset & 1) ^ 1);
 388		}
 389		/* there can be no set bits in low-order QoS bits */
 390		WARN_ON(offset & (BIT(rdi->dparms.qos_shift) - 1));
 391		qpn = mk_qpn(qpt, map, offset);
 392	}
 393
 394	ret = -ENOMEM;
 395
 396bail:
 397	return ret;
 398}
 399
 400static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
 401{
 402	struct rvt_qpn_map *map;
 403
 404	map = qpt->map + qpn / RVT_BITS_PER_PAGE;
 405	if (map->page)
 406		clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
 407}
 408
 409/**
  410 * rvt_clear_mr_refs - Drop held mr refs
  411 * @qp: rvt qp data structure
  412 * @clr_sends: Whether to clear the send side as well
 413 */
 414static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
 415{
 416	unsigned n;
 417	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
 418
 419	if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
 420		rvt_put_ss(&qp->s_rdma_read_sge);
 421
 422	rvt_put_ss(&qp->r_sge);
 423
 424	if (clr_sends) {
 425		while (qp->s_last != qp->s_head) {
 426			struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 427			unsigned i;
 428
 429			for (i = 0; i < wqe->wr.num_sge; i++) {
 430				struct rvt_sge *sge = &wqe->sg_list[i];
 431
 432				rvt_put_mr(sge->mr);
 433			}
 434			if (qp->ibqp.qp_type == IB_QPT_UD ||
 435			    qp->ibqp.qp_type == IB_QPT_SMI ||
 436			    qp->ibqp.qp_type == IB_QPT_GSI)
 437				atomic_dec(&ibah_to_rvtah(
 438						wqe->ud_wr.ah)->refcount);
 439			if (++qp->s_last >= qp->s_size)
 440				qp->s_last = 0;
 441			smp_wmb(); /* see qp_set_savail */
 442		}
 443		if (qp->s_rdma_mr) {
 444			rvt_put_mr(qp->s_rdma_mr);
 445			qp->s_rdma_mr = NULL;
 446		}
 447	}
 448
 449	if (qp->ibqp.qp_type != IB_QPT_RC)
 450		return;
 451
 452	for (n = 0; n < rvt_max_atomic(rdi); n++) {
 453		struct rvt_ack_entry *e = &qp->s_ack_queue[n];
 454
 455		if (e->rdma_sge.mr) {
 456			rvt_put_mr(e->rdma_sge.mr);
 457			e->rdma_sge.mr = NULL;
 458		}
 459	}
 460}
 461
 462/**
  463 * rvt_remove_qp - remove qp from the table
 464 * @rdi: rvt dev struct
 465 * @qp: qp to remove
 466 *
 467 * Remove the QP from the table so it can't be found asynchronously by
 468 * the receive routine.
 469 */
 470static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
 471{
 472	struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
 473	u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
 474	unsigned long flags;
 475	int removed = 1;
 476
 477	spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
 478
 479	if (rcu_dereference_protected(rvp->qp[0],
 480			lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
 481		RCU_INIT_POINTER(rvp->qp[0], NULL);
 482	} else if (rcu_dereference_protected(rvp->qp[1],
 483			lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
 484		RCU_INIT_POINTER(rvp->qp[1], NULL);
 485	} else {
 486		struct rvt_qp *q;
 487		struct rvt_qp __rcu **qpp;
 488
 489		removed = 0;
 490		qpp = &rdi->qp_dev->qp_table[n];
 491		for (; (q = rcu_dereference_protected(*qpp,
 492			lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL;
 493			qpp = &q->next) {
 494			if (q == qp) {
 495				RCU_INIT_POINTER(*qpp,
 496				     rcu_dereference_protected(qp->next,
 497				     lockdep_is_held(&rdi->qp_dev->qpt_lock)));
 498				removed = 1;
 499				trace_rvt_qpremove(qp, n);
 500				break;
 501			}
 502		}
 503	}
 504
 505	spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
 506	if (removed) {
 507		synchronize_rcu();
 508		rvt_put_qp(qp);
 509	}
 510}
 511
 512/**
 513 * rvt_init_qp - initialize the QP state to the reset state
 514 * @qp: the QP to init or reinit
 515 * @type: the QP type
 516 *
 517 * This function is called from both rvt_create_qp() and
  518 * rvt_reset_qp(). The difference is that the reset path takes
  519 * the necessary locks to protect against concurrent
  520 * access.
 521 */
 522static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 523			enum ib_qp_type type)
 524{
 525	qp->remote_qpn = 0;
 526	qp->qkey = 0;
 527	qp->qp_access_flags = 0;
 528	qp->s_flags &= RVT_S_SIGNAL_REQ_WR;
 529	qp->s_hdrwords = 0;
 530	qp->s_wqe = NULL;
 531	qp->s_draining = 0;
 532	qp->s_next_psn = 0;
 533	qp->s_last_psn = 0;
 534	qp->s_sending_psn = 0;
 535	qp->s_sending_hpsn = 0;
 536	qp->s_psn = 0;
 537	qp->r_psn = 0;
 538	qp->r_msn = 0;
 539	if (type == IB_QPT_RC) {
 540		qp->s_state = IB_OPCODE_RC_SEND_LAST;
 541		qp->r_state = IB_OPCODE_RC_SEND_LAST;
 542	} else {
 543		qp->s_state = IB_OPCODE_UC_SEND_LAST;
 544		qp->r_state = IB_OPCODE_UC_SEND_LAST;
 545	}
 546	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
 547	qp->r_nak_state = 0;
 548	qp->r_aflags = 0;
 549	qp->r_flags = 0;
 550	qp->s_head = 0;
 551	qp->s_tail = 0;
 552	qp->s_cur = 0;
 553	qp->s_acked = 0;
 554	qp->s_last = 0;
 555	qp->s_ssn = 1;
 556	qp->s_lsn = 0;
 557	qp->s_mig_state = IB_MIG_MIGRATED;
 558	qp->r_head_ack_queue = 0;
 559	qp->s_tail_ack_queue = 0;
 560	qp->s_num_rd_atomic = 0;
 561	if (qp->r_rq.wq) {
 562		qp->r_rq.wq->head = 0;
 563		qp->r_rq.wq->tail = 0;
 564	}
 565	qp->r_sge.num_sge = 0;
 566	atomic_set(&qp->s_reserved_used, 0);
 567}
 568
 569/**
 570 * rvt_reset_qp - initialize the QP state to the reset state
 571 * @qp: the QP to reset
 572 * @type: the QP type
 573 *
 574 * r_lock, s_hlock, and s_lock are required to be held by the caller
 575 */
 576static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 577			 enum ib_qp_type type)
 578	__must_hold(&qp->s_lock)
 579	__must_hold(&qp->s_hlock)
 580	__must_hold(&qp->r_lock)
 581{
 582	lockdep_assert_held(&qp->r_lock);
 583	lockdep_assert_held(&qp->s_hlock);
 584	lockdep_assert_held(&qp->s_lock);
 585	if (qp->state != IB_QPS_RESET) {
 586		qp->state = IB_QPS_RESET;
 587
 588		/* Let drivers flush their waitlist */
 589		rdi->driver_f.flush_qp_waiters(qp);
 590		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
 591		spin_unlock(&qp->s_lock);
 592		spin_unlock(&qp->s_hlock);
 593		spin_unlock_irq(&qp->r_lock);
 594
 595		/* Stop the send queue and the retry timer */
 596		rdi->driver_f.stop_send_queue(qp);
 597
 598		/* Wait for things to stop */
 599		rdi->driver_f.quiesce_qp(qp);
 600
  601		/* take qp out of the hash and wait for it to be unused */
 602		rvt_remove_qp(rdi, qp);
 603		wait_event(qp->wait, !atomic_read(&qp->refcount));
 604
 605		/* grab the lock b/c it was locked at call time */
 606		spin_lock_irq(&qp->r_lock);
 607		spin_lock(&qp->s_hlock);
 608		spin_lock(&qp->s_lock);
 609
 610		rvt_clear_mr_refs(qp, 1);
 611		/*
 612		 * Let the driver do any tear down or re-init it needs to for
 613		 * a qp that has been reset
 614		 */
 615		rdi->driver_f.notify_qp_reset(qp);
 616	}
 617	rvt_init_qp(rdi, qp, type);
 618	lockdep_assert_held(&qp->r_lock);
 619	lockdep_assert_held(&qp->s_hlock);
 620	lockdep_assert_held(&qp->s_lock);
 621}
 622
 623/**
 624 * rvt_create_qp - create a queue pair for a device
  625 * @ibpd: the protection domain whose device we create the queue pair for
 626 * @init_attr: the attributes of the queue pair
 627 * @udata: user data for libibverbs.so
 628 *
 629 * Queue pair creation is mostly an rvt issue. However, drivers have their own
 630 * unique idea of what queue pair numbers mean. For instance there is a reserved
 631 * range for PSM.
 632 *
 633 * Return: the queue pair on success, otherwise returns an errno.
 634 *
 635 * Called by the ib_create_qp() core verbs function.
 636 */
 637struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 638			    struct ib_qp_init_attr *init_attr,
 639			    struct ib_udata *udata)
 640{
 641	struct rvt_qp *qp;
 642	int err;
 643	struct rvt_swqe *swq = NULL;
 644	size_t sz;
 645	size_t sg_list_sz;
 646	struct ib_qp *ret = ERR_PTR(-ENOMEM);
 647	struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
 648	void *priv = NULL;
 649	gfp_t gfp;
 650	size_t sqsize;
 651
 652	if (!rdi)
 653		return ERR_PTR(-EINVAL);
 654
 655	if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge ||
 656	    init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
 657	    init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
 658		return ERR_PTR(-EINVAL);
 659
  660	/* GFP_NOIO is applicable to RC QPs only */
 661
 662	if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
 663	    init_attr->qp_type != IB_QPT_RC)
 664		return ERR_PTR(-EINVAL);
 665
 666	gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
 667						GFP_NOIO : GFP_KERNEL;
 668
 669	/* Check receive queue parameters if no SRQ is specified. */
 670	if (!init_attr->srq) {
 671		if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge ||
 672		    init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
 673			return ERR_PTR(-EINVAL);
 674
 675		if (init_attr->cap.max_send_sge +
 676		    init_attr->cap.max_send_wr +
 677		    init_attr->cap.max_recv_sge +
 678		    init_attr->cap.max_recv_wr == 0)
 679			return ERR_PTR(-EINVAL);
 680	}
 681	sqsize =
 682		init_attr->cap.max_send_wr + 1 +
 683		rdi->dparms.reserved_operations;
 684	switch (init_attr->qp_type) {
 685	case IB_QPT_SMI:
 686	case IB_QPT_GSI:
 687		if (init_attr->port_num == 0 ||
 688		    init_attr->port_num > ibpd->device->phys_port_cnt)
 689			return ERR_PTR(-EINVAL);
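		/* fall through - SMI/GSI share the allocation path below */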
 690	case IB_QPT_UC:
 691	case IB_QPT_RC:
 692	case IB_QPT_UD:
 693		sz = sizeof(struct rvt_sge) *
 694			init_attr->cap.max_send_sge +
 695			sizeof(struct rvt_swqe);
 696		if (gfp == GFP_NOIO)
 697			swq = __vmalloc(
 698				sqsize * sz,
 699				gfp | __GFP_ZERO, PAGE_KERNEL);
 700		else
 701			swq = vzalloc_node(
 702				sqsize * sz,
 703				rdi->dparms.node);
 704		if (!swq)
 705			return ERR_PTR(-ENOMEM);
 706
 707		sz = sizeof(*qp);
 708		sg_list_sz = 0;
 709		if (init_attr->srq) {
 710			struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq);
 711
 712			if (srq->rq.max_sge > 1)
 713				sg_list_sz = sizeof(*qp->r_sg_list) *
 714					(srq->rq.max_sge - 1);
 715		} else if (init_attr->cap.max_recv_sge > 1)
 716			sg_list_sz = sizeof(*qp->r_sg_list) *
 717				(init_attr->cap.max_recv_sge - 1);
 718		qp = kzalloc_node(sz + sg_list_sz, gfp, rdi->dparms.node);
 719		if (!qp)
 720			goto bail_swq;
 721
 722		RCU_INIT_POINTER(qp->next, NULL);
 723		if (init_attr->qp_type == IB_QPT_RC) {
 724			qp->s_ack_queue =
 725				kzalloc_node(
 726					sizeof(*qp->s_ack_queue) *
 727					 rvt_max_atomic(rdi),
 728					gfp,
 729					rdi->dparms.node);
 730			if (!qp->s_ack_queue)
 731				goto bail_qp;
 732		}
 733
 734		/*
  735		 * Driver needs to set up its private QP structure and do any
 736		 * initialization that is needed.
 737		 */
 738		priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp);
 739		if (IS_ERR(priv)) {
 740			ret = priv;
 741			goto bail_qp;
 742		}
 743		qp->priv = priv;
 744		qp->timeout_jiffies =
 745			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
 746				1000UL);
 747		if (init_attr->srq) {
 748			sz = 0;
 749		} else {
 750			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
 751			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
 752			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
 753				sizeof(struct rvt_rwqe);
 754			if (udata)
 755				qp->r_rq.wq = vmalloc_user(
 756						sizeof(struct rvt_rwq) +
 757						qp->r_rq.size * sz);
 758			else if (gfp == GFP_NOIO)
 759				qp->r_rq.wq = __vmalloc(
 760						sizeof(struct rvt_rwq) +
 761						qp->r_rq.size * sz,
 762						gfp | __GFP_ZERO, PAGE_KERNEL);
 763			else
 764				qp->r_rq.wq = vzalloc_node(
 765						sizeof(struct rvt_rwq) +
 766						qp->r_rq.size * sz,
 767						rdi->dparms.node);
 768			if (!qp->r_rq.wq)
 769				goto bail_driver_priv;
 770		}
 771
 772		/*
 773		 * ib_create_qp() will initialize qp->ibqp
 774		 * except for qp->ibqp.qp_num.
 775		 */
 776		spin_lock_init(&qp->r_lock);
 777		spin_lock_init(&qp->s_hlock);
 778		spin_lock_init(&qp->s_lock);
 779		spin_lock_init(&qp->r_rq.lock);
 780		atomic_set(&qp->refcount, 0);
 781		atomic_set(&qp->local_ops_pending, 0);
 782		init_waitqueue_head(&qp->wait);
 783		init_timer(&qp->s_timer);
 784		qp->s_timer.data = (unsigned long)qp;
 785		INIT_LIST_HEAD(&qp->rspwait);
 786		qp->state = IB_QPS_RESET;
 787		qp->s_wq = swq;
 788		qp->s_size = sqsize;
 789		qp->s_avail = init_attr->cap.max_send_wr;
 790		qp->s_max_sge = init_attr->cap.max_send_sge;
 791		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
 792			qp->s_flags = RVT_S_SIGNAL_REQ_WR;
 793
 794		err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
 795				init_attr->qp_type,
 796				init_attr->port_num, gfp);
 797		if (err < 0) {
 798			ret = ERR_PTR(err);
 799			goto bail_rq_wq;
 800		}
 801		qp->ibqp.qp_num = err;
 802		qp->port_num = init_attr->port_num;
 803		rvt_init_qp(rdi, qp, init_attr->qp_type);
 804		break;
 805
 806	default:
 807		/* Don't support raw QPs */
 808		return ERR_PTR(-EINVAL);
 809	}
 810
 811	init_attr->cap.max_inline_data = 0;
 812
 813	/*
 814	 * Return the address of the RWQ as the offset to mmap.
 815	 * See rvt_mmap() for details.
 816	 */
 817	if (udata && udata->outlen >= sizeof(__u64)) {
 818		if (!qp->r_rq.wq) {
 819			__u64 offset = 0;
 820
 821			err = ib_copy_to_udata(udata, &offset,
 822					       sizeof(offset));
 823			if (err) {
 824				ret = ERR_PTR(err);
 825				goto bail_qpn;
 826			}
 827		} else {
 828			u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz;
 829
 830			qp->ip = rvt_create_mmap_info(rdi, s,
 831						      ibpd->uobject->context,
 832						      qp->r_rq.wq);
 833			if (!qp->ip) {
 834				ret = ERR_PTR(-ENOMEM);
 835				goto bail_qpn;
 836			}
 837
 838			err = ib_copy_to_udata(udata, &qp->ip->offset,
 839					       sizeof(qp->ip->offset));
 840			if (err) {
 841				ret = ERR_PTR(err);
 842				goto bail_ip;
 843			}
 844		}
 845		qp->pid = current->pid;
 846	}
 847
 848	spin_lock(&rdi->n_qps_lock);
 849	if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) {
 850		spin_unlock(&rdi->n_qps_lock);
 851		ret = ERR_PTR(-ENOMEM);
 852		goto bail_ip;
 853	}
 854
 855	rdi->n_qps_allocated++;
 856	/*
 857	 * Maintain a busy_jiffies variable that will be added to the timeout
 858	 * period in mod_retry_timer and add_retry_timer. This busy jiffies
 859	 * is scaled by the number of rc qps created for the device to reduce
 860	 * the number of timeouts occurring when there is a large number of
 861	 * qps. busy_jiffies is incremented every rc qp scaling interval.
 862	 * The scaling interval is selected based on extensive performance
 863	 * evaluation of targeted workloads.
 864	 */
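	/*
	 * Illustration (the value of RC_QP_SCALING_INTERVAL is defined
	 * elsewhere): if the scaling interval were 16, then 32 RC QPs would
	 * add busy_jiffies = 32 / 16 = 2 jiffies to the retry timeout.
	 */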
 865	if (init_attr->qp_type == IB_QPT_RC) {
 866		rdi->n_rc_qps++;
 867		rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
 868	}
 869	spin_unlock(&rdi->n_qps_lock);
 870
 871	if (qp->ip) {
 872		spin_lock_irq(&rdi->pending_lock);
 873		list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps);
 874		spin_unlock_irq(&rdi->pending_lock);
 875	}
 876
 877	ret = &qp->ibqp;
 878
 879	/*
  880	 * We have our QP and it's good; now keep track of what types of opcodes
 881	 * can be processed on this QP. We do this by keeping track of what the
 882	 * 3 high order bits of the opcode are.
 883	 */
 884	switch (init_attr->qp_type) {
 885	case IB_QPT_SMI:
 886	case IB_QPT_GSI:
 887	case IB_QPT_UD:
 888		qp->allowed_ops = IB_OPCODE_UD;
 889		break;
 890	case IB_QPT_RC:
 891		qp->allowed_ops = IB_OPCODE_RC;
 892		break;
 893	case IB_QPT_UC:
 894		qp->allowed_ops = IB_OPCODE_UC;
 895		break;
 896	default:
 897		ret = ERR_PTR(-EINVAL);
 898		goto bail_ip;
 899	}
 900
 901	return ret;
 902
 903bail_ip:
 904	if (qp->ip)
 905		kref_put(&qp->ip->ref, rvt_release_mmap_info);
 906
 907bail_qpn:
 908	free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
 909
 910bail_rq_wq:
 911	if (!qp->ip)
 912		vfree(qp->r_rq.wq);
 913
 914bail_driver_priv:
 915	rdi->driver_f.qp_priv_free(rdi, qp);
 916
 917bail_qp:
 918	kfree(qp->s_ack_queue);
 919	kfree(qp);
 920
 921bail_swq:
 922	vfree(swq);
 923
 924	return ret;
 925}
 926
 927/**
 928 * rvt_error_qp - put a QP into the error state
 929 * @qp: the QP to put into the error state
 930 * @err: the receive completion error to signal if a RWQE is active
 931 *
 932 * Flushes both send and receive work queues.
 933 *
 934 * Return: true if last WQE event should be generated.
 935 * The QP r_lock and s_lock should be held and interrupts disabled.
 936 * If we are already in error state, just return.
 937 */
 938int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
 939{
 940	struct ib_wc wc;
 941	int ret = 0;
 942	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
 943
 944	lockdep_assert_held(&qp->r_lock);
 945	lockdep_assert_held(&qp->s_lock);
 946	if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
 947		goto bail;
 948
 949	qp->state = IB_QPS_ERR;
 950
 951	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
 952		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
 953		del_timer(&qp->s_timer);
 954	}
 955
 956	if (qp->s_flags & RVT_S_ANY_WAIT_SEND)
 957		qp->s_flags &= ~RVT_S_ANY_WAIT_SEND;
 958
 959	rdi->driver_f.notify_error_qp(qp);
 960
 961	/* Schedule the sending tasklet to drain the send work queue. */
 962	if (ACCESS_ONCE(qp->s_last) != qp->s_head)
 963		rdi->driver_f.schedule_send(qp);
 964
 965	rvt_clear_mr_refs(qp, 0);
 966
 967	memset(&wc, 0, sizeof(wc));
 968	wc.qp = &qp->ibqp;
 969	wc.opcode = IB_WC_RECV;
 970
 971	if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) {
 972		wc.wr_id = qp->r_wr_id;
 973		wc.status = err;
 974		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
 975	}
 976	wc.status = IB_WC_WR_FLUSH_ERR;
 977
 978	if (qp->r_rq.wq) {
 979		struct rvt_rwq *wq;
 980		u32 head;
 981		u32 tail;
 982
 983		spin_lock(&qp->r_rq.lock);
 984
 985		/* sanity check pointers before trusting them */
 986		wq = qp->r_rq.wq;
 987		head = wq->head;
 988		if (head >= qp->r_rq.size)
 989			head = 0;
 990		tail = wq->tail;
 991		if (tail >= qp->r_rq.size)
 992			tail = 0;
 993		while (tail != head) {
 994			wc.wr_id = rvt_get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
 995			if (++tail >= qp->r_rq.size)
 996				tail = 0;
 997			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
 998		}
 999		wq->tail = tail;
1000
1001		spin_unlock(&qp->r_rq.lock);
1002	} else if (qp->ibqp.event_handler) {
1003		ret = 1;
1004	}
1005
1006bail:
1007	return ret;
1008}
1009EXPORT_SYMBOL(rvt_error_qp);
1010
1011/*
1012 * Put the QP into the hash table.
1013 * The hash table holds a reference to the QP.
1014 */
1015static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
1016{
1017	struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
1018	unsigned long flags;
1019
1020	rvt_get_qp(qp);
1021	spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
1022
1023	if (qp->ibqp.qp_num <= 1) {
1024		rcu_assign_pointer(rvp->qp[qp->ibqp.qp_num], qp);
1025	} else {
1026		u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
1027
1028		qp->next = rdi->qp_dev->qp_table[n];
1029		rcu_assign_pointer(rdi->qp_dev->qp_table[n], qp);
1030		trace_rvt_qpinsert(qp, n);
1031	}
1032
1033	spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
1034}
1035
1036/**
1037 * rvt_modify_qp - modify the attributes of a queue pair
 1038 * @ibqp: the queue pair whose attributes we're modifying
1039 * @attr: the new attributes
1040 * @attr_mask: the mask of attributes to modify
1041 * @udata: user data for libibverbs.so
1042 *
1043 * Return: 0 on success, otherwise returns an errno.
1044 */
1045int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1046		  int attr_mask, struct ib_udata *udata)
1047{
1048	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1049	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1050	enum ib_qp_state cur_state, new_state;
1051	struct ib_event ev;
1052	int lastwqe = 0;
1053	int mig = 0;
1054	int pmtu = 0; /* for gcc warning only */
1055	enum rdma_link_layer link;
1056
1057	link = rdma_port_get_link_layer(ibqp->device, qp->port_num);
1058
1059	spin_lock_irq(&qp->r_lock);
1060	spin_lock(&qp->s_hlock);
1061	spin_lock(&qp->s_lock);
1062
1063	cur_state = attr_mask & IB_QP_CUR_STATE ?
1064		attr->cur_qp_state : qp->state;
1065	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1066
1067	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
1068				attr_mask, link))
1069		goto inval;
1070
1071	if (rdi->driver_f.check_modify_qp &&
1072	    rdi->driver_f.check_modify_qp(qp, attr, attr_mask, udata))
1073		goto inval;
1074
1075	if (attr_mask & IB_QP_AV) {
1076		if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
1077			goto inval;
1078		if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr))
1079			goto inval;
1080	}
1081
1082	if (attr_mask & IB_QP_ALT_PATH) {
1083		if (attr->alt_ah_attr.dlid >=
1084		    be16_to_cpu(IB_MULTICAST_LID_BASE))
1085			goto inval;
1086		if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
1087			goto inval;
1088		if (attr->alt_pkey_index >= rvt_get_npkeys(rdi))
1089			goto inval;
1090	}
1091
1092	if (attr_mask & IB_QP_PKEY_INDEX)
1093		if (attr->pkey_index >= rvt_get_npkeys(rdi))
1094			goto inval;
1095
1096	if (attr_mask & IB_QP_MIN_RNR_TIMER)
1097		if (attr->min_rnr_timer > 31)
1098			goto inval;
1099
1100	if (attr_mask & IB_QP_PORT)
1101		if (qp->ibqp.qp_type == IB_QPT_SMI ||
1102		    qp->ibqp.qp_type == IB_QPT_GSI ||
1103		    attr->port_num == 0 ||
1104		    attr->port_num > ibqp->device->phys_port_cnt)
1105			goto inval;
1106
1107	if (attr_mask & IB_QP_DEST_QPN)
1108		if (attr->dest_qp_num > RVT_QPN_MASK)
1109			goto inval;
1110
1111	if (attr_mask & IB_QP_RETRY_CNT)
1112		if (attr->retry_cnt > 7)
1113			goto inval;
1114
1115	if (attr_mask & IB_QP_RNR_RETRY)
1116		if (attr->rnr_retry > 7)
1117			goto inval;
1118
1119	/*
 1120	 * Don't allow invalid path_mtu values. It is OK to set it greater
 1121	 * than the active mtu (or even the max_cap, if we have tuned that
 1122	 * to a small mtu). We'll set qp->path_mtu to the lesser of the
 1123	 * requested attribute mtu and the active mtu, for packetizing
 1124	 * messages.
1125	 * Note that the QP port has to be set in INIT and MTU in RTR.
1126	 */
1127	if (attr_mask & IB_QP_PATH_MTU) {
1128		pmtu = rdi->driver_f.get_pmtu_from_attr(rdi, qp, attr);
1129		if (pmtu < 0)
1130			goto inval;
1131	}
1132
1133	if (attr_mask & IB_QP_PATH_MIG_STATE) {
1134		if (attr->path_mig_state == IB_MIG_REARM) {
1135			if (qp->s_mig_state == IB_MIG_ARMED)
1136				goto inval;
1137			if (new_state != IB_QPS_RTS)
1138				goto inval;
1139		} else if (attr->path_mig_state == IB_MIG_MIGRATED) {
1140			if (qp->s_mig_state == IB_MIG_REARM)
1141				goto inval;
1142			if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
1143				goto inval;
1144			if (qp->s_mig_state == IB_MIG_ARMED)
1145				mig = 1;
1146		} else {
1147			goto inval;
1148		}
1149	}
1150
1151	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1152		if (attr->max_dest_rd_atomic > rdi->dparms.max_rdma_atomic)
1153			goto inval;
1154
1155	switch (new_state) {
1156	case IB_QPS_RESET:
1157		if (qp->state != IB_QPS_RESET)
1158			rvt_reset_qp(rdi, qp, ibqp->qp_type);
1159		break;
1160
1161	case IB_QPS_RTR:
1162		/* Allow event to re-trigger if QP set to RTR more than once */
1163		qp->r_flags &= ~RVT_R_COMM_EST;
1164		qp->state = new_state;
1165		break;
1166
1167	case IB_QPS_SQD:
1168		qp->s_draining = qp->s_last != qp->s_cur;
1169		qp->state = new_state;
1170		break;
1171
1172	case IB_QPS_SQE:
1173		if (qp->ibqp.qp_type == IB_QPT_RC)
1174			goto inval;
1175		qp->state = new_state;
1176		break;
1177
1178	case IB_QPS_ERR:
1179		lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1180		break;
1181
1182	default:
1183		qp->state = new_state;
1184		break;
1185	}
1186
1187	if (attr_mask & IB_QP_PKEY_INDEX)
1188		qp->s_pkey_index = attr->pkey_index;
1189
1190	if (attr_mask & IB_QP_PORT)
1191		qp->port_num = attr->port_num;
1192
1193	if (attr_mask & IB_QP_DEST_QPN)
1194		qp->remote_qpn = attr->dest_qp_num;
1195
1196	if (attr_mask & IB_QP_SQ_PSN) {
1197		qp->s_next_psn = attr->sq_psn & rdi->dparms.psn_modify_mask;
1198		qp->s_psn = qp->s_next_psn;
1199		qp->s_sending_psn = qp->s_next_psn;
1200		qp->s_last_psn = qp->s_next_psn - 1;
1201		qp->s_sending_hpsn = qp->s_last_psn;
1202	}
1203
1204	if (attr_mask & IB_QP_RQ_PSN)
1205		qp->r_psn = attr->rq_psn & rdi->dparms.psn_modify_mask;
1206
1207	if (attr_mask & IB_QP_ACCESS_FLAGS)
1208		qp->qp_access_flags = attr->qp_access_flags;
1209
1210	if (attr_mask & IB_QP_AV) {
1211		qp->remote_ah_attr = attr->ah_attr;
1212		qp->s_srate = attr->ah_attr.static_rate;
1213		qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
1214	}
1215
1216	if (attr_mask & IB_QP_ALT_PATH) {
1217		qp->alt_ah_attr = attr->alt_ah_attr;
1218		qp->s_alt_pkey_index = attr->alt_pkey_index;
1219	}
1220
1221	if (attr_mask & IB_QP_PATH_MIG_STATE) {
1222		qp->s_mig_state = attr->path_mig_state;
1223		if (mig) {
1224			qp->remote_ah_attr = qp->alt_ah_attr;
1225			qp->port_num = qp->alt_ah_attr.port_num;
1226			qp->s_pkey_index = qp->s_alt_pkey_index;
1227		}
1228	}
1229
1230	if (attr_mask & IB_QP_PATH_MTU) {
1231		qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu);
1232		qp->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu);
1233		qp->log_pmtu = ilog2(qp->pmtu);
1234	}
1235
1236	if (attr_mask & IB_QP_RETRY_CNT) {
1237		qp->s_retry_cnt = attr->retry_cnt;
1238		qp->s_retry = attr->retry_cnt;
1239	}
1240
1241	if (attr_mask & IB_QP_RNR_RETRY) {
1242		qp->s_rnr_retry_cnt = attr->rnr_retry;
1243		qp->s_rnr_retry = attr->rnr_retry;
1244	}
1245
1246	if (attr_mask & IB_QP_MIN_RNR_TIMER)
1247		qp->r_min_rnr_timer = attr->min_rnr_timer;
1248
1249	if (attr_mask & IB_QP_TIMEOUT) {
1250		qp->timeout = attr->timeout;
1251		qp->timeout_jiffies =
1252			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
1253				1000UL);
1254	}
1255
1256	if (attr_mask & IB_QP_QKEY)
1257		qp->qkey = attr->qkey;
1258
1259	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1260		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
1261
1262	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
1263		qp->s_max_rd_atomic = attr->max_rd_atomic;
1264
1265	if (rdi->driver_f.modify_qp)
1266		rdi->driver_f.modify_qp(qp, attr, attr_mask, udata);
1267
1268	spin_unlock(&qp->s_lock);
1269	spin_unlock(&qp->s_hlock);
1270	spin_unlock_irq(&qp->r_lock);
1271
1272	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1273		rvt_insert_qp(rdi, qp);
1274
1275	if (lastwqe) {
1276		ev.device = qp->ibqp.device;
1277		ev.element.qp = &qp->ibqp;
1278		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1279		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1280	}
1281	if (mig) {
1282		ev.device = qp->ibqp.device;
1283		ev.element.qp = &qp->ibqp;
1284		ev.event = IB_EVENT_PATH_MIG;
1285		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1286	}
1287	return 0;
1288
1289inval:
1290	spin_unlock(&qp->s_lock);
1291	spin_unlock(&qp->s_hlock);
1292	spin_unlock_irq(&qp->r_lock);
1293	return -EINVAL;
1294}
1295
1296/** rvt_free_qpn - Free a qpn from the bit map
1297 * @qpt: QP table
1298 * @qpn: queue pair number to free
1299 */
1300static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
1301{
1302	struct rvt_qpn_map *map;
1303
1304	map = qpt->map + qpn / RVT_BITS_PER_PAGE;
1305	if (map->page)
1306		clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
1307}
1308
1309/**
1310 * rvt_destroy_qp - destroy a queue pair
1311 * @ibqp: the queue pair to destroy
1312 *
1313 * Note that this can be called while the QP is actively sending or
1314 * receiving!
1315 *
1316 * Return: 0 on success.
1317 */
1318int rvt_destroy_qp(struct ib_qp *ibqp)
1319{
1320	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1321	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1322
1323	spin_lock_irq(&qp->r_lock);
1324	spin_lock(&qp->s_hlock);
1325	spin_lock(&qp->s_lock);
1326	rvt_reset_qp(rdi, qp, ibqp->qp_type);
1327	spin_unlock(&qp->s_lock);
1328	spin_unlock(&qp->s_hlock);
1329	spin_unlock_irq(&qp->r_lock);
1330
1331	/* qpn is now available for use again */
1332	rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
1333
1334	spin_lock(&rdi->n_qps_lock);
1335	rdi->n_qps_allocated--;
1336	if (qp->ibqp.qp_type == IB_QPT_RC) {
1337		rdi->n_rc_qps--;
1338		rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
1339	}
1340	spin_unlock(&rdi->n_qps_lock);
1341
1342	if (qp->ip)
1343		kref_put(&qp->ip->ref, rvt_release_mmap_info);
1344	else
1345		vfree(qp->r_rq.wq);
1346	vfree(qp->s_wq);
1347	rdi->driver_f.qp_priv_free(rdi, qp);
1348	kfree(qp->s_ack_queue);
1349	kfree(qp);
1350	return 0;
1351}
1352
1353/**
 1354 * rvt_query_qp - query an ibqp
1355 * @ibqp: IB qp to query
1356 * @attr: attr struct to fill in
1357 * @attr_mask: attr mask ignored
1358 * @init_attr: struct to fill in
1359 *
1360 * Return: always 0
1361 */
1362int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1363		 int attr_mask, struct ib_qp_init_attr *init_attr)
1364{
1365	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1366	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1367
1368	attr->qp_state = qp->state;
1369	attr->cur_qp_state = attr->qp_state;
1370	attr->path_mtu = qp->path_mtu;
1371	attr->path_mig_state = qp->s_mig_state;
1372	attr->qkey = qp->qkey;
1373	attr->rq_psn = qp->r_psn & rdi->dparms.psn_mask;
1374	attr->sq_psn = qp->s_next_psn & rdi->dparms.psn_mask;
1375	attr->dest_qp_num = qp->remote_qpn;
1376	attr->qp_access_flags = qp->qp_access_flags;
1377	attr->cap.max_send_wr = qp->s_size - 1 -
1378		rdi->dparms.reserved_operations;
1379	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
1380	attr->cap.max_send_sge = qp->s_max_sge;
1381	attr->cap.max_recv_sge = qp->r_rq.max_sge;
1382	attr->cap.max_inline_data = 0;
1383	attr->ah_attr = qp->remote_ah_attr;
1384	attr->alt_ah_attr = qp->alt_ah_attr;
1385	attr->pkey_index = qp->s_pkey_index;
1386	attr->alt_pkey_index = qp->s_alt_pkey_index;
1387	attr->en_sqd_async_notify = 0;
1388	attr->sq_draining = qp->s_draining;
1389	attr->max_rd_atomic = qp->s_max_rd_atomic;
1390	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
1391	attr->min_rnr_timer = qp->r_min_rnr_timer;
1392	attr->port_num = qp->port_num;
1393	attr->timeout = qp->timeout;
1394	attr->retry_cnt = qp->s_retry_cnt;
1395	attr->rnr_retry = qp->s_rnr_retry_cnt;
1396	attr->alt_port_num = qp->alt_ah_attr.port_num;
1397	attr->alt_timeout = qp->alt_timeout;
1398
1399	init_attr->event_handler = qp->ibqp.event_handler;
1400	init_attr->qp_context = qp->ibqp.qp_context;
1401	init_attr->send_cq = qp->ibqp.send_cq;
1402	init_attr->recv_cq = qp->ibqp.recv_cq;
1403	init_attr->srq = qp->ibqp.srq;
1404	init_attr->cap = attr->cap;
1405	if (qp->s_flags & RVT_S_SIGNAL_REQ_WR)
1406		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
1407	else
1408		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
1409	init_attr->qp_type = qp->ibqp.qp_type;
1410	init_attr->port_num = qp->port_num;
1411	return 0;
1412}
1413
1414/**
 1415 * rvt_post_recv - post a receive on a QP
1416 * @ibqp: the QP to post the receive on
1417 * @wr: the WR to post
1418 * @bad_wr: the first bad WR is put here
1419 *
1420 * This may be called from interrupt context.
1421 *
1422 * Return: 0 on success otherwise errno
1423 */
1424int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1425		  struct ib_recv_wr **bad_wr)
1426{
1427	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1428	struct rvt_rwq *wq = qp->r_rq.wq;
1429	unsigned long flags;
1430	int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) &&
1431				!qp->ibqp.srq;
1432
1433	/* Check that state is OK to post receive. */
1434	if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) {
1435		*bad_wr = wr;
1436		return -EINVAL;
1437	}
1438
1439	for (; wr; wr = wr->next) {
1440		struct rvt_rwqe *wqe;
1441		u32 next;
1442		int i;
1443
1444		if ((unsigned)wr->num_sge > qp->r_rq.max_sge) {
1445			*bad_wr = wr;
1446			return -EINVAL;
1447		}
1448
1449		spin_lock_irqsave(&qp->r_rq.lock, flags);
1450		next = wq->head + 1;
1451		if (next >= qp->r_rq.size)
1452			next = 0;
1453		if (next == wq->tail) {
1454			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
1455			*bad_wr = wr;
1456			return -ENOMEM;
1457		}
1458		if (unlikely(qp_err_flush)) {
1459			struct ib_wc wc;
1460
1461			memset(&wc, 0, sizeof(wc));
1462			wc.qp = &qp->ibqp;
1463			wc.opcode = IB_WC_RECV;
1464			wc.wr_id = wr->wr_id;
1465			wc.status = IB_WC_WR_FLUSH_ERR;
1466			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
1467		} else {
1468			wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head);
1469			wqe->wr_id = wr->wr_id;
1470			wqe->num_sge = wr->num_sge;
1471			for (i = 0; i < wr->num_sge; i++)
1472				wqe->sg_list[i] = wr->sg_list[i];
1473			/*
1474			 * Make sure queue entry is written
1475			 * before the head index.
1476			 */
1477			smp_wmb();
1478			wq->head = next;
1479		}
1480		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
1481	}
1482	return 0;
1483}
1484
1485/**
1486 * rvt_qp_valid_operation - validate post send wr request
 1487 * @qp: the qp
 1488 * @post_parms: the post send table for the driver
 1489 * @wr: the work request
 1490 *
 1491 * The routine validates the operation based on the validation
 1492 * table and returns the length of the operation, which can
 1493 * extend beyond struct ib_send_wr.  Operation-dependent flags
 1494 * key the atomic operation validation.
1495 *
1496 * There is an exception for UD qps that validates the pd and
1497 * overrides the length to include the additional UD specific
1498 * length.
1499 *
1500 * Returns a negative error or the length of the work request
1501 * for building the swqe.
1502 */
1503static inline int rvt_qp_valid_operation(
1504	struct rvt_qp *qp,
1505	const struct rvt_operation_params *post_parms,
1506	struct ib_send_wr *wr)
1507{
1508	int len;
1509
1510	if (wr->opcode >= RVT_OPERATION_MAX || !post_parms[wr->opcode].length)
1511		return -EINVAL;
1512	if (!(post_parms[wr->opcode].qpt_support & BIT(qp->ibqp.qp_type)))
1513		return -EINVAL;
1514	if ((post_parms[wr->opcode].flags & RVT_OPERATION_PRIV) &&
1515	    ibpd_to_rvtpd(qp->ibqp.pd)->user)
1516		return -EINVAL;
1517	if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC_SGE &&
1518	    (wr->num_sge == 0 ||
1519	     wr->sg_list[0].length < sizeof(u64) ||
1520	     wr->sg_list[0].addr & (sizeof(u64) - 1)))
1521		return -EINVAL;
1522	if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC &&
1523	    !qp->s_max_rd_atomic)
1524		return -EINVAL;
1525	len = post_parms[wr->opcode].length;
1526	/* UD specific */
1527	if (qp->ibqp.qp_type != IB_QPT_UC &&
1528	    qp->ibqp.qp_type != IB_QPT_RC) {
1529		if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
1530			return -EINVAL;
1531		len = sizeof(struct ib_ud_wr);
1532	}
1533	return len;
1534}
1535
1536/**
1537 * rvt_qp_is_avail - determine queue capacity
 1538 * @qp: the qp
 1539 * @rdi: the rdmavt device
 1540 * @reserved_op: is reserved operation
1541 *
1542 * This assumes the s_hlock is held but the s_last
1543 * qp variable is uncontrolled.
1544 *
1545 * For non reserved operations, the qp->s_avail
1546 * may be changed.
1547 *
1548 * The return value is zero or a -ENOMEM.
1549 */
1550static inline int rvt_qp_is_avail(
1551	struct rvt_qp *qp,
1552	struct rvt_dev_info *rdi,
1553	bool reserved_op)
1554{
1555	u32 slast;
1556	u32 avail;
1557	u32 reserved_used;
1558
1559	/* see rvt_qp_wqe_unreserve() */
1560	smp_mb__before_atomic();
1561	reserved_used = atomic_read(&qp->s_reserved_used);
1562	if (unlikely(reserved_op)) {
1563		/* see rvt_qp_wqe_unreserve() */
1564		smp_mb__before_atomic();
1565		if (reserved_used >= rdi->dparms.reserved_operations)
1566			return -ENOMEM;
1567		return 0;
1568	}
1569	/* non-reserved operations */
1570	if (likely(qp->s_avail))
1571		return 0;
1572	smp_read_barrier_depends(); /* see rc.c */
1573	slast = ACCESS_ONCE(qp->s_last);
1574	if (qp->s_head >= slast)
1575		avail = qp->s_size - (qp->s_head - slast);
1576	else
1577		avail = slast - qp->s_head;
1578
1579	/* see rvt_qp_wqe_unreserve() */
1580	smp_mb__before_atomic();
1581	reserved_used = atomic_read(&qp->s_reserved_used);
 1582	avail = avail - 1 -
 1583		(rdi->dparms.reserved_operations - reserved_used);
 1584	/* ensure we don't assign a negative s_avail */
1585	if ((s32)avail <= 0)
1586		return -ENOMEM;
1587	qp->s_avail = avail;
1588	if (WARN_ON(qp->s_avail >
1589		    (qp->s_size - 1 - rdi->dparms.reserved_operations)))
1590		rvt_pr_err(rdi,
1591			   "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u",
1592			   qp->ibqp.qp_num, qp->s_size, qp->s_avail,
1593			   qp->s_head, qp->s_tail, qp->s_cur,
1594			   qp->s_acked, qp->s_last);
1595	return 0;
1596}
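/*
 * Worked example of the availability math above (illustrative numbers):
 * with s_size = 17, s_head = 5, s_last = 2, reserved_operations = 1 and
 * reserved_used = 0, avail = 17 - (5 - 2) = 14 and then
 * 14 - 1 - (1 - 0) = 12 entries remain for non-reserved work requests:
 * three slots are in flight, one slot always stays empty, and one is
 * held back for reserved operations.
 */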
1597
1598/**
1599 * rvt_post_one_wr - post one RC, UC, or UD send work request
1600 * @qp: the QP to post on
1601 * @wr: the work request to send
1602 */
1603static int rvt_post_one_wr(struct rvt_qp *qp,
1604			   struct ib_send_wr *wr,
1605			   int *call_send)
1606{
1607	struct rvt_swqe *wqe;
1608	u32 next;
1609	int i;
1610	int j;
1611	int acc;
1612	struct rvt_lkey_table *rkt;
1613	struct rvt_pd *pd;
1614	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
1615	u8 log_pmtu;
1616	int ret;
1617	size_t cplen;
1618	bool reserved_op;
1619	int local_ops_delayed = 0;
1620
1621	BUILD_BUG_ON(IB_QPT_MAX >= (sizeof(u32) * BITS_PER_BYTE));
1622
1623	/* IB spec says that num_sge == 0 is OK. */
1624	if (unlikely(wr->num_sge > qp->s_max_sge))
1625		return -EINVAL;
1626
1627	ret = rvt_qp_valid_operation(qp, rdi->post_parms, wr);
1628	if (ret < 0)
1629		return ret;
1630	cplen = ret;
1631
1632	/*
1633	 * Local operations include fast register and local invalidate.
1634	 * Fast register needs to be processed immediately because the
1635	 * registered lkey may be used by following work requests and the
1636	 * lkey needs to be valid at the time those requests are posted.
1637	 * Local invalidate can be processed immediately if fencing is
1638	 * not required and no previous local invalidate ops are pending.
1639	 * Signaled local operations that have been processed immediately
1640	 * need to have requests with "completion only" flags set posted
1641	 * to the send queue in order to generate completions.
1642	 */
1643	if ((rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL)) {
1644		switch (wr->opcode) {
1645		case IB_WR_REG_MR:
1646			ret = rvt_fast_reg_mr(qp,
1647					      reg_wr(wr)->mr,
1648					      reg_wr(wr)->key,
1649					      reg_wr(wr)->access);
1650			if (ret || !(wr->send_flags & IB_SEND_SIGNALED))
1651				return ret;
1652			break;
1653		case IB_WR_LOCAL_INV:
1654			if ((wr->send_flags & IB_SEND_FENCE) ||
1655			    atomic_read(&qp->local_ops_pending)) {
1656				local_ops_delayed = 1;
1657			} else {
1658				ret = rvt_invalidate_rkey(
1659					qp, wr->ex.invalidate_rkey);
1660				if (ret || !(wr->send_flags & IB_SEND_SIGNALED))
1661					return ret;
1662			}
1663			break;
1664		default:
1665			return -EINVAL;
1666		}
1667	}
1668
1669	reserved_op = rdi->post_parms[wr->opcode].flags &
1670			RVT_OPERATION_USE_RESERVE;
1671	/* check for avail */
1672	ret = rvt_qp_is_avail(qp, rdi, reserved_op);
1673	if (ret)
1674		return ret;
1675	next = qp->s_head + 1;
1676	if (next >= qp->s_size)
1677		next = 0;
1678
1679	rkt = &rdi->lkey_table;
1680	pd = ibpd_to_rvtpd(qp->ibqp.pd);
1681	wqe = rvt_get_swqe_ptr(qp, qp->s_head);
1682
1683	/* cplen has length from above */
1684	memcpy(&wqe->wr, wr, cplen);
1685
1686	wqe->length = 0;
1687	j = 0;
1688	if (wr->num_sge) {
1689		acc = wr->opcode >= IB_WR_RDMA_READ ?
1690			IB_ACCESS_LOCAL_WRITE : 0;
1691		for (i = 0; i < wr->num_sge; i++) {
1692			u32 length = wr->sg_list[i].length;
1693			int ok;
1694
1695			if (length == 0)
1696				continue;
1697			ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j],
1698					 &wr->sg_list[i], acc);
1699			if (!ok) {
1700				ret = -EINVAL;
1701				goto bail_inval_free;
1702			}
1703			wqe->length += length;
1704			j++;
1705		}
1706		wqe->wr.num_sge = j;
1707	}
1708
1709	/* general part of wqe valid - allow for driver checks */
1710	if (rdi->driver_f.check_send_wqe) {
1711		ret = rdi->driver_f.check_send_wqe(qp, wqe);
1712		if (ret < 0)
1713			goto bail_inval_free;
1714		if (ret)
1715			*call_send = ret;
1716	}
1717
1718	log_pmtu = qp->log_pmtu;
1719	if (qp->ibqp.qp_type != IB_QPT_UC &&
1720	    qp->ibqp.qp_type != IB_QPT_RC) {
1721		struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah);
1722
1723		log_pmtu = ah->log_pmtu;
1724		atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
1725	}
1726
1727	if (rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) {
1728		if (local_ops_delayed)
1729			atomic_inc(&qp->local_ops_pending);
1730		else
1731			wqe->wr.send_flags |= RVT_SEND_COMPLETION_ONLY;
1732		wqe->ssn = 0;
1733		wqe->psn = 0;
1734		wqe->lpsn = 0;
1735	} else {
1736		wqe->ssn = qp->s_ssn++;
1737		wqe->psn = qp->s_next_psn;
1738		wqe->lpsn = wqe->psn +
1739				(wqe->length ?
1740					((wqe->length - 1) >> log_pmtu) :
1741					0);
1742		qp->s_next_psn = wqe->lpsn + 1;
1743	}
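	/*
	 * The lpsn computation above counts the packets a request spans.
	 * For example (illustrative numbers), a 10000 byte RC send with a
	 * 4096 byte pmtu (log_pmtu = 12) occupies PSNs psn .. psn + 2,
	 * since (10000 - 1) >> 12 = 2.
	 */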
1744	trace_rvt_post_one_wr(qp, wqe);
1745	if (unlikely(reserved_op))
1746		rvt_qp_wqe_reserve(qp, wqe);
1747	else
1748		qp->s_avail--;
1749	smp_wmb(); /* see request builders */
1750	qp->s_head = next;
1751
1752	return 0;
1753
1754bail_inval_free:
1755	/* release mr holds */
1756	while (j) {
1757		struct rvt_sge *sge = &wqe->sg_list[--j];
1758
1759		rvt_put_mr(sge->mr);
1760	}
1761	return ret;
1762}
1763
1764/**
1765 * rvt_post_send - post a send on a QP
1766 * @ibqp: the QP to post the send on
1767 * @wr: the list of work requests to post
1768 * @bad_wr: the first bad WR is put here
1769 *
1770 * This may be called from interrupt context.
1771 *
1772 * Return: 0 on success else errno
1773 */
1774int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1775		  struct ib_send_wr **bad_wr)
1776{
1777	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1778	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1779	unsigned long flags = 0;
1780	int call_send;
1781	unsigned nreq = 0;
1782	int err = 0;
1783
1784	spin_lock_irqsave(&qp->s_hlock, flags);
1785
1786	/*
 1787	 * Ensure the QP state is such that we can send. If not, bail out
 1788	 * early; there is no need to redo this check for every posted WR.
1789	 */
1790	if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) {
1791		spin_unlock_irqrestore(&qp->s_hlock, flags);
1792		return -EINVAL;
1793	}
1794
1795	/*
 1796	 * If the send queue is empty and we only have a single WR, then just go
1797	 * ahead and kick the send engine into gear. Otherwise we will always
1798	 * just schedule the send to happen later.
1799	 */
1800	call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next;
1801
1802	for (; wr; wr = wr->next) {
1803		err = rvt_post_one_wr(qp, wr, &call_send);
1804		if (unlikely(err)) {
1805			*bad_wr = wr;
1806			goto bail;
1807		}
1808		nreq++;
1809	}
1810bail:
1811	spin_unlock_irqrestore(&qp->s_hlock, flags);
1812	if (nreq) {
1813		if (call_send)
1814			rdi->driver_f.do_send(qp);
1815		else
1816			rdi->driver_f.schedule_send_no_lock(qp);
1817	}
1818	return err;
1819}
1820
1821/**
 1822 * rvt_post_srq_recv - post a receive on a shared receive queue
1823 * @ibsrq: the SRQ to post the receive on
1824 * @wr: the list of work requests to post
1825 * @bad_wr: A pointer to the first WR to cause a problem is put here
1826 *
1827 * This may be called from interrupt context.
1828 *
1829 * Return: 0 on success else errno
1830 */
1831int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
1832		      struct ib_recv_wr **bad_wr)
1833{
1834	struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
1835	struct rvt_rwq *wq;
1836	unsigned long flags;
1837
1838	for (; wr; wr = wr->next) {
1839		struct rvt_rwqe *wqe;
1840		u32 next;
1841		int i;
1842
1843		if ((unsigned)wr->num_sge > srq->rq.max_sge) {
1844			*bad_wr = wr;
1845			return -EINVAL;
1846		}
1847
1848		spin_lock_irqsave(&srq->rq.lock, flags);
1849		wq = srq->rq.wq;
1850		next = wq->head + 1;
1851		if (next >= srq->rq.size)
1852			next = 0;
1853		if (next == wq->tail) {
1854			spin_unlock_irqrestore(&srq->rq.lock, flags);
1855			*bad_wr = wr;
1856			return -ENOMEM;
1857		}
1858
1859		wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head);
1860		wqe->wr_id = wr->wr_id;
1861		wqe->num_sge = wr->num_sge;
1862		for (i = 0; i < wr->num_sge; i++)
1863			wqe->sg_list[i] = wr->sg_list[i];
1864		/* Make sure queue entry is written before the head index. */
1865		smp_wmb();
1866		wq->head = next;
1867		spin_unlock_irqrestore(&srq->rq.lock, flags);
1868	}
1869	return 0;
1870}