v6.8
   1/*
   2 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
   3 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
   4 *
   5 * This software is available to you under a choice of one of two
   6 * licenses.  You may choose to be licensed under the terms of the GNU
   7 * General Public License (GPL) Version 2, available from the file
   8 * COPYING in the main directory of this source tree, or the
   9 * OpenIB.org BSD license below:
  10 *
  11 *     Redistribution and use in source and binary forms, with or
  12 *     without modification, are permitted provided that the following
  13 *     conditions are met:
  14 *
  15 *      - Redistributions of source code must retain the above
  16 *        copyright notice, this list of conditions and the following
  17 *        disclaimer.
  18 *
  19 *      - Redistributions in binary form must reproduce the above
  20 *        copyright notice, this list of conditions and the following
  21 *        disclaimer in the documentation and/or other materials
  22 *        provided with the distribution.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31 * SOFTWARE.
  32 */
  33
  34#include <linux/module.h>
  35#include <linux/init.h>
  36#include <linux/slab.h>
  37#include <linux/errno.h>
  38#include <linux/netdevice.h>
  39#include <linux/inetdevice.h>
  40#include <linux/rtnetlink.h>
  41#include <linux/if_vlan.h>
  42#include <linux/sched/mm.h>
  43#include <linux/sched/task.h>
  44
  45#include <net/ipv6.h>
  46#include <net/addrconf.h>
  47#include <net/devlink.h>
  48
  49#include <rdma/ib_smi.h>
  50#include <rdma/ib_user_verbs.h>
  51#include <rdma/ib_addr.h>
  52#include <rdma/ib_cache.h>
  53
  54#include <net/bonding.h>
  55
  56#include <linux/mlx4/driver.h>
  57#include <linux/mlx4/cmd.h>
  58#include <linux/mlx4/qp.h>
  59
  60#include "mlx4_ib.h"
  61#include <rdma/mlx4-abi.h>
  62
  63#define DRV_NAME	MLX4_IB_DRV_NAME
  64#define DRV_VERSION	"4.0-0"
  65
  66#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
  67#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
  68#define MLX4_IB_CARD_REV_A0   0xA0
  69
  70MODULE_AUTHOR("Roland Dreier");
  71MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
  72MODULE_LICENSE("Dual BSD/GPL");
  73
  74int mlx4_ib_sm_guid_assign = 0;
  75module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
  76MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
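/*
 * Example usage (assuming mlx4_ib is built as a module): the parameter can
 * be set at load time with "modprobe mlx4_ib sm_guid_assign=1" or on the
 * kernel command line as "mlx4_ib.sm_guid_assign=1"; the 0444 mode above
 * exposes it read-only under /sys/module/mlx4_ib/parameters/sm_guid_assign.
 */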
  77
  78static const char mlx4_ib_version[] =
  79	DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
  80	DRV_VERSION "\n";
  81
  82static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
  83static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device,
  84						    u32 port_num);
  85static int mlx4_ib_event(struct notifier_block *this, unsigned long event,
  86			 void *param);
  87
  88static struct workqueue_struct *wq;
  89
  90static int check_flow_steering_support(struct mlx4_dev *dev)
  91{
  92	int eth_num_ports = 0;
  93	int ib_num_ports = 0;
  94
  95	int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
  96
  97	if (dmfs) {
  98		int i;
  99		mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
 100			eth_num_ports++;
 101		mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
 102			ib_num_ports++;
 103		dmfs &= (!ib_num_ports ||
 104			 (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
 105			(!eth_num_ports ||
 106			 (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
 107		if (ib_num_ports && mlx4_is_mfunc(dev)) {
 108			pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n");
 109			dmfs = 0;
 110		}
 111	}
 112	return dmfs;
 113}
 114
 115static int num_ib_ports(struct mlx4_dev *dev)
 116{
 117	int ib_ports = 0;
 118	int i;
 119
 120	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
 121		ib_ports++;
 122
 123	return ib_ports;
 124}
 125
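/*
 * Find the netdev backing IB port @port_num: walk init_net for a device
 * with the same parent as the ib_device and a matching dev_port.  When the
 * mlx4 device is bonded, the active slave of the bond master is returned
 * instead.  A reference is taken on the returned netdev; the caller is
 * expected to drop it with dev_put().
 */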
 126static struct net_device *mlx4_ib_get_netdev(struct ib_device *device,
 127					     u32 port_num)
 128{
 129	struct mlx4_ib_dev *ibdev = to_mdev(device);
 130	struct net_device *dev, *ret = NULL;
 131
 132	rcu_read_lock();
 133	for_each_netdev_rcu(&init_net, dev) {
 134		if (dev->dev.parent != ibdev->ib_dev.dev.parent ||
 135		    dev->dev_port + 1 != port_num)
 136			continue;
 137
 138		if (mlx4_is_bonded(ibdev->dev)) {
 139			struct net_device *upper;
 140
 141			upper = netdev_master_upper_dev_get_rcu(dev);
 142			if (upper) {
 143				struct net_device *active;
 144
 145				active = bond_option_active_slave_get_rcu(netdev_priv(upper));
 146				if (active)
 147					dev = active;
 148			}
 149		}
 150
 151		dev_hold(dev);
 152		ret = dev;
 153		break;
 154	}
 155
 156	rcu_read_unlock();
 157	return ret;
 158}
 159
 160static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
 161				  struct mlx4_ib_dev *ibdev,
 162				  u32 port_num)
 163{
 164	struct mlx4_cmd_mailbox *mailbox;
 165	int err;
 166	struct mlx4_dev *dev = ibdev->dev;
 167	int i;
 168	union ib_gid *gid_tbl;
 169
 170	mailbox = mlx4_alloc_cmd_mailbox(dev);
 171	if (IS_ERR(mailbox))
 172		return -ENOMEM;
 173
 174	gid_tbl = mailbox->buf;
 175
 176	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
 177		memcpy(&gid_tbl[i], &gids[i].gid, sizeof(union ib_gid));
 178
 179	err = mlx4_cmd(dev, mailbox->dma,
 180		       MLX4_SET_PORT_GID_TABLE << 8 | port_num,
 181		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
 182		       MLX4_CMD_WRAPPED);
 183	if (mlx4_is_bonded(dev))
 184		err += mlx4_cmd(dev, mailbox->dma,
 185				MLX4_SET_PORT_GID_TABLE << 8 | 2,
 186				1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
 187				MLX4_CMD_WRAPPED);
 188
 189	mlx4_free_cmd_mailbox(dev, mailbox);
 190	return err;
 191}
 192
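/*
 * RoCE v1/v2 capable variant of the GID table update: each entry carries a
 * version/type pair in addition to the GID itself.  Entries whose gid_type
 * is IB_GID_TYPE_ROCE_UDP_ENCAP are marked as version 2 (RoCE v2), and the
 * type field is set to 1 for GIDs that are not IPv4-mapped, i.e. native
 * IPv6 addresses.
 */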
 193static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
 194				     struct mlx4_ib_dev *ibdev,
 195				     u32 port_num)
 196{
 197	struct mlx4_cmd_mailbox *mailbox;
 198	int err;
 199	struct mlx4_dev *dev = ibdev->dev;
 200	int i;
 201	struct {
 202		union ib_gid	gid;
 203		__be32		rsrvd1[2];
 204		__be16		rsrvd2;
 205		u8		type;
 206		u8		version;
 207		__be32		rsrvd3;
 208	} *gid_tbl;
 209
 210	mailbox = mlx4_alloc_cmd_mailbox(dev);
 211	if (IS_ERR(mailbox))
 212		return -ENOMEM;
 213
 214	gid_tbl = mailbox->buf;
 215	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
 216		memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
 217		if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
 218			gid_tbl[i].version = 2;
 219			if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
 220				gid_tbl[i].type = 1;
 221		}
 222	}
 223
 224	err = mlx4_cmd(dev, mailbox->dma,
 225		       MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
 226		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
 227		       MLX4_CMD_WRAPPED);
 228	if (mlx4_is_bonded(dev))
 229		err += mlx4_cmd(dev, mailbox->dma,
 230				MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
 231				1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
 232				MLX4_CMD_WRAPPED);
 233
 234	mlx4_free_cmd_mailbox(dev, mailbox);
 235	return err;
 236}
 237
 238static int mlx4_ib_update_gids(struct gid_entry *gids,
 239			       struct mlx4_ib_dev *ibdev,
 240			       u32 port_num)
 241{
 242	if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
 243		return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
 244
 245	return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
 246}
 247
 248static void free_gid_entry(struct gid_entry *entry)
 249{
 250	memset(&entry->gid, 0, sizeof(entry->gid));
 251	kfree(entry->ctx);
 252	entry->ctx = NULL;
 253}
 254
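/*
 * Add a GID to the per-port software cache and, when a new slot is
 * consumed, push the whole table to the hardware.  Under iboe->lock the
 * table is scanned for an existing <gid, gid_type, vlan_id> match (in
 * which case only the refcount is bumped) or for a free slot; the actual
 * SET_PORT command runs after the lock is dropped, using a snapshot of the
 * table taken while the lock was held.
 */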
 255static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
 256{
 257	struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
 258	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
 259	struct mlx4_port_gid_table   *port_gid_table;
 260	int free = -1, found = -1;
 261	int ret = 0;
 262	int hw_update = 0;
 263	int i;
 264	struct gid_entry *gids;
 265	u16 vlan_id = 0xffff;
 266	u8 mac[ETH_ALEN];
 267
 268	if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
 269		return -EINVAL;
 270
 271	if (attr->port_num > MLX4_MAX_PORTS)
 272		return -EINVAL;
 273
 274	if (!context)
 275		return -EINVAL;
 276
 277	ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]);
 278	if (ret)
 279		return ret;
 280	port_gid_table = &iboe->gids[attr->port_num - 1];
 281	spin_lock_bh(&iboe->lock);
 282	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
 283		if (!memcmp(&port_gid_table->gids[i].gid,
 284			    &attr->gid, sizeof(attr->gid)) &&
 285		    port_gid_table->gids[i].gid_type == attr->gid_type &&
 286		    port_gid_table->gids[i].vlan_id == vlan_id)  {
 287			found = i;
 288			break;
 289		}
 290		if (free < 0 && rdma_is_zero_gid(&port_gid_table->gids[i].gid))
 291			free = i; /* HW has space */
 292	}
 293
 294	if (found < 0) {
 295		if (free < 0) {
 296			ret = -ENOSPC;
 297		} else {
 298			port_gid_table->gids[free].ctx = kmalloc(sizeof(*port_gid_table->gids[free].ctx), GFP_ATOMIC);
 299			if (!port_gid_table->gids[free].ctx) {
 300				ret = -ENOMEM;
 301			} else {
 302				*context = port_gid_table->gids[free].ctx;
 303				port_gid_table->gids[free].gid = attr->gid;
 304				port_gid_table->gids[free].gid_type = attr->gid_type;
 305				port_gid_table->gids[free].vlan_id = vlan_id;
 306				port_gid_table->gids[free].ctx->real_index = free;
 307				port_gid_table->gids[free].ctx->refcount = 1;
 308				hw_update = 1;
 309			}
 310		}
 311	} else {
 312		struct gid_cache_context *ctx = port_gid_table->gids[found].ctx;
 313		*context = ctx;
 314		ctx->refcount++;
 315	}
 316	if (!ret && hw_update) {
 317		gids = kmalloc_array(MLX4_MAX_PORT_GIDS, sizeof(*gids),
 318				     GFP_ATOMIC);
 319		if (!gids) {
 320			ret = -ENOMEM;
 321			*context = NULL;
 322			free_gid_entry(&port_gid_table->gids[free]);
 323		} else {
 324			for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
 325				memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
 326				gids[i].gid_type = port_gid_table->gids[i].gid_type;
 327			}
 328		}
 329	}
 330	spin_unlock_bh(&iboe->lock);
 331
 332	if (!ret && hw_update) {
 333		ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
 334		if (ret) {
 335			spin_lock_bh(&iboe->lock);
 336			*context = NULL;
 337			free_gid_entry(&port_gid_table->gids[free]);
 338			spin_unlock_bh(&iboe->lock);
 339		}
 340		kfree(gids);
 341	}
 342
 343	return ret;
 344}
 345
 346static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
 347{
 348	struct gid_cache_context *ctx = *context;
 349	struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
 350	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
 351	struct mlx4_port_gid_table   *port_gid_table;
 352	int ret = 0;
 353	int hw_update = 0;
 354	struct gid_entry *gids;
 355
 356	if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
 357		return -EINVAL;
 358
 359	if (attr->port_num > MLX4_MAX_PORTS)
 360		return -EINVAL;
 361
 362	port_gid_table = &iboe->gids[attr->port_num - 1];
 363	spin_lock_bh(&iboe->lock);
 364	if (ctx) {
 365		ctx->refcount--;
 366		if (!ctx->refcount) {
 367			unsigned int real_index = ctx->real_index;
 368
 369			free_gid_entry(&port_gid_table->gids[real_index]);
 370			hw_update = 1;
 371		}
 372	}
 373	if (!ret && hw_update) {
 374		int i;
 375
 376		gids = kmalloc_array(MLX4_MAX_PORT_GIDS, sizeof(*gids),
 377				     GFP_ATOMIC);
 378		if (!gids) {
 379			ret = -ENOMEM;
 380		} else {
 381			for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
 382				memcpy(&gids[i].gid,
 383				       &port_gid_table->gids[i].gid,
 384				       sizeof(union ib_gid));
 385				gids[i].gid_type =
 386				    port_gid_table->gids[i].gid_type;
 387			}
 388		}
 389	}
 390	spin_unlock_bh(&iboe->lock);
 391
 392	if (!ret && hw_update) {
 393		ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
 394		kfree(gids);
 395	}
 396	return ret;
 397}
 398
 399int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
 400				    const struct ib_gid_attr *attr)
 401{
 402	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
 403	struct gid_cache_context *ctx = NULL;
 404	struct mlx4_port_gid_table   *port_gid_table;
 405	int real_index = -EINVAL;
 406	int i;
 407	unsigned long flags;
 408	u32 port_num = attr->port_num;
 409
 410	if (port_num > MLX4_MAX_PORTS)
 411		return -EINVAL;
 412
 413	if (mlx4_is_bonded(ibdev->dev))
 414		port_num = 1;
 415
 416	if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
 417		return attr->index;
 418
 419	spin_lock_irqsave(&iboe->lock, flags);
 420	port_gid_table = &iboe->gids[port_num - 1];
 421
 422	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
 423		if (!memcmp(&port_gid_table->gids[i].gid,
 424			    &attr->gid, sizeof(attr->gid)) &&
 425		    attr->gid_type == port_gid_table->gids[i].gid_type) {
 426			ctx = port_gid_table->gids[i].ctx;
 427			break;
 428		}
 429	if (ctx)
 430		real_index = ctx->real_index;
 431	spin_unlock_irqrestore(&iboe->lock, flags);
 432	return real_index;
 433}
 434
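/*
 * Device attributes are assembled from two sources: a NODE_INFO MAD issued
 * through mlx4_MAD_IFC() (vendor id, hw revision, sys_image_guid) and the
 * cached firmware capabilities in dev->caps / dev->quotas.  When called
 * through the extended uverbs path, optional fields (hca_core_clock_offset,
 * max_inl_recv_sz, rss_caps, tso_caps) are returned only if the caller's
 * response buffer is large enough to hold them.
 */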
 435static int mlx4_ib_query_device(struct ib_device *ibdev,
 436				struct ib_device_attr *props,
 437				struct ib_udata *uhw)
 438{
 439	struct mlx4_ib_dev *dev = to_mdev(ibdev);
 440	struct ib_smp *in_mad;
 441	struct ib_smp *out_mad;
 442	int err;
 443	int have_ib_ports;
 444	struct mlx4_uverbs_ex_query_device cmd;
 445	struct mlx4_uverbs_ex_query_device_resp resp = {};
 446	struct mlx4_clock_params clock_params;
 447
 448	if (uhw->inlen) {
 449		if (uhw->inlen < sizeof(cmd))
 450			return -EINVAL;
 451
 452		err = ib_copy_from_udata(&cmd, uhw, sizeof(cmd));
 453		if (err)
 454			return err;
 455
 456		if (cmd.comp_mask)
 457			return -EINVAL;
 458
 459		if (cmd.reserved)
 460			return -EINVAL;
 461	}
 462
 463	resp.response_length = offsetof(typeof(resp), response_length) +
 464		sizeof(resp.response_length);
 465	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 466	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 467	err = -ENOMEM;
 468	if (!in_mad || !out_mad)
 469		goto out;
 470
 471	ib_init_query_mad(in_mad);
 472	in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
 473
 474	err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
 475			   1, NULL, NULL, in_mad, out_mad);
 476	if (err)
 477		goto out;
 478
 479	memset(props, 0, sizeof *props);
 480
 481	have_ib_ports = num_ib_ports(dev->dev);
 482
 483	props->fw_ver = dev->dev->caps.fw_ver;
 484	props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
 485		IB_DEVICE_PORT_ACTIVE_EVENT		|
 486		IB_DEVICE_SYS_IMAGE_GUID		|
 487		IB_DEVICE_RC_RNR_NAK_GEN;
 488	props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK;
 489	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
 490		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
 491	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
 492		props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
 493	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM && have_ib_ports)
 494		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
 495	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
 496		props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
 497	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
 498		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
 499	if (dev->dev->caps.max_gso_sz &&
 500	    (dev->dev->rev_id != MLX4_IB_CARD_REV_A0) &&
 501	    (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH))
 502		props->kernel_cap_flags |= IBK_UD_TSO;
 503	if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
 504		props->kernel_cap_flags |= IBK_LOCAL_DMA_LKEY;
 505	if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
 506	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
 507	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
 508		props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
 509	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
 510		props->device_cap_flags |= IB_DEVICE_XRC;
 511	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)
 512		props->device_cap_flags |= IB_DEVICE_MEM_WINDOW;
 513	if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
 514		if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B)
 515			props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
 516		else
 517			props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
 518	}
 519	if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
 520		props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
 521
 522	props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
 523
 524	props->vendor_id	   = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
 525		0xffffff;
 526	props->vendor_part_id	   = dev->dev->persist->pdev->device;
 527	props->hw_ver		   = be32_to_cpup((__be32 *) (out_mad->data + 32));
 528	memcpy(&props->sys_image_guid, out_mad->data +	4, 8);
 529
 530	props->max_mr_size	   = ~0ull;
 531	props->page_size_cap	   = dev->dev->caps.page_size_cap;
 532	props->max_qp		   = dev->dev->quotas.qp;
 533	props->max_qp_wr	   = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
 534	props->max_send_sge =
 535		min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg);
 536	props->max_recv_sge =
 537		min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg);
 538	props->max_sge_rd = MLX4_MAX_SGE_RD;
 539	props->max_cq		   = dev->dev->quotas.cq;
 540	props->max_cqe		   = dev->dev->caps.max_cqes;
 541	props->max_mr		   = dev->dev->quotas.mpt;
 542	props->max_pd		   = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
 543	props->max_qp_rd_atom	   = dev->dev->caps.max_qp_dest_rdma;
 544	props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
 545	props->max_res_rd_atom	   = props->max_qp_rd_atom * props->max_qp;
 546	props->max_srq		   = dev->dev->quotas.srq;
 547	props->max_srq_wr	   = dev->dev->caps.max_srq_wqes - 1;
 548	props->max_srq_sge	   = dev->dev->caps.max_srq_sge;
 549	props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
 550	props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
 551	props->atomic_cap	   = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
 552		IB_ATOMIC_HCA : IB_ATOMIC_NONE;
 553	props->masked_atomic_cap   = props->atomic_cap;
 554	props->max_pkeys	   = dev->dev->caps.pkey_table_len[1];
 555	props->max_mcast_grp	   = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
 556	props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
 557	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
 558					   props->max_mcast_grp;
 559	props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
 560	props->timestamp_mask = 0xFFFFFFFFFFFFULL;
 561	props->max_ah = INT_MAX;
 562
 563	if (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET ||
 564	    mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET) {
 565		if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) {
 566			props->rss_caps.max_rwq_indirection_tables =
 567				props->max_qp;
 568			props->rss_caps.max_rwq_indirection_table_size =
 569				dev->dev->caps.max_rss_tbl_sz;
 570			props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
 571			props->max_wq_type_rq = props->max_qp;
 572		}
 573
 574		if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)
 575			props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
 576	}
 577
 578	props->cq_caps.max_cq_moderation_count = MLX4_MAX_CQ_COUNT;
 579	props->cq_caps.max_cq_moderation_period = MLX4_MAX_CQ_PERIOD;
 580
 581	if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
 582		resp.response_length += sizeof(resp.hca_core_clock_offset);
 583		if (!mlx4_get_internal_clock_params(dev->dev, &clock_params)) {
 584			resp.comp_mask |= MLX4_IB_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET;
 585			resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
 586		}
 587	}
 588
 589	if (uhw->outlen >= resp.response_length +
 590	    sizeof(resp.max_inl_recv_sz)) {
 591		resp.response_length += sizeof(resp.max_inl_recv_sz);
 592		resp.max_inl_recv_sz  = dev->dev->caps.max_rq_sg *
 593			sizeof(struct mlx4_wqe_data_seg);
 594	}
 595
 596	if (offsetofend(typeof(resp), rss_caps) <= uhw->outlen) {
 597		if (props->rss_caps.supported_qpts) {
 598			resp.rss_caps.rx_hash_function =
 599				MLX4_IB_RX_HASH_FUNC_TOEPLITZ;
 600
 601			resp.rss_caps.rx_hash_fields_mask =
 602				MLX4_IB_RX_HASH_SRC_IPV4 |
 603				MLX4_IB_RX_HASH_DST_IPV4 |
 604				MLX4_IB_RX_HASH_SRC_IPV6 |
 605				MLX4_IB_RX_HASH_DST_IPV6 |
 606				MLX4_IB_RX_HASH_SRC_PORT_TCP |
 607				MLX4_IB_RX_HASH_DST_PORT_TCP |
 608				MLX4_IB_RX_HASH_SRC_PORT_UDP |
 609				MLX4_IB_RX_HASH_DST_PORT_UDP;
 610
 611			if (dev->dev->caps.tunnel_offload_mode ==
 612			    MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
 613				resp.rss_caps.rx_hash_fields_mask |=
 614					MLX4_IB_RX_HASH_INNER;
 615		}
 616		resp.response_length = offsetof(typeof(resp), rss_caps) +
 617				       sizeof(resp.rss_caps);
 618	}
 619
 620	if (offsetofend(typeof(resp), tso_caps) <= uhw->outlen) {
 621		if (dev->dev->caps.max_gso_sz &&
 622		    ((mlx4_ib_port_link_layer(ibdev, 1) ==
 623		    IB_LINK_LAYER_ETHERNET) ||
 624		    (mlx4_ib_port_link_layer(ibdev, 2) ==
 625		    IB_LINK_LAYER_ETHERNET))) {
 626			resp.tso_caps.max_tso = dev->dev->caps.max_gso_sz;
 627			resp.tso_caps.supported_qpts |=
 628				1 << IB_QPT_RAW_PACKET;
 629		}
 630		resp.response_length = offsetof(typeof(resp), tso_caps) +
 631				       sizeof(resp.tso_caps);
 632	}
 633
 634	if (uhw->outlen) {
 635		err = ib_copy_to_udata(uhw, &resp, resp.response_length);
 636		if (err)
 637			goto out;
 638	}
 639out:
 640	kfree(in_mad);
 641	kfree(out_mad);
 642
 643	return err;
 644}
 645
 646static enum rdma_link_layer
 647mlx4_ib_port_link_layer(struct ib_device *device, u32 port_num)
 648{
 649	struct mlx4_dev *dev = to_mdev(device)->dev;
 650
 651	return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ?
 652		IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
 653}
 654
 655static int ib_link_query_port(struct ib_device *ibdev, u32 port,
 656			      struct ib_port_attr *props, int netw_view)
 657{
 658	struct ib_smp *in_mad;
 659	struct ib_smp *out_mad;
 660	int ext_active_speed;
 661	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
 662	int err = -ENOMEM;
 663
 664	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 665	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 666	if (!in_mad || !out_mad)
 667		goto out;
 668
 669	ib_init_query_mad(in_mad);
 670	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
 671	in_mad->attr_mod = cpu_to_be32(port);
 672
 673	if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
 674		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
 675
 676	err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
 677				in_mad, out_mad);
 678	if (err)
 679		goto out;
 680
 681
 682	props->lid		= be16_to_cpup((__be16 *) (out_mad->data + 16));
 683	props->lmc		= out_mad->data[34] & 0x7;
 684	props->sm_lid		= be16_to_cpup((__be16 *) (out_mad->data + 18));
 685	props->sm_sl		= out_mad->data[36] & 0xf;
 686	props->state		= out_mad->data[32] & 0xf;
 687	props->phys_state	= out_mad->data[33] >> 4;
 688	props->port_cap_flags	= be32_to_cpup((__be32 *) (out_mad->data + 20));
 689	if (netw_view)
 690		props->gid_tbl_len = out_mad->data[50];
 691	else
 692		props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
 693	props->max_msg_sz	= to_mdev(ibdev)->dev->caps.max_msg_sz;
 694	props->pkey_tbl_len	= to_mdev(ibdev)->dev->caps.pkey_table_len[port];
 695	props->bad_pkey_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 46));
 696	props->qkey_viol_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 48));
 697	props->active_width	= out_mad->data[31] & 0xf;
 698	props->active_speed	= out_mad->data[35] >> 4;
 699	props->max_mtu		= out_mad->data[41] & 0xf;
 700	props->active_mtu	= out_mad->data[36] >> 4;
 701	props->subnet_timeout	= out_mad->data[51] & 0x1f;
 702	props->max_vl_num	= out_mad->data[37] >> 4;
 703	props->init_type_reply	= out_mad->data[41] >> 4;
 704
 705	/* Check if extended speeds (EDR/FDR/...) are supported */
 706	if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
 707		ext_active_speed = out_mad->data[62] >> 4;
 708
 709		switch (ext_active_speed) {
 710		case 1:
 711			props->active_speed = IB_SPEED_FDR;
 712			break;
 713		case 2:
 714			props->active_speed = IB_SPEED_EDR;
 715			break;
 716		}
 717	}
 718
  719	/* If the reported active speed is QDR, check whether it is FDR-10 */
 720	if (props->active_speed == IB_SPEED_QDR) {
 721		ib_init_query_mad(in_mad);
 722		in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
 723		in_mad->attr_mod = cpu_to_be32(port);
 724
 725		err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
 726				   NULL, NULL, in_mad, out_mad);
 727		if (err)
 728			goto out;
 729
 730		/* Checking LinkSpeedActive for FDR-10 */
 731		if (out_mad->data[15] & 0x1)
 732			props->active_speed = IB_SPEED_FDR10;
 733	}
 734
 735	/* Avoid wrong speed value returned by FW if the IB link is down. */
 736	if (props->state == IB_PORT_DOWN)
 737		 props->active_speed = IB_SPEED_SDR;
 738
 739out:
 740	kfree(in_mad);
 741	kfree(out_mad);
 742	return err;
 743}
 744
 745static u8 state_to_phys_state(enum ib_port_state state)
 746{
 747	return state == IB_PORT_ACTIVE ?
 748		IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
 749}
 750
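/*
 * Port attributes for an Ethernet (RoCE) port: the static limits come from
 * QUERY_PORT firmware output (byte 5 encodes link width and speed), while
 * the operational state and active MTU are derived from the associated
 * netdev (or from the bond master upper device when the mlx4 device is
 * bonded).
 */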
 751static int eth_link_query_port(struct ib_device *ibdev, u32 port,
 752			       struct ib_port_attr *props)
 753{
 754
 755	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
 756	struct mlx4_ib_iboe *iboe = &mdev->iboe;
 757	struct net_device *ndev;
 758	enum ib_mtu tmp;
 759	struct mlx4_cmd_mailbox *mailbox;
 760	int err = 0;
 761	int is_bonded = mlx4_is_bonded(mdev->dev);
 762
 763	mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
 764	if (IS_ERR(mailbox))
 765		return PTR_ERR(mailbox);
 766
 767	err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
 768			   MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
 769			   MLX4_CMD_WRAPPED);
 770	if (err)
 771		goto out;
 772
 773	props->active_width	=  (((u8 *)mailbox->buf)[5] == 0x40) ||
 774				   (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
 775					   IB_WIDTH_4X : IB_WIDTH_1X;
 776	props->active_speed	=  (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
 777					   IB_SPEED_FDR : IB_SPEED_QDR;
 778	props->port_cap_flags	= IB_PORT_CM_SUP;
 779	props->ip_gids = true;
 780	props->gid_tbl_len	= mdev->dev->caps.gid_table_len[port];
 781	props->max_msg_sz	= mdev->dev->caps.max_msg_sz;
 782	if (mdev->dev->caps.pkey_table_len[port])
 783		props->pkey_tbl_len = 1;
 784	props->max_mtu		= IB_MTU_4096;
 785	props->max_vl_num	= 2;
 786	props->state		= IB_PORT_DOWN;
 787	props->phys_state	= state_to_phys_state(props->state);
 788	props->active_mtu	= IB_MTU_256;
 789	spin_lock_bh(&iboe->lock);
 790	ndev = iboe->netdevs[port - 1];
 791	if (ndev && is_bonded) {
 792		rcu_read_lock(); /* required to get upper dev */
 793		ndev = netdev_master_upper_dev_get_rcu(ndev);
 794		rcu_read_unlock();
 795	}
 796	if (!ndev)
 797		goto out_unlock;
 798
 799	tmp = iboe_get_mtu(ndev->mtu);
 800	props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
 801
 802	props->state		= (netif_running(ndev) && netif_carrier_ok(ndev)) ?
 803					IB_PORT_ACTIVE : IB_PORT_DOWN;
 804	props->phys_state	= state_to_phys_state(props->state);
 805out_unlock:
 806	spin_unlock_bh(&iboe->lock);
 807out:
 808	mlx4_free_cmd_mailbox(mdev->dev, mailbox);
 809	return err;
 810}
 811
 812int __mlx4_ib_query_port(struct ib_device *ibdev, u32 port,
 813			 struct ib_port_attr *props, int netw_view)
 814{
 815	int err;
 816
  817	/* props is zeroed by the caller; avoid zeroing it here */
 818
 819	err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
 820		ib_link_query_port(ibdev, port, props, netw_view) :
 821				eth_link_query_port(ibdev, port, props);
 822
 823	return err;
 824}
 825
 826static int mlx4_ib_query_port(struct ib_device *ibdev, u32 port,
 827			      struct ib_port_attr *props)
 828{
 829	/* returns host view */
 830	return __mlx4_ib_query_port(ibdev, port, props, 0);
 831}
 832
 833int __mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
 834			union ib_gid *gid, int netw_view)
 835{
 836	struct ib_smp *in_mad;
 837	struct ib_smp *out_mad;
 838	int err = -ENOMEM;
 839	struct mlx4_ib_dev *dev = to_mdev(ibdev);
 840	int clear = 0;
 841	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
 842
 843	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 844	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 845	if (!in_mad || !out_mad)
 846		goto out;
 847
 848	ib_init_query_mad(in_mad);
 849	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
 850	in_mad->attr_mod = cpu_to_be32(port);
 851
 852	if (mlx4_is_mfunc(dev->dev) && netw_view)
 853		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
 854
 855	err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
 856	if (err)
 857		goto out;
 858
 859	memcpy(gid->raw, out_mad->data + 8, 8);
 860
 861	if (mlx4_is_mfunc(dev->dev) && !netw_view) {
 862		if (index) {
 863			/* For any index > 0, return the null guid */
 864			err = 0;
 865			clear = 1;
 866			goto out;
 867		}
 868	}
 869
 870	ib_init_query_mad(in_mad);
 871	in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
 872	in_mad->attr_mod = cpu_to_be32(index / 8);
 873
 874	err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
 875			   NULL, NULL, in_mad, out_mad);
 876	if (err)
 877		goto out;
 878
 879	memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
 880
 881out:
 882	if (clear)
 883		memset(gid->raw + 8, 0, 8);
 884	kfree(in_mad);
 885	kfree(out_mad);
 886	return err;
 887}
 888
 889static int mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
 890			     union ib_gid *gid)
 891{
 892	if (rdma_protocol_ib(ibdev, port))
 893		return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
 894	return 0;
 895}
 896
 897static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u32 port,
 898			       u64 *sl2vl_tbl)
 899{
 900	union sl2vl_tbl_to_u64 sl2vl64;
 901	struct ib_smp *in_mad;
 902	struct ib_smp *out_mad;
 903	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
 904	int err = -ENOMEM;
 905	int jj;
 906
 907	if (mlx4_is_slave(to_mdev(ibdev)->dev)) {
 908		*sl2vl_tbl = 0;
 909		return 0;
 910	}
 911
 912	in_mad  = kzalloc(sizeof(*in_mad), GFP_KERNEL);
 913	out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
 914	if (!in_mad || !out_mad)
 915		goto out;
 916
 917	ib_init_query_mad(in_mad);
 918	in_mad->attr_id  = IB_SMP_ATTR_SL_TO_VL_TABLE;
 919	in_mad->attr_mod = 0;
 920
 921	if (mlx4_is_mfunc(to_mdev(ibdev)->dev))
 922		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
 923
 924	err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
 925			   in_mad, out_mad);
 926	if (err)
 927		goto out;
 928
 929	for (jj = 0; jj < 8; jj++)
 930		sl2vl64.sl8[jj] = ((struct ib_smp *)out_mad)->data[jj];
 931	*sl2vl_tbl = sl2vl64.sl64;
 932
 933out:
 934	kfree(in_mad);
 935	kfree(out_mad);
 936	return err;
 937}
 938
 939static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev)
 940{
 941	u64 sl2vl;
 942	int i;
 943	int err;
 944
 945	for (i = 1; i <= mdev->dev->caps.num_ports; i++) {
 946		if (mdev->dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
 947			continue;
 948		err = mlx4_ib_query_sl2vl(&mdev->ib_dev, i, &sl2vl);
 949		if (err) {
 950			pr_err("Unable to get default sl to vl mapping for port %d.  Using all zeroes (%d)\n",
 951			       i, err);
 952			sl2vl = 0;
 953		}
 954		atomic64_set(&mdev->sl2vl[i - 1], sl2vl);
 955	}
 956}
 957
 958int __mlx4_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
 959			 u16 *pkey, int netw_view)
 960{
 961	struct ib_smp *in_mad;
 962	struct ib_smp *out_mad;
 963	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
 964	int err = -ENOMEM;
 965
 966	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 967	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 968	if (!in_mad || !out_mad)
 969		goto out;
 970
 971	ib_init_query_mad(in_mad);
 972	in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
 973	in_mad->attr_mod = cpu_to_be32(index / 32);
 974
 975	if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
 976		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
 977
 978	err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
 979			   in_mad, out_mad);
 980	if (err)
 981		goto out;
 982
 983	*pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
 984
 985out:
 986	kfree(in_mad);
 987	kfree(out_mad);
 988	return err;
 989}
 990
 991static int mlx4_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
 992			      u16 *pkey)
 993{
 994	return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
 995}
 996
 997static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
 998				 struct ib_device_modify *props)
 999{
1000	struct mlx4_cmd_mailbox *mailbox;
1001	unsigned long flags;
1002
1003	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
1004		return -EOPNOTSUPP;
1005
1006	if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
1007		return 0;
1008
1009	if (mlx4_is_slave(to_mdev(ibdev)->dev))
1010		return -EOPNOTSUPP;
1011
1012	spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
1013	memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
1014	spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
1015
1016	/*
1017	 * If possible, pass node desc to FW, so it can generate
1018	 * a 144 trap.  If cmd fails, just ignore.
1019	 */
1020	mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev);
1021	if (IS_ERR(mailbox))
1022		return 0;
1023
1024	memcpy(mailbox->buf, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
1025	mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
1026		 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
1027
1028	mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
1029
1030	return 0;
1031}
1032
1033static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u32 port,
1034			    int reset_qkey_viols, u32 cap_mask)
1035{
1036	struct mlx4_cmd_mailbox *mailbox;
1037	int err;
1038
1039	mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
1040	if (IS_ERR(mailbox))
1041		return PTR_ERR(mailbox);
1042
1043	if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
1044		*(u8 *) mailbox->buf	     = !!reset_qkey_viols << 6;
1045		((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
1046	} else {
1047		((u8 *) mailbox->buf)[3]     = !!reset_qkey_viols;
1048		((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
1049	}
1050
1051	err = mlx4_cmd(dev->dev, mailbox->dma, port, MLX4_SET_PORT_IB_OPCODE,
1052		       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
1053		       MLX4_CMD_WRAPPED);
1054
1055	mlx4_free_cmd_mailbox(dev->dev, mailbox);
1056	return err;
1057}
1058
1059static int mlx4_ib_modify_port(struct ib_device *ibdev, u32 port, int mask,
1060			       struct ib_port_modify *props)
1061{
1062	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
1063	u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
1064	struct ib_port_attr attr;
1065	u32 cap_mask;
1066	int err;
1067
1068	/* return OK if this is RoCE. CM calls ib_modify_port() regardless
1069	 * of whether port link layer is ETH or IB. For ETH ports, qkey
1070	 * violations and port capabilities are not meaningful.
1071	 */
1072	if (is_eth)
1073		return 0;
1074
1075	mutex_lock(&mdev->cap_mask_mutex);
1076
1077	err = ib_query_port(ibdev, port, &attr);
1078	if (err)
1079		goto out;
1080
1081	cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
1082		~props->clr_port_cap_mask;
1083
1084	err = mlx4_ib_SET_PORT(mdev, port,
1085			       !!(mask & IB_PORT_RESET_QKEY_CNTR),
1086			       cap_mask);
1087
1088out:
1089	mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
1090	return err;
1091}
1092
1093static int mlx4_ib_alloc_ucontext(struct ib_ucontext *uctx,
1094				  struct ib_udata *udata)
1095{
1096	struct ib_device *ibdev = uctx->device;
1097	struct mlx4_ib_dev *dev = to_mdev(ibdev);
1098	struct mlx4_ib_ucontext *context = to_mucontext(uctx);
1099	struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
1100	struct mlx4_ib_alloc_ucontext_resp resp;
1101	int err;
1102
1103	if (!dev->ib_active)
1104		return -EAGAIN;
1105
1106	if (ibdev->ops.uverbs_abi_ver ==
1107	    MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
1108		resp_v3.qp_tab_size      = dev->dev->caps.num_qps;
1109		resp_v3.bf_reg_size      = dev->dev->caps.bf_reg_size;
1110		resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
1111	} else {
1112		resp.dev_caps	      = dev->dev->caps.userspace_caps;
1113		resp.qp_tab_size      = dev->dev->caps.num_qps;
1114		resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
1115		resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
1116		resp.cqe_size	      = dev->dev->caps.cqe_size;
1117	}
1118
1119	err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
1120	if (err)
1121		return err;
1122
1123	INIT_LIST_HEAD(&context->db_page_list);
1124	mutex_init(&context->db_page_mutex);
1125
1126	INIT_LIST_HEAD(&context->wqn_ranges_list);
1127	mutex_init(&context->wqn_ranges_mutex);
1128
1129	if (ibdev->ops.uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
1130		err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
1131	else
1132		err = ib_copy_to_udata(udata, &resp, sizeof(resp));
1133
1134	if (err) {
1135		mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
1136		return -EFAULT;
1137	}
1138
1139	return err;
1140}
1141
1142static void mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
1143{
1144	struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
1145
1146	mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
1147}
1148
1149static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
1150{
1151}
1152
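/*
 * Userspace mappings exported by this driver, selected by vm_pgoff:
 * offset 0 maps the context's UAR page (non-cached), offset 1 maps the
 * matching blue-flame page (write-combining, only if blue flame is
 * supported), and offset 3 maps the internal HCA core clock page when the
 * firmware exposes its location.
 */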
1153static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
1154{
1155	struct mlx4_ib_dev *dev = to_mdev(context->device);
1156
1157	switch (vma->vm_pgoff) {
1158	case 0:
1159		return rdma_user_mmap_io(context, vma,
1160					 to_mucontext(context)->uar.pfn,
1161					 PAGE_SIZE,
1162					 pgprot_noncached(vma->vm_page_prot),
1163					 NULL);
1164
1165	case 1:
1166		if (dev->dev->caps.bf_reg_size == 0)
1167			return -EINVAL;
1168		return rdma_user_mmap_io(
1169			context, vma,
1170			to_mucontext(context)->uar.pfn +
1171				dev->dev->caps.num_uars,
1172			PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot),
1173			NULL);
1174
1175	case 3: {
1176		struct mlx4_clock_params params;
1177		int ret;
1178
1179		ret = mlx4_get_internal_clock_params(dev->dev, &params);
1180		if (ret)
1181			return ret;
1182
1183		return rdma_user_mmap_io(
1184			context, vma,
1185			(pci_resource_start(dev->dev->persist->pdev,
1186					    params.bar) +
1187			 params.offset) >>
1188				PAGE_SHIFT,
1189			PAGE_SIZE, pgprot_noncached(vma->vm_page_prot),
1190			NULL);
1191	}
1192
1193	default:
1194		return -EINVAL;
1195	}
1196}
1197
1198static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
1199{
1200	struct mlx4_ib_pd *pd = to_mpd(ibpd);
1201	struct ib_device *ibdev = ibpd->device;
1202	int err;
1203
1204	err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
1205	if (err)
1206		return err;
1207
1208	if (udata && ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
1209		mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
1210		return -EFAULT;
1211	}
1212	return 0;
1213}
1214
1215static int mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
1216{
1217	mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
1218	return 0;
1219}
1220
1221static int mlx4_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
1222{
1223	struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
1224	struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
1225	struct ib_cq_init_attr cq_attr = {};
1226	int err;
1227
1228	if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
1229		return -EOPNOTSUPP;
1230
1231	err = mlx4_xrcd_alloc(dev->dev, &xrcd->xrcdn);
1232	if (err)
1233		return err;
1234
1235	xrcd->pd = ib_alloc_pd(ibxrcd->device, 0);
1236	if (IS_ERR(xrcd->pd)) {
1237		err = PTR_ERR(xrcd->pd);
1238		goto err2;
1239	}
1240
1241	cq_attr.cqe = 1;
1242	xrcd->cq = ib_create_cq(ibxrcd->device, NULL, NULL, xrcd, &cq_attr);
1243	if (IS_ERR(xrcd->cq)) {
1244		err = PTR_ERR(xrcd->cq);
1245		goto err3;
1246	}
1247
1248	return 0;
1249
1250err3:
1251	ib_dealloc_pd(xrcd->pd);
1252err2:
1253	mlx4_xrcd_free(dev->dev, xrcd->xrcdn);
1254	return err;
1255}
1256
1257static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
1258{
1259	ib_destroy_cq(to_mxrcd(xrcd)->cq);
1260	ib_dealloc_pd(to_mxrcd(xrcd)->pd);
1261	mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
1262	return 0;
1263}
1264
1265static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
1266{
1267	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1268	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1269	struct mlx4_ib_gid_entry *ge;
1270
1271	ge = kzalloc(sizeof *ge, GFP_KERNEL);
1272	if (!ge)
1273		return -ENOMEM;
1274
1275	ge->gid = *gid;
1276	if (mlx4_ib_add_mc(mdev, mqp, gid)) {
1277		ge->port = mqp->port;
1278		ge->added = 1;
1279	}
1280
1281	mutex_lock(&mqp->mutex);
1282	list_add_tail(&ge->list, &mqp->gid_list);
1283	mutex_unlock(&mqp->mutex);
1284
1285	return 0;
1286}
1287
1288static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev,
1289					  struct mlx4_ib_counters *ctr_table)
1290{
1291	struct counter_index *counter, *tmp_count;
1292
1293	mutex_lock(&ctr_table->mutex);
1294	list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list,
1295				 list) {
1296		if (counter->allocated)
1297			mlx4_counter_free(ibdev->dev, counter->index);
1298		list_del(&counter->list);
1299		kfree(counter);
1300	}
1301	mutex_unlock(&ctr_table->mutex);
1302}
1303
1304int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
1305		   union ib_gid *gid)
1306{
1307	struct net_device *ndev;
1308	int ret = 0;
1309
1310	if (!mqp->port)
1311		return 0;
1312
1313	spin_lock_bh(&mdev->iboe.lock);
1314	ndev = mdev->iboe.netdevs[mqp->port - 1];
1315	dev_hold(ndev);
1316	spin_unlock_bh(&mdev->iboe.lock);
1317
1318	if (ndev) {
1319		ret = 1;
1320		dev_put(ndev);
1321	}
1322
1323	return ret;
1324}
1325
1326struct mlx4_ib_steering {
1327	struct list_head list;
1328	struct mlx4_flow_reg_id reg_id;
1329	union ib_gid gid;
1330};
1331
1332#define LAST_ETH_FIELD vlan_tag
1333#define LAST_IB_FIELD sl
1334#define LAST_IPV4_FIELD dst_ip
1335#define LAST_TCP_UDP_FIELD src_port
1336
1337/* Field is the last supported field */
1338#define FIELDS_NOT_SUPPORTED(filter, field)\
1339	memchr_inv((void *)&filter.field  +\
1340		   sizeof(filter.field), 0,\
1341		   sizeof(filter) -\
1342		   offsetof(typeof(filter), field) -\
1343		   sizeof(filter.field))
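/*
 * The macro relies on memchr_inv(): starting one byte past @field, it
 * checks that every remaining byte of @filter is zero, i.e. that userspace
 * did not request matching on any field this driver does not support.
 */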
1344
1345static int parse_flow_attr(struct mlx4_dev *dev,
1346			   u32 qp_num,
1347			   union ib_flow_spec *ib_spec,
1348			   struct _rule_hw *mlx4_spec)
1349{
1350	enum mlx4_net_trans_rule_id type;
1351
1352	switch (ib_spec->type) {
1353	case IB_FLOW_SPEC_ETH:
1354		if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
1355			return -ENOTSUPP;
1356
1357		type = MLX4_NET_TRANS_RULE_ID_ETH;
1358		memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
1359		       ETH_ALEN);
1360		memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
1361		       ETH_ALEN);
1362		mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
1363		mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
1364		break;
1365	case IB_FLOW_SPEC_IB:
1366		if (FIELDS_NOT_SUPPORTED(ib_spec->ib.mask, LAST_IB_FIELD))
1367			return -ENOTSUPP;
1368
1369		type = MLX4_NET_TRANS_RULE_ID_IB;
1370		mlx4_spec->ib.l3_qpn =
1371			cpu_to_be32(qp_num);
1372		mlx4_spec->ib.qpn_mask =
1373			cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
1374		break;
1375
1376
1377	case IB_FLOW_SPEC_IPV4:
1378		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
1379			return -ENOTSUPP;
1380
1381		type = MLX4_NET_TRANS_RULE_ID_IPV4;
1382		mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
1383		mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
1384		mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
1385		mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
1386		break;
1387
1388	case IB_FLOW_SPEC_TCP:
1389	case IB_FLOW_SPEC_UDP:
1390		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD))
1391			return -ENOTSUPP;
1392
1393		type = ib_spec->type == IB_FLOW_SPEC_TCP ?
1394					MLX4_NET_TRANS_RULE_ID_TCP :
1395					MLX4_NET_TRANS_RULE_ID_UDP;
1396		mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
1397		mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port;
1398		mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
1399		mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port;
1400		break;
1401
1402	default:
1403		return -EINVAL;
1404	}
1405	if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 ||
1406	    mlx4_hw_rule_sz(dev, type) < 0)
1407		return -EINVAL;
1408	mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type));
1409	mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2;
1410	return mlx4_hw_rule_sz(dev, type);
1411}
1412
1413struct default_rules {
1414	__u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
1415	__u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
1416	__u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
1417	__u8  link_layer;
1418};
1419static const struct default_rules default_table[] = {
1420	{
1421		.mandatory_fields = {IB_FLOW_SPEC_IPV4},
1422		.mandatory_not_fields = {IB_FLOW_SPEC_ETH},
1423		.rules_create_list = {IB_FLOW_SPEC_IB},
1424		.link_layer = IB_LINK_LAYER_INFINIBAND
1425	}
1426};
1427
1428static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
1429					 struct ib_flow_attr *flow_attr)
1430{
1431	int i, j, k;
1432	void *ib_flow;
1433	const struct default_rules *pdefault_rules = default_table;
1434	u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
1435
1436	for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) {
1437		__u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
1438		memset(&field_types, 0, sizeof(field_types));
1439
1440		if (link_layer != pdefault_rules->link_layer)
1441			continue;
1442
1443		ib_flow = flow_attr + 1;
1444		/* we assume the specs are sorted */
1445		for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
1446		     j < flow_attr->num_of_specs; k++) {
1447			union ib_flow_spec *current_flow =
1448				(union ib_flow_spec *)ib_flow;
1449
1450			/* same layer but different type */
1451			if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
1452			     (pdefault_rules->mandatory_fields[k] &
1453			      IB_FLOW_SPEC_LAYER_MASK)) &&
1454			    (current_flow->type !=
1455			     pdefault_rules->mandatory_fields[k]))
1456				goto out;
1457
1458			/* same layer, try match next one */
1459			if (current_flow->type ==
1460			    pdefault_rules->mandatory_fields[k]) {
1461				j++;
1462				ib_flow +=
1463					((union ib_flow_spec *)ib_flow)->size;
1464			}
1465		}
1466
1467		ib_flow = flow_attr + 1;
1468		for (j = 0; j < flow_attr->num_of_specs;
1469		     j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
1470			for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
1471				/* same layer and same type */
1472				if (((union ib_flow_spec *)ib_flow)->type ==
1473				    pdefault_rules->mandatory_not_fields[k])
1474					goto out;
1475
1476		return i;
1477	}
1478out:
1479	return -1;
1480}
1481
1482static int __mlx4_ib_create_default_rules(
1483		struct mlx4_ib_dev *mdev,
1484		struct ib_qp *qp,
1485		const struct default_rules *pdefault_rules,
1486		struct _rule_hw *mlx4_spec) {
1487	int size = 0;
1488	int i;
1489
1490	for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
1491		union ib_flow_spec ib_spec = {};
1492		int ret;
1493
1494		switch (pdefault_rules->rules_create_list[i]) {
1495		case 0:
1496			/* no rule */
1497			continue;
1498		case IB_FLOW_SPEC_IB:
1499			ib_spec.type = IB_FLOW_SPEC_IB;
1500			ib_spec.size = sizeof(struct ib_flow_spec_ib);
1501
1502			break;
1503		default:
1504			/* invalid rule */
1505			return -EINVAL;
1506		}
 1507		/* We must put an empty rule here; the qpn is ignored */
1508		ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
1509				      mlx4_spec);
1510		if (ret < 0) {
1511			pr_info("invalid parsing\n");
1512			return -EINVAL;
1513		}
1514
1515		mlx4_spec = (void *)mlx4_spec + ret;
1516		size += ret;
1517	}
1518	return size;
1519}
1520
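/*
 * Build a device-managed flow steering rule in a command mailbox: a
 * mlx4_net_trans_rule_hw_ctrl header (priority, steering mode, port, QPN)
 * followed by optional default rules for the port's link layer and one
 * hardware segment per userspace flow spec, then attach it with
 * MLX4_QP_FLOW_STEERING_ATTACH.  On success the firmware returns the rule's
 * registration id in @reg_id, which is later used to detach it.
 */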
1521static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
1522			  int domain,
1523			  enum mlx4_net_trans_promisc_mode flow_type,
1524			  u64 *reg_id)
1525{
1526	int ret, i;
1527	int size = 0;
1528	void *ib_flow;
1529	struct mlx4_ib_dev *mdev = to_mdev(qp->device);
1530	struct mlx4_cmd_mailbox *mailbox;
1531	struct mlx4_net_trans_rule_hw_ctrl *ctrl;
1532	int default_flow;
1533
1534	if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
1535		pr_err("Invalid priority value %d\n", flow_attr->priority);
1536		return -EINVAL;
1537	}
1538
1539	if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
1540		return -EINVAL;
1541
1542	mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
1543	if (IS_ERR(mailbox))
1544		return PTR_ERR(mailbox);
1545	ctrl = mailbox->buf;
1546
1547	ctrl->prio = cpu_to_be16(domain | flow_attr->priority);
1548	ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
1549	ctrl->port = flow_attr->port;
1550	ctrl->qpn = cpu_to_be32(qp->qp_num);
1551
1552	ib_flow = flow_attr + 1;
1553	size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
1554	/* Add default flows */
1555	default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
1556	if (default_flow >= 0) {
1557		ret = __mlx4_ib_create_default_rules(
1558				mdev, qp, default_table + default_flow,
1559				mailbox->buf + size);
1560		if (ret < 0) {
1561			mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1562			return -EINVAL;
1563		}
1564		size += ret;
1565	}
1566	for (i = 0; i < flow_attr->num_of_specs; i++) {
1567		ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
1568				      mailbox->buf + size);
1569		if (ret < 0) {
1570			mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1571			return -EINVAL;
1572		}
1573		ib_flow += ((union ib_flow_spec *) ib_flow)->size;
1574		size += ret;
1575	}
1576
1577	if (mlx4_is_master(mdev->dev) && flow_type == MLX4_FS_REGULAR &&
1578	    flow_attr->num_of_specs == 1) {
1579		struct _rule_hw *rule_header = (struct _rule_hw *)(ctrl + 1);
1580		enum ib_flow_spec_type header_spec =
1581			((union ib_flow_spec *)(flow_attr + 1))->type;
1582
1583		if (header_spec == IB_FLOW_SPEC_ETH)
1584			mlx4_handle_eth_header_mcast_prio(ctrl, rule_header);
1585	}
1586
1587	ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
1588			   MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
1589			   MLX4_CMD_NATIVE);
 1590	if (ret == -ENOMEM)
 1591		pr_err("mcg table is full. Failed to register network rule.\n");
 1592	else if (ret == -ENXIO)
 1593		pr_err("Device managed flow steering is disabled. Failed to register network rule.\n");
 1594	else if (ret)
 1595		pr_err("Invalid argument. Failed to register network rule.\n");
1596
1597	mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1598	return ret;
1599}
1600
1601static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
1602{
1603	int err;
1604	err = mlx4_cmd(dev, reg_id, 0, 0,
1605		       MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
1606		       MLX4_CMD_NATIVE);
1607	if (err)
 1608		pr_err("Failed to detach network rule, registration id = 0x%llx\n",
 1609		       reg_id);
1610	return err;
1611}
1612
1613static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
1614				    u64 *reg_id)
1615{
1616	void *ib_flow;
1617	union ib_flow_spec *ib_spec;
1618	struct mlx4_dev	*dev = to_mdev(qp->device)->dev;
1619	int err = 0;
1620
1621	if (dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN ||
1622	    dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC)
1623		return 0; /* do nothing */
1624
1625	ib_flow = flow_attr + 1;
1626	ib_spec = (union ib_flow_spec *)ib_flow;
1627
1628	if (ib_spec->type !=  IB_FLOW_SPEC_ETH || flow_attr->num_of_specs != 1)
1629		return 0; /* do nothing */
1630
1631	err = mlx4_tunnel_steer_add(to_mdev(qp->device)->dev, ib_spec->eth.val.dst_mac,
1632				    flow_attr->port, qp->qp_num,
1633				    MLX4_DOMAIN_UVERBS | (flow_attr->priority & 0xff),
1634				    reg_id);
1635	return err;
1636}
1637
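/*
 * IB_FLOW_ATTR_FLAGS_DONT_TRAP rules are implemented with the sniffer
 * steering modes.  With no specs, both multicast and unicast sniffer rules
 * are created; with a single Ethernet spec, the dst_mac mask must either be
 * all zero (again both MC and UC) or select only the multicast bit, in
 * which case the spec's dst_mac value decides between the MC and the UC
 * sniffer mode.
 */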
1638static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
1639				      struct ib_flow_attr *flow_attr,
1640				      enum mlx4_net_trans_promisc_mode *type)
1641{
1642	int err = 0;
1643
1644	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) ||
1645	    (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) ||
1646	    (flow_attr->num_of_specs > 1) || (flow_attr->priority != 0)) {
1647		return -EOPNOTSUPP;
1648	}
1649
1650	if (flow_attr->num_of_specs == 0) {
1651		type[0] = MLX4_FS_MC_SNIFFER;
1652		type[1] = MLX4_FS_UC_SNIFFER;
1653	} else {
1654		union ib_flow_spec *ib_spec;
1655
1656		ib_spec = (union ib_flow_spec *)(flow_attr + 1);
1657		if (ib_spec->type !=  IB_FLOW_SPEC_ETH)
1658			return -EINVAL;
1659
 1660		/* if the mask is all zero, then both MC and UC */
1661		if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) {
1662			type[0] = MLX4_FS_MC_SNIFFER;
1663			type[1] = MLX4_FS_UC_SNIFFER;
1664		} else {
1665			u8 mac[ETH_ALEN] = {ib_spec->eth.mask.dst_mac[0] ^ 0x01,
1666					    ib_spec->eth.mask.dst_mac[1],
1667					    ib_spec->eth.mask.dst_mac[2],
1668					    ib_spec->eth.mask.dst_mac[3],
1669					    ib_spec->eth.mask.dst_mac[4],
1670					    ib_spec->eth.mask.dst_mac[5]};
1671
 1672			/* The XOR above touched only the MC bit; a non-empty mask
 1673			 * is valid only if that bit is set and the rest are zero.
 1674			 */
1675			if (!is_zero_ether_addr(&mac[0]))
1676				return -EINVAL;
1677
1678			if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac))
1679				type[0] = MLX4_FS_MC_SNIFFER;
1680			else
1681				type[0] = MLX4_FS_UC_SNIFFER;
1682		}
1683	}
1684
1685	return err;
1686}
1687
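/*
 * uverbs entry point for flow creation.  Each flow attribute may map to up
 * to two low-level rules (for example the RX and SX sniffer rules); when
 * the two physical ports are bonded into a single IB port, a mirror copy of
 * every rule is additionally installed on port 2, since the application
 * only sees one port.
 */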
1688static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
1689					   struct ib_flow_attr *flow_attr,
1690					   struct ib_udata *udata)
1691{
1692	int err = 0, i = 0, j = 0;
1693	struct mlx4_ib_flow *mflow;
1694	enum mlx4_net_trans_promisc_mode type[2];
1695	struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
1696	int is_bonded = mlx4_is_bonded(dev);
1697
1698	if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)
1699		return ERR_PTR(-EOPNOTSUPP);
1700
1701	if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
1702	    (flow_attr->type != IB_FLOW_ATTR_NORMAL))
1703		return ERR_PTR(-EOPNOTSUPP);
1704
1705	if (udata &&
1706	    udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
1707		return ERR_PTR(-EOPNOTSUPP);
1708
1709	memset(type, 0, sizeof(type));
1710
1711	mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
1712	if (!mflow) {
1713		err = -ENOMEM;
1714		goto err_free;
1715	}
1716
1717	switch (flow_attr->type) {
1718	case IB_FLOW_ATTR_NORMAL:
 1719		/* If the don't-trap flag (continue match) is set, then under
 1720		 * specific conditions traffic is replicated to the given qp
 1721		 * without being stolen from its normal destination.
 1722		 */
1723		if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) {
1724			err = mlx4_ib_add_dont_trap_rule(dev,
1725							 flow_attr,
1726							 type);
1727			if (err)
1728				goto err_free;
1729		} else {
1730			type[0] = MLX4_FS_REGULAR;
1731		}
1732		break;
1733
1734	case IB_FLOW_ATTR_ALL_DEFAULT:
1735		type[0] = MLX4_FS_ALL_DEFAULT;
1736		break;
1737
1738	case IB_FLOW_ATTR_MC_DEFAULT:
1739		type[0] = MLX4_FS_MC_DEFAULT;
1740		break;
1741
1742	case IB_FLOW_ATTR_SNIFFER:
1743		type[0] = MLX4_FS_MIRROR_RX_PORT;
1744		type[1] = MLX4_FS_MIRROR_SX_PORT;
1745		break;
1746
1747	default:
1748		err = -EINVAL;
1749		goto err_free;
1750	}
1751
1752	while (i < ARRAY_SIZE(type) && type[i]) {
1753		err = __mlx4_ib_create_flow(qp, flow_attr, MLX4_DOMAIN_UVERBS,
1754					    type[i], &mflow->reg_id[i].id);
1755		if (err)
1756			goto err_create_flow;
1757		if (is_bonded) {
1758			/* Application always sees one port so the mirror rule
1759			 * must be on port #2
1760			 */
1761			flow_attr->port = 2;
1762			err = __mlx4_ib_create_flow(qp, flow_attr,
1763						    MLX4_DOMAIN_UVERBS, type[j],
1764						    &mflow->reg_id[j].mirror);
1765			flow_attr->port = 1;
1766			if (err)
1767				goto err_create_flow;
1768			j++;
1769		}
1770
1771		i++;
1772	}
1773
1774	if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
1775		err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
1776					       &mflow->reg_id[i].id);
1777		if (err)
1778			goto err_create_flow;
1779
1780		if (is_bonded) {
1781			flow_attr->port = 2;
1782			err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
1783						       &mflow->reg_id[j].mirror);
1784			flow_attr->port = 1;
1785			if (err)
1786				goto err_create_flow;
1787			j++;
1788		}
1789		/* count the tunnel steering rule added above */
1790		i++;
1791	}
1792
1793	return &mflow->ibflow;
1794
1795err_create_flow:
1796	while (i) {
1797		(void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
1798					     mflow->reg_id[i].id);
1799		i--;
1800	}
1801
1802	while (j) {
1803		(void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
1804					     mflow->reg_id[j].mirror);
1805		j--;
1806	}
1807err_free:
1808	kfree(mflow);
1809	return ERR_PTR(err);
1810}
1811
1812static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
1813{
1814	int err, ret = 0;
1815	int i = 0;
1816	struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
1817	struct mlx4_ib_flow *mflow = to_mflow(flow_id);
1818
1819	while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i].id) {
1820		err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i].id);
1821		if (err)
1822			ret = err;
1823		if (mflow->reg_id[i].mirror) {
1824			err = __mlx4_ib_destroy_flow(mdev->dev,
1825						     mflow->reg_id[i].mirror);
1826			if (err)
1827				ret = err;
1828		}
1829		i++;
1830	}
1831
1832	kfree(mflow);
1833	return ret;
1834}
1835
1836static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1837{
1838	int err;
1839	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1840	struct mlx4_dev	*dev = mdev->dev;
1841	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1842	struct mlx4_ib_steering *ib_steering = NULL;
1843	enum mlx4_protocol prot = MLX4_PROT_IB_IPV6;
1844	struct mlx4_flow_reg_id	reg_id;
1845
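	/* With device-managed steering the firmware returns a registration id
	 * for every attach; remember it per GID on the QP so the exact rule
	 * can be detached again in mlx4_ib_mcg_detach().
	 */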
1846	if (mdev->dev->caps.steering_mode ==
1847	    MLX4_STEERING_MODE_DEVICE_MANAGED) {
1848		ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
1849		if (!ib_steering)
1850			return -ENOMEM;
1851	}
1852
1853	err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
1854				    !!(mqp->flags &
1855				       MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
1856				    prot, &reg_id.id);
1857	if (err) {
1858		pr_err("multicast attach op failed, err %d\n", err);
1859		goto err_malloc;
1860	}
1861
1862	reg_id.mirror = 0;
1863	if (mlx4_is_bonded(dev)) {
1864		err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw,
1865					    (mqp->port == 1) ? 2 : 1,
1866					    !!(mqp->flags &
1867					    MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
1868					    prot, &reg_id.mirror);
1869		if (err)
1870			goto err_add;
1871	}
1872
1873	err = add_gid_entry(ibqp, gid);
1874	if (err)
1875		goto err_add;
1876
1877	if (ib_steering) {
1878		memcpy(ib_steering->gid.raw, gid->raw, 16);
1879		ib_steering->reg_id = reg_id;
1880		mutex_lock(&mqp->mutex);
1881		list_add(&ib_steering->list, &mqp->steering_rules);
1882		mutex_unlock(&mqp->mutex);
1883	}
1884	return 0;
1885
1886err_add:
1887	mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1888			      prot, reg_id.id);
1889	if (reg_id.mirror)
1890		mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1891				      prot, reg_id.mirror);
1892err_malloc:
1893	kfree(ib_steering);
1894
1895	return err;
1896}
1897
1898static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
1899{
1900	struct mlx4_ib_gid_entry *ge;
1901	struct mlx4_ib_gid_entry *tmp;
1902	struct mlx4_ib_gid_entry *ret = NULL;
1903
1904	list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
1905		if (!memcmp(raw, ge->gid.raw, 16)) {
1906			ret = ge;
1907			break;
1908		}
1909	}
1910
1911	return ret;
1912}
1913
1914static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1915{
1916	int err;
1917	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1918	struct mlx4_dev *dev = mdev->dev;
1919	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1920	struct net_device *ndev;
1921	struct mlx4_ib_gid_entry *ge;
1922	struct mlx4_flow_reg_id reg_id = {0, 0};
1923	enum mlx4_protocol prot =  MLX4_PROT_IB_IPV6;
1924
1925	if (mdev->dev->caps.steering_mode ==
1926	    MLX4_STEERING_MODE_DEVICE_MANAGED) {
1927		struct mlx4_ib_steering *ib_steering;
1928
1929		mutex_lock(&mqp->mutex);
1930		list_for_each_entry(ib_steering, &mqp->steering_rules, list) {
1931			if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) {
1932				list_del(&ib_steering->list);
1933				break;
1934			}
1935		}
1936		mutex_unlock(&mqp->mutex);
1937		if (&ib_steering->list == &mqp->steering_rules) {
1938			pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
1939			return -EINVAL;
1940		}
1941		reg_id = ib_steering->reg_id;
1942		kfree(ib_steering);
1943	}
1944
1945	err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1946				    prot, reg_id.id);
1947	if (err)
1948		return err;
1949
1950	if (mlx4_is_bonded(dev)) {
1951		err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1952					    prot, reg_id.mirror);
1953		if (err)
1954			return err;
1955	}
1956
1957	mutex_lock(&mqp->mutex);
1958	ge = find_gid_entry(mqp, gid->raw);
1959	if (ge) {
1960		spin_lock_bh(&mdev->iboe.lock);
1961		ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
1962		dev_hold(ndev);
1963		spin_unlock_bh(&mdev->iboe.lock);
1964		dev_put(ndev);
1965		list_del(&ge->list);
1966		kfree(ge);
1967	} else
1968		pr_warn("could not find mgid entry\n");
1969
1970	mutex_unlock(&mqp->mutex);
1971
1972	return 0;
1973}
1974
1975static int init_node_data(struct mlx4_ib_dev *dev)
1976{
1977	struct ib_smp *in_mad;
1978	struct ib_smp *out_mad;
1979	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
1980	int err = -ENOMEM;
1981
1982	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
1983	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
1984	if (!in_mad || !out_mad)
1985		goto out;
1986
1987	ib_init_query_mad(in_mad);
1988	in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
1989	if (mlx4_is_master(dev->dev))
1990		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
1991
1992	err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
1993	if (err)
1994		goto out;
1995
1996	memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX);
1997
1998	in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
1999
2000	err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
2001	if (err)
2002		goto out;
2003
2004	dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
2005	memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
2006
2007out:
2008	kfree(in_mad);
2009	kfree(out_mad);
2010	return err;
2011}
2012
2013static ssize_t hca_type_show(struct device *device,
2014			     struct device_attribute *attr, char *buf)
2015{
2016	struct mlx4_ib_dev *dev =
2017		rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
2018
2019	return sysfs_emit(buf, "MT%d\n", dev->dev->persist->pdev->device);
2020}
2021static DEVICE_ATTR_RO(hca_type);
2022
2023static ssize_t hw_rev_show(struct device *device,
2024			   struct device_attribute *attr, char *buf)
2025{
2026	struct mlx4_ib_dev *dev =
2027		rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
2028
2029	return sysfs_emit(buf, "%x\n", dev->dev->rev_id);
2030}
2031static DEVICE_ATTR_RO(hw_rev);
2032
2033static ssize_t board_id_show(struct device *device,
2034			     struct device_attribute *attr, char *buf)
2035{
2036	struct mlx4_ib_dev *dev =
2037		rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
2038
2039	return sysfs_emit(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id);
2040}
2041static DEVICE_ATTR_RO(board_id);
2042
2043static struct attribute *mlx4_class_attributes[] = {
2044	&dev_attr_hw_rev.attr,
2045	&dev_attr_hca_type.attr,
2046	&dev_attr_board_id.attr,
2047	NULL
2048};
2049
2050static const struct attribute_group mlx4_attr_group = {
2051	.attrs = mlx4_class_attributes,
2052};
2053
2054struct diag_counter {
2055	const char *name;
2056	u32 offset;
2057};
2058
2059#define DIAG_COUNTER(_name, _offset)			\
2060	{ .name = #_name, .offset = _offset }
2061
2062static const struct diag_counter diag_basic[] = {
2063	DIAG_COUNTER(rq_num_lle, 0x00),
2064	DIAG_COUNTER(sq_num_lle, 0x04),
2065	DIAG_COUNTER(rq_num_lqpoe, 0x08),
2066	DIAG_COUNTER(sq_num_lqpoe, 0x0C),
2067	DIAG_COUNTER(rq_num_lpe, 0x18),
2068	DIAG_COUNTER(sq_num_lpe, 0x1C),
2069	DIAG_COUNTER(rq_num_wrfe, 0x20),
2070	DIAG_COUNTER(sq_num_wrfe, 0x24),
2071	DIAG_COUNTER(sq_num_mwbe, 0x2C),
2072	DIAG_COUNTER(sq_num_bre, 0x34),
2073	DIAG_COUNTER(sq_num_rire, 0x44),
2074	DIAG_COUNTER(rq_num_rire, 0x48),
2075	DIAG_COUNTER(sq_num_rae, 0x4C),
2076	DIAG_COUNTER(rq_num_rae, 0x50),
2077	DIAG_COUNTER(sq_num_roe, 0x54),
2078	DIAG_COUNTER(sq_num_tree, 0x5C),
2079	DIAG_COUNTER(sq_num_rree, 0x64),
2080	DIAG_COUNTER(rq_num_rnr, 0x68),
2081	DIAG_COUNTER(sq_num_rnr, 0x6C),
2082	DIAG_COUNTER(rq_num_oos, 0x100),
2083	DIAG_COUNTER(sq_num_oos, 0x104),
2084};
2085
2086static const struct diag_counter diag_ext[] = {
2087	DIAG_COUNTER(rq_num_dup, 0x130),
2088	DIAG_COUNTER(sq_num_to, 0x134),
2089};
2090
2091static const struct diag_counter diag_device_only[] = {
2092	DIAG_COUNTER(num_cqovf, 0x1A0),
2093	DIAG_COUNTER(rq_num_udsdprd, 0x118),
2094};
2095
2096static struct rdma_hw_stats *
2097mlx4_ib_alloc_hw_device_stats(struct ib_device *ibdev)
2098{
2099	struct mlx4_ib_dev *dev = to_mdev(ibdev);
2100	struct mlx4_ib_diag_counters *diag = dev->diag_counters;
2101
2102	if (!diag[0].descs)
2103		return NULL;
2104
2105	return rdma_alloc_hw_stats_struct(diag[0].descs, diag[0].num_counters,
2106					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
2107}
2108
2109static struct rdma_hw_stats *
2110mlx4_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
2111{
2112	struct mlx4_ib_dev *dev = to_mdev(ibdev);
2113	struct mlx4_ib_diag_counters *diag = dev->diag_counters;
2114
2115	if (!diag[1].descs)
2116		return NULL;
2117
2118	return rdma_alloc_hw_stats_struct(diag[1].descs, diag[1].num_counters,
2119					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
2120}
2121
2122static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
2123				struct rdma_hw_stats *stats,
2124				u32 port, int index)
2125{
2126	struct mlx4_ib_dev *dev = to_mdev(ibdev);
2127	struct mlx4_ib_diag_counters *diag = dev->diag_counters;
2128	u32 hw_value[ARRAY_SIZE(diag_device_only) +
2129		ARRAY_SIZE(diag_ext) + ARRAY_SIZE(diag_basic)] = {};
2130	int ret;
2131	int i;
2132
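	/* diag[0] holds the device-wide counter layout (port == 0 query),
	 * diag[1] the per-port layout, hence the !!port index below.
	 */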
2133	ret = mlx4_query_diag_counters(dev->dev,
2134				       MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS,
2135				       diag[!!port].offset, hw_value,
2136				       diag[!!port].num_counters, port);
2137
2138	if (ret)
2139		return ret;
2140
2141	for (i = 0; i < diag[!!port].num_counters; i++)
2142		stats->value[i] = hw_value[i];
2143
2144	return diag[!!port].num_counters;
2145}
2146
2147static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
2148					 struct rdma_stat_desc **pdescs,
2149					 u32 **offset, u32 *num, bool port)
2150{
2151	u32 num_counters;
2152
2153	num_counters = ARRAY_SIZE(diag_basic);
2154
2155	if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT)
2156		num_counters += ARRAY_SIZE(diag_ext);
2157
2158	if (!port)
2159		num_counters += ARRAY_SIZE(diag_device_only);
2160
2161	*pdescs = kcalloc(num_counters, sizeof(struct rdma_stat_desc),
2162			  GFP_KERNEL);
2163	if (!*pdescs)
2164		return -ENOMEM;
2165
2166	*offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL);
2167	if (!*offset)
2168		goto err;
2169
2170	*num = num_counters;
2171
2172	return 0;
2173
2174err:
2175	kfree(*pdescs);
2176	return -ENOMEM;
2177}
2178
2179static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
2180				       struct rdma_stat_desc *descs,
2181				       u32 *offset, bool port)
2182{
2183	int i;
2184	int j;
2185
2186	for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) {
2187		descs[i].name = diag_basic[i].name;
2188		offset[i] = diag_basic[i].offset;
2189	}
2190
2191	if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) {
2192		for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) {
2193			descs[j].name = diag_ext[i].name;
2194			offset[j] = diag_ext[i].offset;
2195		}
2196	}
2197
2198	if (!port) {
2199		for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) {
2200			descs[j].name = diag_device_only[i].name;
2201			offset[j] = diag_device_only[i].offset;
2202		}
2203	}
2204}
2205
2206static const struct ib_device_ops mlx4_ib_hw_stats_ops = {
2207	.alloc_hw_device_stats = mlx4_ib_alloc_hw_device_stats,
2208	.alloc_hw_port_stats = mlx4_ib_alloc_hw_port_stats,
2209	.get_hw_stats = mlx4_ib_get_hw_stats,
2210};
2211
2212static const struct ib_device_ops mlx4_ib_hw_stats_ops1 = {
2213	.alloc_hw_device_stats = mlx4_ib_alloc_hw_device_stats,
2214	.get_hw_stats = mlx4_ib_get_hw_stats,
2215};
2216
2217static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
2218{
2219	struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;
2220	int i;
2221	int ret;
2222	bool per_port = !!(ibdev->dev->caps.flags2 &
2223		MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
2224
2225	if (mlx4_is_slave(ibdev->dev))
2226		return 0;
2227
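	/* Two passes: i == 0 builds the device-wide counter set, i == 1 the
	 * per-port set, which only exists when the firmware advertises
	 * per-port diagnostic counters.
	 */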
2228	for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
2229		/*
2230		 * i == 1 means we are building the per-port counters; set
2231		 * different stats ops without the per-port callback.
2232		 */
2233		if (i && !per_port) {
2234			ib_set_device_ops(&ibdev->ib_dev,
2235					  &mlx4_ib_hw_stats_ops1);
2236
2237			return 0;
2238		}
2239
2240		ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].descs,
2241						    &diag[i].offset,
2242						    &diag[i].num_counters, i);
2243		if (ret)
2244			goto err_alloc;
2245
2246		mlx4_ib_fill_diag_counters(ibdev, diag[i].descs,
2247					   diag[i].offset, i);
2248	}
2249
2250	ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_hw_stats_ops);
2251
2252	return 0;
2253
2254err_alloc:
2255	if (i) {
2256		kfree(diag[i - 1].descs);
2257		kfree(diag[i - 1].offset);
2258	}
2259
2260	return ret;
2261}
2262
2263static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev)
2264{
2265	int i;
2266
2267	for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
2268		kfree(ibdev->diag_counters[i].offset);
2269		kfree(ibdev->diag_counters[i].descs);
2270	}
2271}
2272
2273#define MLX4_IB_INVALID_MAC	((u64)-1)
2274static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
2275			       struct net_device *dev,
2276			       int port)
2277{
2278	u64 new_smac = 0;
2279	u64 release_mac = MLX4_IB_INVALID_MAC;
2280	struct mlx4_ib_qp *qp;
2281
2282	new_smac = ether_addr_to_u64(dev->dev_addr);
2283	atomic64_set(&ibdev->iboe.mac[port - 1], new_smac);
2284
2285	/* no need to update QP1 or register the MAC in non-SR-IOV mode */
2286	if (!mlx4_is_mfunc(ibdev->dev))
2287		return;
2288
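	/* Under SR-IOV the driver keeps a proxy QP1 per port; register the
	 * netdev's new MAC, repoint the QP's SMAC index at it and release
	 * whichever MAC ends up unused.
	 */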
2289	mutex_lock(&ibdev->qp1_proxy_lock[port - 1]);
2290	qp = ibdev->qp1_proxy[port - 1];
2291	if (qp) {
2292		int new_smac_index;
2293		u64 old_smac;
2294		struct mlx4_update_qp_params update_params;
2295
2296		mutex_lock(&qp->mutex);
2297		old_smac = qp->pri.smac;
2298		if (new_smac == old_smac)
2299			goto unlock;
2300
2301		new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac);
2302
2303		if (new_smac_index < 0)
2304			goto unlock;
2305
2306		update_params.smac_index = new_smac_index;
2307		if (mlx4_update_qp(ibdev->dev, qp->mqp.qpn, MLX4_UPDATE_QP_SMAC,
2308				   &update_params)) {
2309			release_mac = new_smac;
2310			goto unlock;
2311		}
2312		/* if old port was zero, no mac was yet registered for this QP */
2313		if (qp->pri.smac_port)
2314			release_mac = old_smac;
2315		qp->pri.smac = new_smac;
2316		qp->pri.smac_port = port;
2317		qp->pri.smac_index = new_smac_index;
2318	}
2319
2320unlock:
2321	if (release_mac != MLX4_IB_INVALID_MAC)
2322		mlx4_unregister_mac(ibdev->dev, port, release_mac);
2323	if (qp)
2324		mutex_unlock(&qp->mutex);
2325	mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
2326}
2327
2328static void mlx4_ib_scan_netdev(struct mlx4_ib_dev *ibdev,
2329				struct net_device *dev,
2330				unsigned long event)
2331
2332{
2333	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
2334
2335	ASSERT_RTNL();
2336
2337	if (dev->dev.parent != ibdev->ib_dev.dev.parent)
2338		return;
2339
2340	spin_lock_bh(&iboe->lock);
2341
2342	iboe->netdevs[dev->dev_port] = event != NETDEV_UNREGISTER ? dev : NULL;
2343
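	/* Translate netdev up/down into IB port events, but only when the
	 * cached port state actually changed, to avoid duplicate dispatches.
	 */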
2344	if (event == NETDEV_UP || event == NETDEV_DOWN) {
2345		enum ib_port_state port_state;
2346		struct ib_event ibev = { };
2347
2348		if (ib_get_cached_port_state(&ibdev->ib_dev, dev->dev_port + 1,
2349					     &port_state))
2350			goto iboe_out;
2351
2352		if (event == NETDEV_UP &&
2353		    (port_state != IB_PORT_ACTIVE ||
2354		     iboe->last_port_state[dev->dev_port] != IB_PORT_DOWN))
2355			goto iboe_out;
2356		if (event == NETDEV_DOWN &&
2357		    (port_state != IB_PORT_DOWN ||
2358		     iboe->last_port_state[dev->dev_port] != IB_PORT_ACTIVE))
2359			goto iboe_out;
2360		iboe->last_port_state[dev->dev_port] = port_state;
2361
2362		ibev.device = &ibdev->ib_dev;
2363		ibev.element.port_num = dev->dev_port + 1;
2364		ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE :
2365						  IB_EVENT_PORT_ERR;
2366		ib_dispatch_event(&ibev);
2367	}
2368
2369iboe_out:
2370	spin_unlock_bh(&iboe->lock);
2371
2372	if (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
2373	    event == NETDEV_UP || event == NETDEV_CHANGE)
2374		mlx4_ib_update_qps(ibdev, dev, dev->dev_port + 1);
2375}
2376
2377static int mlx4_ib_netdev_event(struct notifier_block *this,
2378				unsigned long event, void *ptr)
2379{
2380	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2381	struct mlx4_ib_dev *ibdev;
2382
2383	if (!net_eq(dev_net(dev), &init_net))
2384		return NOTIFY_DONE;
2385
2386	ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
2387	mlx4_ib_scan_netdev(ibdev, dev, event);
2388
2389	return NOTIFY_DONE;
2390}
2391
2392static void init_pkeys(struct mlx4_ib_dev *ibdev)
2393{
2394	int port;
2395	int slave;
2396	int i;
2397
2398	if (mlx4_is_master(ibdev->dev)) {
2399		for (slave = 0; slave <= ibdev->dev->persist->num_vfs;
2400		     ++slave) {
2401			for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
2402				for (i = 0;
2403				     i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
2404				     ++i) {
2405					ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
2406					/* master has the identity virt2phys pkey mapping */
2407						(slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
2408							ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
2409					mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
2410							     ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
2411				}
2412			}
2413		}
2414		/* initialize pkey cache */
2415		for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
2416			for (i = 0;
2417			     i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
2418			     ++i)
2419				ibdev->pkeys.phys_pkey_cache[port-1][i] =
2420					(i) ? 0 : 0xFFFF;
2421		}
2422	}
2423}
2424
2425static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
2426{
2427	int i, j, eq = 0, total_eqs = 0;
2428
2429	ibdev->eq_table = kcalloc(dev->caps.num_comp_vectors,
2430				  sizeof(ibdev->eq_table[0]), GFP_KERNEL);
2431	if (!ibdev->eq_table)
2432		return;
2433
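	/* Walk the EQs of every port, skipping EQs already counted for a
	 * previous port (shared EQs), and advertise to ULPs only the vectors
	 * that were successfully assigned.
	 */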
2434	for (i = 1; i <= dev->caps.num_ports; i++) {
2435		for (j = 0; j < mlx4_get_eqs_per_port(dev, i);
2436		     j++, total_eqs++) {
2437			if (i > 1 &&  mlx4_is_eq_shared(dev, total_eqs))
2438				continue;
2439			ibdev->eq_table[eq] = total_eqs;
2440			if (!mlx4_assign_eq(dev, i,
2441					    &ibdev->eq_table[eq]))
2442				eq++;
2443			else
2444				ibdev->eq_table[eq] = -1;
2445		}
2446	}
2447
2448	for (i = eq; i < dev->caps.num_comp_vectors;
2449	     ibdev->eq_table[i++] = -1)
2450		;
2451
2452	/* Advertise the new number of EQs to clients */
2453	ibdev->ib_dev.num_comp_vectors = eq;
2454}
2455
2456static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
2457{
2458	int i;
2459	int total_eqs = ibdev->ib_dev.num_comp_vectors;
2460
2461	/* no eqs were allocated */
2462	if (!ibdev->eq_table)
2463		return;
2464
2465	/* Reset the advertised EQ number */
2466	ibdev->ib_dev.num_comp_vectors = 0;
2467
2468	for (i = 0; i < total_eqs; i++)
2469		mlx4_release_eq(dev, ibdev->eq_table[i]);
2470
2471	kfree(ibdev->eq_table);
2472	ibdev->eq_table = NULL;
2473}
2474
2475static int mlx4_port_immutable(struct ib_device *ibdev, u32 port_num,
2476			       struct ib_port_immutable *immutable)
2477{
2478	struct ib_port_attr attr;
2479	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
2480	int err;
2481
2482	if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
2483		immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
2484		immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2485	} else {
2486		if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
2487			immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
2488		if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
2489			immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
2490				RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
2491		immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET;
2492		if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE |
2493		    RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP))
2494			immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2495	}
2496
2497	err = ib_query_port(ibdev, port_num, &attr);
2498	if (err)
2499		return err;
2500
2501	immutable->pkey_tbl_len = attr.pkey_tbl_len;
2502	immutable->gid_tbl_len = attr.gid_tbl_len;
2503
2504	return 0;
2505}
2506
2507static void get_fw_ver_str(struct ib_device *device, char *str)
2508{
2509	struct mlx4_ib_dev *dev =
2510		container_of(device, struct mlx4_ib_dev, ib_dev);
2511	snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d",
2512		 (int) (dev->dev->caps.fw_ver >> 32),
2513		 (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
2514		 (int) dev->dev->caps.fw_ver & 0xffff);
2515}
2516
2517static const struct ib_device_ops mlx4_ib_dev_ops = {
2518	.owner = THIS_MODULE,
2519	.driver_id = RDMA_DRIVER_MLX4,
2520	.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION,
2521
2522	.add_gid = mlx4_ib_add_gid,
2523	.alloc_mr = mlx4_ib_alloc_mr,
2524	.alloc_pd = mlx4_ib_alloc_pd,
2525	.alloc_ucontext = mlx4_ib_alloc_ucontext,
2526	.attach_mcast = mlx4_ib_mcg_attach,
2527	.create_ah = mlx4_ib_create_ah,
2528	.create_cq = mlx4_ib_create_cq,
2529	.create_qp = mlx4_ib_create_qp,
2530	.create_srq = mlx4_ib_create_srq,
2531	.dealloc_pd = mlx4_ib_dealloc_pd,
2532	.dealloc_ucontext = mlx4_ib_dealloc_ucontext,
2533	.del_gid = mlx4_ib_del_gid,
2534	.dereg_mr = mlx4_ib_dereg_mr,
2535	.destroy_ah = mlx4_ib_destroy_ah,
2536	.destroy_cq = mlx4_ib_destroy_cq,
2537	.destroy_qp = mlx4_ib_destroy_qp,
2538	.destroy_srq = mlx4_ib_destroy_srq,
2539	.detach_mcast = mlx4_ib_mcg_detach,
2540	.device_group = &mlx4_attr_group,
2541	.disassociate_ucontext = mlx4_ib_disassociate_ucontext,
2542	.drain_rq = mlx4_ib_drain_rq,
2543	.drain_sq = mlx4_ib_drain_sq,
2544	.get_dev_fw_str = get_fw_ver_str,
2545	.get_dma_mr = mlx4_ib_get_dma_mr,
2546	.get_link_layer = mlx4_ib_port_link_layer,
2547	.get_netdev = mlx4_ib_get_netdev,
2548	.get_port_immutable = mlx4_port_immutable,
2549	.map_mr_sg = mlx4_ib_map_mr_sg,
2550	.mmap = mlx4_ib_mmap,
2551	.modify_cq = mlx4_ib_modify_cq,
2552	.modify_device = mlx4_ib_modify_device,
2553	.modify_port = mlx4_ib_modify_port,
2554	.modify_qp = mlx4_ib_modify_qp,
2555	.modify_srq = mlx4_ib_modify_srq,
2556	.poll_cq = mlx4_ib_poll_cq,
2557	.post_recv = mlx4_ib_post_recv,
2558	.post_send = mlx4_ib_post_send,
2559	.post_srq_recv = mlx4_ib_post_srq_recv,
2560	.process_mad = mlx4_ib_process_mad,
2561	.query_ah = mlx4_ib_query_ah,
2562	.query_device = mlx4_ib_query_device,
2563	.query_gid = mlx4_ib_query_gid,
2564	.query_pkey = mlx4_ib_query_pkey,
2565	.query_port = mlx4_ib_query_port,
2566	.query_qp = mlx4_ib_query_qp,
2567	.query_srq = mlx4_ib_query_srq,
2568	.reg_user_mr = mlx4_ib_reg_user_mr,
2569	.req_notify_cq = mlx4_ib_arm_cq,
2570	.rereg_user_mr = mlx4_ib_rereg_user_mr,
2571	.resize_cq = mlx4_ib_resize_cq,
2572
2573	INIT_RDMA_OBJ_SIZE(ib_ah, mlx4_ib_ah, ibah),
2574	INIT_RDMA_OBJ_SIZE(ib_cq, mlx4_ib_cq, ibcq),
2575	INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd),
2576	INIT_RDMA_OBJ_SIZE(ib_qp, mlx4_ib_qp, ibqp),
2577	INIT_RDMA_OBJ_SIZE(ib_srq, mlx4_ib_srq, ibsrq),
2578	INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx4_ib_ucontext, ibucontext),
2579};
2580
2581static const struct ib_device_ops mlx4_ib_dev_wq_ops = {
2582	.create_rwq_ind_table = mlx4_ib_create_rwq_ind_table,
2583	.create_wq = mlx4_ib_create_wq,
2584	.destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table,
2585	.destroy_wq = mlx4_ib_destroy_wq,
2586	.modify_wq = mlx4_ib_modify_wq,
2587
2588	INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx4_ib_rwq_ind_table,
2589			   ib_rwq_ind_tbl),
2590};
2591
2592static const struct ib_device_ops mlx4_ib_dev_mw_ops = {
2593	.alloc_mw = mlx4_ib_alloc_mw,
2594	.dealloc_mw = mlx4_ib_dealloc_mw,
2595
2596	INIT_RDMA_OBJ_SIZE(ib_mw, mlx4_ib_mw, ibmw),
2597};
2598
2599static const struct ib_device_ops mlx4_ib_dev_xrc_ops = {
2600	.alloc_xrcd = mlx4_ib_alloc_xrcd,
2601	.dealloc_xrcd = mlx4_ib_dealloc_xrcd,
2602
2603	INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx4_ib_xrcd, ibxrcd),
2604};
2605
2606static const struct ib_device_ops mlx4_ib_dev_fs_ops = {
2607	.create_flow = mlx4_ib_create_flow,
2608	.destroy_flow = mlx4_ib_destroy_flow,
2609};
2610
2611static int mlx4_ib_probe(struct auxiliary_device *adev,
2612			 const struct auxiliary_device_id *id)
2613{
2614	struct mlx4_adev *madev = container_of(adev, struct mlx4_adev, adev);
2615	struct mlx4_dev *dev = madev->mdev;
2616	struct mlx4_ib_dev *ibdev;
2617	int num_ports = 0;
2618	int i, j;
2619	int err;
2620	struct mlx4_ib_iboe *iboe;
2621	int ib_num_ports = 0;
2622	int num_req_counters;
2623	int allocated;
2624	u32 counter_index;
2625	struct counter_index *new_counter_index;
2626
2627	pr_info_once("%s", mlx4_ib_version);
2628
2629	num_ports = 0;
2630	mlx4_foreach_ib_transport_port(i, dev)
2631		num_ports++;
2632
2633	/* No point in registering a device with no ports... */
2634	if (num_ports == 0)
2635		return -ENODEV;
2636
2637	ibdev = ib_alloc_device(mlx4_ib_dev, ib_dev);
2638	if (!ibdev) {
2639		dev_err(&dev->persist->pdev->dev,
2640			"Device struct alloc failed\n");
2641		return -ENOMEM;
2642	}
2643
2644	iboe = &ibdev->iboe;
2645
2646	err = mlx4_pd_alloc(dev, &ibdev->priv_pdn);
2647	if (err)
2648		goto err_dealloc;
2649
2650	err = mlx4_uar_alloc(dev, &ibdev->priv_uar);
2651	if (err)
2652		goto err_pd;
2653
2654	ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
2655				 PAGE_SIZE);
2656	if (!ibdev->uar_map) {
2657		err = -ENOMEM;
2658		goto err_uar;
2659	}
2660	MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
2661
2662	ibdev->dev = dev;
2663	ibdev->bond_next_port	= 0;
2664
2665	ibdev->ib_dev.node_type		= RDMA_NODE_IB_CA;
2666	ibdev->ib_dev.local_dma_lkey	= dev->caps.reserved_lkey;
2667	ibdev->num_ports		= num_ports;
2668	ibdev->ib_dev.phys_port_cnt     = mlx4_is_bonded(dev) ?
2669						1 : ibdev->num_ports;
2670	ibdev->ib_dev.num_comp_vectors	= dev->caps.num_comp_vectors;
2671	ibdev->ib_dev.dev.parent	= &dev->persist->pdev->dev;
2672
2673	ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops);
2674
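	/* Optional verbs are wired up only when the corresponding capability
	 * is present: RSS/WQ verbs for Ethernet link layer, memory windows,
	 * XRC and device-managed flow steering below.
	 */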
2675	if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
2676	    ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) ==
2677	    IB_LINK_LAYER_ETHERNET) ||
2678	    (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) ==
2679	    IB_LINK_LAYER_ETHERNET)))
2680		ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops);
2681
2682	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
2683	    dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
2684		ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops);
2685
2686	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
2687		ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops);
2688	}
2689
2690	if (check_flow_steering_support(dev)) {
2691		ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
2692		ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops);
2693	}
2694
2695	if (!dev->caps.userspace_caps)
2696		ibdev->ib_dev.ops.uverbs_abi_ver =
2697			MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
2698
2699	mlx4_ib_alloc_eqs(dev, ibdev);
2700
2701	spin_lock_init(&iboe->lock);
2702
2703	err = init_node_data(ibdev);
2704	if (err)
2705		goto err_map;
2706	mlx4_init_sl2vl_tbl(ibdev);
2707
2708	for (i = 0; i < ibdev->num_ports; ++i) {
2709		mutex_init(&ibdev->counters_table[i].mutex);
2710		INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
2711		iboe->last_port_state[i] = IB_PORT_DOWN;
2712	}
2713
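	/* One counter entry per port (a single one when the ports are bonded):
	 * Ethernet ports try to allocate a dedicated counter and fall back to
	 * the default, IB ports always use the firmware's default counter.
	 */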
2714	num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
2715	for (i = 0; i < num_req_counters; ++i) {
2716		mutex_init(&ibdev->qp1_proxy_lock[i]);
2717		allocated = 0;
2718		if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
2719						IB_LINK_LAYER_ETHERNET) {
2720			err = mlx4_counter_alloc(ibdev->dev, &counter_index,
2721						 MLX4_RES_USAGE_DRIVER);
2722			/* if allocating a new counter failed, use the default */
2723			if (err)
2724				counter_index =
2725					mlx4_get_default_counter_index(dev,
2726								       i + 1);
2727			else
2728				allocated = 1;
2729		} else { /* IB_LINK_LAYER_INFINIBAND uses the default counter */
2730			counter_index = mlx4_get_default_counter_index(dev,
2731								       i + 1);
2732		}
2733		new_counter_index = kmalloc(sizeof(*new_counter_index),
2734					    GFP_KERNEL);
2735		if (!new_counter_index) {
2736			err = -ENOMEM;
2737			if (allocated)
2738				mlx4_counter_free(ibdev->dev, counter_index);
2739			goto err_counter;
2740		}
2741		new_counter_index->index = counter_index;
2742		new_counter_index->allocated = allocated;
2743		list_add_tail(&new_counter_index->list,
2744			      &ibdev->counters_table[i].counters_list);
2745		ibdev->counters_table[i].default_counter = counter_index;
2746		pr_info("counter index %d for port %d allocated %d\n",
2747			counter_index, i + 1, allocated);
2748	}
2749	if (mlx4_is_bonded(dev))
2750		for (i = 1; i < ibdev->num_ports ; ++i) {
2751			new_counter_index =
2752					kmalloc(sizeof(struct counter_index),
2753						GFP_KERNEL);
2754			if (!new_counter_index) {
2755				err = -ENOMEM;
2756				goto err_counter;
2757			}
2758			new_counter_index->index = counter_index;
2759			new_counter_index->allocated = 0;
2760			list_add_tail(&new_counter_index->list,
2761				      &ibdev->counters_table[i].counters_list);
2762			ibdev->counters_table[i].default_counter =
2763								counter_index;
2764		}
2765
2766	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
2767		ib_num_ports++;
2768
2769	spin_lock_init(&ibdev->sm_lock);
2770	mutex_init(&ibdev->cap_mask_mutex);
2771	INIT_LIST_HEAD(&ibdev->qp_list);
2772	spin_lock_init(&ibdev->reset_flow_resource_lock);
2773
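	/* With device-managed flow steering, reserve a QPN range for the
	 * driver's unicast steering rules; the bitmap below tracks which QPNs
	 * of that range are handed out via mlx4_ib_steer_qp_alloc().
	 */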
2774	if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED &&
2775	    ib_num_ports) {
2776		ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
2777		err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
2778					    MLX4_IB_UC_STEER_QPN_ALIGN,
2779					    &ibdev->steer_qpn_base, 0,
2780					    MLX4_RES_USAGE_DRIVER);
2781		if (err)
2782			goto err_counter;
2783
2784		ibdev->ib_uc_qpns_bitmap = bitmap_alloc(ibdev->steer_qpn_count,
2785							GFP_KERNEL);
2786		if (!ibdev->ib_uc_qpns_bitmap) {
2787			err = -ENOMEM;
2788			goto err_steer_qp_release;
2789		}
2790
2791		if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) {
2792			bitmap_zero(ibdev->ib_uc_qpns_bitmap,
2793				    ibdev->steer_qpn_count);
2794			err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
2795					dev, ibdev->steer_qpn_base,
2796					ibdev->steer_qpn_base +
2797					ibdev->steer_qpn_count - 1);
2798			if (err)
2799				goto err_steer_free_bitmap;
2800		} else {
2801			bitmap_fill(ibdev->ib_uc_qpns_bitmap,
2802				    ibdev->steer_qpn_count);
2803		}
2804	}
2805
2806	for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
2807		atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
2808
2809	err = mlx4_ib_alloc_diag_counters(ibdev);
2810	if (err)
2811		goto err_steer_free_bitmap;
2812
2813	err = ib_register_device(&ibdev->ib_dev, "mlx4_%d",
2814				 &dev->persist->pdev->dev);
2815	if (err)
2816		goto err_diag_counters;
2817
2818	err = mlx4_ib_mad_init(ibdev);
2819	if (err)
2820		goto err_reg;
2821
2822	err = mlx4_ib_init_sriov(ibdev);
2823	if (err)
2824		goto err_mad;
2825
2826	if (!iboe->nb.notifier_call) {
2827		iboe->nb.notifier_call = mlx4_ib_netdev_event;
2828		err = register_netdevice_notifier(&iboe->nb);
2829		if (err) {
2830			iboe->nb.notifier_call = NULL;
2831			goto err_notif;
2832		}
2833	}
2834	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
2835		err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
2836		if (err)
2837			goto err_notif;
2838	}
2839
2840	ibdev->ib_active = true;
2841	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
2842		devlink_port_type_ib_set(mlx4_get_devlink_port(dev, i),
2843					 &ibdev->ib_dev);
2844
2845	if (mlx4_is_mfunc(ibdev->dev))
2846		init_pkeys(ibdev);
2847
2848	/* create paravirt contexts for any VFs which are active */
2849	if (mlx4_is_master(ibdev->dev)) {
2850		for (j = 0; j < MLX4_MFUNC_MAX; j++) {
2851			if (j == mlx4_master_func_num(ibdev->dev))
2852				continue;
2853			if (mlx4_is_slave_active(ibdev->dev, j))
2854				do_slave_init(ibdev, j, 1);
2855		}
2856	}
2857
2858	/* register mlx4 core notifier */
2859	ibdev->mlx_nb.notifier_call = mlx4_ib_event;
2860	err = mlx4_register_event_notifier(dev, &ibdev->mlx_nb);
2861	WARN(err, "failed to register mlx4 event notifier (%d)", err);
2862
2863	auxiliary_set_drvdata(adev, ibdev);
2864	return 0;
2865
2866err_notif:
2867	if (ibdev->iboe.nb.notifier_call) {
2868		if (unregister_netdevice_notifier(&ibdev->iboe.nb))
2869			pr_warn("failure unregistering notifier\n");
2870		ibdev->iboe.nb.notifier_call = NULL;
2871	}
2872	flush_workqueue(wq);
2873
2874	mlx4_ib_close_sriov(ibdev);
2875
2876err_mad:
2877	mlx4_ib_mad_cleanup(ibdev);
2878
2879err_reg:
2880	ib_unregister_device(&ibdev->ib_dev);
2881
2882err_diag_counters:
2883	mlx4_ib_diag_cleanup(ibdev);
2884
2885err_steer_free_bitmap:
2886	bitmap_free(ibdev->ib_uc_qpns_bitmap);
2887
2888err_steer_qp_release:
2889	mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
2890			      ibdev->steer_qpn_count);
2891err_counter:
2892	for (i = 0; i < ibdev->num_ports; ++i)
2893		mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
2894
2895err_map:
2896	mlx4_ib_free_eqs(dev, ibdev);
2897	iounmap(ibdev->uar_map);
2898
2899err_uar:
2900	mlx4_uar_free(dev, &ibdev->priv_uar);
2901
2902err_pd:
2903	mlx4_pd_free(dev, ibdev->priv_pdn);
2904
2905err_dealloc:
2906	ib_dealloc_device(&ibdev->ib_dev);
2907
2908	return err;
2909}
2910
2911int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
2912{
2913	int offset;
2914
2915	WARN_ON(!dev->ib_uc_qpns_bitmap);
2916
2917	offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
2918					 dev->steer_qpn_count,
2919					 get_count_order(count));
2920	if (offset < 0)
2921		return offset;
2922
2923	*qpn = dev->steer_qpn_base + offset;
2924	return 0;
2925}
2926
2927void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
2928{
2929	if (!qpn ||
2930	    dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED)
2931		return;
2932
2933	if (WARN(qpn < dev->steer_qpn_base, "qpn = %u, steer_qpn_base = %u\n",
2934		 qpn, dev->steer_qpn_base))
2935		/* not supposed to be here */
2936		return;
2937
2938	bitmap_release_region(dev->ib_uc_qpns_bitmap,
2939			      qpn - dev->steer_qpn_base,
2940			      get_count_order(count));
2941}
2942
2943int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
2944			 int is_attach)
2945{
2946	int err;
2947	size_t flow_size;
2948	struct ib_flow_attr *flow;
2949	struct ib_flow_spec_ib *ib_spec;
2950
2951	if (is_attach) {
2952		flow_size = sizeof(struct ib_flow_attr) +
2953			    sizeof(struct ib_flow_spec_ib);
2954		flow = kzalloc(flow_size, GFP_KERNEL);
2955		if (!flow)
2956			return -ENOMEM;
2957		flow->port = mqp->port;
2958		flow->num_of_specs = 1;
2959		flow->size = flow_size;
2960		ib_spec = (struct ib_flow_spec_ib *)(flow + 1);
2961		ib_spec->type = IB_FLOW_SPEC_IB;
2962		ib_spec->size = sizeof(struct ib_flow_spec_ib);
2963		/* Add an empty rule for IB L2 */
2964		memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
2965
2966		err = __mlx4_ib_create_flow(&mqp->ibqp, flow, MLX4_DOMAIN_NIC,
2967					    MLX4_FS_REGULAR, &mqp->reg_id);
2968		kfree(flow);
2969		return err;
2970	}
2971
2972	return __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
2973}
2974
2975static void mlx4_ib_remove(struct auxiliary_device *adev)
2976{
2977	struct mlx4_adev *madev = container_of(adev, struct mlx4_adev, adev);
2978	struct mlx4_dev *dev = madev->mdev;
2979	struct mlx4_ib_dev *ibdev = auxiliary_get_drvdata(adev);
2980	int p;
2981	int i;
2982
2983	mlx4_unregister_event_notifier(dev, &ibdev->mlx_nb);
2984
2985	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
2986		devlink_port_type_clear(mlx4_get_devlink_port(dev, i));
2987	ibdev->ib_active = false;
2988	flush_workqueue(wq);
2989
2990	if (ibdev->iboe.nb.notifier_call) {
2991		if (unregister_netdevice_notifier(&ibdev->iboe.nb))
2992			pr_warn("failure unregistering notifier\n");
2993		ibdev->iboe.nb.notifier_call = NULL;
2994	}
2995
2996	mlx4_ib_close_sriov(ibdev);
2997	mlx4_ib_mad_cleanup(ibdev);
2998	ib_unregister_device(&ibdev->ib_dev);
2999	mlx4_ib_diag_cleanup(ibdev);
3000
3001	mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
3002			      ibdev->steer_qpn_count);
3003	bitmap_free(ibdev->ib_uc_qpns_bitmap);
3004
3005	iounmap(ibdev->uar_map);
3006	for (p = 0; p < ibdev->num_ports; ++p)
3007		mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]);
3008
3009	mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
3010		mlx4_CLOSE_PORT(dev, p);
3011
3012	mlx4_ib_free_eqs(dev, ibdev);
3013
3014	mlx4_uar_free(dev, &ibdev->priv_uar);
3015	mlx4_pd_free(dev, ibdev->priv_pdn);
3016	ib_dealloc_device(&ibdev->ib_dev);
3017}
3018
3019static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
3020{
3021	struct mlx4_ib_demux_work **dm;
3022	struct mlx4_dev *dev = ibdev->dev;
3023	int i;
3024	unsigned long flags;
3025	struct mlx4_active_ports actv_ports;
3026	unsigned int ports;
3027	unsigned int first_port;
3028
3029	if (!mlx4_is_master(dev))
3030		return;
3031
3032	actv_ports = mlx4_get_active_ports(dev, slave);
3033	ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
3034	first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports);
3035
3036	dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
3037	if (!dm)
3038		return;
3039
3040	for (i = 0; i < ports; i++) {
3041		dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
3042		if (!dm[i]) {
3043			while (--i >= 0)
3044				kfree(dm[i]);
3045			goto out;
3046		}
3047		INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
3048		dm[i]->port = first_port + i + 1;
3049		dm[i]->slave = slave;
3050		dm[i]->do_init = do_init;
3051		dm[i]->dev = ibdev;
3052	}
3053	/* initialize or tear down tunnel QPs for the slave */
3054	spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
3055	if (!ibdev->sriov.is_going_down) {
3056		for (i = 0; i < ports; i++)
3057			queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
3058		spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
3059	} else {
3060		spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
3061		for (i = 0; i < ports; i++)
3062			kfree(dm[i]);
3063	}
3064out:
3065	kfree(dm);
3066	return;
3067}
3068
3069static void mlx4_ib_handle_catas_error(struct mlx4_ib_dev *ibdev)
3070{
3071	struct mlx4_ib_qp *mqp;
3072	unsigned long flags_qp;
3073	unsigned long flags_cq;
3074	struct mlx4_ib_cq *send_mcq, *recv_mcq;
3075	struct list_head    cq_notify_list;
3076	struct mlx4_cq *mcq;
3077	unsigned long flags;
3078
3079	pr_warn("mlx4_ib_handle_catas_error was started\n");
3080	INIT_LIST_HEAD(&cq_notify_list);
3081
3082	/* Go over the QP list residing on that ibdev, synchronized with QP create/destroy. */
3083	spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
3084
3085	list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
3086		spin_lock_irqsave(&mqp->sq.lock, flags_qp);
3087		if (mqp->sq.tail != mqp->sq.head) {
3088			send_mcq = to_mcq(mqp->ibqp.send_cq);
3089			spin_lock_irqsave(&send_mcq->lock, flags_cq);
3090			if (send_mcq->mcq.comp &&
3091			    mqp->ibqp.send_cq->comp_handler) {
3092				if (!send_mcq->mcq.reset_notify_added) {
3093					send_mcq->mcq.reset_notify_added = 1;
3094					list_add_tail(&send_mcq->mcq.reset_notify,
3095						      &cq_notify_list);
3096				}
3097			}
3098			spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
3099		}
3100		spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
3101		/* Now, handle the QP's receive queue */
3102		spin_lock_irqsave(&mqp->rq.lock, flags_qp);
3103		/* no handling is needed for SRQ */
3104		if (!mqp->ibqp.srq) {
3105			if (mqp->rq.tail != mqp->rq.head) {
3106				recv_mcq = to_mcq(mqp->ibqp.recv_cq);
3107				spin_lock_irqsave(&recv_mcq->lock, flags_cq);
3108				if (recv_mcq->mcq.comp &&
3109				    mqp->ibqp.recv_cq->comp_handler) {
3110					if (!recv_mcq->mcq.reset_notify_added) {
3111						recv_mcq->mcq.reset_notify_added = 1;
3112						list_add_tail(&recv_mcq->mcq.reset_notify,
3113							      &cq_notify_list);
3114					}
3115				}
3116				spin_unlock_irqrestore(&recv_mcq->lock,
3117						       flags_cq);
3118			}
3119		}
3120		spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
3121	}
3122
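	/* Kick each collected CQ's completion handler once so that consumers
	 * poll out the completions flushed by the fatal error.
	 */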
3123	list_for_each_entry(mcq, &cq_notify_list, reset_notify) {
3124		mcq->comp(mcq);
3125	}
3126	spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
3127	pr_warn("mlx4_ib_handle_catas_error ended\n");
3128}
3129
3130static void handle_bonded_port_state_event(struct work_struct *work)
3131{
3132	struct ib_event_work *ew =
3133		container_of(work, struct ib_event_work, work);
3134	struct mlx4_ib_dev *ibdev = ew->ib_dev;
3135	enum ib_port_state bonded_port_state = IB_PORT_NOP;
3136	int i;
3137	struct ib_event ibev;
3138
3139	kfree(ew);
3140	spin_lock_bh(&ibdev->iboe.lock);
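	/* The bond exposes a single IB port: report it ACTIVE as long as at
	 * least one slave netdev is running with carrier, otherwise DOWN.
	 */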
3141	for (i = 0; i < MLX4_MAX_PORTS; ++i) {
3142		struct net_device *curr_netdev = ibdev->iboe.netdevs[i];
3143		enum ib_port_state curr_port_state;
3144
3145		if (!curr_netdev)
3146			continue;
3147
3148		curr_port_state =
3149			(netif_running(curr_netdev) &&
3150			 netif_carrier_ok(curr_netdev)) ?
3151			IB_PORT_ACTIVE : IB_PORT_DOWN;
3152
3153		bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ?
3154			curr_port_state : IB_PORT_ACTIVE;
3155	}
3156	spin_unlock_bh(&ibdev->iboe.lock);
3157
3158	ibev.device = &ibdev->ib_dev;
3159	ibev.element.port_num = 1;
3160	ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ?
3161		IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
3162
3163	ib_dispatch_event(&ibev);
3164}
3165
3166void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port)
3167{
3168	u64 sl2vl;
3169	int err;
3170
3171	err = mlx4_ib_query_sl2vl(&mdev->ib_dev, port, &sl2vl);
3172	if (err) {
3173		pr_err("Unable to get current sl to vl mapping for port %d.  Using all zeroes (%d)\n",
3174		       port, err);
3175		sl2vl = 0;
3176	}
3177	atomic64_set(&mdev->sl2vl[port - 1], sl2vl);
3178}
3179
3180static void ib_sl2vl_update_work(struct work_struct *work)
3181{
3182	struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
3183	struct mlx4_ib_dev *mdev = ew->ib_dev;
3184	int port = ew->port;
3185
3186	mlx4_ib_sl2vl_update(mdev, port);
3187
3188	kfree(ew);
3189}
3190
3191void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
3192				     int port)
3193{
3194	struct ib_event_work *ew;
3195
3196	ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
3197	if (ew) {
3198		INIT_WORK(&ew->work, ib_sl2vl_update_work);
3199		ew->port = port;
3200		ew->ib_dev = ibdev;
3201		queue_work(wq, &ew->work);
3202	}
3203}
3204
3205static int mlx4_ib_event(struct notifier_block *this, unsigned long event,
3206			 void *param)
3207{
3208	struct mlx4_ib_dev *ibdev =
3209		container_of(this, struct mlx4_ib_dev, mlx_nb);
3210	struct mlx4_dev *dev = ibdev->dev;
3211	struct ib_event ibev;
3212	struct mlx4_eqe *eqe = NULL;
3213	struct ib_event_work *ew;
3214	int p = 0;
3215
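	/* In bonded mode, per-port up/down events are folded into one deferred
	 * work item that recomputes the state of the single exposed port.
	 */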
3216	if (mlx4_is_bonded(dev) &&
3217	    ((event == MLX4_DEV_EVENT_PORT_UP) ||
3218	    (event == MLX4_DEV_EVENT_PORT_DOWN))) {
3219		ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
3220		if (!ew)
3221			return NOTIFY_DONE;
3222		INIT_WORK(&ew->work, handle_bonded_port_state_event);
3223		ew->ib_dev = ibdev;
3224		queue_work(wq, &ew->work);
3225		return NOTIFY_DONE;
3226	}
3227
3228	switch (event) {
3229	case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
3230		break;
3231	case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
3232		eqe = (struct mlx4_eqe *)param;
3233		break;
3234	default:
3235		p = *(int *)param;
3236		break;
3237	}
3238
3239	switch (event) {
3240	case MLX4_DEV_EVENT_PORT_UP:
3241		if (p > ibdev->num_ports)
3242			return NOTIFY_DONE;
3243		if (!mlx4_is_slave(dev) &&
3244		    rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
3245			IB_LINK_LAYER_INFINIBAND) {
3246			if (mlx4_is_master(dev))
3247				mlx4_ib_invalidate_all_guid_record(ibdev, p);
3248			if (ibdev->dev->flags & MLX4_FLAG_SECURE_HOST &&
3249			    !(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT))
3250				mlx4_sched_ib_sl2vl_update_work(ibdev, p);
3251		}
3252		ibev.event = IB_EVENT_PORT_ACTIVE;
3253		break;
3254
3255	case MLX4_DEV_EVENT_PORT_DOWN:
3256		if (p > ibdev->num_ports)
3257			return NOTIFY_DONE;
3258		ibev.event = IB_EVENT_PORT_ERR;
3259		break;
3260
3261	case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
3262		ibdev->ib_active = false;
3263		ibev.event = IB_EVENT_DEVICE_FATAL;
3264		mlx4_ib_handle_catas_error(ibdev);
3265		break;
3266
3267	case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
3268		ew = kmalloc(sizeof *ew, GFP_ATOMIC);
3269		if (!ew)
3270			return NOTIFY_DONE;
3271
3272		INIT_WORK(&ew->work, handle_port_mgmt_change_event);
3273		memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
3274		ew->ib_dev = ibdev;
3275		/* need to queue only for port owner, which uses GEN_EQE */
3276		if (mlx4_is_master(dev))
3277			queue_work(wq, &ew->work);
3278		else
3279			handle_port_mgmt_change_event(&ew->work);
3280		return NOTIFY_DONE;
3281
3282	case MLX4_DEV_EVENT_SLAVE_INIT:
3283		/* here, p is the slave id */
3284		do_slave_init(ibdev, p, 1);
3285		if (mlx4_is_master(dev)) {
3286			int i;
3287
3288			for (i = 1; i <= ibdev->num_ports; i++) {
3289				if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
3290					== IB_LINK_LAYER_INFINIBAND)
3291					mlx4_ib_slave_alias_guid_event(ibdev,
3292								       p, i,
3293								       1);
3294			}
3295		}
3296		return NOTIFY_DONE;
3297
3298	case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
3299		if (mlx4_is_master(dev)) {
3300			int i;
3301
3302			for (i = 1; i <= ibdev->num_ports; i++) {
3303				if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
3304					== IB_LINK_LAYER_INFINIBAND)
3305					mlx4_ib_slave_alias_guid_event(ibdev,
3306								       p, i,
3307								       0);
3308			}
3309		}
3310		/* here, p is the slave id */
3311		do_slave_init(ibdev, p, 0);
3312		return NOTIFY_DONE;
3313
3314	default:
3315		return NOTIFY_DONE;
3316	}
3317
3318	ibev.device	      = &ibdev->ib_dev;
3319	ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
3320
3321	ib_dispatch_event(&ibev);
3322	return NOTIFY_DONE;
3323}
3324
3325static const struct auxiliary_device_id mlx4_ib_id_table[] = {
3326	{ .name = MLX4_ADEV_NAME ".ib" },
3327	{},
3328};
3329
3330MODULE_DEVICE_TABLE(auxiliary, mlx4_ib_id_table);
3331
3332static struct mlx4_adrv mlx4_ib_adrv = {
3333	.adrv = {
3334		.name	= "ib",
3335		.probe	= mlx4_ib_probe,
3336		.remove	= mlx4_ib_remove,
3337		.id_table = mlx4_ib_id_table,
3338	},
3339	.protocol	= MLX4_PROT_IB_IPV6,
3340	.flags		= MLX4_INTFF_BONDING
3341};
3342
3343static int __init mlx4_ib_init(void)
3344{
3345	int err;
3346
3347	wq = alloc_ordered_workqueue("mlx4_ib", WQ_MEM_RECLAIM);
3348	if (!wq)
3349		return -ENOMEM;
3350
3351	err = mlx4_ib_qp_event_init();
3352	if (err)
3353		goto clean_qp_event;
3354
3355	err = mlx4_ib_cm_init();
3356	if (err)
3357		goto clean_wq;
3358
3359	err = mlx4_ib_mcg_init();
3360	if (err)
3361		goto clean_cm;
3362
3363	err = mlx4_register_auxiliary_driver(&mlx4_ib_adrv);
3364	if (err)
3365		goto clean_mcg;
3366
3367	return 0;
3368
3369clean_mcg:
3370	mlx4_ib_mcg_destroy();
3371
3372clean_cm:
3373	mlx4_ib_cm_destroy();
3374
3375clean_wq:
3376	mlx4_ib_qp_event_cleanup();
3377
3378clean_qp_event:
3379	destroy_workqueue(wq);
3380	return err;
3381}
3382
3383static void __exit mlx4_ib_cleanup(void)
3384{
3385	mlx4_unregister_auxiliary_driver(&mlx4_ib_adrv);
3386	mlx4_ib_mcg_destroy();
3387	mlx4_ib_cm_destroy();
3388	mlx4_ib_qp_event_cleanup();
3389	destroy_workqueue(wq);
3390}
3391
3392module_init(mlx4_ib_init);
3393module_exit(mlx4_ib_cleanup);
v4.17
   1/*
   2 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
   3 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
   4 *
   5 * This software is available to you under a choice of one of two
   6 * licenses.  You may choose to be licensed under the terms of the GNU
   7 * General Public License (GPL) Version 2, available from the file
   8 * COPYING in the main directory of this source tree, or the
   9 * OpenIB.org BSD license below:
  10 *
  11 *     Redistribution and use in source and binary forms, with or
  12 *     without modification, are permitted provided that the following
  13 *     conditions are met:
  14 *
  15 *      - Redistributions of source code must retain the above
  16 *        copyright notice, this list of conditions and the following
  17 *        disclaimer.
  18 *
  19 *      - Redistributions in binary form must reproduce the above
  20 *        copyright notice, this list of conditions and the following
  21 *        disclaimer in the documentation and/or other materials
  22 *        provided with the distribution.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31 * SOFTWARE.
  32 */
  33
  34#include <linux/module.h>
  35#include <linux/init.h>
  36#include <linux/slab.h>
  37#include <linux/errno.h>
  38#include <linux/netdevice.h>
  39#include <linux/inetdevice.h>
  40#include <linux/rtnetlink.h>
  41#include <linux/if_vlan.h>
  42#include <linux/sched/mm.h>
  43#include <linux/sched/task.h>
  44
  45#include <net/ipv6.h>
  46#include <net/addrconf.h>
  47#include <net/devlink.h>
  48
  49#include <rdma/ib_smi.h>
  50#include <rdma/ib_user_verbs.h>
  51#include <rdma/ib_addr.h>
  52#include <rdma/ib_cache.h>
  53
  54#include <net/bonding.h>
  55
  56#include <linux/mlx4/driver.h>
  57#include <linux/mlx4/cmd.h>
  58#include <linux/mlx4/qp.h>
  59
  60#include "mlx4_ib.h"
  61#include <rdma/mlx4-abi.h>
  62
  63#define DRV_NAME	MLX4_IB_DRV_NAME
  64#define DRV_VERSION	"4.0-0"
  65
  66#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
  67#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
  68#define MLX4_IB_CARD_REV_A0   0xA0
  69
  70MODULE_AUTHOR("Roland Dreier");
  71MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
  72MODULE_LICENSE("Dual BSD/GPL");
  73
  74int mlx4_ib_sm_guid_assign = 0;
  75module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
  76MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
  77
  78static const char mlx4_ib_version[] =
  79	DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
  80	DRV_VERSION "\n";
  81
  82static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
  83static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device,
  84						    u8 port_num);
 
 
  85
  86static struct workqueue_struct *wq;
  87
  88static void init_query_mad(struct ib_smp *mad)
  89{
  90	mad->base_version  = 1;
  91	mad->mgmt_class    = IB_MGMT_CLASS_SUBN_LID_ROUTED;
  92	mad->class_version = 1;
  93	mad->method	   = IB_MGMT_METHOD_GET;
  94}
  95
  96static int check_flow_steering_support(struct mlx4_dev *dev)
  97{
  98	int eth_num_ports = 0;
  99	int ib_num_ports = 0;
 100
 101	int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
 102
 103	if (dmfs) {
 104		int i;
 105		mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
 106			eth_num_ports++;
 107		mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
 108			ib_num_ports++;
 109		dmfs &= (!ib_num_ports ||
 110			 (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
 111			(!eth_num_ports ||
 112			 (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
 113		if (ib_num_ports && mlx4_is_mfunc(dev)) {
 114			pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n");
 115			dmfs = 0;
 116		}
 117	}
 118	return dmfs;
 119}
 120
 121static int num_ib_ports(struct mlx4_dev *dev)
 122{
 123	int ib_ports = 0;
 124	int i;
 125
 126	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
 127		ib_ports++;
 128
 129	return ib_ports;
 130}
 131
 132static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_num)
 
 133{
 134	struct mlx4_ib_dev *ibdev = to_mdev(device);
 135	struct net_device *dev;
 136
 137	rcu_read_lock();
 138	dev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);
 
 
 
 139
 140	if (dev) {
 141		if (mlx4_is_bonded(ibdev->dev)) {
 142			struct net_device *upper = NULL;
 143
 144			upper = netdev_master_upper_dev_get_rcu(dev);
 145			if (upper) {
 146				struct net_device *active;
 147
 148				active = bond_option_active_slave_get_rcu(netdev_priv(upper));
 149				if (active)
 150					dev = active;
 151			}
 152		}
 
 
 
 
 153	}
 154	if (dev)
 155		dev_hold(dev);
 156
 157	rcu_read_unlock();
 158	return dev;
 159}
 160
 161static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
 162				  struct mlx4_ib_dev *ibdev,
 163				  u8 port_num)
 164{
 165	struct mlx4_cmd_mailbox *mailbox;
 166	int err;
 167	struct mlx4_dev *dev = ibdev->dev;
 168	int i;
 169	union ib_gid *gid_tbl;
 170
 171	mailbox = mlx4_alloc_cmd_mailbox(dev);
 172	if (IS_ERR(mailbox))
 173		return -ENOMEM;
 174
 175	gid_tbl = mailbox->buf;
 176
 177	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
 178		memcpy(&gid_tbl[i], &gids[i].gid, sizeof(union ib_gid));
 179
 180	err = mlx4_cmd(dev, mailbox->dma,
 181		       MLX4_SET_PORT_GID_TABLE << 8 | port_num,
 182		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
 183		       MLX4_CMD_WRAPPED);
 184	if (mlx4_is_bonded(dev))
 185		err += mlx4_cmd(dev, mailbox->dma,
 186				MLX4_SET_PORT_GID_TABLE << 8 | 2,
 187				1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
 188				MLX4_CMD_WRAPPED);
 189
 190	mlx4_free_cmd_mailbox(dev, mailbox);
 191	return err;
 192}
 193
 194static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
 195				     struct mlx4_ib_dev *ibdev,
 196				     u8 port_num)
 197{
 198	struct mlx4_cmd_mailbox *mailbox;
 199	int err;
 200	struct mlx4_dev *dev = ibdev->dev;
 201	int i;
 202	struct {
 203		union ib_gid	gid;
 204		__be32		rsrvd1[2];
 205		__be16		rsrvd2;
 206		u8		type;
 207		u8		version;
 208		__be32		rsrvd3;
 209	} *gid_tbl;
 210
 211	mailbox = mlx4_alloc_cmd_mailbox(dev);
 212	if (IS_ERR(mailbox))
 213		return -ENOMEM;
 214
 215	gid_tbl = mailbox->buf;
 216	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
 217		memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
 218		if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
 219			gid_tbl[i].version = 2;
 220			if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
 221				gid_tbl[i].type = 1;
 222		}
 223	}
 224
 225	err = mlx4_cmd(dev, mailbox->dma,
 226		       MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
 227		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
 228		       MLX4_CMD_WRAPPED);
 229	if (mlx4_is_bonded(dev))
 230		err += mlx4_cmd(dev, mailbox->dma,
 231				MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
 232				1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
 233				MLX4_CMD_WRAPPED);
 234
 235	mlx4_free_cmd_mailbox(dev, mailbox);
 236	return err;
 237}
 238
 239static int mlx4_ib_update_gids(struct gid_entry *gids,
 240			       struct mlx4_ib_dev *ibdev,
 241			       u8 port_num)
 242{
 243	if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
 244		return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
 245
 246	return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
 247}
 248
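/* Add a GID to the per-port software cache. A matching existing entry
 * just gets its refcount bumped; otherwise the first free slot is used.
 * For new entries the table is snapshotted under iboe->lock and then
 * programmed into the HCA with mlx4_ib_update_gids() once the lock is
 * dropped.
 */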
 249static int mlx4_ib_add_gid(const union ib_gid *gid,
 250			   const struct ib_gid_attr *attr,
 251			   void **context)
 252{
 253	struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
 254	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
 255	struct mlx4_port_gid_table   *port_gid_table;
 256	int free = -1, found = -1;
 257	int ret = 0;
 258	int hw_update = 0;
 259	int i;
 260	struct gid_entry *gids = NULL;
 261
 262	if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
 263		return -EINVAL;
 264
 265	if (attr->port_num > MLX4_MAX_PORTS)
 266		return -EINVAL;
 267
 268	if (!context)
 269		return -EINVAL;
 270
 271	port_gid_table = &iboe->gids[attr->port_num - 1];
 272	spin_lock_bh(&iboe->lock);
 273	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
 274		if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
 275		    (port_gid_table->gids[i].gid_type == attr->gid_type))  {
 276			found = i;
 277			break;
 278		}
 279		if (free < 0 && !memcmp(&port_gid_table->gids[i].gid, &zgid, sizeof(*gid)))
 280			free = i; /* HW has space */
 281	}
 282
 283	if (found < 0) {
 284		if (free < 0) {
 285			ret = -ENOSPC;
 286		} else {
 287			port_gid_table->gids[free].ctx = kmalloc(sizeof(*port_gid_table->gids[free].ctx), GFP_ATOMIC);
 288			if (!port_gid_table->gids[free].ctx) {
 289				ret = -ENOMEM;
 290			} else {
 291				*context = port_gid_table->gids[free].ctx;
 292				memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
 293				port_gid_table->gids[free].gid_type = attr->gid_type;
 294				port_gid_table->gids[free].ctx->real_index = free;
 295				port_gid_table->gids[free].ctx->refcount = 1;
 296				hw_update = 1;
 297			}
 298		}
 299	} else {
 300		struct gid_cache_context *ctx = port_gid_table->gids[found].ctx;
 301		*context = ctx;
 302		ctx->refcount++;
 303	}
 304	if (!ret && hw_update) {
 305		gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
 306		if (!gids) {
 307			ret = -ENOMEM;
 308		} else {
 309			for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
 310				memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
 311				gids[i].gid_type = port_gid_table->gids[i].gid_type;
 312			}
 313		}
 314	}
 315	spin_unlock_bh(&iboe->lock);
 316
 317	if (!ret && hw_update) {
 318		ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
 319		kfree(gids);
 320	}
 321
 322	return ret;
 323}
 324
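/* Drop the cache reference taken by mlx4_ib_add_gid(). When the refcount
 * reaches zero the slot is cleared and the updated table is re-programmed
 * into the HCA.
 */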
 325static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
 326{
 327	struct gid_cache_context *ctx = *context;
 328	struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
 329	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
 330	struct mlx4_port_gid_table   *port_gid_table;
 331	int ret = 0;
 332	int hw_update = 0;
 333	struct gid_entry *gids = NULL;
 334
 335	if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
 336		return -EINVAL;
 337
 338	if (attr->port_num > MLX4_MAX_PORTS)
 339		return -EINVAL;
 340
 341	port_gid_table = &iboe->gids[attr->port_num - 1];
 342	spin_lock_bh(&iboe->lock);
 343	if (ctx) {
 344		ctx->refcount--;
 345		if (!ctx->refcount) {
 346			unsigned int real_index = ctx->real_index;
 347
 348			memcpy(&port_gid_table->gids[real_index].gid, &zgid, sizeof(zgid));
 349			kfree(port_gid_table->gids[real_index].ctx);
 350			port_gid_table->gids[real_index].ctx = NULL;
 351			hw_update = 1;
 352		}
 353	}
 354	if (!ret && hw_update) {
 355		int i;
 356
 357		gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
 358		if (!gids) {
 359			ret = -ENOMEM;
 360		} else {
 361			for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
 362				memcpy(&gids[i].gid,
 363				       &port_gid_table->gids[i].gid,
 364				       sizeof(union ib_gid));
 365				gids[i].gid_type =
 366				    port_gid_table->gids[i].gid_type;
 367			}
 368		}
 369	}
 370	spin_unlock_bh(&iboe->lock);
 371
 372	if (!ret && hw_update) {
 373		ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
 374		kfree(gids);
 375	}
 376	return ret;
 377}
 378
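/* Translate a GID index from the core GID cache into the slot actually
 * programmed in the HCA's table (the two may differ because cache entries
 * are refcounted and reused). For IB ports the index is returned
 * unchanged; on bonded devices port 1 is always consulted. Returns a
 * negative errno if the GID is not in the software cache.
 */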
 379int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
 380				    u8 port_num, int index)
 381{
 382	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
 383	struct gid_cache_context *ctx = NULL;
 384	union ib_gid gid;
 385	struct mlx4_port_gid_table   *port_gid_table;
 386	int real_index = -EINVAL;
 387	int i;
 388	int ret;
 389	unsigned long flags;
 390	struct ib_gid_attr attr;
 391
 392	if (port_num > MLX4_MAX_PORTS)
 393		return -EINVAL;
 394
 395	if (mlx4_is_bonded(ibdev->dev))
 396		port_num = 1;
 397
 398	if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
 399		return index;
 400
 401	ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr);
 402	if (ret)
 403		return ret;
 404
 405	if (attr.ndev)
 406		dev_put(attr.ndev);
 407
 408	spin_lock_irqsave(&iboe->lock, flags);
 409	port_gid_table = &iboe->gids[port_num - 1];
 410
 411	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
 412		if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) &&
 413		    attr.gid_type == port_gid_table->gids[i].gid_type) {
 414			ctx = port_gid_table->gids[i].ctx;
 415			break;
 416		}
 417	if (ctx)
 418		real_index = ctx->real_index;
 419	spin_unlock_irqrestore(&iboe->lock, flags);
 420	return real_index;
 421}
 422
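/* field_avail() - true when a user buffer of length 'sz' is large enough
 * to hold struct member 'fld', i.e. sz covers offsetof(type, fld) plus
 * the member itself. For example, with struct r { u64 a; u64 b; },
 * field_avail(struct r, b, 8) is false and field_avail(struct r, b, 16)
 * is true. Used below to decide which extended attributes fit in uhw.
 */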
 423#define field_avail(type, fld, sz) (offsetof(type, fld) + \
 424				    sizeof(((type *)0)->fld) <= (sz))
 425
 426static int mlx4_ib_query_device(struct ib_device *ibdev,
 427				struct ib_device_attr *props,
 428				struct ib_udata *uhw)
 429{
 430	struct mlx4_ib_dev *dev = to_mdev(ibdev);
 431	struct ib_smp *in_mad  = NULL;
 432	struct ib_smp *out_mad = NULL;
 433	int err;
 434	int have_ib_ports;
 435	struct mlx4_uverbs_ex_query_device cmd;
 436	struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0};
 437	struct mlx4_clock_params clock_params;
 438
 439	if (uhw->inlen) {
 440		if (uhw->inlen < sizeof(cmd))
 441			return -EINVAL;
 442
 443		err = ib_copy_from_udata(&cmd, uhw, sizeof(cmd));
 444		if (err)
 445			return err;
 446
 447		if (cmd.comp_mask)
 448			return -EINVAL;
 449
 450		if (cmd.reserved)
 451			return -EINVAL;
 452	}
 453
 454	resp.response_length = offsetof(typeof(resp), response_length) +
 455		sizeof(resp.response_length);
 456	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 457	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 458	err = -ENOMEM;
 459	if (!in_mad || !out_mad)
 460		goto out;
 461
 462	init_query_mad(in_mad);
 463	in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
 464
 465	err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
 466			   1, NULL, NULL, in_mad, out_mad);
 467	if (err)
 468		goto out;
 469
 470	memset(props, 0, sizeof *props);
 471
 472	have_ib_ports = num_ib_ports(dev->dev);
 473
 474	props->fw_ver = dev->dev->caps.fw_ver;
 475	props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
 476		IB_DEVICE_PORT_ACTIVE_EVENT		|
 477		IB_DEVICE_SYS_IMAGE_GUID		|
 478		IB_DEVICE_RC_RNR_NAK_GEN		|
 479		IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
 480	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
 481		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
 482	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
 483		props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
 484	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM && have_ib_ports)
 485		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
 486	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
 487		props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
 488	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
 489		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
 490	if (dev->dev->caps.max_gso_sz &&
 491	    (dev->dev->rev_id != MLX4_IB_CARD_REV_A0) &&
 492	    (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH))
 493		props->device_cap_flags |= IB_DEVICE_UD_TSO;
 494	if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
 495		props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
 496	if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
 497	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
 498	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
 499		props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
 500	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
 501		props->device_cap_flags |= IB_DEVICE_XRC;
 502	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)
 503		props->device_cap_flags |= IB_DEVICE_MEM_WINDOW;
 504	if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
 505		if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B)
 506			props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
 507		else
 508			props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
 509	}
 510	if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
 511		props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
 512
 513	props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
 514
 515	props->vendor_id	   = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
 516		0xffffff;
 517	props->vendor_part_id	   = dev->dev->persist->pdev->device;
 518	props->hw_ver		   = be32_to_cpup((__be32 *) (out_mad->data + 32));
 519	memcpy(&props->sys_image_guid, out_mad->data +	4, 8);
 520
 521	props->max_mr_size	   = ~0ull;
 522	props->page_size_cap	   = dev->dev->caps.page_size_cap;
 523	props->max_qp		   = dev->dev->quotas.qp;
 524	props->max_qp_wr	   = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
 525	props->max_sge		   = min(dev->dev->caps.max_sq_sg,
 526					 dev->dev->caps.max_rq_sg);
 527	props->max_sge_rd	   = MLX4_MAX_SGE_RD;
 528	props->max_cq		   = dev->dev->quotas.cq;
 529	props->max_cqe		   = dev->dev->caps.max_cqes;
 530	props->max_mr		   = dev->dev->quotas.mpt;
 531	props->max_pd		   = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
 532	props->max_qp_rd_atom	   = dev->dev->caps.max_qp_dest_rdma;
 533	props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
 534	props->max_res_rd_atom	   = props->max_qp_rd_atom * props->max_qp;
 535	props->max_srq		   = dev->dev->quotas.srq;
 536	props->max_srq_wr	   = dev->dev->caps.max_srq_wqes - 1;
 537	props->max_srq_sge	   = dev->dev->caps.max_srq_sge;
 538	props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
 539	props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
 540	props->atomic_cap	   = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
 541		IB_ATOMIC_HCA : IB_ATOMIC_NONE;
 542	props->masked_atomic_cap   = props->atomic_cap;
 543	props->max_pkeys	   = dev->dev->caps.pkey_table_len[1];
 544	props->max_mcast_grp	   = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
 545	props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
 546	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
 547					   props->max_mcast_grp;
 548	props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
 549	props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
 550	props->timestamp_mask = 0xFFFFFFFFFFFFULL;
 551	props->max_ah = INT_MAX;
 552
 553	if (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET ||
 554	    mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET) {
 555		if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) {
 556			props->rss_caps.max_rwq_indirection_tables =
 557				props->max_qp;
 558			props->rss_caps.max_rwq_indirection_table_size =
 559				dev->dev->caps.max_rss_tbl_sz;
 560			props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
 561			props->max_wq_type_rq = props->max_qp;
 562		}
 563
 564		if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)
 565			props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
 566	}
 567
 568	props->cq_caps.max_cq_moderation_count = MLX4_MAX_CQ_COUNT;
 569	props->cq_caps.max_cq_moderation_period = MLX4_MAX_CQ_PERIOD;
 570
 571	if (!mlx4_is_slave(dev->dev))
 572		err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
 573
 574	if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
 575		resp.response_length += sizeof(resp.hca_core_clock_offset);
 576		if (!err && !mlx4_is_slave(dev->dev)) {
 577			resp.comp_mask |= MLX4_IB_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET;
 578			resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
 579		}
 580	}
 581
 582	if (uhw->outlen >= resp.response_length +
 583	    sizeof(resp.max_inl_recv_sz)) {
 584		resp.response_length += sizeof(resp.max_inl_recv_sz);
 585		resp.max_inl_recv_sz  = dev->dev->caps.max_rq_sg *
 586			sizeof(struct mlx4_wqe_data_seg);
 587	}
 588
 589	if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
 590		if (props->rss_caps.supported_qpts) {
 591			resp.rss_caps.rx_hash_function =
 592				MLX4_IB_RX_HASH_FUNC_TOEPLITZ;
 593
 594			resp.rss_caps.rx_hash_fields_mask =
 595				MLX4_IB_RX_HASH_SRC_IPV4 |
 596				MLX4_IB_RX_HASH_DST_IPV4 |
 597				MLX4_IB_RX_HASH_SRC_IPV6 |
 598				MLX4_IB_RX_HASH_DST_IPV6 |
 599				MLX4_IB_RX_HASH_SRC_PORT_TCP |
 600				MLX4_IB_RX_HASH_DST_PORT_TCP |
 601				MLX4_IB_RX_HASH_SRC_PORT_UDP |
 602				MLX4_IB_RX_HASH_DST_PORT_UDP;
 603
 604			if (dev->dev->caps.tunnel_offload_mode ==
 605			    MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
 606				resp.rss_caps.rx_hash_fields_mask |=
 607					MLX4_IB_RX_HASH_INNER;
 608		}
 609		resp.response_length = offsetof(typeof(resp), rss_caps) +
 610				       sizeof(resp.rss_caps);
 611	}
 612
 613	if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
 614		if (dev->dev->caps.max_gso_sz &&
 615		    ((mlx4_ib_port_link_layer(ibdev, 1) ==
 616		    IB_LINK_LAYER_ETHERNET) ||
 617		    (mlx4_ib_port_link_layer(ibdev, 2) ==
 618		    IB_LINK_LAYER_ETHERNET))) {
 619			resp.tso_caps.max_tso = dev->dev->caps.max_gso_sz;
 620			resp.tso_caps.supported_qpts |=
 621				1 << IB_QPT_RAW_PACKET;
 622		}
 623		resp.response_length = offsetof(typeof(resp), tso_caps) +
 624				       sizeof(resp.tso_caps);
 625	}
 626
 627	if (uhw->outlen) {
 628		err = ib_copy_to_udata(uhw, &resp, resp.response_length);
 629		if (err)
 630			goto out;
 631	}
 632out:
 633	kfree(in_mad);
 634	kfree(out_mad);
 635
 636	return err;
 637}
 638
 639static enum rdma_link_layer
 640mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
 641{
 642	struct mlx4_dev *dev = to_mdev(device)->dev;
 643
 644	return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ?
 645		IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
 646}
 647
 648static int ib_link_query_port(struct ib_device *ibdev, u8 port,
 649			      struct ib_port_attr *props, int netw_view)
 650{
 651	struct ib_smp *in_mad  = NULL;
 652	struct ib_smp *out_mad = NULL;
 653	int ext_active_speed;
 654	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
 655	int err = -ENOMEM;
 656
 657	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 658	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 659	if (!in_mad || !out_mad)
 660		goto out;
 661
 662	init_query_mad(in_mad);
 663	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
 664	in_mad->attr_mod = cpu_to_be32(port);
 665
 666	if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
 667		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
 668
 669	err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
 670				in_mad, out_mad);
 671	if (err)
 672		goto out;
 673
 674
 675	props->lid		= be16_to_cpup((__be16 *) (out_mad->data + 16));
 676	props->lmc		= out_mad->data[34] & 0x7;
 677	props->sm_lid		= be16_to_cpup((__be16 *) (out_mad->data + 18));
 678	props->sm_sl		= out_mad->data[36] & 0xf;
 679	props->state		= out_mad->data[32] & 0xf;
 680	props->phys_state	= out_mad->data[33] >> 4;
 681	props->port_cap_flags	= be32_to_cpup((__be32 *) (out_mad->data + 20));
 682	if (netw_view)
 683		props->gid_tbl_len = out_mad->data[50];
 684	else
 685		props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
 686	props->max_msg_sz	= to_mdev(ibdev)->dev->caps.max_msg_sz;
 687	props->pkey_tbl_len	= to_mdev(ibdev)->dev->caps.pkey_table_len[port];
 688	props->bad_pkey_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 46));
 689	props->qkey_viol_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 48));
 690	props->active_width	= out_mad->data[31] & 0xf;
 691	props->active_speed	= out_mad->data[35] >> 4;
 692	props->max_mtu		= out_mad->data[41] & 0xf;
 693	props->active_mtu	= out_mad->data[36] >> 4;
 694	props->subnet_timeout	= out_mad->data[51] & 0x1f;
 695	props->max_vl_num	= out_mad->data[37] >> 4;
 696	props->init_type_reply	= out_mad->data[41] >> 4;
 697
 698	/* Check if extended speeds (EDR/FDR/...) are supported */
 699	if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
 700		ext_active_speed = out_mad->data[62] >> 4;
 701
 702		switch (ext_active_speed) {
 703		case 1:
 704			props->active_speed = IB_SPEED_FDR;
 705			break;
 706		case 2:
 707			props->active_speed = IB_SPEED_EDR;
 708			break;
 709		}
 710	}
 711
 712	/* If reported active speed is QDR, check if is FDR-10 */
 713	if (props->active_speed == IB_SPEED_QDR) {
 714		init_query_mad(in_mad);
 715		in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
 716		in_mad->attr_mod = cpu_to_be32(port);
 717
 718		err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
 719				   NULL, NULL, in_mad, out_mad);
 720		if (err)
 721			goto out;
 722
 723		/* Checking LinkSpeedActive for FDR-10 */
 724		if (out_mad->data[15] & 0x1)
 725			props->active_speed = IB_SPEED_FDR10;
 726	}
 727
 728	/* Avoid wrong speed value returned by FW if the IB link is down. */
 729	if (props->state == IB_PORT_DOWN)
 730		 props->active_speed = IB_SPEED_SDR;
 731
 732out:
 733	kfree(in_mad);
 734	kfree(out_mad);
 735	return err;
 736}
 737
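/* Map the logical port state to the IB PortPhysicalState encoding used in
 * ib_port_attr: 5 (LinkUp) when the port is ACTIVE, otherwise 3 (Disabled).
 */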
 738static u8 state_to_phys_state(enum ib_port_state state)
 739{
 740	return state == IB_PORT_ACTIVE ? 5 : 3;
 741}
 742
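/* Fill ib_port_attr for an Ethernet (RoCE) port: active width and speed
 * come from the QUERY_PORT firmware command, while state and active MTU
 * are derived from the associated netdev (the bond master when bonded).
 * With no netdev registered the port is reported DOWN with a 256-byte MTU.
 */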
 743static int eth_link_query_port(struct ib_device *ibdev, u8 port,
 744			       struct ib_port_attr *props)
 745{
 746
 747	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
 748	struct mlx4_ib_iboe *iboe = &mdev->iboe;
 749	struct net_device *ndev;
 750	enum ib_mtu tmp;
 751	struct mlx4_cmd_mailbox *mailbox;
 752	int err = 0;
 753	int is_bonded = mlx4_is_bonded(mdev->dev);
 754
 755	mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
 756	if (IS_ERR(mailbox))
 757		return PTR_ERR(mailbox);
 758
 759	err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
 760			   MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
 761			   MLX4_CMD_WRAPPED);
 762	if (err)
 763		goto out;
 764
 765	props->active_width	=  (((u8 *)mailbox->buf)[5] == 0x40) ||
 766				   (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
 767					   IB_WIDTH_4X : IB_WIDTH_1X;
 768	props->active_speed	=  (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
 769					   IB_SPEED_FDR : IB_SPEED_QDR;
 770	props->port_cap_flags	= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
 771	props->gid_tbl_len	= mdev->dev->caps.gid_table_len[port];
 772	props->max_msg_sz	= mdev->dev->caps.max_msg_sz;
 773	props->pkey_tbl_len	= 1;
 774	props->max_mtu		= IB_MTU_4096;
 775	props->max_vl_num	= 2;
 776	props->state		= IB_PORT_DOWN;
 777	props->phys_state	= state_to_phys_state(props->state);
 778	props->active_mtu	= IB_MTU_256;
 779	spin_lock_bh(&iboe->lock);
 780	ndev = iboe->netdevs[port - 1];
 781	if (ndev && is_bonded) {
 782		rcu_read_lock(); /* required to get upper dev */
 783		ndev = netdev_master_upper_dev_get_rcu(ndev);
 784		rcu_read_unlock();
 785	}
 786	if (!ndev)
 787		goto out_unlock;
 788
 789	tmp = iboe_get_mtu(ndev->mtu);
 790	props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
 791
 792	props->state		= (netif_running(ndev) && netif_carrier_ok(ndev)) ?
 793					IB_PORT_ACTIVE : IB_PORT_DOWN;
 794	props->phys_state	= state_to_phys_state(props->state);
 795out_unlock:
 796	spin_unlock_bh(&iboe->lock);
 797out:
 798	mlx4_free_cmd_mailbox(mdev->dev, mailbox);
 799	return err;
 800}
 801
 802int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
 803			 struct ib_port_attr *props, int netw_view)
 804{
 805	int err;
 806
 807	/* props being zeroed by the caller, avoid zeroing it here */
 808
 809	err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
 810		ib_link_query_port(ibdev, port, props, netw_view) :
 811				eth_link_query_port(ibdev, port, props);
 812
 813	return err;
 814}
 815
 816static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
 817			      struct ib_port_attr *props)
 818{
 819	/* returns host view */
 820	return __mlx4_ib_query_port(ibdev, port, props, 0);
 821}
 822
 823int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
 824			union ib_gid *gid, int netw_view)
 825{
 826	struct ib_smp *in_mad  = NULL;
 827	struct ib_smp *out_mad = NULL;
 828	int err = -ENOMEM;
 829	struct mlx4_ib_dev *dev = to_mdev(ibdev);
 830	int clear = 0;
 831	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
 832
 833	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 834	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 835	if (!in_mad || !out_mad)
 836		goto out;
 837
 838	init_query_mad(in_mad);
 839	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
 840	in_mad->attr_mod = cpu_to_be32(port);
 841
 842	if (mlx4_is_mfunc(dev->dev) && netw_view)
 843		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
 844
 845	err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
 846	if (err)
 847		goto out;
 848
 849	memcpy(gid->raw, out_mad->data + 8, 8);
 850
 851	if (mlx4_is_mfunc(dev->dev) && !netw_view) {
 852		if (index) {
 853			/* For any index > 0, return the null guid */
 854			err = 0;
 855			clear = 1;
 856			goto out;
 857		}
 858	}
 859
 860	init_query_mad(in_mad);
 861	in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
 862	in_mad->attr_mod = cpu_to_be32(index / 8);
 863
 864	err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
 865			   NULL, NULL, in_mad, out_mad);
 866	if (err)
 867		goto out;
 868
 869	memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
 870
 871out:
 872	if (clear)
 873		memset(gid->raw + 8, 0, 8);
 874	kfree(in_mad);
 875	kfree(out_mad);
 876	return err;
 877}
 878
 879static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
 880			     union ib_gid *gid)
 881{
 882	if (rdma_protocol_ib(ibdev, port))
 883		return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
 884	return 0;
 885}
 886
 887static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl)
 888{
 889	union sl2vl_tbl_to_u64 sl2vl64;
 890	struct ib_smp *in_mad  = NULL;
 891	struct ib_smp *out_mad = NULL;
 892	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
 893	int err = -ENOMEM;
 894	int jj;
 895
 896	if (mlx4_is_slave(to_mdev(ibdev)->dev)) {
 897		*sl2vl_tbl = 0;
 898		return 0;
 899	}
 900
 901	in_mad  = kzalloc(sizeof(*in_mad), GFP_KERNEL);
 902	out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
 903	if (!in_mad || !out_mad)
 904		goto out;
 905
 906	init_query_mad(in_mad);
 907	in_mad->attr_id  = IB_SMP_ATTR_SL_TO_VL_TABLE;
 908	in_mad->attr_mod = 0;
 909
 910	if (mlx4_is_mfunc(to_mdev(ibdev)->dev))
 911		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
 912
 913	err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
 914			   in_mad, out_mad);
 915	if (err)
 916		goto out;
 917
 918	for (jj = 0; jj < 8; jj++)
 919		sl2vl64.sl8[jj] = ((struct ib_smp *)out_mad)->data[jj];
 920	*sl2vl_tbl = sl2vl64.sl64;
 921
 922out:
 923	kfree(in_mad);
 924	kfree(out_mad);
 925	return err;
 926}
 927
 928static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev)
 929{
 930	u64 sl2vl;
 931	int i;
 932	int err;
 933
 934	for (i = 1; i <= mdev->dev->caps.num_ports; i++) {
 935		if (mdev->dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
 936			continue;
 937		err = mlx4_ib_query_sl2vl(&mdev->ib_dev, i, &sl2vl);
 938		if (err) {
 939			pr_err("Unable to get default sl to vl mapping for port %d.  Using all zeroes (%d)\n",
 940			       i, err);
 941			sl2vl = 0;
 942		}
 943		atomic64_set(&mdev->sl2vl[i - 1], sl2vl);
 944	}
 945}
 946
 947int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
 948			 u16 *pkey, int netw_view)
 949{
 950	struct ib_smp *in_mad  = NULL;
 951	struct ib_smp *out_mad = NULL;
 952	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
 953	int err = -ENOMEM;
 954
 955	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 956	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 957	if (!in_mad || !out_mad)
 958		goto out;
 959
 960	init_query_mad(in_mad);
 961	in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
 962	in_mad->attr_mod = cpu_to_be32(index / 32);
 963
 964	if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
 965		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
 966
 967	err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
 968			   in_mad, out_mad);
 969	if (err)
 970		goto out;
 971
 972	*pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
 973
 974out:
 975	kfree(in_mad);
 976	kfree(out_mad);
 977	return err;
 978}
 979
 980static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
 981{
 982	return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
 983}
 984
 985static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
 986				 struct ib_device_modify *props)
 987{
 988	struct mlx4_cmd_mailbox *mailbox;
 989	unsigned long flags;
 990
 991	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
 992		return -EOPNOTSUPP;
 993
 994	if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
 995		return 0;
 996
 997	if (mlx4_is_slave(to_mdev(ibdev)->dev))
 998		return -EOPNOTSUPP;
 999
1000	spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
1001	memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
1002	spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
1003
1004	/*
1005	 * If possible, pass node desc to FW, so it can generate
1006	 * a 144 trap.  If cmd fails, just ignore.
1007	 */
1008	mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev);
1009	if (IS_ERR(mailbox))
1010		return 0;
1011
1012	memcpy(mailbox->buf, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
1013	mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
1014		 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
1015
1016	mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
1017
1018	return 0;
1019}
1020
1021static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
1022			    u32 cap_mask)
1023{
1024	struct mlx4_cmd_mailbox *mailbox;
1025	int err;
1026
1027	mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
1028	if (IS_ERR(mailbox))
1029		return PTR_ERR(mailbox);
1030
1031	if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
1032		*(u8 *) mailbox->buf	     = !!reset_qkey_viols << 6;
1033		((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
1034	} else {
1035		((u8 *) mailbox->buf)[3]     = !!reset_qkey_viols;
1036		((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
1037	}
1038
1039	err = mlx4_cmd(dev->dev, mailbox->dma, port, MLX4_SET_PORT_IB_OPCODE,
1040		       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
1041		       MLX4_CMD_WRAPPED);
1042
1043	mlx4_free_cmd_mailbox(dev->dev, mailbox);
1044	return err;
1045}
1046
1047static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
1048			       struct ib_port_modify *props)
1049{
1050	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
1051	u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
1052	struct ib_port_attr attr;
1053	u32 cap_mask;
1054	int err;
1055
1056	/* return OK if this is RoCE. CM calls ib_modify_port() regardless
1057	 * of whether port link layer is ETH or IB. For ETH ports, qkey
1058	 * violations and port capabilities are not meaningful.
1059	 */
1060	if (is_eth)
1061		return 0;
1062
1063	mutex_lock(&mdev->cap_mask_mutex);
1064
1065	err = ib_query_port(ibdev, port, &attr);
1066	if (err)
1067		goto out;
1068
1069	cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
1070		~props->clr_port_cap_mask;
1071
1072	err = mlx4_ib_SET_PORT(mdev, port,
1073			       !!(mask & IB_PORT_RESET_QKEY_CNTR),
1074			       cap_mask);
1075
1076out:
1077	mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
1078	return err;
1079}
1080
1081static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
1082						  struct ib_udata *udata)
1083{
1084	struct mlx4_ib_dev *dev = to_mdev(ibdev);
1085	struct mlx4_ib_ucontext *context;
1086	struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
1087	struct mlx4_ib_alloc_ucontext_resp resp;
1088	int err;
1089
1090	if (!dev->ib_active)
1091		return ERR_PTR(-EAGAIN);
1092
1093	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
1094		resp_v3.qp_tab_size      = dev->dev->caps.num_qps;
1095		resp_v3.bf_reg_size      = dev->dev->caps.bf_reg_size;
1096		resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
1097	} else {
1098		resp.dev_caps	      = dev->dev->caps.userspace_caps;
1099		resp.qp_tab_size      = dev->dev->caps.num_qps;
1100		resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
1101		resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
1102		resp.cqe_size	      = dev->dev->caps.cqe_size;
1103	}
1104
1105	context = kzalloc(sizeof(*context), GFP_KERNEL);
1106	if (!context)
1107		return ERR_PTR(-ENOMEM);
1108
1109	err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
1110	if (err) {
1111		kfree(context);
1112		return ERR_PTR(err);
1113	}
1114
1115	INIT_LIST_HEAD(&context->db_page_list);
1116	mutex_init(&context->db_page_mutex);
1117
1118	INIT_LIST_HEAD(&context->wqn_ranges_list);
1119	mutex_init(&context->wqn_ranges_mutex);
1120
1121	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
1122		err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
1123	else
1124		err = ib_copy_to_udata(udata, &resp, sizeof(resp));
1125
1126	if (err) {
1127		mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
1128		kfree(context);
1129		return ERR_PTR(-EFAULT);
1130	}
1131
1132	return &context->ibucontext;
1133}
1134
1135static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
1136{
1137	struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
1138
1139	mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
1140	kfree(context);
1141
1142	return 0;
1143}
1144
1145static void  mlx4_ib_vma_open(struct vm_area_struct *area)
1146{
1147	/* vma_open is called when a new VMA is created on top of our VMA.
1148	 * This is done through either mremap flow or split_vma (usually due
1149	 * to mlock, madvise, munmap, etc.). We do not support a clone of the
1150	 * vma, as this VMA is strongly hardware related. Therefore we set the
1151	 * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
1152	 * calling us again and trying to do incorrect actions. We assume that
1153	 * the original vma size is exactly a single page that there will be no
1154	 * "splitting" operations on.
1155	 */
1156	area->vm_ops = NULL;
1157}
1158
1159static void  mlx4_ib_vma_close(struct vm_area_struct *area)
1160{
1161	struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data;
1162
1163	/* It's guaranteed that all VMAs opened on a FD are closed before the
1164	 * file itself is closed, therefore no sync is needed with the regular
 1165	 * closing flow (e.g. mlx4_ib_dealloc_ucontext). However, a sync is needed
 1166	 * with accessing the vma as part of mlx4_ib_disassociate_ucontext.
1167	 * The close operation is usually called under mm->mmap_sem except when
1168	 * process is exiting.  The exiting case is handled explicitly as part
1169	 * of mlx4_ib_disassociate_ucontext.
1170	 */
1171	mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *)
1172				area->vm_private_data;
1173
1174	/* set the vma context pointer to null in the mlx4_ib driver's private
 1175	 * data to protect against a race condition in mlx4_ib_disassociate_ucontext().
1176	 */
1177	mlx4_ib_vma_priv_data->vma = NULL;
1178}
1179
1180static const struct vm_operations_struct mlx4_ib_vm_ops = {
1181	.open = mlx4_ib_vma_open,
1182	.close = mlx4_ib_vma_close
1183};
1184
1185static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
1186{
1187	int i;
1188	int ret = 0;
1189	struct vm_area_struct *vma;
1190	struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
1191	struct task_struct *owning_process  = NULL;
1192	struct mm_struct   *owning_mm       = NULL;
1193
1194	owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
1195	if (!owning_process)
1196		return;
1197
1198	owning_mm = get_task_mm(owning_process);
1199	if (!owning_mm) {
1200		pr_info("no mm, disassociate ucontext is pending task termination\n");
1201		while (1) {
 1202			/* make sure the task is dead before returning; this prevents
 1203			 * a rare case of the module going down in parallel with a
 1204			 * call to mlx4_ib_vma_close.
1205			 */
1206			put_task_struct(owning_process);
1207			usleep_range(1000, 2000);
1208			owning_process = get_pid_task(ibcontext->tgid,
1209						      PIDTYPE_PID);
1210			if (!owning_process ||
1211			    owning_process->state == TASK_DEAD) {
1212				pr_info("disassociate ucontext done, task was terminated\n");
1213				/* in case task was dead need to release the task struct */
1214				if (owning_process)
1215					put_task_struct(owning_process);
1216				return;
1217			}
1218		}
1219	}
1220
1221	/* need to protect from a race on closing the vma as part of
1222	 * mlx4_ib_vma_close().
1223	 */
1224	down_write(&owning_mm->mmap_sem);
1225	for (i = 0; i < HW_BAR_COUNT; i++) {
1226		vma = context->hw_bar_info[i].vma;
1227		if (!vma)
1228			continue;
1229
1230		ret = zap_vma_ptes(context->hw_bar_info[i].vma,
1231				   context->hw_bar_info[i].vma->vm_start,
1232				   PAGE_SIZE);
1233		if (ret) {
1234			pr_err("Error: zap_vma_ptes failed for index=%d, ret=%d\n", i, ret);
1235			BUG_ON(1);
1236		}
1237
1238		context->hw_bar_info[i].vma->vm_flags &=
1239			~(VM_SHARED | VM_MAYSHARE);
1240		/* context going to be destroyed, should not access ops any more */
1241		context->hw_bar_info[i].vma->vm_ops = NULL;
1242	}
1243
1244	up_write(&owning_mm->mmap_sem);
1245	mmput(owning_mm);
1246	put_task_struct(owning_process);
1247}
1248
1249static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
1250				 struct mlx4_ib_vma_private_data *vma_private_data)
1251{
1252	vma_private_data->vma = vma;
1253	vma->vm_private_data = vma_private_data;
1254	vma->vm_ops =  &mlx4_ib_vm_ops;
1255}
1256
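/* mmap offsets understood by this driver: page 0 maps the context's UAR
 * doorbell page (non-cached), page 1 maps the blue-flame page
 * (write-combining, only when bf_reg_size != 0) and page 3 maps the HCA's
 * internal clock page. Each may be mapped at most once per context.
 */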
1257static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
1258{
1259	struct mlx4_ib_dev *dev = to_mdev(context->device);
1260	struct mlx4_ib_ucontext *mucontext = to_mucontext(context);
1261
1262	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1263		return -EINVAL;
1264
1265	if (vma->vm_pgoff == 0) {
 1266		/* We prevent double mmapping on the same context */
1267		if (mucontext->hw_bar_info[HW_BAR_DB].vma)
1268			return -EINVAL;
1269
1270		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1271
1272		if (io_remap_pfn_range(vma, vma->vm_start,
1273				       to_mucontext(context)->uar.pfn,
1274				       PAGE_SIZE, vma->vm_page_prot))
1275			return -EAGAIN;
1276
1277		mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);
1278
1279	} else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
 1280		/* We prevent double mmapping on the same context */
1281		if (mucontext->hw_bar_info[HW_BAR_BF].vma)
1282			return -EINVAL;
1283
1284		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
1285
1286		if (io_remap_pfn_range(vma, vma->vm_start,
1287				       to_mucontext(context)->uar.pfn +
1288				       dev->dev->caps.num_uars,
1289				       PAGE_SIZE, vma->vm_page_prot))
1290			return -EAGAIN;
1291
1292		mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);
1293
1294	} else if (vma->vm_pgoff == 3) {
1295		struct mlx4_clock_params params;
1296		int ret;
1297
 1298		/* We prevent double mmapping on the same context */
1299		if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
1300			return -EINVAL;
1301
1302		ret = mlx4_get_internal_clock_params(dev->dev, &params);
1303
1304		if (ret)
1305			return ret;
1306
1307		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1308		if (io_remap_pfn_range(vma, vma->vm_start,
1309				       (pci_resource_start(dev->dev->persist->pdev,
1310							   params.bar) +
1311					params.offset)
1312				       >> PAGE_SHIFT,
1313				       PAGE_SIZE, vma->vm_page_prot))
1314			return -EAGAIN;
1315
1316		mlx4_ib_set_vma_data(vma,
1317				     &mucontext->hw_bar_info[HW_BAR_CLOCK]);
1318	} else {
1319		return -EINVAL;
1320	}
1321
1322	return 0;
1323}
1324
1325static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
1326				      struct ib_ucontext *context,
1327				      struct ib_udata *udata)
1328{
1329	struct mlx4_ib_pd *pd;
1330	int err;
1331
1332	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
1333	if (!pd)
1334		return ERR_PTR(-ENOMEM);
1335
1336	err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
1337	if (err) {
1338		kfree(pd);
1339		return ERR_PTR(err);
1340	}
1341
1342	if (context)
1343		if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
1344			mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
1345			kfree(pd);
1346			return ERR_PTR(-EFAULT);
1347		}
1348	return &pd->ibpd;
1349}
1350
1351static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
1352{
1353	mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
1354	kfree(pd);
1355
1356	return 0;
1357}
1358
1359static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
1360					  struct ib_ucontext *context,
1361					  struct ib_udata *udata)
1362{
1363	struct mlx4_ib_xrcd *xrcd;
1364	struct ib_cq_init_attr cq_attr = {};
1365	int err;
1366
1367	if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
1368		return ERR_PTR(-ENOSYS);
1369
1370	xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
1371	if (!xrcd)
1372		return ERR_PTR(-ENOMEM);
1373
1374	err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
1375	if (err)
1376		goto err1;
1377
1378	xrcd->pd = ib_alloc_pd(ibdev, 0);
1379	if (IS_ERR(xrcd->pd)) {
1380		err = PTR_ERR(xrcd->pd);
1381		goto err2;
1382	}
1383
1384	cq_attr.cqe = 1;
1385	xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr);
1386	if (IS_ERR(xrcd->cq)) {
1387		err = PTR_ERR(xrcd->cq);
1388		goto err3;
1389	}
1390
1391	return &xrcd->ibxrcd;
1392
1393err3:
1394	ib_dealloc_pd(xrcd->pd);
1395err2:
1396	mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
1397err1:
1398	kfree(xrcd);
1399	return ERR_PTR(err);
1400}
1401
1402static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
1403{
1404	ib_destroy_cq(to_mxrcd(xrcd)->cq);
1405	ib_dealloc_pd(to_mxrcd(xrcd)->pd);
1406	mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
1407	kfree(xrcd);
1408
1409	return 0;
1410}
1411
1412static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
1413{
1414	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1415	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1416	struct mlx4_ib_gid_entry *ge;
1417
1418	ge = kzalloc(sizeof *ge, GFP_KERNEL);
1419	if (!ge)
1420		return -ENOMEM;
1421
1422	ge->gid = *gid;
1423	if (mlx4_ib_add_mc(mdev, mqp, gid)) {
1424		ge->port = mqp->port;
1425		ge->added = 1;
1426	}
1427
1428	mutex_lock(&mqp->mutex);
1429	list_add_tail(&ge->list, &mqp->gid_list);
1430	mutex_unlock(&mqp->mutex);
1431
1432	return 0;
1433}
1434
1435static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev,
1436					  struct mlx4_ib_counters *ctr_table)
1437{
1438	struct counter_index *counter, *tmp_count;
1439
1440	mutex_lock(&ctr_table->mutex);
1441	list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list,
1442				 list) {
1443		if (counter->allocated)
1444			mlx4_counter_free(ibdev->dev, counter->index);
1445		list_del(&counter->list);
1446		kfree(counter);
1447	}
1448	mutex_unlock(&ctr_table->mutex);
1449}
1450
1451int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
1452		   union ib_gid *gid)
1453{
1454	struct net_device *ndev;
1455	int ret = 0;
1456
1457	if (!mqp->port)
1458		return 0;
1459
1460	spin_lock_bh(&mdev->iboe.lock);
1461	ndev = mdev->iboe.netdevs[mqp->port - 1];
1462	if (ndev)
1463		dev_hold(ndev);
1464	spin_unlock_bh(&mdev->iboe.lock);
1465
1466	if (ndev) {
1467		ret = 1;
1468		dev_put(ndev);
1469	}
1470
1471	return ret;
1472}
1473
1474struct mlx4_ib_steering {
1475	struct list_head list;
1476	struct mlx4_flow_reg_id reg_id;
1477	union ib_gid gid;
1478};
1479
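/* The LAST_*_FIELD markers name the last member of each flow-spec mask
 * that this driver can match on; FIELDS_NOT_SUPPORTED() below uses
 * memchr_inv() to detect any non-zero byte after that member, i.e. a
 * request to match on fields the hardware rule cannot express.
 */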
1480#define LAST_ETH_FIELD vlan_tag
1481#define LAST_IB_FIELD sl
1482#define LAST_IPV4_FIELD dst_ip
1483#define LAST_TCP_UDP_FIELD src_port
1484
1485/* Field is the last supported field */
1486#define FIELDS_NOT_SUPPORTED(filter, field)\
1487	memchr_inv((void *)&filter.field  +\
1488		   sizeof(filter.field), 0,\
1489		   sizeof(filter) -\
1490		   offsetof(typeof(filter), field) -\
1491		   sizeof(filter.field))
1492
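/* Translate one union ib_flow_spec (ETH, IB, IPv4, TCP or UDP) into the
 * corresponding hardware rule segment at *mlx4_spec. Returns the segment
 * size in bytes so the caller can advance to the next segment, or a
 * negative errno for unsupported spec types or mask fields.
 */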
1493static int parse_flow_attr(struct mlx4_dev *dev,
1494			   u32 qp_num,
1495			   union ib_flow_spec *ib_spec,
1496			   struct _rule_hw *mlx4_spec)
1497{
1498	enum mlx4_net_trans_rule_id type;
1499
1500	switch (ib_spec->type) {
1501	case IB_FLOW_SPEC_ETH:
1502		if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
1503			return -ENOTSUPP;
1504
1505		type = MLX4_NET_TRANS_RULE_ID_ETH;
1506		memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
1507		       ETH_ALEN);
1508		memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
1509		       ETH_ALEN);
1510		mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
1511		mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
1512		break;
1513	case IB_FLOW_SPEC_IB:
1514		if (FIELDS_NOT_SUPPORTED(ib_spec->ib.mask, LAST_IB_FIELD))
1515			return -ENOTSUPP;
1516
1517		type = MLX4_NET_TRANS_RULE_ID_IB;
1518		mlx4_spec->ib.l3_qpn =
1519			cpu_to_be32(qp_num);
1520		mlx4_spec->ib.qpn_mask =
1521			cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
1522		break;
1523
1524
1525	case IB_FLOW_SPEC_IPV4:
1526		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
1527			return -ENOTSUPP;
1528
1529		type = MLX4_NET_TRANS_RULE_ID_IPV4;
1530		mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
1531		mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
1532		mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
1533		mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
1534		break;
1535
1536	case IB_FLOW_SPEC_TCP:
1537	case IB_FLOW_SPEC_UDP:
1538		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD))
1539			return -ENOTSUPP;
1540
1541		type = ib_spec->type == IB_FLOW_SPEC_TCP ?
1542					MLX4_NET_TRANS_RULE_ID_TCP :
1543					MLX4_NET_TRANS_RULE_ID_UDP;
1544		mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
1545		mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port;
1546		mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
1547		mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port;
1548		break;
1549
1550	default:
1551		return -EINVAL;
1552	}
1553	if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 ||
1554	    mlx4_hw_rule_sz(dev, type) < 0)
1555		return -EINVAL;
1556	mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type));
1557	mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2;
1558	return mlx4_hw_rule_sz(dev, type);
1559}
1560
1561struct default_rules {
1562	__u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
1563	__u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
1564	__u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
1565	__u8  link_layer;
1566};
1567static const struct default_rules default_table[] = {
1568	{
1569		.mandatory_fields = {IB_FLOW_SPEC_IPV4},
1570		.mandatory_not_fields = {IB_FLOW_SPEC_ETH},
1571		.rules_create_list = {IB_FLOW_SPEC_IB},
1572		.link_layer = IB_LINK_LAYER_INFINIBAND
1573	}
1574};
1575
1576static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
1577					 struct ib_flow_attr *flow_attr)
1578{
1579	int i, j, k;
1580	void *ib_flow;
1581	const struct default_rules *pdefault_rules = default_table;
1582	u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
1583
1584	for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) {
1585		__u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
1586		memset(&field_types, 0, sizeof(field_types));
1587
1588		if (link_layer != pdefault_rules->link_layer)
1589			continue;
1590
1591		ib_flow = flow_attr + 1;
1592		/* we assume the specs are sorted */
1593		for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
1594		     j < flow_attr->num_of_specs; k++) {
1595			union ib_flow_spec *current_flow =
1596				(union ib_flow_spec *)ib_flow;
1597
1598			/* same layer but different type */
1599			if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
1600			     (pdefault_rules->mandatory_fields[k] &
1601			      IB_FLOW_SPEC_LAYER_MASK)) &&
1602			    (current_flow->type !=
1603			     pdefault_rules->mandatory_fields[k]))
1604				goto out;
1605
1606			/* same layer, try match next one */
1607			if (current_flow->type ==
1608			    pdefault_rules->mandatory_fields[k]) {
1609				j++;
1610				ib_flow +=
1611					((union ib_flow_spec *)ib_flow)->size;
1612			}
1613		}
1614
1615		ib_flow = flow_attr + 1;
1616		for (j = 0; j < flow_attr->num_of_specs;
1617		     j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
1618			for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
1619				/* same layer and same type */
1620				if (((union ib_flow_spec *)ib_flow)->type ==
1621				    pdefault_rules->mandatory_not_fields[k])
1622					goto out;
1623
1624		return i;
1625	}
1626out:
1627	return -1;
1628}
1629
1630static int __mlx4_ib_create_default_rules(
1631		struct mlx4_ib_dev *mdev,
1632		struct ib_qp *qp,
1633		const struct default_rules *pdefault_rules,
1634		struct _rule_hw *mlx4_spec) {
1635	int size = 0;
1636	int i;
1637
1638	for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
1639		int ret;
1640		union ib_flow_spec ib_spec;
1641		switch (pdefault_rules->rules_create_list[i]) {
1642		case 0:
1643			/* no rule */
1644			continue;
1645		case IB_FLOW_SPEC_IB:
1646			ib_spec.type = IB_FLOW_SPEC_IB;
1647			ib_spec.size = sizeof(struct ib_flow_spec_ib);
1648
1649			break;
1650		default:
1651			/* invalid rule */
1652			return -EINVAL;
1653		}
 1654		/* We must put an empty rule here; the qpn is being ignored */
1655		ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
1656				      mlx4_spec);
1657		if (ret < 0) {
1658			pr_info("invalid parsing\n");
1659			return -EINVAL;
1660		}
1661
1662		mlx4_spec = (void *)mlx4_spec + ret;
1663		size += ret;
1664	}
1665	return size;
1666}
1667
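/* Build a complete steering rule in a command mailbox (control header,
 * any default rules matched for this QP, then one parsed segment per user
 * spec) and attach it with MLX4_QP_FLOW_STEERING_ATTACH. The 64-bit
 * reg_id returned by firmware is what __mlx4_ib_destroy_flow() later
 * needs to detach the rule.
 */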
1668static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
1669			  int domain,
1670			  enum mlx4_net_trans_promisc_mode flow_type,
1671			  u64 *reg_id)
1672{
1673	int ret, i;
1674	int size = 0;
1675	void *ib_flow;
1676	struct mlx4_ib_dev *mdev = to_mdev(qp->device);
1677	struct mlx4_cmd_mailbox *mailbox;
1678	struct mlx4_net_trans_rule_hw_ctrl *ctrl;
1679	int default_flow;
1680
1681	static const u16 __mlx4_domain[] = {
1682		[IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
1683		[IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
1684		[IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
1685		[IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
1686	};
1687
1688	if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
1689		pr_err("Invalid priority value %d\n", flow_attr->priority);
1690		return -EINVAL;
1691	}
1692
1693	if (domain >= IB_FLOW_DOMAIN_NUM) {
1694		pr_err("Invalid domain value %d\n", domain);
1695		return -EINVAL;
1696	}
1697
1698	if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
1699		return -EINVAL;
1700
1701	mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
1702	if (IS_ERR(mailbox))
1703		return PTR_ERR(mailbox);
1704	ctrl = mailbox->buf;
1705
1706	ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
1707				 flow_attr->priority);
1708	ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
1709	ctrl->port = flow_attr->port;
1710	ctrl->qpn = cpu_to_be32(qp->qp_num);
1711
1712	ib_flow = flow_attr + 1;
1713	size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
1714	/* Add default flows */
1715	default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
1716	if (default_flow >= 0) {
1717		ret = __mlx4_ib_create_default_rules(
1718				mdev, qp, default_table + default_flow,
1719				mailbox->buf + size);
1720		if (ret < 0) {
1721			mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1722			return -EINVAL;
1723		}
1724		size += ret;
1725	}
1726	for (i = 0; i < flow_attr->num_of_specs; i++) {
1727		ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
1728				      mailbox->buf + size);
1729		if (ret < 0) {
1730			mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1731			return -EINVAL;
1732		}
1733		ib_flow += ((union ib_flow_spec *) ib_flow)->size;
1734		size += ret;
1735	}
1736
1737	if (mlx4_is_master(mdev->dev) && flow_type == MLX4_FS_REGULAR &&
1738	    flow_attr->num_of_specs == 1) {
1739		struct _rule_hw *rule_header = (struct _rule_hw *)(ctrl + 1);
1740		enum ib_flow_spec_type header_spec =
1741			((union ib_flow_spec *)(flow_attr + 1))->type;
1742
1743		if (header_spec == IB_FLOW_SPEC_ETH)
1744			mlx4_handle_eth_header_mcast_prio(ctrl, rule_header);
1745	}
1746
1747	ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
1748			   MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
1749			   MLX4_CMD_NATIVE);
1750	if (ret == -ENOMEM)
1751		pr_err("mcg table is full. Fail to register network rule.\n");
1752	else if (ret == -ENXIO)
1753		pr_err("Device managed flow steering is disabled. Fail to register network rule.\n");
1754	else if (ret)
1755		pr_err("Invalid argument. Fail to register network rule.\n");
1756
1757	mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1758	return ret;
1759}
1760
1761static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
1762{
1763	int err;
1764	err = mlx4_cmd(dev, reg_id, 0, 0,
1765		       MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
1766		       MLX4_CMD_NATIVE);
1767	if (err)
1768		pr_err("Fail to detach network rule. registration id = 0x%llx\n",
1769		       reg_id);
1770	return err;
1771}
1772
1773static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
1774				    u64 *reg_id)
1775{
1776	void *ib_flow;
1777	union ib_flow_spec *ib_spec;
1778	struct mlx4_dev	*dev = to_mdev(qp->device)->dev;
1779	int err = 0;
1780
1781	if (dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN ||
1782	    dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC)
1783		return 0; /* do nothing */
1784
1785	ib_flow = flow_attr + 1;
1786	ib_spec = (union ib_flow_spec *)ib_flow;
1787
1788	if (ib_spec->type !=  IB_FLOW_SPEC_ETH || flow_attr->num_of_specs != 1)
1789		return 0; /* do nothing */
1790
1791	err = mlx4_tunnel_steer_add(to_mdev(qp->device)->dev, ib_spec->eth.val.dst_mac,
1792				    flow_attr->port, qp->qp_num,
1793				    MLX4_DOMAIN_UVERBS | (flow_attr->priority & 0xff),
1794				    reg_id);
1795	return err;
1796}
1797
1798static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
1799				      struct ib_flow_attr *flow_attr,
1800				      enum mlx4_net_trans_promisc_mode *type)
1801{
1802	int err = 0;
1803
1804	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) ||
1805	    (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) ||
1806	    (flow_attr->num_of_specs > 1) || (flow_attr->priority != 0)) {
1807		return -EOPNOTSUPP;
1808	}
1809
1810	if (flow_attr->num_of_specs == 0) {
1811		type[0] = MLX4_FS_MC_SNIFFER;
1812		type[1] = MLX4_FS_UC_SNIFFER;
1813	} else {
1814		union ib_flow_spec *ib_spec;
1815
1816		ib_spec = (union ib_flow_spec *)(flow_attr + 1);
1817		if (ib_spec->type !=  IB_FLOW_SPEC_ETH)
1818			return -EINVAL;
1819
 1820		/* if the mask is all zeros then sniff both MC and UC */
1821		if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) {
1822			type[0] = MLX4_FS_MC_SNIFFER;
1823			type[1] = MLX4_FS_UC_SNIFFER;
1824		} else {
1825			u8 mac[ETH_ALEN] = {ib_spec->eth.mask.dst_mac[0] ^ 0x01,
1826					    ib_spec->eth.mask.dst_mac[1],
1827					    ib_spec->eth.mask.dst_mac[2],
1828					    ib_spec->eth.mask.dst_mac[3],
1829					    ib_spec->eth.mask.dst_mac[4],
1830					    ib_spec->eth.mask.dst_mac[5]};
1831
 1832			/* The xor above only flipped the MC bit; a non-empty mask is
 1833			 * valid only if that bit is set and the rest are zero.
1834			 */
1835			if (!is_zero_ether_addr(&mac[0]))
1836				return -EINVAL;
1837
1838			if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac))
1839				type[0] = MLX4_FS_MC_SNIFFER;
1840			else
1841				type[0] = MLX4_FS_UC_SNIFFER;
1842		}
1843	}
1844
1845	return err;
1846}
1847
1848static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
1849				    struct ib_flow_attr *flow_attr,
1850				    int domain)
1851{
1852	int err = 0, i = 0, j = 0;
1853	struct mlx4_ib_flow *mflow;
1854	enum mlx4_net_trans_promisc_mode type[2];
1855	struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
1856	int is_bonded = mlx4_is_bonded(dev);
1857
1858	if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt)
1859		return ERR_PTR(-EINVAL);
1860
1861	if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)
1862		return ERR_PTR(-EOPNOTSUPP);
1863
1864	if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
1865	    (flow_attr->type != IB_FLOW_ATTR_NORMAL))
1866		return ERR_PTR(-EOPNOTSUPP);
1867
1868	memset(type, 0, sizeof(type));
1869
1870	mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
1871	if (!mflow) {
1872		err = -ENOMEM;
1873		goto err_free;
1874	}
1875
1876	switch (flow_attr->type) {
1877	case IB_FLOW_ATTR_NORMAL:
 1878		/* If the don't-trap flag (continue match) is set, under specific
 1879		 * conditions traffic is replicated to the given qp
 1880		 * without stealing it.
1881		 */
1882		if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) {
1883			err = mlx4_ib_add_dont_trap_rule(dev,
1884							 flow_attr,
1885							 type);
1886			if (err)
1887				goto err_free;
1888		} else {
1889			type[0] = MLX4_FS_REGULAR;
1890		}
1891		break;
1892
1893	case IB_FLOW_ATTR_ALL_DEFAULT:
1894		type[0] = MLX4_FS_ALL_DEFAULT;
1895		break;
1896
1897	case IB_FLOW_ATTR_MC_DEFAULT:
1898		type[0] = MLX4_FS_MC_DEFAULT;
1899		break;
1900
1901	case IB_FLOW_ATTR_SNIFFER:
1902		type[0] = MLX4_FS_MIRROR_RX_PORT;
1903		type[1] = MLX4_FS_MIRROR_SX_PORT;
1904		break;
1905
1906	default:
1907		err = -EINVAL;
1908		goto err_free;
1909	}
1910
1911	while (i < ARRAY_SIZE(type) && type[i]) {
1912		err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
1913					    &mflow->reg_id[i].id);
1914		if (err)
1915			goto err_create_flow;
1916		if (is_bonded) {
1917			/* Application always sees one port so the mirror rule
1918			 * must be on port #2
1919			 */
1920			flow_attr->port = 2;
1921			err = __mlx4_ib_create_flow(qp, flow_attr,
1922						    domain, type[j],
1923						    &mflow->reg_id[j].mirror);
1924			flow_attr->port = 1;
1925			if (err)
1926				goto err_create_flow;
1927			j++;
1928		}
1929
1930		i++;
1931	}
1932
1933	if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
1934		err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
1935					       &mflow->reg_id[i].id);
1936		if (err)
1937			goto err_create_flow;
1938
1939		if (is_bonded) {
1940			flow_attr->port = 2;
1941			err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
1942						       &mflow->reg_id[j].mirror);
1943			flow_attr->port = 1;
1944			if (err)
1945				goto err_create_flow;
1946			j++;
1947		}
1948		/* function to create mirror rule */
1949		i++;
1950	}
1951
1952	return &mflow->ibflow;
1953
1954err_create_flow:
1955	while (i) {
1956		(void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
1957					     mflow->reg_id[i].id);
1958		i--;
1959	}
1960
1961	while (j) {
1962		(void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
1963					     mflow->reg_id[j].mirror);
1964		j--;
1965	}
1966err_free:
1967	kfree(mflow);
1968	return ERR_PTR(err);
1969}
1970
1971static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
1972{
1973	int err, ret = 0;
1974	int i = 0;
1975	struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
1976	struct mlx4_ib_flow *mflow = to_mflow(flow_id);
1977
1978	while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i].id) {
1979		err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i].id);
1980		if (err)
1981			ret = err;
1982		if (mflow->reg_id[i].mirror) {
1983			err = __mlx4_ib_destroy_flow(mdev->dev,
1984						     mflow->reg_id[i].mirror);
1985			if (err)
1986				ret = err;
1987		}
1988		i++;
1989	}
1990
1991	kfree(mflow);
1992	return ret;
1993}
1994
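/* Attach the QP to a multicast group. In device-managed flow steering
 * mode the returned reg_id is remembered on mqp->steering_rules so that
 * detach can find it later; on bonded devices a mirror attach is also
 * performed on the other port.
 */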
1995static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1996{
1997	int err;
1998	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1999	struct mlx4_dev	*dev = mdev->dev;
2000	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
2001	struct mlx4_ib_steering *ib_steering = NULL;
2002	enum mlx4_protocol prot = MLX4_PROT_IB_IPV6;
2003	struct mlx4_flow_reg_id	reg_id;
2004
2005	if (mdev->dev->caps.steering_mode ==
2006	    MLX4_STEERING_MODE_DEVICE_MANAGED) {
2007		ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
2008		if (!ib_steering)
2009			return -ENOMEM;
2010	}
2011
2012	err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
2013				    !!(mqp->flags &
2014				       MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
2015				    prot, &reg_id.id);
2016	if (err) {
2017		pr_err("multicast attach op failed, err %d\n", err);
2018		goto err_malloc;
2019	}
2020
2021	reg_id.mirror = 0;
2022	if (mlx4_is_bonded(dev)) {
2023		err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw,
2024					    (mqp->port == 1) ? 2 : 1,
2025					    !!(mqp->flags &
2026					    MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
2027					    prot, &reg_id.mirror);
2028		if (err)
2029			goto err_add;
2030	}
2031
2032	err = add_gid_entry(ibqp, gid);
2033	if (err)
2034		goto err_add;
2035
2036	if (ib_steering) {
2037		memcpy(ib_steering->gid.raw, gid->raw, 16);
2038		ib_steering->reg_id = reg_id;
2039		mutex_lock(&mqp->mutex);
2040		list_add(&ib_steering->list, &mqp->steering_rules);
2041		mutex_unlock(&mqp->mutex);
2042	}
2043	return 0;
2044
2045err_add:
2046	mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
2047			      prot, reg_id.id);
2048	if (reg_id.mirror)
2049		mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
2050				      prot, reg_id.mirror);
2051err_malloc:
2052	kfree(ib_steering);
2053
2054	return err;
2055}
2056
2057static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
2058{
2059	struct mlx4_ib_gid_entry *ge;
2060	struct mlx4_ib_gid_entry *tmp;
2061	struct mlx4_ib_gid_entry *ret = NULL;
2062
2063	list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
2064		if (!memcmp(raw, ge->gid.raw, 16)) {
2065			ret = ge;
2066			break;
2067		}
2068	}
2069
2070	return ret;
2071}
2072
2073static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
2074{
2075	int err;
2076	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
2077	struct mlx4_dev *dev = mdev->dev;
2078	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
2079	struct net_device *ndev;
2080	struct mlx4_ib_gid_entry *ge;
2081	struct mlx4_flow_reg_id reg_id = {0, 0};
2082	enum mlx4_protocol prot =  MLX4_PROT_IB_IPV6;
2083
2084	if (mdev->dev->caps.steering_mode ==
2085	    MLX4_STEERING_MODE_DEVICE_MANAGED) {
2086		struct mlx4_ib_steering *ib_steering;
2087
2088		mutex_lock(&mqp->mutex);
2089		list_for_each_entry(ib_steering, &mqp->steering_rules, list) {
2090			if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) {
2091				list_del(&ib_steering->list);
2092				break;
2093			}
2094		}
2095		mutex_unlock(&mqp->mutex);
2096		if (&ib_steering->list == &mqp->steering_rules) {
2097			pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
2098			return -EINVAL;
2099		}
2100		reg_id = ib_steering->reg_id;
2101		kfree(ib_steering);
2102	}
2103
2104	err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
2105				    prot, reg_id.id);
2106	if (err)
2107		return err;
2108
2109	if (mlx4_is_bonded(dev)) {
2110		err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
2111					    prot, reg_id.mirror);
2112		if (err)
2113			return err;
2114	}
2115
2116	mutex_lock(&mqp->mutex);
2117	ge = find_gid_entry(mqp, gid->raw);
2118	if (ge) {
2119		spin_lock_bh(&mdev->iboe.lock);
2120		ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
2121		if (ndev)
2122			dev_hold(ndev);
2123		spin_unlock_bh(&mdev->iboe.lock);
2124		if (ndev)
2125			dev_put(ndev);
2126		list_del(&ge->list);
2127		kfree(ge);
2128	} else
2129		pr_warn("could not find mgid entry\n");
2130
2131	mutex_unlock(&mqp->mutex);
2132
2133	return 0;
2134}
2135
2136static int init_node_data(struct mlx4_ib_dev *dev)
2137{
2138	struct ib_smp *in_mad  = NULL;
2139	struct ib_smp *out_mad = NULL;
2140	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
2141	int err = -ENOMEM;
2142
2143	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
2144	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
2145	if (!in_mad || !out_mad)
2146		goto out;
2147
2148	init_query_mad(in_mad);
2149	in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
2150	if (mlx4_is_master(dev->dev))
2151		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
2152
2153	err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
2154	if (err)
2155		goto out;
2156
2157	memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX);
2158
2159	in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
2160
2161	err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
2162	if (err)
2163		goto out;
2164
2165	dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
2166	memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
2167
2168out:
2169	kfree(in_mad);
2170	kfree(out_mad);
2171	return err;
2172}
2173
2174static ssize_t show_hca(struct device *device, struct device_attribute *attr,
2175			char *buf)
2176{
2177	struct mlx4_ib_dev *dev =
2178		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
2179	return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
2180}
2181
2182static ssize_t show_rev(struct device *device, struct device_attribute *attr,
2183			char *buf)
2184{
2185	struct mlx4_ib_dev *dev =
2186		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
2187	return sprintf(buf, "%x\n", dev->dev->rev_id);
2188}
2189
2190static ssize_t show_board(struct device *device, struct device_attribute *attr,
2191			  char *buf)
2192{
2193	struct mlx4_ib_dev *dev =
2194		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
2195	return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
2196		       dev->dev->board_id);
2197}
2198
2199static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
2200static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
2201static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
2202
2203static struct device_attribute *mlx4_class_attributes[] = {
2204	&dev_attr_hw_rev,
2205	&dev_attr_hca_type,
2206	&dev_attr_board_id
2207};
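
/*
 * Editor's note, not part of the original source: these attributes are
 * registered against the ib_device in mlx4_ib_add(), so they should appear
 * as read-only sysfs files under the IB class device.  For a hypothetical
 * device named mlx4_0 that would be, e.g.:
 *
 *	/sys/class/infiniband/mlx4_0/hw_rev
 *	/sys/class/infiniband/mlx4_0/hca_type
 *	/sys/class/infiniband/mlx4_0/board_id
 */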
2208
2209struct diag_counter {
2210	const char *name;
2211	u32 offset;
2212};
2213
2214#define DIAG_COUNTER(_name, _offset)			\
2215	{ .name = #_name, .offset = _offset }
2216
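/*
 * Editor's note, not part of the original source: DIAG_COUNTER() pairs a
 * counter name with the offset that is later handed to
 * mlx4_query_diag_counters().  As a minimal worked example, the first entry
 * of diag_basic[] below,
 *
 *	DIAG_COUNTER(rq_num_lle, 0x00)
 *
 * expands (the # operator stringifies _name) to
 *
 *	{ .name = "rq_num_lle", .offset = 0x00 },
 */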
2217static const struct diag_counter diag_basic[] = {
2218	DIAG_COUNTER(rq_num_lle, 0x00),
2219	DIAG_COUNTER(sq_num_lle, 0x04),
2220	DIAG_COUNTER(rq_num_lqpoe, 0x08),
2221	DIAG_COUNTER(sq_num_lqpoe, 0x0C),
2222	DIAG_COUNTER(rq_num_lpe, 0x18),
2223	DIAG_COUNTER(sq_num_lpe, 0x1C),
2224	DIAG_COUNTER(rq_num_wrfe, 0x20),
2225	DIAG_COUNTER(sq_num_wrfe, 0x24),
2226	DIAG_COUNTER(sq_num_mwbe, 0x2C),
2227	DIAG_COUNTER(sq_num_bre, 0x34),
2228	DIAG_COUNTER(sq_num_rire, 0x44),
2229	DIAG_COUNTER(rq_num_rire, 0x48),
2230	DIAG_COUNTER(sq_num_rae, 0x4C),
2231	DIAG_COUNTER(rq_num_rae, 0x50),
2232	DIAG_COUNTER(sq_num_roe, 0x54),
2233	DIAG_COUNTER(sq_num_tree, 0x5C),
2234	DIAG_COUNTER(sq_num_rree, 0x64),
2235	DIAG_COUNTER(rq_num_rnr, 0x68),
2236	DIAG_COUNTER(sq_num_rnr, 0x6C),
2237	DIAG_COUNTER(rq_num_oos, 0x100),
2238	DIAG_COUNTER(sq_num_oos, 0x104),
2239};
2240
2241static const struct diag_counter diag_ext[] = {
2242	DIAG_COUNTER(rq_num_dup, 0x130),
2243	DIAG_COUNTER(sq_num_to, 0x134),
2244};
2245
2246static const struct diag_counter diag_device_only[] = {
2247	DIAG_COUNTER(num_cqovf, 0x1A0),
2248	DIAG_COUNTER(rq_num_udsdprd, 0x118),
2249};
2250
2251static struct rdma_hw_stats *mlx4_ib_alloc_hw_stats(struct ib_device *ibdev,
2252						    u8 port_num)
2253{
2254	struct mlx4_ib_dev *dev = to_mdev(ibdev);
2255	struct mlx4_ib_diag_counters *diag = dev->diag_counters;
2256
2257	if (!diag[!!port_num].name)
2258		return NULL;
2259
2260	return rdma_alloc_hw_stats_struct(diag[!!port_num].name,
2261					  diag[!!port_num].num_counters,
2262					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
2263}
2264
2265static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
2266				struct rdma_hw_stats *stats,
2267				u8 port, int index)
2268{
2269	struct mlx4_ib_dev *dev = to_mdev(ibdev);
2270	struct mlx4_ib_diag_counters *diag = dev->diag_counters;
2271	u32 hw_value[ARRAY_SIZE(diag_device_only) +
2272		ARRAY_SIZE(diag_ext) + ARRAY_SIZE(diag_basic)] = {};
2273	int ret;
2274	int i;
2275
2276	ret = mlx4_query_diag_counters(dev->dev,
2277				       MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS,
2278				       diag[!!port].offset, hw_value,
2279				       diag[!!port].num_counters, port);
2280
2281	if (ret)
2282		return ret;
2283
2284	for (i = 0; i < diag[!!port].num_counters; i++)
2285		stats->value[i] = hw_value[i];
2286
2287	return diag[!!port].num_counters;
2288}
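
/*
 * Editor's note, not part of the original source: diag_counters[] holds two
 * descriptor sets and both callbacks above index it with !!port, so port 0
 * selects the device-wide set while every real port number collapses to the
 * shared per-port set:
 *
 *	diag[!!0]  ->  diag[0]   device-wide set (includes diag_device_only[])
 *	diag[!!1]  ->  diag[1]   per-port set, shared by all ports
 *	diag[!!2]  ->  diag[1]   (only populated when DIAG_PER_PORT is supported)
 */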
2289
2290static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
2291					 const char ***name,
2292					 u32 **offset,
2293					 u32 *num,
2294					 bool port)
2295{
2296	u32 num_counters;
2297
2298	num_counters = ARRAY_SIZE(diag_basic);
2299
2300	if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT)
2301		num_counters += ARRAY_SIZE(diag_ext);
2302
2303	if (!port)
2304		num_counters += ARRAY_SIZE(diag_device_only);
2305
2306	*name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL);
2307	if (!*name)
2308		return -ENOMEM;
2309
2310	*offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL);
2311	if (!*offset)
2312		goto err_name;
2313
2314	*num = num_counters;
2315
2316	return 0;
2317
2318err_name:
2319	kfree(*name);
2320	return -ENOMEM;
2321}
2322
2323static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
2324				       const char **name,
2325				       u32 *offset,
2326				       bool port)
2327{
2328	int i;
2329	int j;
2330
2331	for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) {
2332		name[i] = diag_basic[i].name;
2333		offset[i] = diag_basic[i].offset;
2334	}
2335
2336	if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) {
2337		for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) {
2338			name[j] = diag_ext[i].name;
2339			offset[j] = diag_ext[i].offset;
2340		}
2341	}
2342
2343	if (!port) {
2344		for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) {
2345			name[j] = diag_device_only[i].name;
2346			offset[j] = diag_device_only[i].offset;
2347		}
2348	}
2349}
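
/*
 * Editor's note, not part of the original source: the name[]/offset[] arrays
 * are always filled in the same order -- diag_basic[], then diag_ext[] when
 * the DIAG_PER_PORT capability is set, then diag_device_only[] for the
 * device-wide (port == false) set.  A sketch of the resulting layout for the
 * device-wide set on an HCA with the per-port capability:
 *
 *	index  0 .. 20   rq_num_lle ... sq_num_oos    (diag_basic)
 *	index 21 .. 22   rq_num_dup, sq_num_to        (diag_ext)
 *	index 23 .. 24   num_cqovf, rq_num_udsdprd    (diag_device_only)
 */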
2350
2351static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
2352{
2353	struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;
2354	int i;
2355	int ret;
2356	bool per_port = !!(ibdev->dev->caps.flags2 &
2357		MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
2358
2359	if (mlx4_is_slave(ibdev->dev))
2360		return 0;
2361
2362	for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
2363		/* i == 1 means we are building port counters */
2364		if (i && !per_port)
2365			continue;
2366
2367		ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name,
2368						    &diag[i].offset,
2369						    &diag[i].num_counters, i);
2370		if (ret)
2371			goto err_alloc;
2372
2373		mlx4_ib_fill_diag_counters(ibdev, diag[i].name,
2374					   diag[i].offset, i);
2375	}
2376
2377	ibdev->ib_dev.get_hw_stats	= mlx4_ib_get_hw_stats;
2378	ibdev->ib_dev.alloc_hw_stats	= mlx4_ib_alloc_hw_stats;
2379
2380	return 0;
2381
2382err_alloc:
2383	if (i) {
2384		kfree(diag[i - 1].name);
2385		kfree(diag[i - 1].offset);
2386	}
2387
2388	return ret;
2389}
2390
2391static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev)
2392{
2393	int i;
2394
2395	for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
2396		kfree(ibdev->diag_counters[i].offset);
2397		kfree(ibdev->diag_counters[i].name);
2398	}
2399}
2400
2401#define MLX4_IB_INVALID_MAC	((u64)-1)
2402static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
2403			       struct net_device *dev,
2404			       int port)
2405{
2406	u64 new_smac = 0;
2407	u64 release_mac = MLX4_IB_INVALID_MAC;
2408	struct mlx4_ib_qp *qp;
2409
2410	read_lock(&dev_base_lock);
2411	new_smac = mlx4_mac_to_u64(dev->dev_addr);
2412	read_unlock(&dev_base_lock);
2413
2414	atomic64_set(&ibdev->iboe.mac[port - 1], new_smac);
2415
2416	/* no need to update QP1 or register the MAC in non-SRIOV mode */
2417	if (!mlx4_is_mfunc(ibdev->dev))
2418		return;
2419
2420	mutex_lock(&ibdev->qp1_proxy_lock[port - 1]);
2421	qp = ibdev->qp1_proxy[port - 1];
2422	if (qp) {
2423		int new_smac_index;
2424		u64 old_smac;
2425		struct mlx4_update_qp_params update_params;
2426
2427		mutex_lock(&qp->mutex);
2428		old_smac = qp->pri.smac;
2429		if (new_smac == old_smac)
2430			goto unlock;
2431
2432		new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac);
2433
2434		if (new_smac_index < 0)
2435			goto unlock;
2436
2437		update_params.smac_index = new_smac_index;
2438		if (mlx4_update_qp(ibdev->dev, qp->mqp.qpn, MLX4_UPDATE_QP_SMAC,
2439				   &update_params)) {
2440			release_mac = new_smac;
2441			goto unlock;
2442		}
2443		/* if old port was zero, no mac was yet registered for this QP */
2444		if (qp->pri.smac_port)
2445			release_mac = old_smac;
2446		qp->pri.smac = new_smac;
2447		qp->pri.smac_port = port;
2448		qp->pri.smac_index = new_smac_index;
2449	}
2450
2451unlock:
2452	if (release_mac != MLX4_IB_INVALID_MAC)
2453		mlx4_unregister_mac(ibdev->dev, port, release_mac);
2454	if (qp)
2455		mutex_unlock(&qp->mutex);
2456	mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
2457}
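
/*
 * Editor's note, not part of the original source: the source MAC is cached
 * as a u64 so it can be updated atomically per port.  Assuming the usual
 * byte-wise packing performed by mlx4_mac_to_u64(), a netdev address of
 * 00:11:22:33:44:55 would be stored as:
 *
 *	u8 addr[ETH_ALEN] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
 *	u64 smac = mlx4_mac_to_u64(addr);	value 0x0000001122334455
 */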
2458
2459static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
2460				 struct net_device *dev,
2461				 unsigned long event)
2462
2463{
2464	struct mlx4_ib_iboe *iboe;
2465	int update_qps_port = -1;
2466	int port;
2467
2468	ASSERT_RTNL();
2469
2470	iboe = &ibdev->iboe;
2471
2472	spin_lock_bh(&iboe->lock);
2473	mlx4_foreach_ib_transport_port(port, ibdev->dev) {
2474
2475		iboe->netdevs[port - 1] =
2476			mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
2477
2478		if (dev == iboe->netdevs[port - 1] &&
2479		    (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
2480		     event == NETDEV_UP || event == NETDEV_CHANGE))
2481			update_qps_port = port;
2482
2483	}
2484	spin_unlock_bh(&iboe->lock);
2485
2486	if (update_qps_port > 0)
2487		mlx4_ib_update_qps(ibdev, dev, update_qps_port);
2488}
2489
2490static int mlx4_ib_netdev_event(struct notifier_block *this,
2491				unsigned long event, void *ptr)
2492{
2493	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2494	struct mlx4_ib_dev *ibdev;
2495
2496	if (!net_eq(dev_net(dev), &init_net))
2497		return NOTIFY_DONE;
2498
2499	ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
2500	mlx4_ib_scan_netdevs(ibdev, dev, event);
2501
2502	return NOTIFY_DONE;
2503}
2504
2505static void init_pkeys(struct mlx4_ib_dev *ibdev)
2506{
2507	int port;
2508	int slave;
2509	int i;
2510
2511	if (mlx4_is_master(ibdev->dev)) {
2512		for (slave = 0; slave <= ibdev->dev->persist->num_vfs;
2513		     ++slave) {
2514			for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
2515				for (i = 0;
2516				     i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
2517				     ++i) {
2518					ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
2519					/* master has the identity virt2phys pkey mapping */
2520						(slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
2521							ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
2522					mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
2523							     ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
2524				}
2525			}
2526		}
2527		/* initialize pkey cache */
2528		for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
2529			for (i = 0;
2530			     i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
2531			     ++i)
2532				ibdev->pkeys.phys_pkey_cache[port-1][i] =
2533					(i) ? 0 : 0xFFFF;
2534		}
2535	}
2536}
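
/*
 * Editor's note, not part of the original source: the virt2phys pkey table
 * built above is the identity mapping for the master function and for
 * virtual index 0 of every slave; every other virtual index of a slave is
 * pointed at the last physical entry.  Assuming, for illustration, a
 * physical pkey table length of 128 on a port, a non-master slave ends up
 * with:
 *
 *	virt index 0  ->  phys index 0
 *	virt index 1  ->  phys index 127
 *	virt index 2  ->  phys index 127
 *	...
 *
 * while the master keeps virt index i -> phys index i throughout.
 */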
2537
2538static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
2539{
2540	int i, j, eq = 0, total_eqs = 0;
2541
2542	ibdev->eq_table = kcalloc(dev->caps.num_comp_vectors,
2543				  sizeof(ibdev->eq_table[0]), GFP_KERNEL);
2544	if (!ibdev->eq_table)
2545		return;
2546
2547	for (i = 1; i <= dev->caps.num_ports; i++) {
2548		for (j = 0; j < mlx4_get_eqs_per_port(dev, i);
2549		     j++, total_eqs++) {
2550			if (i > 1 &&  mlx4_is_eq_shared(dev, total_eqs))
2551				continue;
2552			ibdev->eq_table[eq] = total_eqs;
2553			if (!mlx4_assign_eq(dev, i,
2554					    &ibdev->eq_table[eq]))
2555				eq++;
2556			else
2557				ibdev->eq_table[eq] = -1;
2558		}
2559	}
2560
2561	for (i = eq; i < dev->caps.num_comp_vectors;
2562	     ibdev->eq_table[i++] = -1)
2563		;
2564
2565	/* Advertise the new number of EQs to clients */
2566	ibdev->ib_dev.num_comp_vectors = eq;
2567}
2568
2569static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
2570{
2571	int i;
2572	int total_eqs = ibdev->ib_dev.num_comp_vectors;
2573
2574	/* no eqs were allocated */
2575	if (!ibdev->eq_table)
2576		return;
2577
2578	/* Reset the advertised EQ number */
2579	ibdev->ib_dev.num_comp_vectors = 0;
2580
2581	for (i = 0; i < total_eqs; i++)
2582		mlx4_release_eq(dev, ibdev->eq_table[i]);
2583
2584	kfree(ibdev->eq_table);
2585	ibdev->eq_table = NULL;
2586}
2587
2588static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
2589			       struct ib_port_immutable *immutable)
2590{
2591	struct ib_port_attr attr;
2592	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
2593	int err;
2594
2595	if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
2596		immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
2597		immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2598	} else {
2599		if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
2600			immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
2601		if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
2602			immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
2603				RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
2604		immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET;
2605		if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE |
2606		    RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP))
2607			immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2608	}
2609
2610	err = ib_query_port(ibdev, port_num, &attr);
2611	if (err)
2612		return err;
2613
2614	immutable->pkey_tbl_len = attr.pkey_tbl_len;
2615	immutable->gid_tbl_len = attr.gid_tbl_len;
2616
2617	return 0;
2618}
2619
2620static void get_fw_ver_str(struct ib_device *device, char *str)
2621{
2622	struct mlx4_ib_dev *dev =
2623		container_of(device, struct mlx4_ib_dev, ib_dev);
2624	snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d",
2625		 (int) (dev->dev->caps.fw_ver >> 32),
2626		 (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
2627		 (int) dev->dev->caps.fw_ver & 0xffff);
2628}
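
/*
 * Editor's note, not part of the original source: get_fw_ver_str() above
 * splits caps.fw_ver as <major:32 bits><minor:16 bits><sub-minor:16 bits>.
 * For a hypothetical value of 0x0000000200230fa0:
 *
 *	major     = fw_ver >> 32            = 2
 *	minor     = (fw_ver >> 16) & 0xffff = 0x0023 = 35
 *	sub-minor = fw_ver & 0xffff         = 0x0fa0 = 4000
 *
 * so the reported string would be "2.35.4000".
 */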
2629
2630static void *mlx4_ib_add(struct mlx4_dev *dev)
2631{
2632	struct mlx4_ib_dev *ibdev;
2633	int num_ports = 0;
2634	int i, j;
2635	int err;
2636	struct mlx4_ib_iboe *iboe;
2637	int ib_num_ports = 0;
2638	int num_req_counters;
2639	int allocated;
2640	u32 counter_index;
2641	struct counter_index *new_counter_index = NULL;
2642
2643	pr_info_once("%s", mlx4_ib_version);
2644
2645	num_ports = 0;
2646	mlx4_foreach_ib_transport_port(i, dev)
2647		num_ports++;
2648
2649	/* No point in registering a device with no ports... */
2650	if (num_ports == 0)
2651		return NULL;
2652
2653	ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
2654	if (!ibdev) {
2655		dev_err(&dev->persist->pdev->dev,
2656			"Device struct alloc failed\n");
2657		return NULL;
2658	}
2659
2660	iboe = &ibdev->iboe;
2661
2662	if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
2663		goto err_dealloc;
2664
2665	if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
2666		goto err_pd;
2667
2668	ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
2669				 PAGE_SIZE);
2670	if (!ibdev->uar_map)
2671		goto err_uar;
2672	MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
2673
2674	ibdev->dev = dev;
2675	ibdev->bond_next_port	= 0;
2676
2677	strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
2678	ibdev->ib_dev.owner		= THIS_MODULE;
2679	ibdev->ib_dev.node_type		= RDMA_NODE_IB_CA;
2680	ibdev->ib_dev.local_dma_lkey	= dev->caps.reserved_lkey;
2681	ibdev->num_ports		= num_ports;
2682	ibdev->ib_dev.phys_port_cnt     = mlx4_is_bonded(dev) ?
2683						1 : ibdev->num_ports;
2684	ibdev->ib_dev.num_comp_vectors	= dev->caps.num_comp_vectors;
2685	ibdev->ib_dev.dev.parent	= &dev->persist->pdev->dev;
2686	ibdev->ib_dev.get_netdev	= mlx4_ib_get_netdev;
2687	ibdev->ib_dev.add_gid		= mlx4_ib_add_gid;
2688	ibdev->ib_dev.del_gid		= mlx4_ib_del_gid;
2689
2690	if (dev->caps.userspace_caps)
2691		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
2692	else
2693		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
2694
2695	ibdev->ib_dev.uverbs_cmd_mask	=
2696		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
2697		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
2698		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
2699		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
2700		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
2701		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
2702		(1ull << IB_USER_VERBS_CMD_REREG_MR)		|
2703		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
2704		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
2705		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
2706		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)		|
2707		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
2708		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
2709		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
2710		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
2711		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
2712		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
2713		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)	|
2714		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
2715		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
2716		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
2717		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
2718		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)		|
2719		(1ull << IB_USER_VERBS_CMD_OPEN_QP);
2720
2721	ibdev->ib_dev.query_device	= mlx4_ib_query_device;
2722	ibdev->ib_dev.query_port	= mlx4_ib_query_port;
2723	ibdev->ib_dev.get_link_layer	= mlx4_ib_port_link_layer;
2724	ibdev->ib_dev.query_gid		= mlx4_ib_query_gid;
2725	ibdev->ib_dev.query_pkey	= mlx4_ib_query_pkey;
2726	ibdev->ib_dev.modify_device	= mlx4_ib_modify_device;
2727	ibdev->ib_dev.modify_port	= mlx4_ib_modify_port;
2728	ibdev->ib_dev.alloc_ucontext	= mlx4_ib_alloc_ucontext;
2729	ibdev->ib_dev.dealloc_ucontext	= mlx4_ib_dealloc_ucontext;
2730	ibdev->ib_dev.mmap		= mlx4_ib_mmap;
2731	ibdev->ib_dev.alloc_pd		= mlx4_ib_alloc_pd;
2732	ibdev->ib_dev.dealloc_pd	= mlx4_ib_dealloc_pd;
2733	ibdev->ib_dev.create_ah		= mlx4_ib_create_ah;
2734	ibdev->ib_dev.query_ah		= mlx4_ib_query_ah;
2735	ibdev->ib_dev.destroy_ah	= mlx4_ib_destroy_ah;
2736	ibdev->ib_dev.create_srq	= mlx4_ib_create_srq;
2737	ibdev->ib_dev.modify_srq	= mlx4_ib_modify_srq;
2738	ibdev->ib_dev.query_srq		= mlx4_ib_query_srq;
2739	ibdev->ib_dev.destroy_srq	= mlx4_ib_destroy_srq;
2740	ibdev->ib_dev.post_srq_recv	= mlx4_ib_post_srq_recv;
2741	ibdev->ib_dev.create_qp		= mlx4_ib_create_qp;
2742	ibdev->ib_dev.modify_qp		= mlx4_ib_modify_qp;
2743	ibdev->ib_dev.query_qp		= mlx4_ib_query_qp;
2744	ibdev->ib_dev.destroy_qp	= mlx4_ib_destroy_qp;
2745	ibdev->ib_dev.post_send		= mlx4_ib_post_send;
2746	ibdev->ib_dev.post_recv		= mlx4_ib_post_recv;
2747	ibdev->ib_dev.create_cq		= mlx4_ib_create_cq;
2748	ibdev->ib_dev.modify_cq		= mlx4_ib_modify_cq;
2749	ibdev->ib_dev.resize_cq		= mlx4_ib_resize_cq;
2750	ibdev->ib_dev.destroy_cq	= mlx4_ib_destroy_cq;
2751	ibdev->ib_dev.poll_cq		= mlx4_ib_poll_cq;
2752	ibdev->ib_dev.req_notify_cq	= mlx4_ib_arm_cq;
2753	ibdev->ib_dev.get_dma_mr	= mlx4_ib_get_dma_mr;
2754	ibdev->ib_dev.reg_user_mr	= mlx4_ib_reg_user_mr;
2755	ibdev->ib_dev.rereg_user_mr	= mlx4_ib_rereg_user_mr;
2756	ibdev->ib_dev.dereg_mr		= mlx4_ib_dereg_mr;
2757	ibdev->ib_dev.alloc_mr		= mlx4_ib_alloc_mr;
2758	ibdev->ib_dev.map_mr_sg		= mlx4_ib_map_mr_sg;
2759	ibdev->ib_dev.attach_mcast	= mlx4_ib_mcg_attach;
2760	ibdev->ib_dev.detach_mcast	= mlx4_ib_mcg_detach;
2761	ibdev->ib_dev.process_mad	= mlx4_ib_process_mad;
2762	ibdev->ib_dev.get_port_immutable = mlx4_port_immutable;
2763	ibdev->ib_dev.get_dev_fw_str    = get_fw_ver_str;
2764	ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext;
2765
2766	ibdev->ib_dev.uverbs_ex_cmd_mask |=
2767		(1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
2768
2769	if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
2770	    ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) ==
2771	    IB_LINK_LAYER_ETHERNET) ||
2772	    (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) ==
2773	    IB_LINK_LAYER_ETHERNET))) {
2774		ibdev->ib_dev.create_wq		= mlx4_ib_create_wq;
2775		ibdev->ib_dev.modify_wq		= mlx4_ib_modify_wq;
2776		ibdev->ib_dev.destroy_wq	= mlx4_ib_destroy_wq;
2777		ibdev->ib_dev.create_rwq_ind_table  =
2778			mlx4_ib_create_rwq_ind_table;
2779		ibdev->ib_dev.destroy_rwq_ind_table =
2780			mlx4_ib_destroy_rwq_ind_table;
2781		ibdev->ib_dev.uverbs_ex_cmd_mask |=
2782			(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ)	  |
2783			(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ)	  |
2784			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ)	  |
2785			(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
2786			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
2787	}
2788
2789	if (!mlx4_is_slave(ibdev->dev)) {
2790		ibdev->ib_dev.alloc_fmr		= mlx4_ib_fmr_alloc;
2791		ibdev->ib_dev.map_phys_fmr	= mlx4_ib_map_phys_fmr;
2792		ibdev->ib_dev.unmap_fmr		= mlx4_ib_unmap_fmr;
2793		ibdev->ib_dev.dealloc_fmr	= mlx4_ib_fmr_dealloc;
2794	}
2795
2796	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
2797	    dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
2798		ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
2799		ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
2800
2801		ibdev->ib_dev.uverbs_cmd_mask |=
2802			(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
2803			(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
2804	}
2805
2806	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
2807		ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
2808		ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
2809		ibdev->ib_dev.uverbs_cmd_mask |=
2810			(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
2811			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
2812	}
2813
2814	if (check_flow_steering_support(dev)) {
2815		ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
2816		ibdev->ib_dev.create_flow	= mlx4_ib_create_flow;
2817		ibdev->ib_dev.destroy_flow	= mlx4_ib_destroy_flow;
2818
2819		ibdev->ib_dev.uverbs_ex_cmd_mask	|=
2820			(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
2821			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
2822	}
2823
2824	ibdev->ib_dev.uverbs_ex_cmd_mask |=
2825		(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
2826		(1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
2827		(1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
2828
2829	mlx4_ib_alloc_eqs(dev, ibdev);
2830
2831	spin_lock_init(&iboe->lock);
2832
2833	if (init_node_data(ibdev))
2834		goto err_map;
2835	mlx4_init_sl2vl_tbl(ibdev);
2836
2837	for (i = 0; i < ibdev->num_ports; ++i) {
2838		mutex_init(&ibdev->counters_table[i].mutex);
2839		INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
2840	}
2841
2842	num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
2843	for (i = 0; i < num_req_counters; ++i) {
2844		mutex_init(&ibdev->qp1_proxy_lock[i]);
2845		allocated = 0;
2846		if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
2847						IB_LINK_LAYER_ETHERNET) {
2848			err = mlx4_counter_alloc(ibdev->dev, &counter_index,
2849						 MLX4_RES_USAGE_DRIVER);
2850			/* if we failed to allocate a new counter, use the default */
2851			if (err)
2852				counter_index =
2853					mlx4_get_default_counter_index(dev,
2854								       i + 1);
2855			else
2856				allocated = 1;
2857		} else { /* IB_LINK_LAYER_INFINIBAND: use the default counter */
2858			counter_index = mlx4_get_default_counter_index(dev,
2859								       i + 1);
2860		}
2861		new_counter_index = kmalloc(sizeof(*new_counter_index),
2862					    GFP_KERNEL);
2863		if (!new_counter_index) {
2864			if (allocated)
2865				mlx4_counter_free(ibdev->dev, counter_index);
2866			goto err_counter;
2867		}
2868		new_counter_index->index = counter_index;
2869		new_counter_index->allocated = allocated;
2870		list_add_tail(&new_counter_index->list,
2871			      &ibdev->counters_table[i].counters_list);
2872		ibdev->counters_table[i].default_counter = counter_index;
2873		pr_info("counter index %d for port %d allocated %d\n",
2874			counter_index, i + 1, allocated);
2875	}
2876	if (mlx4_is_bonded(dev))
2877		for (i = 1; i < ibdev->num_ports ; ++i) {
2878			new_counter_index =
2879					kmalloc(sizeof(struct counter_index),
2880						GFP_KERNEL);
2881			if (!new_counter_index)
2882				goto err_counter;
2883			new_counter_index->index = counter_index;
2884			new_counter_index->allocated = 0;
2885			list_add_tail(&new_counter_index->list,
2886				      &ibdev->counters_table[i].counters_list);
2887			ibdev->counters_table[i].default_counter =
2888								counter_index;
2889		}
2890
2891	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
2892		ib_num_ports++;
2893
2894	spin_lock_init(&ibdev->sm_lock);
2895	mutex_init(&ibdev->cap_mask_mutex);
2896	INIT_LIST_HEAD(&ibdev->qp_list);
2897	spin_lock_init(&ibdev->reset_flow_resource_lock);
2898
2899	if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED &&
2900	    ib_num_ports) {
2901		ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
2902		err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
2903					    MLX4_IB_UC_STEER_QPN_ALIGN,
2904					    &ibdev->steer_qpn_base, 0,
2905					    MLX4_RES_USAGE_DRIVER);
2906		if (err)
2907			goto err_counter;
2908
2909		ibdev->ib_uc_qpns_bitmap =
2910			kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
2911				sizeof(long),
2912				GFP_KERNEL);
2913		if (!ibdev->ib_uc_qpns_bitmap)
2914			goto err_steer_qp_release;
2915
2916		if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) {
2917			bitmap_zero(ibdev->ib_uc_qpns_bitmap,
2918				    ibdev->steer_qpn_count);
2919			err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
2920					dev, ibdev->steer_qpn_base,
2921					ibdev->steer_qpn_base +
2922					ibdev->steer_qpn_count - 1);
2923			if (err)
2924				goto err_steer_free_bitmap;
2925		} else {
2926			bitmap_fill(ibdev->ib_uc_qpns_bitmap,
2927				    ibdev->steer_qpn_count);
2928		}
2929	}
2930
2931	for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
2932		atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
2933
2934	if (mlx4_ib_alloc_diag_counters(ibdev))
2935		goto err_steer_free_bitmap;
2936
2937	ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4;
2938	if (ib_register_device(&ibdev->ib_dev, NULL))
2939		goto err_diag_counters;
2940
2941	if (mlx4_ib_mad_init(ibdev))
2942		goto err_reg;
2943
2944	if (mlx4_ib_init_sriov(ibdev))
2945		goto err_mad;
2946
2947	if (!iboe->nb.notifier_call) {
2948		iboe->nb.notifier_call = mlx4_ib_netdev_event;
2949		err = register_netdevice_notifier(&iboe->nb);
2950		if (err) {
2951			iboe->nb.notifier_call = NULL;
2952			goto err_notif;
2953		}
2954	}
2955	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
2956		err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
2957		if (err)
2958			goto err_notif;
2959	}
2960
2961	for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
2962		if (device_create_file(&ibdev->ib_dev.dev,
2963				       mlx4_class_attributes[j]))
2964			goto err_notif;
2965	}
2966
2967	ibdev->ib_active = true;
2968	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
2969		devlink_port_type_ib_set(mlx4_get_devlink_port(dev, i),
2970					 &ibdev->ib_dev);
2971
2972	if (mlx4_is_mfunc(ibdev->dev))
2973		init_pkeys(ibdev);
2974
2975	/* create paravirt contexts for any VFs which are active */
2976	if (mlx4_is_master(ibdev->dev)) {
2977		for (j = 0; j < MLX4_MFUNC_MAX; j++) {
2978			if (j == mlx4_master_func_num(ibdev->dev))
2979				continue;
2980			if (mlx4_is_slave_active(ibdev->dev, j))
2981				do_slave_init(ibdev, j, 1);
2982		}
2983	}
2984	return ibdev;
2985
2986err_notif:
2987	if (ibdev->iboe.nb.notifier_call) {
2988		if (unregister_netdevice_notifier(&ibdev->iboe.nb))
2989			pr_warn("failure unregistering notifier\n");
2990		ibdev->iboe.nb.notifier_call = NULL;
2991	}
2992	flush_workqueue(wq);
2993
2994	mlx4_ib_close_sriov(ibdev);
2995
2996err_mad:
2997	mlx4_ib_mad_cleanup(ibdev);
2998
2999err_reg:
3000	ib_unregister_device(&ibdev->ib_dev);
3001
3002err_diag_counters:
3003	mlx4_ib_diag_cleanup(ibdev);
3004
3005err_steer_free_bitmap:
3006	kfree(ibdev->ib_uc_qpns_bitmap);
3007
3008err_steer_qp_release:
3009	mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
3010			      ibdev->steer_qpn_count);
3011err_counter:
3012	for (i = 0; i < ibdev->num_ports; ++i)
3013		mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
3014
3015err_map:
3016	mlx4_ib_free_eqs(dev, ibdev);
3017	iounmap(ibdev->uar_map);
3018
3019err_uar:
3020	mlx4_uar_free(dev, &ibdev->priv_uar);
3021
3022err_pd:
3023	mlx4_pd_free(dev, ibdev->priv_pdn);
3024
3025err_dealloc:
3026	ib_dealloc_device(&ibdev->ib_dev);
3027
3028	return NULL;
3029}
3030
3031int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
3032{
3033	int offset;
3034
3035	WARN_ON(!dev->ib_uc_qpns_bitmap);
3036
3037	offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
3038					 dev->steer_qpn_count,
3039					 get_count_order(count));
3040	if (offset < 0)
3041		return offset;
3042
3043	*qpn = dev->steer_qpn_base + offset;
3044	return 0;
3045}
3046
3047void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
3048{
3049	if (!qpn ||
3050	    dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED)
3051		return;
3052
3053	BUG_ON(qpn < dev->steer_qpn_base);
3054
3055	bitmap_release_region(dev->ib_uc_qpns_bitmap,
3056			      qpn - dev->steer_qpn_base,
3057			      get_count_order(count));
3058}
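
/*
 * Editor's note, not part of the original source: the two helpers above hand
 * out naturally aligned, power-of-two sized blocks from the reserved
 * steering QPN range, because bitmap_find_free_region() allocates
 * 1 << get_count_order(count) bits at a time.  A minimal usage sketch (the
 * caller and its error handling are hypothetical): asking for three QPNs
 * reserves a block of four, since get_count_order(3) == 2.
 *
 *	int qpn;
 *
 *	if (!mlx4_ib_steer_qp_alloc(dev, 3, &qpn)) {
 *		... use qpn, qpn + 1 and qpn + 2 ...
 *		mlx4_ib_steer_qp_free(dev, qpn, 3);
 *	}
 */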
3059
3060int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
3061			 int is_attach)
3062{
3063	int err;
3064	size_t flow_size;
3065	struct ib_flow_attr *flow = NULL;
3066	struct ib_flow_spec_ib *ib_spec;
3067
3068	if (is_attach) {
3069		flow_size = sizeof(struct ib_flow_attr) +
3070			    sizeof(struct ib_flow_spec_ib);
3071		flow = kzalloc(flow_size, GFP_KERNEL);
3072		if (!flow)
3073			return -ENOMEM;
3074		flow->port = mqp->port;
3075		flow->num_of_specs = 1;
3076		flow->size = flow_size;
3077		ib_spec = (struct ib_flow_spec_ib *)(flow + 1);
3078		ib_spec->type = IB_FLOW_SPEC_IB;
3079		ib_spec->size = sizeof(struct ib_flow_spec_ib);
3080		/* Add an empty rule for IB L2 */
3081		memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
3082
3083		err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
3084					    IB_FLOW_DOMAIN_NIC,
3085					    MLX4_FS_REGULAR,
3086					    &mqp->reg_id);
3087	} else {
3088		err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
3089	}
3090	kfree(flow);
3091	return err;
3092}
3093
3094static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
3095{
3096	struct mlx4_ib_dev *ibdev = ibdev_ptr;
3097	int p;
3098	int i;
3099
3100	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
3101		devlink_port_type_clear(mlx4_get_devlink_port(dev, i));
3102	ibdev->ib_active = false;
3103	flush_workqueue(wq);
3104
3105	mlx4_ib_close_sriov(ibdev);
3106	mlx4_ib_mad_cleanup(ibdev);
3107	ib_unregister_device(&ibdev->ib_dev);
3108	mlx4_ib_diag_cleanup(ibdev);
3109	if (ibdev->iboe.nb.notifier_call) {
3110		if (unregister_netdevice_notifier(&ibdev->iboe.nb))
3111			pr_warn("failure unregistering notifier\n");
3112		ibdev->iboe.nb.notifier_call = NULL;
3113	}
3114
3115	mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
3116			      ibdev->steer_qpn_count);
3117	kfree(ibdev->ib_uc_qpns_bitmap);
3118
3119	iounmap(ibdev->uar_map);
3120	for (p = 0; p < ibdev->num_ports; ++p)
3121		mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]);
3122
3123	mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
3124		mlx4_CLOSE_PORT(dev, p);
3125
3126	mlx4_ib_free_eqs(dev, ibdev);
3127
3128	mlx4_uar_free(dev, &ibdev->priv_uar);
3129	mlx4_pd_free(dev, ibdev->priv_pdn);
3130	ib_dealloc_device(&ibdev->ib_dev);
3131}
3132
3133static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
3134{
3135	struct mlx4_ib_demux_work **dm = NULL;
3136	struct mlx4_dev *dev = ibdev->dev;
3137	int i;
3138	unsigned long flags;
3139	struct mlx4_active_ports actv_ports;
3140	unsigned int ports;
3141	unsigned int first_port;
3142
3143	if (!mlx4_is_master(dev))
3144		return;
3145
3146	actv_ports = mlx4_get_active_ports(dev, slave);
3147	ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
3148	first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports);
3149
3150	dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
3151	if (!dm)
3152		return;
3153
3154	for (i = 0; i < ports; i++) {
3155		dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
3156		if (!dm[i]) {
3157			while (--i >= 0)
3158				kfree(dm[i]);
3159			goto out;
3160		}
3161		INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
3162		dm[i]->port = first_port + i + 1;
3163		dm[i]->slave = slave;
3164		dm[i]->do_init = do_init;
3165		dm[i]->dev = ibdev;
3166	}
3167	/* initialize or tear down tunnel QPs for the slave */
3168	spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
3169	if (!ibdev->sriov.is_going_down) {
3170		for (i = 0; i < ports; i++)
3171			queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
3172		spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
3173	} else {
3174		spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
3175		for (i = 0; i < ports; i++)
3176			kfree(dm[i]);
3177	}
3178out:
3179	kfree(dm);
3180	return;
3181}
3182
3183static void mlx4_ib_handle_catas_error(struct mlx4_ib_dev *ibdev)
3184{
3185	struct mlx4_ib_qp *mqp;
3186	unsigned long flags_qp;
3187	unsigned long flags_cq;
3188	struct mlx4_ib_cq *send_mcq, *recv_mcq;
3189	struct list_head    cq_notify_list;
3190	struct mlx4_cq *mcq;
3191	unsigned long flags;
3192
3193	pr_warn("mlx4_ib_handle_catas_error started\n");
3194	INIT_LIST_HEAD(&cq_notify_list);
3195
3196	/* Go over the QP list residing on this ibdev, syncing with QP create/destroy. */
3197	spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
3198
3199	list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
3200		spin_lock_irqsave(&mqp->sq.lock, flags_qp);
3201		if (mqp->sq.tail != mqp->sq.head) {
3202			send_mcq = to_mcq(mqp->ibqp.send_cq);
3203			spin_lock_irqsave(&send_mcq->lock, flags_cq);
3204			if (send_mcq->mcq.comp &&
3205			    mqp->ibqp.send_cq->comp_handler) {
3206				if (!send_mcq->mcq.reset_notify_added) {
3207					send_mcq->mcq.reset_notify_added = 1;
3208					list_add_tail(&send_mcq->mcq.reset_notify,
3209						      &cq_notify_list);
3210				}
3211			}
3212			spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
3213		}
3214		spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
3215		/* Now, handle the QP's receive queue */
3216		spin_lock_irqsave(&mqp->rq.lock, flags_qp);
3217		/* no handling is needed for SRQ */
3218		if (!mqp->ibqp.srq) {
3219			if (mqp->rq.tail != mqp->rq.head) {
3220				recv_mcq = to_mcq(mqp->ibqp.recv_cq);
3221				spin_lock_irqsave(&recv_mcq->lock, flags_cq);
3222				if (recv_mcq->mcq.comp &&
3223				    mqp->ibqp.recv_cq->comp_handler) {
3224					if (!recv_mcq->mcq.reset_notify_added) {
3225						recv_mcq->mcq.reset_notify_added = 1;
3226						list_add_tail(&recv_mcq->mcq.reset_notify,
3227							      &cq_notify_list);
3228					}
3229				}
3230				spin_unlock_irqrestore(&recv_mcq->lock,
3231						       flags_cq);
3232			}
3233		}
3234		spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
3235	}
3236
3237	list_for_each_entry(mcq, &cq_notify_list, reset_notify) {
3238		mcq->comp(mcq);
3239	}
3240	spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
3241	pr_warn("mlx4_ib_handle_catas_error ended\n");
3242}
3243
3244static void handle_bonded_port_state_event(struct work_struct *work)
3245{
3246	struct ib_event_work *ew =
3247		container_of(work, struct ib_event_work, work);
3248	struct mlx4_ib_dev *ibdev = ew->ib_dev;
3249	enum ib_port_state bonded_port_state = IB_PORT_NOP;
3250	int i;
3251	struct ib_event ibev;
3252
3253	kfree(ew);
3254	spin_lock_bh(&ibdev->iboe.lock);
3255	for (i = 0; i < MLX4_MAX_PORTS; ++i) {
3256		struct net_device *curr_netdev = ibdev->iboe.netdevs[i];
3257		enum ib_port_state curr_port_state;
3258
3259		if (!curr_netdev)
3260			continue;
3261
3262		curr_port_state =
3263			(netif_running(curr_netdev) &&
3264			 netif_carrier_ok(curr_netdev)) ?
3265			IB_PORT_ACTIVE : IB_PORT_DOWN;
3266
3267		bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ?
3268			curr_port_state : IB_PORT_ACTIVE;
3269	}
3270	spin_unlock_bh(&ibdev->iboe.lock);
3271
3272	ibev.device = &ibdev->ib_dev;
3273	ibev.element.port_num = 1;
3274	ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ?
3275		IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
3276
3277	ib_dispatch_event(&ibev);
3278}
3279
3280void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port)
3281{
3282	u64 sl2vl;
3283	int err;
3284
3285	err = mlx4_ib_query_sl2vl(&mdev->ib_dev, port, &sl2vl);
3286	if (err) {
3287		pr_err("Unable to get current sl to vl mapping for port %d.  Using all zeroes (%d)\n",
3288		       port, err);
3289		sl2vl = 0;
3290	}
3291	atomic64_set(&mdev->sl2vl[port - 1], sl2vl);
3292}
3293
3294static void ib_sl2vl_update_work(struct work_struct *work)
3295{
3296	struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
3297	struct mlx4_ib_dev *mdev = ew->ib_dev;
3298	int port = ew->port;
3299
3300	mlx4_ib_sl2vl_update(mdev, port);
3301
3302	kfree(ew);
3303}
3304
3305void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
3306				     int port)
3307{
3308	struct ib_event_work *ew;
3309
3310	ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
3311	if (ew) {
3312		INIT_WORK(&ew->work, ib_sl2vl_update_work);
3313		ew->port = port;
3314		ew->ib_dev = ibdev;
3315		queue_work(wq, &ew->work);
3316	}
3317}
3318
3319static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
3320			  enum mlx4_dev_event event, unsigned long param)
3321{
3322	struct ib_event ibev;
3323	struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
3324	struct mlx4_eqe *eqe = NULL;
3325	struct ib_event_work *ew;
3326	int p = 0;
3327
3328	if (mlx4_is_bonded(dev) &&
3329	    ((event == MLX4_DEV_EVENT_PORT_UP) ||
3330	    (event == MLX4_DEV_EVENT_PORT_DOWN))) {
3331		ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
3332		if (!ew)
3333			return;
3334		INIT_WORK(&ew->work, handle_bonded_port_state_event);
3335		ew->ib_dev = ibdev;
3336		queue_work(wq, &ew->work);
3337		return;
3338	}
3339
3340	if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
3341		eqe = (struct mlx4_eqe *)param;
3342	else
3343		p = (int) param;
3344
3345	switch (event) {
3346	case MLX4_DEV_EVENT_PORT_UP:
3347		if (p > ibdev->num_ports)
3348			return;
3349		if (!mlx4_is_slave(dev) &&
3350		    rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
3351			IB_LINK_LAYER_INFINIBAND) {
3352			if (mlx4_is_master(dev))
3353				mlx4_ib_invalidate_all_guid_record(ibdev, p);
3354			if (ibdev->dev->flags & MLX4_FLAG_SECURE_HOST &&
3355			    !(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT))
3356				mlx4_sched_ib_sl2vl_update_work(ibdev, p);
3357		}
3358		ibev.event = IB_EVENT_PORT_ACTIVE;
3359		break;
3360
3361	case MLX4_DEV_EVENT_PORT_DOWN:
3362		if (p > ibdev->num_ports)
3363			return;
3364		ibev.event = IB_EVENT_PORT_ERR;
3365		break;
3366
3367	case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
3368		ibdev->ib_active = false;
3369		ibev.event = IB_EVENT_DEVICE_FATAL;
3370		mlx4_ib_handle_catas_error(ibdev);
3371		break;
3372
3373	case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
3374		ew = kmalloc(sizeof *ew, GFP_ATOMIC);
3375		if (!ew)
3376			break;
3377
3378		INIT_WORK(&ew->work, handle_port_mgmt_change_event);
3379		memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
3380		ew->ib_dev = ibdev;
3381		/* need to queue only for port owner, which uses GEN_EQE */
3382		if (mlx4_is_master(dev))
3383			queue_work(wq, &ew->work);
3384		else
3385			handle_port_mgmt_change_event(&ew->work);
3386		return;
3387
3388	case MLX4_DEV_EVENT_SLAVE_INIT:
3389		/* here, p is the slave id */
3390		do_slave_init(ibdev, p, 1);
3391		if (mlx4_is_master(dev)) {
3392			int i;
3393
3394			for (i = 1; i <= ibdev->num_ports; i++) {
3395				if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
3396					== IB_LINK_LAYER_INFINIBAND)
3397					mlx4_ib_slave_alias_guid_event(ibdev,
3398								       p, i,
3399								       1);
3400			}
3401		}
3402		return;
3403
3404	case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
3405		if (mlx4_is_master(dev)) {
3406			int i;
3407
3408			for (i = 1; i <= ibdev->num_ports; i++) {
3409				if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
3410					== IB_LINK_LAYER_INFINIBAND)
3411					mlx4_ib_slave_alias_guid_event(ibdev,
3412								       p, i,
3413								       0);
3414			}
3415		}
3416		/* here, p is the slave id */
3417		do_slave_init(ibdev, p, 0);
3418		return;
3419
3420	default:
3421		return;
3422	}
3423
3424	ibev.device	      = ibdev_ptr;
3425	ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
3426
3427	ib_dispatch_event(&ibev);
3428}
3429
3430static struct mlx4_interface mlx4_ib_interface = {
3431	.add		= mlx4_ib_add,
3432	.remove		= mlx4_ib_remove,
3433	.event		= mlx4_ib_event,
3434	.protocol	= MLX4_PROT_IB_IPV6,
3435	.flags		= MLX4_INTFF_BONDING
3436};
3437
3438static int __init mlx4_ib_init(void)
3439{
3440	int err;
3441
3442	wq = alloc_ordered_workqueue("mlx4_ib", WQ_MEM_RECLAIM);
3443	if (!wq)
3444		return -ENOMEM;
3445
3446	err = mlx4_ib_mcg_init();
3447	if (err)
3448		goto clean_wq;
3449
3450	err = mlx4_register_interface(&mlx4_ib_interface);
3451	if (err)
3452		goto clean_mcg;
3453
3454	return 0;
3455
3456clean_mcg:
3457	mlx4_ib_mcg_destroy();
3458
3459clean_wq:
3460	destroy_workqueue(wq);
3461	return err;
3462}
3463
3464static void __exit mlx4_ib_cleanup(void)
3465{
3466	mlx4_unregister_interface(&mlx4_ib_interface);
3467	mlx4_ib_mcg_destroy();
3468	destroy_workqueue(wq);
3469}
3470
3471module_init(mlx4_ib_init);
3472module_exit(mlx4_ib_cleanup);