/*
 * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/module.h>
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <net/netlink.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_netlink.h>

#include "core_priv.h"
#include "cma_priv.h"

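/*
 * Netlink attribute policy for all nldev commands: the attribute types and
 * maximum lengths used by nlmsg_parse() to validate incoming requests.
 */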
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
	[RDMA_NLDEV_ATTR_DEV_INDEX]     = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DEV_NAME]	= { .type = NLA_NUL_STRING,
					    .len = IB_DEVICE_NAME_MAX - 1},
	[RDMA_NLDEV_ATTR_PORT_INDEX]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_FW_VERSION]	= { .type = NLA_NUL_STRING,
					    .len = IB_FW_VERSION_NAME_MAX - 1},
	[RDMA_NLDEV_ATTR_NODE_GUID]	= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SUBNET_PREFIX]	= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_LID]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_SM_LID]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_LMC]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_PORT_STATE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
					     .len = 16 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
						    .len = TASK_COMM_LEN },
	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SRC_ADDR]	= {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_DST_ADDR]	= {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_CQ]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQE]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_USECNT]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_POLL_CTX]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_MR]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_MR_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_RKEY]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_LKEY]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_IOVA]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_MRLEN]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_PD]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PD_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_NDEV_INDEX]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_NDEV_NAME]		= { .type = NLA_NUL_STRING,
						    .len = IFNAMSIZ },
};

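/* Put the device index and name attributes that identify an ib_device. */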
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
{
	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
		return -EMSGSIZE;
	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
		return -EMSGSIZE;

	return 0;
}

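/*
 * Fill device-wide attributes: handle, number of ports, capability flags,
 * firmware version, node/system image GUIDs and node type.
 */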
static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
	char fw[IB_FW_VERSION_NAME_MAX];

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
		return -EMSGSIZE;

	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
			      device->attrs.device_cap_flags, 0))
		return -EMSGSIZE;

	ib_get_device_fw_str(device, fw);
	/* Device without FW has strlen(fw) = 0 */
	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
		return -EMSGSIZE;

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
			      be64_to_cpu(device->node_guid), 0))
		return -EMSGSIZE;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
			      be64_to_cpu(device->attrs.sys_image_guid), 0))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
		return -EMSGSIZE;
	return 0;
}

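/*
 * Fill per-port attributes. IB-specific attributes (subnet prefix, LID,
 * SM LID, LMC) are included only for IB ports, and the associated netdev
 * is reported only when it belongs to the requester's network namespace.
 */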
static int fill_port_info(struct sk_buff *msg,
			  struct ib_device *device, u32 port,
			  const struct net *net)
{
	struct net_device *netdev = NULL;
	struct ib_port_attr attr;
	int ret;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
		return -EMSGSIZE;

	ret = ib_query_port(device, port, &attr);
	if (ret)
		return ret;

	BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
			      (u64)attr.port_cap_flags, 0))
		return -EMSGSIZE;
	if (rdma_protocol_ib(device, port) &&
	    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
			      attr.subnet_prefix, 0))
		return -EMSGSIZE;
	if (rdma_protocol_ib(device, port)) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
			return -EMSGSIZE;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
			return -EMSGSIZE;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
		return -EMSGSIZE;

	if (device->get_netdev)
		netdev = device->get_netdev(device, port);

	if (netdev && net_eq(dev_net(netdev), net)) {
		ret = nla_put_u32(msg,
				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
		if (ret)
			goto out;
		ret = nla_put_string(msg,
				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
	}

out:
	if (netdev)
		dev_put(netdev);
	return ret;
}

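/* Emit one <name, current count> pair inside the resource summary nest. */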
static int fill_res_info_entry(struct sk_buff *msg,
			       const char *name, u64 curr)
{
	struct nlattr *entry_attr;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
	if (!entry_attr)
		return -EMSGSIZE;

	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
		goto err;
	if (nla_put_u64_64bit(msg,
			      RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, 0))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
	return -EMSGSIZE;
}

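/*
 * Fill the resource tracker summary: for every tracked resource type,
 * report the number of objects visible in the caller's pid namespace.
 */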
static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
{
	static const char * const names[RDMA_RESTRACK_MAX] = {
		[RDMA_RESTRACK_PD] = "pd",
		[RDMA_RESTRACK_CQ] = "cq",
		[RDMA_RESTRACK_QP] = "qp",
		[RDMA_RESTRACK_CM_ID] = "cm_id",
		[RDMA_RESTRACK_MR] = "mr",
	};

	struct rdma_restrack_root *res = &device->res;
	struct nlattr *table_attr;
	int ret, i, curr;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
	if (!table_attr)
		return -EMSGSIZE;

	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
		if (!names[i])
			continue;
		curr = rdma_restrack_count(res, i, task_active_pid_ns(current));
		ret = fill_res_info_entry(msg, names[i], curr);
		if (ret)
			goto err;
	}

	nla_nest_end(msg, table_attr);
	return 0;

err:
	nla_nest_cancel(msg, table_attr);
	return ret;
}

static int fill_res_name_pid(struct sk_buff *msg,
			     struct rdma_restrack_entry *res)
{
	/*
	 * For user resources, the user should read /proc/PID/comm
	 * to get the name of the task.
	 */
	if (rdma_is_kernel_res(res)) {
		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
		    res->kern_name))
			return -EMSGSIZE;
	} else {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
		    task_pid_vnr(res->task)))
			return -EMSGSIZE;
	}
	return 0;
}

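/*
 * Dump one tracked QP. QPs that do not match the optional port filter
 * are silently skipped (return 0).
 */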
static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_qp_init_attr qp_init_attr;
	struct nlattr *entry_attr;
	struct ib_qp_attr qp_attr;
	int ret;

	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
	if (ret)
		return ret;

	if (port && port != qp_attr.port_num)
		return 0;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
	if (!entry_attr)
		goto out;

	/* In create_qp() port is not set yet */
	if (qp_attr.port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
		goto err;
	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
				qp_attr.dest_qp_num))
			goto err;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
				qp_attr.rq_psn))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
		goto err;

	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
			       qp_attr.path_mig_state))
			goto err;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
		goto err;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
out:
	return -EMSGSIZE;
}

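/* Dump one tracked CM ID, including its QP binding and addresses. */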
static int fill_res_cm_id_entry(struct sk_buff *msg,
				struct netlink_callback *cb,
				struct rdma_restrack_entry *res, uint32_t port)
{
	struct rdma_id_private *id_priv =
				container_of(res, struct rdma_id_private, res);
	struct rdma_cm_id *cm_id = &id_priv->id;
	struct nlattr *entry_attr;

	if (port && port != cm_id->port_num)
		return 0;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
	if (!entry_attr)
		goto out;

	if (cm_id->port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
		goto err;

	if (id_priv->qp_num) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
			goto err;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
		goto err;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
		goto err;

	if (cm_id->route.addr.src_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
		    sizeof(cm_id->route.addr.src_addr),
		    &cm_id->route.addr.src_addr))
		goto err;
	if (cm_id->route.addr.dst_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
		    sizeof(cm_id->route.addr.dst_addr),
		    &cm_id->route.addr.dst_addr))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
out:
	return -EMSGSIZE;
}

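/* Dump one tracked CQ. */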
static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_cq *cq = container_of(res, struct ib_cq, res);
	struct nlattr *entry_attr;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY);
	if (!entry_attr)
		goto out;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
		goto err;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&cq->usecnt), 0))
		goto err;

	/* Poll context is only valid for kernel CQs */
	if (rdma_is_kernel_res(res) &&
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
out:
	return -EMSGSIZE;
}

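/* Dump one tracked MR; keys and IOVA are shown only to CAP_NET_ADMIN. */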
static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct nlattr *entry_attr;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY);
	if (!entry_attr)
		goto out;

	if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
			goto err;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
			goto err;
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA,
				      mr->iova, 0))
			goto err;
	}

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 0))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
out:
	return -EMSGSIZE;
}

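/* Dump one tracked PD; key values are shown only to CAP_NET_ADMIN. */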
static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_pd *pd = container_of(res, struct ib_pd, res);
	struct nlattr *entry_attr;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY);
	if (!entry_attr)
		goto out;

	if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
				pd->local_dma_lkey))
			goto err;
		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
				pd->unsafe_global_rkey))
			goto err;
	}
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&pd->usecnt), 0))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
out:
	return -EMSGSIZE;
}

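/* RDMA_NLDEV_CMD_GET doit: return the description of a single device. */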
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	int err;

	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);

	device = ib_device_get_by_index(index);
	if (!device)
		return -EINVAL;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, 0);

	err = fill_dev_info(msg, device);
	if (err)
		goto err_free;

	nlmsg_end(msg, nlh);

	put_device(&device->dev);
	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	put_device(&device->dev);
	return err;
}

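/* Per-device callback for the RDMA_NLDEV_CMD_GET dump. */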
static int _nldev_get_dumpit(struct ib_device *device,
			     struct sk_buff *skb,
			     struct netlink_callback *cb,
			     unsigned int idx)
{
	int start = cb->args[0];
	struct nlmsghdr *nlh;

	if (idx < start)
		return 0;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, NLM_F_MULTI);

	if (fill_dev_info(skb, device)) {
		nlmsg_cancel(skb, nlh);
		goto out;
	}

	nlmsg_end(skb, nlh);

	idx++;

out:	cb->args[0] = idx;
	return skb->len;
}

static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	/*
	 * There is no need to take a lock here, because we rely on
	 * ib_core's lists_rwsem.
	 */
	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
}

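/* RDMA_NLDEV_CMD_PORT_GET doit: return the attributes of a single port. */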
static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	u32 port;
	int err;

	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	if (err ||
	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(index);
	if (!device)
		return -EINVAL;

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	if (!rdma_is_port_valid(device, port)) {
		err = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, 0);

	err = fill_port_info(msg, device, port, sock_net(skb->sk));
	if (err)
		goto err_free;

	nlmsg_end(msg, nlh);
	put_device(&device->dev);

	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	put_device(&device->dev);
	return err;
}

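/* RDMA_NLDEV_CMD_PORT_GET dumpit: walk all ports of the given device. */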
static int nldev_port_get_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	int start = cb->args[0];
	struct nlmsghdr *nlh;
	u32 idx = 0;
	u32 ifindex;
	int err;
	u32 p;

	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, NULL);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(ifindex);
	if (!device)
		return -EINVAL;

	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
		/*
		 * The dumpit function returns all information beginning
		 * at a specific index. That index is taken from the
		 * netlink request sent by the user and is available
		 * in cb->args[0].
		 *
		 * Usually the user doesn't fill this field, which causes
		 * all entries to be returned.
		 */
		if (idx < start) {
			idx++;
			continue;
		}

		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq,
				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
						 RDMA_NLDEV_CMD_PORT_GET),
				0, NLM_F_MULTI);

		if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
			nlmsg_cancel(skb, nlh);
			goto out;
		}
		idx++;
		nlmsg_end(skb, nlh);
	}

out:
	put_device(&device->dev);
	cb->args[0] = idx;
	return skb->len;
}

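/* RDMA_NLDEV_CMD_RES_GET doit: return the resource summary of a device. */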
static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	int ret;

	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(index);
	if (!device)
		return -EINVAL;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
			0, 0);

	ret = fill_res_info(msg, device);
	if (ret)
		goto err_free;

	nlmsg_end(msg, nlh);
	put_device(&device->dev);
	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	put_device(&device->dev);
	return ret;
}

static int _nldev_res_get_dumpit(struct ib_device *device,
				 struct sk_buff *skb,
				 struct netlink_callback *cb,
				 unsigned int idx)
{
	int start = cb->args[0];
	struct nlmsghdr *nlh;

	if (idx < start)
		return 0;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
			0, NLM_F_MULTI);

	if (fill_res_info(skb, device)) {
		nlmsg_cancel(skb, nlh);
		goto out;
	}

	nlmsg_end(skb, nlh);

	idx++;

out:
	cb->args[0] = idx;
	return skb->len;
}

static int nldev_res_get_dumpit(struct sk_buff *skb,
				struct netlink_callback *cb)
{
	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
}

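/*
 * Per-resource-type dispatch: the fill callback together with the netlink
 * command and nest attribute used when dumping that resource type.
 */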
struct nldev_fill_res_entry {
	int (*fill_res_func)(struct sk_buff *msg, struct netlink_callback *cb,
			     struct rdma_restrack_entry *res, u32 port);
	enum rdma_nldev_attr nldev_attr;
	enum rdma_nldev_command nldev_cmd;
};

static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
	[RDMA_RESTRACK_QP] = {
		.fill_res_func = fill_res_qp_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
	},
	[RDMA_RESTRACK_CM_ID] = {
		.fill_res_func = fill_res_cm_id_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
	},
	[RDMA_RESTRACK_CQ] = {
		.fill_res_func = fill_res_cq_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
	},
	[RDMA_RESTRACK_MR] = {
		.fill_res_func = fill_res_mr_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
	},
	[RDMA_RESTRACK_PD] = {
		.fill_res_func = fill_res_pd_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
	},
};

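/*
 * Common dumpit for all tracked resource types: walk the device's restrack
 * hash bucket for @res_type and emit one nested entry per visible resource.
 */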
static int res_get_common_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 enum rdma_restrack_type res_type)
{
	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct rdma_restrack_entry *res;
	int err, ret = 0, idx = 0;
	struct nlattr *table_attr;
	struct ib_device *device;
	int start = cb->args[0];
	struct nlmsghdr *nlh;
	u32 index, port = 0;
	bool filled = false;

	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, NULL);
	/*
	 * Right now, we expect the device index in order to get the res
	 * information, but it is possible to extend this code to return
	 * all devices in one shot by checking the existence of
	 * RDMA_NLDEV_ATTR_DEV_INDEX: if it doesn't exist, iterate over
	 * all devices.
	 *
	 * But it is not needed for now.
	 */
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(index);
	if (!device)
		return -EINVAL;

	/*
	 * If no PORT_INDEX is supplied, return all resources from the device.
	 */
	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
		if (!rdma_is_port_valid(device, port)) {
			ret = -EINVAL;
			goto err_index;
		}
	}

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
			0, NLM_F_MULTI);

	if (fill_nldev_handle(skb, device)) {
		ret = -EMSGSIZE;
		goto err;
	}

	table_attr = nla_nest_start(skb, fe->nldev_attr);
	if (!table_attr) {
		ret = -EMSGSIZE;
		goto err;
	}

	down_read(&device->res.rwsem);
	hash_for_each_possible(device->res.hash, res, node, res_type) {
		if (idx < start)
			goto next;

		if ((rdma_is_kernel_res(res) &&
		     task_active_pid_ns(current) != &init_pid_ns) ||
		    (!rdma_is_kernel_res(res) && task_active_pid_ns(current) !=
		     task_active_pid_ns(res->task)))
			/*
			 * 1. Kernel resources should be visible in the
			 *    init namespace only.
			 * 2. Present only resources visible in the current
			 *    namespace.
			 */
			goto next;

		if (!rdma_restrack_get(res))
			/*
			 * Resource is under release now, but we are not
			 * releasing the lock, so it will be freed in
			 * our next pass, once we get the ->next pointer.
			 */
			goto next;

		filled = true;

		up_read(&device->res.rwsem);
		ret = fe->fill_res_func(skb, cb, res, port);
		down_read(&device->res.rwsem);
		/*
		 * Return the resource; it will not actually be freed
		 * until &device->res.rwsem is taken for write.
		 */
		rdma_restrack_put(res);

		if (ret == -EMSGSIZE)
			/*
			 * There is a chance to optimize here.
			 * It can be done by using list_prepare_entry
			 * and list_for_each_entry_continue afterwards.
			 */
			break;
		if (ret)
			goto res_err;
next:		idx++;
	}
	up_read(&device->res.rwsem);

	nla_nest_end(skb, table_attr);
	nlmsg_end(skb, nlh);
	cb->args[0] = idx;

	/*
	 * No more entries to fill: cancel the message and return 0
	 * to mark the end of the dump.
	 */
	if (!filled)
		goto err;

	put_device(&device->dev);
	return skb->len;

res_err:
	nla_nest_cancel(skb, table_attr);
	up_read(&device->res.rwsem);

err:
	nlmsg_cancel(skb, nlh);

err_index:
	put_device(&device->dev);
	return ret;
}

static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP);
}

static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
				      struct netlink_callback *cb)
{
	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CM_ID);
}

static int nldev_res_get_cq_dumpit(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CQ);
}

static int nldev_res_get_mr_dumpit(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR);
}

static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
}

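/* Operation table registered with the RDMA netlink core for RDMA_NL_NLDEV. */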
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
	[RDMA_NLDEV_CMD_GET] = {
		.doit = nldev_get_doit,
		.dump = nldev_get_dumpit,
	},
	[RDMA_NLDEV_CMD_PORT_GET] = {
		.doit = nldev_port_get_doit,
		.dump = nldev_port_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_GET] = {
		.doit = nldev_res_get_doit,
		.dump = nldev_res_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_QP_GET] = {
		.dump = nldev_res_get_qp_dumpit,
		/*
		 * .doit is not implemented yet for two reasons:
		 * 1. It is not needed yet.
		 * 2. There is a need to provide an identifier; while that
		 *    is easy for QPs (device index + port index + LQPN),
		 *    it is not the case for the rest of the resources
		 *    (PD and CQ). Because it is better to provide a
		 *    similar interface for all resources, let's wait
		 *    until the other resources are implemented too.
		 */
	},
	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
		.dump = nldev_res_get_cm_id_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
		.dump = nldev_res_get_cq_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_MR_GET] = {
		.dump = nldev_res_get_mr_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_PD_GET] = {
		.dump = nldev_res_get_pd_dumpit,
	},
};

void __init nldev_init(void)
{
	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}

void __exit nldev_exit(void)
{
	rdma_nl_unregister(RDMA_NL_NLDEV);
}

MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);