// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
 */

#include "mlx5_ib.h"
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "counters.h"
#include "ib_rep.h"
#include "qp.h"

struct mlx5_ib_counter {
	const char *name;
	size_t offset;
	u32 type;
};

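/*
 * The macros below build struct mlx5_ib_counter initializers: the stat name
 * exposed through the RDMA stats interface and the byte offset of the
 * matching field in the QUERY_Q_COUNTER output layout. The vport variants
 * only add a "vport_" prefix to the name; the offsets are the same fields.
 */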
#define INIT_Q_COUNTER(_name)		\
	{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}

#define INIT_VPORT_Q_COUNTER(_name)		\
	{ .name = "vport_" #_name, .offset =	\
		MLX5_BYTE_OFF(query_q_counter_out, _name)}

static const struct mlx5_ib_counter basic_q_cnts[] = {
	INIT_Q_COUNTER(rx_write_requests),
	INIT_Q_COUNTER(rx_read_requests),
	INIT_Q_COUNTER(rx_atomic_requests),
	INIT_Q_COUNTER(rx_dct_connect),
	INIT_Q_COUNTER(out_of_buffer),
};

static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
	INIT_Q_COUNTER(out_of_sequence),
};

static const struct mlx5_ib_counter retrans_q_cnts[] = {
	INIT_Q_COUNTER(duplicate_request),
	INIT_Q_COUNTER(rnr_nak_retry_err),
	INIT_Q_COUNTER(packet_seq_err),
	INIT_Q_COUNTER(implied_nak_seq_err),
	INIT_Q_COUNTER(local_ack_timeout_err),
};

static const struct mlx5_ib_counter vport_basic_q_cnts[] = {
	INIT_VPORT_Q_COUNTER(rx_write_requests),
	INIT_VPORT_Q_COUNTER(rx_read_requests),
	INIT_VPORT_Q_COUNTER(rx_atomic_requests),
	INIT_VPORT_Q_COUNTER(rx_dct_connect),
	INIT_VPORT_Q_COUNTER(out_of_buffer),
};

static const struct mlx5_ib_counter vport_out_of_seq_q_cnts[] = {
	INIT_VPORT_Q_COUNTER(out_of_sequence),
};

static const struct mlx5_ib_counter vport_retrans_q_cnts[] = {
	INIT_VPORT_Q_COUNTER(duplicate_request),
	INIT_VPORT_Q_COUNTER(rnr_nak_retry_err),
	INIT_VPORT_Q_COUNTER(packet_seq_err),
	INIT_VPORT_Q_COUNTER(implied_nak_seq_err),
	INIT_VPORT_Q_COUNTER(local_ack_timeout_err),
};

#define INIT_CONG_COUNTER(_name)		\
	{ .name = #_name, .offset =	\
		MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}

static const struct mlx5_ib_counter cong_cnts[] = {
	INIT_CONG_COUNTER(rp_cnp_ignored),
	INIT_CONG_COUNTER(rp_cnp_handled),
	INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
	INIT_CONG_COUNTER(np_cnp_sent),
};

static const struct mlx5_ib_counter extended_err_cnts[] = {
	INIT_Q_COUNTER(resp_local_length_error),
	INIT_Q_COUNTER(resp_cqe_error),
	INIT_Q_COUNTER(req_cqe_error),
	INIT_Q_COUNTER(req_remote_invalid_request),
	INIT_Q_COUNTER(req_remote_access_errors),
	INIT_Q_COUNTER(resp_remote_access_errors),
	INIT_Q_COUNTER(resp_cqe_flush_error),
	INIT_Q_COUNTER(req_cqe_flush_error),
	INIT_Q_COUNTER(req_transport_retries_exceeded),
	INIT_Q_COUNTER(req_rnr_retries_exceeded),
};

static const struct mlx5_ib_counter roce_accl_cnts[] = {
	INIT_Q_COUNTER(roce_adp_retrans),
	INIT_Q_COUNTER(roce_adp_retrans_to),
	INIT_Q_COUNTER(roce_slow_restart),
	INIT_Q_COUNTER(roce_slow_restart_cnps),
	INIT_Q_COUNTER(roce_slow_restart_trans),
};

static const struct mlx5_ib_counter vport_extended_err_cnts[] = {
	INIT_VPORT_Q_COUNTER(resp_local_length_error),
	INIT_VPORT_Q_COUNTER(resp_cqe_error),
	INIT_VPORT_Q_COUNTER(req_cqe_error),
	INIT_VPORT_Q_COUNTER(req_remote_invalid_request),
	INIT_VPORT_Q_COUNTER(req_remote_access_errors),
	INIT_VPORT_Q_COUNTER(resp_remote_access_errors),
	INIT_VPORT_Q_COUNTER(resp_cqe_flush_error),
	INIT_VPORT_Q_COUNTER(req_cqe_flush_error),
	INIT_VPORT_Q_COUNTER(req_transport_retries_exceeded),
	INIT_VPORT_Q_COUNTER(req_rnr_retries_exceeded),
};

static const struct mlx5_ib_counter vport_roce_accl_cnts[] = {
	INIT_VPORT_Q_COUNTER(roce_adp_retrans),
	INIT_VPORT_Q_COUNTER(roce_adp_retrans_to),
	INIT_VPORT_Q_COUNTER(roce_slow_restart),
	INIT_VPORT_Q_COUNTER(roce_slow_restart_cnps),
	INIT_VPORT_Q_COUNTER(roce_slow_restart_trans),
};

#define INIT_EXT_PPCNT_COUNTER(_name)		\
	{ .name = #_name, .offset =	\
	MLX5_BYTE_OFF(ppcnt_reg, \
		      counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}

static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
};

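/*
 * Optional (operational) counters only record a type here; the flow counter
 * and steering rule that back each one are created on demand when the stat
 * is enabled through mlx5_ib_modify_stat().
 */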
#define INIT_OP_COUNTER(_name, _type)		\
	{ .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type}

static const struct mlx5_ib_counter basic_op_cnts[] = {
	INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS),
};

static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = {
	INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS),
};

static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
	INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
};

static int mlx5_ib_read_counters(struct ib_counters *counters,
				 struct ib_counters_read_attr *read_attr,
				 struct uverbs_attr_bundle *attrs)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
	struct mlx5_read_counters_attr mread_attr = {};
	struct mlx5_ib_flow_counters_desc *desc;
	int ret, i;

	mutex_lock(&mcounters->mcntrs_mutex);
	if (mcounters->cntrs_max_index > read_attr->ncounters) {
		ret = -EINVAL;
		goto err_bound;
	}

	mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
				 GFP_KERNEL);
	if (!mread_attr.out) {
		ret = -ENOMEM;
		goto err_bound;
	}

	mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
	mread_attr.flags = read_attr->flags;
	ret = mcounters->read_counters(counters->device, &mread_attr);
	if (ret)
		goto err_read;

	/* Walk the counters data array and assign each value according to
	 * its description/index pair.
	 */
	desc = mcounters->counters_data;
	for (i = 0; i < mcounters->ncounters; i++)
		read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];

err_read:
	kfree(mread_attr.out);
err_bound:
	mutex_unlock(&mcounters->mcntrs_mutex);
	return ret;
}

static int mlx5_ib_destroy_counters(struct ib_counters *counters)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);

	mlx5_ib_counters_clear_description(counters);
	if (mcounters->hw_cntrs_hndl)
		mlx5_fc_destroy(to_mdev(counters->device)->mdev,
				mcounters->hw_cntrs_hndl);
	return 0;
}

static int mlx5_ib_create_counters(struct ib_counters *counters,
				   struct uverbs_attr_bundle *attrs)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);

	mutex_init(&mcounters->mcntrs_mutex);
	return 0;
}

static bool vport_qcounters_supported(struct mlx5_ib_dev *dev)
{
	return MLX5_CAP_GEN(dev->mdev, q_counter_other_vport) &&
	       MLX5_CAP_GEN(dev->mdev, q_counter_aggregation);
}

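/*
 * Resolve the counter set for a 1-based port number; port_num == 0 selects
 * the device-level set in port[0]. In switchdev mode port[0] carries the
 * device Q counters and, when per-vport Q counters are supported, port[1]
 * is shared by all representor ports.
 */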
static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
						   u32 port_num)
{
	if ((is_mdev_switchdev_mode(dev->mdev) &&
	     !vport_qcounters_supported(dev)) || !port_num)
		return &dev->port[0].cnts;

	return is_mdev_switchdev_mode(dev->mdev) ?
	       &dev->port[1].cnts : &dev->port[port_num - 1].cnts;
}

/**
 * mlx5_ib_get_counters_id - Returns counters id to use for device+port
 * @dev:	Pointer to mlx5 IB device
 * @port_num:	Zero-based port number
 *
 * mlx5_ib_get_counters_id() returns the counter set id to use for the given
 * device and port combination, in both switchdev and non-switchdev mode of
 * the parent device.
 */
u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
{
	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num + 1);

	return cnts->set_id;
}

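/*
 * Allocate an rdma_hw_stats structure covering the Q, congestion, extended
 * PPCNT and op counters of a counter set. Op counters start out disabled
 * and are only activated through mlx5_ib_modify_stat().
 */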
static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts)
{
	struct rdma_hw_stats *stats;
	u32 num_hw_counters;
	int i;

	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
			  cnts->num_ext_ppcnt_counters;
	stats = rdma_alloc_hw_stats_struct(cnts->descs,
					   num_hw_counters +
					   cnts->num_op_counters,
					   RDMA_HW_STATS_DEFAULT_LIFESPAN);
	if (!stats)
		return NULL;

	for (i = 0; i < cnts->num_op_counters; i++)
		set_bit(num_hw_counters + i, stats->is_disabled);

	return stats;
}

static struct rdma_hw_stats *
mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;

	return do_alloc_stats(cnts);
}

static struct rdma_hw_stats *
mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);

	return do_alloc_stats(cnts);
}

static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
				    const struct mlx5_ib_counters *cnts,
				    struct rdma_hw_stats *stats,
				    u16 set_id)
{
	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
	__be32 val;
	int ret, i;

	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
	MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
	if (ret)
		return ret;

	for (i = 0; i < cnts->num_q_counters; i++) {
		val = *(__be32 *)((void *)out + cnts->offsets[i]);
		stats->value[i] = (u64)be32_to_cpu(val);
	}

	return 0;
}

static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
					    const struct mlx5_ib_counters *cnts,
					    struct rdma_hw_stats *stats)
{
	int offset = cnts->num_q_counters + cnts->num_cong_counters;
	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
	int ret, i;
	void *out;

	out = kvzalloc(sz, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	MLX5_SET(ppcnt_reg, in, local_port, 1);
	MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
	ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
				   0, 0);
	if (ret)
		goto free;

	for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
		stats->value[i + offset] =
			be64_to_cpup((__be64 *)(out +
				    cnts->offsets[i + offset]));
free:
	kvfree(out);
	return ret;
}

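/*
 * Query the Q counters of the vport behind a representor port. The uplink
 * representor is skipped (its counters come from the regular device query
 * path); other vports are queried through the eswitch core device using the
 * other_vport and aggregate fields.
 */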
static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
					  u32 port_num,
					  const struct mlx5_ib_counters *cnts,
					  struct rdma_hw_stats *stats)
{
	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
	struct mlx5_core_dev *mdev;
	__be32 val;
	int ret, i;

	if (!dev->port[port_num].rep ||
	    dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK)
		return 0;

	mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw);
	if (!mdev)
		return -EOPNOTSUPP;

	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
	MLX5_SET(query_q_counter_in, in, other_vport, 1);
	MLX5_SET(query_q_counter_in, in, vport_number,
		 dev->port[port_num].rep->vport);
	MLX5_SET(query_q_counter_in, in, aggregate, 1);
	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
	if (ret)
		return ret;

	for (i = 0; i < cnts->num_q_counters; i++) {
		val = *(__be32 *)((void *)out + cnts->offsets[i]);
		stats->value[i] = (u64)be32_to_cpu(val);
	}

	return 0;
}

static int do_get_hw_stats(struct ib_device *ibdev,
			   struct rdma_hw_stats *stats,
			   u32 port_num, int index)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
	struct mlx5_core_dev *mdev;
	int ret, num_counters;

	if (!stats)
		return -EINVAL;

	num_counters = cnts->num_q_counters +
		       cnts->num_cong_counters +
		       cnts->num_ext_ppcnt_counters;

	if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
		ret = mlx5_ib_query_q_counters_vport(dev, port_num - 1, cnts,
						     stats);
	else
		ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats,
					       cnts->set_id);
	if (ret)
		return ret;

	/* We don't expose device counters over Vports */
	if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0)
		goto done;

	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
		ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
		if (ret)
			return ret;
	}

	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
		if (!port_num)
			port_num = 1;
		mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL);
		if (!mdev) {
			/* If the port is not affiliated yet, it is in the
			 * down state and has no counters, so they would read
			 * as zero. No need to query the HCA.
			 */
			goto done;
		}
		ret = mlx5_lag_query_cong_counters(dev->mdev,
						   stats->value +
						   cnts->num_q_counters,
						   cnts->num_cong_counters,
						   cnts->offsets +
						   cnts->num_q_counters);

		mlx5_ib_put_native_port_mdev(dev, port_num);
		if (ret)
			return ret;
	}

done:
	return num_counters;
}

static int do_get_op_stat(struct ib_device *ibdev,
			  struct rdma_hw_stats *stats,
			  u32 port_num, int index)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts;
	const struct mlx5_ib_op_fc *opfcs;
	u64 packets = 0, bytes;
	u32 type;
	int ret;

	cnts = get_counters(dev, port_num);

	opfcs = cnts->opfcs;
	type = *(u32 *)cnts->descs[index].priv;
	if (type >= MLX5_IB_OPCOUNTER_MAX)
		return -EINVAL;

	if (!opfcs[type].fc)
		goto out;

	ret = mlx5_fc_query(dev->mdev, opfcs[type].fc,
			    &packets, &bytes);
	if (ret)
		return ret;

out:
	stats->value[index] = packets;
	return index;
}

static int do_get_op_stats(struct ib_device *ibdev,
			   struct rdma_hw_stats *stats,
			   u32 port_num)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts;
	int index, ret, num_hw_counters;

	cnts = get_counters(dev, port_num);
	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
			  cnts->num_ext_ppcnt_counters;
	for (index = num_hw_counters;
	     index < (num_hw_counters + cnts->num_op_counters); index++) {
		ret = do_get_op_stat(ibdev, stats, port_num, index);
		if (ret != index)
			return ret;
	}

	return cnts->num_op_counters;
}

static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
				struct rdma_hw_stats *stats,
				u32 port_num, int index)
{
	int num_counters, num_hw_counters, num_op_counters;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts;

	cnts = get_counters(dev, port_num);
	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
		cnts->num_ext_ppcnt_counters;
	num_counters = num_hw_counters + cnts->num_op_counters;

	if (index < 0 || index > num_counters)
		return -EINVAL;
	else if (index > 0 && index < num_hw_counters)
		return do_get_hw_stats(ibdev, stats, port_num, index);
	else if (index >= num_hw_counters && index < num_counters)
		return do_get_op_stat(ibdev, stats, port_num, index);

	num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index);
	if (num_hw_counters < 0)
		return num_hw_counters;

	num_op_counters = do_get_op_stats(ibdev, stats, port_num);
	if (num_op_counters < 0)
		return num_op_counters;

	return num_hw_counters + num_op_counters;
}

static struct rdma_hw_stats *
mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
{
	struct mlx5_ib_dev *dev = to_mdev(counter->device);
	const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);

	return do_alloc_stats(cnts);
}

static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
{
	struct mlx5_ib_dev *dev = to_mdev(counter->device);
	const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);

	return mlx5_ib_query_q_counters(dev->mdev, cnts,
					counter->stats, counter->id);
}

static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
{
	struct mlx5_ib_dev *dev = to_mdev(counter->device);
	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};

	if (!counter->id)
		return 0;

	MLX5_SET(dealloc_q_counter_in, in, opcode,
		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
	MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
	return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
}

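/*
 * Bind a QP to an rdma_counter. A hardware Q counter is allocated lazily on
 * the first bind, under MLX5_SHARED_RESOURCE_UID, and released again if
 * attaching it to the QP fails.
 */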
static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
				   struct ib_qp *qp)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	bool new = false;
	int err;

	if (!counter->id) {
		u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
		u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};

		MLX5_SET(alloc_q_counter_in, in, opcode,
			 MLX5_CMD_OP_ALLOC_Q_COUNTER);
		MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
		if (err)
			return err;
		counter->id =
			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
		new = true;
	}

	err = mlx5_ib_qp_set_counter(qp, counter);
	if (err)
		goto fail_set_counter;

	return 0;

fail_set_counter:
	if (new) {
		mlx5_ib_counter_dealloc(counter);
		counter->id = 0;
	}

	return err;
}

static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
{
	return mlx5_ib_qp_set_counter(qp, NULL);
}

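/*
 * Fill the descriptor and offset arrays for one counter set. The counter
 * groups are appended in the same order, and gated by the same capability
 * bits, as the sizing in __mlx5_ib_alloc_counters(), so the two functions
 * must be kept in sync.
 */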
static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
				  struct rdma_stat_desc *descs, size_t *offsets,
				  u32 port_num)
{
	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
			port_num != MLX5_VPORT_PF;
	const struct mlx5_ib_counter *names;
	int j = 0, i, size;

	names = is_vport ? vport_basic_q_cnts : basic_q_cnts;
	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
			  ARRAY_SIZE(basic_q_cnts);
	for (i = 0; i < size; i++, j++) {
		descs[j].name = names[i].name;
		offsets[j] = names[i].offset;
	}

	names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts;
	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
			  ARRAY_SIZE(out_of_seq_q_cnts);
	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
		for (i = 0; i < size; i++, j++) {
			descs[j].name = names[i].name;
			offsets[j] = names[i].offset;
		}
	}

	names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts;
	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
			  ARRAY_SIZE(retrans_q_cnts);
	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
		for (i = 0; i < size; i++, j++) {
			descs[j].name = names[i].name;
			offsets[j] = names[i].offset;
		}
	}

	names = is_vport ? vport_extended_err_cnts : extended_err_cnts;
	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
			  ARRAY_SIZE(extended_err_cnts);
	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
		for (i = 0; i < size; i++, j++) {
			descs[j].name = names[i].name;
			offsets[j] = names[i].offset;
		}
	}

	names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts;
	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
			  ARRAY_SIZE(roce_accl_cnts);
	if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
		for (i = 0; i < size; i++, j++) {
			descs[j].name = names[i].name;
			offsets[j] = names[i].offset;
		}
	}

	if (is_vport)
		return;

	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
			descs[j].name = cong_cnts[i].name;
			offsets[j] = cong_cnts[i].offset;
		}
	}

	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
		for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
			descs[j].name = ext_ppcnt_cnts[i].name;
			offsets[j] = ext_ppcnt_cnts[i].offset;
		}
	}

	for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) {
		descs[j].name = basic_op_cnts[i].name;
		descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
		descs[j].priv = &basic_op_cnts[i].type;
	}

	if (MLX5_CAP_FLOWTABLE(dev->mdev,
			       ft_field_support_2_nic_receive_rdma.bth_opcode)) {
		for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) {
			descs[j].name = rdmarx_cnp_op_cnts[i].name;
			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
			descs[j].priv = &rdmarx_cnp_op_cnts[i].type;
		}
	}

	if (MLX5_CAP_FLOWTABLE(dev->mdev,
			       ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
		for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) {
			descs[j].name = rdmatx_cnp_op_cnts[i].name;
			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
			descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
		}
	}
}

static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
				    struct mlx5_ib_counters *cnts, u32 port_num)
{
	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
			port_num != MLX5_VPORT_PF;
	u32 num_counters, num_op_counters = 0, size;

	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
			  ARRAY_SIZE(basic_q_cnts);
	num_counters = size;

	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
			  ARRAY_SIZE(out_of_seq_q_cnts);
	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
		num_counters += size;

	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
			  ARRAY_SIZE(retrans_q_cnts);
	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
		num_counters += size;

	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
			  ARRAY_SIZE(extended_err_cnts);
	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
		num_counters += size;

	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
			  ARRAY_SIZE(roce_accl_cnts);
	if (MLX5_CAP_GEN(dev->mdev, roce_accl))
		num_counters += size;

	cnts->num_q_counters = num_counters;

	if (is_vport)
		goto skip_non_qcounters;

	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
		cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
		num_counters += ARRAY_SIZE(cong_cnts);
	}
	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
		cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
		num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
	}

	num_op_counters = ARRAY_SIZE(basic_op_cnts);

	if (MLX5_CAP_FLOWTABLE(dev->mdev,
			       ft_field_support_2_nic_receive_rdma.bth_opcode))
		num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);

	if (MLX5_CAP_FLOWTABLE(dev->mdev,
			       ft_field_support_2_nic_transmit_rdma.bth_opcode))
		num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts);

skip_non_qcounters:
	cnts->num_op_counters = num_op_counters;
	num_counters += num_op_counters;
	cnts->descs = kcalloc(num_counters,
			      sizeof(struct rdma_stat_desc), GFP_KERNEL);
	if (!cnts->descs)
		return -ENOMEM;

	cnts->offsets = kcalloc(num_counters,
				sizeof(*cnts->offsets), GFP_KERNEL);
	if (!cnts->offsets)
		goto err;

	return 0;

err:
	kfree(cnts->descs);
	cnts->descs = NULL;
	return -ENOMEM;
}

static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
	int num_cnt_ports = dev->num_ports;
	int i, j;

	if (is_mdev_switchdev_mode(dev->mdev))
		num_cnt_ports = min(2, num_cnt_ports);

	MLX5_SET(dealloc_q_counter_in, in, opcode,
		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);

	for (i = 0; i < num_cnt_ports; i++) {
		if (dev->port[i].cnts.set_id) {
			MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
				 dev->port[i].cnts.set_id);
			mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
		}
		kfree(dev->port[i].cnts.descs);
		kfree(dev->port[i].cnts.offsets);

		for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) {
			if (!dev->port[i].cnts.opfcs[j].fc)
				continue;

			if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
				mlx5_ib_fs_remove_op_fc(dev,
					&dev->port[i].cnts.opfcs[j], j);
			mlx5_fc_destroy(dev->mdev,
					dev->port[i].cnts.opfcs[j].fc);
			dev->port[i].cnts.opfcs[j].fc = NULL;
		}
	}
}

static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
	int num_cnt_ports = dev->num_ports;
	int err = 0;
	int i;
	bool is_shared;

	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
	is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;

	/*
	 * In switchdev mode two counter-set "ports" are allocated: one holds
	 * the device Q_counters (essentially the real Q_counters of this
	 * device), while the other is used as a helper through which the PF
	 * can query all other vports.
	 */
	if (is_mdev_switchdev_mode(dev->mdev))
		num_cnt_ports = min(2, num_cnt_ports);

	for (i = 0; i < num_cnt_ports; i++) {
		err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i);
		if (err)
			goto err_alloc;

		mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs,
				      dev->port[i].cnts.offsets, i);

		MLX5_SET(alloc_q_counter_in, in, uid,
			 is_shared ? MLX5_SHARED_RESOURCE_UID : 0);

		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
		if (err) {
			mlx5_ib_warn(dev,
				     "couldn't allocate queue counter for port %d, err %d\n",
				     i + 1, err);
			goto err_alloc;
		}

		dev->port[i].cnts.set_id =
			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
	}
	return 0;

err_alloc:
	mlx5_ib_dealloc_counters(dev);
	return err;
}

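/*
 * Read the flow counter that backs a uverbs ib_counters object; the packets
 * and bytes values are returned at the IB_COUNTER_PACKETS and
 * IB_COUNTER_BYTES slots of the output array.
 */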
static int read_flow_counters(struct ib_device *ibdev,
			      struct mlx5_read_counters_attr *read_attr)
{
	struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);

	return mlx5_fc_query(dev->mdev, fc,
			     &read_attr->out[IB_COUNTER_PACKETS],
			     &read_attr->out[IB_COUNTER_BYTES]);
}

/* flow counters currently expose two counters: packets and bytes */
#define FLOW_COUNTERS_NUM 2
static int counters_set_description(
	struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
	struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
	u32 cntrs_max_index = 0;
	int i;

	if (counters_type != MLX5_IB_COUNTERS_FLOW)
		return -EINVAL;

	/* init the fields for the object */
	mcounters->type = counters_type;
	mcounters->read_counters = read_flow_counters;
	mcounters->counters_num = FLOW_COUNTERS_NUM;
	mcounters->ncounters = ncounters;
	/* each counter entry has a description and index pair */
	for (i = 0; i < ncounters; i++) {
		if (desc_data[i].description > IB_COUNTER_BYTES)
			return -EINVAL;

		if (cntrs_max_index <= desc_data[i].index)
			cntrs_max_index = desc_data[i].index + 1;
	}

	mutex_lock(&mcounters->mcntrs_mutex);
	mcounters->counters_data = desc_data;
	mcounters->cntrs_max_index = cntrs_max_index;
	mutex_unlock(&mcounters->mcntrs_mutex);

	return 0;
}

#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
				   struct mlx5_ib_create_flow *ucmd)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
	struct mlx5_ib_flow_counters_desc *desc_data = NULL;
	bool hw_hndl = false;
	int ret = 0;

	if (ucmd && ucmd->ncounters_data != 0) {
		cntrs_data = ucmd->data;
		if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
			return -EINVAL;

		desc_data = kcalloc(cntrs_data->ncounters,
				    sizeof(*desc_data),
				    GFP_KERNEL);
		if (!desc_data)
			return -ENOMEM;

		if (copy_from_user(desc_data,
				   u64_to_user_ptr(cntrs_data->counters_data),
				   sizeof(*desc_data) * cntrs_data->ncounters)) {
			ret = -EFAULT;
			goto free;
		}
	}

	if (!mcounters->hw_cntrs_hndl) {
		mcounters->hw_cntrs_hndl = mlx5_fc_create(
			to_mdev(ibcounters->device)->mdev, false);
		if (IS_ERR(mcounters->hw_cntrs_hndl)) {
			ret = PTR_ERR(mcounters->hw_cntrs_hndl);
			goto free;
		}
		hw_hndl = true;
	}

	if (desc_data) {
		/* counters already bound to at least one flow */
		if (mcounters->cntrs_max_index) {
			ret = -EINVAL;
			goto free_hndl;
		}

		ret = counters_set_description(ibcounters,
					       MLX5_IB_COUNTERS_FLOW,
					       desc_data,
					       cntrs_data->ncounters);
		if (ret)
			goto free_hndl;

	} else if (!mcounters->cntrs_max_index) {
		/* counters not bound yet, must have udata passed */
		ret = -EINVAL;
		goto free_hndl;
	}

	return 0;

free_hndl:
	if (hw_hndl) {
		mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
				mcounters->hw_cntrs_hndl);
		mcounters->hw_cntrs_hndl = NULL;
	}
free:
	kfree(desc_data);
	return ret;
}

void mlx5_ib_counters_clear_description(struct ib_counters *counters)
{
	struct mlx5_ib_mcounters *mcounters;

	if (!counters || atomic_read(&counters->usecnt) != 1)
		return;

	mcounters = to_mcounters(counters);

	mutex_lock(&mcounters->mcntrs_mutex);
	kfree(mcounters->counters_data);
	mcounters->counters_data = NULL;
	mcounters->cntrs_max_index = 0;
	mutex_unlock(&mcounters->mcntrs_mutex);
}

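/*
 * Enable or disable one optional counter: on enable, allocate a flow counter
 * and attach the matching steering rule; on disable, tear both down again.
 */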
static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
			       unsigned int index, bool enable)
{
	struct mlx5_ib_dev *dev = to_mdev(device);
	struct mlx5_ib_counters *cnts;
	struct mlx5_ib_op_fc *opfc;
	u32 num_hw_counters, type;
	int ret;

	cnts = &dev->port[port - 1].cnts;
	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
		cnts->num_ext_ppcnt_counters;
	if (index < num_hw_counters ||
	    index >= (num_hw_counters + cnts->num_op_counters))
		return -EINVAL;

	if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
		return -EINVAL;

	type = *(u32 *)cnts->descs[index].priv;
	if (type >= MLX5_IB_OPCOUNTER_MAX)
		return -EINVAL;

	opfc = &cnts->opfcs[type];

	if (enable) {
		if (opfc->fc)
			return -EEXIST;

		opfc->fc = mlx5_fc_create(dev->mdev, false);
		if (IS_ERR(opfc->fc))
			return PTR_ERR(opfc->fc);

		ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type);
		if (ret) {
			mlx5_fc_destroy(dev->mdev, opfc->fc);
			opfc->fc = NULL;
		}
		return ret;
	}

	if (!opfc->fc)
		return -EINVAL;

	mlx5_ib_fs_remove_op_fc(dev, opfc, type);
	mlx5_fc_destroy(dev->mdev, opfc->fc);
	opfc->fc = NULL;
	return 0;
}

static const struct ib_device_ops hw_stats_ops = {
	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
	.get_hw_stats = mlx5_ib_get_hw_stats,
	.counter_bind_qp = mlx5_ib_counter_bind_qp,
	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
	.counter_dealloc = mlx5_ib_counter_dealloc,
	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
	.counter_update_stats = mlx5_ib_counter_update_stats,
	.modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ?
			  mlx5_ib_modify_stat : NULL,
};

static const struct ib_device_ops hw_switchdev_vport_op = {
	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
};

static const struct ib_device_ops hw_switchdev_stats_ops = {
	.alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats,
	.get_hw_stats = mlx5_ib_get_hw_stats,
	.counter_bind_qp = mlx5_ib_counter_bind_qp,
	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
	.counter_dealloc = mlx5_ib_counter_dealloc,
	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
	.counter_update_stats = mlx5_ib_counter_update_stats,
};

static const struct ib_device_ops counters_ops = {
	.create_counters = mlx5_ib_create_counters,
	.destroy_counters = mlx5_ib_destroy_counters,
	.read_counters = mlx5_ib_read_counters,

	INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
};

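/*
 * Register the counters-related ib_device_ops: the uverbs counters ops are
 * always set, while the HW stats ops are only set when the device exposes Q
 * counters (max_qp_cnt), with switchdev-specific variants in switchdev mode.
 */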
int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
{
	ib_set_device_ops(&dev->ib_dev, &counters_ops);

	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
		return 0;

	if (is_mdev_switchdev_mode(dev->mdev)) {
		ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops);
		if (vport_qcounters_supported(dev))
			ib_set_device_ops(&dev->ib_dev, &hw_switchdev_vport_op);
	} else
		ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
	return mlx5_ib_alloc_counters(dev);
}

void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
{
	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
		return;

	mlx5_ib_dealloc_counters(dev);
}