Linux Audio

Check our new training course

Loading...
v6.8
   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/*
   3 * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
   4 */
   5
   6#include "mlx5_ib.h"
   7#include <linux/mlx5/eswitch.h>
   8#include <linux/mlx5/vport.h>
   9#include "counters.h"
  10#include "ib_rep.h"
  11#include "qp.h"
  12
  13struct mlx5_ib_counter {
  14	const char *name;
  15	size_t offset;
  16	u32 type;
  17};
  18
  19#define INIT_Q_COUNTER(_name)		\
  20	{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
  21
  22#define INIT_VPORT_Q_COUNTER(_name)		\
  23	{ .name = "vport_" #_name, .offset =	\
  24		MLX5_BYTE_OFF(query_q_counter_out, _name)}
  25
  26static const struct mlx5_ib_counter basic_q_cnts[] = {
  27	INIT_Q_COUNTER(rx_write_requests),
  28	INIT_Q_COUNTER(rx_read_requests),
  29	INIT_Q_COUNTER(rx_atomic_requests),
  30	INIT_Q_COUNTER(rx_dct_connect),
  31	INIT_Q_COUNTER(out_of_buffer),
  32};
  33
  34static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
  35	INIT_Q_COUNTER(out_of_sequence),
  36};
  37
  38static const struct mlx5_ib_counter retrans_q_cnts[] = {
  39	INIT_Q_COUNTER(duplicate_request),
  40	INIT_Q_COUNTER(rnr_nak_retry_err),
  41	INIT_Q_COUNTER(packet_seq_err),
  42	INIT_Q_COUNTER(implied_nak_seq_err),
  43	INIT_Q_COUNTER(local_ack_timeout_err),
  44};
  45
  46static const struct mlx5_ib_counter vport_basic_q_cnts[] = {
  47	INIT_VPORT_Q_COUNTER(rx_write_requests),
  48	INIT_VPORT_Q_COUNTER(rx_read_requests),
  49	INIT_VPORT_Q_COUNTER(rx_atomic_requests),
  50	INIT_VPORT_Q_COUNTER(rx_dct_connect),
  51	INIT_VPORT_Q_COUNTER(out_of_buffer),
  52};
  53
  54static const struct mlx5_ib_counter vport_out_of_seq_q_cnts[] = {
  55	INIT_VPORT_Q_COUNTER(out_of_sequence),
  56};
  57
  58static const struct mlx5_ib_counter vport_retrans_q_cnts[] = {
  59	INIT_VPORT_Q_COUNTER(duplicate_request),
  60	INIT_VPORT_Q_COUNTER(rnr_nak_retry_err),
  61	INIT_VPORT_Q_COUNTER(packet_seq_err),
  62	INIT_VPORT_Q_COUNTER(implied_nak_seq_err),
  63	INIT_VPORT_Q_COUNTER(local_ack_timeout_err),
  64};
  65
  66#define INIT_CONG_COUNTER(_name)		\
  67	{ .name = #_name, .offset =	\
  68		MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
  69
  70static const struct mlx5_ib_counter cong_cnts[] = {
  71	INIT_CONG_COUNTER(rp_cnp_ignored),
  72	INIT_CONG_COUNTER(rp_cnp_handled),
  73	INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
  74	INIT_CONG_COUNTER(np_cnp_sent),
  75};
  76
  77static const struct mlx5_ib_counter extended_err_cnts[] = {
  78	INIT_Q_COUNTER(resp_local_length_error),
  79	INIT_Q_COUNTER(resp_cqe_error),
  80	INIT_Q_COUNTER(req_cqe_error),
  81	INIT_Q_COUNTER(req_remote_invalid_request),
  82	INIT_Q_COUNTER(req_remote_access_errors),
  83	INIT_Q_COUNTER(resp_remote_access_errors),
  84	INIT_Q_COUNTER(resp_cqe_flush_error),
  85	INIT_Q_COUNTER(req_cqe_flush_error),
  86};
  87
  88static const struct mlx5_ib_counter roce_accl_cnts[] = {
  89	INIT_Q_COUNTER(roce_adp_retrans),
  90	INIT_Q_COUNTER(roce_adp_retrans_to),
  91	INIT_Q_COUNTER(roce_slow_restart),
  92	INIT_Q_COUNTER(roce_slow_restart_cnps),
  93	INIT_Q_COUNTER(roce_slow_restart_trans),
  94};
  95
  96static const struct mlx5_ib_counter vport_extended_err_cnts[] = {
  97	INIT_VPORT_Q_COUNTER(resp_local_length_error),
  98	INIT_VPORT_Q_COUNTER(resp_cqe_error),
  99	INIT_VPORT_Q_COUNTER(req_cqe_error),
 100	INIT_VPORT_Q_COUNTER(req_remote_invalid_request),
 101	INIT_VPORT_Q_COUNTER(req_remote_access_errors),
 102	INIT_VPORT_Q_COUNTER(resp_remote_access_errors),
 103	INIT_VPORT_Q_COUNTER(resp_cqe_flush_error),
 104	INIT_VPORT_Q_COUNTER(req_cqe_flush_error),
 105};
 106
 107static const struct mlx5_ib_counter vport_roce_accl_cnts[] = {
 108	INIT_VPORT_Q_COUNTER(roce_adp_retrans),
 109	INIT_VPORT_Q_COUNTER(roce_adp_retrans_to),
 110	INIT_VPORT_Q_COUNTER(roce_slow_restart),
 111	INIT_VPORT_Q_COUNTER(roce_slow_restart_cnps),
 112	INIT_VPORT_Q_COUNTER(roce_slow_restart_trans),
 113};
 114
 115#define INIT_EXT_PPCNT_COUNTER(_name)		\
 116	{ .name = #_name, .offset =	\
 117	MLX5_BYTE_OFF(ppcnt_reg, \
 118		      counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
 119
 120static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
 121	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
 122};
 123
 124#define INIT_OP_COUNTER(_name, _type)		\
 125	{ .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type}
 126
 127static const struct mlx5_ib_counter basic_op_cnts[] = {
 128	INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS),
 129};
 130
 131static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = {
 132	INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS),
 133};
 134
 135static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
 136	INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
 137};
 138
 139static int mlx5_ib_read_counters(struct ib_counters *counters,
 140				 struct ib_counters_read_attr *read_attr,
 141				 struct uverbs_attr_bundle *attrs)
 142{
 143	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
 144	struct mlx5_read_counters_attr mread_attr = {};
 145	struct mlx5_ib_flow_counters_desc *desc;
 146	int ret, i;
 147
 148	mutex_lock(&mcounters->mcntrs_mutex);
 149	if (mcounters->cntrs_max_index > read_attr->ncounters) {
 150		ret = -EINVAL;
 151		goto err_bound;
 152	}
 153
 154	mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
 155				 GFP_KERNEL);
 156	if (!mread_attr.out) {
 157		ret = -ENOMEM;
 158		goto err_bound;
 159	}
 160
 161	mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
 162	mread_attr.flags = read_attr->flags;
 163	ret = mcounters->read_counters(counters->device, &mread_attr);
 164	if (ret)
 165		goto err_read;
 166
 167	/* do the pass over the counters data array to assign according to the
 168	 * descriptions and indexing pairs
 169	 */
 170	desc = mcounters->counters_data;
 171	for (i = 0; i < mcounters->ncounters; i++)
 172		read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
 173
 174err_read:
 175	kfree(mread_attr.out);
 176err_bound:
 177	mutex_unlock(&mcounters->mcntrs_mutex);
 178	return ret;
 179}
 180
 181static int mlx5_ib_destroy_counters(struct ib_counters *counters)
 182{
 183	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
 184
 185	mlx5_ib_counters_clear_description(counters);
 186	if (mcounters->hw_cntrs_hndl)
 187		mlx5_fc_destroy(to_mdev(counters->device)->mdev,
 188				mcounters->hw_cntrs_hndl);
 189	return 0;
 190}
 191
 192static int mlx5_ib_create_counters(struct ib_counters *counters,
 193				   struct uverbs_attr_bundle *attrs)
 194{
 195	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
 196
 197	mutex_init(&mcounters->mcntrs_mutex);
 198	return 0;
 199}
 200
 201static bool vport_qcounters_supported(struct mlx5_ib_dev *dev)
 202{
 203	return MLX5_CAP_GEN(dev->mdev, q_counter_other_vport) &&
 204	       MLX5_CAP_GEN(dev->mdev, q_counter_aggregation);
 205}
 206
 207static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
 208						   u32 port_num)
 209{
 210	if ((is_mdev_switchdev_mode(dev->mdev) &&
 211	     !vport_qcounters_supported(dev)) || !port_num)
 212		return &dev->port[0].cnts;
 213
 214	return is_mdev_switchdev_mode(dev->mdev) ?
 215	       &dev->port[1].cnts : &dev->port[port_num - 1].cnts;
 216}
 217
 218/**
 219 * mlx5_ib_get_counters_id - Returns counters id to use for device+port
 220 * @dev:	Pointer to mlx5 IB device
 221 * @port_num:	Zero based port number
 222 *
 223 * mlx5_ib_get_counters_id() Returns counters set id to use for given
 224 * device port combination in switchdev and non switchdev mode of the
 225 * parent device.
 226 */
 227u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
 228{
 229	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num + 1);
 230
 231	return cnts->set_id;
 232}
 233
 234static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts)
 235{
 236	struct rdma_hw_stats *stats;
 237	u32 num_hw_counters;
 238	int i;
 239
 240	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
 241			  cnts->num_ext_ppcnt_counters;
 242	stats = rdma_alloc_hw_stats_struct(cnts->descs,
 243					   num_hw_counters +
 244					   cnts->num_op_counters,
 245					   RDMA_HW_STATS_DEFAULT_LIFESPAN);
 246	if (!stats)
 247		return NULL;
 248
 249	for (i = 0; i < cnts->num_op_counters; i++)
 250		set_bit(num_hw_counters + i, stats->is_disabled);
 251
 252	return stats;
 253}
 254
 255static struct rdma_hw_stats *
 256mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
 257{
 258	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 259	const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;
 260
 261	return do_alloc_stats(cnts);
 262}
 263
 264static struct rdma_hw_stats *
 265mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
 266{
 267	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 268	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
 269
 270	return do_alloc_stats(cnts);
 271}
 272
 273static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
 274				    const struct mlx5_ib_counters *cnts,
 275				    struct rdma_hw_stats *stats,
 276				    u16 set_id)
 277{
 278	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
 279	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
 280	__be32 val;
 281	int ret, i;
 282
 283	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
 284	MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
 285	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
 286	if (ret)
 287		return ret;
 288
 289	for (i = 0; i < cnts->num_q_counters; i++) {
 290		val = *(__be32 *)((void *)out + cnts->offsets[i]);
 291		stats->value[i] = (u64)be32_to_cpu(val);
 292	}
 293
 294	return 0;
 295}
 296
 297static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
 298					    const struct mlx5_ib_counters *cnts,
 299					    struct rdma_hw_stats *stats)
 300{
 301	int offset = cnts->num_q_counters + cnts->num_cong_counters;
 302	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
 303	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
 304	int ret, i;
 305	void *out;
 306
 307	out = kvzalloc(sz, GFP_KERNEL);
 308	if (!out)
 309		return -ENOMEM;
 310
 311	MLX5_SET(ppcnt_reg, in, local_port, 1);
 312	MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
 313	ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
 314				   0, 0);
 315	if (ret)
 316		goto free;
 317
 318	for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
 319		stats->value[i + offset] =
 320			be64_to_cpup((__be64 *)(out +
 321				    cnts->offsets[i + offset]));
 322free:
 323	kvfree(out);
 324	return ret;
 325}
 326
 327static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
 328					  u32 port_num,
 329					  const struct mlx5_ib_counters *cnts,
 330					  struct rdma_hw_stats *stats)
 331
 332{
 333	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
 334	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
 335	struct mlx5_core_dev *mdev;
 336	__be32 val;
 337	int ret, i;
 338
 339	if (!dev->port[port_num].rep ||
 340	    dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK)
 341		return 0;
 342
 343	mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw);
 344	if (!mdev)
 345		return -EOPNOTSUPP;
 346
 347	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
 348	MLX5_SET(query_q_counter_in, in, other_vport, 1);
 349	MLX5_SET(query_q_counter_in, in, vport_number,
 350		 dev->port[port_num].rep->vport);
 351	MLX5_SET(query_q_counter_in, in, aggregate, 1);
 352	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
 353	if (ret)
 354		return ret;
 355
 356	for (i = 0; i < cnts->num_q_counters; i++) {
 357		val = *(__be32 *)((void *)out + cnts->offsets[i]);
 358		stats->value[i] = (u64)be32_to_cpu(val);
 359	}
 360
 361	return 0;
 362}
 363
 364static int do_get_hw_stats(struct ib_device *ibdev,
 365			   struct rdma_hw_stats *stats,
 366			   u32 port_num, int index)
 367{
 368	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 369	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
 370	struct mlx5_core_dev *mdev;
 371	int ret, num_counters;
 372
 373	if (!stats)
 374		return -EINVAL;
 375
 376	num_counters = cnts->num_q_counters +
 377		       cnts->num_cong_counters +
 378		       cnts->num_ext_ppcnt_counters;
 379
 380	if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
 381		ret = mlx5_ib_query_q_counters_vport(dev, port_num - 1, cnts,
 382						     stats);
 383	else
 384		ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats,
 385					       cnts->set_id);
 386	if (ret)
 387		return ret;
 388
 389	/* We don't expose device counters over Vports */
 390	if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0)
 391		goto done;
 392
 393	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
 394		ret =  mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
 395		if (ret)
 396			return ret;
 397	}
 398
 399	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
 400		if (!port_num)
 401			port_num = 1;
 402		mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL);
 403		if (!mdev) {
 404			/* If port is not affiliated yet, its in down state
 405			 * which doesn't have any counters yet, so it would be
 406			 * zero. So no need to read from the HCA.
 407			 */
 408			goto done;
 409		}
 410		ret = mlx5_lag_query_cong_counters(dev->mdev,
 411						   stats->value +
 412						   cnts->num_q_counters,
 413						   cnts->num_cong_counters,
 414						   cnts->offsets +
 415						   cnts->num_q_counters);
 416
 417		mlx5_ib_put_native_port_mdev(dev, port_num);
 418		if (ret)
 419			return ret;
 420	}
 421
 422done:
 423	return num_counters;
 424}
 425
 426static int do_get_op_stat(struct ib_device *ibdev,
 427			  struct rdma_hw_stats *stats,
 428			  u32 port_num, int index)
 429{
 430	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 431	const struct mlx5_ib_counters *cnts;
 432	const struct mlx5_ib_op_fc *opfcs;
 433	u64 packets = 0, bytes;
 434	u32 type;
 435	int ret;
 436
 437	cnts = get_counters(dev, port_num);
 438
 439	opfcs = cnts->opfcs;
 440	type = *(u32 *)cnts->descs[index].priv;
 441	if (type >= MLX5_IB_OPCOUNTER_MAX)
 442		return -EINVAL;
 443
 444	if (!opfcs[type].fc)
 445		goto out;
 446
 447	ret = mlx5_fc_query(dev->mdev, opfcs[type].fc,
 448			    &packets, &bytes);
 449	if (ret)
 450		return ret;
 451
 452out:
 453	stats->value[index] = packets;
 454	return index;
 455}
 456
 457static int do_get_op_stats(struct ib_device *ibdev,
 458			   struct rdma_hw_stats *stats,
 459			   u32 port_num)
 460{
 461	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 462	const struct mlx5_ib_counters *cnts;
 463	int index, ret, num_hw_counters;
 464
 465	cnts = get_counters(dev, port_num);
 466	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
 467			  cnts->num_ext_ppcnt_counters;
 468	for (index = num_hw_counters;
 469	     index < (num_hw_counters + cnts->num_op_counters); index++) {
 470		ret = do_get_op_stat(ibdev, stats, port_num, index);
 471		if (ret != index)
 472			return ret;
 473	}
 474
 475	return cnts->num_op_counters;
 476}
 477
 478static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
 479				struct rdma_hw_stats *stats,
 480				u32 port_num, int index)
 481{
 482	int num_counters, num_hw_counters, num_op_counters;
 483	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 484	const struct mlx5_ib_counters *cnts;
 485
 486	cnts = get_counters(dev, port_num);
 487	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
 488		cnts->num_ext_ppcnt_counters;
 489	num_counters = num_hw_counters + cnts->num_op_counters;
 490
 491	if (index < 0 || index > num_counters)
 492		return -EINVAL;
 493	else if (index > 0 && index < num_hw_counters)
 494		return do_get_hw_stats(ibdev, stats, port_num, index);
 495	else if (index >= num_hw_counters && index < num_counters)
 496		return do_get_op_stat(ibdev, stats, port_num, index);
 497
 498	num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index);
 499	if (num_hw_counters < 0)
 500		return num_hw_counters;
 501
 502	num_op_counters = do_get_op_stats(ibdev, stats, port_num);
 503	if (num_op_counters < 0)
 504		return num_op_counters;
 505
 506	return num_hw_counters + num_op_counters;
 507}
 508
 509static struct rdma_hw_stats *
 510mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
 511{
 512	struct mlx5_ib_dev *dev = to_mdev(counter->device);
 513	const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
 514
 515	return do_alloc_stats(cnts);
 516}
 517
 518static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
 519{
 520	struct mlx5_ib_dev *dev = to_mdev(counter->device);
 521	const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
 522
 523	return mlx5_ib_query_q_counters(dev->mdev, cnts,
 524					counter->stats, counter->id);
 525}
 526
 527static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
 528{
 529	struct mlx5_ib_dev *dev = to_mdev(counter->device);
 530	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
 531
 532	if (!counter->id)
 533		return 0;
 534
 535	MLX5_SET(dealloc_q_counter_in, in, opcode,
 536		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
 537	MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
 538	return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
 539}
 540
 541static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
 542				   struct ib_qp *qp)
 543{
 544	struct mlx5_ib_dev *dev = to_mdev(qp->device);
 545	int err;
 546
 547	if (!counter->id) {
 548		u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
 549		u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
 550
 551		MLX5_SET(alloc_q_counter_in, in, opcode,
 552			 MLX5_CMD_OP_ALLOC_Q_COUNTER);
 553		MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
 554		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
 555		if (err)
 556			return err;
 557		counter->id =
 558			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
 559	}
 560
 561	err = mlx5_ib_qp_set_counter(qp, counter);
 562	if (err)
 563		goto fail_set_counter;
 564
 565	return 0;
 566
 567fail_set_counter:
 568	mlx5_ib_counter_dealloc(counter);
 569	counter->id = 0;
 570
 571	return err;
 572}
 573
 574static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
 575{
 576	return mlx5_ib_qp_set_counter(qp, NULL);
 577}
 578
 579static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
 580				  struct rdma_stat_desc *descs, size_t *offsets,
 581				  u32 port_num)
 582{
 583	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
 584			port_num != MLX5_VPORT_PF;
 585	const struct mlx5_ib_counter *names;
 586	int j = 0, i, size;
 587
 588	names = is_vport ? vport_basic_q_cnts : basic_q_cnts;
 589	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
 590			  ARRAY_SIZE(basic_q_cnts);
 591	for (i = 0; i < size; i++, j++) {
 592		descs[j].name = names[i].name;
 593		offsets[j] = names[i].offset;
 594	}
 595
 596	names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts;
 597	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
 598			  ARRAY_SIZE(out_of_seq_q_cnts);
 599	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
 600		for (i = 0; i < size; i++, j++) {
 601			descs[j].name = names[i].name;
 602			offsets[j] = names[i].offset;
 603		}
 604	}
 605
 606	names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts;
 607	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
 608			  ARRAY_SIZE(retrans_q_cnts);
 609	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
 610		for (i = 0; i < size; i++, j++) {
 611			descs[j].name = names[i].name;
 612			offsets[j] = names[i].offset;
 613		}
 614	}
 615
 616	names = is_vport ? vport_extended_err_cnts : extended_err_cnts;
 617	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
 618			  ARRAY_SIZE(extended_err_cnts);
 619	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
 620		for (i = 0; i < size; i++, j++) {
 621			descs[j].name = names[i].name;
 622			offsets[j] = names[i].offset;
 623		}
 624	}
 625
 626	names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts;
 627	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
 628			  ARRAY_SIZE(roce_accl_cnts);
 629	if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
 630		for (i = 0; i < size; i++, j++) {
 631			descs[j].name = names[i].name;
 632			offsets[j] = names[i].offset;
 633		}
 634	}
 635
 636	if (is_vport)
 637		return;
 638
 639	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
 640		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
 641			descs[j].name = cong_cnts[i].name;
 642			offsets[j] = cong_cnts[i].offset;
 643		}
 644	}
 645
 646	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
 647		for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
 648			descs[j].name = ext_ppcnt_cnts[i].name;
 649			offsets[j] = ext_ppcnt_cnts[i].offset;
 650		}
 651	}
 652
 653	for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) {
 654		descs[j].name = basic_op_cnts[i].name;
 655		descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
 656		descs[j].priv = &basic_op_cnts[i].type;
 657	}
 658
 659	if (MLX5_CAP_FLOWTABLE(dev->mdev,
 660			       ft_field_support_2_nic_receive_rdma.bth_opcode)) {
 661		for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) {
 662			descs[j].name = rdmarx_cnp_op_cnts[i].name;
 663			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
 664			descs[j].priv = &rdmarx_cnp_op_cnts[i].type;
 665		}
 666	}
 667
 668	if (MLX5_CAP_FLOWTABLE(dev->mdev,
 669			       ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
 670		for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) {
 671			descs[j].name = rdmatx_cnp_op_cnts[i].name;
 672			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
 673			descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
 674		}
 675	}
 676}
 677
 678
 679static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
 680				    struct mlx5_ib_counters *cnts, u32 port_num)
 681{
 682	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
 683			port_num != MLX5_VPORT_PF;
 684	u32 num_counters, num_op_counters = 0, size;
 685
 686	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
 687			  ARRAY_SIZE(basic_q_cnts);
 688	num_counters = size;
 689
 690	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
 691			  ARRAY_SIZE(out_of_seq_q_cnts);
 692	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
 693		num_counters += size;
 694
 695	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
 696			  ARRAY_SIZE(retrans_q_cnts);
 697	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
 698		num_counters += size;
 699
 700	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
 701			  ARRAY_SIZE(extended_err_cnts);
 702	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
 703		num_counters += size;
 704
 705	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
 706			  ARRAY_SIZE(roce_accl_cnts);
 707	if (MLX5_CAP_GEN(dev->mdev, roce_accl))
 708		num_counters += size;
 709
 710	cnts->num_q_counters = num_counters;
 711
 712	if (is_vport)
 713		goto skip_non_qcounters;
 714
 715	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
 716		cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
 717		num_counters += ARRAY_SIZE(cong_cnts);
 718	}
 719	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
 720		cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
 721		num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
 722	}
 723
 724	num_op_counters = ARRAY_SIZE(basic_op_cnts);
 725
 726	if (MLX5_CAP_FLOWTABLE(dev->mdev,
 727			       ft_field_support_2_nic_receive_rdma.bth_opcode))
 728		num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);
 729
 730	if (MLX5_CAP_FLOWTABLE(dev->mdev,
 731			       ft_field_support_2_nic_transmit_rdma.bth_opcode))
 732		num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts);
 733
 734skip_non_qcounters:
 735	cnts->num_op_counters = num_op_counters;
 736	num_counters += num_op_counters;
 737	cnts->descs = kcalloc(num_counters,
 738			      sizeof(struct rdma_stat_desc), GFP_KERNEL);
 739	if (!cnts->descs)
 740		return -ENOMEM;
 741
 742	cnts->offsets = kcalloc(num_counters,
 743				sizeof(*cnts->offsets), GFP_KERNEL);
 744	if (!cnts->offsets)
 745		goto err;
 746
 747	return 0;
 748
 749err:
 750	kfree(cnts->descs);
 751	cnts->descs = NULL;
 752	return -ENOMEM;
 753}
 754
 755static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
 756{
 757	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
 758	int num_cnt_ports = dev->num_ports;
 759	int i, j;
 760
 761	if (is_mdev_switchdev_mode(dev->mdev))
 762		num_cnt_ports = min(2, num_cnt_ports);
 763
 764	MLX5_SET(dealloc_q_counter_in, in, opcode,
 765		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
 766
 767	for (i = 0; i < num_cnt_ports; i++) {
 768		if (dev->port[i].cnts.set_id) {
 769			MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
 770				 dev->port[i].cnts.set_id);
 771			mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
 772		}
 773		kfree(dev->port[i].cnts.descs);
 774		kfree(dev->port[i].cnts.offsets);
 775
 776		for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) {
 777			if (!dev->port[i].cnts.opfcs[j].fc)
 778				continue;
 779
 780			if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
 781				mlx5_ib_fs_remove_op_fc(dev,
 782					&dev->port[i].cnts.opfcs[j], j);
 783			mlx5_fc_destroy(dev->mdev,
 784					dev->port[i].cnts.opfcs[j].fc);
 785			dev->port[i].cnts.opfcs[j].fc = NULL;
 786		}
 787	}
 788}
 789
 790static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
 791{
 792	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
 793	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
 794	int num_cnt_ports = dev->num_ports;
 795	int err = 0;
 796	int i;
 797	bool is_shared;
 798
 799	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
 800	is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
 801
 802	/*
 803	 * In switchdev we need to allocate two ports, one that is used for
 804	 * the device Q_counters and it is essentially the real Q_counters of
 805	 * this device, while the other is used as a helper for PF to be able to
 806	 * query all other vports.
 807	 */
 808	if (is_mdev_switchdev_mode(dev->mdev))
 809		num_cnt_ports = min(2, num_cnt_ports);
 810
 811	for (i = 0; i < num_cnt_ports; i++) {
 812		err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i);
 813		if (err)
 814			goto err_alloc;
 815
 816		mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs,
 817				      dev->port[i].cnts.offsets, i);
 818
 819		MLX5_SET(alloc_q_counter_in, in, uid,
 820			 is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
 821
 822		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
 823		if (err) {
 824			mlx5_ib_warn(dev,
 825				     "couldn't allocate queue counter for port %d, err %d\n",
 826				     i + 1, err);
 827			goto err_alloc;
 828		}
 829
 830		dev->port[i].cnts.set_id =
 831			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
 832	}
 833	return 0;
 834
 835err_alloc:
 836	mlx5_ib_dealloc_counters(dev);
 837	return err;
 838}
 839
 840static int read_flow_counters(struct ib_device *ibdev,
 841			      struct mlx5_read_counters_attr *read_attr)
 842{
 843	struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
 844	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 845
 846	return mlx5_fc_query(dev->mdev, fc,
 847			     &read_attr->out[IB_COUNTER_PACKETS],
 848			     &read_attr->out[IB_COUNTER_BYTES]);
 849}
 850
 851/* flow counters currently expose two counters packets and bytes */
 852#define FLOW_COUNTERS_NUM 2
 853static int counters_set_description(
 854	struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
 855	struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
 856{
 857	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
 858	u32 cntrs_max_index = 0;
 859	int i;
 860
 861	if (counters_type != MLX5_IB_COUNTERS_FLOW)
 862		return -EINVAL;
 863
 864	/* init the fields for the object */
 865	mcounters->type = counters_type;
 866	mcounters->read_counters = read_flow_counters;
 867	mcounters->counters_num = FLOW_COUNTERS_NUM;
 868	mcounters->ncounters = ncounters;
 869	/* each counter entry have both description and index pair */
 870	for (i = 0; i < ncounters; i++) {
 871		if (desc_data[i].description > IB_COUNTER_BYTES)
 872			return -EINVAL;
 873
 874		if (cntrs_max_index <= desc_data[i].index)
 875			cntrs_max_index = desc_data[i].index + 1;
 876	}
 877
 878	mutex_lock(&mcounters->mcntrs_mutex);
 879	mcounters->counters_data = desc_data;
 880	mcounters->cntrs_max_index = cntrs_max_index;
 881	mutex_unlock(&mcounters->mcntrs_mutex);
 882
 883	return 0;
 884}
 885
 886#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
 887int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
 888				   struct mlx5_ib_create_flow *ucmd)
 889{
 890	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
 891	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
 892	struct mlx5_ib_flow_counters_desc *desc_data = NULL;
 893	bool hw_hndl = false;
 894	int ret = 0;
 895
 896	if (ucmd && ucmd->ncounters_data != 0) {
 897		cntrs_data = ucmd->data;
 898		if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
 899			return -EINVAL;
 900
 901		desc_data = kcalloc(cntrs_data->ncounters,
 902				    sizeof(*desc_data),
 903				    GFP_KERNEL);
 904		if (!desc_data)
 905			return  -ENOMEM;
 906
 907		if (copy_from_user(desc_data,
 908				   u64_to_user_ptr(cntrs_data->counters_data),
 909				   sizeof(*desc_data) * cntrs_data->ncounters)) {
 910			ret = -EFAULT;
 911			goto free;
 912		}
 913	}
 914
 915	if (!mcounters->hw_cntrs_hndl) {
 916		mcounters->hw_cntrs_hndl = mlx5_fc_create(
 917			to_mdev(ibcounters->device)->mdev, false);
 918		if (IS_ERR(mcounters->hw_cntrs_hndl)) {
 919			ret = PTR_ERR(mcounters->hw_cntrs_hndl);
 920			goto free;
 921		}
 922		hw_hndl = true;
 923	}
 924
 925	if (desc_data) {
 926		/* counters already bound to at least one flow */
 927		if (mcounters->cntrs_max_index) {
 928			ret = -EINVAL;
 929			goto free_hndl;
 930		}
 931
 932		ret = counters_set_description(ibcounters,
 933					       MLX5_IB_COUNTERS_FLOW,
 934					       desc_data,
 935					       cntrs_data->ncounters);
 936		if (ret)
 937			goto free_hndl;
 938
 939	} else if (!mcounters->cntrs_max_index) {
 940		/* counters not bound yet, must have udata passed */
 941		ret = -EINVAL;
 942		goto free_hndl;
 943	}
 944
 945	return 0;
 946
 947free_hndl:
 948	if (hw_hndl) {
 949		mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
 950				mcounters->hw_cntrs_hndl);
 951		mcounters->hw_cntrs_hndl = NULL;
 952	}
 953free:
 954	kfree(desc_data);
 955	return ret;
 956}
 957
 958void mlx5_ib_counters_clear_description(struct ib_counters *counters)
 959{
 960	struct mlx5_ib_mcounters *mcounters;
 961
 962	if (!counters || atomic_read(&counters->usecnt) != 1)
 963		return;
 964
 965	mcounters = to_mcounters(counters);
 966
 967	mutex_lock(&mcounters->mcntrs_mutex);
 968	kfree(mcounters->counters_data);
 969	mcounters->counters_data = NULL;
 970	mcounters->cntrs_max_index = 0;
 971	mutex_unlock(&mcounters->mcntrs_mutex);
 972}
 973
 974static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
 975			       unsigned int index, bool enable)
 976{
 977	struct mlx5_ib_dev *dev = to_mdev(device);
 978	struct mlx5_ib_counters *cnts;
 979	struct mlx5_ib_op_fc *opfc;
 980	u32 num_hw_counters, type;
 981	int ret;
 982
 983	cnts = &dev->port[port - 1].cnts;
 984	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
 985		cnts->num_ext_ppcnt_counters;
 986	if (index < num_hw_counters ||
 987	    index >= (num_hw_counters + cnts->num_op_counters))
 988		return -EINVAL;
 989
 990	if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
 991		return -EINVAL;
 992
 993	type = *(u32 *)cnts->descs[index].priv;
 994	if (type >= MLX5_IB_OPCOUNTER_MAX)
 995		return -EINVAL;
 996
 997	opfc = &cnts->opfcs[type];
 998
 999	if (enable) {
1000		if (opfc->fc)
1001			return -EEXIST;
1002
1003		opfc->fc = mlx5_fc_create(dev->mdev, false);
1004		if (IS_ERR(opfc->fc))
1005			return PTR_ERR(opfc->fc);
1006
1007		ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type);
1008		if (ret) {
1009			mlx5_fc_destroy(dev->mdev, opfc->fc);
1010			opfc->fc = NULL;
1011		}
1012		return ret;
1013	}
1014
1015	if (!opfc->fc)
1016		return -EINVAL;
1017
1018	mlx5_ib_fs_remove_op_fc(dev, opfc, type);
1019	mlx5_fc_destroy(dev->mdev, opfc->fc);
1020	opfc->fc = NULL;
1021	return 0;
1022}
1023
/*
 * hw-stats ops that mlx5_ib_counters_init() installs when the device is not
 * in switchdev mode.  modify_hw_stat is only wired up when
 * CONFIG_INFINIBAND_USER_ACCESS is enabled.
 */
1024static const struct ib_device_ops hw_stats_ops = {
1025	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
1026	.get_hw_stats = mlx5_ib_get_hw_stats,
1027	.counter_bind_qp = mlx5_ib_counter_bind_qp,
1028	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
1029	.counter_dealloc = mlx5_ib_counter_dealloc,
1030	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
1031	.counter_update_stats = mlx5_ib_counter_update_stats,
1032	.modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ?
1033			  mlx5_ib_modify_stat : NULL,
1034};
1035
/*
 * Extra op added on top of hw_switchdev_stats_ops when
 * vport_qcounters_supported() reports true.
 */
1036static const struct ib_device_ops hw_switchdev_vport_op = {
1037	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
1038};
1039
/*
 * hw-stats ops installed in switchdev mode: stats are allocated per device
 * rather than per port, and there is no modify_hw_stat hook.
 */
1040static const struct ib_device_ops hw_switchdev_stats_ops = {
1041	.alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats,
1042	.get_hw_stats = mlx5_ib_get_hw_stats,
1043	.counter_bind_qp = mlx5_ib_counter_bind_qp,
1044	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
1045	.counter_dealloc = mlx5_ib_counter_dealloc,
1046	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
1047	.counter_update_stats = mlx5_ib_counter_update_stats,
1048};
1049
/*
 * ib_counters object ops.  mlx5_ib_counters_init() installs these first,
 * before it checks the max_qp_cnt capability.
 */
1050static const struct ib_device_ops counters_ops = {
1051	.create_counters = mlx5_ib_create_counters,
1052	.destroy_counters = mlx5_ib_destroy_counters,
1053	.read_counters = mlx5_ib_read_counters,
1054
1055	INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
1056};
1057
1058int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
1059{
1060	ib_set_device_ops(&dev->ib_dev, &counters_ops);
1061
1062	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
1063		return 0;
1064
1065	if (is_mdev_switchdev_mode(dev->mdev)) {
1066		ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops);
1067		if (vport_qcounters_supported(dev))
1068			ib_set_device_ops(&dev->ib_dev, &hw_switchdev_vport_op);
1069	} else
1070		ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
1071	return mlx5_ib_alloc_counters(dev);
1072}
1073
1074void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
1075{
1076	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
1077		return;
1078
1079	mlx5_ib_dealloc_counters(dev);
1080}
v6.9.4
   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/*
   3 * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
   4 */
   5
   6#include "mlx5_ib.h"
   7#include <linux/mlx5/eswitch.h>
   8#include <linux/mlx5/vport.h>
   9#include "counters.h"
  10#include "ib_rep.h"
  11#include "qp.h"
  12
  /* One exposed counter: its name plus where its value is found. */
  13struct mlx5_ib_counter {
  14	const char *name;	/* copied into rdma_stat_desc.name */
  15	size_t offset;	/* byte offset of the value in the query output */
  16	u32 type;	/* MLX5_IB_OPCOUNTER_* id; set by INIT_OP_COUNTER only */
  17};
  18
  /*
   * Table entry for a Q counter: the name is the field name itself and the
   * offset is that field's byte offset inside query_q_counter_out.
   */
  19#define INIT_Q_COUNTER(_name)		\
  20	{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
  21
  /* Same as INIT_Q_COUNTER, but the exposed name gets a "vport_" prefix. */
  22#define INIT_VPORT_Q_COUNTER(_name)		\
  23	{ .name = "vport_" #_name, .offset =	\
  24		MLX5_BYTE_OFF(query_q_counter_out, _name)}
  25
  /* Q counters filled in unconditionally for non-vport ports. */
  26static const struct mlx5_ib_counter basic_q_cnts[] = {
  27	INIT_Q_COUNTER(rx_write_requests),
  28	INIT_Q_COUNTER(rx_read_requests),
  29	INIT_Q_COUNTER(rx_atomic_requests),
  30	INIT_Q_COUNTER(rx_dct_connect),
  31	INIT_Q_COUNTER(out_of_buffer),
  32};
  33
  /* Filled in only when the out_of_seq_cnt general capability is set. */
  34static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
  35	INIT_Q_COUNTER(out_of_sequence),
  36};
  37
  /* Filled in only when the retransmission_q_counters capability is set. */
  38static const struct mlx5_ib_counter retrans_q_cnts[] = {
  39	INIT_Q_COUNTER(duplicate_request),
  40	INIT_Q_COUNTER(rnr_nak_retry_err),
  41	INIT_Q_COUNTER(packet_seq_err),
  42	INIT_Q_COUNTER(implied_nak_seq_err),
  43	INIT_Q_COUNTER(local_ack_timeout_err),
  44};
  45
  /*
   * "vport_" named twin of basic_q_cnts; used instead of it when the device
   * is in switchdev mode and the port is not MLX5_VPORT_PF.
   */
  46static const struct mlx5_ib_counter vport_basic_q_cnts[] = {
  47	INIT_VPORT_Q_COUNTER(rx_write_requests),
  48	INIT_VPORT_Q_COUNTER(rx_read_requests),
  49	INIT_VPORT_Q_COUNTER(rx_atomic_requests),
  50	INIT_VPORT_Q_COUNTER(rx_dct_connect),
  51	INIT_VPORT_Q_COUNTER(out_of_buffer),
  52};
  53
  /* "vport_" named twin of out_of_seq_q_cnts (same capability gate). */
  54static const struct mlx5_ib_counter vport_out_of_seq_q_cnts[] = {
  55	INIT_VPORT_Q_COUNTER(out_of_sequence),
  56};
  57
  /* "vport_" named twin of retrans_q_cnts (same capability gate). */
  58static const struct mlx5_ib_counter vport_retrans_q_cnts[] = {
  59	INIT_VPORT_Q_COUNTER(duplicate_request),
  60	INIT_VPORT_Q_COUNTER(rnr_nak_retry_err),
  61	INIT_VPORT_Q_COUNTER(packet_seq_err),
  62	INIT_VPORT_Q_COUNTER(implied_nak_seq_err),
  63	INIT_VPORT_Q_COUNTER(local_ack_timeout_err),
  64};
  65
  /*
   * Table entry for a congestion counter: the offset is that of the
   * <name>_high field inside query_cong_statistics_out.
   */
  66#define INIT_CONG_COUNTER(_name)		\
  67	{ .name = #_name, .offset =	\
  68		MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
  69
  /*
   * Congestion counters; filled in for non-vport ports only, and only when
   * the cc_query_allowed capability is set.
   */
  70static const struct mlx5_ib_counter cong_cnts[] = {
  71	INIT_CONG_COUNTER(rp_cnp_ignored),
  72	INIT_CONG_COUNTER(rp_cnp_handled),
  73	INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
  74	INIT_CONG_COUNTER(np_cnp_sent),
  75};
  76
  /* Filled in only when the enhanced_error_q_counters capability is set. */
  77static const struct mlx5_ib_counter extended_err_cnts[] = {
  78	INIT_Q_COUNTER(resp_local_length_error),
  79	INIT_Q_COUNTER(resp_cqe_error),
  80	INIT_Q_COUNTER(req_cqe_error),
  81	INIT_Q_COUNTER(req_remote_invalid_request),
  82	INIT_Q_COUNTER(req_remote_access_errors),
  83	INIT_Q_COUNTER(resp_remote_access_errors),
  84	INIT_Q_COUNTER(resp_cqe_flush_error),
  85	INIT_Q_COUNTER(req_cqe_flush_error),
  86};
  87
  /* Filled in only when the roce_accl capability is set. */
  88static const struct mlx5_ib_counter roce_accl_cnts[] = {
  89	INIT_Q_COUNTER(roce_adp_retrans),
  90	INIT_Q_COUNTER(roce_adp_retrans_to),
  91	INIT_Q_COUNTER(roce_slow_restart),
  92	INIT_Q_COUNTER(roce_slow_restart_cnps),
  93	INIT_Q_COUNTER(roce_slow_restart_trans),
  94};
  95
  /* "vport_" named twin of extended_err_cnts (same capability gate). */
  96static const struct mlx5_ib_counter vport_extended_err_cnts[] = {
  97	INIT_VPORT_Q_COUNTER(resp_local_length_error),
  98	INIT_VPORT_Q_COUNTER(resp_cqe_error),
  99	INIT_VPORT_Q_COUNTER(req_cqe_error),
 100	INIT_VPORT_Q_COUNTER(req_remote_invalid_request),
 101	INIT_VPORT_Q_COUNTER(req_remote_access_errors),
 102	INIT_VPORT_Q_COUNTER(resp_remote_access_errors),
 103	INIT_VPORT_Q_COUNTER(resp_cqe_flush_error),
 104	INIT_VPORT_Q_COUNTER(req_cqe_flush_error),
 105};
 106
 /* "vport_" named twin of roce_accl_cnts (same capability gate). */
 107static const struct mlx5_ib_counter vport_roce_accl_cnts[] = {
 108	INIT_VPORT_Q_COUNTER(roce_adp_retrans),
 109	INIT_VPORT_Q_COUNTER(roce_adp_retrans_to),
 110	INIT_VPORT_Q_COUNTER(roce_slow_restart),
 111	INIT_VPORT_Q_COUNTER(roce_slow_restart_cnps),
 112	INIT_VPORT_Q_COUNTER(roce_slow_restart_trans),
 113};
 114
 /*
  * Table entry for an extended PPCNT counter: the offset is that of the
  * <name>_high field of the eth_extended_cntrs_grp_data_layout counter set
  * inside ppcnt_reg.
  */
 115#define INIT_EXT_PPCNT_COUNTER(_name)		\
 116	{ .name = #_name, .offset =	\
 117	MLX5_BYTE_OFF(ppcnt_reg, \
 118		      counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
 119
 /*
  * Filled in for non-vport ports only, and only when the
  * rx_icrc_encapsulated_counter PCAM feature is reported.
  */
 120static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
 121	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
 122};
 123
 /*
  * Table entry for an optional op counter: no offset is set; .type holds
  * the MLX5_IB_OPCOUNTER_<_type> id.
  */
 124#define INIT_OP_COUNTER(_name, _type)		\
 125	{ .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type}
 126
 /* Op counters filled in unconditionally for non-vport ports. */
 127static const struct mlx5_ib_counter basic_op_cnts[] = {
 128	INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS),
 129};
 130
 /*
  * Filled in only when the flow-table capability
  * ft_field_support_2_nic_receive_rdma.bth_opcode is set.
  */
 131static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = {
 132	INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS),
 133};
 134
 /*
  * Filled in only when the flow-table capability
  * ft_field_support_2_nic_transmit_rdma.bth_opcode is set.
  */
 135static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
 136	INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
 137};
 138
 139static int mlx5_ib_read_counters(struct ib_counters *counters,
 140				 struct ib_counters_read_attr *read_attr,
 141				 struct uverbs_attr_bundle *attrs)
 142{
 143	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
 144	struct mlx5_read_counters_attr mread_attr = {};
 145	struct mlx5_ib_flow_counters_desc *desc;
 146	int ret, i;
 147
 148	mutex_lock(&mcounters->mcntrs_mutex);
 149	if (mcounters->cntrs_max_index > read_attr->ncounters) {
 150		ret = -EINVAL;
 151		goto err_bound;
 152	}
 153
 154	mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
 155				 GFP_KERNEL);
 156	if (!mread_attr.out) {
 157		ret = -ENOMEM;
 158		goto err_bound;
 159	}
 160
 161	mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
 162	mread_attr.flags = read_attr->flags;
 163	ret = mcounters->read_counters(counters->device, &mread_attr);
 164	if (ret)
 165		goto err_read;
 166
 167	/* do the pass over the counters data array to assign according to the
 168	 * descriptions and indexing pairs
 169	 */
 170	desc = mcounters->counters_data;
 171	for (i = 0; i < mcounters->ncounters; i++)
 172		read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
 173
 174err_read:
 175	kfree(mread_attr.out);
 176err_bound:
 177	mutex_unlock(&mcounters->mcntrs_mutex);
 178	return ret;
 179}
 180
 181static int mlx5_ib_destroy_counters(struct ib_counters *counters)
 182{
 183	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
 184
 185	mlx5_ib_counters_clear_description(counters);
 186	if (mcounters->hw_cntrs_hndl)
 187		mlx5_fc_destroy(to_mdev(counters->device)->mdev,
 188				mcounters->hw_cntrs_hndl);
 189	return 0;
 190}
 191
 192static int mlx5_ib_create_counters(struct ib_counters *counters,
 193				   struct uverbs_attr_bundle *attrs)
 194{
 195	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
 196
 197	mutex_init(&mcounters->mcntrs_mutex);
 198	return 0;
 199}
 200
 201static bool vport_qcounters_supported(struct mlx5_ib_dev *dev)
 202{
 203	return MLX5_CAP_GEN(dev->mdev, q_counter_other_vport) &&
 204	       MLX5_CAP_GEN(dev->mdev, q_counter_aggregation);
 205}
 206
 207static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
 208						   u32 port_num)
 209{
 210	if ((is_mdev_switchdev_mode(dev->mdev) &&
 211	     !vport_qcounters_supported(dev)) || !port_num)
 212		return &dev->port[0].cnts;
 213
 214	return is_mdev_switchdev_mode(dev->mdev) ?
 215	       &dev->port[1].cnts : &dev->port[port_num - 1].cnts;
 216}
 217
 218/**
 219 * mlx5_ib_get_counters_id - Returns counters id to use for device+port
 220 * @dev:	Pointer to mlx5 IB device
 221 * @port_num:	Zero based port number
 222 *
 223 * mlx5_ib_get_counters_id() Returns counters set id to use for given
 224 * device port combination in switchdev and non switchdev mode of the
 225 * parent device.
 226 */
 227u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
 228{
 229	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num + 1);
 230
 231	return cnts->set_id;
 232}
 233
 234static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts)
 235{
 236	struct rdma_hw_stats *stats;
 237	u32 num_hw_counters;
 238	int i;
 239
 240	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
 241			  cnts->num_ext_ppcnt_counters;
 242	stats = rdma_alloc_hw_stats_struct(cnts->descs,
 243					   num_hw_counters +
 244					   cnts->num_op_counters,
 245					   RDMA_HW_STATS_DEFAULT_LIFESPAN);
 246	if (!stats)
 247		return NULL;
 248
 249	for (i = 0; i < cnts->num_op_counters; i++)
 250		set_bit(num_hw_counters + i, stats->is_disabled);
 251
 252	return stats;
 253}
 254
 255static struct rdma_hw_stats *
 256mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
 257{
 258	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 259	const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;
 260
 261	return do_alloc_stats(cnts);
 262}
 263
 264static struct rdma_hw_stats *
 265mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
 266{
 267	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 268	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
 269
 270	return do_alloc_stats(cnts);
 271}
 272
 273static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
 274				    const struct mlx5_ib_counters *cnts,
 275				    struct rdma_hw_stats *stats,
 276				    u16 set_id)
 277{
 278	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
 279	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
 280	__be32 val;
 281	int ret, i;
 282
 283	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
 284	MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
 285	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
 286	if (ret)
 287		return ret;
 288
 289	for (i = 0; i < cnts->num_q_counters; i++) {
 290		val = *(__be32 *)((void *)out + cnts->offsets[i]);
 291		stats->value[i] = (u64)be32_to_cpu(val);
 292	}
 293
 294	return 0;
 295}
 296
 297static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
 298					    const struct mlx5_ib_counters *cnts,
 299					    struct rdma_hw_stats *stats)
 300{
 301	int offset = cnts->num_q_counters + cnts->num_cong_counters;
 302	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
 303	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
 304	int ret, i;
 305	void *out;
 306
 307	out = kvzalloc(sz, GFP_KERNEL);
 308	if (!out)
 309		return -ENOMEM;
 310
 311	MLX5_SET(ppcnt_reg, in, local_port, 1);
 312	MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
 313	ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
 314				   0, 0);
 315	if (ret)
 316		goto free;
 317
 318	for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
 319		stats->value[i + offset] =
 320			be64_to_cpup((__be64 *)(out +
 321				    cnts->offsets[i + offset]));
 322free:
 323	kvfree(out);
 324	return ret;
 325}
 326
 327static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
 328					  u32 port_num,
 329					  const struct mlx5_ib_counters *cnts,
 330					  struct rdma_hw_stats *stats)
 331
 332{
 333	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
 334	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
 335	struct mlx5_core_dev *mdev;
 336	__be32 val;
 337	int ret, i;
 338
 339	if (!dev->port[port_num].rep ||
 340	    dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK)
 341		return 0;
 342
 343	mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw);
 344	if (!mdev)
 345		return -EOPNOTSUPP;
 346
 347	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
 348	MLX5_SET(query_q_counter_in, in, other_vport, 1);
 349	MLX5_SET(query_q_counter_in, in, vport_number,
 350		 dev->port[port_num].rep->vport);
 351	MLX5_SET(query_q_counter_in, in, aggregate, 1);
 352	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
 353	if (ret)
 354		return ret;
 355
 356	for (i = 0; i < cnts->num_q_counters; i++) {
 357		val = *(__be32 *)((void *)out + cnts->offsets[i]);
 358		stats->value[i] = (u64)be32_to_cpu(val);
 359	}
 360
 361	return 0;
 362}
 363
 364static int do_get_hw_stats(struct ib_device *ibdev,
 365			   struct rdma_hw_stats *stats,
 366			   u32 port_num, int index)
 367{
 368	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 369	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
 370	struct mlx5_core_dev *mdev;
 371	int ret, num_counters;
 372
 373	if (!stats)
 374		return -EINVAL;
 375
 376	num_counters = cnts->num_q_counters +
 377		       cnts->num_cong_counters +
 378		       cnts->num_ext_ppcnt_counters;
 379
 380	if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
 381		ret = mlx5_ib_query_q_counters_vport(dev, port_num - 1, cnts,
 382						     stats);
 383	else
 384		ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats,
 385					       cnts->set_id);
 386	if (ret)
 387		return ret;
 388
 389	/* We don't expose device counters over Vports */
 390	if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0)
 391		goto done;
 392
 393	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
 394		ret =  mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
 395		if (ret)
 396			return ret;
 397	}
 398
 399	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
 400		if (!port_num)
 401			port_num = 1;
 402		mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL);
 403		if (!mdev) {
 404			/* If port is not affiliated yet, its in down state
 405			 * which doesn't have any counters yet, so it would be
 406			 * zero. So no need to read from the HCA.
 407			 */
 408			goto done;
 409		}
 410		ret = mlx5_lag_query_cong_counters(dev->mdev,
 411						   stats->value +
 412						   cnts->num_q_counters,
 413						   cnts->num_cong_counters,
 414						   cnts->offsets +
 415						   cnts->num_q_counters);
 416
 417		mlx5_ib_put_native_port_mdev(dev, port_num);
 418		if (ret)
 419			return ret;
 420	}
 421
 422done:
 423	return num_counters;
 424}
 425
 426static int do_get_op_stat(struct ib_device *ibdev,
 427			  struct rdma_hw_stats *stats,
 428			  u32 port_num, int index)
 429{
 430	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 431	const struct mlx5_ib_counters *cnts;
 432	const struct mlx5_ib_op_fc *opfcs;
 433	u64 packets = 0, bytes;
 434	u32 type;
 435	int ret;
 436
 437	cnts = get_counters(dev, port_num);
 438
 439	opfcs = cnts->opfcs;
 440	type = *(u32 *)cnts->descs[index].priv;
 441	if (type >= MLX5_IB_OPCOUNTER_MAX)
 442		return -EINVAL;
 443
 444	if (!opfcs[type].fc)
 445		goto out;
 446
 447	ret = mlx5_fc_query(dev->mdev, opfcs[type].fc,
 448			    &packets, &bytes);
 449	if (ret)
 450		return ret;
 451
 452out:
 453	stats->value[index] = packets;
 454	return index;
 455}
 456
 457static int do_get_op_stats(struct ib_device *ibdev,
 458			   struct rdma_hw_stats *stats,
 459			   u32 port_num)
 460{
 461	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 462	const struct mlx5_ib_counters *cnts;
 463	int index, ret, num_hw_counters;
 464
 465	cnts = get_counters(dev, port_num);
 466	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
 467			  cnts->num_ext_ppcnt_counters;
 468	for (index = num_hw_counters;
 469	     index < (num_hw_counters + cnts->num_op_counters); index++) {
 470		ret = do_get_op_stat(ibdev, stats, port_num, index);
 471		if (ret != index)
 472			return ret;
 473	}
 474
 475	return cnts->num_op_counters;
 476}
 477
 478static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
 479				struct rdma_hw_stats *stats,
 480				u32 port_num, int index)
 481{
 482	int num_counters, num_hw_counters, num_op_counters;
 483	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 484	const struct mlx5_ib_counters *cnts;
 485
 486	cnts = get_counters(dev, port_num);
 487	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
 488		cnts->num_ext_ppcnt_counters;
 489	num_counters = num_hw_counters + cnts->num_op_counters;
 490
 491	if (index < 0 || index > num_counters)
 492		return -EINVAL;
 493	else if (index > 0 && index < num_hw_counters)
 494		return do_get_hw_stats(ibdev, stats, port_num, index);
 495	else if (index >= num_hw_counters && index < num_counters)
 496		return do_get_op_stat(ibdev, stats, port_num, index);
 497
 498	num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index);
 499	if (num_hw_counters < 0)
 500		return num_hw_counters;
 501
 502	num_op_counters = do_get_op_stats(ibdev, stats, port_num);
 503	if (num_op_counters < 0)
 504		return num_op_counters;
 505
 506	return num_hw_counters + num_op_counters;
 507}
 508
 509static struct rdma_hw_stats *
 510mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
 511{
 512	struct mlx5_ib_dev *dev = to_mdev(counter->device);
 513	const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
 514
 515	return do_alloc_stats(cnts);
 516}
 517
 518static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
 519{
 520	struct mlx5_ib_dev *dev = to_mdev(counter->device);
 521	const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
 522
 523	return mlx5_ib_query_q_counters(dev->mdev, cnts,
 524					counter->stats, counter->id);
 525}
 526
 527static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
 528{
 529	struct mlx5_ib_dev *dev = to_mdev(counter->device);
 530	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
 531
 532	if (!counter->id)
 533		return 0;
 534
 535	MLX5_SET(dealloc_q_counter_in, in, opcode,
 536		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
 537	MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
 538	return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
 539}
 540
 541static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
 542				   struct ib_qp *qp)
 543{
 544	struct mlx5_ib_dev *dev = to_mdev(qp->device);
 545	int err;
 546
 547	if (!counter->id) {
 548		u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
 549		u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
 550
 551		MLX5_SET(alloc_q_counter_in, in, opcode,
 552			 MLX5_CMD_OP_ALLOC_Q_COUNTER);
 553		MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
 554		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
 555		if (err)
 556			return err;
 557		counter->id =
 558			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
 559	}
 560
 561	err = mlx5_ib_qp_set_counter(qp, counter);
 562	if (err)
 563		goto fail_set_counter;
 564
 565	return 0;
 566
 567fail_set_counter:
 568	mlx5_ib_counter_dealloc(counter);
 569	counter->id = 0;
 570
 571	return err;
 572}
 573
 574static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
 575{
 576	return mlx5_ib_qp_set_counter(qp, NULL);
 577}
 578
 579static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
 580				  struct rdma_stat_desc *descs, size_t *offsets,
 581				  u32 port_num)
 582{
 583	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
 584			port_num != MLX5_VPORT_PF;
 585	const struct mlx5_ib_counter *names;
 586	int j = 0, i, size;
 587
 588	names = is_vport ? vport_basic_q_cnts : basic_q_cnts;
 589	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
 590			  ARRAY_SIZE(basic_q_cnts);
 591	for (i = 0; i < size; i++, j++) {
 592		descs[j].name = names[i].name;
 593		offsets[j] = names[i].offset;
 594	}
 595
 596	names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts;
 597	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
 598			  ARRAY_SIZE(out_of_seq_q_cnts);
 599	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
 600		for (i = 0; i < size; i++, j++) {
 601			descs[j].name = names[i].name;
 602			offsets[j] = names[i].offset;
 603		}
 604	}
 605
 606	names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts;
 607	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
 608			  ARRAY_SIZE(retrans_q_cnts);
 609	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
 610		for (i = 0; i < size; i++, j++) {
 611			descs[j].name = names[i].name;
 612			offsets[j] = names[i].offset;
 613		}
 614	}
 615
 616	names = is_vport ? vport_extended_err_cnts : extended_err_cnts;
 617	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
 618			  ARRAY_SIZE(extended_err_cnts);
 619	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
 620		for (i = 0; i < size; i++, j++) {
 621			descs[j].name = names[i].name;
 622			offsets[j] = names[i].offset;
 623		}
 624	}
 625
 626	names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts;
 627	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
 628			  ARRAY_SIZE(roce_accl_cnts);
 629	if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
 630		for (i = 0; i < size; i++, j++) {
 631			descs[j].name = names[i].name;
 632			offsets[j] = names[i].offset;
 633		}
 634	}
 635
 636	if (is_vport)
 637		return;
 638
 639	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
 640		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
 641			descs[j].name = cong_cnts[i].name;
 642			offsets[j] = cong_cnts[i].offset;
 643		}
 644	}
 645
 646	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
 647		for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
 648			descs[j].name = ext_ppcnt_cnts[i].name;
 649			offsets[j] = ext_ppcnt_cnts[i].offset;
 650		}
 651	}
 652
 653	for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) {
 654		descs[j].name = basic_op_cnts[i].name;
 655		descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
 656		descs[j].priv = &basic_op_cnts[i].type;
 657	}
 658
 659	if (MLX5_CAP_FLOWTABLE(dev->mdev,
 660			       ft_field_support_2_nic_receive_rdma.bth_opcode)) {
 661		for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) {
 662			descs[j].name = rdmarx_cnp_op_cnts[i].name;
 663			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
 664			descs[j].priv = &rdmarx_cnp_op_cnts[i].type;
 665		}
 666	}
 667
 668	if (MLX5_CAP_FLOWTABLE(dev->mdev,
 669			       ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
 670		for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) {
 671			descs[j].name = rdmatx_cnp_op_cnts[i].name;
 672			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
 673			descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
 674		}
 675	}
 676}
 677
 678
 679static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
 680				    struct mlx5_ib_counters *cnts, u32 port_num)
 681{
 682	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
 683			port_num != MLX5_VPORT_PF;
 684	u32 num_counters, num_op_counters = 0, size;
 685
 686	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
 687			  ARRAY_SIZE(basic_q_cnts);
 688	num_counters = size;
 689
 690	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
 691			  ARRAY_SIZE(out_of_seq_q_cnts);
 692	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
 693		num_counters += size;
 694
 695	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
 696			  ARRAY_SIZE(retrans_q_cnts);
 697	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
 698		num_counters += size;
 699
 700	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
 701			  ARRAY_SIZE(extended_err_cnts);
 702	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
 703		num_counters += size;
 704
 705	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
 706			  ARRAY_SIZE(roce_accl_cnts);
 707	if (MLX5_CAP_GEN(dev->mdev, roce_accl))
 708		num_counters += size;
 709
 710	cnts->num_q_counters = num_counters;
 711
 712	if (is_vport)
 713		goto skip_non_qcounters;
 714
 715	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
 716		cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
 717		num_counters += ARRAY_SIZE(cong_cnts);
 718	}
 719	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
 720		cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
 721		num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
 722	}
 723
 724	num_op_counters = ARRAY_SIZE(basic_op_cnts);
 725
 726	if (MLX5_CAP_FLOWTABLE(dev->mdev,
 727			       ft_field_support_2_nic_receive_rdma.bth_opcode))
 728		num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);
 729
 730	if (MLX5_CAP_FLOWTABLE(dev->mdev,
 731			       ft_field_support_2_nic_transmit_rdma.bth_opcode))
 732		num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts);
 733
 734skip_non_qcounters:
 735	cnts->num_op_counters = num_op_counters;
 736	num_counters += num_op_counters;
 737	cnts->descs = kcalloc(num_counters,
 738			      sizeof(struct rdma_stat_desc), GFP_KERNEL);
 739	if (!cnts->descs)
 740		return -ENOMEM;
 741
 742	cnts->offsets = kcalloc(num_counters,
 743				sizeof(*cnts->offsets), GFP_KERNEL);
 744	if (!cnts->offsets)
 745		goto err;
 746
 747	return 0;
 748
 749err:
 750	kfree(cnts->descs);
 751	cnts->descs = NULL;
 752	return -ENOMEM;
 753}
 754
 755static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
 756{
 757	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
 758	int num_cnt_ports = dev->num_ports;
 759	int i, j;
 760
 761	if (is_mdev_switchdev_mode(dev->mdev))
 762		num_cnt_ports = min(2, num_cnt_ports);
 763
 764	MLX5_SET(dealloc_q_counter_in, in, opcode,
 765		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
 766
 767	for (i = 0; i < num_cnt_ports; i++) {
 768		if (dev->port[i].cnts.set_id) {
 769			MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
 770				 dev->port[i].cnts.set_id);
 771			mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
 772		}
 773		kfree(dev->port[i].cnts.descs);
 774		kfree(dev->port[i].cnts.offsets);
 775
 776		for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) {
 777			if (!dev->port[i].cnts.opfcs[j].fc)
 778				continue;
 779
 780			if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
 781				mlx5_ib_fs_remove_op_fc(dev,
 782					&dev->port[i].cnts.opfcs[j], j);
 783			mlx5_fc_destroy(dev->mdev,
 784					dev->port[i].cnts.opfcs[j].fc);
 785			dev->port[i].cnts.opfcs[j].fc = NULL;
 786		}
 787	}
 788}
 789
 790static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
 791{
 792	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
 793	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
 794	int num_cnt_ports = dev->num_ports;
 795	int err = 0;
 796	int i;
 797	bool is_shared;
 798
 799	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
 800	is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
 801
 802	/*
 803	 * In switchdev we need to allocate two ports, one that is used for
 804	 * the device Q_counters and it is essentially the real Q_counters of
 805	 * this device, while the other is used as a helper for PF to be able to
 806	 * query all other vports.
 807	 */
 808	if (is_mdev_switchdev_mode(dev->mdev))
 809		num_cnt_ports = min(2, num_cnt_ports);
 810
 811	for (i = 0; i < num_cnt_ports; i++) {
 812		err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i);
 813		if (err)
 814			goto err_alloc;
 815
 816		mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs,
 817				      dev->port[i].cnts.offsets, i);
 818
 819		MLX5_SET(alloc_q_counter_in, in, uid,
 820			 is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
 821
 822		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
 823		if (err) {
 824			mlx5_ib_warn(dev,
 825				     "couldn't allocate queue counter for port %d, err %d\n",
 826				     i + 1, err);
 827			goto err_alloc;
 828		}
 829
 830		dev->port[i].cnts.set_id =
 831			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
 832	}
 833	return 0;
 834
 835err_alloc:
 836	mlx5_ib_dealloc_counters(dev);
 837	return err;
 838}
 839
 840static int read_flow_counters(struct ib_device *ibdev,
 841			      struct mlx5_read_counters_attr *read_attr)
 842{
 843	struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
 844	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 845
 846	return mlx5_fc_query(dev->mdev, fc,
 847			     &read_attr->out[IB_COUNTER_PACKETS],
 848			     &read_attr->out[IB_COUNTER_BYTES]);
 849}
 850
 851/* flow counters currently expose two counters packets and bytes */
 852#define FLOW_COUNTERS_NUM 2
 853static int counters_set_description(
 854	struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
 855	struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
 856{
 857	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
 858	u32 cntrs_max_index = 0;
 859	int i;
 860
 861	if (counters_type != MLX5_IB_COUNTERS_FLOW)
 862		return -EINVAL;
 863
 864	/* init the fields for the object */
 865	mcounters->type = counters_type;
 866	mcounters->read_counters = read_flow_counters;
 867	mcounters->counters_num = FLOW_COUNTERS_NUM;
 868	mcounters->ncounters = ncounters;
 869	/* each counter entry have both description and index pair */
 870	for (i = 0; i < ncounters; i++) {
 871		if (desc_data[i].description > IB_COUNTER_BYTES)
 872			return -EINVAL;
 873
 874		if (cntrs_max_index <= desc_data[i].index)
 875			cntrs_max_index = desc_data[i].index + 1;
 876	}
 877
 878	mutex_lock(&mcounters->mcntrs_mutex);
 879	mcounters->counters_data = desc_data;
 880	mcounters->cntrs_max_index = cntrs_max_index;
 881	mutex_unlock(&mcounters->mcntrs_mutex);
 882
 883	return 0;
 884}
 885
 886#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
 887int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
 888				   struct mlx5_ib_create_flow *ucmd)
 889{
 890	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
 891	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
 892	struct mlx5_ib_flow_counters_desc *desc_data = NULL;
 893	bool hw_hndl = false;
 894	int ret = 0;
 895
 896	if (ucmd && ucmd->ncounters_data != 0) {
 897		cntrs_data = ucmd->data;
 898		if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
 899			return -EINVAL;
 900
 901		desc_data = kcalloc(cntrs_data->ncounters,
 902				    sizeof(*desc_data),
 903				    GFP_KERNEL);
 904		if (!desc_data)
 905			return  -ENOMEM;
 906
 907		if (copy_from_user(desc_data,
 908				   u64_to_user_ptr(cntrs_data->counters_data),
 909				   sizeof(*desc_data) * cntrs_data->ncounters)) {
 910			ret = -EFAULT;
 911			goto free;
 912		}
 913	}
 914
 915	if (!mcounters->hw_cntrs_hndl) {
 916		mcounters->hw_cntrs_hndl = mlx5_fc_create(
 917			to_mdev(ibcounters->device)->mdev, false);
 918		if (IS_ERR(mcounters->hw_cntrs_hndl)) {
 919			ret = PTR_ERR(mcounters->hw_cntrs_hndl);
 920			goto free;
 921		}
 922		hw_hndl = true;
 923	}
 924
 925	if (desc_data) {
 926		/* counters already bound to at least one flow */
 927		if (mcounters->cntrs_max_index) {
 928			ret = -EINVAL;
 929			goto free_hndl;
 930		}
 931
 932		ret = counters_set_description(ibcounters,
 933					       MLX5_IB_COUNTERS_FLOW,
 934					       desc_data,
 935					       cntrs_data->ncounters);
 936		if (ret)
 937			goto free_hndl;
 938
 939	} else if (!mcounters->cntrs_max_index) {
 940		/* counters not bound yet, must have udata passed */
 941		ret = -EINVAL;
 942		goto free_hndl;
 943	}
 944
 945	return 0;
 946
 947free_hndl:
 948	if (hw_hndl) {
 949		mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
 950				mcounters->hw_cntrs_hndl);
 951		mcounters->hw_cntrs_hndl = NULL;
 952	}
 953free:
 954	kfree(desc_data);
 955	return ret;
 956}
 957
 958void mlx5_ib_counters_clear_description(struct ib_counters *counters)
 959{
 960	struct mlx5_ib_mcounters *mcounters;
 961
 962	if (!counters || atomic_read(&counters->usecnt) != 1)
 963		return;
 964
 965	mcounters = to_mcounters(counters);
 966
 967	mutex_lock(&mcounters->mcntrs_mutex);
 968	kfree(mcounters->counters_data);
 969	mcounters->counters_data = NULL;
 970	mcounters->cntrs_max_index = 0;
 971	mutex_unlock(&mcounters->mcntrs_mutex);
 972}
 973
 974static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
 975			       unsigned int index, bool enable)
 976{
 977	struct mlx5_ib_dev *dev = to_mdev(device);
 978	struct mlx5_ib_counters *cnts;
 979	struct mlx5_ib_op_fc *opfc;
 980	u32 num_hw_counters, type;
 981	int ret;
 982
 983	cnts = &dev->port[port - 1].cnts;
 984	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
 985		cnts->num_ext_ppcnt_counters;
 986	if (index < num_hw_counters ||
 987	    index >= (num_hw_counters + cnts->num_op_counters))
 988		return -EINVAL;
 989
 990	if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
 991		return -EINVAL;
 992
 993	type = *(u32 *)cnts->descs[index].priv;
 994	if (type >= MLX5_IB_OPCOUNTER_MAX)
 995		return -EINVAL;
 996
 997	opfc = &cnts->opfcs[type];
 998
 999	if (enable) {
1000		if (opfc->fc)
1001			return -EEXIST;
1002
1003		opfc->fc = mlx5_fc_create(dev->mdev, false);
1004		if (IS_ERR(opfc->fc))
1005			return PTR_ERR(opfc->fc);
1006
1007		ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type);
1008		if (ret) {
1009			mlx5_fc_destroy(dev->mdev, opfc->fc);
1010			opfc->fc = NULL;
1011		}
1012		return ret;
1013	}
1014
1015	if (!opfc->fc)
1016		return -EINVAL;
1017
1018	mlx5_ib_fs_remove_op_fc(dev, opfc, type);
1019	mlx5_fc_destroy(dev->mdev, opfc->fc);
1020	opfc->fc = NULL;
1021	return 0;
1022}
1023
1024static const struct ib_device_ops hw_stats_ops = {
1025	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
1026	.get_hw_stats = mlx5_ib_get_hw_stats,
1027	.counter_bind_qp = mlx5_ib_counter_bind_qp,
1028	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
1029	.counter_dealloc = mlx5_ib_counter_dealloc,
1030	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
1031	.counter_update_stats = mlx5_ib_counter_update_stats,
1032	.modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ?
1033			  mlx5_ib_modify_stat : NULL,
1034};
1035
1036static const struct ib_device_ops hw_switchdev_vport_op = {
1037	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
1038};
1039
1040static const struct ib_device_ops hw_switchdev_stats_ops = {
1041	.alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats,
1042	.get_hw_stats = mlx5_ib_get_hw_stats,
1043	.counter_bind_qp = mlx5_ib_counter_bind_qp,
1044	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
1045	.counter_dealloc = mlx5_ib_counter_dealloc,
1046	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
1047	.counter_update_stats = mlx5_ib_counter_update_stats,
1048};
1049
1050static const struct ib_device_ops counters_ops = {
1051	.create_counters = mlx5_ib_create_counters,
1052	.destroy_counters = mlx5_ib_destroy_counters,
1053	.read_counters = mlx5_ib_read_counters,
1054
1055	INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
1056};
1057
1058int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
1059{
1060	ib_set_device_ops(&dev->ib_dev, &counters_ops);
1061
1062	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
1063		return 0;
1064
1065	if (is_mdev_switchdev_mode(dev->mdev)) {
1066		ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops);
1067		if (vport_qcounters_supported(dev))
1068			ib_set_device_ops(&dev->ib_dev, &hw_switchdev_vport_op);
1069	} else
1070		ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
1071	return mlx5_ib_alloc_counters(dev);
1072}
1073
1074void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
1075{
1076	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
1077		return;
1078
1079	mlx5_ib_dealloc_counters(dev);
1080}