Linux Audio

Check our new training course

Loading...
v6.8
   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/*
   3 * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
   4 */
   5
   6#include <rdma/ib_user_verbs.h>
   7#include <rdma/ib_verbs.h>
   8#include <rdma/uverbs_types.h>
   9#include <rdma/uverbs_ioctl.h>
  10#include <rdma/uverbs_std_types.h>
  11#include <rdma/mlx5_user_ioctl_cmds.h>
  12#include <rdma/mlx5_user_ioctl_verbs.h>
  13#include <rdma/ib_hdrs.h>
  14#include <rdma/ib_umem.h>
  15#include <linux/mlx5/driver.h>
  16#include <linux/mlx5/fs.h>
  17#include <linux/mlx5/fs_helpers.h>
  18#include <linux/mlx5/eswitch.h>
  19#include <net/inet_ecn.h>
  20#include "mlx5_ib.h"
  21#include "counters.h"
  22#include "devx.h"
  23#include "fs.h"
  24
  25#define UVERBS_MODULE_NAME mlx5_ib
  26#include <rdma/uverbs_named_ioctl.h>
  27
  28enum {
  29	MATCH_CRITERIA_ENABLE_OUTER_BIT,
  30	MATCH_CRITERIA_ENABLE_MISC_BIT,
  31	MATCH_CRITERIA_ENABLE_INNER_BIT,
  32	MATCH_CRITERIA_ENABLE_MISC2_BIT
  33};
  34
  35#define HEADER_IS_ZERO(match_criteria, headers)			           \
  36	!(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
  37		    0, MLX5_FLD_SZ_BYTES(fte_match_param, headers)))       \
  38
  39static u8 get_match_criteria_enable(u32 *match_criteria)
  40{
  41	u8 match_criteria_enable;
  42
  43	match_criteria_enable =
  44		(!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
  45		MATCH_CRITERIA_ENABLE_OUTER_BIT;
  46	match_criteria_enable |=
  47		(!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
  48		MATCH_CRITERIA_ENABLE_MISC_BIT;
  49	match_criteria_enable |=
  50		(!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
  51		MATCH_CRITERIA_ENABLE_INNER_BIT;
  52	match_criteria_enable |=
  53		(!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
  54		MATCH_CRITERIA_ENABLE_MISC2_BIT;
  55
  56	return match_criteria_enable;
  57}
  58
  59static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
  60{
  61	u8 entry_mask;
  62	u8 entry_val;
  63	int err = 0;
  64
  65	if (!mask)
  66		goto out;
  67
  68	entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
  69			      ip_protocol);
  70	entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
  71			     ip_protocol);
  72	if (!entry_mask) {
  73		MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
  74		MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
  75		goto out;
  76	}
  77	/* Don't override existing ip protocol */
  78	if (mask != entry_mask || val != entry_val)
  79		err = -EINVAL;
  80out:
  81	return err;
  82}
  83
  84static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
  85			   bool inner)
  86{
  87	if (inner) {
  88		MLX5_SET(fte_match_set_misc,
  89			 misc_c, inner_ipv6_flow_label, mask);
  90		MLX5_SET(fte_match_set_misc,
  91			 misc_v, inner_ipv6_flow_label, val);
  92	} else {
  93		MLX5_SET(fte_match_set_misc,
  94			 misc_c, outer_ipv6_flow_label, mask);
  95		MLX5_SET(fte_match_set_misc,
  96			 misc_v, outer_ipv6_flow_label, val);
  97	}
  98}
  99
 100static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
 101{
 102	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
 103	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
 104	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
 105	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
 106}
 107
 108static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
 109{
 110	if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
 111	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
 112		return -EOPNOTSUPP;
 113
 114	if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
 115	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
 116		return -EOPNOTSUPP;
 117
 118	if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
 119	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
 120		return -EOPNOTSUPP;
 121
 122	if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
 123	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
 124		return -EOPNOTSUPP;
 125
 126	return 0;
 127}
 128
 129#define LAST_ETH_FIELD vlan_tag
 130#define LAST_IPV4_FIELD tos
 131#define LAST_IPV6_FIELD traffic_class
 132#define LAST_TCP_UDP_FIELD src_port
 133#define LAST_TUNNEL_FIELD tunnel_id
 134#define LAST_FLOW_TAG_FIELD tag_id
 135#define LAST_DROP_FIELD size
 136#define LAST_COUNTERS_FIELD counters
 137
 138/* Field is the last supported field */
 139#define FIELDS_NOT_SUPPORTED(filter, field)                                    \
 140	memchr_inv((void *)&filter.field + sizeof(filter.field), 0,            \
 141		   sizeof(filter) - offsetofend(typeof(filter), field))
 142
 143int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
 144			   bool is_egress,
 145			   struct mlx5_flow_act *action)
 146{
 147
 148	switch (maction->ib_action.type) {
 149	case IB_FLOW_ACTION_UNSPECIFIED:
 150		if (maction->flow_action_raw.sub_type ==
 151		    MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
 152			if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 153				return -EINVAL;
 154			action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
 155			action->modify_hdr =
 156				maction->flow_action_raw.modify_hdr;
 157			return 0;
 158		}
 159		if (maction->flow_action_raw.sub_type ==
 160		    MLX5_IB_FLOW_ACTION_DECAP) {
 161			if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
 162				return -EINVAL;
 163			action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
 164			return 0;
 165		}
 166		if (maction->flow_action_raw.sub_type ==
 167		    MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
 168			if (action->action &
 169			    MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
 170				return -EINVAL;
 171			action->action |=
 172				MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
 173			action->pkt_reformat =
 174				maction->flow_action_raw.pkt_reformat;
 175			return 0;
 176		}
 177		fallthrough;
 178	default:
 179		return -EOPNOTSUPP;
 180	}
 181}
 182
 183static int parse_flow_attr(struct mlx5_core_dev *mdev,
 184			   struct mlx5_flow_spec *spec,
 185			   const union ib_flow_spec *ib_spec,
 186			   const struct ib_flow_attr *flow_attr,
 187			   struct mlx5_flow_act *action, u32 prev_type)
 188{
 189	struct mlx5_flow_context *flow_context = &spec->flow_context;
 190	u32 *match_c = spec->match_criteria;
 191	u32 *match_v = spec->match_value;
 192	void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
 193					   misc_parameters);
 194	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
 195					   misc_parameters);
 196	void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
 197					    misc_parameters_2);
 198	void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
 199					    misc_parameters_2);
 200	void *headers_c;
 201	void *headers_v;
 202	int match_ipv;
 203	int ret;
 204
 205	if (ib_spec->type & IB_FLOW_SPEC_INNER) {
 206		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
 207					 inner_headers);
 208		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
 209					 inner_headers);
 210		match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
 211					ft_field_support.inner_ip_version);
 212	} else {
 213		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
 214					 outer_headers);
 215		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
 216					 outer_headers);
 217		match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
 218					ft_field_support.outer_ip_version);
 219	}
 220
 221	switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
 222	case IB_FLOW_SPEC_ETH:
 223		if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
 224			return -EOPNOTSUPP;
 225
 226		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 227					     dmac_47_16),
 228				ib_spec->eth.mask.dst_mac);
 229		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 230					     dmac_47_16),
 231				ib_spec->eth.val.dst_mac);
 232
 233		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 234					     smac_47_16),
 235				ib_spec->eth.mask.src_mac);
 236		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 237					     smac_47_16),
 238				ib_spec->eth.val.src_mac);
 239
 240		if (ib_spec->eth.mask.vlan_tag) {
 241			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 242				 cvlan_tag, 1);
 243			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 244				 cvlan_tag, 1);
 245
 246			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 247				 first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
 248			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 249				 first_vid, ntohs(ib_spec->eth.val.vlan_tag));
 250
 251			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 252				 first_cfi,
 253				 ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
 254			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 255				 first_cfi,
 256				 ntohs(ib_spec->eth.val.vlan_tag) >> 12);
 257
 258			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 259				 first_prio,
 260				 ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
 261			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 262				 first_prio,
 263				 ntohs(ib_spec->eth.val.vlan_tag) >> 13);
 264		}
 265		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 266			 ethertype, ntohs(ib_spec->eth.mask.ether_type));
 267		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 268			 ethertype, ntohs(ib_spec->eth.val.ether_type));
 269		break;
 270	case IB_FLOW_SPEC_IPV4:
 271		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
 272			return -EOPNOTSUPP;
 273
 274		if (match_ipv) {
 275			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 276				 ip_version, 0xf);
 277			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 278				 ip_version, MLX5_FS_IPV4_VERSION);
 279		} else {
 280			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 281				 ethertype, 0xffff);
 282			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 283				 ethertype, ETH_P_IP);
 284		}
 285
 286		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 287				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
 288		       &ib_spec->ipv4.mask.src_ip,
 289		       sizeof(ib_spec->ipv4.mask.src_ip));
 290		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 291				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
 292		       &ib_spec->ipv4.val.src_ip,
 293		       sizeof(ib_spec->ipv4.val.src_ip));
 294		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 295				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
 296		       &ib_spec->ipv4.mask.dst_ip,
 297		       sizeof(ib_spec->ipv4.mask.dst_ip));
 298		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 299				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
 300		       &ib_spec->ipv4.val.dst_ip,
 301		       sizeof(ib_spec->ipv4.val.dst_ip));
 302
 303		set_tos(headers_c, headers_v,
 304			ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
 305
 306		if (set_proto(headers_c, headers_v,
 307			      ib_spec->ipv4.mask.proto,
 308			      ib_spec->ipv4.val.proto))
 309			return -EINVAL;
 310		break;
 311	case IB_FLOW_SPEC_IPV6:
 312		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
 313			return -EOPNOTSUPP;
 314
 315		if (match_ipv) {
 316			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 317				 ip_version, 0xf);
 318			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 319				 ip_version, MLX5_FS_IPV6_VERSION);
 320		} else {
 321			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 322				 ethertype, 0xffff);
 323			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 324				 ethertype, ETH_P_IPV6);
 325		}
 326
 327		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 328				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
 329		       &ib_spec->ipv6.mask.src_ip,
 330		       sizeof(ib_spec->ipv6.mask.src_ip));
 331		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 332				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
 333		       &ib_spec->ipv6.val.src_ip,
 334		       sizeof(ib_spec->ipv6.val.src_ip));
 335		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 336				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
 337		       &ib_spec->ipv6.mask.dst_ip,
 338		       sizeof(ib_spec->ipv6.mask.dst_ip));
 339		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 340				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
 341		       &ib_spec->ipv6.val.dst_ip,
 342		       sizeof(ib_spec->ipv6.val.dst_ip));
 343
 344		set_tos(headers_c, headers_v,
 345			ib_spec->ipv6.mask.traffic_class,
 346			ib_spec->ipv6.val.traffic_class);
 347
 348		if (set_proto(headers_c, headers_v,
 349			      ib_spec->ipv6.mask.next_hdr,
 350			      ib_spec->ipv6.val.next_hdr))
 351			return -EINVAL;
 352
 353		set_flow_label(misc_params_c, misc_params_v,
 354			       ntohl(ib_spec->ipv6.mask.flow_label),
 355			       ntohl(ib_spec->ipv6.val.flow_label),
 356			       ib_spec->type & IB_FLOW_SPEC_INNER);
 357		break;
 358	case IB_FLOW_SPEC_ESP:
 359		return -EOPNOTSUPP;
 360	case IB_FLOW_SPEC_TCP:
 361		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
 362					 LAST_TCP_UDP_FIELD))
 363			return -EOPNOTSUPP;
 364
 365		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
 366			return -EINVAL;
 367
 368		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
 369			 ntohs(ib_spec->tcp_udp.mask.src_port));
 370		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
 371			 ntohs(ib_spec->tcp_udp.val.src_port));
 372
 373		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
 374			 ntohs(ib_spec->tcp_udp.mask.dst_port));
 375		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
 376			 ntohs(ib_spec->tcp_udp.val.dst_port));
 377		break;
 378	case IB_FLOW_SPEC_UDP:
 379		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
 380					 LAST_TCP_UDP_FIELD))
 381			return -EOPNOTSUPP;
 382
 383		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
 384			return -EINVAL;
 385
 386		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
 387			 ntohs(ib_spec->tcp_udp.mask.src_port));
 388		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
 389			 ntohs(ib_spec->tcp_udp.val.src_port));
 390
 391		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
 392			 ntohs(ib_spec->tcp_udp.mask.dst_port));
 393		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
 394			 ntohs(ib_spec->tcp_udp.val.dst_port));
 395		break;
 396	case IB_FLOW_SPEC_GRE:
 397		if (ib_spec->gre.mask.c_ks_res0_ver)
 398			return -EOPNOTSUPP;
 399
 400		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
 401			return -EINVAL;
 402
 403		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
 404			 0xff);
 405		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
 406			 IPPROTO_GRE);
 407
 408		MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
 409			 ntohs(ib_spec->gre.mask.protocol));
 410		MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
 411			 ntohs(ib_spec->gre.val.protocol));
 412
 413		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
 414				    gre_key.nvgre.hi),
 415		       &ib_spec->gre.mask.key,
 416		       sizeof(ib_spec->gre.mask.key));
 417		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
 418				    gre_key.nvgre.hi),
 419		       &ib_spec->gre.val.key,
 420		       sizeof(ib_spec->gre.val.key));
 421		break;
 422	case IB_FLOW_SPEC_MPLS:
 423		switch (prev_type) {
 424		case IB_FLOW_SPEC_UDP:
 425			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
 426						   ft_field_support.outer_first_mpls_over_udp),
 427						   &ib_spec->mpls.mask.tag))
 428				return -EOPNOTSUPP;
 429
 430			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
 431					    outer_first_mpls_over_udp),
 432			       &ib_spec->mpls.val.tag,
 433			       sizeof(ib_spec->mpls.val.tag));
 434			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
 435					    outer_first_mpls_over_udp),
 436			       &ib_spec->mpls.mask.tag,
 437			       sizeof(ib_spec->mpls.mask.tag));
 438			break;
 439		case IB_FLOW_SPEC_GRE:
 440			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
 441						   ft_field_support.outer_first_mpls_over_gre),
 442						   &ib_spec->mpls.mask.tag))
 443				return -EOPNOTSUPP;
 444
 445			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
 446					    outer_first_mpls_over_gre),
 447			       &ib_spec->mpls.val.tag,
 448			       sizeof(ib_spec->mpls.val.tag));
 449			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
 450					    outer_first_mpls_over_gre),
 451			       &ib_spec->mpls.mask.tag,
 452			       sizeof(ib_spec->mpls.mask.tag));
 453			break;
 454		default:
 455			if (ib_spec->type & IB_FLOW_SPEC_INNER) {
 456				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
 457							   ft_field_support.inner_first_mpls),
 458							   &ib_spec->mpls.mask.tag))
 459					return -EOPNOTSUPP;
 460
 461				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
 462						    inner_first_mpls),
 463				       &ib_spec->mpls.val.tag,
 464				       sizeof(ib_spec->mpls.val.tag));
 465				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
 466						    inner_first_mpls),
 467				       &ib_spec->mpls.mask.tag,
 468				       sizeof(ib_spec->mpls.mask.tag));
 469			} else {
 470				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
 471							   ft_field_support.outer_first_mpls),
 472							   &ib_spec->mpls.mask.tag))
 473					return -EOPNOTSUPP;
 474
 475				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
 476						    outer_first_mpls),
 477				       &ib_spec->mpls.val.tag,
 478				       sizeof(ib_spec->mpls.val.tag));
 479				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
 480						    outer_first_mpls),
 481				       &ib_spec->mpls.mask.tag,
 482				       sizeof(ib_spec->mpls.mask.tag));
 483			}
 484		}
 485		break;
 486	case IB_FLOW_SPEC_VXLAN_TUNNEL:
 487		if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
 488					 LAST_TUNNEL_FIELD))
 489			return -EOPNOTSUPP;
 490
 491		MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
 492			 ntohl(ib_spec->tunnel.mask.tunnel_id));
 493		MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
 494			 ntohl(ib_spec->tunnel.val.tunnel_id));
 495		break;
 496	case IB_FLOW_SPEC_ACTION_TAG:
 497		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
 498					 LAST_FLOW_TAG_FIELD))
 499			return -EOPNOTSUPP;
 500		if (ib_spec->flow_tag.tag_id >= BIT(24))
 501			return -EINVAL;
 502
 503		flow_context->flow_tag = ib_spec->flow_tag.tag_id;
 504		flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
 505		break;
 506	case IB_FLOW_SPEC_ACTION_DROP:
 507		if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
 508					 LAST_DROP_FIELD))
 509			return -EOPNOTSUPP;
 510		action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
 511		break;
 512	case IB_FLOW_SPEC_ACTION_HANDLE:
 513		ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
 514			flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
 515		if (ret)
 516			return ret;
 517		break;
 518	case IB_FLOW_SPEC_ACTION_COUNT:
 519		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
 520					 LAST_COUNTERS_FIELD))
 521			return -EOPNOTSUPP;
 522
 523		/* for now support only one counters spec per flow */
 524		if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
 525			return -EINVAL;
 526
 527		action->counters = ib_spec->flow_count.counters;
 528		action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
 529		break;
 530	default:
 531		return -EINVAL;
 532	}
 533
 534	return 0;
 535}
 536
 537/* If a flow could catch both multicast and unicast packets,
 538 * it won't fall into the multicast flow steering table and this rule
 539 * could steal other multicast packets.
 540 */
 541static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
 542{
 543	union ib_flow_spec *flow_spec;
 544
 545	if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
 546	    ib_attr->num_of_specs < 1)
 547		return false;
 548
 549	flow_spec = (union ib_flow_spec *)(ib_attr + 1);
 550	if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
 551		struct ib_flow_spec_ipv4 *ipv4_spec;
 552
 553		ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
 554		if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
 555			return true;
 556
 557		return false;
 558	}
 559
 560	if (flow_spec->type == IB_FLOW_SPEC_ETH) {
 561		struct ib_flow_spec_eth *eth_spec;
 562
 563		eth_spec = (struct ib_flow_spec_eth *)flow_spec;
 564		return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
 565		       is_multicast_ether_addr(eth_spec->val.dst_mac);
 566	}
 567
 568	return false;
 569}
 570
 571static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
 572			       const struct ib_flow_attr *flow_attr,
 573			       bool check_inner)
 574{
 575	union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
 576	int match_ipv = check_inner ?
 577			MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
 578					ft_field_support.inner_ip_version) :
 579			MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
 580					ft_field_support.outer_ip_version);
 581	int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
 582	bool ipv4_spec_valid, ipv6_spec_valid;
 583	unsigned int ip_spec_type = 0;
 584	bool has_ethertype = false;
 585	unsigned int spec_index;
 586	bool mask_valid = true;
 587	u16 eth_type = 0;
 588	bool type_valid;
 589
 590	/* Validate that ethertype is correct */
 591	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
 592		if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
 593		    ib_spec->eth.mask.ether_type) {
 594			mask_valid = (ib_spec->eth.mask.ether_type ==
 595				      htons(0xffff));
 596			has_ethertype = true;
 597			eth_type = ntohs(ib_spec->eth.val.ether_type);
 598		} else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
 599			   (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
 600			ip_spec_type = ib_spec->type;
 601		}
 602		ib_spec = (void *)ib_spec + ib_spec->size;
 603	}
 604
 605	type_valid = (!has_ethertype) || (!ip_spec_type);
 606	if (!type_valid && mask_valid) {
 607		ipv4_spec_valid = (eth_type == ETH_P_IP) &&
 608			(ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
 609		ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
 610			(ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
 611
 612		type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
 613			     (((eth_type == ETH_P_MPLS_UC) ||
 614			       (eth_type == ETH_P_MPLS_MC)) && match_ipv);
 615	}
 616
 617	return type_valid;
 618}
 619
 620static bool is_valid_attr(struct mlx5_core_dev *mdev,
 621			  const struct ib_flow_attr *flow_attr)
 622{
 623	return is_valid_ethertype(mdev, flow_attr, false) &&
 624	       is_valid_ethertype(mdev, flow_attr, true);
 625}
 626
 627static void put_flow_table(struct mlx5_ib_dev *dev,
 628			   struct mlx5_ib_flow_prio *prio, bool ft_added)
 629{
 630	prio->refcount -= !!ft_added;
 631	if (!prio->refcount) {
 632		mlx5_destroy_flow_table(prio->flow_table);
 633		prio->flow_table = NULL;
 634	}
 635}
 636
 637static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
 638{
 639	struct mlx5_ib_flow_handler *handler = container_of(flow_id,
 640							  struct mlx5_ib_flow_handler,
 641							  ibflow);
 642	struct mlx5_ib_flow_handler *iter, *tmp;
 643	struct mlx5_ib_dev *dev = handler->dev;
 644
 645	mutex_lock(&dev->flow_db->lock);
 646
 647	list_for_each_entry_safe(iter, tmp, &handler->list, list) {
 648		mlx5_del_flow_rules(iter->rule);
 649		put_flow_table(dev, iter->prio, true);
 650		list_del(&iter->list);
 651		kfree(iter);
 652	}
 653
 654	mlx5_del_flow_rules(handler->rule);
 655	put_flow_table(dev, handler->prio, true);
 656	mlx5_ib_counters_clear_description(handler->ibcounters);
 657	mutex_unlock(&dev->flow_db->lock);
 658	if (handler->flow_matcher)
 659		atomic_dec(&handler->flow_matcher->usecnt);
 660	kfree(handler);
 661
 662	return 0;
 663}
 664
 665static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
 666{
 667	priority *= 2;
 668	if (!dont_trap)
 669		priority++;
 670	return priority;
 671}
 672
 673enum flow_table_type {
 674	MLX5_IB_FT_RX,
 675	MLX5_IB_FT_TX
 676};
 677
 678#define MLX5_FS_MAX_TYPES	 6
 679#define MLX5_FS_MAX_ENTRIES	 BIT(16)
 680
 681static bool mlx5_ib_shared_ft_allowed(struct ib_device *device)
 682{
 683	struct mlx5_ib_dev *dev = to_mdev(device);
 684
 685	return MLX5_CAP_GEN(dev->mdev, shared_object_to_user_object_allowed);
 686}
 687
 688static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
 689					   struct mlx5_flow_namespace *ns,
 690					   struct mlx5_ib_flow_prio *prio,
 691					   int priority,
 692					   int num_entries, int num_groups,
 693					   u32 flags)
 694{
 695	struct mlx5_flow_table_attr ft_attr = {};
 696	struct mlx5_flow_table *ft;
 697
 698	ft_attr.prio = priority;
 699	ft_attr.max_fte = num_entries;
 700	ft_attr.flags = flags;
 701	ft_attr.autogroup.max_num_groups = num_groups;
 702	ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
 703	if (IS_ERR(ft))
 704		return ERR_CAST(ft);
 705
 706	prio->flow_table = ft;
 707	prio->refcount = 0;
 708	return prio;
 709}
 710
 711static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 712						struct ib_flow_attr *flow_attr,
 713						enum flow_table_type ft_type)
 714{
 715	bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
 716	struct mlx5_flow_namespace *ns = NULL;
 717	enum mlx5_flow_namespace_type fn_type;
 718	struct mlx5_ib_flow_prio *prio;
 719	struct mlx5_flow_table *ft;
 720	int max_table_size;
 721	int num_entries;
 722	int num_groups;
 723	bool esw_encap;
 724	u32 flags = 0;
 725	int priority;
 726
 727	max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
 728						       log_max_ft_size));
 729	esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
 730		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
 731	switch (flow_attr->type) {
 732	case IB_FLOW_ATTR_NORMAL:
 733		if (flow_is_multicast_only(flow_attr) && !dont_trap)
 734			priority = MLX5_IB_FLOW_MCAST_PRIO;
 735		else
 736			priority = ib_prio_to_core_prio(flow_attr->priority,
 737							dont_trap);
 738		if (ft_type == MLX5_IB_FT_RX) {
 739			fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
 740			prio = &dev->flow_db->prios[priority];
 741			if (!dev->is_rep && !esw_encap &&
 742			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
 743				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
 744			if (!dev->is_rep && !esw_encap &&
 745			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
 746						      reformat_l3_tunnel_to_l2))
 747				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
 748		} else {
 749			max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(
 750				dev->mdev, log_max_ft_size));
 751			fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
 752			prio = &dev->flow_db->egress_prios[priority];
 753			if (!dev->is_rep && !esw_encap &&
 754			    MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
 755				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
 756		}
 757		ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
 758		num_entries = MLX5_FS_MAX_ENTRIES;
 759		num_groups = MLX5_FS_MAX_TYPES;
 760		break;
 761	case IB_FLOW_ATTR_ALL_DEFAULT:
 762	case IB_FLOW_ATTR_MC_DEFAULT:
 763		ns = mlx5_get_flow_namespace(dev->mdev,
 764					     MLX5_FLOW_NAMESPACE_LEFTOVERS);
 765		build_leftovers_ft_param(&priority, &num_entries, &num_groups);
 766		prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
 767		break;
 768	case IB_FLOW_ATTR_SNIFFER:
 769		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
 770					allow_sniffer_and_nic_rx_shared_tir))
 771			return ERR_PTR(-EOPNOTSUPP);
 772
 773		ns = mlx5_get_flow_namespace(
 774			dev->mdev, ft_type == MLX5_IB_FT_RX ?
 775					   MLX5_FLOW_NAMESPACE_SNIFFER_RX :
 776					   MLX5_FLOW_NAMESPACE_SNIFFER_TX);
 777
 778		prio = &dev->flow_db->sniffer[ft_type];
 779		priority = 0;
 780		num_entries = 1;
 781		num_groups = 1;
 782		break;
 783	default:
 784		break;
 785	}
 786
 787	if (!ns)
 788		return ERR_PTR(-EOPNOTSUPP);
 789
 790	max_table_size = min_t(int, num_entries, max_table_size);
 791
 792	ft = prio->flow_table;
 793	if (!ft)
 794		return _get_prio(dev, ns, prio, priority, max_table_size,
 795				 num_groups, flags);
 796
 797	return prio;
 798}
 799
 800enum {
 801	RDMA_RX_ECN_OPCOUNTER_PRIO,
 802	RDMA_RX_CNP_OPCOUNTER_PRIO,
 803};
 804
 805enum {
 806	RDMA_TX_CNP_OPCOUNTER_PRIO,
 807};
 808
 809static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
 810			      struct mlx5_flow_spec *spec)
 811{
 812	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
 813					ft_field_support.source_vhca_port) ||
 814	    !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
 815					ft_field_support.source_vhca_port))
 816		return -EOPNOTSUPP;
 817
 818	MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria,
 819			 misc_parameters.source_vhca_port);
 820	MLX5_SET(fte_match_param, &spec->match_value,
 821		 misc_parameters.source_vhca_port, port_num);
 822
 823	return 0;
 824}
 825
 826static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
 827			   struct mlx5_flow_spec *spec, int ipv)
 828{
 829	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
 830					ft_field_support.outer_ip_version))
 831		return -EOPNOTSUPP;
 832
 833	if (mlx5_core_mp_enabled(dev->mdev) &&
 834	    set_vhca_port_spec(dev, port_num, spec))
 835		return -EOPNOTSUPP;
 836
 837	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 838			 outer_headers.ip_ecn);
 839	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn,
 840		 INET_ECN_CE);
 841	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 842			 outer_headers.ip_version);
 843	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
 844		 ipv);
 845
 846	spec->match_criteria_enable =
 847		get_match_criteria_enable(spec->match_criteria);
 848
 849	return 0;
 850}
 851
 852static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
 853			struct mlx5_flow_spec *spec)
 854{
 855	if (mlx5_core_mp_enabled(dev->mdev) &&
 856	    set_vhca_port_spec(dev, port_num, spec))
 857		return -EOPNOTSUPP;
 858
 859	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 860			 misc_parameters.bth_opcode);
 861	MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode,
 862		 IB_BTH_OPCODE_CNP);
 863
 864	spec->match_criteria_enable =
 865		get_match_criteria_enable(spec->match_criteria);
 866
 867	return 0;
 868}
 869
 870int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
 871			 struct mlx5_ib_op_fc *opfc,
 872			 enum mlx5_ib_optional_counter_type type)
 873{
 874	enum mlx5_flow_namespace_type fn_type;
 875	int priority, i, err, spec_num;
 876	struct mlx5_flow_act flow_act = {};
 877	struct mlx5_flow_destination dst;
 878	struct mlx5_flow_namespace *ns;
 879	struct mlx5_ib_flow_prio *prio;
 880	struct mlx5_flow_spec *spec;
 881
 882	spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
 883	if (!spec)
 884		return -ENOMEM;
 885
 886	switch (type) {
 887	case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
 888		if (set_ecn_ce_spec(dev, port_num, &spec[0],
 889				    MLX5_FS_IPV4_VERSION) ||
 890		    set_ecn_ce_spec(dev, port_num, &spec[1],
 891				    MLX5_FS_IPV6_VERSION)) {
 892			err = -EOPNOTSUPP;
 893			goto free;
 894		}
 895		spec_num = 2;
 896		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
 897		priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
 898		break;
 899
 900	case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
 901		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
 902					ft_field_support_2_nic_receive_rdma.bth_opcode) ||
 903		    set_cnp_spec(dev, port_num, &spec[0])) {
 904			err = -EOPNOTSUPP;
 905			goto free;
 906		}
 907		spec_num = 1;
 908		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
 909		priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
 910		break;
 911
 912	case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
 913		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
 914					ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
 915		    set_cnp_spec(dev, port_num, &spec[0])) {
 916			err = -EOPNOTSUPP;
 917			goto free;
 918		}
 919		spec_num = 1;
 920		fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
 921		priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
 922		break;
 923
 924	default:
 925		err = -EOPNOTSUPP;
 926		goto free;
 927	}
 928
 929	ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
 930	if (!ns) {
 931		err = -EOPNOTSUPP;
 932		goto free;
 933	}
 934
 935	prio = &dev->flow_db->opfcs[type];
 936	if (!prio->flow_table) {
 937		prio = _get_prio(dev, ns, prio, priority,
 938				 dev->num_ports * MAX_OPFC_RULES, 1, 0);
 939		if (IS_ERR(prio)) {
 940			err = PTR_ERR(prio);
 941			goto free;
 942		}
 943	}
 944
 945	dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
 946	dst.counter_id = mlx5_fc_id(opfc->fc);
 947
 948	flow_act.action =
 949		MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
 950
 951	for (i = 0; i < spec_num; i++) {
 952		opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
 953						    &flow_act, &dst, 1);
 954		if (IS_ERR(opfc->rule[i])) {
 955			err = PTR_ERR(opfc->rule[i]);
 956			goto del_rules;
 957		}
 958	}
 959	prio->refcount += spec_num;
 960	kfree(spec);
 961
 962	return 0;
 963
 964del_rules:
 965	for (i -= 1; i >= 0; i--)
 966		mlx5_del_flow_rules(opfc->rule[i]);
 967	put_flow_table(dev, prio, false);
 968free:
 969	kfree(spec);
 970	return err;
 971}
 972
 973void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
 974			     struct mlx5_ib_op_fc *opfc,
 975			     enum mlx5_ib_optional_counter_type type)
 976{
 977	int i;
 978
 979	for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
 980		mlx5_del_flow_rules(opfc->rule[i]);
 981		put_flow_table(dev, &dev->flow_db->opfcs[type], true);
 982	}
 983}
 984
 985static void set_underlay_qp(struct mlx5_ib_dev *dev,
 986			    struct mlx5_flow_spec *spec,
 987			    u32 underlay_qpn)
 988{
 989	void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
 990					   spec->match_criteria,
 991					   misc_parameters);
 992	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 993					   misc_parameters);
 994
 995	if (underlay_qpn &&
 996	    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
 997				      ft_field_support.bth_dst_qp)) {
 998		MLX5_SET(fte_match_set_misc,
 999			 misc_params_v, bth_dst_qp, underlay_qpn);
1000		MLX5_SET(fte_match_set_misc,
1001			 misc_params_c, bth_dst_qp, 0xffffff);
1002	}
1003}
1004
1005static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
1006					 struct mlx5_flow_spec *spec,
1007					 struct mlx5_eswitch_rep *rep)
1008{
1009	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
1010	void *misc;
1011
1012	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
1013		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1014				    misc_parameters_2);
1015
1016		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1017			 mlx5_eswitch_get_vport_metadata_for_match(rep->esw,
1018								   rep->vport));
1019		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1020				    misc_parameters_2);
1021
1022		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1023			 mlx5_eswitch_get_vport_metadata_mask());
1024	} else {
1025		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1026				    misc_parameters);
1027
1028		MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
1029
1030		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1031				    misc_parameters);
1032
1033		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
1034	}
1035}
1036
1037static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
1038						      struct mlx5_ib_flow_prio *ft_prio,
1039						      const struct ib_flow_attr *flow_attr,
1040						      struct mlx5_flow_destination *dst,
1041						      u32 underlay_qpn,
1042						      struct mlx5_ib_create_flow *ucmd)
1043{
1044	struct mlx5_flow_table	*ft = ft_prio->flow_table;
1045	struct mlx5_ib_flow_handler *handler;
1046	struct mlx5_flow_act flow_act = {};
1047	struct mlx5_flow_spec *spec;
1048	struct mlx5_flow_destination dest_arr[2] = {};
1049	struct mlx5_flow_destination *rule_dst = dest_arr;
1050	const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
1051	unsigned int spec_index;
1052	u32 prev_type = 0;
1053	int err = 0;
1054	int dest_num = 0;
1055	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
1056
1057	if (!is_valid_attr(dev->mdev, flow_attr))
1058		return ERR_PTR(-EINVAL);
1059
1060	if (dev->is_rep && is_egress)
1061		return ERR_PTR(-EINVAL);
1062
1063	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1064	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
1065	if (!handler || !spec) {
1066		err = -ENOMEM;
1067		goto free;
1068	}
1069
1070	INIT_LIST_HEAD(&handler->list);
1071
1072	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
1073		err = parse_flow_attr(dev->mdev, spec,
1074				      ib_flow, flow_attr, &flow_act,
1075				      prev_type);
1076		if (err < 0)
1077			goto free;
1078
1079		prev_type = ((union ib_flow_spec *)ib_flow)->type;
1080		ib_flow += ((union ib_flow_spec *)ib_flow)->size;
1081	}
1082
1083	if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
1084		memcpy(&dest_arr[0], dst, sizeof(*dst));
1085		dest_num++;
1086	}
1087
1088	if (!flow_is_multicast_only(flow_attr))
1089		set_underlay_qp(dev, spec, underlay_qpn);
1090
1091	if (dev->is_rep && flow_attr->type != IB_FLOW_ATTR_SNIFFER) {
1092		struct mlx5_eswitch_rep *rep;
1093
1094		rep = dev->port[flow_attr->port - 1].rep;
1095		if (!rep) {
1096			err = -EINVAL;
1097			goto free;
1098		}
1099
1100		mlx5_ib_set_rule_source_port(dev, spec, rep);
1101	}
1102
1103	spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
1104
1105	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1106		struct mlx5_ib_mcounters *mcounters;
1107
1108		err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd);
1109		if (err)
1110			goto free;
1111
1112		mcounters = to_mcounters(flow_act.counters);
1113		handler->ibcounters = flow_act.counters;
1114		dest_arr[dest_num].type =
1115			MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1116		dest_arr[dest_num].counter_id =
1117			mlx5_fc_id(mcounters->hw_cntrs_hndl);
1118		dest_num++;
1119	}
1120
1121	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
1122		if (!dest_num)
1123			rule_dst = NULL;
1124	} else {
1125		if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
1126			flow_act.action |=
1127				MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
1128		if (is_egress)
1129			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1130		else if (dest_num)
1131			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1132	}
1133
1134	if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG)  &&
1135	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
1136	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
1137		mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
1138			     spec->flow_context.flow_tag, flow_attr->type);
1139		err = -EINVAL;
1140		goto free;
1141	}
1142	handler->rule = mlx5_add_flow_rules(ft, spec,
1143					    &flow_act,
1144					    rule_dst, dest_num);
1145
1146	if (IS_ERR(handler->rule)) {
1147		err = PTR_ERR(handler->rule);
1148		goto free;
1149	}
1150
1151	ft_prio->refcount++;
1152	handler->prio = ft_prio;
1153	handler->dev = dev;
1154
1155	ft_prio->flow_table = ft;
1156free:
1157	if (err && handler) {
1158		mlx5_ib_counters_clear_description(handler->ibcounters);
1159		kfree(handler);
1160	}
1161	kvfree(spec);
1162	return err ? ERR_PTR(err) : handler;
1163}
1164
1165static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
1166						     struct mlx5_ib_flow_prio *ft_prio,
1167						     const struct ib_flow_attr *flow_attr,
1168						     struct mlx5_flow_destination *dst)
1169{
1170	return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
1171}
1172
/* Indices into the leftovers_specs[] table of create_leftovers_rule(). */
enum {
	LEFTOVERS_MC = 0,
	LEFTOVERS_UC = 1,
};
1177
1178static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
1179							  struct mlx5_ib_flow_prio *ft_prio,
1180							  struct ib_flow_attr *flow_attr,
1181							  struct mlx5_flow_destination *dst)
1182{
1183	struct mlx5_ib_flow_handler *handler_ucast = NULL;
1184	struct mlx5_ib_flow_handler *handler = NULL;
1185
1186	static struct {
1187		struct ib_flow_attr	flow_attr;
1188		struct ib_flow_spec_eth eth_flow;
1189	} leftovers_specs[] = {
1190		[LEFTOVERS_MC] = {
1191			.flow_attr = {
1192				.num_of_specs = 1,
1193				.size = sizeof(leftovers_specs[0])
1194			},
1195			.eth_flow = {
1196				.type = IB_FLOW_SPEC_ETH,
1197				.size = sizeof(struct ib_flow_spec_eth),
1198				.mask = {.dst_mac = {0x1} },
1199				.val =  {.dst_mac = {0x1} }
1200			}
1201		},
1202		[LEFTOVERS_UC] = {
1203			.flow_attr = {
1204				.num_of_specs = 1,
1205				.size = sizeof(leftovers_specs[0])
1206			},
1207			.eth_flow = {
1208				.type = IB_FLOW_SPEC_ETH,
1209				.size = sizeof(struct ib_flow_spec_eth),
1210				.mask = {.dst_mac = {0x1} },
1211				.val = {.dst_mac = {} }
1212			}
1213		}
1214	};
1215
1216	handler = create_flow_rule(dev, ft_prio,
1217				   &leftovers_specs[LEFTOVERS_MC].flow_attr,
1218				   dst);
1219	if (!IS_ERR(handler) &&
1220	    flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
1221		handler_ucast = create_flow_rule(dev, ft_prio,
1222						 &leftovers_specs[LEFTOVERS_UC].flow_attr,
1223						 dst);
1224		if (IS_ERR(handler_ucast)) {
1225			mlx5_del_flow_rules(handler->rule);
1226			ft_prio->refcount--;
1227			kfree(handler);
1228			handler = handler_ucast;
1229		} else {
1230			list_add(&handler_ucast->list, &handler->list);
1231		}
1232	}
1233
1234	return handler;
1235}
1236
1237static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
1238							struct mlx5_ib_flow_prio *ft_rx,
1239							struct mlx5_ib_flow_prio *ft_tx,
1240							struct mlx5_flow_destination *dst)
1241{
1242	struct mlx5_ib_flow_handler *handler_rx;
1243	struct mlx5_ib_flow_handler *handler_tx;
1244	int err;
1245	static const struct ib_flow_attr flow_attr  = {
1246		.num_of_specs = 0,
1247		.type = IB_FLOW_ATTR_SNIFFER,
1248		.size = sizeof(flow_attr)
1249	};
1250
1251	handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
1252	if (IS_ERR(handler_rx)) {
1253		err = PTR_ERR(handler_rx);
1254		goto err;
1255	}
1256
1257	handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
1258	if (IS_ERR(handler_tx)) {
1259		err = PTR_ERR(handler_tx);
1260		goto err_tx;
1261	}
1262
1263	list_add(&handler_tx->list, &handler_rx->list);
1264
1265	return handler_rx;
1266
1267err_tx:
1268	mlx5_del_flow_rules(handler_rx->rule);
1269	ft_rx->refcount--;
1270	kfree(handler_rx);
1271err:
1272	return ERR_PTR(err);
1273}
1274
1275static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
1276					   struct ib_flow_attr *flow_attr,
1277					   struct ib_udata *udata)
1278{
1279	struct mlx5_ib_dev *dev = to_mdev(qp->device);
1280	struct mlx5_ib_qp *mqp = to_mqp(qp);
1281	struct mlx5_ib_flow_handler *handler = NULL;
1282	struct mlx5_flow_destination *dst = NULL;
1283	struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
1284	struct mlx5_ib_flow_prio *ft_prio;
1285	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
1286	struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
1287	size_t min_ucmd_sz, required_ucmd_sz;
1288	int err;
1289	int underlay_qpn;
1290
1291	if (udata && udata->inlen) {
1292		min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved);
1293		if (udata->inlen < min_ucmd_sz)
1294			return ERR_PTR(-EOPNOTSUPP);
1295
1296		err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
1297		if (err)
1298			return ERR_PTR(err);
1299
1300		/* currently supports only one counters data */
1301		if (ucmd_hdr.ncounters_data > 1)
1302			return ERR_PTR(-EINVAL);
1303
1304		required_ucmd_sz = min_ucmd_sz +
1305			sizeof(struct mlx5_ib_flow_counters_data) *
1306			ucmd_hdr.ncounters_data;
1307		if (udata->inlen > required_ucmd_sz &&
1308		    !ib_is_udata_cleared(udata, required_ucmd_sz,
1309					 udata->inlen - required_ucmd_sz))
1310			return ERR_PTR(-EOPNOTSUPP);
1311
1312		ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
1313		if (!ucmd)
1314			return ERR_PTR(-ENOMEM);
1315
1316		err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
1317		if (err)
1318			goto free_ucmd;
1319	}
1320
1321	if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
1322		err = -ENOMEM;
1323		goto free_ucmd;
1324	}
1325
1326	if (flow_attr->flags &
1327	    ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS)) {
1328		err = -EINVAL;
1329		goto free_ucmd;
1330	}
1331
1332	if (is_egress &&
1333	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
1334	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
1335		err = -EINVAL;
1336		goto free_ucmd;
1337	}
1338
1339	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1340	if (!dst) {
1341		err = -ENOMEM;
1342		goto free_ucmd;
1343	}
1344
1345	mutex_lock(&dev->flow_db->lock);
1346
1347	ft_prio = get_flow_table(dev, flow_attr,
1348				 is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
1349	if (IS_ERR(ft_prio)) {
1350		err = PTR_ERR(ft_prio);
1351		goto unlock;
1352	}
1353	if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
1354		ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
1355		if (IS_ERR(ft_prio_tx)) {
1356			err = PTR_ERR(ft_prio_tx);
1357			ft_prio_tx = NULL;
1358			goto destroy_ft;
1359		}
1360	}
1361
1362	if (is_egress) {
1363		dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
1364	} else {
1365		dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1366		if (mqp->is_rss)
1367			dst->tir_num = mqp->rss_qp.tirn;
1368		else
1369			dst->tir_num = mqp->raw_packet_qp.rq.tirn;
1370	}
1371
1372	switch (flow_attr->type) {
1373	case IB_FLOW_ATTR_NORMAL:
1374		underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
1375				       mqp->underlay_qpn :
1376				       0;
1377		handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
1378					    underlay_qpn, ucmd);
1379		break;
1380	case IB_FLOW_ATTR_ALL_DEFAULT:
1381	case IB_FLOW_ATTR_MC_DEFAULT:
1382		handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst);
1383		break;
1384	case IB_FLOW_ATTR_SNIFFER:
1385		handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
1386		break;
1387	default:
1388		err = -EINVAL;
1389		goto destroy_ft;
1390	}
1391
1392	if (IS_ERR(handler)) {
1393		err = PTR_ERR(handler);
1394		handler = NULL;
1395		goto destroy_ft;
1396	}
1397
1398	mutex_unlock(&dev->flow_db->lock);
1399	kfree(dst);
1400	kfree(ucmd);
1401
1402	return &handler->ibflow;
1403
1404destroy_ft:
1405	put_flow_table(dev, ft_prio, false);
1406	if (ft_prio_tx)
1407		put_flow_table(dev, ft_prio_tx, false);
1408unlock:
1409	mutex_unlock(&dev->flow_db->lock);
1410	kfree(dst);
1411free_ucmd:
1412	kfree(ucmd);
1413	return ERR_PTR(err);
1414}
1415
1416static struct mlx5_ib_flow_prio *
1417_get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
1418		enum mlx5_flow_namespace_type ns_type,
1419		bool mcast)
1420{
1421	struct mlx5_flow_namespace *ns = NULL;
1422	struct mlx5_ib_flow_prio *prio = NULL;
1423	int max_table_size = 0;
1424	bool esw_encap;
1425	u32 flags = 0;
1426	int priority;
1427
1428	if (mcast)
1429		priority = MLX5_IB_FLOW_MCAST_PRIO;
1430	else
1431		priority = ib_prio_to_core_prio(user_priority, false);
1432
1433	esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
1434		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
1435	switch (ns_type) {
1436	case MLX5_FLOW_NAMESPACE_BYPASS:
1437		max_table_size = BIT(
1438			MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size));
1439		if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
1440			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
1441		if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
1442					      reformat_l3_tunnel_to_l2) &&
1443		    !esw_encap)
1444			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1445		break;
1446	case MLX5_FLOW_NAMESPACE_EGRESS:
1447		max_table_size = BIT(
1448			MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
1449		if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) &&
1450		    !esw_encap)
1451			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1452		break;
1453	case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
1454		max_table_size = BIT(
1455			MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
1456		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
1457			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
1458		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev,
1459					       reformat_l3_tunnel_to_l2) &&
1460		    esw_encap)
1461			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1462		priority = user_priority;
1463		break;
1464	case MLX5_FLOW_NAMESPACE_RDMA_RX:
1465		max_table_size = BIT(
1466			MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size));
1467		priority = user_priority;
1468		break;
1469	case MLX5_FLOW_NAMESPACE_RDMA_TX:
1470		max_table_size = BIT(
1471			MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
1472		priority = user_priority;
1473		break;
1474	default:
1475		break;
1476	}
1477
1478	max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
1479
1480	ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
1481	if (!ns)
1482		return ERR_PTR(-EOPNOTSUPP);
1483
1484	switch (ns_type) {
1485	case MLX5_FLOW_NAMESPACE_BYPASS:
1486		prio = &dev->flow_db->prios[priority];
1487		break;
1488	case MLX5_FLOW_NAMESPACE_EGRESS:
1489		prio = &dev->flow_db->egress_prios[priority];
1490		break;
1491	case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
1492		prio = &dev->flow_db->fdb[priority];
1493		break;
1494	case MLX5_FLOW_NAMESPACE_RDMA_RX:
1495		prio = &dev->flow_db->rdma_rx[priority];
1496		break;
1497	case MLX5_FLOW_NAMESPACE_RDMA_TX:
1498		prio = &dev->flow_db->rdma_tx[priority];
1499		break;
1500	default: return ERR_PTR(-EINVAL);
1501	}
1502
1503	if (!prio)
1504		return ERR_PTR(-EINVAL);
1505
1506	if (prio->flow_table)
1507		return prio;
1508
1509	return _get_prio(dev, ns, prio, priority, max_table_size,
1510			 MLX5_FS_MAX_TYPES, flags);
1511}
1512
1513static struct mlx5_ib_flow_handler *
1514_create_raw_flow_rule(struct mlx5_ib_dev *dev,
1515		      struct mlx5_ib_flow_prio *ft_prio,
1516		      struct mlx5_flow_destination *dst,
1517		      struct mlx5_ib_flow_matcher  *fs_matcher,
1518		      struct mlx5_flow_context *flow_context,
1519		      struct mlx5_flow_act *flow_act,
1520		      void *cmd_in, int inlen,
1521		      int dst_num)
1522{
1523	struct mlx5_ib_flow_handler *handler;
1524	struct mlx5_flow_spec *spec;
1525	struct mlx5_flow_table *ft = ft_prio->flow_table;
1526	int err = 0;
1527
1528	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1529	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
1530	if (!handler || !spec) {
1531		err = -ENOMEM;
1532		goto free;
1533	}
1534
1535	INIT_LIST_HEAD(&handler->list);
1536
1537	memcpy(spec->match_value, cmd_in, inlen);
1538	memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
1539	       fs_matcher->mask_len);
1540	spec->match_criteria_enable = fs_matcher->match_criteria_enable;
1541	spec->flow_context = *flow_context;
1542
1543	handler->rule = mlx5_add_flow_rules(ft, spec,
1544					    flow_act, dst, dst_num);
1545
1546	if (IS_ERR(handler->rule)) {
1547		err = PTR_ERR(handler->rule);
1548		goto free;
1549	}
1550
1551	ft_prio->refcount++;
1552	handler->prio = ft_prio;
1553	handler->dev = dev;
1554	ft_prio->flow_table = ft;
1555
1556free:
1557	if (err)
1558		kfree(handler);
1559	kvfree(spec);
1560	return err ? ERR_PTR(err) : handler;
1561}
1562
1563static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
1564				void *match_v)
1565{
1566	void *match_c;
1567	void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
1568	void *dmac, *dmac_mask;
1569	void *ipv4, *ipv4_mask;
1570
1571	if (!(fs_matcher->match_criteria_enable &
1572	      (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
1573		return false;
1574
1575	match_c = fs_matcher->matcher_mask.match_params;
1576	match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
1577					   outer_headers);
1578	match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
1579					   outer_headers);
1580
1581	dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
1582			    dmac_47_16);
1583	dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
1584				 dmac_47_16);
1585
1586	if (is_multicast_ether_addr(dmac) &&
1587	    is_multicast_ether_addr(dmac_mask))
1588		return true;
1589
1590	ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
1591			    dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
1592
1593	ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
1594				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
1595
1596	if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
1597	    ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
1598		return true;
1599
1600	return false;
1601}
1602
1603static struct mlx5_ib_flow_handler *raw_fs_rule_add(
1604	struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
1605	struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act,
1606	u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type)
1607{
1608	struct mlx5_flow_destination *dst;
1609	struct mlx5_ib_flow_prio *ft_prio;
1610	struct mlx5_ib_flow_handler *handler;
1611	int dst_num = 0;
1612	bool mcast;
1613	int err;
1614
1615	if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
1616		return ERR_PTR(-EOPNOTSUPP);
1617
1618	if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
1619		return ERR_PTR(-ENOMEM);
1620
1621	dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
1622	if (!dst)
1623		return ERR_PTR(-ENOMEM);
1624
1625	mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
1626	mutex_lock(&dev->flow_db->lock);
1627
1628	ft_prio = _get_flow_table(dev, fs_matcher->priority,
1629				  fs_matcher->ns_type, mcast);
1630	if (IS_ERR(ft_prio)) {
1631		err = PTR_ERR(ft_prio);
1632		goto unlock;
1633	}
1634
1635	switch (dest_type) {
1636	case MLX5_FLOW_DESTINATION_TYPE_TIR:
1637		dst[dst_num].type = dest_type;
1638		dst[dst_num++].tir_num = dest_id;
1639		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1640		break;
1641	case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
1642		dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
1643		dst[dst_num++].ft_num = dest_id;
1644		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1645		break;
1646	case MLX5_FLOW_DESTINATION_TYPE_PORT:
1647		dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
1648		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1649		break;
1650	default:
1651		break;
1652	}
1653
1654	if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1655		dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1656		dst[dst_num].counter_id = counter_id;
1657		dst_num++;
1658	}
1659
1660	handler = _create_raw_flow_rule(dev, ft_prio, dst_num ? dst : NULL,
1661					fs_matcher, flow_context, flow_act,
1662					cmd_in, inlen, dst_num);
1663
1664	if (IS_ERR(handler)) {
1665		err = PTR_ERR(handler);
1666		goto destroy_ft;
1667	}
1668
1669	mutex_unlock(&dev->flow_db->lock);
1670	atomic_inc(&fs_matcher->usecnt);
1671	handler->flow_matcher = fs_matcher;
1672
1673	kfree(dst);
1674
1675	return handler;
1676
1677destroy_ft:
1678	put_flow_table(dev, ft_prio, false);
1679unlock:
1680	mutex_unlock(&dev->flow_db->lock);
1681	kfree(dst);
1682
1683	return ERR_PTR(err);
1684}
1685
1686static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
1687{
1688	switch (maction->flow_action_raw.sub_type) {
1689	case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
1690		mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
1691					   maction->flow_action_raw.modify_hdr);
1692		break;
1693	case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
1694		mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
1695					     maction->flow_action_raw.pkt_reformat);
1696		break;
1697	case MLX5_IB_FLOW_ACTION_DECAP:
1698		break;
1699	default:
1700		break;
1701	}
1702}
1703
1704static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
1705{
1706	struct mlx5_ib_flow_action *maction = to_mflow_act(action);
1707
1708	switch (action->type) {
1709	case IB_FLOW_ACTION_UNSPECIFIED:
1710		destroy_flow_action_raw(maction);
1711		break;
1712	default:
1713		WARN_ON(true);
1714		break;
1715	}
1716
1717	kfree(maction);
1718	return 0;
1719}
1720
1721static int
1722mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
1723			     enum mlx5_flow_namespace_type *namespace)
1724{
1725	switch (table_type) {
1726	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
1727		*namespace = MLX5_FLOW_NAMESPACE_BYPASS;
1728		break;
1729	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
1730		*namespace = MLX5_FLOW_NAMESPACE_EGRESS;
1731		break;
1732	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
1733		*namespace = MLX5_FLOW_NAMESPACE_FDB_BYPASS;
1734		break;
1735	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
1736		*namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
1737		break;
1738	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
1739		*namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
1740		break;
1741	default:
1742		return -EINVAL;
1743	}
1744
1745	return 0;
1746}
1747
1748static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
1749	[MLX5_IB_FLOW_TYPE_NORMAL] = {
1750		.type = UVERBS_ATTR_TYPE_PTR_IN,
1751		.u.ptr = {
1752			.len = sizeof(u16), /* data is priority */
1753			.min_len = sizeof(u16),
1754		}
1755	},
1756	[MLX5_IB_FLOW_TYPE_SNIFFER] = {
1757		.type = UVERBS_ATTR_TYPE_PTR_IN,
1758		UVERBS_ATTR_NO_DATA(),
1759	},
1760	[MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
1761		.type = UVERBS_ATTR_TYPE_PTR_IN,
1762		UVERBS_ATTR_NO_DATA(),
1763	},
1764	[MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
1765		.type = UVERBS_ATTR_TYPE_PTR_IN,
1766		UVERBS_ATTR_NO_DATA(),
1767	},
1768};
1769
1770static bool is_flow_dest(void *obj, int *dest_id, int *dest_type)
1771{
1772	struct devx_obj *devx_obj = obj;
1773	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
1774
1775	switch (opcode) {
1776	case MLX5_CMD_OP_DESTROY_TIR:
1777		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1778		*dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
1779				    obj_id);
1780		return true;
1781
1782	case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
1783		*dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1784		*dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
1785				    table_id);
1786		return true;
1787	default:
1788		return false;
1789	}
1790}
1791
1792static int get_dests(struct uverbs_attr_bundle *attrs,
1793		     struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
1794		     int *dest_type, struct ib_qp **qp, u32 *flags)
1795{
1796	bool dest_devx, dest_qp;
1797	void *devx_obj;
1798	int err;
1799
1800	dest_devx = uverbs_attr_is_valid(attrs,
1801					 MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
1802	dest_qp = uverbs_attr_is_valid(attrs,
1803				       MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
1804
1805	*flags = 0;
1806	err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
1807				 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
1808					 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
1809	if (err)
1810		return err;
1811
1812	/* Both flags are not allowed */
1813	if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
1814	    *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
1815		return -EINVAL;
1816
1817	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
1818		if (dest_devx && (dest_qp || *flags))
1819			return -EINVAL;
1820		else if (dest_qp && *flags)
1821			return -EINVAL;
1822	}
1823
1824	/* Allow only DEVX object, drop as dest for FDB */
1825	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
1826	    !(dest_devx || (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
1827		return -EINVAL;
1828
1829	/* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
1830	if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
1831	    ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
1832		return -EINVAL;
1833
1834	*qp = NULL;
1835	if (dest_devx) {
1836		devx_obj =
1837			uverbs_attr_get_obj(attrs,
1838					    MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
1839
1840		/* Verify that the given DEVX object is a flow
1841		 * steering destination.
1842		 */
1843		if (!is_flow_dest(devx_obj, dest_id, dest_type))
1844			return -EINVAL;
1845		/* Allow only flow table as dest when inserting to FDB or RDMA_RX */
1846		if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
1847		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
1848		    *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
1849			return -EINVAL;
1850	} else if (dest_qp) {
1851		struct mlx5_ib_qp *mqp;
1852
1853		*qp = uverbs_attr_get_obj(attrs,
1854					  MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
1855		if (IS_ERR(*qp))
1856			return PTR_ERR(*qp);
1857
1858		if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
1859			return -EINVAL;
1860
1861		mqp = to_mqp(*qp);
1862		if (mqp->is_rss)
1863			*dest_id = mqp->rss_qp.tirn;
1864		else
1865			*dest_id = mqp->raw_packet_qp.rq.tirn;
1866		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1867	} else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
1868		    fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) &&
1869		   !(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
1870		*dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
1871	}
1872
1873	if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
1874	    (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
1875	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
1876		return -EINVAL;
1877
1878	return 0;
1879}
1880
1881static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id)
1882{
1883	struct devx_obj *devx_obj = obj;
1884	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
1885
1886	if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
1887
1888		if (offset && offset >= devx_obj->flow_counter_bulk_size)
1889			return false;
1890
1891		*counter_id = MLX5_GET(dealloc_flow_counter_in,
1892				       devx_obj->dinbox,
1893				       flow_counter_id);
1894		*counter_id += offset;
1895		return true;
1896	}
1897
1898	return false;
1899}
1900
1901#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
1902static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
1903	struct uverbs_attr_bundle *attrs)
1904{
1905	struct mlx5_flow_context flow_context = {.flow_tag =
1906		MLX5_FS_DEFAULT_FLOW_TAG};
1907	u32 *offset_attr, offset = 0, counter_id = 0;
1908	int dest_id, dest_type = -1, inlen, len, ret, i;
1909	struct mlx5_ib_flow_handler *flow_handler;
1910	struct mlx5_ib_flow_matcher *fs_matcher;
1911	struct ib_uobject **arr_flow_actions;
1912	struct ib_uflow_resources *uflow_res;
1913	struct mlx5_flow_act flow_act = {};
1914	struct ib_qp *qp = NULL;
1915	void *devx_obj, *cmd_in;
1916	struct ib_uobject *uobj;
1917	struct mlx5_ib_dev *dev;
1918	u32 flags;
1919
1920	if (!capable(CAP_NET_RAW))
1921		return -EPERM;
1922
1923	fs_matcher = uverbs_attr_get_obj(attrs,
1924					 MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
1925	uobj =  uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
1926	dev = mlx5_udata_to_mdev(&attrs->driver_udata);
1927
1928	if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
1929		return -EINVAL;
1930
1931	if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
1932		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
1933
1934	if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
1935		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
1936
1937	len = uverbs_attr_get_uobjs_arr(attrs,
1938		MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
1939	if (len) {
1940		devx_obj = arr_flow_actions[0]->object;
1941
1942		if (uverbs_attr_is_valid(attrs,
1943					 MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
1944
1945			int num_offsets = uverbs_attr_ptr_get_array_size(
1946				attrs,
1947				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
1948				sizeof(u32));
1949
1950			if (num_offsets != 1)
1951				return -EINVAL;
1952
1953			offset_attr = uverbs_attr_get_alloced_ptr(
1954				attrs,
1955				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
1956			offset = *offset_attr;
1957		}
1958
1959		if (!is_flow_counter(devx_obj, offset, &counter_id))
1960			return -EINVAL;
1961
1962		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1963	}
1964
1965	cmd_in = uverbs_attr_get_alloced_ptr(
1966		attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
1967	inlen = uverbs_attr_get_len(attrs,
1968				    MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
1969
1970	uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
1971	if (!uflow_res)
1972		return -ENOMEM;
1973
1974	len = uverbs_attr_get_uobjs_arr(attrs,
1975		MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
1976	for (i = 0; i < len; i++) {
1977		struct mlx5_ib_flow_action *maction =
1978			to_mflow_act(arr_flow_actions[i]->object);
1979
1980		ret = parse_flow_flow_action(maction, false, &flow_act);
1981		if (ret)
1982			goto err_out;
1983		flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
1984				   arr_flow_actions[i]->object);
1985	}
1986
1987	ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
1988			       MLX5_IB_ATTR_CREATE_FLOW_TAG);
1989	if (!ret) {
1990		if (flow_context.flow_tag >= BIT(24)) {
1991			ret = -EINVAL;
1992			goto err_out;
1993		}
1994		flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
1995	}
1996
1997	flow_handler =
1998		raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act,
1999				counter_id, cmd_in, inlen, dest_id, dest_type);
2000	if (IS_ERR(flow_handler)) {
2001		ret = PTR_ERR(flow_handler);
2002		goto err_out;
2003	}
2004
2005	ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
2006
2007	return 0;
2008err_out:
2009	ib_uverbs_flow_resources_free(uflow_res);
2010	return ret;
2011}
2012
2013static int flow_matcher_cleanup(struct ib_uobject *uobject,
2014				enum rdma_remove_reason why,
2015				struct uverbs_attr_bundle *attrs)
2016{
2017	struct mlx5_ib_flow_matcher *obj = uobject->object;
2018
2019	if (atomic_read(&obj->usecnt))
2020		return -EBUSY;
2021
2022	kfree(obj);
2023	return 0;
2024}
2025
2026static int steering_anchor_create_ft(struct mlx5_ib_dev *dev,
2027				     struct mlx5_ib_flow_prio *ft_prio,
2028				     enum mlx5_flow_namespace_type ns_type)
2029{
2030	struct mlx5_flow_table_attr ft_attr = {};
2031	struct mlx5_flow_namespace *ns;
2032	struct mlx5_flow_table *ft;
2033
2034	if (ft_prio->anchor.ft)
2035		return 0;
2036
2037	ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
2038	if (!ns)
2039		return -EOPNOTSUPP;
2040
2041	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
2042	ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
2043	ft_attr.prio = 0;
2044	ft_attr.max_fte = 2;
2045	ft_attr.level = 1;
2046
2047	ft = mlx5_create_flow_table(ns, &ft_attr);
2048	if (IS_ERR(ft))
2049		return PTR_ERR(ft);
2050
2051	ft_prio->anchor.ft = ft;
2052
2053	return 0;
2054}
2055
2056static void steering_anchor_destroy_ft(struct mlx5_ib_flow_prio *ft_prio)
2057{
2058	if (ft_prio->anchor.ft) {
2059		mlx5_destroy_flow_table(ft_prio->anchor.ft);
2060		ft_prio->anchor.ft = NULL;
2061	}
2062}
2063
2064static int
2065steering_anchor_create_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
2066{
2067	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
2068	struct mlx5_flow_group *fg;
2069	void *flow_group_in;
2070	int err = 0;
2071
2072	if (ft_prio->anchor.fg_drop)
2073		return 0;
2074
2075	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
2076	if (!flow_group_in)
2077		return -ENOMEM;
2078
2079	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
2080	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
2081
2082	fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
2083	if (IS_ERR(fg)) {
2084		err = PTR_ERR(fg);
2085		goto out;
2086	}
2087
2088	ft_prio->anchor.fg_drop = fg;
2089
2090out:
2091	kvfree(flow_group_in);
2092
2093	return err;
2094}
2095
2096static void
2097steering_anchor_destroy_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
2098{
2099	if (ft_prio->anchor.fg_drop) {
2100		mlx5_destroy_flow_group(ft_prio->anchor.fg_drop);
2101		ft_prio->anchor.fg_drop = NULL;
2102	}
2103}
2104
2105static int
2106steering_anchor_create_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2107{
2108	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
2109	struct mlx5_flow_group *fg;
2110	void *flow_group_in;
2111	int err = 0;
2112
2113	if (ft_prio->anchor.fg_goto_table)
2114		return 0;
2115
2116	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
2117	if (!flow_group_in)
2118		return -ENOMEM;
2119
2120	fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
2121	if (IS_ERR(fg)) {
2122		err = PTR_ERR(fg);
2123		goto out;
2124	}
2125	ft_prio->anchor.fg_goto_table = fg;
2126
2127out:
2128	kvfree(flow_group_in);
2129
2130	return err;
2131}
2132
2133static void
2134steering_anchor_destroy_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2135{
2136	if (ft_prio->anchor.fg_goto_table) {
2137		mlx5_destroy_flow_group(ft_prio->anchor.fg_goto_table);
2138		ft_prio->anchor.fg_goto_table = NULL;
2139	}
2140}
2141
2142static int
2143steering_anchor_create_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
2144{
2145	struct mlx5_flow_act flow_act = {};
2146	struct mlx5_flow_handle *handle;
2147
2148	if (ft_prio->anchor.rule_drop)
2149		return 0;
2150
2151	flow_act.fg = ft_prio->anchor.fg_drop;
2152	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
2153
2154	handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
2155				     NULL, 0);
2156	if (IS_ERR(handle))
2157		return PTR_ERR(handle);
2158
2159	ft_prio->anchor.rule_drop = handle;
2160
2161	return 0;
2162}
2163
2164static void steering_anchor_destroy_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
2165{
2166	if (ft_prio->anchor.rule_drop) {
2167		mlx5_del_flow_rules(ft_prio->anchor.rule_drop);
2168		ft_prio->anchor.rule_drop = NULL;
2169	}
2170}
2171
2172static int
2173steering_anchor_create_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2174{
2175	struct mlx5_flow_destination dest = {};
2176	struct mlx5_flow_act flow_act = {};
2177	struct mlx5_flow_handle *handle;
2178
2179	if (ft_prio->anchor.rule_goto_table)
2180		return 0;
2181
2182	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2183	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
2184	flow_act.fg = ft_prio->anchor.fg_goto_table;
2185
2186	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
2187	dest.ft = ft_prio->flow_table;
2188
2189	handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
2190				     &dest, 1);
2191	if (IS_ERR(handle))
2192		return PTR_ERR(handle);
2193
2194	ft_prio->anchor.rule_goto_table = handle;
2195
2196	return 0;
2197}
2198
2199static void
2200steering_anchor_destroy_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2201{
2202	if (ft_prio->anchor.rule_goto_table) {
2203		mlx5_del_flow_rules(ft_prio->anchor.rule_goto_table);
2204		ft_prio->anchor.rule_goto_table = NULL;
2205	}
2206}
2207
2208static int steering_anchor_create_res(struct mlx5_ib_dev *dev,
2209				      struct mlx5_ib_flow_prio *ft_prio,
2210				      enum mlx5_flow_namespace_type ns_type)
2211{
2212	int err;
2213
2214	err = steering_anchor_create_ft(dev, ft_prio, ns_type);
2215	if (err)
2216		return err;
2217
2218	err = steering_anchor_create_fg_drop(ft_prio);
2219	if (err)
2220		goto destroy_ft;
2221
2222	err = steering_anchor_create_fg_goto_table(ft_prio);
2223	if (err)
2224		goto destroy_fg_drop;
2225
2226	err = steering_anchor_create_rule_drop(ft_prio);
2227	if (err)
2228		goto destroy_fg_goto_table;
2229
2230	err = steering_anchor_create_rule_goto_table(ft_prio);
2231	if (err)
2232		goto destroy_rule_drop;
2233
2234	return 0;
2235
2236destroy_rule_drop:
2237	steering_anchor_destroy_rule_drop(ft_prio);
2238destroy_fg_goto_table:
2239	steering_anchor_destroy_fg_goto_table(ft_prio);
2240destroy_fg_drop:
2241	steering_anchor_destroy_fg_drop(ft_prio);
2242destroy_ft:
2243	steering_anchor_destroy_ft(ft_prio);
2244
2245	return err;
2246}
2247
/*
 * Destroy every anchor resource of @ft_prio: rules first, then flow groups,
 * then the anchor flow table. Each helper is a no-op for an object that
 * does not exist.
 */
static void mlx5_steering_anchor_destroy_res(struct mlx5_ib_flow_prio *ft_prio)
{
	/* Rules */
	steering_anchor_destroy_rule_goto_table(ft_prio);
	steering_anchor_destroy_rule_drop(ft_prio);

	/* Flow groups */
	steering_anchor_destroy_fg_goto_table(ft_prio);
	steering_anchor_destroy_fg_drop(ft_prio);

	/* Flow table */
	steering_anchor_destroy_ft(ft_prio);
}
2256
2257static int steering_anchor_cleanup(struct ib_uobject *uobject,
2258				   enum rdma_remove_reason why,
2259				   struct uverbs_attr_bundle *attrs)
2260{
2261	struct mlx5_ib_steering_anchor *obj = uobject->object;
2262
2263	if (atomic_read(&obj->usecnt))
2264		return -EBUSY;
2265
2266	mutex_lock(&obj->dev->flow_db->lock);
2267	if (!--obj->ft_prio->anchor.rule_goto_table_ref)
2268		steering_anchor_destroy_rule_goto_table(obj->ft_prio);
2269
2270	put_flow_table(obj->dev, obj->ft_prio, true);
2271	mutex_unlock(&obj->dev->flow_db->lock);
2272
2273	kfree(obj);
2274	return 0;
2275}
2276
2277static void fs_cleanup_anchor(struct mlx5_ib_flow_prio *prio,
2278			      int count)
2279{
2280	while (count--)
2281		mlx5_steering_anchor_destroy_res(&prio[count]);
2282}
2283
2284void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev)
2285{
2286	fs_cleanup_anchor(dev->flow_db->prios, MLX5_IB_NUM_FLOW_FT);
2287	fs_cleanup_anchor(dev->flow_db->egress_prios, MLX5_IB_NUM_FLOW_FT);
2288	fs_cleanup_anchor(dev->flow_db->sniffer, MLX5_IB_NUM_SNIFFER_FTS);
2289	fs_cleanup_anchor(dev->flow_db->egress, MLX5_IB_NUM_EGRESS_FTS);
2290	fs_cleanup_anchor(dev->flow_db->fdb, MLX5_IB_NUM_FDB_FTS);
2291	fs_cleanup_anchor(dev->flow_db->rdma_rx, MLX5_IB_NUM_FLOW_FT);
2292	fs_cleanup_anchor(dev->flow_db->rdma_tx, MLX5_IB_NUM_FLOW_FT);
2293}
2294
2295static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
2296			      struct mlx5_ib_flow_matcher *obj)
2297{
2298	enum mlx5_ib_uapi_flow_table_type ft_type =
2299		MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
2300	u32 flags;
2301	int err;
2302
2303	/* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older
2304	 * users should switch to it. We leave this to not break userspace
2305	 */
2306	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
2307	    uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
2308		return -EINVAL;
2309
2310	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
2311		err = uverbs_get_const(&ft_type, attrs,
2312				       MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
2313		if (err)
2314			return err;
2315
2316		err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
2317		if (err)
2318			return err;
2319
2320		return 0;
2321	}
2322
2323	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
2324		err = uverbs_get_flags32(&flags, attrs,
2325					 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
2326					 IB_FLOW_ATTR_FLAGS_EGRESS);
2327		if (err)
2328			return err;
2329
2330		if (flags)
2331			return mlx5_ib_ft_type_to_namespace(
2332				MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
2333				&obj->ns_type);
2334	}
2335
2336	obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
2337
2338	return 0;
2339}
2340
2341static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
2342	struct uverbs_attr_bundle *attrs)
2343{
2344	struct ib_uobject *uobj = uverbs_attr_get_uobject(
2345		attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
2346	struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
2347	struct mlx5_ib_flow_matcher *obj;
2348	int err;
2349
2350	obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
2351	if (!obj)
2352		return -ENOMEM;
2353
2354	obj->mask_len = uverbs_attr_get_len(
2355		attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
2356	err = uverbs_copy_from(&obj->matcher_mask,
2357			       attrs,
2358			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
2359	if (err)
2360		goto end;
2361
2362	obj->flow_type = uverbs_attr_get_enum_id(
2363		attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
2364
2365	if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
2366		err = uverbs_copy_from(&obj->priority,
2367				       attrs,
2368				       MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
2369		if (err)
2370			goto end;
2371	}
2372
2373	err = uverbs_copy_from(&obj->match_criteria_enable,
2374			       attrs,
2375			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
2376	if (err)
2377		goto end;
2378
2379	err = mlx5_ib_matcher_ns(attrs, obj);
2380	if (err)
2381		goto end;
2382
2383	if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
2384	    mlx5_eswitch_mode(dev->mdev) != MLX5_ESWITCH_OFFLOADS) {
2385		err = -EINVAL;
2386		goto end;
2387	}
2388
2389	uobj->object = obj;
2390	obj->mdev = dev->mdev;
2391	atomic_set(&obj->usecnt, 0);
2392	return 0;
2393
2394end:
2395	kfree(obj);
2396	return err;
2397}
2398
2399static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
2400	struct uverbs_attr_bundle *attrs)
2401{
2402	struct ib_uobject *uobj = uverbs_attr_get_uobject(
2403		attrs, MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE);
2404	struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
2405	enum mlx5_ib_uapi_flow_table_type ib_uapi_ft_type;
2406	enum mlx5_flow_namespace_type ns_type;
2407	struct mlx5_ib_steering_anchor *obj;
2408	struct mlx5_ib_flow_prio *ft_prio;
2409	u16 priority;
2410	u32 ft_id;
2411	int err;
2412
2413	if (!capable(CAP_NET_RAW))
2414		return -EPERM;
2415
2416	err = uverbs_get_const(&ib_uapi_ft_type, attrs,
2417			       MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE);
2418	if (err)
2419		return err;
2420
2421	err = mlx5_ib_ft_type_to_namespace(ib_uapi_ft_type, &ns_type);
2422	if (err)
2423		return err;
2424
2425	err = uverbs_copy_from(&priority, attrs,
2426			       MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY);
2427	if (err)
2428		return err;
2429
2430	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
2431	if (!obj)
2432		return -ENOMEM;
2433
2434	mutex_lock(&dev->flow_db->lock);
2435
2436	ft_prio = _get_flow_table(dev, priority, ns_type, 0);
2437	if (IS_ERR(ft_prio)) {
2438		err = PTR_ERR(ft_prio);
2439		goto free_obj;
2440	}
2441
2442	ft_prio->refcount++;
2443
2444	if (!ft_prio->anchor.rule_goto_table_ref) {
2445		err = steering_anchor_create_res(dev, ft_prio, ns_type);
2446		if (err)
2447			goto put_flow_table;
2448	}
2449
2450	ft_prio->anchor.rule_goto_table_ref++;
2451
2452	ft_id = mlx5_flow_table_id(ft_prio->anchor.ft);
2453
2454	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
2455			     &ft_id, sizeof(ft_id));
2456	if (err)
2457		goto destroy_res;
2458
2459	mutex_unlock(&dev->flow_db->lock);
2460
2461	uobj->object = obj;
2462	obj->dev = dev;
2463	obj->ft_prio = ft_prio;
2464	atomic_set(&obj->usecnt, 0);
2465
2466	return 0;
2467
2468destroy_res:
2469	--ft_prio->anchor.rule_goto_table_ref;
2470	mlx5_steering_anchor_destroy_res(ft_prio);
2471put_flow_table:
2472	put_flow_table(dev, ft_prio, true);
2473free_obj:
2474	mutex_unlock(&dev->flow_db->lock);
2475	kfree(obj);
2476
2477	return err;
2478}
2479
2480static struct ib_flow_action *
2481mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
2482			     enum mlx5_ib_uapi_flow_table_type ft_type,
2483			     u8 num_actions, void *in)
2484{
2485	enum mlx5_flow_namespace_type namespace;
2486	struct mlx5_ib_flow_action *maction;
2487	int ret;
2488
2489	ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
2490	if (ret)
2491		return ERR_PTR(-EINVAL);
2492
2493	maction = kzalloc(sizeof(*maction), GFP_KERNEL);
2494	if (!maction)
2495		return ERR_PTR(-ENOMEM);
2496
2497	maction->flow_action_raw.modify_hdr =
2498		mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
2499
2500	if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
2501		ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
2502		kfree(maction);
2503		return ERR_PTR(ret);
2504	}
2505	maction->flow_action_raw.sub_type =
2506		MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
2507	maction->flow_action_raw.dev = dev;
2508
2509	return &maction->ib_action;
2510}
2511
2512static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
2513{
2514	return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
2515					 max_modify_header_actions) ||
2516	       MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
2517					 max_modify_header_actions) ||
2518	       MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
2519					 max_modify_header_actions);
2520}
2521
2522static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
2523	struct uverbs_attr_bundle *attrs)
2524{
2525	struct ib_uobject *uobj = uverbs_attr_get_uobject(
2526		attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
2527	struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
2528	enum mlx5_ib_uapi_flow_table_type ft_type;
2529	struct ib_flow_action *action;
2530	int num_actions;
2531	void *in;
2532	int ret;
2533
2534	if (!mlx5_ib_modify_header_supported(mdev))
2535		return -EOPNOTSUPP;
2536
2537	in = uverbs_attr_get_alloced_ptr(attrs,
2538		MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
2539
2540	num_actions = uverbs_attr_ptr_get_array_size(
2541		attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
2542		MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
2543	if (num_actions < 0)
2544		return num_actions;
2545
2546	ret = uverbs_get_const(&ft_type, attrs,
2547			       MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
2548	if (ret)
2549		return ret;
2550	action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
2551	if (IS_ERR(action))
2552		return PTR_ERR(action);
2553
2554	uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
2555				       IB_FLOW_ACTION_UNSPECIFIED);
2556
2557	return 0;
2558}
2559
2560static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
2561						      u8 packet_reformat_type,
2562						      u8 ft_type)
2563{
2564	switch (packet_reformat_type) {
2565	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
2566		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
2567			return MLX5_CAP_FLOWTABLE(ibdev->mdev,
2568						  encap_general_header);
2569		break;
2570	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
2571		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
2572			return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
2573				reformat_l2_to_l3_tunnel);
2574		break;
2575	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
2576		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
2577			return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
2578				reformat_l3_tunnel_to_l2);
2579		break;
2580	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
2581		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
2582			return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
2583		break;
2584	default:
2585		break;
2586	}
2587
2588	return false;
2589}
2590
2591static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
2592{
2593	switch (dv_prt) {
2594	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
2595		*prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
2596		break;
2597	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
2598		*prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
2599		break;
2600	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
2601		*prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
2602		break;
2603	default:
2604		return -EINVAL;
2605	}
2606
2607	return 0;
2608}
2609
2610static int mlx5_ib_flow_action_create_packet_reformat_ctx(
2611	struct mlx5_ib_dev *dev,
2612	struct mlx5_ib_flow_action *maction,
2613	u8 ft_type, u8 dv_prt,
2614	void *in, size_t len)
2615{
2616	struct mlx5_pkt_reformat_params reformat_params;
2617	enum mlx5_flow_namespace_type namespace;
2618	u8 prm_prt;
2619	int ret;
2620
2621	ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
2622	if (ret)
2623		return ret;
2624
2625	ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
2626	if (ret)
2627		return ret;
2628
2629	memset(&reformat_params, 0, sizeof(reformat_params));
2630	reformat_params.type = prm_prt;
2631	reformat_params.size = len;
2632	reformat_params.data = in;
2633	maction->flow_action_raw.pkt_reformat =
2634		mlx5_packet_reformat_alloc(dev->mdev, &reformat_params,
2635					   namespace);
2636	if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
2637		ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
2638		return ret;
2639	}
2640
2641	maction->flow_action_raw.sub_type =
2642		MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
2643	maction->flow_action_raw.dev = dev;
2644
2645	return 0;
2646}
2647
2648static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
2649	struct uverbs_attr_bundle *attrs)
2650{
2651	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
2652		MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
2653	struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
2654	enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
2655	enum mlx5_ib_uapi_flow_table_type ft_type;
2656	struct mlx5_ib_flow_action *maction;
2657	int ret;
2658
2659	ret = uverbs_get_const(&ft_type, attrs,
2660			       MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
2661	if (ret)
2662		return ret;
2663
2664	ret = uverbs_get_const(&dv_prt, attrs,
2665			       MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
2666	if (ret)
2667		return ret;
2668
2669	if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
2670		return -EOPNOTSUPP;
2671
2672	maction = kzalloc(sizeof(*maction), GFP_KERNEL);
2673	if (!maction)
2674		return -ENOMEM;
2675
2676	if (dv_prt ==
2677	    MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
2678		maction->flow_action_raw.sub_type =
2679			MLX5_IB_FLOW_ACTION_DECAP;
2680		maction->flow_action_raw.dev = mdev;
2681	} else {
2682		void *in;
2683		int len;
2684
2685		in = uverbs_attr_get_alloced_ptr(attrs,
2686			MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
2687		if (IS_ERR(in)) {
2688			ret = PTR_ERR(in);
2689			goto free_maction;
2690		}
2691
2692		len = uverbs_attr_get_len(attrs,
2693			MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
2694
2695		ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
2696			maction, ft_type, dv_prt, in, len);
2697		if (ret)
2698			goto free_maction;
2699	}
2700
2701	uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
2702				       IB_FLOW_ACTION_UNSPECIFIED);
2703	return 0;
2704
2705free_maction:
2706	kfree(maction);
2707	return ret;
2708}
2709
/*
 * Attribute specification of the MLX5_IB_METHOD_CREATE_FLOW method.
 *
 * Marked UA_MANDATORY: the new flow handle, the match value buffer (1 to
 * sizeof(struct mlx5_ib_match_params) bytes, allocated and copied for the
 * handler) and the flow matcher object. The destination QP / DEVX object,
 * flow action array, flow tag, DEVX counter (array of exactly one object),
 * counter offset and creation flags are not mandatory.
 */
2710DECLARE_UVERBS_NAMED_METHOD(
2711	MLX5_IB_METHOD_CREATE_FLOW,
2712	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
2713			UVERBS_OBJECT_FLOW,
2714			UVERBS_ACCESS_NEW,
2715			UA_MANDATORY),
2716	UVERBS_ATTR_PTR_IN(
2717		MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
2718		UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
2719		UA_MANDATORY,
2720		UA_ALLOC_AND_COPY),
2721	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
2722			MLX5_IB_OBJECT_FLOW_MATCHER,
2723			UVERBS_ACCESS_READ,
2724			UA_MANDATORY),
2725	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
2726			UVERBS_OBJECT_QP,
2727			UVERBS_ACCESS_READ),
2728	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
2729			MLX5_IB_OBJECT_DEVX_OBJ,
2730			UVERBS_ACCESS_READ),
2731	UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
2732			     UVERBS_OBJECT_FLOW_ACTION,
2733			     UVERBS_ACCESS_READ, 1,
2734			     MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
2735			     UA_OPTIONAL),
2736	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
2737			   UVERBS_ATTR_TYPE(u32),
2738			   UA_OPTIONAL),
2739	UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
2740			     MLX5_IB_OBJECT_DEVX_OBJ,
2741			     UVERBS_ACCESS_READ, 1, 1,
2742			     UA_OPTIONAL),
2743	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
2744			   UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
2745			   UA_OPTIONAL,
2746			   UA_ALLOC_AND_COPY),
2747	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
2748			     enum mlx5_ib_create_flow_flags,
2749			     UA_OPTIONAL));
2750
/*
 * Destroy method for flow objects; its only attribute is the mandatory flow
 * handle, accessed with UVERBS_ACCESS_DESTROY. Note that it reuses the
 * MLX5_IB_ATTR_CREATE_FLOW_HANDLE attribute id of the create method.
 */
2751DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2752	MLX5_IB_METHOD_DESTROY_FLOW,
2753	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
2754			UVERBS_OBJECT_FLOW,
2755			UVERBS_ACCESS_DESTROY,
2756			UA_MANDATORY));
2757
/*
 * mlx5_ib_fs: adds the mlx5 create/destroy flow methods to
 * UVERBS_OBJECT_FLOW. Referenced from mlx5_ib_flow_defs[].
 */
2758ADD_UVERBS_METHODS(mlx5_ib_fs,
2759		   UVERBS_OBJECT_FLOW,
2760		   &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
2761		   &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
2762
/*
 * Attribute specification of the create-modify-header flow action method.
 * All three attributes are mandatory: the new action handle, the PRM
 * actions buffer (at least one set_add_copy_action_in_auto entry, allocated
 * and copied for the handler) and the flow table type.
 */
2763DECLARE_UVERBS_NAMED_METHOD(
2764	MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
2765	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
2766			UVERBS_OBJECT_FLOW_ACTION,
2767			UVERBS_ACCESS_NEW,
2768			UA_MANDATORY),
2769	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
2770			   UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
2771				   set_add_copy_action_in_auto)),
2772			   UA_MANDATORY,
2773			   UA_ALLOC_AND_COPY),
2774	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
2775			     enum mlx5_ib_uapi_flow_table_type,
2776			     UA_MANDATORY));
2777
/*
 * Attribute specification of the create-packet-reformat flow action method.
 * The action handle, reformat type and flow table type are mandatory; the
 * data buffer (at least 1 byte, allocated and copied for the handler) is
 * optional. The handler only reads the data buffer for reformat types other
 * than L2_TUNNEL_TO_L2.
 */
2778DECLARE_UVERBS_NAMED_METHOD(
2779	MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
2780	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
2781			UVERBS_OBJECT_FLOW_ACTION,
2782			UVERBS_ACCESS_NEW,
2783			UA_MANDATORY),
2784	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
2785			   UVERBS_ATTR_MIN_SIZE(1),
2786			   UA_ALLOC_AND_COPY,
2787			   UA_OPTIONAL),
2788	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
2789			     enum mlx5_ib_uapi_flow_action_packet_reformat_type,
2790			     UA_MANDATORY),
2791	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
2792			     enum mlx5_ib_uapi_flow_table_type,
2793			     UA_MANDATORY));
2794
/*
 * mlx5_ib_flow_actions: adds the modify-header and packet-reformat creation
 * methods to UVERBS_OBJECT_FLOW_ACTION. Referenced from mlx5_ib_flow_defs[].
 */
2795ADD_UVERBS_METHODS(
2796	mlx5_ib_flow_actions,
2797	UVERBS_OBJECT_FLOW_ACTION,
2798	&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
2799	&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
2800
/*
 * Attribute specification of the flow matcher create method.
 *
 * Mandatory: the new matcher handle, the match mask (1 to
 * sizeof(struct mlx5_ib_match_params) bytes), the flow type enum and the
 * u8 match criteria. Optional: FLOW_FLAGS and FT_TYPE; the handler's
 * namespace selection (mlx5_ib_matcher_ns()) rejects requests that pass
 * both of them.
 */
2801DECLARE_UVERBS_NAMED_METHOD(
2802	MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
2803	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
2804			MLX5_IB_OBJECT_FLOW_MATCHER,
2805			UVERBS_ACCESS_NEW,
2806			UA_MANDATORY),
2807	UVERBS_ATTR_PTR_IN(
2808		MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
2809		UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
2810		UA_MANDATORY),
2811	UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
2812			    mlx5_ib_flow_type,
2813			    UA_MANDATORY),
2814	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
2815			   UVERBS_ATTR_TYPE(u8),
2816			   UA_MANDATORY),
2817	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
2818			     enum ib_flow_flags,
2819			     UA_OPTIONAL),
2820	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
2821			     enum mlx5_ib_uapi_flow_table_type,
2822			     UA_OPTIONAL));
2823
/*
 * Destroy method for flow matcher objects; its only attribute is the
 * mandatory matcher handle, accessed with UVERBS_ACCESS_DESTROY.
 */
2824DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2825	MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
2826	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
2827			MLX5_IB_OBJECT_FLOW_MATCHER,
2828			UVERBS_ACCESS_DESTROY,
2829			UA_MANDATORY));
2830
/*
 * Flow matcher object: IDR-allocated, cleaned up by flow_matcher_cleanup(),
 * with the create and destroy methods declared above.
 */
2831DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
2832			    UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
2833			    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
2834			    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
2835
/*
 * Attribute specification of the steering anchor create method. All four
 * attributes are mandatory: the new anchor handle, the flow table type, the
 * u16 priority and the u32 flow table id.
 *
 * NOTE(review): MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID is declared with
 * UVERBS_ATTR_PTR_IN, yet the handler writes the table id to it with
 * uverbs_copy_to() - confirm the direction is intentional before touching
 * this, since it is part of the user ABI.
 */
2836DECLARE_UVERBS_NAMED_METHOD(
2837	MLX5_IB_METHOD_STEERING_ANCHOR_CREATE,
2838	UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE,
2839			MLX5_IB_OBJECT_STEERING_ANCHOR,
2840			UVERBS_ACCESS_NEW,
2841			UA_MANDATORY),
2842	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE,
2843			     enum mlx5_ib_uapi_flow_table_type,
2844			     UA_MANDATORY),
2845	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY,
2846			   UVERBS_ATTR_TYPE(u16),
2847			   UA_MANDATORY),
2848	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
2849			   UVERBS_ATTR_TYPE(u32),
2850			   UA_MANDATORY));
2851
/*
 * Destroy method for steering anchor objects; its only attribute is the
 * mandatory anchor handle, accessed with UVERBS_ACCESS_DESTROY.
 */
2852DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2853	MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY,
2854	UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_DESTROY_HANDLE,
2855			MLX5_IB_OBJECT_STEERING_ANCHOR,
2856			UVERBS_ACCESS_DESTROY,
2857			UA_MANDATORY));
2858
/*
 * Steering anchor object: IDR-allocated, cleaned up by
 * steering_anchor_cleanup(), with the create and destroy methods declared
 * above.
 */
2859DECLARE_UVERBS_NAMED_OBJECT(
2860	MLX5_IB_OBJECT_STEERING_ANCHOR,
2861	UVERBS_TYPE_ALLOC_IDR(steering_anchor_cleanup),
2862	&UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE),
2863	&UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY));
2864
/*
 * uapi definition chain exported by this file: the flow matcher object, the
 * mlx5_ib_fs methods on UVERBS_OBJECT_FLOW, the mlx5_ib_flow_actions methods
 * on UVERBS_OBJECT_FLOW_ACTION, and the steering anchor object, which is
 * gated by mlx5_ib_shared_ft_allowed. The empty entry terminates the array.
 */
2865const struct uapi_definition mlx5_ib_flow_defs[] = {
2866	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2867		MLX5_IB_OBJECT_FLOW_MATCHER),
2868	UAPI_DEF_CHAIN_OBJ_TREE(
2869		UVERBS_OBJECT_FLOW,
2870		&mlx5_ib_fs),
2871	UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
2872				&mlx5_ib_flow_actions),
2873	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2874		MLX5_IB_OBJECT_STEERING_ANCHOR,
2875		UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)),
2876	{},
2877};
2878
/*
 * Flow-related ib_device_ops callbacks; installed on the IB device by
 * mlx5_ib_fs_init() through ib_set_device_ops().
 */
2879static const struct ib_device_ops flow_ops = {
2880	.create_flow = mlx5_ib_create_flow,
2881	.destroy_flow = mlx5_ib_destroy_flow,
2882	.destroy_flow_action = mlx5_ib_destroy_flow_action,
2883};
2884
2885int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
2886{
2887	dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
2888
2889	if (!dev->flow_db)
2890		return -ENOMEM;
2891
2892	mutex_init(&dev->flow_db->lock);
2893
2894	ib_set_device_ops(&dev->ib_dev, &flow_ops);
2895	return 0;
2896}
v6.9.4
   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/*
   3 * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
   4 */
   5
   6#include <rdma/ib_user_verbs.h>
   7#include <rdma/ib_verbs.h>
   8#include <rdma/uverbs_types.h>
   9#include <rdma/uverbs_ioctl.h>
  10#include <rdma/uverbs_std_types.h>
  11#include <rdma/mlx5_user_ioctl_cmds.h>
  12#include <rdma/mlx5_user_ioctl_verbs.h>
  13#include <rdma/ib_hdrs.h>
  14#include <rdma/ib_umem.h>
  15#include <linux/mlx5/driver.h>
  16#include <linux/mlx5/fs.h>
  17#include <linux/mlx5/fs_helpers.h>
  18#include <linux/mlx5/eswitch.h>
  19#include <net/inet_ecn.h>
  20#include "mlx5_ib.h"
  21#include "counters.h"
  22#include "devx.h"
  23#include "fs.h"
  24
  25#define UVERBS_MODULE_NAME mlx5_ib
  26#include <rdma/uverbs_named_ioctl.h>
  27
  /*
   * Bit positions (0..3, in declaration order) of the flags that
   * get_match_criteria_enable() sets in its result, one per section of the
   * match parameters.
   */
  28enum {
  29	MATCH_CRITERIA_ENABLE_OUTER_BIT,
  30	MATCH_CRITERIA_ENABLE_MISC_BIT,
  31	MATCH_CRITERIA_ENABLE_INNER_BIT,
  32	MATCH_CRITERIA_ENABLE_MISC2_BIT
  33};
  34
  /*
   * True when every byte of the @headers section of the fte_match_param
   * buffer @match_criteria is zero. The expansion is fully parenthesized so
   * it can be combined safely with other operators at the call site.
   */
#define HEADER_IS_ZERO(match_criteria, headers)			           \
	(!memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
		     0, MLX5_FLD_SZ_BYTES(fte_match_param, headers)))
  38
  39static u8 get_match_criteria_enable(u32 *match_criteria)
  40{
  41	u8 match_criteria_enable;
  42
  43	match_criteria_enable =
  44		(!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
  45		MATCH_CRITERIA_ENABLE_OUTER_BIT;
  46	match_criteria_enable |=
  47		(!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
  48		MATCH_CRITERIA_ENABLE_MISC_BIT;
  49	match_criteria_enable |=
  50		(!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
  51		MATCH_CRITERIA_ENABLE_INNER_BIT;
  52	match_criteria_enable |=
  53		(!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
  54		MATCH_CRITERIA_ENABLE_MISC2_BIT;
  55
  56	return match_criteria_enable;
  57}
  58
  59static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
  60{
  61	u8 entry_mask;
  62	u8 entry_val;
  63	int err = 0;
  64
  65	if (!mask)
  66		goto out;
  67
  68	entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
  69			      ip_protocol);
  70	entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
  71			     ip_protocol);
  72	if (!entry_mask) {
  73		MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
  74		MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
  75		goto out;
  76	}
  77	/* Don't override existing ip protocol */
  78	if (mask != entry_mask || val != entry_val)
  79		err = -EINVAL;
  80out:
  81	return err;
  82}
  83
  84static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
  85			   bool inner)
  86{
  87	if (inner) {
  88		MLX5_SET(fte_match_set_misc,
  89			 misc_c, inner_ipv6_flow_label, mask);
  90		MLX5_SET(fte_match_set_misc,
  91			 misc_v, inner_ipv6_flow_label, val);
  92	} else {
  93		MLX5_SET(fte_match_set_misc,
  94			 misc_c, outer_ipv6_flow_label, mask);
  95		MLX5_SET(fte_match_set_misc,
  96			 misc_v, outer_ipv6_flow_label, val);
  97	}
  98}
  99
 100static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
 101{
 102	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
 103	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
 104	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
 105	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
 106}
 107
 108static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
 109{
 110	if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
 111	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
 112		return -EOPNOTSUPP;
 113
 114	if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
 115	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
 116		return -EOPNOTSUPP;
 117
 118	if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
 119	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
 120		return -EOPNOTSUPP;
 121
 122	if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
 123	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
 124		return -EOPNOTSUPP;
 125
 126	return 0;
 127}
 128
 /*
  * For each ib_flow_spec filter, the last member that parse_flow_attr() knows
  * how to translate. Used together with FIELDS_NOT_SUPPORTED().
  */
#define LAST_ETH_FIELD		vlan_tag
#define LAST_IPV4_FIELD		tos
#define LAST_IPV6_FIELD		traffic_class
#define LAST_TCP_UDP_FIELD	src_port
#define LAST_TUNNEL_FIELD	tunnel_id
#define LAST_FLOW_TAG_FIELD	tag_id
#define LAST_DROP_FIELD		size
#define LAST_COUNTERS_FIELD	counters

/*
 * @field is the last supported member of @filter. The macro scans the bytes
 * of @filter that follow @field and is used as a truth value: callers treat a
 * non-zero result as "the user set something we do not support".
 */
#define FIELDS_NOT_SUPPORTED(filter, field)                                    \
	memchr_inv((void *)&(filter).field + sizeof((filter).field), 0,        \
		   sizeof(filter) - offsetofend(typeof(filter), field))
 142
 143int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
 144			   bool is_egress,
 145			   struct mlx5_flow_act *action)
 146{
 147
 148	switch (maction->ib_action.type) {
 149	case IB_FLOW_ACTION_UNSPECIFIED:
 150		if (maction->flow_action_raw.sub_type ==
 151		    MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
 152			if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 153				return -EINVAL;
 154			action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
 155			action->modify_hdr =
 156				maction->flow_action_raw.modify_hdr;
 157			return 0;
 158		}
 159		if (maction->flow_action_raw.sub_type ==
 160		    MLX5_IB_FLOW_ACTION_DECAP) {
 161			if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
 162				return -EINVAL;
 163			action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
 164			return 0;
 165		}
 166		if (maction->flow_action_raw.sub_type ==
 167		    MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
 168			if (action->action &
 169			    MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
 170				return -EINVAL;
 171			action->action |=
 172				MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
 173			action->pkt_reformat =
 174				maction->flow_action_raw.pkt_reformat;
 175			return 0;
 176		}
 177		fallthrough;
 178	default:
 179		return -EOPNOTSUPP;
 180	}
 181}
 182
 183static int parse_flow_attr(struct mlx5_core_dev *mdev,
 184			   struct mlx5_flow_spec *spec,
 185			   const union ib_flow_spec *ib_spec,
 186			   const struct ib_flow_attr *flow_attr,
 187			   struct mlx5_flow_act *action, u32 prev_type)
 188{
 189	struct mlx5_flow_context *flow_context = &spec->flow_context;
 190	u32 *match_c = spec->match_criteria;
 191	u32 *match_v = spec->match_value;
 192	void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
 193					   misc_parameters);
 194	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
 195					   misc_parameters);
 196	void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
 197					    misc_parameters_2);
 198	void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
 199					    misc_parameters_2);
 200	void *headers_c;
 201	void *headers_v;
 202	int match_ipv;
 203	int ret;
 204
 205	if (ib_spec->type & IB_FLOW_SPEC_INNER) {
 206		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
 207					 inner_headers);
 208		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
 209					 inner_headers);
 210		match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
 211					ft_field_support.inner_ip_version);
 212	} else {
 213		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
 214					 outer_headers);
 215		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
 216					 outer_headers);
 217		match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
 218					ft_field_support.outer_ip_version);
 219	}
 220
 221	switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
 222	case IB_FLOW_SPEC_ETH:
 223		if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
 224			return -EOPNOTSUPP;
 225
 226		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 227					     dmac_47_16),
 228				ib_spec->eth.mask.dst_mac);
 229		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 230					     dmac_47_16),
 231				ib_spec->eth.val.dst_mac);
 232
 233		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 234					     smac_47_16),
 235				ib_spec->eth.mask.src_mac);
 236		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 237					     smac_47_16),
 238				ib_spec->eth.val.src_mac);
 239
 240		if (ib_spec->eth.mask.vlan_tag) {
 241			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 242				 cvlan_tag, 1);
 243			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 244				 cvlan_tag, 1);
 245
 246			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 247				 first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
 248			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 249				 first_vid, ntohs(ib_spec->eth.val.vlan_tag));
 250
 251			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 252				 first_cfi,
 253				 ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
 254			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 255				 first_cfi,
 256				 ntohs(ib_spec->eth.val.vlan_tag) >> 12);
 257
 258			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 259				 first_prio,
 260				 ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
 261			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 262				 first_prio,
 263				 ntohs(ib_spec->eth.val.vlan_tag) >> 13);
 264		}
 265		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 266			 ethertype, ntohs(ib_spec->eth.mask.ether_type));
 267		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 268			 ethertype, ntohs(ib_spec->eth.val.ether_type));
 269		break;
 270	case IB_FLOW_SPEC_IPV4:
 271		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
 272			return -EOPNOTSUPP;
 273
 274		if (match_ipv) {
 275			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 276				 ip_version, 0xf);
 277			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 278				 ip_version, MLX5_FS_IPV4_VERSION);
 279		} else {
 280			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 281				 ethertype, 0xffff);
 282			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 283				 ethertype, ETH_P_IP);
 284		}
 285
 286		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 287				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
 288		       &ib_spec->ipv4.mask.src_ip,
 289		       sizeof(ib_spec->ipv4.mask.src_ip));
 290		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 291				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
 292		       &ib_spec->ipv4.val.src_ip,
 293		       sizeof(ib_spec->ipv4.val.src_ip));
 294		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 295				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
 296		       &ib_spec->ipv4.mask.dst_ip,
 297		       sizeof(ib_spec->ipv4.mask.dst_ip));
 298		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 299				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
 300		       &ib_spec->ipv4.val.dst_ip,
 301		       sizeof(ib_spec->ipv4.val.dst_ip));
 302
 303		set_tos(headers_c, headers_v,
 304			ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
 305
 306		if (set_proto(headers_c, headers_v,
 307			      ib_spec->ipv4.mask.proto,
 308			      ib_spec->ipv4.val.proto))
 309			return -EINVAL;
 310		break;
 311	case IB_FLOW_SPEC_IPV6:
 312		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
 313			return -EOPNOTSUPP;
 314
 315		if (match_ipv) {
 316			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 317				 ip_version, 0xf);
 318			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 319				 ip_version, MLX5_FS_IPV6_VERSION);
 320		} else {
 321			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 322				 ethertype, 0xffff);
 323			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 324				 ethertype, ETH_P_IPV6);
 325		}
 326
 327		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 328				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
 329		       &ib_spec->ipv6.mask.src_ip,
 330		       sizeof(ib_spec->ipv6.mask.src_ip));
 331		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 332				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
 333		       &ib_spec->ipv6.val.src_ip,
 334		       sizeof(ib_spec->ipv6.val.src_ip));
 335		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 336				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
 337		       &ib_spec->ipv6.mask.dst_ip,
 338		       sizeof(ib_spec->ipv6.mask.dst_ip));
 339		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 340				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
 341		       &ib_spec->ipv6.val.dst_ip,
 342		       sizeof(ib_spec->ipv6.val.dst_ip));
 343
 344		set_tos(headers_c, headers_v,
 345			ib_spec->ipv6.mask.traffic_class,
 346			ib_spec->ipv6.val.traffic_class);
 347
 348		if (set_proto(headers_c, headers_v,
 349			      ib_spec->ipv6.mask.next_hdr,
 350			      ib_spec->ipv6.val.next_hdr))
 351			return -EINVAL;
 352
 353		set_flow_label(misc_params_c, misc_params_v,
 354			       ntohl(ib_spec->ipv6.mask.flow_label),
 355			       ntohl(ib_spec->ipv6.val.flow_label),
 356			       ib_spec->type & IB_FLOW_SPEC_INNER);
 357		break;
 358	case IB_FLOW_SPEC_ESP:
 359		return -EOPNOTSUPP;
 360	case IB_FLOW_SPEC_TCP:
 361		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
 362					 LAST_TCP_UDP_FIELD))
 363			return -EOPNOTSUPP;
 364
 365		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
 366			return -EINVAL;
 367
 368		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
 369			 ntohs(ib_spec->tcp_udp.mask.src_port));
 370		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
 371			 ntohs(ib_spec->tcp_udp.val.src_port));
 372
 373		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
 374			 ntohs(ib_spec->tcp_udp.mask.dst_port));
 375		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
 376			 ntohs(ib_spec->tcp_udp.val.dst_port));
 377		break;
 378	case IB_FLOW_SPEC_UDP:
 379		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
 380					 LAST_TCP_UDP_FIELD))
 381			return -EOPNOTSUPP;
 382
 383		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
 384			return -EINVAL;
 385
 386		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
 387			 ntohs(ib_spec->tcp_udp.mask.src_port));
 388		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
 389			 ntohs(ib_spec->tcp_udp.val.src_port));
 390
 391		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
 392			 ntohs(ib_spec->tcp_udp.mask.dst_port));
 393		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
 394			 ntohs(ib_spec->tcp_udp.val.dst_port));
 395		break;
 396	case IB_FLOW_SPEC_GRE:
 397		if (ib_spec->gre.mask.c_ks_res0_ver)
 398			return -EOPNOTSUPP;
 399
 400		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
 401			return -EINVAL;
 402
 403		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
 404			 0xff);
 405		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
 406			 IPPROTO_GRE);
 407
 408		MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
 409			 ntohs(ib_spec->gre.mask.protocol));
 410		MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
 411			 ntohs(ib_spec->gre.val.protocol));
 412
 413		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
 414				    gre_key.nvgre.hi),
 415		       &ib_spec->gre.mask.key,
 416		       sizeof(ib_spec->gre.mask.key));
 417		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
 418				    gre_key.nvgre.hi),
 419		       &ib_spec->gre.val.key,
 420		       sizeof(ib_spec->gre.val.key));
 421		break;
 422	case IB_FLOW_SPEC_MPLS:
 423		switch (prev_type) {
 424		case IB_FLOW_SPEC_UDP:
 425			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
 426						   ft_field_support.outer_first_mpls_over_udp),
 427						   &ib_spec->mpls.mask.tag))
 428				return -EOPNOTSUPP;
 429
 430			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
 431					    outer_first_mpls_over_udp),
 432			       &ib_spec->mpls.val.tag,
 433			       sizeof(ib_spec->mpls.val.tag));
 434			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
 435					    outer_first_mpls_over_udp),
 436			       &ib_spec->mpls.mask.tag,
 437			       sizeof(ib_spec->mpls.mask.tag));
 438			break;
 439		case IB_FLOW_SPEC_GRE:
 440			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
 441						   ft_field_support.outer_first_mpls_over_gre),
 442						   &ib_spec->mpls.mask.tag))
 443				return -EOPNOTSUPP;
 444
 445			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
 446					    outer_first_mpls_over_gre),
 447			       &ib_spec->mpls.val.tag,
 448			       sizeof(ib_spec->mpls.val.tag));
 449			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
 450					    outer_first_mpls_over_gre),
 451			       &ib_spec->mpls.mask.tag,
 452			       sizeof(ib_spec->mpls.mask.tag));
 453			break;
 454		default:
 455			if (ib_spec->type & IB_FLOW_SPEC_INNER) {
 456				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
 457							   ft_field_support.inner_first_mpls),
 458							   &ib_spec->mpls.mask.tag))
 459					return -EOPNOTSUPP;
 460
 461				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
 462						    inner_first_mpls),
 463				       &ib_spec->mpls.val.tag,
 464				       sizeof(ib_spec->mpls.val.tag));
 465				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
 466						    inner_first_mpls),
 467				       &ib_spec->mpls.mask.tag,
 468				       sizeof(ib_spec->mpls.mask.tag));
 469			} else {
 470				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
 471							   ft_field_support.outer_first_mpls),
 472							   &ib_spec->mpls.mask.tag))
 473					return -EOPNOTSUPP;
 474
 475				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
 476						    outer_first_mpls),
 477				       &ib_spec->mpls.val.tag,
 478				       sizeof(ib_spec->mpls.val.tag));
 479				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
 480						    outer_first_mpls),
 481				       &ib_spec->mpls.mask.tag,
 482				       sizeof(ib_spec->mpls.mask.tag));
 483			}
 484		}
 485		break;
 486	case IB_FLOW_SPEC_VXLAN_TUNNEL:
 487		if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
 488					 LAST_TUNNEL_FIELD))
 489			return -EOPNOTSUPP;
 490
 491		MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
 492			 ntohl(ib_spec->tunnel.mask.tunnel_id));
 493		MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
 494			 ntohl(ib_spec->tunnel.val.tunnel_id));
 495		break;
 496	case IB_FLOW_SPEC_ACTION_TAG:
 497		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
 498					 LAST_FLOW_TAG_FIELD))
 499			return -EOPNOTSUPP;
 500		if (ib_spec->flow_tag.tag_id >= BIT(24))
 501			return -EINVAL;
 502
 503		flow_context->flow_tag = ib_spec->flow_tag.tag_id;
 504		flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
 505		break;
 506	case IB_FLOW_SPEC_ACTION_DROP:
 507		if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
 508					 LAST_DROP_FIELD))
 509			return -EOPNOTSUPP;
 510		action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
 511		break;
 512	case IB_FLOW_SPEC_ACTION_HANDLE:
 513		ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
 514			flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
 515		if (ret)
 516			return ret;
 517		break;
 518	case IB_FLOW_SPEC_ACTION_COUNT:
 519		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
 520					 LAST_COUNTERS_FIELD))
 521			return -EOPNOTSUPP;
 522
 523		/* for now support only one counters spec per flow */
 524		if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
 525			return -EINVAL;
 526
 527		action->counters = ib_spec->flow_count.counters;
 528		action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
 529		break;
 530	default:
 531		return -EINVAL;
 532	}
 533
 534	return 0;
 535}
 536
 537/* If a flow could catch both multicast and unicast packets,
 538 * it won't fall into the multicast flow steering table and this rule
 539 * could steal other multicast packets.
 540 */
 541static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
 542{
 543	union ib_flow_spec *flow_spec;
 544
 545	if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
 546	    ib_attr->num_of_specs < 1)
 547		return false;
 548
 549	flow_spec = (union ib_flow_spec *)(ib_attr + 1);
 550	if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
 551		struct ib_flow_spec_ipv4 *ipv4_spec;
 552
 553		ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
 554		if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
 555			return true;
 556
 557		return false;
 558	}
 559
 560	if (flow_spec->type == IB_FLOW_SPEC_ETH) {
 561		struct ib_flow_spec_eth *eth_spec;
 562
 563		eth_spec = (struct ib_flow_spec_eth *)flow_spec;
 564		return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
 565		       is_multicast_ether_addr(eth_spec->val.dst_mac);
 566	}
 567
 568	return false;
 569}
 570
 571static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
 572			       const struct ib_flow_attr *flow_attr,
 573			       bool check_inner)
 574{
 575	union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
 576	int match_ipv = check_inner ?
 577			MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
 578					ft_field_support.inner_ip_version) :
 579			MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
 580					ft_field_support.outer_ip_version);
 581	int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
 582	bool ipv4_spec_valid, ipv6_spec_valid;
 583	unsigned int ip_spec_type = 0;
 584	bool has_ethertype = false;
 585	unsigned int spec_index;
 586	bool mask_valid = true;
 587	u16 eth_type = 0;
 588	bool type_valid;
 589
 590	/* Validate that ethertype is correct */
 591	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
 592		if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
 593		    ib_spec->eth.mask.ether_type) {
 594			mask_valid = (ib_spec->eth.mask.ether_type ==
 595				      htons(0xffff));
 596			has_ethertype = true;
 597			eth_type = ntohs(ib_spec->eth.val.ether_type);
 598		} else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
 599			   (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
 600			ip_spec_type = ib_spec->type;
 601		}
 602		ib_spec = (void *)ib_spec + ib_spec->size;
 603	}
 604
 605	type_valid = (!has_ethertype) || (!ip_spec_type);
 606	if (!type_valid && mask_valid) {
 607		ipv4_spec_valid = (eth_type == ETH_P_IP) &&
 608			(ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
 609		ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
 610			(ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
 611
 612		type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
 613			     (((eth_type == ETH_P_MPLS_UC) ||
 614			       (eth_type == ETH_P_MPLS_MC)) && match_ipv);
 615	}
 616
 617	return type_valid;
 618}
 619
 620static bool is_valid_attr(struct mlx5_core_dev *mdev,
 621			  const struct ib_flow_attr *flow_attr)
 622{
 623	return is_valid_ethertype(mdev, flow_attr, false) &&
 624	       is_valid_ethertype(mdev, flow_attr, true);
 625}
 626
 627static void put_flow_table(struct mlx5_ib_dev *dev,
 628			   struct mlx5_ib_flow_prio *prio, bool ft_added)
 629{
 630	prio->refcount -= !!ft_added;
 631	if (!prio->refcount) {
 632		mlx5_destroy_flow_table(prio->flow_table);
 633		prio->flow_table = NULL;
 634	}
 635}
 636
 637static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
 638{
 639	struct mlx5_ib_flow_handler *handler = container_of(flow_id,
 640							  struct mlx5_ib_flow_handler,
 641							  ibflow);
 642	struct mlx5_ib_flow_handler *iter, *tmp;
 643	struct mlx5_ib_dev *dev = handler->dev;
 644
 645	mutex_lock(&dev->flow_db->lock);
 646
 647	list_for_each_entry_safe(iter, tmp, &handler->list, list) {
 648		mlx5_del_flow_rules(iter->rule);
 649		put_flow_table(dev, iter->prio, true);
 650		list_del(&iter->list);
 651		kfree(iter);
 652	}
 653
 654	mlx5_del_flow_rules(handler->rule);
 655	put_flow_table(dev, handler->prio, true);
 656	mlx5_ib_counters_clear_description(handler->ibcounters);
 657	mutex_unlock(&dev->flow_db->lock);
 658	if (handler->flow_matcher)
 659		atomic_dec(&handler->flow_matcher->usecnt);
 660	kfree(handler);
 661
 662	return 0;
 663}
 664
 /*
  * Map a verbs flow priority to a core priority index.
  *
  * Each verbs priority occupies two consecutive core priorities: the even one
  * is used for don't-trap rules, the odd one for regular rules.
  */
static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
{
	return priority * 2u + (dont_trap ? 0u : 1u);
}
 672
 /* Direction of the flow table requested from get_flow_table(). */
enum flow_table_type {
	MLX5_IB_FT_RX,
	MLX5_IB_FT_TX,
};

/*
 * Sizing used by get_flow_table() for IB_FLOW_ATTR_NORMAL tables: number of
 * auto-groups and upper bound on the number of entries.
 */
#define MLX5_FS_MAX_TYPES	 6
#define MLX5_FS_MAX_ENTRIES	 BIT(16)
 680
 681static bool mlx5_ib_shared_ft_allowed(struct ib_device *device)
 682{
 683	struct mlx5_ib_dev *dev = to_mdev(device);
 684
 685	return MLX5_CAP_GEN(dev->mdev, shared_object_to_user_object_allowed);
 686}
 687
 688static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
 689					   struct mlx5_flow_namespace *ns,
 690					   struct mlx5_ib_flow_prio *prio,
 691					   int priority,
 692					   int num_entries, int num_groups,
 693					   u32 flags)
 694{
 695	struct mlx5_flow_table_attr ft_attr = {};
 696	struct mlx5_flow_table *ft;
 697
 698	ft_attr.prio = priority;
 699	ft_attr.max_fte = num_entries;
 700	ft_attr.flags = flags;
 701	ft_attr.autogroup.max_num_groups = num_groups;
 702	ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
 703	if (IS_ERR(ft))
 704		return ERR_CAST(ft);
 705
 706	prio->flow_table = ft;
 707	prio->refcount = 0;
 708	return prio;
 709}
 710
 711static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 712						struct ib_flow_attr *flow_attr,
 713						enum flow_table_type ft_type)
 714{
 715	bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
 716	struct mlx5_flow_namespace *ns = NULL;
 717	enum mlx5_flow_namespace_type fn_type;
 718	struct mlx5_ib_flow_prio *prio;
 719	struct mlx5_flow_table *ft;
 720	int max_table_size;
 721	int num_entries;
 722	int num_groups;
 723	bool esw_encap;
 724	u32 flags = 0;
 725	int priority;
 726
 727	max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
 728						       log_max_ft_size));
 729	esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
 730		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
 731	switch (flow_attr->type) {
 732	case IB_FLOW_ATTR_NORMAL:
 733		if (flow_is_multicast_only(flow_attr) && !dont_trap)
 734			priority = MLX5_IB_FLOW_MCAST_PRIO;
 735		else
 736			priority = ib_prio_to_core_prio(flow_attr->priority,
 737							dont_trap);
 738		if (ft_type == MLX5_IB_FT_RX) {
 739			fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
 740			prio = &dev->flow_db->prios[priority];
 741			if (!dev->is_rep && !esw_encap &&
 742			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
 743				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
 744			if (!dev->is_rep && !esw_encap &&
 745			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
 746						      reformat_l3_tunnel_to_l2))
 747				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
 748		} else {
 749			max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(
 750				dev->mdev, log_max_ft_size));
 751			fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
 752			prio = &dev->flow_db->egress_prios[priority];
 753			if (!dev->is_rep && !esw_encap &&
 754			    MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
 755				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
 756		}
 757		ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
 758		num_entries = MLX5_FS_MAX_ENTRIES;
 759		num_groups = MLX5_FS_MAX_TYPES;
 760		break;
 761	case IB_FLOW_ATTR_ALL_DEFAULT:
 762	case IB_FLOW_ATTR_MC_DEFAULT:
 763		ns = mlx5_get_flow_namespace(dev->mdev,
 764					     MLX5_FLOW_NAMESPACE_LEFTOVERS);
 765		build_leftovers_ft_param(&priority, &num_entries, &num_groups);
 766		prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
 767		break;
 768	case IB_FLOW_ATTR_SNIFFER:
 769		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
 770					allow_sniffer_and_nic_rx_shared_tir))
 771			return ERR_PTR(-EOPNOTSUPP);
 772
 773		ns = mlx5_get_flow_namespace(
 774			dev->mdev, ft_type == MLX5_IB_FT_RX ?
 775					   MLX5_FLOW_NAMESPACE_SNIFFER_RX :
 776					   MLX5_FLOW_NAMESPACE_SNIFFER_TX);
 777
 778		prio = &dev->flow_db->sniffer[ft_type];
 779		priority = 0;
 780		num_entries = 1;
 781		num_groups = 1;
 782		break;
 783	default:
 784		break;
 785	}
 786
 787	if (!ns)
 788		return ERR_PTR(-EOPNOTSUPP);
 789
 790	max_table_size = min_t(int, num_entries, max_table_size);
 791
 792	ft = prio->flow_table;
 793	if (!ft)
 794		return _get_prio(dev, ns, prio, priority, max_table_size,
 795				 num_groups, flags);
 796
 797	return prio;
 798}
 799
 /* Priorities used by mlx5_ib_fs_add_op_fc() in the RDMA RX counters namespace. */
enum {
	RDMA_RX_ECN_OPCOUNTER_PRIO,
	RDMA_RX_CNP_OPCOUNTER_PRIO,
};

/* Priority used by mlx5_ib_fs_add_op_fc() in the RDMA TX counters namespace. */
enum {
	RDMA_TX_CNP_OPCOUNTER_PRIO,
};
 808
 809static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
 810			      struct mlx5_flow_spec *spec)
 811{
 812	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
 813					ft_field_support.source_vhca_port) ||
 814	    !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
 815					ft_field_support.source_vhca_port))
 816		return -EOPNOTSUPP;
 817
 818	MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria,
 819			 misc_parameters.source_vhca_port);
 820	MLX5_SET(fte_match_param, &spec->match_value,
 821		 misc_parameters.source_vhca_port, port_num);
 822
 823	return 0;
 824}
 825
 826static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
 827			   struct mlx5_flow_spec *spec, int ipv)
 828{
 829	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
 830					ft_field_support.outer_ip_version))
 831		return -EOPNOTSUPP;
 832
 833	if (mlx5_core_mp_enabled(dev->mdev) &&
 834	    set_vhca_port_spec(dev, port_num, spec))
 835		return -EOPNOTSUPP;
 836
 837	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 838			 outer_headers.ip_ecn);
 839	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn,
 840		 INET_ECN_CE);
 841	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 842			 outer_headers.ip_version);
 843	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
 844		 ipv);
 845
 846	spec->match_criteria_enable =
 847		get_match_criteria_enable(spec->match_criteria);
 848
 849	return 0;
 850}
 851
 852static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
 853			struct mlx5_flow_spec *spec)
 854{
 855	if (mlx5_core_mp_enabled(dev->mdev) &&
 856	    set_vhca_port_spec(dev, port_num, spec))
 857		return -EOPNOTSUPP;
 858
 859	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 860			 misc_parameters.bth_opcode);
 861	MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode,
 862		 IB_BTH_OPCODE_CNP);
 863
 864	spec->match_criteria_enable =
 865		get_match_criteria_enable(spec->match_criteria);
 866
 867	return 0;
 868}
 869
 870int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
 871			 struct mlx5_ib_op_fc *opfc,
 872			 enum mlx5_ib_optional_counter_type type)
 873{
 874	enum mlx5_flow_namespace_type fn_type;
 875	int priority, i, err, spec_num;
 876	struct mlx5_flow_act flow_act = {};
 877	struct mlx5_flow_destination dst;
 878	struct mlx5_flow_namespace *ns;
 879	struct mlx5_ib_flow_prio *prio;
 880	struct mlx5_flow_spec *spec;
 881
 882	spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
 883	if (!spec)
 884		return -ENOMEM;
 885
 886	switch (type) {
 887	case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
 888		if (set_ecn_ce_spec(dev, port_num, &spec[0],
 889				    MLX5_FS_IPV4_VERSION) ||
 890		    set_ecn_ce_spec(dev, port_num, &spec[1],
 891				    MLX5_FS_IPV6_VERSION)) {
 892			err = -EOPNOTSUPP;
 893			goto free;
 894		}
 895		spec_num = 2;
 896		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
 897		priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
 898		break;
 899
 900	case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
 901		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
 902					ft_field_support_2_nic_receive_rdma.bth_opcode) ||
 903		    set_cnp_spec(dev, port_num, &spec[0])) {
 904			err = -EOPNOTSUPP;
 905			goto free;
 906		}
 907		spec_num = 1;
 908		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
 909		priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
 910		break;
 911
 912	case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
 913		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
 914					ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
 915		    set_cnp_spec(dev, port_num, &spec[0])) {
 916			err = -EOPNOTSUPP;
 917			goto free;
 918		}
 919		spec_num = 1;
 920		fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
 921		priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
 922		break;
 923
 924	default:
 925		err = -EOPNOTSUPP;
 926		goto free;
 927	}
 928
 929	ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
 930	if (!ns) {
 931		err = -EOPNOTSUPP;
 932		goto free;
 933	}
 934
 935	prio = &dev->flow_db->opfcs[type];
 936	if (!prio->flow_table) {
 937		prio = _get_prio(dev, ns, prio, priority,
 938				 dev->num_ports * MAX_OPFC_RULES, 1, 0);
 939		if (IS_ERR(prio)) {
 940			err = PTR_ERR(prio);
 941			goto free;
 942		}
 943	}
 944
 945	dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
 946	dst.counter_id = mlx5_fc_id(opfc->fc);
 947
 948	flow_act.action =
 949		MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
 950
 951	for (i = 0; i < spec_num; i++) {
 952		opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
 953						    &flow_act, &dst, 1);
 954		if (IS_ERR(opfc->rule[i])) {
 955			err = PTR_ERR(opfc->rule[i]);
 956			goto del_rules;
 957		}
 958	}
 959	prio->refcount += spec_num;
 960	kfree(spec);
 961
 962	return 0;
 963
 964del_rules:
 965	for (i -= 1; i >= 0; i--)
 966		mlx5_del_flow_rules(opfc->rule[i]);
 967	put_flow_table(dev, prio, false);
 968free:
 969	kfree(spec);
 970	return err;
 971}
 972
 973void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
 974			     struct mlx5_ib_op_fc *opfc,
 975			     enum mlx5_ib_optional_counter_type type)
 976{
 977	int i;
 978
 979	for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
 980		mlx5_del_flow_rules(opfc->rule[i]);
 981		put_flow_table(dev, &dev->flow_db->opfcs[type], true);
 982	}
 983}
 984
 985static void set_underlay_qp(struct mlx5_ib_dev *dev,
 986			    struct mlx5_flow_spec *spec,
 987			    u32 underlay_qpn)
 988{
 989	void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
 990					   spec->match_criteria,
 991					   misc_parameters);
 992	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 993					   misc_parameters);
 994
 995	if (underlay_qpn &&
 996	    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
 997				      ft_field_support.bth_dst_qp)) {
 998		MLX5_SET(fte_match_set_misc,
 999			 misc_params_v, bth_dst_qp, underlay_qpn);
1000		MLX5_SET(fte_match_set_misc,
1001			 misc_params_c, bth_dst_qp, 0xffffff);
1002	}
1003}
1004
1005static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
1006					 struct mlx5_flow_spec *spec,
1007					 struct mlx5_eswitch_rep *rep)
1008{
1009	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
1010	void *misc;
1011
1012	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
1013		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1014				    misc_parameters_2);
1015
1016		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1017			 mlx5_eswitch_get_vport_metadata_for_match(rep->esw,
1018								   rep->vport));
1019		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1020				    misc_parameters_2);
1021
1022		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1023			 mlx5_eswitch_get_vport_metadata_mask());
1024	} else {
1025		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1026				    misc_parameters);
1027
1028		MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
1029
1030		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1031				    misc_parameters);
1032
1033		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
1034	}
1035}
1036
1037static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
1038						      struct mlx5_ib_flow_prio *ft_prio,
1039						      const struct ib_flow_attr *flow_attr,
1040						      struct mlx5_flow_destination *dst,
1041						      u32 underlay_qpn,
1042						      struct mlx5_ib_create_flow *ucmd)
1043{
1044	struct mlx5_flow_table	*ft = ft_prio->flow_table;
1045	struct mlx5_ib_flow_handler *handler;
1046	struct mlx5_flow_act flow_act = {};
1047	struct mlx5_flow_spec *spec;
1048	struct mlx5_flow_destination dest_arr[2] = {};
1049	struct mlx5_flow_destination *rule_dst = dest_arr;
1050	const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
1051	unsigned int spec_index;
1052	u32 prev_type = 0;
1053	int err = 0;
1054	int dest_num = 0;
1055	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
1056
1057	if (!is_valid_attr(dev->mdev, flow_attr))
1058		return ERR_PTR(-EINVAL);
1059
1060	if (dev->is_rep && is_egress)
1061		return ERR_PTR(-EINVAL);
1062
1063	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1064	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
1065	if (!handler || !spec) {
1066		err = -ENOMEM;
1067		goto free;
1068	}
1069
1070	INIT_LIST_HEAD(&handler->list);
1071
1072	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
1073		err = parse_flow_attr(dev->mdev, spec,
1074				      ib_flow, flow_attr, &flow_act,
1075				      prev_type);
1076		if (err < 0)
1077			goto free;
1078
1079		prev_type = ((union ib_flow_spec *)ib_flow)->type;
1080		ib_flow += ((union ib_flow_spec *)ib_flow)->size;
1081	}
1082
1083	if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
1084		memcpy(&dest_arr[0], dst, sizeof(*dst));
1085		dest_num++;
1086	}
1087
1088	if (!flow_is_multicast_only(flow_attr))
1089		set_underlay_qp(dev, spec, underlay_qpn);
1090
1091	if (dev->is_rep && flow_attr->type != IB_FLOW_ATTR_SNIFFER) {
1092		struct mlx5_eswitch_rep *rep;
1093
1094		rep = dev->port[flow_attr->port - 1].rep;
1095		if (!rep) {
1096			err = -EINVAL;
1097			goto free;
1098		}
1099
1100		mlx5_ib_set_rule_source_port(dev, spec, rep);
1101	}
1102
1103	spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
1104
1105	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1106		struct mlx5_ib_mcounters *mcounters;
1107
1108		err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd);
1109		if (err)
1110			goto free;
1111
1112		mcounters = to_mcounters(flow_act.counters);
1113		handler->ibcounters = flow_act.counters;
1114		dest_arr[dest_num].type =
1115			MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1116		dest_arr[dest_num].counter_id =
1117			mlx5_fc_id(mcounters->hw_cntrs_hndl);
1118		dest_num++;
1119	}
1120
1121	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
1122		if (!dest_num)
1123			rule_dst = NULL;
1124	} else {
1125		if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
1126			flow_act.action |=
1127				MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
1128		if (is_egress)
1129			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1130		else if (dest_num)
1131			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1132	}
1133
1134	if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG)  &&
1135	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
1136	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
1137		mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
1138			     spec->flow_context.flow_tag, flow_attr->type);
1139		err = -EINVAL;
1140		goto free;
1141	}
1142	handler->rule = mlx5_add_flow_rules(ft, spec,
1143					    &flow_act,
1144					    rule_dst, dest_num);
1145
1146	if (IS_ERR(handler->rule)) {
1147		err = PTR_ERR(handler->rule);
1148		goto free;
1149	}
1150
1151	ft_prio->refcount++;
1152	handler->prio = ft_prio;
1153	handler->dev = dev;
1154
1155	ft_prio->flow_table = ft;
1156free:
1157	if (err && handler) {
1158		mlx5_ib_counters_clear_description(handler->ibcounters);
1159		kfree(handler);
1160	}
1161	kvfree(spec);
1162	return err ? ERR_PTR(err) : handler;
1163}
1164
1165static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
1166						     struct mlx5_ib_flow_prio *ft_prio,
1167						     const struct ib_flow_attr *flow_attr,
1168						     struct mlx5_flow_destination *dst)
1169{
1170	return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
1171}
1172
/* Indices into the leftovers_specs[] table of create_leftovers_rule(). */
enum {
	LEFTOVERS_MC = 0,
	LEFTOVERS_UC = 1,
};
1177
1178static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
1179							  struct mlx5_ib_flow_prio *ft_prio,
1180							  struct ib_flow_attr *flow_attr,
1181							  struct mlx5_flow_destination *dst)
1182{
1183	struct mlx5_ib_flow_handler *handler_ucast = NULL;
1184	struct mlx5_ib_flow_handler *handler = NULL;
1185
1186	static struct {
1187		struct ib_flow_attr	flow_attr;
1188		struct ib_flow_spec_eth eth_flow;
1189	} leftovers_specs[] = {
1190		[LEFTOVERS_MC] = {
1191			.flow_attr = {
1192				.num_of_specs = 1,
1193				.size = sizeof(leftovers_specs[0])
1194			},
1195			.eth_flow = {
1196				.type = IB_FLOW_SPEC_ETH,
1197				.size = sizeof(struct ib_flow_spec_eth),
1198				.mask = {.dst_mac = {0x1} },
1199				.val =  {.dst_mac = {0x1} }
1200			}
1201		},
1202		[LEFTOVERS_UC] = {
1203			.flow_attr = {
1204				.num_of_specs = 1,
1205				.size = sizeof(leftovers_specs[0])
1206			},
1207			.eth_flow = {
1208				.type = IB_FLOW_SPEC_ETH,
1209				.size = sizeof(struct ib_flow_spec_eth),
1210				.mask = {.dst_mac = {0x1} },
1211				.val = {.dst_mac = {} }
1212			}
1213		}
1214	};
1215
1216	handler = create_flow_rule(dev, ft_prio,
1217				   &leftovers_specs[LEFTOVERS_MC].flow_attr,
1218				   dst);
1219	if (!IS_ERR(handler) &&
1220	    flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
1221		handler_ucast = create_flow_rule(dev, ft_prio,
1222						 &leftovers_specs[LEFTOVERS_UC].flow_attr,
1223						 dst);
1224		if (IS_ERR(handler_ucast)) {
1225			mlx5_del_flow_rules(handler->rule);
1226			ft_prio->refcount--;
1227			kfree(handler);
1228			handler = handler_ucast;
1229		} else {
1230			list_add(&handler_ucast->list, &handler->list);
1231		}
1232	}
1233
1234	return handler;
1235}
1236
1237static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
1238							struct mlx5_ib_flow_prio *ft_rx,
1239							struct mlx5_ib_flow_prio *ft_tx,
1240							struct mlx5_flow_destination *dst)
1241{
1242	struct mlx5_ib_flow_handler *handler_rx;
1243	struct mlx5_ib_flow_handler *handler_tx;
1244	int err;
1245	static const struct ib_flow_attr flow_attr  = {
1246		.num_of_specs = 0,
1247		.type = IB_FLOW_ATTR_SNIFFER,
1248		.size = sizeof(flow_attr)
1249	};
1250
1251	handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
1252	if (IS_ERR(handler_rx)) {
1253		err = PTR_ERR(handler_rx);
1254		goto err;
1255	}
1256
1257	handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
1258	if (IS_ERR(handler_tx)) {
1259		err = PTR_ERR(handler_tx);
1260		goto err_tx;
1261	}
1262
1263	list_add(&handler_tx->list, &handler_rx->list);
1264
1265	return handler_rx;
1266
1267err_tx:
1268	mlx5_del_flow_rules(handler_rx->rule);
1269	ft_rx->refcount--;
1270	kfree(handler_rx);
1271err:
1272	return ERR_PTR(err);
1273}
1274
1275static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
1276					   struct ib_flow_attr *flow_attr,
1277					   struct ib_udata *udata)
1278{
1279	struct mlx5_ib_dev *dev = to_mdev(qp->device);
1280	struct mlx5_ib_qp *mqp = to_mqp(qp);
1281	struct mlx5_ib_flow_handler *handler = NULL;
1282	struct mlx5_flow_destination *dst = NULL;
1283	struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
1284	struct mlx5_ib_flow_prio *ft_prio;
1285	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
1286	struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
1287	size_t min_ucmd_sz, required_ucmd_sz;
1288	int err;
1289	int underlay_qpn;
1290
1291	if (udata && udata->inlen) {
1292		min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved);
1293		if (udata->inlen < min_ucmd_sz)
1294			return ERR_PTR(-EOPNOTSUPP);
1295
1296		err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
1297		if (err)
1298			return ERR_PTR(err);
1299
1300		/* currently supports only one counters data */
1301		if (ucmd_hdr.ncounters_data > 1)
1302			return ERR_PTR(-EINVAL);
1303
1304		required_ucmd_sz = min_ucmd_sz +
1305			sizeof(struct mlx5_ib_flow_counters_data) *
1306			ucmd_hdr.ncounters_data;
1307		if (udata->inlen > required_ucmd_sz &&
1308		    !ib_is_udata_cleared(udata, required_ucmd_sz,
1309					 udata->inlen - required_ucmd_sz))
1310			return ERR_PTR(-EOPNOTSUPP);
1311
1312		ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
1313		if (!ucmd)
1314			return ERR_PTR(-ENOMEM);
1315
1316		err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
1317		if (err)
1318			goto free_ucmd;
1319	}
1320
1321	if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
1322		err = -ENOMEM;
1323		goto free_ucmd;
1324	}
1325
1326	if (flow_attr->flags &
1327	    ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS)) {
1328		err = -EINVAL;
1329		goto free_ucmd;
1330	}
1331
1332	if (is_egress &&
1333	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
1334	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
1335		err = -EINVAL;
1336		goto free_ucmd;
1337	}
1338
1339	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1340	if (!dst) {
1341		err = -ENOMEM;
1342		goto free_ucmd;
1343	}
1344
1345	mutex_lock(&dev->flow_db->lock);
1346
1347	ft_prio = get_flow_table(dev, flow_attr,
1348				 is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
1349	if (IS_ERR(ft_prio)) {
1350		err = PTR_ERR(ft_prio);
1351		goto unlock;
1352	}
1353	if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
1354		ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
1355		if (IS_ERR(ft_prio_tx)) {
1356			err = PTR_ERR(ft_prio_tx);
1357			ft_prio_tx = NULL;
1358			goto destroy_ft;
1359		}
1360	}
1361
1362	if (is_egress) {
1363		dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
1364	} else {
1365		dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1366		if (mqp->is_rss)
1367			dst->tir_num = mqp->rss_qp.tirn;
1368		else
1369			dst->tir_num = mqp->raw_packet_qp.rq.tirn;
1370	}
1371
1372	switch (flow_attr->type) {
1373	case IB_FLOW_ATTR_NORMAL:
1374		underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
1375				       mqp->underlay_qpn :
1376				       0;
1377		handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
1378					    underlay_qpn, ucmd);
1379		break;
1380	case IB_FLOW_ATTR_ALL_DEFAULT:
1381	case IB_FLOW_ATTR_MC_DEFAULT:
1382		handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst);
1383		break;
1384	case IB_FLOW_ATTR_SNIFFER:
1385		handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
1386		break;
1387	default:
1388		err = -EINVAL;
1389		goto destroy_ft;
1390	}
1391
1392	if (IS_ERR(handler)) {
1393		err = PTR_ERR(handler);
1394		handler = NULL;
1395		goto destroy_ft;
1396	}
1397
1398	mutex_unlock(&dev->flow_db->lock);
1399	kfree(dst);
1400	kfree(ucmd);
1401
1402	return &handler->ibflow;
1403
1404destroy_ft:
1405	put_flow_table(dev, ft_prio, false);
1406	if (ft_prio_tx)
1407		put_flow_table(dev, ft_prio_tx, false);
1408unlock:
1409	mutex_unlock(&dev->flow_db->lock);
1410	kfree(dst);
1411free_ucmd:
1412	kfree(ucmd);
1413	return ERR_PTR(err);
1414}
1415
1416static struct mlx5_ib_flow_prio *
1417_get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
1418		enum mlx5_flow_namespace_type ns_type,
1419		bool mcast)
1420{
1421	struct mlx5_flow_namespace *ns = NULL;
1422	struct mlx5_ib_flow_prio *prio = NULL;
1423	int max_table_size = 0;
1424	bool esw_encap;
1425	u32 flags = 0;
1426	int priority;
1427
1428	if (mcast)
1429		priority = MLX5_IB_FLOW_MCAST_PRIO;
1430	else
1431		priority = ib_prio_to_core_prio(user_priority, false);
1432
1433	esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
1434		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
1435	switch (ns_type) {
1436	case MLX5_FLOW_NAMESPACE_BYPASS:
1437		max_table_size = BIT(
1438			MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size));
1439		if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
1440			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
1441		if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
1442					      reformat_l3_tunnel_to_l2) &&
1443		    !esw_encap)
1444			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1445		break;
1446	case MLX5_FLOW_NAMESPACE_EGRESS:
1447		max_table_size = BIT(
1448			MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
1449		if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) &&
1450		    !esw_encap)
1451			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1452		break;
1453	case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
1454		max_table_size = BIT(
1455			MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
1456		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
1457			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
1458		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev,
1459					       reformat_l3_tunnel_to_l2) &&
1460		    esw_encap)
1461			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1462		priority = user_priority;
1463		break;
1464	case MLX5_FLOW_NAMESPACE_RDMA_RX:
1465		max_table_size = BIT(
1466			MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size));
1467		priority = user_priority;
1468		break;
1469	case MLX5_FLOW_NAMESPACE_RDMA_TX:
1470		max_table_size = BIT(
1471			MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
1472		priority = user_priority;
1473		break;
1474	default:
1475		break;
1476	}
1477
1478	max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
1479
1480	ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
1481	if (!ns)
1482		return ERR_PTR(-EOPNOTSUPP);
1483
1484	switch (ns_type) {
1485	case MLX5_FLOW_NAMESPACE_BYPASS:
1486		prio = &dev->flow_db->prios[priority];
1487		break;
1488	case MLX5_FLOW_NAMESPACE_EGRESS:
1489		prio = &dev->flow_db->egress_prios[priority];
1490		break;
1491	case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
1492		prio = &dev->flow_db->fdb[priority];
1493		break;
1494	case MLX5_FLOW_NAMESPACE_RDMA_RX:
1495		prio = &dev->flow_db->rdma_rx[priority];
1496		break;
1497	case MLX5_FLOW_NAMESPACE_RDMA_TX:
1498		prio = &dev->flow_db->rdma_tx[priority];
1499		break;
1500	default: return ERR_PTR(-EINVAL);
1501	}
1502
1503	if (!prio)
1504		return ERR_PTR(-EINVAL);
1505
1506	if (prio->flow_table)
1507		return prio;
1508
1509	return _get_prio(dev, ns, prio, priority, max_table_size,
1510			 MLX5_FS_MAX_TYPES, flags);
1511}
1512
1513static struct mlx5_ib_flow_handler *
1514_create_raw_flow_rule(struct mlx5_ib_dev *dev,
1515		      struct mlx5_ib_flow_prio *ft_prio,
1516		      struct mlx5_flow_destination *dst,
1517		      struct mlx5_ib_flow_matcher  *fs_matcher,
1518		      struct mlx5_flow_context *flow_context,
1519		      struct mlx5_flow_act *flow_act,
1520		      void *cmd_in, int inlen,
1521		      int dst_num)
1522{
1523	struct mlx5_ib_flow_handler *handler;
1524	struct mlx5_flow_spec *spec;
1525	struct mlx5_flow_table *ft = ft_prio->flow_table;
1526	int err = 0;
1527
1528	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1529	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
1530	if (!handler || !spec) {
1531		err = -ENOMEM;
1532		goto free;
1533	}
1534
1535	INIT_LIST_HEAD(&handler->list);
1536
1537	memcpy(spec->match_value, cmd_in, inlen);
1538	memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
1539	       fs_matcher->mask_len);
1540	spec->match_criteria_enable = fs_matcher->match_criteria_enable;
1541	spec->flow_context = *flow_context;
1542
1543	handler->rule = mlx5_add_flow_rules(ft, spec,
1544					    flow_act, dst, dst_num);
1545
1546	if (IS_ERR(handler->rule)) {
1547		err = PTR_ERR(handler->rule);
1548		goto free;
1549	}
1550
1551	ft_prio->refcount++;
1552	handler->prio = ft_prio;
1553	handler->dev = dev;
1554	ft_prio->flow_table = ft;
1555
1556free:
1557	if (err)
1558		kfree(handler);
1559	kvfree(spec);
1560	return err ? ERR_PTR(err) : handler;
1561}
1562
1563static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
1564				void *match_v)
1565{
1566	void *match_c;
1567	void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
1568	void *dmac, *dmac_mask;
1569	void *ipv4, *ipv4_mask;
1570
1571	if (!(fs_matcher->match_criteria_enable &
1572	      (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
1573		return false;
1574
1575	match_c = fs_matcher->matcher_mask.match_params;
1576	match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
1577					   outer_headers);
1578	match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
1579					   outer_headers);
1580
1581	dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
1582			    dmac_47_16);
1583	dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
1584				 dmac_47_16);
1585
1586	if (is_multicast_ether_addr(dmac) &&
1587	    is_multicast_ether_addr(dmac_mask))
1588		return true;
1589
1590	ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
1591			    dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
1592
1593	ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
1594				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
1595
1596	if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
1597	    ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
1598		return true;
1599
1600	return false;
1601}
1602
1603static struct mlx5_ib_flow_handler *raw_fs_rule_add(
1604	struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
1605	struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act,
1606	u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type)
1607{
1608	struct mlx5_flow_destination *dst;
1609	struct mlx5_ib_flow_prio *ft_prio;
1610	struct mlx5_ib_flow_handler *handler;
1611	int dst_num = 0;
1612	bool mcast;
1613	int err;
1614
1615	if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
1616		return ERR_PTR(-EOPNOTSUPP);
1617
1618	if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
1619		return ERR_PTR(-ENOMEM);
1620
1621	dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
1622	if (!dst)
1623		return ERR_PTR(-ENOMEM);
1624
1625	mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
1626	mutex_lock(&dev->flow_db->lock);
1627
1628	ft_prio = _get_flow_table(dev, fs_matcher->priority,
1629				  fs_matcher->ns_type, mcast);
1630	if (IS_ERR(ft_prio)) {
1631		err = PTR_ERR(ft_prio);
1632		goto unlock;
1633	}
1634
1635	switch (dest_type) {
1636	case MLX5_FLOW_DESTINATION_TYPE_TIR:
1637		dst[dst_num].type = dest_type;
1638		dst[dst_num++].tir_num = dest_id;
1639		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1640		break;
1641	case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
1642		dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
1643		dst[dst_num++].ft_num = dest_id;
1644		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1645		break;
1646	case MLX5_FLOW_DESTINATION_TYPE_PORT:
1647		dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
1648		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1649		break;
1650	default:
1651		break;
1652	}
1653
1654	if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1655		dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1656		dst[dst_num].counter_id = counter_id;
1657		dst_num++;
1658	}
1659
1660	handler = _create_raw_flow_rule(dev, ft_prio, dst_num ? dst : NULL,
1661					fs_matcher, flow_context, flow_act,
1662					cmd_in, inlen, dst_num);
1663
1664	if (IS_ERR(handler)) {
1665		err = PTR_ERR(handler);
1666		goto destroy_ft;
1667	}
1668
1669	mutex_unlock(&dev->flow_db->lock);
1670	atomic_inc(&fs_matcher->usecnt);
1671	handler->flow_matcher = fs_matcher;
1672
1673	kfree(dst);
1674
1675	return handler;
1676
1677destroy_ft:
1678	put_flow_table(dev, ft_prio, false);
1679unlock:
1680	mutex_unlock(&dev->flow_db->lock);
1681	kfree(dst);
1682
1683	return ERR_PTR(err);
1684}
1685
1686static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
1687{
1688	switch (maction->flow_action_raw.sub_type) {
1689	case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
1690		mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
1691					   maction->flow_action_raw.modify_hdr);
1692		break;
1693	case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
1694		mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
1695					     maction->flow_action_raw.pkt_reformat);
1696		break;
1697	case MLX5_IB_FLOW_ACTION_DECAP:
1698		break;
1699	default:
1700		break;
1701	}
1702}
1703
1704static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
1705{
1706	struct mlx5_ib_flow_action *maction = to_mflow_act(action);
1707
1708	switch (action->type) {
1709	case IB_FLOW_ACTION_UNSPECIFIED:
1710		destroy_flow_action_raw(maction);
1711		break;
1712	default:
1713		WARN_ON(true);
1714		break;
1715	}
1716
1717	kfree(maction);
1718	return 0;
1719}
1720
1721static int
1722mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
1723			     enum mlx5_flow_namespace_type *namespace)
1724{
1725	switch (table_type) {
1726	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
1727		*namespace = MLX5_FLOW_NAMESPACE_BYPASS;
1728		break;
1729	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
1730		*namespace = MLX5_FLOW_NAMESPACE_EGRESS;
1731		break;
1732	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
1733		*namespace = MLX5_FLOW_NAMESPACE_FDB_BYPASS;
1734		break;
1735	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
1736		*namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
1737		break;
1738	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
1739		*namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
1740		break;
1741	default:
1742		return -EINVAL;
1743	}
1744
1745	return 0;
1746}
1747
1748static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
1749	[MLX5_IB_FLOW_TYPE_NORMAL] = {
1750		.type = UVERBS_ATTR_TYPE_PTR_IN,
1751		.u.ptr = {
1752			.len = sizeof(u16), /* data is priority */
1753			.min_len = sizeof(u16),
1754		}
1755	},
1756	[MLX5_IB_FLOW_TYPE_SNIFFER] = {
1757		.type = UVERBS_ATTR_TYPE_PTR_IN,
1758		UVERBS_ATTR_NO_DATA(),
1759	},
1760	[MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
1761		.type = UVERBS_ATTR_TYPE_PTR_IN,
1762		UVERBS_ATTR_NO_DATA(),
1763	},
1764	[MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
1765		.type = UVERBS_ATTR_TYPE_PTR_IN,
1766		UVERBS_ATTR_NO_DATA(),
1767	},
1768};
1769
1770static bool is_flow_dest(void *obj, int *dest_id, int *dest_type)
1771{
1772	struct devx_obj *devx_obj = obj;
1773	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
1774
1775	switch (opcode) {
1776	case MLX5_CMD_OP_DESTROY_TIR:
1777		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1778		*dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
1779				    obj_id);
1780		return true;
1781
1782	case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
1783		*dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1784		*dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
1785				    table_id);
1786		return true;
1787	default:
1788		return false;
1789	}
1790}
1791
1792static int get_dests(struct uverbs_attr_bundle *attrs,
1793		     struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
1794		     int *dest_type, struct ib_qp **qp, u32 *flags)
1795{
1796	bool dest_devx, dest_qp;
1797	void *devx_obj;
1798	int err;
1799
1800	dest_devx = uverbs_attr_is_valid(attrs,
1801					 MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
1802	dest_qp = uverbs_attr_is_valid(attrs,
1803				       MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
1804
1805	*flags = 0;
1806	err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
1807				 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
1808					 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
1809	if (err)
1810		return err;
1811
1812	/* Both flags are not allowed */
1813	if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
1814	    *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
1815		return -EINVAL;
1816
1817	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
1818		if (dest_devx && (dest_qp || *flags))
1819			return -EINVAL;
1820		else if (dest_qp && *flags)
1821			return -EINVAL;
1822	}
1823
1824	/* Allow only DEVX object, drop as dest for FDB */
1825	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
1826	    !(dest_devx || (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
1827		return -EINVAL;
1828
1829	/* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
1830	if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
1831	    ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
1832		return -EINVAL;
1833
1834	*qp = NULL;
1835	if (dest_devx) {
1836		devx_obj =
1837			uverbs_attr_get_obj(attrs,
1838					    MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
1839
1840		/* Verify that the given DEVX object is a flow
1841		 * steering destination.
1842		 */
1843		if (!is_flow_dest(devx_obj, dest_id, dest_type))
1844			return -EINVAL;
1845		/* Allow only flow table as dest when inserting to FDB or RDMA_RX */
1846		if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
1847		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
1848		    *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
1849			return -EINVAL;
1850	} else if (dest_qp) {
1851		struct mlx5_ib_qp *mqp;
1852
1853		*qp = uverbs_attr_get_obj(attrs,
1854					  MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
1855		if (IS_ERR(*qp))
1856			return PTR_ERR(*qp);
1857
1858		if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
1859			return -EINVAL;
1860
1861		mqp = to_mqp(*qp);
1862		if (mqp->is_rss)
1863			*dest_id = mqp->rss_qp.tirn;
1864		else
1865			*dest_id = mqp->raw_packet_qp.rq.tirn;
1866		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1867	} else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
1868		    fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) &&
1869		   !(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
1870		*dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
1871	}
1872
1873	if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
1874	    (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
1875	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
1876		return -EINVAL;
1877
1878	return 0;
1879}
1880
1881static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id)
1882{
1883	struct devx_obj *devx_obj = obj;
1884	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
1885
1886	if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
1887
1888		if (offset && offset >= devx_obj->flow_counter_bulk_size)
1889			return false;
1890
1891		*counter_id = MLX5_GET(dealloc_flow_counter_in,
1892				       devx_obj->dinbox,
1893				       flow_counter_id);
1894		*counter_id += offset;
1895		return true;
1896	}
1897
1898	return false;
1899}
1900
1901#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
1902static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
1903	struct uverbs_attr_bundle *attrs)
1904{
1905	struct mlx5_flow_context flow_context = {.flow_tag =
1906		MLX5_FS_DEFAULT_FLOW_TAG};
1907	u32 *offset_attr, offset = 0, counter_id = 0;
1908	int dest_id, dest_type = -1, inlen, len, ret, i;
1909	struct mlx5_ib_flow_handler *flow_handler;
1910	struct mlx5_ib_flow_matcher *fs_matcher;
1911	struct ib_uobject **arr_flow_actions;
1912	struct ib_uflow_resources *uflow_res;
1913	struct mlx5_flow_act flow_act = {};
1914	struct ib_qp *qp = NULL;
1915	void *devx_obj, *cmd_in;
1916	struct ib_uobject *uobj;
1917	struct mlx5_ib_dev *dev;
1918	u32 flags;
1919
1920	if (!capable(CAP_NET_RAW))
1921		return -EPERM;
1922
1923	fs_matcher = uverbs_attr_get_obj(attrs,
1924					 MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
1925	uobj =  uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
1926	dev = mlx5_udata_to_mdev(&attrs->driver_udata);
1927
1928	if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
1929		return -EINVAL;
1930
1931	if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
1932		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
1933
1934	if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
1935		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
1936
1937	len = uverbs_attr_get_uobjs_arr(attrs,
1938		MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
1939	if (len) {
1940		devx_obj = arr_flow_actions[0]->object;
1941
1942		if (uverbs_attr_is_valid(attrs,
1943					 MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
1944
1945			int num_offsets = uverbs_attr_ptr_get_array_size(
1946				attrs,
1947				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
1948				sizeof(u32));
1949
1950			if (num_offsets != 1)
1951				return -EINVAL;
1952
1953			offset_attr = uverbs_attr_get_alloced_ptr(
1954				attrs,
1955				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
1956			offset = *offset_attr;
1957		}
1958
1959		if (!is_flow_counter(devx_obj, offset, &counter_id))
1960			return -EINVAL;
1961
1962		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1963	}
1964
1965	cmd_in = uverbs_attr_get_alloced_ptr(
1966		attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
1967	inlen = uverbs_attr_get_len(attrs,
1968				    MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
1969
1970	uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
1971	if (!uflow_res)
1972		return -ENOMEM;
1973
1974	len = uverbs_attr_get_uobjs_arr(attrs,
1975		MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
1976	for (i = 0; i < len; i++) {
1977		struct mlx5_ib_flow_action *maction =
1978			to_mflow_act(arr_flow_actions[i]->object);
1979
1980		ret = parse_flow_flow_action(maction, false, &flow_act);
1981		if (ret)
1982			goto err_out;
1983		flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
1984				   arr_flow_actions[i]->object);
1985	}
1986
1987	ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
1988			       MLX5_IB_ATTR_CREATE_FLOW_TAG);
1989	if (!ret) {
1990		if (flow_context.flow_tag >= BIT(24)) {
1991			ret = -EINVAL;
1992			goto err_out;
1993		}
1994		flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
1995	}
1996
1997	flow_handler =
1998		raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act,
1999				counter_id, cmd_in, inlen, dest_id, dest_type);
2000	if (IS_ERR(flow_handler)) {
2001		ret = PTR_ERR(flow_handler);
2002		goto err_out;
2003	}
2004
2005	ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
2006
2007	return 0;
2008err_out:
2009	ib_uverbs_flow_resources_free(uflow_res);
2010	return ret;
2011}
2012
2013static int flow_matcher_cleanup(struct ib_uobject *uobject,
2014				enum rdma_remove_reason why,
2015				struct uverbs_attr_bundle *attrs)
2016{
2017	struct mlx5_ib_flow_matcher *obj = uobject->object;
2018
2019	if (atomic_read(&obj->usecnt))
2020		return -EBUSY;
2021
2022	kfree(obj);
2023	return 0;
2024}
2025
2026static int steering_anchor_create_ft(struct mlx5_ib_dev *dev,
2027				     struct mlx5_ib_flow_prio *ft_prio,
2028				     enum mlx5_flow_namespace_type ns_type)
2029{
2030	struct mlx5_flow_table_attr ft_attr = {};
2031	struct mlx5_flow_namespace *ns;
2032	struct mlx5_flow_table *ft;
2033
2034	if (ft_prio->anchor.ft)
2035		return 0;
2036
2037	ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
2038	if (!ns)
2039		return -EOPNOTSUPP;
2040
2041	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
2042	ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
2043	ft_attr.prio = 0;
2044	ft_attr.max_fte = 2;
2045	ft_attr.level = 1;
2046
2047	ft = mlx5_create_flow_table(ns, &ft_attr);
2048	if (IS_ERR(ft))
2049		return PTR_ERR(ft);
2050
2051	ft_prio->anchor.ft = ft;
2052
2053	return 0;
2054}
2055
2056static void steering_anchor_destroy_ft(struct mlx5_ib_flow_prio *ft_prio)
2057{
2058	if (ft_prio->anchor.ft) {
2059		mlx5_destroy_flow_table(ft_prio->anchor.ft);
2060		ft_prio->anchor.ft = NULL;
2061	}
2062}
2063
2064static int
2065steering_anchor_create_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
2066{
2067	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
2068	struct mlx5_flow_group *fg;
2069	void *flow_group_in;
2070	int err = 0;
2071
2072	if (ft_prio->anchor.fg_drop)
2073		return 0;
2074
2075	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
2076	if (!flow_group_in)
2077		return -ENOMEM;
2078
2079	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
2080	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
2081
2082	fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
2083	if (IS_ERR(fg)) {
2084		err = PTR_ERR(fg);
2085		goto out;
2086	}
2087
2088	ft_prio->anchor.fg_drop = fg;
2089
2090out:
2091	kvfree(flow_group_in);
2092
2093	return err;
2094}
2095
2096static void
2097steering_anchor_destroy_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
2098{
2099	if (ft_prio->anchor.fg_drop) {
2100		mlx5_destroy_flow_group(ft_prio->anchor.fg_drop);
2101		ft_prio->anchor.fg_drop = NULL;
2102	}
2103}
2104
2105static int
2106steering_anchor_create_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2107{
2108	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
2109	struct mlx5_flow_group *fg;
2110	void *flow_group_in;
2111	int err = 0;
2112
2113	if (ft_prio->anchor.fg_goto_table)
2114		return 0;
2115
2116	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
2117	if (!flow_group_in)
2118		return -ENOMEM;
2119
2120	fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
2121	if (IS_ERR(fg)) {
2122		err = PTR_ERR(fg);
2123		goto out;
2124	}
2125	ft_prio->anchor.fg_goto_table = fg;
2126
2127out:
2128	kvfree(flow_group_in);
2129
2130	return err;
2131}
2132
2133static void
2134steering_anchor_destroy_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2135{
2136	if (ft_prio->anchor.fg_goto_table) {
2137		mlx5_destroy_flow_group(ft_prio->anchor.fg_goto_table);
2138		ft_prio->anchor.fg_goto_table = NULL;
2139	}
2140}
2141
2142static int
2143steering_anchor_create_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
2144{
2145	struct mlx5_flow_act flow_act = {};
2146	struct mlx5_flow_handle *handle;
2147
2148	if (ft_prio->anchor.rule_drop)
2149		return 0;
2150
2151	flow_act.fg = ft_prio->anchor.fg_drop;
2152	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
2153
2154	handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
2155				     NULL, 0);
2156	if (IS_ERR(handle))
2157		return PTR_ERR(handle);
2158
2159	ft_prio->anchor.rule_drop = handle;
2160
2161	return 0;
2162}
2163
2164static void steering_anchor_destroy_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
2165{
2166	if (ft_prio->anchor.rule_drop) {
2167		mlx5_del_flow_rules(ft_prio->anchor.rule_drop);
2168		ft_prio->anchor.rule_drop = NULL;
2169	}
2170}
2171
2172static int
2173steering_anchor_create_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2174{
2175	struct mlx5_flow_destination dest = {};
2176	struct mlx5_flow_act flow_act = {};
2177	struct mlx5_flow_handle *handle;
2178
2179	if (ft_prio->anchor.rule_goto_table)
2180		return 0;
2181
2182	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2183	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
2184	flow_act.fg = ft_prio->anchor.fg_goto_table;
2185
2186	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
2187	dest.ft = ft_prio->flow_table;
2188
2189	handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
2190				     &dest, 1);
2191	if (IS_ERR(handle))
2192		return PTR_ERR(handle);
2193
2194	ft_prio->anchor.rule_goto_table = handle;
2195
2196	return 0;
2197}
2198
2199static void
2200steering_anchor_destroy_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2201{
2202	if (ft_prio->anchor.rule_goto_table) {
2203		mlx5_del_flow_rules(ft_prio->anchor.rule_goto_table);
2204		ft_prio->anchor.rule_goto_table = NULL;
2205	}
2206}
2207
2208static int steering_anchor_create_res(struct mlx5_ib_dev *dev,
2209				      struct mlx5_ib_flow_prio *ft_prio,
2210				      enum mlx5_flow_namespace_type ns_type)
2211{
2212	int err;
2213
2214	err = steering_anchor_create_ft(dev, ft_prio, ns_type);
2215	if (err)
2216		return err;
2217
2218	err = steering_anchor_create_fg_drop(ft_prio);
2219	if (err)
2220		goto destroy_ft;
2221
2222	err = steering_anchor_create_fg_goto_table(ft_prio);
2223	if (err)
2224		goto destroy_fg_drop;
2225
2226	err = steering_anchor_create_rule_drop(ft_prio);
2227	if (err)
2228		goto destroy_fg_goto_table;
2229
2230	err = steering_anchor_create_rule_goto_table(ft_prio);
2231	if (err)
2232		goto destroy_rule_drop;
2233
2234	return 0;
2235
2236destroy_rule_drop:
2237	steering_anchor_destroy_rule_drop(ft_prio);
2238destroy_fg_goto_table:
2239	steering_anchor_destroy_fg_goto_table(ft_prio);
2240destroy_fg_drop:
2241	steering_anchor_destroy_fg_drop(ft_prio);
2242destroy_ft:
2243	steering_anchor_destroy_ft(ft_prio);
2244
2245	return err;
2246}
2247
/*
 * Release all steering anchor resources of @ft_prio.
 *
 * Teardown runs in the reverse of the creation order: rules first, then
 * the flow groups, and the anchor flow table last.  Each helper skips a
 * resource that is not present.
 */
static void mlx5_steering_anchor_destroy_res(struct mlx5_ib_flow_prio *ft_prio)
{
	/* Rules. */
	steering_anchor_destroy_rule_goto_table(ft_prio);
	steering_anchor_destroy_rule_drop(ft_prio);

	/* Flow groups. */
	steering_anchor_destroy_fg_goto_table(ft_prio);
	steering_anchor_destroy_fg_drop(ft_prio);

	/* Anchor flow table. */
	steering_anchor_destroy_ft(ft_prio);
}
2256
2257static int steering_anchor_cleanup(struct ib_uobject *uobject,
2258				   enum rdma_remove_reason why,
2259				   struct uverbs_attr_bundle *attrs)
2260{
2261	struct mlx5_ib_steering_anchor *obj = uobject->object;
2262
2263	if (atomic_read(&obj->usecnt))
2264		return -EBUSY;
2265
2266	mutex_lock(&obj->dev->flow_db->lock);
2267	if (!--obj->ft_prio->anchor.rule_goto_table_ref)
2268		steering_anchor_destroy_rule_goto_table(obj->ft_prio);
2269
2270	put_flow_table(obj->dev, obj->ft_prio, true);
2271	mutex_unlock(&obj->dev->flow_db->lock);
2272
2273	kfree(obj);
2274	return 0;
2275}
2276
2277static void fs_cleanup_anchor(struct mlx5_ib_flow_prio *prio,
2278			      int count)
2279{
2280	while (count--)
2281		mlx5_steering_anchor_destroy_res(&prio[count]);
2282}
2283
2284void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev)
2285{
2286	fs_cleanup_anchor(dev->flow_db->prios, MLX5_IB_NUM_FLOW_FT);
2287	fs_cleanup_anchor(dev->flow_db->egress_prios, MLX5_IB_NUM_FLOW_FT);
2288	fs_cleanup_anchor(dev->flow_db->sniffer, MLX5_IB_NUM_SNIFFER_FTS);
2289	fs_cleanup_anchor(dev->flow_db->egress, MLX5_IB_NUM_EGRESS_FTS);
2290	fs_cleanup_anchor(dev->flow_db->fdb, MLX5_IB_NUM_FDB_FTS);
2291	fs_cleanup_anchor(dev->flow_db->rdma_rx, MLX5_IB_NUM_FLOW_FT);
2292	fs_cleanup_anchor(dev->flow_db->rdma_tx, MLX5_IB_NUM_FLOW_FT);
2293}
2294
2295static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
2296			      struct mlx5_ib_flow_matcher *obj)
2297{
2298	enum mlx5_ib_uapi_flow_table_type ft_type =
2299		MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
2300	u32 flags;
2301	int err;
2302
2303	/* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older
2304	 * users should switch to it. We leave this to not break userspace
2305	 */
2306	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
2307	    uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
2308		return -EINVAL;
2309
2310	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
2311		err = uverbs_get_const(&ft_type, attrs,
2312				       MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
2313		if (err)
2314			return err;
2315
2316		err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
2317		if (err)
2318			return err;
2319
2320		return 0;
2321	}
2322
2323	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
2324		err = uverbs_get_flags32(&flags, attrs,
2325					 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
2326					 IB_FLOW_ATTR_FLAGS_EGRESS);
2327		if (err)
2328			return err;
2329
2330		if (flags)
2331			return mlx5_ib_ft_type_to_namespace(
2332				MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
2333				&obj->ns_type);
2334	}
2335
2336	obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
2337
2338	return 0;
2339}
2340
2341static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
2342	struct uverbs_attr_bundle *attrs)
2343{
2344	struct ib_uobject *uobj = uverbs_attr_get_uobject(
2345		attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
2346	struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
2347	struct mlx5_ib_flow_matcher *obj;
2348	int err;
2349
2350	obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
2351	if (!obj)
2352		return -ENOMEM;
2353
2354	obj->mask_len = uverbs_attr_get_len(
2355		attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
2356	err = uverbs_copy_from(&obj->matcher_mask,
2357			       attrs,
2358			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
2359	if (err)
2360		goto end;
2361
2362	obj->flow_type = uverbs_attr_get_enum_id(
2363		attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
2364
2365	if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
2366		err = uverbs_copy_from(&obj->priority,
2367				       attrs,
2368				       MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
2369		if (err)
2370			goto end;
2371	}
2372
2373	err = uverbs_copy_from(&obj->match_criteria_enable,
2374			       attrs,
2375			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
2376	if (err)
2377		goto end;
2378
2379	err = mlx5_ib_matcher_ns(attrs, obj);
2380	if (err)
2381		goto end;
2382
2383	if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
2384	    mlx5_eswitch_mode(dev->mdev) != MLX5_ESWITCH_OFFLOADS) {
2385		err = -EINVAL;
2386		goto end;
2387	}
2388
2389	uobj->object = obj;
2390	obj->mdev = dev->mdev;
2391	atomic_set(&obj->usecnt, 0);
2392	return 0;
2393
2394end:
2395	kfree(obj);
2396	return err;
2397}
2398
2399static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
2400	struct uverbs_attr_bundle *attrs)
2401{
2402	struct ib_uobject *uobj = uverbs_attr_get_uobject(
2403		attrs, MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE);
2404	struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
2405	enum mlx5_ib_uapi_flow_table_type ib_uapi_ft_type;
2406	enum mlx5_flow_namespace_type ns_type;
2407	struct mlx5_ib_steering_anchor *obj;
2408	struct mlx5_ib_flow_prio *ft_prio;
2409	u16 priority;
2410	u32 ft_id;
2411	int err;
2412
2413	if (!capable(CAP_NET_RAW))
2414		return -EPERM;
2415
2416	err = uverbs_get_const(&ib_uapi_ft_type, attrs,
2417			       MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE);
2418	if (err)
2419		return err;
2420
2421	err = mlx5_ib_ft_type_to_namespace(ib_uapi_ft_type, &ns_type);
2422	if (err)
2423		return err;
2424
2425	err = uverbs_copy_from(&priority, attrs,
2426			       MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY);
2427	if (err)
2428		return err;
2429
2430	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
2431	if (!obj)
2432		return -ENOMEM;
2433
2434	mutex_lock(&dev->flow_db->lock);
2435
2436	ft_prio = _get_flow_table(dev, priority, ns_type, 0);
2437	if (IS_ERR(ft_prio)) {
2438		err = PTR_ERR(ft_prio);
2439		goto free_obj;
2440	}
2441
2442	ft_prio->refcount++;
2443
2444	if (!ft_prio->anchor.rule_goto_table_ref) {
2445		err = steering_anchor_create_res(dev, ft_prio, ns_type);
2446		if (err)
2447			goto put_flow_table;
2448	}
2449
2450	ft_prio->anchor.rule_goto_table_ref++;
2451
2452	ft_id = mlx5_flow_table_id(ft_prio->anchor.ft);
2453
2454	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
2455			     &ft_id, sizeof(ft_id));
2456	if (err)
2457		goto destroy_res;
2458
2459	mutex_unlock(&dev->flow_db->lock);
2460
2461	uobj->object = obj;
2462	obj->dev = dev;
2463	obj->ft_prio = ft_prio;
2464	atomic_set(&obj->usecnt, 0);
2465
2466	return 0;
2467
2468destroy_res:
2469	--ft_prio->anchor.rule_goto_table_ref;
2470	mlx5_steering_anchor_destroy_res(ft_prio);
2471put_flow_table:
2472	put_flow_table(dev, ft_prio, true);
2473free_obj:
2474	mutex_unlock(&dev->flow_db->lock);
2475	kfree(obj);
2476
2477	return err;
2478}
2479
2480static struct ib_flow_action *
2481mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
2482			     enum mlx5_ib_uapi_flow_table_type ft_type,
2483			     u8 num_actions, void *in)
2484{
2485	enum mlx5_flow_namespace_type namespace;
2486	struct mlx5_ib_flow_action *maction;
2487	int ret;
2488
2489	ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
2490	if (ret)
2491		return ERR_PTR(-EINVAL);
2492
2493	maction = kzalloc(sizeof(*maction), GFP_KERNEL);
2494	if (!maction)
2495		return ERR_PTR(-ENOMEM);
2496
2497	maction->flow_action_raw.modify_hdr =
2498		mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
2499
2500	if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
2501		ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
2502		kfree(maction);
2503		return ERR_PTR(ret);
2504	}
2505	maction->flow_action_raw.sub_type =
2506		MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
2507	maction->flow_action_raw.dev = dev;
2508
2509	return &maction->ib_action;
2510}
2511
2512static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
2513{
2514	return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
2515					 max_modify_header_actions) ||
2516	       MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
2517					 max_modify_header_actions) ||
2518	       MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
2519					 max_modify_header_actions);
2520}
2521
2522static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
2523	struct uverbs_attr_bundle *attrs)
2524{
2525	struct ib_uobject *uobj = uverbs_attr_get_uobject(
2526		attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
2527	struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
2528	enum mlx5_ib_uapi_flow_table_type ft_type;
2529	struct ib_flow_action *action;
2530	int num_actions;
2531	void *in;
2532	int ret;
2533
2534	if (!mlx5_ib_modify_header_supported(mdev))
2535		return -EOPNOTSUPP;
2536
2537	in = uverbs_attr_get_alloced_ptr(attrs,
2538		MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
2539
2540	num_actions = uverbs_attr_ptr_get_array_size(
2541		attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
2542		MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
2543	if (num_actions < 0)
2544		return num_actions;
2545
2546	ret = uverbs_get_const(&ft_type, attrs,
2547			       MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
2548	if (ret)
2549		return ret;
2550	action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
2551	if (IS_ERR(action))
2552		return PTR_ERR(action);
2553
2554	uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
2555				       IB_FLOW_ACTION_UNSPECIFIED);
2556
2557	return 0;
2558}
2559
2560static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
2561						      u8 packet_reformat_type,
2562						      u8 ft_type)
2563{
2564	switch (packet_reformat_type) {
2565	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
2566		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
2567			return MLX5_CAP_FLOWTABLE(ibdev->mdev,
2568						  encap_general_header);
2569		break;
2570	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
2571		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
2572			return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
2573				reformat_l2_to_l3_tunnel);
2574		break;
2575	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
2576		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
2577			return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
2578				reformat_l3_tunnel_to_l2);
2579		break;
2580	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
2581		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
2582			return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
2583		break;
2584	default:
2585		break;
2586	}
2587
2588	return false;
2589}
2590
2591static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
2592{
2593	switch (dv_prt) {
2594	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
2595		*prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
2596		break;
2597	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
2598		*prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
2599		break;
2600	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
2601		*prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
2602		break;
2603	default:
2604		return -EINVAL;
2605	}
2606
2607	return 0;
2608}
2609
2610static int mlx5_ib_flow_action_create_packet_reformat_ctx(
2611	struct mlx5_ib_dev *dev,
2612	struct mlx5_ib_flow_action *maction,
2613	u8 ft_type, u8 dv_prt,
2614	void *in, size_t len)
2615{
2616	struct mlx5_pkt_reformat_params reformat_params;
2617	enum mlx5_flow_namespace_type namespace;
2618	u8 prm_prt;
2619	int ret;
2620
2621	ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
2622	if (ret)
2623		return ret;
2624
2625	ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
2626	if (ret)
2627		return ret;
2628
2629	memset(&reformat_params, 0, sizeof(reformat_params));
2630	reformat_params.type = prm_prt;
2631	reformat_params.size = len;
2632	reformat_params.data = in;
2633	maction->flow_action_raw.pkt_reformat =
2634		mlx5_packet_reformat_alloc(dev->mdev, &reformat_params,
2635					   namespace);
2636	if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
2637		ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
2638		return ret;
2639	}
2640
2641	maction->flow_action_raw.sub_type =
2642		MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
2643	maction->flow_action_raw.dev = dev;
2644
2645	return 0;
2646}
2647
2648static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
2649	struct uverbs_attr_bundle *attrs)
2650{
2651	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
2652		MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
2653	struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
2654	enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
2655	enum mlx5_ib_uapi_flow_table_type ft_type;
2656	struct mlx5_ib_flow_action *maction;
2657	int ret;
2658
2659	ret = uverbs_get_const(&ft_type, attrs,
2660			       MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
2661	if (ret)
2662		return ret;
2663
2664	ret = uverbs_get_const(&dv_prt, attrs,
2665			       MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
2666	if (ret)
2667		return ret;
2668
2669	if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
2670		return -EOPNOTSUPP;
2671
2672	maction = kzalloc(sizeof(*maction), GFP_KERNEL);
2673	if (!maction)
2674		return -ENOMEM;
2675
2676	if (dv_prt ==
2677	    MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
2678		maction->flow_action_raw.sub_type =
2679			MLX5_IB_FLOW_ACTION_DECAP;
2680		maction->flow_action_raw.dev = mdev;
2681	} else {
2682		void *in;
2683		int len;
2684
2685		in = uverbs_attr_get_alloced_ptr(attrs,
2686			MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
2687		if (IS_ERR(in)) {
2688			ret = PTR_ERR(in);
2689			goto free_maction;
2690		}
2691
2692		len = uverbs_attr_get_len(attrs,
2693			MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
2694
2695		ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
2696			maction, ft_type, dv_prt, in, len);
2697		if (ret)
2698			goto free_maction;
2699	}
2700
2701	uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
2702				       IB_FLOW_ACTION_UNSPECIFIED);
2703	return 0;
2704
2705free_maction:
2706	kfree(maction);
2707	return ret;
2708}
2709
2710DECLARE_UVERBS_NAMED_METHOD(
2711	MLX5_IB_METHOD_CREATE_FLOW,
2712	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
2713			UVERBS_OBJECT_FLOW,
2714			UVERBS_ACCESS_NEW,
2715			UA_MANDATORY),
2716	UVERBS_ATTR_PTR_IN(
2717		MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
2718		UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
2719		UA_MANDATORY,
2720		UA_ALLOC_AND_COPY),
2721	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
2722			MLX5_IB_OBJECT_FLOW_MATCHER,
2723			UVERBS_ACCESS_READ,
2724			UA_MANDATORY),
2725	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
2726			UVERBS_OBJECT_QP,
2727			UVERBS_ACCESS_READ),
2728	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
2729			MLX5_IB_OBJECT_DEVX_OBJ,
2730			UVERBS_ACCESS_READ),
2731	UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
2732			     UVERBS_OBJECT_FLOW_ACTION,
2733			     UVERBS_ACCESS_READ, 1,
2734			     MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
2735			     UA_OPTIONAL),
2736	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
2737			   UVERBS_ATTR_TYPE(u32),
2738			   UA_OPTIONAL),
2739	UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
2740			     MLX5_IB_OBJECT_DEVX_OBJ,
2741			     UVERBS_ACCESS_READ, 1, 1,
2742			     UA_OPTIONAL),
2743	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
2744			   UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
2745			   UA_OPTIONAL,
2746			   UA_ALLOC_AND_COPY),
2747	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
2748			     enum mlx5_ib_create_flow_flags,
2749			     UA_OPTIONAL));
2750
2751DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2752	MLX5_IB_METHOD_DESTROY_FLOW,
2753	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
2754			UVERBS_OBJECT_FLOW,
2755			UVERBS_ACCESS_DESTROY,
2756			UA_MANDATORY));
2757
2758ADD_UVERBS_METHODS(mlx5_ib_fs,
2759		   UVERBS_OBJECT_FLOW,
2760		   &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
2761		   &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
2762
2763DECLARE_UVERBS_NAMED_METHOD(
2764	MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
2765	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
2766			UVERBS_OBJECT_FLOW_ACTION,
2767			UVERBS_ACCESS_NEW,
2768			UA_MANDATORY),
2769	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
2770			   UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
2771				   set_add_copy_action_in_auto)),
2772			   UA_MANDATORY,
2773			   UA_ALLOC_AND_COPY),
2774	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
2775			     enum mlx5_ib_uapi_flow_table_type,
2776			     UA_MANDATORY));
2777
2778DECLARE_UVERBS_NAMED_METHOD(
2779	MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
2780	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
2781			UVERBS_OBJECT_FLOW_ACTION,
2782			UVERBS_ACCESS_NEW,
2783			UA_MANDATORY),
2784	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
2785			   UVERBS_ATTR_MIN_SIZE(1),
2786			   UA_ALLOC_AND_COPY,
2787			   UA_OPTIONAL),
2788	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
2789			     enum mlx5_ib_uapi_flow_action_packet_reformat_type,
2790			     UA_MANDATORY),
2791	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
2792			     enum mlx5_ib_uapi_flow_table_type,
2793			     UA_MANDATORY));
2794
2795ADD_UVERBS_METHODS(
2796	mlx5_ib_flow_actions,
2797	UVERBS_OBJECT_FLOW_ACTION,
2798	&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
2799	&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
2800
2801DECLARE_UVERBS_NAMED_METHOD(
2802	MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
2803	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
2804			MLX5_IB_OBJECT_FLOW_MATCHER,
2805			UVERBS_ACCESS_NEW,
2806			UA_MANDATORY),
2807	UVERBS_ATTR_PTR_IN(
2808		MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
2809		UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
2810		UA_MANDATORY),
2811	UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
2812			    mlx5_ib_flow_type,
2813			    UA_MANDATORY),
2814	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
2815			   UVERBS_ATTR_TYPE(u8),
2816			   UA_MANDATORY),
2817	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
2818			     enum ib_flow_flags,
2819			     UA_OPTIONAL),
2820	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
2821			     enum mlx5_ib_uapi_flow_table_type,
2822			     UA_OPTIONAL));
2823
2824DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2825	MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
2826	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
2827			MLX5_IB_OBJECT_FLOW_MATCHER,
2828			UVERBS_ACCESS_DESTROY,
2829			UA_MANDATORY));
2830
2831DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
2832			    UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
2833			    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
2834			    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
2835
2836DECLARE_UVERBS_NAMED_METHOD(
2837	MLX5_IB_METHOD_STEERING_ANCHOR_CREATE,
2838	UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE,
2839			MLX5_IB_OBJECT_STEERING_ANCHOR,
2840			UVERBS_ACCESS_NEW,
2841			UA_MANDATORY),
2842	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE,
2843			     enum mlx5_ib_uapi_flow_table_type,
2844			     UA_MANDATORY),
2845	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY,
2846			   UVERBS_ATTR_TYPE(u16),
2847			   UA_MANDATORY),
2848	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
2849			   UVERBS_ATTR_TYPE(u32),
2850			   UA_MANDATORY));
2851
2852DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2853	MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY,
2854	UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_DESTROY_HANDLE,
2855			MLX5_IB_OBJECT_STEERING_ANCHOR,
2856			UVERBS_ACCESS_DESTROY,
2857			UA_MANDATORY));
2858
2859DECLARE_UVERBS_NAMED_OBJECT(
2860	MLX5_IB_OBJECT_STEERING_ANCHOR,
2861	UVERBS_TYPE_ALLOC_IDR(steering_anchor_cleanup),
2862	&UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE),
2863	&UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY));
2864
2865const struct uapi_definition mlx5_ib_flow_defs[] = {
2866	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2867		MLX5_IB_OBJECT_FLOW_MATCHER),
2868	UAPI_DEF_CHAIN_OBJ_TREE(
2869		UVERBS_OBJECT_FLOW,
2870		&mlx5_ib_fs),
2871	UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
2872				&mlx5_ib_flow_actions),
2873	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2874		MLX5_IB_OBJECT_STEERING_ANCHOR,
2875		UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)),
2876	{},
2877};
2878
2879static const struct ib_device_ops flow_ops = {
2880	.create_flow = mlx5_ib_create_flow,
2881	.destroy_flow = mlx5_ib_destroy_flow,
2882	.destroy_flow_action = mlx5_ib_destroy_flow_action,
2883};
2884
2885int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
2886{
2887	dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
2888
2889	if (!dev->flow_db)
2890		return -ENOMEM;
2891
2892	mutex_init(&dev->flow_db->lock);
2893
2894	ib_set_device_ops(&dev->ib_dev, &flow_ops);
2895	return 0;
2896}