Linux Audio

Check our new training course

Open-source upstreaming

Need help get the support for your hardware in upstream Linux?
Loading...
Note: File does not exist in v3.15.
   1// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
   2/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
   3
   4#include <linux/kernel.h>
   5#include <linux/types.h>
   6#include <linux/rhashtable.h>
   7#include <linux/bitops.h>
   8#include <linux/in6.h>
   9#include <linux/notifier.h>
  10#include <linux/inetdevice.h>
  11#include <linux/netdevice.h>
  12#include <linux/if_bridge.h>
  13#include <linux/socket.h>
  14#include <linux/route.h>
  15#include <linux/gcd.h>
  16#include <linux/if_macvlan.h>
  17#include <linux/refcount.h>
  18#include <linux/jhash.h>
  19#include <net/netevent.h>
  20#include <net/neighbour.h>
  21#include <net/arp.h>
  22#include <net/ip_fib.h>
  23#include <net/ip6_fib.h>
  24#include <net/nexthop.h>
  25#include <net/fib_rules.h>
  26#include <net/ip_tunnels.h>
  27#include <net/l3mdev.h>
  28#include <net/addrconf.h>
  29#include <net/ndisc.h>
  30#include <net/ipv6.h>
  31#include <net/fib_notifier.h>
  32#include <net/switchdev.h>
  33
  34#include "spectrum.h"
  35#include "core.h"
  36#include "reg.h"
  37#include "spectrum_cnt.h"
  38#include "spectrum_dpipe.h"
  39#include "spectrum_ipip.h"
  40#include "spectrum_mr.h"
  41#include "spectrum_mr_tcam.h"
  42#include "spectrum_router.h"
  43#include "spectrum_span.h"
  44
  45struct mlxsw_sp_fib;
  46struct mlxsw_sp_vr;
  47struct mlxsw_sp_lpm_tree;
  48struct mlxsw_sp_rif_ops;
  49
  50struct mlxsw_sp_router {
  51	struct mlxsw_sp *mlxsw_sp;
  52	struct mlxsw_sp_rif **rifs;
  53	struct mlxsw_sp_vr *vrs;
  54	struct rhashtable neigh_ht;
  55	struct rhashtable nexthop_group_ht;
  56	struct rhashtable nexthop_ht;
  57	struct list_head nexthop_list;
  58	struct {
  59		/* One tree for each protocol: IPv4 and IPv6 */
  60		struct mlxsw_sp_lpm_tree *proto_trees[2];
  61		struct mlxsw_sp_lpm_tree *trees;
  62		unsigned int tree_count;
  63	} lpm;
  64	struct {
  65		struct delayed_work dw;
  66		unsigned long interval;	/* ms */
  67	} neighs_update;
  68	struct delayed_work nexthop_probe_dw;
  69#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
  70	struct list_head nexthop_neighs_list;
  71	struct list_head ipip_list;
  72	bool aborted;
  73	struct notifier_block fib_nb;
  74	struct notifier_block netevent_nb;
  75	struct notifier_block inetaddr_nb;
  76	struct notifier_block inet6addr_nb;
  77	const struct mlxsw_sp_rif_ops **rif_ops_arr;
  78	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
  79};
  80
  81struct mlxsw_sp_rif {
  82	struct list_head nexthop_list;
  83	struct list_head neigh_list;
  84	struct net_device *dev; /* NULL for underlay RIF */
  85	struct mlxsw_sp_fid *fid;
  86	unsigned char addr[ETH_ALEN];
  87	int mtu;
  88	u16 rif_index;
  89	u16 vr_id;
  90	const struct mlxsw_sp_rif_ops *ops;
  91	struct mlxsw_sp *mlxsw_sp;
  92
  93	unsigned int counter_ingress;
  94	bool counter_ingress_valid;
  95	unsigned int counter_egress;
  96	bool counter_egress_valid;
  97};
  98
  99struct mlxsw_sp_rif_params {
 100	struct net_device *dev;
 101	union {
 102		u16 system_port;
 103		u16 lag_id;
 104	};
 105	u16 vid;
 106	bool lag;
 107};
 108
 109struct mlxsw_sp_rif_subport {
 110	struct mlxsw_sp_rif common;
 111	refcount_t ref_count;
 112	union {
 113		u16 system_port;
 114		u16 lag_id;
 115	};
 116	u16 vid;
 117	bool lag;
 118};
 119
 120struct mlxsw_sp_rif_ipip_lb {
 121	struct mlxsw_sp_rif common;
 122	struct mlxsw_sp_rif_ipip_lb_config lb_config;
 123	u16 ul_vr_id; /* Reserved for Spectrum-2. */
 124	u16 ul_rif_id; /* Reserved for Spectrum. */
 125};
 126
 127struct mlxsw_sp_rif_params_ipip_lb {
 128	struct mlxsw_sp_rif_params common;
 129	struct mlxsw_sp_rif_ipip_lb_config lb_config;
 130};
 131
 132struct mlxsw_sp_rif_ops {
 133	enum mlxsw_sp_rif_type type;
 134	size_t rif_size;
 135
 136	void (*setup)(struct mlxsw_sp_rif *rif,
 137		      const struct mlxsw_sp_rif_params *params);
 138	int (*configure)(struct mlxsw_sp_rif *rif);
 139	void (*deconfigure)(struct mlxsw_sp_rif *rif);
 140	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
 141					 struct netlink_ext_ack *extack);
 142	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
 143};
 144
 145static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
 146static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
 147static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
 148				  struct mlxsw_sp_lpm_tree *lpm_tree);
 149static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
 150				     const struct mlxsw_sp_fib *fib,
 151				     u8 tree_id);
 152static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
 153				       const struct mlxsw_sp_fib *fib);
 154
 155static unsigned int *
 156mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
 157			   enum mlxsw_sp_rif_counter_dir dir)
 158{
 159	switch (dir) {
 160	case MLXSW_SP_RIF_COUNTER_EGRESS:
 161		return &rif->counter_egress;
 162	case MLXSW_SP_RIF_COUNTER_INGRESS:
 163		return &rif->counter_ingress;
 164	}
 165	return NULL;
 166}
 167
 168static bool
 169mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
 170			       enum mlxsw_sp_rif_counter_dir dir)
 171{
 172	switch (dir) {
 173	case MLXSW_SP_RIF_COUNTER_EGRESS:
 174		return rif->counter_egress_valid;
 175	case MLXSW_SP_RIF_COUNTER_INGRESS:
 176		return rif->counter_ingress_valid;
 177	}
 178	return false;
 179}
 180
 181static void
 182mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
 183			       enum mlxsw_sp_rif_counter_dir dir,
 184			       bool valid)
 185{
 186	switch (dir) {
 187	case MLXSW_SP_RIF_COUNTER_EGRESS:
 188		rif->counter_egress_valid = valid;
 189		break;
 190	case MLXSW_SP_RIF_COUNTER_INGRESS:
 191		rif->counter_ingress_valid = valid;
 192		break;
 193	}
 194}
 195
 196static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
 197				     unsigned int counter_index, bool enable,
 198				     enum mlxsw_sp_rif_counter_dir dir)
 199{
 200	char ritr_pl[MLXSW_REG_RITR_LEN];
 201	bool is_egress = false;
 202	int err;
 203
 204	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
 205		is_egress = true;
 206	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
 207	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
 208	if (err)
 209		return err;
 210
 211	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
 212				    is_egress);
 213	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
 214}
 215
 216int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
 217				   struct mlxsw_sp_rif *rif,
 218				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
 219{
 220	char ricnt_pl[MLXSW_REG_RICNT_LEN];
 221	unsigned int *p_counter_index;
 222	bool valid;
 223	int err;
 224
 225	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
 226	if (!valid)
 227		return -EINVAL;
 228
 229	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
 230	if (!p_counter_index)
 231		return -EINVAL;
 232	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
 233			     MLXSW_REG_RICNT_OPCODE_NOP);
 234	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
 235	if (err)
 236		return err;
 237	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
 238	return 0;
 239}
 240
 241static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
 242				      unsigned int counter_index)
 243{
 244	char ricnt_pl[MLXSW_REG_RICNT_LEN];
 245
 246	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
 247			     MLXSW_REG_RICNT_OPCODE_CLEAR);
 248	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
 249}
 250
 251int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
 252			       struct mlxsw_sp_rif *rif,
 253			       enum mlxsw_sp_rif_counter_dir dir)
 254{
 255	unsigned int *p_counter_index;
 256	int err;
 257
 258	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
 259	if (!p_counter_index)
 260		return -EINVAL;
 261	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
 262				     p_counter_index);
 263	if (err)
 264		return err;
 265
 266	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
 267	if (err)
 268		goto err_counter_clear;
 269
 270	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
 271					*p_counter_index, true, dir);
 272	if (err)
 273		goto err_counter_edit;
 274	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
 275	return 0;
 276
 277err_counter_edit:
 278err_counter_clear:
 279	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
 280			      *p_counter_index);
 281	return err;
 282}
 283
 284void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
 285			       struct mlxsw_sp_rif *rif,
 286			       enum mlxsw_sp_rif_counter_dir dir)
 287{
 288	unsigned int *p_counter_index;
 289
 290	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
 291		return;
 292
 293	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
 294	if (WARN_ON(!p_counter_index))
 295		return;
 296	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
 297				  *p_counter_index, false, dir);
 298	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
 299			      *p_counter_index);
 300	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
 301}
 302
 303static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
 304{
 305	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
 306	struct devlink *devlink;
 307
 308	devlink = priv_to_devlink(mlxsw_sp->core);
 309	if (!devlink_dpipe_table_counter_enabled(devlink,
 310						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
 311		return;
 312	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
 313}
 314
 315static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
 316{
 317	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
 318
 319	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
 320}
 321
 322#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
 323
 324struct mlxsw_sp_prefix_usage {
 325	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
 326};
 327
 328#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
 329	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
 330
 331static bool
 332mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
 333			 struct mlxsw_sp_prefix_usage *prefix_usage2)
 334{
 335	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
 336}
 337
 338static void
 339mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
 340			  struct mlxsw_sp_prefix_usage *prefix_usage2)
 341{
 342	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
 343}
 344
 345static void
 346mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
 347			  unsigned char prefix_len)
 348{
 349	set_bit(prefix_len, prefix_usage->b);
 350}
 351
 352static void
 353mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
 354			    unsigned char prefix_len)
 355{
 356	clear_bit(prefix_len, prefix_usage->b);
 357}
 358
 359struct mlxsw_sp_fib_key {
 360	unsigned char addr[sizeof(struct in6_addr)];
 361	unsigned char prefix_len;
 362};
 363
 364enum mlxsw_sp_fib_entry_type {
 365	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
 366	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
 367	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
 368	MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,
 369
 370	/* This is a special case of local delivery, where a packet should be
 371	 * decapsulated on reception. Note that there is no corresponding ENCAP,
 372	 * because that's a type of next hop, not of FIB entry. (There can be
 373	 * several next hops in a REMOTE entry, and some of them may be
 374	 * encapsulating entries.)
 375	 */
 376	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
 377	MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
 378};
 379
 380struct mlxsw_sp_nexthop_group;
 381
 382struct mlxsw_sp_fib_node {
 383	struct list_head entry_list;
 384	struct list_head list;
 385	struct rhash_head ht_node;
 386	struct mlxsw_sp_fib *fib;
 387	struct mlxsw_sp_fib_key key;
 388};
 389
 390struct mlxsw_sp_fib_entry_decap {
 391	struct mlxsw_sp_ipip_entry *ipip_entry;
 392	u32 tunnel_index;
 393};
 394
 395struct mlxsw_sp_fib_entry {
 396	struct list_head list;
 397	struct mlxsw_sp_fib_node *fib_node;
 398	enum mlxsw_sp_fib_entry_type type;
 399	struct list_head nexthop_group_node;
 400	struct mlxsw_sp_nexthop_group *nh_group;
 401	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
 402};
 403
 404struct mlxsw_sp_fib4_entry {
 405	struct mlxsw_sp_fib_entry common;
 406	u32 tb_id;
 407	u32 prio;
 408	u8 tos;
 409	u8 type;
 410};
 411
 412struct mlxsw_sp_fib6_entry {
 413	struct mlxsw_sp_fib_entry common;
 414	struct list_head rt6_list;
 415	unsigned int nrt6;
 416};
 417
 418struct mlxsw_sp_rt6 {
 419	struct list_head list;
 420	struct fib6_info *rt;
 421};
 422
 423struct mlxsw_sp_lpm_tree {
 424	u8 id; /* tree ID */
 425	unsigned int ref_count;
 426	enum mlxsw_sp_l3proto proto;
 427	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
 428	struct mlxsw_sp_prefix_usage prefix_usage;
 429};
 430
 431struct mlxsw_sp_fib {
 432	struct rhashtable ht;
 433	struct list_head node_list;
 434	struct mlxsw_sp_vr *vr;
 435	struct mlxsw_sp_lpm_tree *lpm_tree;
 436	enum mlxsw_sp_l3proto proto;
 437};
 438
 439struct mlxsw_sp_vr {
 440	u16 id; /* virtual router ID */
 441	u32 tb_id; /* kernel fib table id */
 442	unsigned int rif_count;
 443	struct mlxsw_sp_fib *fib4;
 444	struct mlxsw_sp_fib *fib6;
 445	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
 446	struct mlxsw_sp_rif *ul_rif;
 447	refcount_t ul_rif_refcnt;
 448};
 449
 450static const struct rhashtable_params mlxsw_sp_fib_ht_params;
 451
 452static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
 453						struct mlxsw_sp_vr *vr,
 454						enum mlxsw_sp_l3proto proto)
 455{
 456	struct mlxsw_sp_lpm_tree *lpm_tree;
 457	struct mlxsw_sp_fib *fib;
 458	int err;
 459
 460	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
 461	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
 462	if (!fib)
 463		return ERR_PTR(-ENOMEM);
 464	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
 465	if (err)
 466		goto err_rhashtable_init;
 467	INIT_LIST_HEAD(&fib->node_list);
 468	fib->proto = proto;
 469	fib->vr = vr;
 470	fib->lpm_tree = lpm_tree;
 471	mlxsw_sp_lpm_tree_hold(lpm_tree);
 472	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
 473	if (err)
 474		goto err_lpm_tree_bind;
 475	return fib;
 476
 477err_lpm_tree_bind:
 478	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
 479err_rhashtable_init:
 480	kfree(fib);
 481	return ERR_PTR(err);
 482}
 483
 484static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
 485				 struct mlxsw_sp_fib *fib)
 486{
 487	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
 488	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
 489	WARN_ON(!list_empty(&fib->node_list));
 490	rhashtable_destroy(&fib->ht);
 491	kfree(fib);
 492}
 493
 494static struct mlxsw_sp_lpm_tree *
 495mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
 496{
 497	static struct mlxsw_sp_lpm_tree *lpm_tree;
 498	int i;
 499
 500	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
 501		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
 502		if (lpm_tree->ref_count == 0)
 503			return lpm_tree;
 504	}
 505	return NULL;
 506}
 507
 508static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
 509				   struct mlxsw_sp_lpm_tree *lpm_tree)
 510{
 511	char ralta_pl[MLXSW_REG_RALTA_LEN];
 512
 513	mlxsw_reg_ralta_pack(ralta_pl, true,
 514			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
 515			     lpm_tree->id);
 516	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
 517}
 518
 519static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
 520				   struct mlxsw_sp_lpm_tree *lpm_tree)
 521{
 522	char ralta_pl[MLXSW_REG_RALTA_LEN];
 523
 524	mlxsw_reg_ralta_pack(ralta_pl, false,
 525			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
 526			     lpm_tree->id);
 527	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
 528}
 529
 530static int
 531mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
 532				  struct mlxsw_sp_prefix_usage *prefix_usage,
 533				  struct mlxsw_sp_lpm_tree *lpm_tree)
 534{
 535	char ralst_pl[MLXSW_REG_RALST_LEN];
 536	u8 root_bin = 0;
 537	u8 prefix;
 538	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
 539
 540	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
 541		root_bin = prefix;
 542
 543	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
 544	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
 545		if (prefix == 0)
 546			continue;
 547		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
 548					 MLXSW_REG_RALST_BIN_NO_CHILD);
 549		last_prefix = prefix;
 550	}
 551	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
 552}
 553
 554static struct mlxsw_sp_lpm_tree *
 555mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
 556			 struct mlxsw_sp_prefix_usage *prefix_usage,
 557			 enum mlxsw_sp_l3proto proto)
 558{
 559	struct mlxsw_sp_lpm_tree *lpm_tree;
 560	int err;
 561
 562	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
 563	if (!lpm_tree)
 564		return ERR_PTR(-EBUSY);
 565	lpm_tree->proto = proto;
 566	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
 567	if (err)
 568		return ERR_PTR(err);
 569
 570	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
 571						lpm_tree);
 572	if (err)
 573		goto err_left_struct_set;
 574	memcpy(&lpm_tree->prefix_usage, prefix_usage,
 575	       sizeof(lpm_tree->prefix_usage));
 576	memset(&lpm_tree->prefix_ref_count, 0,
 577	       sizeof(lpm_tree->prefix_ref_count));
 578	lpm_tree->ref_count = 1;
 579	return lpm_tree;
 580
 581err_left_struct_set:
 582	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
 583	return ERR_PTR(err);
 584}
 585
 586static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
 587				      struct mlxsw_sp_lpm_tree *lpm_tree)
 588{
 589	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
 590}
 591
 592static struct mlxsw_sp_lpm_tree *
 593mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
 594		      struct mlxsw_sp_prefix_usage *prefix_usage,
 595		      enum mlxsw_sp_l3proto proto)
 596{
 597	struct mlxsw_sp_lpm_tree *lpm_tree;
 598	int i;
 599
 600	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
 601		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
 602		if (lpm_tree->ref_count != 0 &&
 603		    lpm_tree->proto == proto &&
 604		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
 605					     prefix_usage)) {
 606			mlxsw_sp_lpm_tree_hold(lpm_tree);
 607			return lpm_tree;
 608		}
 609	}
 610	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
 611}
 612
 613static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
 614{
 615	lpm_tree->ref_count++;
 616}
 617
 618static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
 619				  struct mlxsw_sp_lpm_tree *lpm_tree)
 620{
 621	if (--lpm_tree->ref_count == 0)
 622		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
 623}
 624
 625#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
 626
 627static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
 628{
 629	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
 630	struct mlxsw_sp_lpm_tree *lpm_tree;
 631	u64 max_trees;
 632	int err, i;
 633
 634	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
 635		return -EIO;
 636
 637	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
 638	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
 639	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
 640					     sizeof(struct mlxsw_sp_lpm_tree),
 641					     GFP_KERNEL);
 642	if (!mlxsw_sp->router->lpm.trees)
 643		return -ENOMEM;
 644
 645	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
 646		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
 647		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
 648	}
 649
 650	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
 651					 MLXSW_SP_L3_PROTO_IPV4);
 652	if (IS_ERR(lpm_tree)) {
 653		err = PTR_ERR(lpm_tree);
 654		goto err_ipv4_tree_get;
 655	}
 656	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
 657
 658	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
 659					 MLXSW_SP_L3_PROTO_IPV6);
 660	if (IS_ERR(lpm_tree)) {
 661		err = PTR_ERR(lpm_tree);
 662		goto err_ipv6_tree_get;
 663	}
 664	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
 665
 666	return 0;
 667
 668err_ipv6_tree_get:
 669	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
 670	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
 671err_ipv4_tree_get:
 672	kfree(mlxsw_sp->router->lpm.trees);
 673	return err;
 674}
 675
 676static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
 677{
 678	struct mlxsw_sp_lpm_tree *lpm_tree;
 679
 680	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
 681	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
 682
 683	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
 684	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
 685
 686	kfree(mlxsw_sp->router->lpm.trees);
 687}
 688
 689static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
 690{
 691	return !!vr->fib4 || !!vr->fib6 ||
 692	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
 693	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
 694}
 695
 696static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
 697{
 698	struct mlxsw_sp_vr *vr;
 699	int i;
 700
 701	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
 702		vr = &mlxsw_sp->router->vrs[i];
 703		if (!mlxsw_sp_vr_is_used(vr))
 704			return vr;
 705	}
 706	return NULL;
 707}
 708
 709static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
 710				     const struct mlxsw_sp_fib *fib, u8 tree_id)
 711{
 712	char raltb_pl[MLXSW_REG_RALTB_LEN];
 713
 714	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
 715			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
 716			     tree_id);
 717	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
 718}
 719
 720static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
 721				       const struct mlxsw_sp_fib *fib)
 722{
 723	char raltb_pl[MLXSW_REG_RALTB_LEN];
 724
 725	/* Bind to tree 0 which is default */
 726	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
 727			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
 728	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
 729}
 730
 731static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
 732{
 733	/* For our purpose, squash main, default and local tables into one */
 734	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
 735		tb_id = RT_TABLE_MAIN;
 736	return tb_id;
 737}
 738
 739static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
 740					    u32 tb_id)
 741{
 742	struct mlxsw_sp_vr *vr;
 743	int i;
 744
 745	tb_id = mlxsw_sp_fix_tb_id(tb_id);
 746
 747	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
 748		vr = &mlxsw_sp->router->vrs[i];
 749		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
 750			return vr;
 751	}
 752	return NULL;
 753}
 754
 755int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
 756				u16 *vr_id)
 757{
 758	struct mlxsw_sp_vr *vr;
 759
 760	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
 761	if (!vr)
 762		return -ESRCH;
 763	*vr_id = vr->id;
 764
 765	return 0;
 766}
 767
 768static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
 769					    enum mlxsw_sp_l3proto proto)
 770{
 771	switch (proto) {
 772	case MLXSW_SP_L3_PROTO_IPV4:
 773		return vr->fib4;
 774	case MLXSW_SP_L3_PROTO_IPV6:
 775		return vr->fib6;
 776	}
 777	return NULL;
 778}
 779
 780static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
 781					      u32 tb_id,
 782					      struct netlink_ext_ack *extack)
 783{
 784	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
 785	struct mlxsw_sp_fib *fib4;
 786	struct mlxsw_sp_fib *fib6;
 787	struct mlxsw_sp_vr *vr;
 788	int err;
 789
 790	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
 791	if (!vr) {
 792		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
 793		return ERR_PTR(-EBUSY);
 794	}
 795	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
 796	if (IS_ERR(fib4))
 797		return ERR_CAST(fib4);
 798	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
 799	if (IS_ERR(fib6)) {
 800		err = PTR_ERR(fib6);
 801		goto err_fib6_create;
 802	}
 803	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
 804					     MLXSW_SP_L3_PROTO_IPV4);
 805	if (IS_ERR(mr4_table)) {
 806		err = PTR_ERR(mr4_table);
 807		goto err_mr4_table_create;
 808	}
 809	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
 810					     MLXSW_SP_L3_PROTO_IPV6);
 811	if (IS_ERR(mr6_table)) {
 812		err = PTR_ERR(mr6_table);
 813		goto err_mr6_table_create;
 814	}
 815
 816	vr->fib4 = fib4;
 817	vr->fib6 = fib6;
 818	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
 819	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
 820	vr->tb_id = tb_id;
 821	return vr;
 822
 823err_mr6_table_create:
 824	mlxsw_sp_mr_table_destroy(mr4_table);
 825err_mr4_table_create:
 826	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
 827err_fib6_create:
 828	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
 829	return ERR_PTR(err);
 830}
 831
 832static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
 833				struct mlxsw_sp_vr *vr)
 834{
 835	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
 836	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
 837	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
 838	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
 839	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
 840	vr->fib6 = NULL;
 841	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
 842	vr->fib4 = NULL;
 843}
 844
 845static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
 846					   struct netlink_ext_ack *extack)
 847{
 848	struct mlxsw_sp_vr *vr;
 849
 850	tb_id = mlxsw_sp_fix_tb_id(tb_id);
 851	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
 852	if (!vr)
 853		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
 854	return vr;
 855}
 856
 857static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
 858{
 859	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
 860	    list_empty(&vr->fib6->node_list) &&
 861	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
 862	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
 863		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
 864}
 865
 866static bool
 867mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
 868				    enum mlxsw_sp_l3proto proto, u8 tree_id)
 869{
 870	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
 871
 872	if (!mlxsw_sp_vr_is_used(vr))
 873		return false;
 874	if (fib->lpm_tree->id == tree_id)
 875		return true;
 876	return false;
 877}
 878
 879static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
 880					struct mlxsw_sp_fib *fib,
 881					struct mlxsw_sp_lpm_tree *new_tree)
 882{
 883	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
 884	int err;
 885
 886	fib->lpm_tree = new_tree;
 887	mlxsw_sp_lpm_tree_hold(new_tree);
 888	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
 889	if (err)
 890		goto err_tree_bind;
 891	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
 892	return 0;
 893
 894err_tree_bind:
 895	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
 896	fib->lpm_tree = old_tree;
 897	return err;
 898}
 899
 900static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
 901					 struct mlxsw_sp_fib *fib,
 902					 struct mlxsw_sp_lpm_tree *new_tree)
 903{
 904	enum mlxsw_sp_l3proto proto = fib->proto;
 905	struct mlxsw_sp_lpm_tree *old_tree;
 906	u8 old_id, new_id = new_tree->id;
 907	struct mlxsw_sp_vr *vr;
 908	int i, err;
 909
 910	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
 911	old_id = old_tree->id;
 912
 913	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
 914		vr = &mlxsw_sp->router->vrs[i];
 915		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
 916			continue;
 917		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
 918						   mlxsw_sp_vr_fib(vr, proto),
 919						   new_tree);
 920		if (err)
 921			goto err_tree_replace;
 922	}
 923
 924	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
 925	       sizeof(new_tree->prefix_ref_count));
 926	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
 927	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
 928
 929	return 0;
 930
 931err_tree_replace:
 932	for (i--; i >= 0; i--) {
 933		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
 934			continue;
 935		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
 936					     mlxsw_sp_vr_fib(vr, proto),
 937					     old_tree);
 938	}
 939	return err;
 940}
 941
 942static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
 943{
 944	struct mlxsw_sp_vr *vr;
 945	u64 max_vrs;
 946	int i;
 947
 948	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
 949		return -EIO;
 950
 951	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
 952	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
 953					GFP_KERNEL);
 954	if (!mlxsw_sp->router->vrs)
 955		return -ENOMEM;
 956
 957	for (i = 0; i < max_vrs; i++) {
 958		vr = &mlxsw_sp->router->vrs[i];
 959		vr->id = i;
 960	}
 961
 962	return 0;
 963}
 964
 965static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
 966
 967static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
 968{
 969	/* At this stage we're guaranteed not to have new incoming
 970	 * FIB notifications and the work queue is free from FIBs
 971	 * sitting on top of mlxsw netdevs. However, we can still
 972	 * have other FIBs queued. Flush the queue before flushing
 973	 * the device's tables. No need for locks, as we're the only
 974	 * writer.
 975	 */
 976	mlxsw_core_flush_owq();
 977	mlxsw_sp_router_fib_flush(mlxsw_sp);
 978	kfree(mlxsw_sp->router->vrs);
 979}
 980
 981static struct net_device *
 982__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
 983{
 984	struct ip_tunnel *tun = netdev_priv(ol_dev);
 985	struct net *net = dev_net(ol_dev);
 986
 987	return __dev_get_by_index(net, tun->parms.link);
 988}
 989
 990u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
 991{
 992	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
 993
 994	if (d)
 995		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
 996	else
 997		return RT_TABLE_MAIN;
 998}
 999
1000static struct mlxsw_sp_rif *
1001mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1002		    const struct mlxsw_sp_rif_params *params,
1003		    struct netlink_ext_ack *extack);
1004
1005static struct mlxsw_sp_rif_ipip_lb *
1006mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1007				enum mlxsw_sp_ipip_type ipipt,
1008				struct net_device *ol_dev,
1009				struct netlink_ext_ack *extack)
1010{
1011	struct mlxsw_sp_rif_params_ipip_lb lb_params;
1012	const struct mlxsw_sp_ipip_ops *ipip_ops;
1013	struct mlxsw_sp_rif *rif;
1014
1015	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1016	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1017		.common.dev = ol_dev,
1018		.common.lag = false,
1019		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1020	};
1021
1022	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1023	if (IS_ERR(rif))
1024		return ERR_CAST(rif);
1025	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1026}
1027
1028static struct mlxsw_sp_ipip_entry *
1029mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1030			  enum mlxsw_sp_ipip_type ipipt,
1031			  struct net_device *ol_dev)
1032{
1033	const struct mlxsw_sp_ipip_ops *ipip_ops;
1034	struct mlxsw_sp_ipip_entry *ipip_entry;
1035	struct mlxsw_sp_ipip_entry *ret = NULL;
1036
1037	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1038	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1039	if (!ipip_entry)
1040		return ERR_PTR(-ENOMEM);
1041
1042	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1043							    ol_dev, NULL);
1044	if (IS_ERR(ipip_entry->ol_lb)) {
1045		ret = ERR_CAST(ipip_entry->ol_lb);
1046		goto err_ol_ipip_lb_create;
1047	}
1048
1049	ipip_entry->ipipt = ipipt;
1050	ipip_entry->ol_dev = ol_dev;
1051
1052	switch (ipip_ops->ul_proto) {
1053	case MLXSW_SP_L3_PROTO_IPV4:
1054		ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1055		break;
1056	case MLXSW_SP_L3_PROTO_IPV6:
1057		WARN_ON(1);
1058		break;
1059	}
1060
1061	return ipip_entry;
1062
1063err_ol_ipip_lb_create:
1064	kfree(ipip_entry);
1065	return ret;
1066}
1067
1068static void
1069mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1070{
1071	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1072	kfree(ipip_entry);
1073}
1074
1075static bool
1076mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1077				  const enum mlxsw_sp_l3proto ul_proto,
1078				  union mlxsw_sp_l3addr saddr,
1079				  u32 ul_tb_id,
1080				  struct mlxsw_sp_ipip_entry *ipip_entry)
1081{
1082	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1083	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1084	union mlxsw_sp_l3addr tun_saddr;
1085
1086	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1087		return false;
1088
1089	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1090	return tun_ul_tb_id == ul_tb_id &&
1091	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1092}
1093
1094static int
1095mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1096			      struct mlxsw_sp_fib_entry *fib_entry,
1097			      struct mlxsw_sp_ipip_entry *ipip_entry)
1098{
1099	u32 tunnel_index;
1100	int err;
1101
1102	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1103				  1, &tunnel_index);
1104	if (err)
1105		return err;
1106
1107	ipip_entry->decap_fib_entry = fib_entry;
1108	fib_entry->decap.ipip_entry = ipip_entry;
1109	fib_entry->decap.tunnel_index = tunnel_index;
1110	return 0;
1111}
1112
1113static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1114					  struct mlxsw_sp_fib_entry *fib_entry)
1115{
1116	/* Unlink this node from the IPIP entry that it's the decap entry of. */
1117	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1118	fib_entry->decap.ipip_entry = NULL;
1119	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1120			   1, fib_entry->decap.tunnel_index);
1121}
1122
1123static struct mlxsw_sp_fib_node *
1124mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1125			 size_t addr_len, unsigned char prefix_len);
1126static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1127				     struct mlxsw_sp_fib_entry *fib_entry);
1128
1129static void
1130mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1131				 struct mlxsw_sp_ipip_entry *ipip_entry)
1132{
1133	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1134
1135	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1136	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1137
1138	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1139}
1140
1141static void
1142mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1143				  struct mlxsw_sp_ipip_entry *ipip_entry,
1144				  struct mlxsw_sp_fib_entry *decap_fib_entry)
1145{
1146	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1147					  ipip_entry))
1148		return;
1149	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1150
1151	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1152		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1153}
1154
1155static struct mlxsw_sp_fib_entry *
1156mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
1157				     enum mlxsw_sp_l3proto proto,
1158				     const union mlxsw_sp_l3addr *addr,
1159				     enum mlxsw_sp_fib_entry_type type)
1160{
1161	struct mlxsw_sp_fib_entry *fib_entry;
1162	struct mlxsw_sp_fib_node *fib_node;
1163	unsigned char addr_prefix_len;
1164	struct mlxsw_sp_fib *fib;
1165	struct mlxsw_sp_vr *vr;
1166	const void *addrp;
1167	size_t addr_len;
1168	u32 addr4;
1169
1170	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
1171	if (!vr)
1172		return NULL;
1173	fib = mlxsw_sp_vr_fib(vr, proto);
1174
1175	switch (proto) {
1176	case MLXSW_SP_L3_PROTO_IPV4:
1177		addr4 = be32_to_cpu(addr->addr4);
1178		addrp = &addr4;
1179		addr_len = 4;
1180		addr_prefix_len = 32;
1181		break;
1182	case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
1183	default:
1184		WARN_ON(1);
1185		return NULL;
1186	}
1187
1188	fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
1189					    addr_prefix_len);
1190	if (!fib_node || list_empty(&fib_node->entry_list))
1191		return NULL;
1192
1193	fib_entry = list_first_entry(&fib_node->entry_list,
1194				     struct mlxsw_sp_fib_entry, list);
1195	if (fib_entry->type != type)
1196		return NULL;
1197
1198	return fib_entry;
1199}
1200
1201/* Given an IPIP entry, find the corresponding decap route. */
1202static struct mlxsw_sp_fib_entry *
1203mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1204			       struct mlxsw_sp_ipip_entry *ipip_entry)
1205{
1206	static struct mlxsw_sp_fib_node *fib_node;
1207	const struct mlxsw_sp_ipip_ops *ipip_ops;
1208	struct mlxsw_sp_fib_entry *fib_entry;
1209	unsigned char saddr_prefix_len;
1210	union mlxsw_sp_l3addr saddr;
1211	struct mlxsw_sp_fib *ul_fib;
1212	struct mlxsw_sp_vr *ul_vr;
1213	const void *saddrp;
1214	size_t saddr_len;
1215	u32 ul_tb_id;
1216	u32 saddr4;
1217
1218	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1219
1220	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1221	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1222	if (!ul_vr)
1223		return NULL;
1224
1225	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1226	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1227					   ipip_entry->ol_dev);
1228
1229	switch (ipip_ops->ul_proto) {
1230	case MLXSW_SP_L3_PROTO_IPV4:
1231		saddr4 = be32_to_cpu(saddr.addr4);
1232		saddrp = &saddr4;
1233		saddr_len = 4;
1234		saddr_prefix_len = 32;
1235		break;
1236	case MLXSW_SP_L3_PROTO_IPV6:
1237		WARN_ON(1);
1238		return NULL;
1239	}
1240
1241	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1242					    saddr_prefix_len);
1243	if (!fib_node || list_empty(&fib_node->entry_list))
1244		return NULL;
1245
1246	fib_entry = list_first_entry(&fib_node->entry_list,
1247				     struct mlxsw_sp_fib_entry, list);
1248	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1249		return NULL;
1250
1251	return fib_entry;
1252}
1253
1254static struct mlxsw_sp_ipip_entry *
1255mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1256			   enum mlxsw_sp_ipip_type ipipt,
1257			   struct net_device *ol_dev)
1258{
1259	struct mlxsw_sp_ipip_entry *ipip_entry;
1260
1261	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1262	if (IS_ERR(ipip_entry))
1263		return ipip_entry;
1264
1265	list_add_tail(&ipip_entry->ipip_list_node,
1266		      &mlxsw_sp->router->ipip_list);
1267
1268	return ipip_entry;
1269}
1270
1271static void
1272mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1273			    struct mlxsw_sp_ipip_entry *ipip_entry)
1274{
1275	list_del(&ipip_entry->ipip_list_node);
1276	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1277}
1278
1279static bool
1280mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1281				  const struct net_device *ul_dev,
1282				  enum mlxsw_sp_l3proto ul_proto,
1283				  union mlxsw_sp_l3addr ul_dip,
1284				  struct mlxsw_sp_ipip_entry *ipip_entry)
1285{
1286	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1287	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1288
1289	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1290		return false;
1291
1292	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1293						 ul_tb_id, ipip_entry);
1294}
1295
1296/* Given decap parameters, find the corresponding IPIP entry. */
1297static struct mlxsw_sp_ipip_entry *
1298mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1299				  const struct net_device *ul_dev,
1300				  enum mlxsw_sp_l3proto ul_proto,
1301				  union mlxsw_sp_l3addr ul_dip)
1302{
1303	struct mlxsw_sp_ipip_entry *ipip_entry;
1304
1305	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1306			    ipip_list_node)
1307		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1308						      ul_proto, ul_dip,
1309						      ipip_entry))
1310			return ipip_entry;
1311
1312	return NULL;
1313}
1314
1315static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1316				      const struct net_device *dev,
1317				      enum mlxsw_sp_ipip_type *p_type)
1318{
1319	struct mlxsw_sp_router *router = mlxsw_sp->router;
1320	const struct mlxsw_sp_ipip_ops *ipip_ops;
1321	enum mlxsw_sp_ipip_type ipipt;
1322
1323	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1324		ipip_ops = router->ipip_ops_arr[ipipt];
1325		if (dev->type == ipip_ops->dev_type) {
1326			if (p_type)
1327				*p_type = ipipt;
1328			return true;
1329		}
1330	}
1331	return false;
1332}
1333
1334bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1335				const struct net_device *dev)
1336{
1337	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1338}
1339
1340static struct mlxsw_sp_ipip_entry *
1341mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1342				   const struct net_device *ol_dev)
1343{
1344	struct mlxsw_sp_ipip_entry *ipip_entry;
1345
1346	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1347			    ipip_list_node)
1348		if (ipip_entry->ol_dev == ol_dev)
1349			return ipip_entry;
1350
1351	return NULL;
1352}
1353
1354static struct mlxsw_sp_ipip_entry *
1355mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1356				   const struct net_device *ul_dev,
1357				   struct mlxsw_sp_ipip_entry *start)
1358{
1359	struct mlxsw_sp_ipip_entry *ipip_entry;
1360
1361	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1362					ipip_list_node);
1363	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1364				     ipip_list_node) {
1365		struct net_device *ipip_ul_dev =
1366			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1367
1368		if (ipip_ul_dev == ul_dev)
1369			return ipip_entry;
1370	}
1371
1372	return NULL;
1373}
1374
1375bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1376				const struct net_device *dev)
1377{
1378	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1379}
1380
1381static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1382						const struct net_device *ol_dev,
1383						enum mlxsw_sp_ipip_type ipipt)
1384{
1385	const struct mlxsw_sp_ipip_ops *ops
1386		= mlxsw_sp->router->ipip_ops_arr[ipipt];
1387
1388	/* For deciding whether decap should be offloaded, we don't care about
1389	 * overlay protocol, so ask whether either one is supported.
1390	 */
1391	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1392	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1393}
1394
1395static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1396						struct net_device *ol_dev)
1397{
1398	struct mlxsw_sp_ipip_entry *ipip_entry;
1399	enum mlxsw_sp_l3proto ul_proto;
1400	enum mlxsw_sp_ipip_type ipipt;
1401	union mlxsw_sp_l3addr saddr;
1402	u32 ul_tb_id;
1403
1404	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1405	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1406		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1407		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1408		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1409		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1410							  saddr, ul_tb_id,
1411							  NULL)) {
1412			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1413								ol_dev);
1414			if (IS_ERR(ipip_entry))
1415				return PTR_ERR(ipip_entry);
1416		}
1417	}
1418
1419	return 0;
1420}
1421
1422static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
1423						   struct net_device *ol_dev)
1424{
1425	struct mlxsw_sp_ipip_entry *ipip_entry;
1426
1427	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1428	if (ipip_entry)
1429		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1430}
1431
1432static void
1433mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1434				struct mlxsw_sp_ipip_entry *ipip_entry)
1435{
1436	struct mlxsw_sp_fib_entry *decap_fib_entry;
1437
1438	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
1439	if (decap_fib_entry)
1440		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
1441						  decap_fib_entry);
1442}
1443
1444static int
1445mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
1446			u16 ul_rif_id, bool enable)
1447{
1448	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1449	struct mlxsw_sp_rif *rif = &lb_rif->common;
1450	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1451	char ritr_pl[MLXSW_REG_RITR_LEN];
1452	u32 saddr4;
1453
1454	switch (lb_cf.ul_protocol) {
1455	case MLXSW_SP_L3_PROTO_IPV4:
1456		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1457		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1458				    rif->rif_index, rif->vr_id, rif->dev->mtu);
1459		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1460			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1461			    ul_vr_id, ul_rif_id, saddr4, lb_cf.okey);
1462		break;
1463
1464	case MLXSW_SP_L3_PROTO_IPV6:
1465		return -EAFNOSUPPORT;
1466	}
1467
1468	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1469}
1470
1471static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1472						 struct net_device *ol_dev)
1473{
1474	struct mlxsw_sp_ipip_entry *ipip_entry;
1475	struct mlxsw_sp_rif_ipip_lb *lb_rif;
1476	int err = 0;
1477
1478	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1479	if (ipip_entry) {
1480		lb_rif = ipip_entry->ol_lb;
1481		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1482					      lb_rif->ul_rif_id, true);
1483		if (err)
1484			goto out;
1485		lb_rif->common.mtu = ol_dev->mtu;
1486	}
1487
1488out:
1489	return err;
1490}
1491
1492static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1493						struct net_device *ol_dev)
1494{
1495	struct mlxsw_sp_ipip_entry *ipip_entry;
1496
1497	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1498	if (ipip_entry)
1499		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1500}
1501
1502static void
1503mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1504				  struct mlxsw_sp_ipip_entry *ipip_entry)
1505{
1506	if (ipip_entry->decap_fib_entry)
1507		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1508}
1509
1510static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1511						  struct net_device *ol_dev)
1512{
1513	struct mlxsw_sp_ipip_entry *ipip_entry;
1514
1515	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1516	if (ipip_entry)
1517		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1518}
1519
1520static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1521					 struct mlxsw_sp_rif *old_rif,
1522					 struct mlxsw_sp_rif *new_rif);
1523static int
1524mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1525				 struct mlxsw_sp_ipip_entry *ipip_entry,
1526				 bool keep_encap,
1527				 struct netlink_ext_ack *extack)
1528{
1529	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1530	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1531
1532	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1533						     ipip_entry->ipipt,
1534						     ipip_entry->ol_dev,
1535						     extack);
1536	if (IS_ERR(new_lb_rif))
1537		return PTR_ERR(new_lb_rif);
1538	ipip_entry->ol_lb = new_lb_rif;
1539
1540	if (keep_encap)
1541		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1542					     &new_lb_rif->common);
1543
1544	mlxsw_sp_rif_destroy(&old_lb_rif->common);
1545
1546	return 0;
1547}
1548
1549static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1550					struct mlxsw_sp_rif *rif);
1551
1552/**
1553 * Update the offload related to an IPIP entry. This always updates decap, and
1554 * in addition to that it also:
1555 * @recreate_loopback: recreates the associated loopback RIF
1556 * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1557 *              relevant when recreate_loopback is true.
1558 * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1559 *                   is only relevant when recreate_loopback is false.
1560 */
1561int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1562					struct mlxsw_sp_ipip_entry *ipip_entry,
1563					bool recreate_loopback,
1564					bool keep_encap,
1565					bool update_nexthops,
1566					struct netlink_ext_ack *extack)
1567{
1568	int err;
1569
1570	/* RIFs can't be edited, so to update loopback, we need to destroy and
1571	 * recreate it. That creates a window of opportunity where RALUE and
1572	 * RATR registers end up referencing a RIF that's already gone. RATRs
1573	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1574	 * of RALUE, demote the decap route back.
1575	 */
1576	if (ipip_entry->decap_fib_entry)
1577		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1578
1579	if (recreate_loopback) {
1580		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1581						       keep_encap, extack);
1582		if (err)
1583			return err;
1584	} else if (update_nexthops) {
1585		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1586					    &ipip_entry->ol_lb->common);
1587	}
1588
1589	if (ipip_entry->ol_dev->flags & IFF_UP)
1590		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1591
1592	return 0;
1593}
1594
1595static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1596						struct net_device *ol_dev,
1597						struct netlink_ext_ack *extack)
1598{
1599	struct mlxsw_sp_ipip_entry *ipip_entry =
1600		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1601
1602	if (!ipip_entry)
1603		return 0;
1604
1605	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1606						   true, false, false, extack);
1607}
1608
1609static int
1610mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1611				     struct mlxsw_sp_ipip_entry *ipip_entry,
1612				     struct net_device *ul_dev,
1613				     struct netlink_ext_ack *extack)
1614{
1615	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1616						   true, true, false, extack);
1617}
1618
1619static int
1620mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1621				    struct mlxsw_sp_ipip_entry *ipip_entry,
1622				    struct net_device *ul_dev)
1623{
1624	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1625						   false, false, true, NULL);
1626}
1627
1628static int
1629mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1630				      struct mlxsw_sp_ipip_entry *ipip_entry,
1631				      struct net_device *ul_dev)
1632{
1633	/* A down underlay device causes encapsulated packets to not be
1634	 * forwarded, but decap still works. So refresh next hops without
1635	 * touching anything else.
1636	 */
1637	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1638						   false, false, true, NULL);
1639}
1640
1641static int
1642mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1643					struct net_device *ol_dev,
1644					struct netlink_ext_ack *extack)
1645{
1646	const struct mlxsw_sp_ipip_ops *ipip_ops;
1647	struct mlxsw_sp_ipip_entry *ipip_entry;
1648	int err;
1649
1650	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1651	if (!ipip_entry)
1652		/* A change might make a tunnel eligible for offloading, but
1653		 * that is currently not implemented. What falls to slow path
1654		 * stays there.
1655		 */
1656		return 0;
1657
1658	/* A change might make a tunnel not eligible for offloading. */
1659	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1660						 ipip_entry->ipipt)) {
1661		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1662		return 0;
1663	}
1664
1665	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1666	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1667	return err;
1668}
1669
1670void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1671				       struct mlxsw_sp_ipip_entry *ipip_entry)
1672{
1673	struct net_device *ol_dev = ipip_entry->ol_dev;
1674
1675	if (ol_dev->flags & IFF_UP)
1676		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1677	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1678}
1679
1680/* The configuration where several tunnels have the same local address in the
1681 * same underlay table needs special treatment in the HW. That is currently not
1682 * implemented in the driver. This function finds and demotes the first tunnel
1683 * with a given source address, except the one passed in in the argument
1684 * `except'.
1685 */
1686bool
1687mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1688				     enum mlxsw_sp_l3proto ul_proto,
1689				     union mlxsw_sp_l3addr saddr,
1690				     u32 ul_tb_id,
1691				     const struct mlxsw_sp_ipip_entry *except)
1692{
1693	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1694
1695	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1696				 ipip_list_node) {
1697		if (ipip_entry != except &&
1698		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1699						      ul_tb_id, ipip_entry)) {
1700			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1701			return true;
1702		}
1703	}
1704
1705	return false;
1706}
1707
1708static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1709						     struct net_device *ul_dev)
1710{
1711	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1712
1713	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1714				 ipip_list_node) {
1715		struct net_device *ipip_ul_dev =
1716			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1717
1718		if (ipip_ul_dev == ul_dev)
1719			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1720	}
1721}
1722
1723int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1724				     struct net_device *ol_dev,
1725				     unsigned long event,
1726				     struct netdev_notifier_info *info)
1727{
1728	struct netdev_notifier_changeupper_info *chup;
1729	struct netlink_ext_ack *extack;
1730
1731	switch (event) {
1732	case NETDEV_REGISTER:
1733		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1734	case NETDEV_UNREGISTER:
1735		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1736		return 0;
1737	case NETDEV_UP:
1738		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1739		return 0;
1740	case NETDEV_DOWN:
1741		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1742		return 0;
1743	case NETDEV_CHANGEUPPER:
1744		chup = container_of(info, typeof(*chup), info);
1745		extack = info->extack;
1746		if (netif_is_l3_master(chup->upper_dev))
1747			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1748								    ol_dev,
1749								    extack);
1750		return 0;
1751	case NETDEV_CHANGE:
1752		extack = info->extack;
1753		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1754							       ol_dev, extack);
1755	case NETDEV_CHANGEMTU:
1756		return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1757	}
1758	return 0;
1759}
1760
1761static int
1762__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1763				   struct mlxsw_sp_ipip_entry *ipip_entry,
1764				   struct net_device *ul_dev,
1765				   unsigned long event,
1766				   struct netdev_notifier_info *info)
1767{
1768	struct netdev_notifier_changeupper_info *chup;
1769	struct netlink_ext_ack *extack;
1770
1771	switch (event) {
1772	case NETDEV_CHANGEUPPER:
1773		chup = container_of(info, typeof(*chup), info);
1774		extack = info->extack;
1775		if (netif_is_l3_master(chup->upper_dev))
1776			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1777								    ipip_entry,
1778								    ul_dev,
1779								    extack);
1780		break;
1781
1782	case NETDEV_UP:
1783		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1784							   ul_dev);
1785	case NETDEV_DOWN:
1786		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1787							     ipip_entry,
1788							     ul_dev);
1789	}
1790	return 0;
1791}
1792
1793int
1794mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1795				 struct net_device *ul_dev,
1796				 unsigned long event,
1797				 struct netdev_notifier_info *info)
1798{
1799	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1800	int err;
1801
1802	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1803								ul_dev,
1804								ipip_entry))) {
1805		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1806							 ul_dev, event, info);
1807		if (err) {
1808			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1809								 ul_dev);
1810			return err;
1811		}
1812	}
1813
1814	return 0;
1815}
1816
1817int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1818				      enum mlxsw_sp_l3proto ul_proto,
1819				      const union mlxsw_sp_l3addr *ul_sip,
1820				      u32 tunnel_index)
1821{
1822	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1823	struct mlxsw_sp_fib_entry *fib_entry;
1824	int err;
1825
1826	/* It is valid to create a tunnel with a local IP and only later
1827	 * assign this IP address to a local interface
1828	 */
1829	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1830							 ul_proto, ul_sip,
1831							 type);
1832	if (!fib_entry)
1833		return 0;
1834
1835	fib_entry->decap.tunnel_index = tunnel_index;
1836	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1837
1838	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1839	if (err)
1840		goto err_fib_entry_update;
1841
1842	return 0;
1843
1844err_fib_entry_update:
1845	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1846	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1847	return err;
1848}
1849
1850void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1851				      enum mlxsw_sp_l3proto ul_proto,
1852				      const union mlxsw_sp_l3addr *ul_sip)
1853{
1854	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1855	struct mlxsw_sp_fib_entry *fib_entry;
1856
1857	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1858							 ul_proto, ul_sip,
1859							 type);
1860	if (!fib_entry)
1861		return;
1862
1863	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1864	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1865}
1866
1867struct mlxsw_sp_neigh_key {
1868	struct neighbour *n;
1869};
1870
1871struct mlxsw_sp_neigh_entry {
1872	struct list_head rif_list_node;
1873	struct rhash_head ht_node;
1874	struct mlxsw_sp_neigh_key key;
1875	u16 rif;
1876	bool connected;
1877	unsigned char ha[ETH_ALEN];
1878	struct list_head nexthop_list; /* list of nexthops using
1879					* this neigh entry
1880					*/
1881	struct list_head nexthop_neighs_list_node;
1882	unsigned int counter_index;
1883	bool counter_valid;
1884};
1885
1886static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1887	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1888	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1889	.key_len = sizeof(struct mlxsw_sp_neigh_key),
1890};
1891
1892struct mlxsw_sp_neigh_entry *
1893mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1894			struct mlxsw_sp_neigh_entry *neigh_entry)
1895{
1896	if (!neigh_entry) {
1897		if (list_empty(&rif->neigh_list))
1898			return NULL;
1899		else
1900			return list_first_entry(&rif->neigh_list,
1901						typeof(*neigh_entry),
1902						rif_list_node);
1903	}
1904	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1905		return NULL;
1906	return list_next_entry(neigh_entry, rif_list_node);
1907}
1908
1909int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1910{
1911	return neigh_entry->key.n->tbl->family;
1912}
1913
1914unsigned char *
1915mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1916{
1917	return neigh_entry->ha;
1918}
1919
1920u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1921{
1922	struct neighbour *n;
1923
1924	n = neigh_entry->key.n;
1925	return ntohl(*((__be32 *) n->primary_key));
1926}
1927
1928struct in6_addr *
1929mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1930{
1931	struct neighbour *n;
1932
1933	n = neigh_entry->key.n;
1934	return (struct in6_addr *) &n->primary_key;
1935}
1936
1937int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1938			       struct mlxsw_sp_neigh_entry *neigh_entry,
1939			       u64 *p_counter)
1940{
1941	if (!neigh_entry->counter_valid)
1942		return -EINVAL;
1943
1944	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1945					 p_counter, NULL);
1946}
1947
1948static struct mlxsw_sp_neigh_entry *
1949mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1950			   u16 rif)
1951{
1952	struct mlxsw_sp_neigh_entry *neigh_entry;
1953
1954	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1955	if (!neigh_entry)
1956		return NULL;
1957
1958	neigh_entry->key.n = n;
1959	neigh_entry->rif = rif;
1960	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1961
1962	return neigh_entry;
1963}
1964
1965static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
1966{
1967	kfree(neigh_entry);
1968}
1969
1970static int
1971mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1972			    struct mlxsw_sp_neigh_entry *neigh_entry)
1973{
1974	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1975				      &neigh_entry->ht_node,
1976				      mlxsw_sp_neigh_ht_params);
1977}
1978
1979static void
1980mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1981			    struct mlxsw_sp_neigh_entry *neigh_entry)
1982{
1983	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
1984			       &neigh_entry->ht_node,
1985			       mlxsw_sp_neigh_ht_params);
1986}
1987
1988static bool
1989mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
1990				    struct mlxsw_sp_neigh_entry *neigh_entry)
1991{
1992	struct devlink *devlink;
1993	const char *table_name;
1994
1995	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
1996	case AF_INET:
1997		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
1998		break;
1999	case AF_INET6:
2000		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2001		break;
2002	default:
2003		WARN_ON(1);
2004		return false;
2005	}
2006
2007	devlink = priv_to_devlink(mlxsw_sp->core);
2008	return devlink_dpipe_table_counter_enabled(devlink, table_name);
2009}
2010
2011static void
2012mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2013			     struct mlxsw_sp_neigh_entry *neigh_entry)
2014{
2015	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2016		return;
2017
2018	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2019		return;
2020
2021	neigh_entry->counter_valid = true;
2022}
2023
2024static void
2025mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2026			    struct mlxsw_sp_neigh_entry *neigh_entry)
2027{
2028	if (!neigh_entry->counter_valid)
2029		return;
2030	mlxsw_sp_flow_counter_free(mlxsw_sp,
2031				   neigh_entry->counter_index);
2032	neigh_entry->counter_valid = false;
2033}
2034
2035static struct mlxsw_sp_neigh_entry *
2036mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2037{
2038	struct mlxsw_sp_neigh_entry *neigh_entry;
2039	struct mlxsw_sp_rif *rif;
2040	int err;
2041
2042	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2043	if (!rif)
2044		return ERR_PTR(-EINVAL);
2045
2046	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2047	if (!neigh_entry)
2048		return ERR_PTR(-ENOMEM);
2049
2050	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2051	if (err)
2052		goto err_neigh_entry_insert;
2053
2054	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2055	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2056
2057	return neigh_entry;
2058
2059err_neigh_entry_insert:
2060	mlxsw_sp_neigh_entry_free(neigh_entry);
2061	return ERR_PTR(err);
2062}
2063
2064static void
2065mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2066			     struct mlxsw_sp_neigh_entry *neigh_entry)
2067{
2068	list_del(&neigh_entry->rif_list_node);
2069	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2070	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2071	mlxsw_sp_neigh_entry_free(neigh_entry);
2072}
2073
2074static struct mlxsw_sp_neigh_entry *
2075mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2076{
2077	struct mlxsw_sp_neigh_key key;
2078
2079	key.n = n;
2080	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2081				      &key, mlxsw_sp_neigh_ht_params);
2082}
2083
2084static void
2085mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2086{
2087	unsigned long interval;
2088
2089#if IS_ENABLED(CONFIG_IPV6)
2090	interval = min_t(unsigned long,
2091			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2092			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2093#else
2094	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2095#endif
2096	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2097}
2098
2099static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2100						   char *rauhtd_pl,
2101						   int ent_index)
2102{
2103	struct net_device *dev;
2104	struct neighbour *n;
2105	__be32 dipn;
2106	u32 dip;
2107	u16 rif;
2108
2109	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2110
2111	if (!mlxsw_sp->router->rifs[rif]) {
2112		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2113		return;
2114	}
2115
2116	dipn = htonl(dip);
2117	dev = mlxsw_sp->router->rifs[rif]->dev;
2118	n = neigh_lookup(&arp_tbl, &dipn, dev);
2119	if (!n)
2120		return;
2121
2122	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2123	neigh_event_send(n, NULL);
2124	neigh_release(n);
2125}
2126
2127#if IS_ENABLED(CONFIG_IPV6)
2128static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2129						   char *rauhtd_pl,
2130						   int rec_index)
2131{
2132	struct net_device *dev;
2133	struct neighbour *n;
2134	struct in6_addr dip;
2135	u16 rif;
2136
2137	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2138					 (char *) &dip);
2139
2140	if (!mlxsw_sp->router->rifs[rif]) {
2141		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2142		return;
2143	}
2144
2145	dev = mlxsw_sp->router->rifs[rif]->dev;
2146	n = neigh_lookup(&nd_tbl, &dip, dev);
2147	if (!n)
2148		return;
2149
2150	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2151	neigh_event_send(n, NULL);
2152	neigh_release(n);
2153}
2154#else
2155static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2156						   char *rauhtd_pl,
2157						   int rec_index)
2158{
2159}
2160#endif
2161
2162static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2163						   char *rauhtd_pl,
2164						   int rec_index)
2165{
2166	u8 num_entries;
2167	int i;
2168
2169	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2170								rec_index);
2171	/* Hardware starts counting at 0, so add 1. */
2172	num_entries++;
2173
2174	/* Each record consists of several neighbour entries. */
2175	for (i = 0; i < num_entries; i++) {
2176		int ent_index;
2177
2178		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2179		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2180						       ent_index);
2181	}
2182
2183}
2184
2185static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2186						   char *rauhtd_pl,
2187						   int rec_index)
2188{
2189	/* One record contains one entry. */
2190	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
2191					       rec_index);
2192}
2193
2194static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2195					      char *rauhtd_pl, int rec_index)
2196{
2197	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2198	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2199		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2200						       rec_index);
2201		break;
2202	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2203		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2204						       rec_index);
2205		break;
2206	}
2207}
2208
2209static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2210{
2211	u8 num_rec, last_rec_index, num_entries;
2212
2213	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2214	last_rec_index = num_rec - 1;
2215
2216	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2217		return false;
2218	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2219	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2220		return true;
2221
2222	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2223								last_rec_index);
2224	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2225		return true;
2226	return false;
2227}
2228
2229static int
2230__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2231				       char *rauhtd_pl,
2232				       enum mlxsw_reg_rauhtd_type type)
2233{
2234	int i, num_rec;
2235	int err;
2236
2237	/* Make sure the neighbour's netdev isn't removed in the
2238	 * process.
2239	 */
2240	rtnl_lock();
2241	do {
2242		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2243		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2244				      rauhtd_pl);
2245		if (err) {
2246			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2247			break;
2248		}
2249		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2250		for (i = 0; i < num_rec; i++)
2251			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2252							  i);
2253	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2254	rtnl_unlock();
2255
2256	return err;
2257}
2258
2259static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2260{
2261	enum mlxsw_reg_rauhtd_type type;
2262	char *rauhtd_pl;
2263	int err;
2264
2265	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2266	if (!rauhtd_pl)
2267		return -ENOMEM;
2268
2269	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2270	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2271	if (err)
2272		goto out;
2273
2274	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2275	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2276out:
2277	kfree(rauhtd_pl);
2278	return err;
2279}
2280
2281static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2282{
2283	struct mlxsw_sp_neigh_entry *neigh_entry;
2284
2285	/* Take RTNL mutex here to prevent lists from changes */
2286	rtnl_lock();
2287	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2288			    nexthop_neighs_list_node)
2289		/* If this neigh have nexthops, make the kernel think this neigh
2290		 * is active regardless of the traffic.
2291		 */
2292		neigh_event_send(neigh_entry->key.n, NULL);
2293	rtnl_unlock();
2294}
2295
2296static void
2297mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2298{
2299	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2300
2301	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2302			       msecs_to_jiffies(interval));
2303}
2304
2305static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2306{
2307	struct mlxsw_sp_router *router;
2308	int err;
2309
2310	router = container_of(work, struct mlxsw_sp_router,
2311			      neighs_update.dw.work);
2312	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2313	if (err)
2314		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2315
2316	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2317
2318	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2319}
2320
2321static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2322{
2323	struct mlxsw_sp_neigh_entry *neigh_entry;
2324	struct mlxsw_sp_router *router;
2325
2326	router = container_of(work, struct mlxsw_sp_router,
2327			      nexthop_probe_dw.work);
2328	/* Iterate over nexthop neighbours, find those who are unresolved and
2329	 * send arp on them. This solves the chicken-egg problem when
2330	 * the nexthop wouldn't get offloaded until the neighbor is resolved
2331	 * but it wouldn't get resolved ever in case traffic is flowing in HW
2332	 * using different nexthop.
2333	 *
2334	 * Take RTNL mutex here to prevent lists from changes.
2335	 */
2336	rtnl_lock();
2337	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2338			    nexthop_neighs_list_node)
2339		if (!neigh_entry->connected)
2340			neigh_event_send(neigh_entry->key.n, NULL);
2341	rtnl_unlock();
2342
2343	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2344			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2345}
2346
2347static void
2348mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2349			      struct mlxsw_sp_neigh_entry *neigh_entry,
2350			      bool removing, bool dead);
2351
2352static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2353{
2354	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2355			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2356}
2357
2358static int
2359mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2360				struct mlxsw_sp_neigh_entry *neigh_entry,
2361				enum mlxsw_reg_rauht_op op)
2362{
2363	struct neighbour *n = neigh_entry->key.n;
2364	u32 dip = ntohl(*((__be32 *) n->primary_key));
2365	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2366
2367	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2368			      dip);
2369	if (neigh_entry->counter_valid)
2370		mlxsw_reg_rauht_pack_counter(rauht_pl,
2371					     neigh_entry->counter_index);
2372	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2373}
2374
2375static int
2376mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2377				struct mlxsw_sp_neigh_entry *neigh_entry,
2378				enum mlxsw_reg_rauht_op op)
2379{
2380	struct neighbour *n = neigh_entry->key.n;
2381	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2382	const char *dip = n->primary_key;
2383
2384	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2385			      dip);
2386	if (neigh_entry->counter_valid)
2387		mlxsw_reg_rauht_pack_counter(rauht_pl,
2388					     neigh_entry->counter_index);
2389	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2390}
2391
2392bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2393{
2394	struct neighbour *n = neigh_entry->key.n;
2395
2396	/* Packets with a link-local destination address are trapped
2397	 * after LPM lookup and never reach the neighbour table, so
2398	 * there is no need to program such neighbours to the device.
2399	 */
2400	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2401	    IPV6_ADDR_LINKLOCAL)
2402		return true;
2403	return false;
2404}
2405
2406static void
2407mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2408			    struct mlxsw_sp_neigh_entry *neigh_entry,
2409			    bool adding)
2410{
2411	enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
2412	int err;
2413
2414	if (!adding && !neigh_entry->connected)
2415		return;
2416	neigh_entry->connected = adding;
2417	if (neigh_entry->key.n->tbl->family == AF_INET) {
2418		err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2419						      op);
2420		if (err)
2421			return;
2422	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2423		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2424			return;
2425		err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2426						      op);
2427		if (err)
2428			return;
2429	} else {
2430		WARN_ON_ONCE(1);
2431		return;
2432	}
2433
2434	if (adding)
2435		neigh_entry->key.n->flags |= NTF_OFFLOADED;
2436	else
2437		neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
2438}
2439
2440void
2441mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2442				    struct mlxsw_sp_neigh_entry *neigh_entry,
2443				    bool adding)
2444{
2445	if (adding)
2446		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2447	else
2448		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2449	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2450}
2451
2452struct mlxsw_sp_netevent_work {
2453	struct work_struct work;
2454	struct mlxsw_sp *mlxsw_sp;
2455	struct neighbour *n;
2456};
2457
2458static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2459{
2460	struct mlxsw_sp_netevent_work *net_work =
2461		container_of(work, struct mlxsw_sp_netevent_work, work);
2462	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2463	struct mlxsw_sp_neigh_entry *neigh_entry;
2464	struct neighbour *n = net_work->n;
2465	unsigned char ha[ETH_ALEN];
2466	bool entry_connected;
2467	u8 nud_state, dead;
2468
2469	/* If these parameters are changed after we release the lock,
2470	 * then we are guaranteed to receive another event letting us
2471	 * know about it.
2472	 */
2473	read_lock_bh(&n->lock);
2474	memcpy(ha, n->ha, ETH_ALEN);
2475	nud_state = n->nud_state;
2476	dead = n->dead;
2477	read_unlock_bh(&n->lock);
2478
2479	rtnl_lock();
2480	mlxsw_sp_span_respin(mlxsw_sp);
2481
2482	entry_connected = nud_state & NUD_VALID && !dead;
2483	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2484	if (!entry_connected && !neigh_entry)
2485		goto out;
2486	if (!neigh_entry) {
2487		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2488		if (IS_ERR(neigh_entry))
2489			goto out;
2490	}
2491
2492	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2493	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2494	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
2495				      dead);
2496
2497	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2498		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2499
2500out:
2501	rtnl_unlock();
2502	neigh_release(n);
2503	kfree(net_work);
2504}
2505
2506static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2507
2508static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2509{
2510	struct mlxsw_sp_netevent_work *net_work =
2511		container_of(work, struct mlxsw_sp_netevent_work, work);
2512	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2513
2514	mlxsw_sp_mp_hash_init(mlxsw_sp);
2515	kfree(net_work);
2516}
2517
2518static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2519
2520static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2521{
2522	struct mlxsw_sp_netevent_work *net_work =
2523		container_of(work, struct mlxsw_sp_netevent_work, work);
2524	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2525
2526	__mlxsw_sp_router_init(mlxsw_sp);
2527	kfree(net_work);
2528}
2529
2530static int mlxsw_sp_router_schedule_work(struct net *net,
2531					 struct notifier_block *nb,
2532					 void (*cb)(struct work_struct *))
2533{
2534	struct mlxsw_sp_netevent_work *net_work;
2535	struct mlxsw_sp_router *router;
2536
2537	if (!net_eq(net, &init_net))
2538		return NOTIFY_DONE;
2539
2540	net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2541	if (!net_work)
2542		return NOTIFY_BAD;
2543
2544	router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2545	INIT_WORK(&net_work->work, cb);
2546	net_work->mlxsw_sp = router->mlxsw_sp;
2547	mlxsw_core_schedule_work(&net_work->work);
2548	return NOTIFY_DONE;
2549}
2550
2551static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2552					  unsigned long event, void *ptr)
2553{
2554	struct mlxsw_sp_netevent_work *net_work;
2555	struct mlxsw_sp_port *mlxsw_sp_port;
2556	struct mlxsw_sp *mlxsw_sp;
2557	unsigned long interval;
2558	struct neigh_parms *p;
2559	struct neighbour *n;
2560
2561	switch (event) {
2562	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2563		p = ptr;
2564
2565		/* We don't care about changes in the default table. */
2566		if (!p->dev || (p->tbl->family != AF_INET &&
2567				p->tbl->family != AF_INET6))
2568			return NOTIFY_DONE;
2569
2570		/* We are in atomic context and can't take RTNL mutex,
2571		 * so use RCU variant to walk the device chain.
2572		 */
2573		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2574		if (!mlxsw_sp_port)
2575			return NOTIFY_DONE;
2576
2577		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2578		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2579		mlxsw_sp->router->neighs_update.interval = interval;
2580
2581		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2582		break;
2583	case NETEVENT_NEIGH_UPDATE:
2584		n = ptr;
2585
2586		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2587			return NOTIFY_DONE;
2588
2589		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2590		if (!mlxsw_sp_port)
2591			return NOTIFY_DONE;
2592
2593		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2594		if (!net_work) {
2595			mlxsw_sp_port_dev_put(mlxsw_sp_port);
2596			return NOTIFY_BAD;
2597		}
2598
2599		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2600		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2601		net_work->n = n;
2602
2603		/* Take a reference to ensure the neighbour won't be
2604		 * destructed until we drop the reference in delayed
2605		 * work.
2606		 */
2607		neigh_clone(n);
2608		mlxsw_core_schedule_work(&net_work->work);
2609		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2610		break;
2611	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2612	case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2613		return mlxsw_sp_router_schedule_work(ptr, nb,
2614				mlxsw_sp_router_mp_hash_event_work);
2615
2616	case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2617		return mlxsw_sp_router_schedule_work(ptr, nb,
2618				mlxsw_sp_router_update_priority_work);
2619	}
2620
2621	return NOTIFY_DONE;
2622}
2623
2624static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2625{
2626	int err;
2627
2628	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2629			      &mlxsw_sp_neigh_ht_params);
2630	if (err)
2631		return err;
2632
2633	/* Initialize the polling interval according to the default
2634	 * table.
2635	 */
2636	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2637
2638	/* Create the delayed works for the activity_update */
2639	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2640			  mlxsw_sp_router_neighs_update_work);
2641	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2642			  mlxsw_sp_router_probe_unresolved_nexthops);
2643	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2644	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2645	return 0;
2646}
2647
2648static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2649{
2650	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2651	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2652	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2653}
2654
2655static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2656					 struct mlxsw_sp_rif *rif)
2657{
2658	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2659
2660	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2661				 rif_list_node) {
2662		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2663		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2664	}
2665}
2666
2667enum mlxsw_sp_nexthop_type {
2668	MLXSW_SP_NEXTHOP_TYPE_ETH,
2669	MLXSW_SP_NEXTHOP_TYPE_IPIP,
2670};
2671
2672struct mlxsw_sp_nexthop_key {
2673	struct fib_nh *fib_nh;
2674};
2675
2676struct mlxsw_sp_nexthop {
2677	struct list_head neigh_list_node; /* member of neigh entry list */
2678	struct list_head rif_list_node;
2679	struct list_head router_list_node;
2680	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2681						* this belongs to
2682						*/
2683	struct rhash_head ht_node;
2684	struct mlxsw_sp_nexthop_key key;
2685	unsigned char gw_addr[sizeof(struct in6_addr)];
2686	int ifindex;
2687	int nh_weight;
2688	int norm_nh_weight;
2689	int num_adj_entries;
2690	struct mlxsw_sp_rif *rif;
2691	u8 should_offload:1, /* set indicates this neigh is connected and
2692			      * should be put to KVD linear area of this group.
2693			      */
2694	   offloaded:1, /* set in case the neigh is actually put into
2695			 * KVD linear area of this group.
2696			 */
2697	   update:1; /* set indicates that MAC of this neigh should be
2698		      * updated in HW
2699		      */
2700	enum mlxsw_sp_nexthop_type type;
2701	union {
2702		struct mlxsw_sp_neigh_entry *neigh_entry;
2703		struct mlxsw_sp_ipip_entry *ipip_entry;
2704	};
2705	unsigned int counter_index;
2706	bool counter_valid;
2707};
2708
2709struct mlxsw_sp_nexthop_group {
2710	void *priv;
2711	struct rhash_head ht_node;
2712	struct list_head fib_list; /* list of fib entries that use this group */
2713	struct neigh_table *neigh_tbl;
2714	u8 adj_index_valid:1,
2715	   gateway:1; /* routes using the group use a gateway */
2716	u32 adj_index;
2717	u16 ecmp_size;
2718	u16 count;
2719	int sum_norm_weight;
2720	struct mlxsw_sp_nexthop nexthops[0];
2721#define nh_rif	nexthops[0].rif
2722};
2723
2724void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2725				    struct mlxsw_sp_nexthop *nh)
2726{
2727	struct devlink *devlink;
2728
2729	devlink = priv_to_devlink(mlxsw_sp->core);
2730	if (!devlink_dpipe_table_counter_enabled(devlink,
2731						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2732		return;
2733
2734	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2735		return;
2736
2737	nh->counter_valid = true;
2738}
2739
2740void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2741				   struct mlxsw_sp_nexthop *nh)
2742{
2743	if (!nh->counter_valid)
2744		return;
2745	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2746	nh->counter_valid = false;
2747}
2748
2749int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2750				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2751{
2752	if (!nh->counter_valid)
2753		return -EINVAL;
2754
2755	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2756					 p_counter, NULL);
2757}
2758
2759struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2760					       struct mlxsw_sp_nexthop *nh)
2761{
2762	if (!nh) {
2763		if (list_empty(&router->nexthop_list))
2764			return NULL;
2765		else
2766			return list_first_entry(&router->nexthop_list,
2767						typeof(*nh), router_list_node);
2768	}
2769	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2770		return NULL;
2771	return list_next_entry(nh, router_list_node);
2772}
2773
2774bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2775{
2776	return nh->offloaded;
2777}
2778
2779unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2780{
2781	if (!nh->offloaded)
2782		return NULL;
2783	return nh->neigh_entry->ha;
2784}
2785
2786int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2787			     u32 *p_adj_size, u32 *p_adj_hash_index)
2788{
2789	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2790	u32 adj_hash_index = 0;
2791	int i;
2792
2793	if (!nh->offloaded || !nh_grp->adj_index_valid)
2794		return -EINVAL;
2795
2796	*p_adj_index = nh_grp->adj_index;
2797	*p_adj_size = nh_grp->ecmp_size;
2798
2799	for (i = 0; i < nh_grp->count; i++) {
2800		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2801
2802		if (nh_iter == nh)
2803			break;
2804		if (nh_iter->offloaded)
2805			adj_hash_index += nh_iter->num_adj_entries;
2806	}
2807
2808	*p_adj_hash_index = adj_hash_index;
2809	return 0;
2810}
2811
2812struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2813{
2814	return nh->rif;
2815}
2816
2817bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2818{
2819	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2820	int i;
2821
2822	for (i = 0; i < nh_grp->count; i++) {
2823		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2824
2825		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2826			return true;
2827	}
2828	return false;
2829}
2830
2831static struct fib_info *
2832mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2833{
2834	return nh_grp->priv;
2835}
2836
2837struct mlxsw_sp_nexthop_group_cmp_arg {
2838	enum mlxsw_sp_l3proto proto;
2839	union {
2840		struct fib_info *fi;
2841		struct mlxsw_sp_fib6_entry *fib6_entry;
2842	};
2843};
2844
2845static bool
2846mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2847				    const struct in6_addr *gw, int ifindex,
2848				    int weight)
2849{
2850	int i;
2851
2852	for (i = 0; i < nh_grp->count; i++) {
2853		const struct mlxsw_sp_nexthop *nh;
2854
2855		nh = &nh_grp->nexthops[i];
2856		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2857		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2858			return true;
2859	}
2860
2861	return false;
2862}
2863
2864static bool
2865mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2866			    const struct mlxsw_sp_fib6_entry *fib6_entry)
2867{
2868	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2869
2870	if (nh_grp->count != fib6_entry->nrt6)
2871		return false;
2872
2873	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2874		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
2875		struct in6_addr *gw;
2876		int ifindex, weight;
2877
2878		ifindex = fib6_nh->fib_nh_dev->ifindex;
2879		weight = fib6_nh->fib_nh_weight;
2880		gw = &fib6_nh->fib_nh_gw6;
2881		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2882							 weight))
2883			return false;
2884	}
2885
2886	return true;
2887}
2888
2889static int
2890mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2891{
2892	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2893	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2894
2895	switch (cmp_arg->proto) {
2896	case MLXSW_SP_L3_PROTO_IPV4:
2897		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2898	case MLXSW_SP_L3_PROTO_IPV6:
2899		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2900						    cmp_arg->fib6_entry);
2901	default:
2902		WARN_ON(1);
2903		return 1;
2904	}
2905}
2906
2907static int
2908mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2909{
2910	return nh_grp->neigh_tbl->family;
2911}
2912
2913static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2914{
2915	const struct mlxsw_sp_nexthop_group *nh_grp = data;
2916	const struct mlxsw_sp_nexthop *nh;
2917	struct fib_info *fi;
2918	unsigned int val;
2919	int i;
2920
2921	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2922	case AF_INET:
2923		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2924		return jhash(&fi, sizeof(fi), seed);
2925	case AF_INET6:
2926		val = nh_grp->count;
2927		for (i = 0; i < nh_grp->count; i++) {
2928			nh = &nh_grp->nexthops[i];
2929			val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed);
2930		}
2931		return jhash(&val, sizeof(val), seed);
2932	default:
2933		WARN_ON(1);
2934		return 0;
2935	}
2936}
2937
2938static u32
2939mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2940{
2941	unsigned int val = fib6_entry->nrt6;
2942	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2943	struct net_device *dev;
2944
2945	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2946		dev = mlxsw_sp_rt6->rt->fib6_nh->fib_nh_dev;
2947		val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed);
2948	}
2949
2950	return jhash(&val, sizeof(val), seed);
2951}
2952
2953static u32
2954mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2955{
2956	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2957
2958	switch (cmp_arg->proto) {
2959	case MLXSW_SP_L3_PROTO_IPV4:
2960		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2961	case MLXSW_SP_L3_PROTO_IPV6:
2962		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2963	default:
2964		WARN_ON(1);
2965		return 0;
2966	}
2967}
2968
2969static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2970	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2971	.hashfn	     = mlxsw_sp_nexthop_group_hash,
2972	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
2973	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
2974};
2975
2976static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2977					 struct mlxsw_sp_nexthop_group *nh_grp)
2978{
2979	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2980	    !nh_grp->gateway)
2981		return 0;
2982
2983	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2984				      &nh_grp->ht_node,
2985				      mlxsw_sp_nexthop_group_ht_params);
2986}
2987
2988static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2989					  struct mlxsw_sp_nexthop_group *nh_grp)
2990{
2991	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2992	    !nh_grp->gateway)
2993		return;
2994
2995	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2996			       &nh_grp->ht_node,
2997			       mlxsw_sp_nexthop_group_ht_params);
2998}
2999
3000static struct mlxsw_sp_nexthop_group *
3001mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3002			       struct fib_info *fi)
3003{
3004	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3005
3006	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
3007	cmp_arg.fi = fi;
3008	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3009				      &cmp_arg,
3010				      mlxsw_sp_nexthop_group_ht_params);
3011}
3012
3013static struct mlxsw_sp_nexthop_group *
3014mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3015			       struct mlxsw_sp_fib6_entry *fib6_entry)
3016{
3017	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3018
3019	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
3020	cmp_arg.fib6_entry = fib6_entry;
3021	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3022				      &cmp_arg,
3023				      mlxsw_sp_nexthop_group_ht_params);
3024}
3025
3026static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3027	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3028	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3029	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
3030};
3031
3032static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3033				   struct mlxsw_sp_nexthop *nh)
3034{
3035	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3036				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3037}
3038
3039static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3040				    struct mlxsw_sp_nexthop *nh)
3041{
3042	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3043			       mlxsw_sp_nexthop_ht_params);
3044}
3045
3046static struct mlxsw_sp_nexthop *
3047mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3048			struct mlxsw_sp_nexthop_key key)
3049{
3050	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3051				      mlxsw_sp_nexthop_ht_params);
3052}
3053
3054static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3055					     const struct mlxsw_sp_fib *fib,
3056					     u32 adj_index, u16 ecmp_size,
3057					     u32 new_adj_index,
3058					     u16 new_ecmp_size)
3059{
3060	char raleu_pl[MLXSW_REG_RALEU_LEN];
3061
3062	mlxsw_reg_raleu_pack(raleu_pl,
3063			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
3064			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
3065			     new_ecmp_size);
3066	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3067}
3068
3069static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3070					  struct mlxsw_sp_nexthop_group *nh_grp,
3071					  u32 old_adj_index, u16 old_ecmp_size)
3072{
3073	struct mlxsw_sp_fib_entry *fib_entry;
3074	struct mlxsw_sp_fib *fib = NULL;
3075	int err;
3076
3077	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3078		if (fib == fib_entry->fib_node->fib)
3079			continue;
3080		fib = fib_entry->fib_node->fib;
3081		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
3082							old_adj_index,
3083							old_ecmp_size,
3084							nh_grp->adj_index,
3085							nh_grp->ecmp_size);
3086		if (err)
3087			return err;
3088	}
3089	return 0;
3090}
3091
3092static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3093				     struct mlxsw_sp_nexthop *nh)
3094{
3095	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3096	char ratr_pl[MLXSW_REG_RATR_LEN];
3097
3098	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
3099			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
3100			    adj_index, neigh_entry->rif);
3101	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3102	if (nh->counter_valid)
3103		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3104	else
3105		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3106
3107	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3108}
3109
3110int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3111			    struct mlxsw_sp_nexthop *nh)
3112{
3113	int i;
3114
3115	for (i = 0; i < nh->num_adj_entries; i++) {
3116		int err;
3117
3118		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3119		if (err)
3120			return err;
3121	}
3122
3123	return 0;
3124}
3125
3126static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3127					  u32 adj_index,
3128					  struct mlxsw_sp_nexthop *nh)
3129{
3130	const struct mlxsw_sp_ipip_ops *ipip_ops;
3131
3132	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3133	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3134}
3135
3136static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3137					u32 adj_index,
3138					struct mlxsw_sp_nexthop *nh)
3139{
3140	int i;
3141
3142	for (i = 0; i < nh->num_adj_entries; i++) {
3143		int err;
3144
3145		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3146						     nh);
3147		if (err)
3148			return err;
3149	}
3150
3151	return 0;
3152}
3153
3154static int
3155mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3156			      struct mlxsw_sp_nexthop_group *nh_grp,
3157			      bool reallocate)
3158{
3159	u32 adj_index = nh_grp->adj_index; /* base */
3160	struct mlxsw_sp_nexthop *nh;
3161	int i;
3162	int err;
3163
3164	for (i = 0; i < nh_grp->count; i++) {
3165		nh = &nh_grp->nexthops[i];
3166
3167		if (!nh->should_offload) {
3168			nh->offloaded = 0;
3169			continue;
3170		}
3171
3172		if (nh->update || reallocate) {
3173			switch (nh->type) {
3174			case MLXSW_SP_NEXTHOP_TYPE_ETH:
3175				err = mlxsw_sp_nexthop_update
3176					    (mlxsw_sp, adj_index, nh);
3177				break;
3178			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3179				err = mlxsw_sp_nexthop_ipip_update
3180					    (mlxsw_sp, adj_index, nh);
3181				break;
3182			}
3183			if (err)
3184				return err;
3185			nh->update = 0;
3186			nh->offloaded = 1;
3187		}
3188		adj_index += nh->num_adj_entries;
3189	}
3190	return 0;
3191}
3192
3193static bool
3194mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3195				 const struct mlxsw_sp_fib_entry *fib_entry);
3196
3197static int
3198mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3199				    struct mlxsw_sp_nexthop_group *nh_grp)
3200{
3201	struct mlxsw_sp_fib_entry *fib_entry;
3202	int err;
3203
3204	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3205		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3206						      fib_entry))
3207			continue;
3208		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3209		if (err)
3210			return err;
3211	}
3212	return 0;
3213}
3214
3215static void
3216mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3217				   enum mlxsw_reg_ralue_op op, int err);
3218
3219static void
3220mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3221{
3222	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3223	struct mlxsw_sp_fib_entry *fib_entry;
3224
3225	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3226		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3227						      fib_entry))
3228			continue;
3229		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3230	}
3231}
3232
3233static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3234{
3235	/* Valid sizes for an adjacency group are:
3236	 * 1-64, 512, 1024, 2048 and 4096.
3237	 */
3238	if (*p_adj_grp_size <= 64)
3239		return;
3240	else if (*p_adj_grp_size <= 512)
3241		*p_adj_grp_size = 512;
3242	else if (*p_adj_grp_size <= 1024)
3243		*p_adj_grp_size = 1024;
3244	else if (*p_adj_grp_size <= 2048)
3245		*p_adj_grp_size = 2048;
3246	else
3247		*p_adj_grp_size = 4096;
3248}
3249
3250static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3251					     unsigned int alloc_size)
3252{
3253	if (alloc_size >= 4096)
3254		*p_adj_grp_size = 4096;
3255	else if (alloc_size >= 2048)
3256		*p_adj_grp_size = 2048;
3257	else if (alloc_size >= 1024)
3258		*p_adj_grp_size = 1024;
3259	else if (alloc_size >= 512)
3260		*p_adj_grp_size = 512;
3261}
3262
3263static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3264				     u16 *p_adj_grp_size)
3265{
3266	unsigned int alloc_size;
3267	int err;
3268
3269	/* Round up the requested group size to the next size supported
3270	 * by the device and make sure the request can be satisfied.
3271	 */
3272	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3273	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3274					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3275					      *p_adj_grp_size, &alloc_size);
3276	if (err)
3277		return err;
3278	/* It is possible the allocation results in more allocated
3279	 * entries than requested. Try to use as much of them as
3280	 * possible.
3281	 */
3282	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3283
3284	return 0;
3285}
3286
3287static void
3288mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3289{
3290	int i, g = 0, sum_norm_weight = 0;
3291	struct mlxsw_sp_nexthop *nh;
3292
3293	for (i = 0; i < nh_grp->count; i++) {
3294		nh = &nh_grp->nexthops[i];
3295
3296		if (!nh->should_offload)
3297			continue;
3298		if (g > 0)
3299			g = gcd(nh->nh_weight, g);
3300		else
3301			g = nh->nh_weight;
3302	}
3303
3304	for (i = 0; i < nh_grp->count; i++) {
3305		nh = &nh_grp->nexthops[i];
3306
3307		if (!nh->should_offload)
3308			continue;
3309		nh->norm_nh_weight = nh->nh_weight / g;
3310		sum_norm_weight += nh->norm_nh_weight;
3311	}
3312
3313	nh_grp->sum_norm_weight = sum_norm_weight;
3314}
3315
3316static void
3317mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3318{
3319	int total = nh_grp->sum_norm_weight;
3320	u16 ecmp_size = nh_grp->ecmp_size;
3321	int i, weight = 0, lower_bound = 0;
3322
3323	for (i = 0; i < nh_grp->count; i++) {
3324		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3325		int upper_bound;
3326
3327		if (!nh->should_offload)
3328			continue;
3329		weight += nh->norm_nh_weight;
3330		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3331		nh->num_adj_entries = upper_bound - lower_bound;
3332		lower_bound = upper_bound;
3333	}
3334}
3335
3336static void
3337mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3338			       struct mlxsw_sp_nexthop_group *nh_grp)
3339{
3340	u16 ecmp_size, old_ecmp_size;
3341	struct mlxsw_sp_nexthop *nh;
3342	bool offload_change = false;
3343	u32 adj_index;
3344	bool old_adj_index_valid;
3345	u32 old_adj_index;
3346	int i;
3347	int err;
3348
3349	if (!nh_grp->gateway) {
3350		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3351		return;
3352	}
3353
3354	for (i = 0; i < nh_grp->count; i++) {
3355		nh = &nh_grp->nexthops[i];
3356
3357		if (nh->should_offload != nh->offloaded) {
3358			offload_change = true;
3359			if (nh->should_offload)
3360				nh->update = 1;
3361		}
3362	}
3363	if (!offload_change) {
3364		/* Nothing was added or removed, so no need to reallocate. Just
3365		 * update MAC on existing adjacency indexes.
3366		 */
3367		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3368		if (err) {
3369			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3370			goto set_trap;
3371		}
3372		return;
3373	}
3374	mlxsw_sp_nexthop_group_normalize(nh_grp);
3375	if (!nh_grp->sum_norm_weight)
3376		/* No neigh of this group is connected so we just set
3377		 * the trap and let everthing flow through kernel.
3378		 */
3379		goto set_trap;
3380
3381	ecmp_size = nh_grp->sum_norm_weight;
3382	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3383	if (err)
3384		/* No valid allocation size available. */
3385		goto set_trap;
3386
3387	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3388				  ecmp_size, &adj_index);
3389	if (err) {
3390		/* We ran out of KVD linear space, just set the
3391		 * trap and let everything flow through kernel.
3392		 */
3393		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3394		goto set_trap;
3395	}
3396	old_adj_index_valid = nh_grp->adj_index_valid;
3397	old_adj_index = nh_grp->adj_index;
3398	old_ecmp_size = nh_grp->ecmp_size;
3399	nh_grp->adj_index_valid = 1;
3400	nh_grp->adj_index = adj_index;
3401	nh_grp->ecmp_size = ecmp_size;
3402	mlxsw_sp_nexthop_group_rebalance(nh_grp);
3403	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3404	if (err) {
3405		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3406		goto set_trap;
3407	}
3408
3409	if (!old_adj_index_valid) {
3410		/* The trap was set for fib entries, so we have to call
3411		 * fib entry update to unset it and use adjacency index.
3412		 */
3413		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3414		if (err) {
3415			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3416			goto set_trap;
3417		}
3418		return;
3419	}
3420
3421	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3422					     old_adj_index, old_ecmp_size);
3423	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3424			   old_ecmp_size, old_adj_index);
3425	if (err) {
3426		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3427		goto set_trap;
3428	}
3429
3430	/* Offload state within the group changed, so update the flags. */
3431	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
3432
3433	return;
3434
3435set_trap:
3436	old_adj_index_valid = nh_grp->adj_index_valid;
3437	nh_grp->adj_index_valid = 0;
3438	for (i = 0; i < nh_grp->count; i++) {
3439		nh = &nh_grp->nexthops[i];
3440		nh->offloaded = 0;
3441	}
3442	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3443	if (err)
3444		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3445	if (old_adj_index_valid)
3446		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3447				   nh_grp->ecmp_size, nh_grp->adj_index);
3448}
3449
3450static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3451					    bool removing)
3452{
3453	if (!removing)
3454		nh->should_offload = 1;
3455	else
3456		nh->should_offload = 0;
3457	nh->update = 1;
3458}
3459
3460static int
3461mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
3462				    struct mlxsw_sp_neigh_entry *neigh_entry)
3463{
3464	struct neighbour *n, *old_n = neigh_entry->key.n;
3465	struct mlxsw_sp_nexthop *nh;
3466	bool entry_connected;
3467	u8 nud_state, dead;
3468	int err;
3469
3470	nh = list_first_entry(&neigh_entry->nexthop_list,
3471			      struct mlxsw_sp_nexthop, neigh_list_node);
3472
3473	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3474	if (!n) {
3475		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3476				 nh->rif->dev);
3477		if (IS_ERR(n))
3478			return PTR_ERR(n);
3479		neigh_event_send(n, NULL);
3480	}
3481
3482	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
3483	neigh_entry->key.n = n;
3484	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
3485	if (err)
3486		goto err_neigh_entry_insert;
3487
3488	read_lock_bh(&n->lock);
3489	nud_state = n->nud_state;
3490	dead = n->dead;
3491	read_unlock_bh(&n->lock);
3492	entry_connected = nud_state & NUD_VALID && !dead;
3493
3494	list_for_each_entry(nh, &neigh_entry->nexthop_list,
3495			    neigh_list_node) {
3496		neigh_release(old_n);
3497		neigh_clone(n);
3498		__mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
3499		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3500	}
3501
3502	neigh_release(n);
3503
3504	return 0;
3505
3506err_neigh_entry_insert:
3507	neigh_entry->key.n = old_n;
3508	mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
3509	neigh_release(n);
3510	return err;
3511}
3512
3513static void
3514mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3515			      struct mlxsw_sp_neigh_entry *neigh_entry,
3516			      bool removing, bool dead)
3517{
3518	struct mlxsw_sp_nexthop *nh;
3519
3520	if (list_empty(&neigh_entry->nexthop_list))
3521		return;
3522
3523	if (dead) {
3524		int err;
3525
3526		err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
3527							  neigh_entry);
3528		if (err)
3529			dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
3530		return;
3531	}
3532
3533	list_for_each_entry(nh, &neigh_entry->nexthop_list,
3534			    neigh_list_node) {
3535		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3536		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3537	}
3538}
3539
3540static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3541				      struct mlxsw_sp_rif *rif)
3542{
3543	if (nh->rif)
3544		return;
3545
3546	nh->rif = rif;
3547	list_add(&nh->rif_list_node, &rif->nexthop_list);
3548}
3549
3550static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3551{
3552	if (!nh->rif)
3553		return;
3554
3555	list_del(&nh->rif_list_node);
3556	nh->rif = NULL;
3557}
3558
3559static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3560				       struct mlxsw_sp_nexthop *nh)
3561{
3562	struct mlxsw_sp_neigh_entry *neigh_entry;
3563	struct neighbour *n;
3564	u8 nud_state, dead;
3565	int err;
3566
3567	if (!nh->nh_grp->gateway || nh->neigh_entry)
3568		return 0;
3569
3570	/* Take a reference of neigh here ensuring that neigh would
3571	 * not be destructed before the nexthop entry is finished.
3572	 * The reference is taken either in neigh_lookup() or
3573	 * in neigh_create() in case n is not found.
3574	 */
3575	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3576	if (!n) {
3577		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3578				 nh->rif->dev);
3579		if (IS_ERR(n))
3580			return PTR_ERR(n);
3581		neigh_event_send(n, NULL);
3582	}
3583	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3584	if (!neigh_entry) {
3585		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3586		if (IS_ERR(neigh_entry)) {
3587			err = -EINVAL;
3588			goto err_neigh_entry_create;
3589		}
3590	}
3591
3592	/* If that is the first nexthop connected to that neigh, add to
3593	 * nexthop_neighs_list
3594	 */
3595	if (list_empty(&neigh_entry->nexthop_list))
3596		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3597			      &mlxsw_sp->router->nexthop_neighs_list);
3598
3599	nh->neigh_entry = neigh_entry;
3600	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3601	read_lock_bh(&n->lock);
3602	nud_state = n->nud_state;
3603	dead = n->dead;
3604	read_unlock_bh(&n->lock);
3605	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3606
3607	return 0;
3608
3609err_neigh_entry_create:
3610	neigh_release(n);
3611	return err;
3612}
3613
3614static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3615					struct mlxsw_sp_nexthop *nh)
3616{
3617	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3618	struct neighbour *n;
3619
3620	if (!neigh_entry)
3621		return;
3622	n = neigh_entry->key.n;
3623
3624	__mlxsw_sp_nexthop_neigh_update(nh, true);
3625	list_del(&nh->neigh_list_node);
3626	nh->neigh_entry = NULL;
3627
3628	/* If that is the last nexthop connected to that neigh, remove from
3629	 * nexthop_neighs_list
3630	 */
3631	if (list_empty(&neigh_entry->nexthop_list))
3632		list_del(&neigh_entry->nexthop_neighs_list_node);
3633
3634	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3635		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3636
3637	neigh_release(n);
3638}
3639
3640static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3641{
3642	struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3643
3644	return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3645}
3646
3647static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3648				       struct mlxsw_sp_nexthop *nh,
3649				       struct mlxsw_sp_ipip_entry *ipip_entry)
3650{
3651	bool removing;
3652
3653	if (!nh->nh_grp->gateway || nh->ipip_entry)
3654		return;
3655
3656	nh->ipip_entry = ipip_entry;
3657	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3658	__mlxsw_sp_nexthop_neigh_update(nh, removing);
3659	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3660}
3661
3662static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3663				       struct mlxsw_sp_nexthop *nh)
3664{
3665	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3666
3667	if (!ipip_entry)
3668		return;
3669
3670	__mlxsw_sp_nexthop_neigh_update(nh, true);
3671	nh->ipip_entry = NULL;
3672}
3673
3674static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3675					const struct fib_nh *fib_nh,
3676					enum mlxsw_sp_ipip_type *p_ipipt)
3677{
3678	struct net_device *dev = fib_nh->fib_nh_dev;
3679
3680	return dev &&
3681	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3682	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3683}
3684
3685static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3686				       struct mlxsw_sp_nexthop *nh)
3687{
3688	switch (nh->type) {
3689	case MLXSW_SP_NEXTHOP_TYPE_ETH:
3690		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3691		mlxsw_sp_nexthop_rif_fini(nh);
3692		break;
3693	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3694		mlxsw_sp_nexthop_rif_fini(nh);
3695		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3696		break;
3697	}
3698}
3699
3700static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3701				       struct mlxsw_sp_nexthop *nh,
3702				       struct fib_nh *fib_nh)
3703{
3704	const struct mlxsw_sp_ipip_ops *ipip_ops;
3705	struct net_device *dev = fib_nh->fib_nh_dev;
3706	struct mlxsw_sp_ipip_entry *ipip_entry;
3707	struct mlxsw_sp_rif *rif;
3708	int err;
3709
3710	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3711	if (ipip_entry) {
3712		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3713		if (ipip_ops->can_offload(mlxsw_sp, dev,
3714					  MLXSW_SP_L3_PROTO_IPV4)) {
3715			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3716			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3717			return 0;
3718		}
3719	}
3720
3721	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3722	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3723	if (!rif)
3724		return 0;
3725
3726	mlxsw_sp_nexthop_rif_init(nh, rif);
3727	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3728	if (err)
3729		goto err_neigh_init;
3730
3731	return 0;
3732
3733err_neigh_init:
3734	mlxsw_sp_nexthop_rif_fini(nh);
3735	return err;
3736}
3737
3738static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
3739					struct mlxsw_sp_nexthop *nh)
3740{
3741	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3742}
3743
3744static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3745				  struct mlxsw_sp_nexthop_group *nh_grp,
3746				  struct mlxsw_sp_nexthop *nh,
3747				  struct fib_nh *fib_nh)
3748{
3749	struct net_device *dev = fib_nh->fib_nh_dev;
3750	struct in_device *in_dev;
3751	int err;
3752
3753	nh->nh_grp = nh_grp;
3754	nh->key.fib_nh = fib_nh;
3755#ifdef CONFIG_IP_ROUTE_MULTIPATH
3756	nh->nh_weight = fib_nh->fib_nh_weight;
3757#else
3758	nh->nh_weight = 1;
3759#endif
3760	memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
3761	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3762	if (err)
3763		return err;
3764
3765	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3766	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3767
3768	if (!dev)
3769		return 0;
3770
3771	in_dev = __in_dev_get_rtnl(dev);
3772	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3773	    fib_nh->fib_nh_flags & RTNH_F_LINKDOWN)
3774		return 0;
3775
3776	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3777	if (err)
3778		goto err_nexthop_neigh_init;
3779
3780	return 0;
3781
3782err_nexthop_neigh_init:
3783	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3784	return err;
3785}
3786
3787static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3788				   struct mlxsw_sp_nexthop *nh)
3789{
3790	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3791	list_del(&nh->router_list_node);
3792	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3793	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3794}
3795
3796static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3797				    unsigned long event, struct fib_nh *fib_nh)
3798{
3799	struct mlxsw_sp_nexthop_key key;
3800	struct mlxsw_sp_nexthop *nh;
3801
3802	if (mlxsw_sp->router->aborted)
3803		return;
3804
3805	key.fib_nh = fib_nh;
3806	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3807	if (WARN_ON_ONCE(!nh))
3808		return;
3809
3810	switch (event) {
3811	case FIB_EVENT_NH_ADD:
3812		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3813		break;
3814	case FIB_EVENT_NH_DEL:
3815		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3816		break;
3817	}
3818
3819	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3820}
3821
3822static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3823					struct mlxsw_sp_rif *rif)
3824{
3825	struct mlxsw_sp_nexthop *nh;
3826	bool removing;
3827
3828	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3829		switch (nh->type) {
3830		case MLXSW_SP_NEXTHOP_TYPE_ETH:
3831			removing = false;
3832			break;
3833		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3834			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3835			break;
3836		default:
3837			WARN_ON(1);
3838			continue;
3839		}
3840
3841		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3842		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3843	}
3844}
3845
3846static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3847					 struct mlxsw_sp_rif *old_rif,
3848					 struct mlxsw_sp_rif *new_rif)
3849{
3850	struct mlxsw_sp_nexthop *nh;
3851
3852	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3853	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3854		nh->rif = new_rif;
3855	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3856}
3857
3858static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3859					   struct mlxsw_sp_rif *rif)
3860{
3861	struct mlxsw_sp_nexthop *nh, *tmp;
3862
3863	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3864		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3865		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3866	}
3867}
3868
3869static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3870				   struct fib_info *fi)
3871{
3872	const struct fib_nh *nh = fib_info_nh(fi, 0);
3873
3874	return nh->fib_nh_scope == RT_SCOPE_LINK ||
3875	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
3876}
3877
3878static struct mlxsw_sp_nexthop_group *
3879mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3880{
3881	unsigned int nhs = fib_info_num_path(fi);
3882	struct mlxsw_sp_nexthop_group *nh_grp;
3883	struct mlxsw_sp_nexthop *nh;
3884	struct fib_nh *fib_nh;
3885	int i;
3886	int err;
3887
3888	nh_grp = kzalloc(struct_size(nh_grp, nexthops, nhs), GFP_KERNEL);
3889	if (!nh_grp)
3890		return ERR_PTR(-ENOMEM);
3891	nh_grp->priv = fi;
3892	INIT_LIST_HEAD(&nh_grp->fib_list);
3893	nh_grp->neigh_tbl = &arp_tbl;
3894
3895	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3896	nh_grp->count = nhs;
3897	fib_info_hold(fi);
3898	for (i = 0; i < nh_grp->count; i++) {
3899		nh = &nh_grp->nexthops[i];
3900		fib_nh = fib_info_nh(fi, i);
3901		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3902		if (err)
3903			goto err_nexthop4_init;
3904	}
3905	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3906	if (err)
3907		goto err_nexthop_group_insert;
3908	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3909	return nh_grp;
3910
3911err_nexthop_group_insert:
3912err_nexthop4_init:
3913	for (i--; i >= 0; i--) {
3914		nh = &nh_grp->nexthops[i];
3915		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3916	}
3917	fib_info_put(fi);
3918	kfree(nh_grp);
3919	return ERR_PTR(err);
3920}
3921
3922static void
3923mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3924				struct mlxsw_sp_nexthop_group *nh_grp)
3925{
3926	struct mlxsw_sp_nexthop *nh;
3927	int i;
3928
3929	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3930	for (i = 0; i < nh_grp->count; i++) {
3931		nh = &nh_grp->nexthops[i];
3932		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3933	}
3934	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3935	WARN_ON_ONCE(nh_grp->adj_index_valid);
3936	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3937	kfree(nh_grp);
3938}
3939
3940static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3941				       struct mlxsw_sp_fib_entry *fib_entry,
3942				       struct fib_info *fi)
3943{
3944	struct mlxsw_sp_nexthop_group *nh_grp;
3945
3946	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3947	if (!nh_grp) {
3948		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3949		if (IS_ERR(nh_grp))
3950			return PTR_ERR(nh_grp);
3951	}
3952	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3953	fib_entry->nh_group = nh_grp;
3954	return 0;
3955}
3956
3957static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3958					struct mlxsw_sp_fib_entry *fib_entry)
3959{
3960	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3961
3962	list_del(&fib_entry->nexthop_group_node);
3963	if (!list_empty(&nh_grp->fib_list))
3964		return;
3965	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3966}
3967
3968static bool
3969mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3970{
3971	struct mlxsw_sp_fib4_entry *fib4_entry;
3972
3973	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3974				  common);
3975	return !fib4_entry->tos;
3976}
3977
3978static bool
3979mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3980{
3981	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3982
3983	switch (fib_entry->fib_node->fib->proto) {
3984	case MLXSW_SP_L3_PROTO_IPV4:
3985		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3986			return false;
3987		break;
3988	case MLXSW_SP_L3_PROTO_IPV6:
3989		break;
3990	}
3991
3992	switch (fib_entry->type) {
3993	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3994		return !!nh_group->adj_index_valid;
3995	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3996		return !!nh_group->nh_rif;
3997	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
3998	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3999	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
4000		return true;
4001	default:
4002		return false;
4003	}
4004}
4005
4006static struct mlxsw_sp_nexthop *
4007mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
4008		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4009{
4010	int i;
4011
4012	for (i = 0; i < nh_grp->count; i++) {
4013		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
4014		struct fib6_info *rt = mlxsw_sp_rt6->rt;
4015
4016		if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev &&
4017		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
4018				    &rt->fib6_nh->fib_nh_gw6))
4019			return nh;
4020		continue;
4021	}
4022
4023	return NULL;
4024}
4025
4026static void
4027mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4028{
4029	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4030	int i;
4031
4032	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
4033	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE ||
4034	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP ||
4035	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) {
4036		nh_grp->nexthops->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
4037		return;
4038	}
4039
4040	for (i = 0; i < nh_grp->count; i++) {
4041		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
4042
4043		if (nh->offloaded)
4044			nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
4045		else
4046			nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
4047	}
4048}
4049
4050static void
4051mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4052{
4053	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4054	int i;
4055
4056	if (!list_is_singular(&nh_grp->fib_list))
4057		return;
4058
4059	for (i = 0; i < nh_grp->count; i++) {
4060		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
4061
4062		nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
4063	}
4064}
4065
4066static void
4067mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4068{
4069	struct mlxsw_sp_fib6_entry *fib6_entry;
4070	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4071
4072	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4073				  common);
4074
4075	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
4076	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) {
4077		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4078				 list)->rt->fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
4079		return;
4080	}
4081
4082	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4083		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4084		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
4085		struct mlxsw_sp_nexthop *nh;
4086
4087		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
4088		if (nh && nh->offloaded)
4089			fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
4090		else
4091			fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
4092	}
4093}
4094
4095static void
4096mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4097{
4098	struct mlxsw_sp_fib6_entry *fib6_entry;
4099	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4100
4101	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4102				  common);
4103	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4104		struct fib6_info *rt = mlxsw_sp_rt6->rt;
4105
4106		rt->fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
4107	}
4108}
4109
4110static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4111{
4112	switch (fib_entry->fib_node->fib->proto) {
4113	case MLXSW_SP_L3_PROTO_IPV4:
4114		mlxsw_sp_fib4_entry_offload_set(fib_entry);
4115		break;
4116	case MLXSW_SP_L3_PROTO_IPV6:
4117		mlxsw_sp_fib6_entry_offload_set(fib_entry);
4118		break;
4119	}
4120}
4121
4122static void
4123mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4124{
4125	switch (fib_entry->fib_node->fib->proto) {
4126	case MLXSW_SP_L3_PROTO_IPV4:
4127		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
4128		break;
4129	case MLXSW_SP_L3_PROTO_IPV6:
4130		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
4131		break;
4132	}
4133}
4134
4135static void
4136mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
4137				   enum mlxsw_reg_ralue_op op, int err)
4138{
4139	switch (op) {
4140	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
4141		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
4142	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
4143		if (err)
4144			return;
4145		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
4146			mlxsw_sp_fib_entry_offload_set(fib_entry);
4147		else
4148			mlxsw_sp_fib_entry_offload_unset(fib_entry);
4149		return;
4150	default:
4151		return;
4152	}
4153}
4154
4155static void
4156mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
4157			      const struct mlxsw_sp_fib_entry *fib_entry,
4158			      enum mlxsw_reg_ralue_op op)
4159{
4160	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
4161	enum mlxsw_reg_ralxx_protocol proto;
4162	u32 *p_dip;
4163
4164	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
4165
4166	switch (fib->proto) {
4167	case MLXSW_SP_L3_PROTO_IPV4:
4168		p_dip = (u32 *) fib_entry->fib_node->key.addr;
4169		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
4170				      fib_entry->fib_node->key.prefix_len,
4171				      *p_dip);
4172		break;
4173	case MLXSW_SP_L3_PROTO_IPV6:
4174		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
4175				      fib_entry->fib_node->key.prefix_len,
4176				      fib_entry->fib_node->key.addr);
4177		break;
4178	}
4179}
4180
4181static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
4182					struct mlxsw_sp_fib_entry *fib_entry,
4183					enum mlxsw_reg_ralue_op op)
4184{
4185	char ralue_pl[MLXSW_REG_RALUE_LEN];
4186	enum mlxsw_reg_ralue_trap_action trap_action;
4187	u16 trap_id = 0;
4188	u32 adjacency_index = 0;
4189	u16 ecmp_size = 0;
4190
4191	/* In case the nexthop group adjacency index is valid, use it
4192	 * with provided ECMP size. Otherwise, setup trap and pass
4193	 * traffic to kernel.
4194	 */
4195	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4196		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4197		adjacency_index = fib_entry->nh_group->adj_index;
4198		ecmp_size = fib_entry->nh_group->ecmp_size;
4199	} else {
4200		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4201		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4202	}
4203
4204	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4205	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
4206					adjacency_index, ecmp_size);
4207	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4208}
4209
4210static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
4211				       struct mlxsw_sp_fib_entry *fib_entry,
4212				       enum mlxsw_reg_ralue_op op)
4213{
4214	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
4215	enum mlxsw_reg_ralue_trap_action trap_action;
4216	char ralue_pl[MLXSW_REG_RALUE_LEN];
4217	u16 trap_id = 0;
4218	u16 rif_index = 0;
4219
4220	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4221		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4222		rif_index = rif->rif_index;
4223	} else {
4224		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4225		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4226	}
4227
4228	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4229	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
4230				       rif_index);
4231	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4232}
4233
4234static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
4235				      struct mlxsw_sp_fib_entry *fib_entry,
4236				      enum mlxsw_reg_ralue_op op)
4237{
4238	char ralue_pl[MLXSW_REG_RALUE_LEN];
4239
4240	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4241	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4242	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4243}
4244
4245static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
4246					   struct mlxsw_sp_fib_entry *fib_entry,
4247					   enum mlxsw_reg_ralue_op op)
4248{
4249	enum mlxsw_reg_ralue_trap_action trap_action;
4250	char ralue_pl[MLXSW_REG_RALUE_LEN];
4251
4252	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
4253	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4254	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
4255	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4256}
4257
4258static int
4259mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
4260				 struct mlxsw_sp_fib_entry *fib_entry,
4261				 enum mlxsw_reg_ralue_op op)
4262{
4263	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
4264	const struct mlxsw_sp_ipip_ops *ipip_ops;
4265
4266	if (WARN_ON(!ipip_entry))
4267		return -EINVAL;
4268
4269	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4270	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
4271				      fib_entry->decap.tunnel_index);
4272}
4273
4274static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
4275					   struct mlxsw_sp_fib_entry *fib_entry,
4276					   enum mlxsw_reg_ralue_op op)
4277{
4278	char ralue_pl[MLXSW_REG_RALUE_LEN];
4279
4280	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4281	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
4282					   fib_entry->decap.tunnel_index);
4283	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4284}
4285
4286static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4287				   struct mlxsw_sp_fib_entry *fib_entry,
4288				   enum mlxsw_reg_ralue_op op)
4289{
4290	switch (fib_entry->type) {
4291	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4292		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4293	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4294		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4295	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4296		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4297	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
4298		return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
4299	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4300		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4301							fib_entry, op);
4302	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
4303		return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
4304	}
4305	return -EINVAL;
4306}
4307
4308static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4309				 struct mlxsw_sp_fib_entry *fib_entry,
4310				 enum mlxsw_reg_ralue_op op)
4311{
4312	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4313
4314	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4315
4316	return err;
4317}
4318
4319static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4320				     struct mlxsw_sp_fib_entry *fib_entry)
4321{
4322	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4323				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
4324}
4325
4326static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4327				  struct mlxsw_sp_fib_entry *fib_entry)
4328{
4329	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4330				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
4331}
4332
4333static int
4334mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4335			     const struct fib_entry_notifier_info *fen_info,
4336			     struct mlxsw_sp_fib_entry *fib_entry)
4337{
4338	struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
4339	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4340	u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
4341	struct mlxsw_sp_ipip_entry *ipip_entry;
4342	struct fib_info *fi = fen_info->fi;
4343
4344	switch (fen_info->type) {
4345	case RTN_LOCAL:
4346		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4347						 MLXSW_SP_L3_PROTO_IPV4, dip);
4348		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4349			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4350			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4351							     fib_entry,
4352							     ipip_entry);
4353		}
4354		if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
4355						     dip.addr4)) {
4356			u32 t_index;
4357
4358			t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
4359			fib_entry->decap.tunnel_index = t_index;
4360			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
4361			return 0;
4362		}
4363		/* fall through */
4364	case RTN_BROADCAST:
4365		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4366		return 0;
4367	case RTN_BLACKHOLE:
4368		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
4369		return 0;
4370	case RTN_UNREACHABLE: /* fall through */
4371	case RTN_PROHIBIT:
4372		/* Packets hitting these routes need to be trapped, but
4373		 * can do so with a lower priority than packets directed
4374		 * at the host, so use action type local instead of trap.
4375		 */
4376		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4377		return 0;
4378	case RTN_UNICAST:
4379		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4380			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4381		else
4382			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4383		return 0;
4384	default:
4385		return -EINVAL;
4386	}
4387}
4388
4389static struct mlxsw_sp_fib4_entry *
4390mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4391			   struct mlxsw_sp_fib_node *fib_node,
4392			   const struct fib_entry_notifier_info *fen_info)
4393{
4394	struct mlxsw_sp_fib4_entry *fib4_entry;
4395	struct mlxsw_sp_fib_entry *fib_entry;
4396	int err;
4397
4398	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4399	if (!fib4_entry)
4400		return ERR_PTR(-ENOMEM);
4401	fib_entry = &fib4_entry->common;
4402
4403	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4404	if (err)
4405		goto err_fib4_entry_type_set;
4406
4407	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4408	if (err)
4409		goto err_nexthop4_group_get;
4410
4411	fib4_entry->prio = fen_info->fi->fib_priority;
4412	fib4_entry->tb_id = fen_info->tb_id;
4413	fib4_entry->type = fen_info->type;
4414	fib4_entry->tos = fen_info->tos;
4415
4416	fib_entry->fib_node = fib_node;
4417
4418	return fib4_entry;
4419
4420err_nexthop4_group_get:
4421err_fib4_entry_type_set:
4422	kfree(fib4_entry);
4423	return ERR_PTR(err);
4424}
4425
4426static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4427					struct mlxsw_sp_fib4_entry *fib4_entry)
4428{
4429	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4430	kfree(fib4_entry);
4431}
4432
4433static struct mlxsw_sp_fib4_entry *
4434mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4435			   const struct fib_entry_notifier_info *fen_info)
4436{
4437	struct mlxsw_sp_fib4_entry *fib4_entry;
4438	struct mlxsw_sp_fib_node *fib_node;
4439	struct mlxsw_sp_fib *fib;
4440	struct mlxsw_sp_vr *vr;
4441
4442	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4443	if (!vr)
4444		return NULL;
4445	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4446
4447	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4448					    sizeof(fen_info->dst),
4449					    fen_info->dst_len);
4450	if (!fib_node)
4451		return NULL;
4452
4453	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4454		if (fib4_entry->tb_id == fen_info->tb_id &&
4455		    fib4_entry->tos == fen_info->tos &&
4456		    fib4_entry->type == fen_info->type &&
4457		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4458		    fen_info->fi) {
4459			return fib4_entry;
4460		}
4461	}
4462
4463	return NULL;
4464}
4465
4466static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4467	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4468	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4469	.key_len = sizeof(struct mlxsw_sp_fib_key),
4470	.automatic_shrinking = true,
4471};
4472
4473static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4474				    struct mlxsw_sp_fib_node *fib_node)
4475{
4476	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4477				      mlxsw_sp_fib_ht_params);
4478}
4479
4480static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4481				     struct mlxsw_sp_fib_node *fib_node)
4482{
4483	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4484			       mlxsw_sp_fib_ht_params);
4485}
4486
4487static struct mlxsw_sp_fib_node *
4488mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4489			 size_t addr_len, unsigned char prefix_len)
4490{
4491	struct mlxsw_sp_fib_key key;
4492
4493	memset(&key, 0, sizeof(key));
4494	memcpy(key.addr, addr, addr_len);
4495	key.prefix_len = prefix_len;
4496	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4497}
4498
4499static struct mlxsw_sp_fib_node *
4500mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4501			 size_t addr_len, unsigned char prefix_len)
4502{
4503	struct mlxsw_sp_fib_node *fib_node;
4504
4505	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4506	if (!fib_node)
4507		return NULL;
4508
4509	INIT_LIST_HEAD(&fib_node->entry_list);
4510	list_add(&fib_node->list, &fib->node_list);
4511	memcpy(fib_node->key.addr, addr, addr_len);
4512	fib_node->key.prefix_len = prefix_len;
4513
4514	return fib_node;
4515}
4516
4517static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4518{
4519	list_del(&fib_node->list);
4520	WARN_ON(!list_empty(&fib_node->entry_list));
4521	kfree(fib_node);
4522}
4523
4524static bool
4525mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4526				 const struct mlxsw_sp_fib_entry *fib_entry)
4527{
4528	return list_first_entry(&fib_node->entry_list,
4529				struct mlxsw_sp_fib_entry, list) == fib_entry;
4530}
4531
4532static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4533				      struct mlxsw_sp_fib_node *fib_node)
4534{
4535	struct mlxsw_sp_prefix_usage req_prefix_usage;
4536	struct mlxsw_sp_fib *fib = fib_node->fib;
4537	struct mlxsw_sp_lpm_tree *lpm_tree;
4538	int err;
4539
4540	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
4541	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4542		goto out;
4543
4544	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4545	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4546	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4547					 fib->proto);
4548	if (IS_ERR(lpm_tree))
4549		return PTR_ERR(lpm_tree);
4550
4551	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4552	if (err)
4553		goto err_lpm_tree_replace;
4554
4555out:
4556	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
4557	return 0;
4558
4559err_lpm_tree_replace:
4560	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4561	return err;
4562}
4563
4564static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4565					 struct mlxsw_sp_fib_node *fib_node)
4566{
4567	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
4568	struct mlxsw_sp_prefix_usage req_prefix_usage;
4569	struct mlxsw_sp_fib *fib = fib_node->fib;
4570	int err;
4571
4572	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4573		return;
4574	/* Try to construct a new LPM tree from the current prefix usage
4575	 * minus the unused one. If we fail, continue using the old one.
4576	 */
4577	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4578	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
4579				    fib_node->key.prefix_len);
4580	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4581					 fib->proto);
4582	if (IS_ERR(lpm_tree))
4583		return;
4584
4585	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4586	if (err)
4587		goto err_lpm_tree_replace;
4588
4589	return;
4590
4591err_lpm_tree_replace:
4592	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4593}
4594
4595static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4596				  struct mlxsw_sp_fib_node *fib_node,
4597				  struct mlxsw_sp_fib *fib)
4598{
4599	int err;
4600
4601	err = mlxsw_sp_fib_node_insert(fib, fib_node);
4602	if (err)
4603		return err;
4604	fib_node->fib = fib;
4605
4606	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4607	if (err)
4608		goto err_fib_lpm_tree_link;
4609
4610	return 0;
4611
4612err_fib_lpm_tree_link:
4613	fib_node->fib = NULL;
4614	mlxsw_sp_fib_node_remove(fib, fib_node);
4615	return err;
4616}
4617
4618static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4619				   struct mlxsw_sp_fib_node *fib_node)
4620{
4621	struct mlxsw_sp_fib *fib = fib_node->fib;
4622
4623	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
4624	fib_node->fib = NULL;
4625	mlxsw_sp_fib_node_remove(fib, fib_node);
4626}
4627
4628static struct mlxsw_sp_fib_node *
4629mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4630		      size_t addr_len, unsigned char prefix_len,
4631		      enum mlxsw_sp_l3proto proto)
4632{
4633	struct mlxsw_sp_fib_node *fib_node;
4634	struct mlxsw_sp_fib *fib;
4635	struct mlxsw_sp_vr *vr;
4636	int err;
4637
4638	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4639	if (IS_ERR(vr))
4640		return ERR_CAST(vr);
4641	fib = mlxsw_sp_vr_fib(vr, proto);
4642
4643	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4644	if (fib_node)
4645		return fib_node;
4646
4647	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4648	if (!fib_node) {
4649		err = -ENOMEM;
4650		goto err_fib_node_create;
4651	}
4652
4653	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4654	if (err)
4655		goto err_fib_node_init;
4656
4657	return fib_node;
4658
4659err_fib_node_init:
4660	mlxsw_sp_fib_node_destroy(fib_node);
4661err_fib_node_create:
4662	mlxsw_sp_vr_put(mlxsw_sp, vr);
4663	return ERR_PTR(err);
4664}
4665
4666static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4667				  struct mlxsw_sp_fib_node *fib_node)
4668{
4669	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4670
4671	if (!list_empty(&fib_node->entry_list))
4672		return;
4673	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4674	mlxsw_sp_fib_node_destroy(fib_node);
4675	mlxsw_sp_vr_put(mlxsw_sp, vr);
4676}
4677
4678static struct mlxsw_sp_fib4_entry *
4679mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4680			      const struct mlxsw_sp_fib4_entry *new4_entry)
4681{
4682	struct mlxsw_sp_fib4_entry *fib4_entry;
4683
4684	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4685		if (fib4_entry->tb_id > new4_entry->tb_id)
4686			continue;
4687		if (fib4_entry->tb_id != new4_entry->tb_id)
4688			break;
4689		if (fib4_entry->tos > new4_entry->tos)
4690			continue;
4691		if (fib4_entry->prio >= new4_entry->prio ||
4692		    fib4_entry->tos < new4_entry->tos)
4693			return fib4_entry;
4694	}
4695
4696	return NULL;
4697}
4698
4699static int
4700mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
4701			       struct mlxsw_sp_fib4_entry *new4_entry)
4702{
4703	struct mlxsw_sp_fib_node *fib_node;
4704
4705	if (WARN_ON(!fib4_entry))
4706		return -EINVAL;
4707
4708	fib_node = fib4_entry->common.fib_node;
4709	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
4710				 common.list) {
4711		if (fib4_entry->tb_id != new4_entry->tb_id ||
4712		    fib4_entry->tos != new4_entry->tos ||
4713		    fib4_entry->prio != new4_entry->prio)
4714			break;
4715	}
4716
4717	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
4718	return 0;
4719}
4720
4721static int
4722mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
4723			       bool replace, bool append)
4724{
4725	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
4726	struct mlxsw_sp_fib4_entry *fib4_entry;
4727
4728	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
4729
4730	if (append)
4731		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
4732	if (replace && WARN_ON(!fib4_entry))
4733		return -EINVAL;
4734
4735	/* Insert new entry before replaced one, so that we can later
4736	 * remove the second.
4737	 */
4738	if (fib4_entry) {
4739		list_add_tail(&new4_entry->common.list,
4740			      &fib4_entry->common.list);
4741	} else {
4742		struct mlxsw_sp_fib4_entry *last;
4743
4744		list_for_each_entry(last, &fib_node->entry_list, common.list) {
4745			if (new4_entry->tb_id > last->tb_id)
4746				break;
4747			fib4_entry = last;
4748		}
4749
4750		if (fib4_entry)
4751			list_add(&new4_entry->common.list,
4752				 &fib4_entry->common.list);
4753		else
4754			list_add(&new4_entry->common.list,
4755				 &fib_node->entry_list);
4756	}
4757
4758	return 0;
4759}
4760
4761static void
4762mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4763{
4764	list_del(&fib4_entry->common.list);
4765}
4766
4767static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4768				       struct mlxsw_sp_fib_entry *fib_entry)
4769{
4770	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4771
4772	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4773		return 0;
4774
4775	/* To prevent packet loss, overwrite the previously offloaded
4776	 * entry.
4777	 */
4778	if (!list_is_singular(&fib_node->entry_list)) {
4779		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4780		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4781
4782		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4783	}
4784
4785	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4786}
4787
4788static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4789					struct mlxsw_sp_fib_entry *fib_entry)
4790{
4791	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4792
4793	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4794		return;
4795
4796	/* Promote the next entry by overwriting the deleted entry */
4797	if (!list_is_singular(&fib_node->entry_list)) {
4798		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4799		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4800
4801		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4802		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4803		return;
4804	}
4805
4806	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4807}
4808
4809static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4810					 struct mlxsw_sp_fib4_entry *fib4_entry,
4811					 bool replace, bool append)
4812{
4813	int err;
4814
4815	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4816	if (err)
4817		return err;
4818
4819	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4820	if (err)
4821		goto err_fib_node_entry_add;
4822
4823	return 0;
4824
4825err_fib_node_entry_add:
4826	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4827	return err;
4828}
4829
4830static void
4831mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4832				struct mlxsw_sp_fib4_entry *fib4_entry)
4833{
4834	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4835	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4836
4837	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4838		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4839}
4840
4841static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4842					struct mlxsw_sp_fib4_entry *fib4_entry,
4843					bool replace)
4844{
4845	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4846	struct mlxsw_sp_fib4_entry *replaced;
4847
4848	if (!replace)
4849		return;
4850
4851	/* We inserted the new entry before replaced one */
4852	replaced = list_next_entry(fib4_entry, common.list);
4853
4854	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4855	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4856	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4857}
4858
4859static int
4860mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4861			 const struct fib_entry_notifier_info *fen_info,
4862			 bool replace, bool append)
4863{
4864	struct mlxsw_sp_fib4_entry *fib4_entry;
4865	struct mlxsw_sp_fib_node *fib_node;
4866	int err;
4867
4868	if (mlxsw_sp->router->aborted)
4869		return 0;
4870
4871	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4872					 &fen_info->dst, sizeof(fen_info->dst),
4873					 fen_info->dst_len,
4874					 MLXSW_SP_L3_PROTO_IPV4);
4875	if (IS_ERR(fib_node)) {
4876		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4877		return PTR_ERR(fib_node);
4878	}
4879
4880	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4881	if (IS_ERR(fib4_entry)) {
4882		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4883		err = PTR_ERR(fib4_entry);
4884		goto err_fib4_entry_create;
4885	}
4886
4887	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4888					    append);
4889	if (err) {
4890		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4891		goto err_fib4_node_entry_link;
4892	}
4893
4894	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4895
4896	return 0;
4897
4898err_fib4_node_entry_link:
4899	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4900err_fib4_entry_create:
4901	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4902	return err;
4903}
4904
4905static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4906				     struct fib_entry_notifier_info *fen_info)
4907{
4908	struct mlxsw_sp_fib4_entry *fib4_entry;
4909	struct mlxsw_sp_fib_node *fib_node;
4910
4911	if (mlxsw_sp->router->aborted)
4912		return;
4913
4914	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4915	if (WARN_ON(!fib4_entry))
4916		return;
4917	fib_node = fib4_entry->common.fib_node;
4918
4919	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4920	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4921	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4922}
4923
4924static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4925{
4926	/* Packets with link-local destination IP arriving to the router
4927	 * are trapped to the CPU, so no need to program specific routes
4928	 * for them.
4929	 */
4930	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4931		return true;
4932
4933	/* Multicast routes aren't supported, so ignore them. Neighbour
4934	 * Discovery packets are specifically trapped.
4935	 */
4936	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4937		return true;
4938
4939	/* Cloned routes are irrelevant in the forwarding path. */
4940	if (rt->fib6_flags & RTF_CACHE)
4941		return true;
4942
4943	return false;
4944}
4945
4946static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
4947{
4948	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4949
4950	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4951	if (!mlxsw_sp_rt6)
4952		return ERR_PTR(-ENOMEM);
4953
4954	/* In case of route replace, replaced route is deleted with
4955	 * no notification. Take reference to prevent accessing freed
4956	 * memory.
4957	 */
4958	mlxsw_sp_rt6->rt = rt;
4959	fib6_info_hold(rt);
4960
4961	return mlxsw_sp_rt6;
4962}
4963
4964#if IS_ENABLED(CONFIG_IPV6)
4965static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4966{
4967	fib6_info_release(rt);
4968}
4969#else
4970static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4971{
4972}
4973#endif
4974
4975static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4976{
4977	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4978	kfree(mlxsw_sp_rt6);
4979}
4980
4981static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
4982{
4983	/* RTF_CACHE routes are ignored */
4984	return !(rt->fib6_flags & RTF_ADDRCONF) &&
4985		rt->fib6_nh->fib_nh_gw_family;
4986}
4987
4988static struct fib6_info *
4989mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4990{
4991	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4992				list)->rt;
4993}
4994
4995static struct mlxsw_sp_fib6_entry *
4996mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4997				 const struct fib6_info *nrt, bool replace)
4998{
4999	struct mlxsw_sp_fib6_entry *fib6_entry;
5000
5001	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
5002		return NULL;
5003
5004	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5005		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5006
5007		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
5008		 * virtual router.
5009		 */
5010		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
5011			continue;
5012		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
5013			break;
5014		if (rt->fib6_metric < nrt->fib6_metric)
5015			continue;
5016		if (rt->fib6_metric == nrt->fib6_metric &&
5017		    mlxsw_sp_fib6_rt_can_mp(rt))
5018			return fib6_entry;
5019		if (rt->fib6_metric > nrt->fib6_metric)
5020			break;
5021	}
5022
5023	return NULL;
5024}
5025
5026static struct mlxsw_sp_rt6 *
5027mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
5028			    const struct fib6_info *rt)
5029{
5030	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5031
5032	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
5033		if (mlxsw_sp_rt6->rt == rt)
5034			return mlxsw_sp_rt6;
5035	}
5036
5037	return NULL;
5038}
5039
5040static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
5041					const struct fib6_info *rt,
5042					enum mlxsw_sp_ipip_type *ret)
5043{
5044	return rt->fib6_nh->fib_nh_dev &&
5045	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
5046}
5047
5048static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
5049				       struct mlxsw_sp_nexthop_group *nh_grp,
5050				       struct mlxsw_sp_nexthop *nh,
5051				       const struct fib6_info *rt)
5052{
5053	const struct mlxsw_sp_ipip_ops *ipip_ops;
5054	struct mlxsw_sp_ipip_entry *ipip_entry;
5055	struct net_device *dev = rt->fib6_nh->fib_nh_dev;
5056	struct mlxsw_sp_rif *rif;
5057	int err;
5058
5059	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
5060	if (ipip_entry) {
5061		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
5062		if (ipip_ops->can_offload(mlxsw_sp, dev,
5063					  MLXSW_SP_L3_PROTO_IPV6)) {
5064			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
5065			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
5066			return 0;
5067		}
5068	}
5069
5070	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
5071	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
5072	if (!rif)
5073		return 0;
5074	mlxsw_sp_nexthop_rif_init(nh, rif);
5075
5076	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
5077	if (err)
5078		goto err_nexthop_neigh_init;
5079
5080	return 0;
5081
5082err_nexthop_neigh_init:
5083	mlxsw_sp_nexthop_rif_fini(nh);
5084	return err;
5085}
5086
5087static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
5088					struct mlxsw_sp_nexthop *nh)
5089{
5090	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
5091}
5092
5093static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
5094				  struct mlxsw_sp_nexthop_group *nh_grp,
5095				  struct mlxsw_sp_nexthop *nh,
5096				  const struct fib6_info *rt)
5097{
5098	struct net_device *dev = rt->fib6_nh->fib_nh_dev;
5099
5100	nh->nh_grp = nh_grp;
5101	nh->nh_weight = rt->fib6_nh->fib_nh_weight;
5102	memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
5103	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
5104
5105	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
5106
5107	if (!dev)
5108		return 0;
5109	nh->ifindex = dev->ifindex;
5110
5111	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
5112}
5113
5114static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
5115				   struct mlxsw_sp_nexthop *nh)
5116{
5117	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
5118	list_del(&nh->router_list_node);
5119	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
5120}
5121
5122static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5123				    const struct fib6_info *rt)
5124{
5125	return rt->fib6_nh->fib_nh_gw_family ||
5126	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
5127}
5128
5129static struct mlxsw_sp_nexthop_group *
5130mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
5131			       struct mlxsw_sp_fib6_entry *fib6_entry)
5132{
5133	struct mlxsw_sp_nexthop_group *nh_grp;
5134	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5135	struct mlxsw_sp_nexthop *nh;
5136	int i = 0;
5137	int err;
5138
5139	nh_grp = kzalloc(struct_size(nh_grp, nexthops, fib6_entry->nrt6),
5140			 GFP_KERNEL);
5141	if (!nh_grp)
5142		return ERR_PTR(-ENOMEM);
5143	INIT_LIST_HEAD(&nh_grp->fib_list);
5144#if IS_ENABLED(CONFIG_IPV6)
5145	nh_grp->neigh_tbl = &nd_tbl;
5146#endif
5147	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
5148					struct mlxsw_sp_rt6, list);
5149	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
5150	nh_grp->count = fib6_entry->nrt6;
5151	for (i = 0; i < nh_grp->count; i++) {
5152		struct fib6_info *rt = mlxsw_sp_rt6->rt;
5153
5154		nh = &nh_grp->nexthops[i];
5155		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
5156		if (err)
5157			goto err_nexthop6_init;
5158		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
5159	}
5160
5161	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5162	if (err)
5163		goto err_nexthop_group_insert;
5164
5165	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5166	return nh_grp;
5167
5168err_nexthop_group_insert:
5169err_nexthop6_init:
5170	for (i--; i >= 0; i--) {
5171		nh = &nh_grp->nexthops[i];
5172		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5173	}
5174	kfree(nh_grp);
5175	return ERR_PTR(err);
5176}
5177
5178static void
5179mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
5180				struct mlxsw_sp_nexthop_group *nh_grp)
5181{
5182	struct mlxsw_sp_nexthop *nh;
5183	int i = nh_grp->count;
5184
5185	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5186	for (i--; i >= 0; i--) {
5187		nh = &nh_grp->nexthops[i];
5188		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5189	}
5190	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5191	WARN_ON(nh_grp->adj_index_valid);
5192	kfree(nh_grp);
5193}
5194
5195static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
5196				       struct mlxsw_sp_fib6_entry *fib6_entry)
5197{
5198	struct mlxsw_sp_nexthop_group *nh_grp;
5199
5200	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
5201	if (!nh_grp) {
5202		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
5203		if (IS_ERR(nh_grp))
5204			return PTR_ERR(nh_grp);
5205	}
5206
5207	list_add_tail(&fib6_entry->common.nexthop_group_node,
5208		      &nh_grp->fib_list);
5209	fib6_entry->common.nh_group = nh_grp;
5210
5211	return 0;
5212}
5213
5214static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
5215					struct mlxsw_sp_fib_entry *fib_entry)
5216{
5217	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5218
5219	list_del(&fib_entry->nexthop_group_node);
5220	if (!list_empty(&nh_grp->fib_list))
5221		return;
5222	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
5223}
5224
5225static int
5226mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
5227			       struct mlxsw_sp_fib6_entry *fib6_entry)
5228{
5229	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
5230	int err;
5231
5232	fib6_entry->common.nh_group = NULL;
5233	list_del(&fib6_entry->common.nexthop_group_node);
5234
5235	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5236	if (err)
5237		goto err_nexthop6_group_get;
5238
5239	/* In case this entry is offloaded, then the adjacency index
5240	 * currently associated with it in the device's table is that
5241	 * of the old group. Start using the new one instead.
5242	 */
5243	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5244	if (err)
5245		goto err_fib_node_entry_add;
5246
5247	if (list_empty(&old_nh_grp->fib_list))
5248		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
5249
5250	return 0;
5251
5252err_fib_node_entry_add:
5253	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5254err_nexthop6_group_get:
5255	list_add_tail(&fib6_entry->common.nexthop_group_node,
5256		      &old_nh_grp->fib_list);
5257	fib6_entry->common.nh_group = old_nh_grp;
5258	return err;
5259}
5260
5261static int
5262mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5263				struct mlxsw_sp_fib6_entry *fib6_entry,
5264				struct fib6_info **rt_arr, unsigned int nrt6)
5265{
5266	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5267	int err, i;
5268
5269	for (i = 0; i < nrt6; i++) {
5270		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
5271		if (IS_ERR(mlxsw_sp_rt6)) {
5272			err = PTR_ERR(mlxsw_sp_rt6);
5273			goto err_rt6_create;
5274		}
5275
5276		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5277		fib6_entry->nrt6++;
5278	}
5279
5280	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5281	if (err)
5282		goto err_nexthop6_group_update;
5283
5284	return 0;
5285
5286err_nexthop6_group_update:
5287	i = nrt6;
5288err_rt6_create:
5289	for (i--; i >= 0; i--) {
5290		fib6_entry->nrt6--;
5291		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
5292					       struct mlxsw_sp_rt6, list);
5293		list_del(&mlxsw_sp_rt6->list);
5294		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5295	}
5296	return err;
5297}
5298
5299static void
5300mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
5301				struct mlxsw_sp_fib6_entry *fib6_entry,
5302				struct fib6_info **rt_arr, unsigned int nrt6)
5303{
5304	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5305	int i;
5306
5307	for (i = 0; i < nrt6; i++) {
5308		mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
5309							   rt_arr[i]);
5310		if (WARN_ON_ONCE(!mlxsw_sp_rt6))
5311			continue;
5312
5313		fib6_entry->nrt6--;
5314		list_del(&mlxsw_sp_rt6->list);
5315		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5316	}
5317
5318	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5319}
5320
5321static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5322					 struct mlxsw_sp_fib_entry *fib_entry,
5323					 const struct fib6_info *rt)
5324{
5325	/* Packets hitting RTF_REJECT routes need to be discarded by the
5326	 * stack. We can rely on their destination device not having a
5327	 * RIF (it's the loopback device) and can thus use action type
5328	 * local, which will cause them to be trapped with a lower
5329	 * priority than packets that need to be locally received.
5330	 */
5331	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5332		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5333	else if (rt->fib6_type == RTN_BLACKHOLE)
5334		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
5335	else if (rt->fib6_flags & RTF_REJECT)
5336		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5337	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5338		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5339	else
5340		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5341}
5342
5343static void
5344mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5345{
5346	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5347
5348	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5349				 list) {
5350		fib6_entry->nrt6--;
5351		list_del(&mlxsw_sp_rt6->list);
5352		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5353	}
5354}
5355
5356static struct mlxsw_sp_fib6_entry *
5357mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5358			   struct mlxsw_sp_fib_node *fib_node,
5359			   struct fib6_info **rt_arr, unsigned int nrt6)
5360{
5361	struct mlxsw_sp_fib6_entry *fib6_entry;
5362	struct mlxsw_sp_fib_entry *fib_entry;
5363	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5364	int err, i;
5365
5366	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5367	if (!fib6_entry)
5368		return ERR_PTR(-ENOMEM);
5369	fib_entry = &fib6_entry->common;
5370
5371	INIT_LIST_HEAD(&fib6_entry->rt6_list);
5372
5373	for (i = 0; i < nrt6; i++) {
5374		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
5375		if (IS_ERR(mlxsw_sp_rt6)) {
5376			err = PTR_ERR(mlxsw_sp_rt6);
5377			goto err_rt6_create;
5378		}
5379		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5380		fib6_entry->nrt6++;
5381	}
5382
5383	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);
5384
5385	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5386	if (err)
5387		goto err_nexthop6_group_get;
5388
5389	fib_entry->fib_node = fib_node;
5390
5391	return fib6_entry;
5392
5393err_nexthop6_group_get:
5394	i = nrt6;
5395err_rt6_create:
5396	for (i--; i >= 0; i--) {
5397		fib6_entry->nrt6--;
5398		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
5399					       struct mlxsw_sp_rt6, list);
5400		list_del(&mlxsw_sp_rt6->list);
5401		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5402	}
5403	kfree(fib6_entry);
5404	return ERR_PTR(err);
5405}
5406
5407static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5408					struct mlxsw_sp_fib6_entry *fib6_entry)
5409{
5410	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5411	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5412	WARN_ON(fib6_entry->nrt6);
5413	kfree(fib6_entry);
5414}
5415
5416static struct mlxsw_sp_fib6_entry *
5417mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5418			      const struct fib6_info *nrt, bool replace)
5419{
5420	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
5421
5422	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5423		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5424
5425		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
5426			continue;
5427		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
5428			break;
5429		if (replace && rt->fib6_metric == nrt->fib6_metric) {
5430			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
5431			    mlxsw_sp_fib6_rt_can_mp(nrt))
5432				return fib6_entry;
5433			if (mlxsw_sp_fib6_rt_can_mp(nrt))
5434				fallback = fallback ?: fib6_entry;
5435		}
5436		if (rt->fib6_metric > nrt->fib6_metric)
5437			return fallback ?: fib6_entry;
5438	}
5439
5440	return fallback;
5441}
5442
5443static int
5444mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5445			       bool *p_replace)
5446{
5447	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5448	struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5449	struct mlxsw_sp_fib6_entry *fib6_entry;
5450
5451	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, *p_replace);
5452
5453	if (*p_replace && !fib6_entry)
5454		*p_replace = false;
5455
5456	if (fib6_entry) {
5457		list_add_tail(&new6_entry->common.list,
5458			      &fib6_entry->common.list);
5459	} else {
5460		struct mlxsw_sp_fib6_entry *last;
5461
5462		list_for_each_entry(last, &fib_node->entry_list, common.list) {
5463			struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5464
5465			if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
5466				break;
5467			fib6_entry = last;
5468		}
5469
5470		if (fib6_entry)
5471			list_add(&new6_entry->common.list,
5472				 &fib6_entry->common.list);
5473		else
5474			list_add(&new6_entry->common.list,
5475				 &fib_node->entry_list);
5476	}
5477
5478	return 0;
5479}
5480
5481static void
5482mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5483{
5484	list_del(&fib6_entry->common.list);
5485}
5486
5487static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5488					 struct mlxsw_sp_fib6_entry *fib6_entry,
5489					 bool *p_replace)
5490{
5491	int err;
5492
5493	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, p_replace);
5494	if (err)
5495		return err;
5496
5497	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5498	if (err)
5499		goto err_fib_node_entry_add;
5500
5501	return 0;
5502
5503err_fib_node_entry_add:
5504	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5505	return err;
5506}
5507
5508static void
5509mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5510				struct mlxsw_sp_fib6_entry *fib6_entry)
5511{
5512	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5513	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5514}
5515
5516static struct mlxsw_sp_fib6_entry *
5517mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5518			   const struct fib6_info *rt)
5519{
5520	struct mlxsw_sp_fib6_entry *fib6_entry;
5521	struct mlxsw_sp_fib_node *fib_node;
5522	struct mlxsw_sp_fib *fib;
5523	struct mlxsw_sp_vr *vr;
5524
5525	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
5526	if (!vr)
5527		return NULL;
5528	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5529
5530	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
5531					    sizeof(rt->fib6_dst.addr),
5532					    rt->fib6_dst.plen);
5533	if (!fib_node)
5534		return NULL;
5535
5536	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5537		struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5538
5539		if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
5540		    rt->fib6_metric == iter_rt->fib6_metric &&
5541		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5542			return fib6_entry;
5543	}
5544
5545	return NULL;
5546}
5547
5548static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5549					struct mlxsw_sp_fib6_entry *fib6_entry,
5550					bool replace)
5551{
5552	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5553	struct mlxsw_sp_fib6_entry *replaced;
5554
5555	if (!replace)
5556		return;
5557
5558	replaced = list_next_entry(fib6_entry, common.list);
5559
5560	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5561	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5562	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5563}
5564
5565static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5566				    struct fib6_info **rt_arr,
5567				    unsigned int nrt6, bool replace)
5568{
5569	struct mlxsw_sp_fib6_entry *fib6_entry;
5570	struct mlxsw_sp_fib_node *fib_node;
5571	struct fib6_info *rt = rt_arr[0];
5572	int err;
5573
5574	if (mlxsw_sp->router->aborted)
5575		return 0;
5576
5577	if (rt->fib6_src.plen)
5578		return -EINVAL;
5579
5580	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5581		return 0;
5582
5583	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5584					 &rt->fib6_dst.addr,
5585					 sizeof(rt->fib6_dst.addr),
5586					 rt->fib6_dst.plen,
5587					 MLXSW_SP_L3_PROTO_IPV6);
5588	if (IS_ERR(fib_node))
5589		return PTR_ERR(fib_node);
5590
5591	/* Before creating a new entry, try to append route to an existing
5592	 * multipath entry.
5593	 */
5594	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
5595	if (fib6_entry) {
5596		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry,
5597						      rt_arr, nrt6);
5598		if (err)
5599			goto err_fib6_entry_nexthop_add;
5600		return 0;
5601	}
5602
5603	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
5604						nrt6);
5605	if (IS_ERR(fib6_entry)) {
5606		err = PTR_ERR(fib6_entry);
5607		goto err_fib6_entry_create;
5608	}
5609
5610	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, &replace);
5611	if (err)
5612		goto err_fib6_node_entry_link;
5613
5614	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5615
5616	return 0;
5617
5618err_fib6_node_entry_link:
5619	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5620err_fib6_entry_create:
5621err_fib6_entry_nexthop_add:
5622	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5623	return err;
5624}
5625
5626static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5627				     struct fib6_info **rt_arr,
5628				     unsigned int nrt6)
5629{
5630	struct mlxsw_sp_fib6_entry *fib6_entry;
5631	struct mlxsw_sp_fib_node *fib_node;
5632	struct fib6_info *rt = rt_arr[0];
5633
5634	if (mlxsw_sp->router->aborted)
5635		return;
5636
5637	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5638		return;
5639
5640	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5641	if (WARN_ON(!fib6_entry))
5642		return;
5643
5644	/* If not all the nexthops are deleted, then only reduce the nexthop
5645	 * group.
5646	 */
5647	if (nrt6 != fib6_entry->nrt6) {
5648		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr,
5649						nrt6);
5650		return;
5651	}
5652
5653	fib_node = fib6_entry->common.fib_node;
5654
5655	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5656	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5657	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5658}
5659
5660static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5661					    enum mlxsw_reg_ralxx_protocol proto,
5662					    u8 tree_id)
5663{
5664	char ralta_pl[MLXSW_REG_RALTA_LEN];
5665	char ralst_pl[MLXSW_REG_RALST_LEN];
5666	int i, err;
5667
5668	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5669	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5670	if (err)
5671		return err;
5672
5673	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5674	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5675	if (err)
5676		return err;
5677
5678	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5679		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5680		char raltb_pl[MLXSW_REG_RALTB_LEN];
5681		char ralue_pl[MLXSW_REG_RALUE_LEN];
5682
5683		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5684		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5685				      raltb_pl);
5686		if (err)
5687			return err;
5688
5689		mlxsw_reg_ralue_pack(ralue_pl, proto,
5690				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5691		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5692		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5693				      ralue_pl);
5694		if (err)
5695			return err;
5696	}
5697
5698	return 0;
5699}
5700
5701static struct mlxsw_sp_mr_table *
5702mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5703{
5704	if (family == RTNL_FAMILY_IPMR)
5705		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5706	else
5707		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5708}
5709
5710static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5711				     struct mfc_entry_notifier_info *men_info,
5712				     bool replace)
5713{
5714	struct mlxsw_sp_mr_table *mrt;
5715	struct mlxsw_sp_vr *vr;
5716
5717	if (mlxsw_sp->router->aborted)
5718		return 0;
5719
5720	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5721	if (IS_ERR(vr))
5722		return PTR_ERR(vr);
5723
5724	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5725	return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5726}
5727
5728static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5729				      struct mfc_entry_notifier_info *men_info)
5730{
5731	struct mlxsw_sp_mr_table *mrt;
5732	struct mlxsw_sp_vr *vr;
5733
5734	if (mlxsw_sp->router->aborted)
5735		return;
5736
5737	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5738	if (WARN_ON(!vr))
5739		return;
5740
5741	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5742	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
5743	mlxsw_sp_vr_put(mlxsw_sp, vr);
5744}
5745
5746static int
5747mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5748			      struct vif_entry_notifier_info *ven_info)
5749{
5750	struct mlxsw_sp_mr_table *mrt;
5751	struct mlxsw_sp_rif *rif;
5752	struct mlxsw_sp_vr *vr;
5753
5754	if (mlxsw_sp->router->aborted)
5755		return 0;
5756
5757	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5758	if (IS_ERR(vr))
5759		return PTR_ERR(vr);
5760
5761	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5762	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5763	return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5764				   ven_info->vif_index,
5765				   ven_info->vif_flags, rif);
5766}
5767
5768static void
5769mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5770			      struct vif_entry_notifier_info *ven_info)
5771{
5772	struct mlxsw_sp_mr_table *mrt;
5773	struct mlxsw_sp_vr *vr;
5774
5775	if (mlxsw_sp->router->aborted)
5776		return;
5777
5778	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5779	if (WARN_ON(!vr))
5780		return;
5781
5782	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5783	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
5784	mlxsw_sp_vr_put(mlxsw_sp, vr);
5785}
5786
5787static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5788{
5789	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5790	int err;
5791
5792	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5793					       MLXSW_SP_LPM_TREE_MIN);
5794	if (err)
5795		return err;
5796
5797	/* The multicast router code does not need an abort trap as by default,
5798	 * packets that don't match any routes are trapped to the CPU.
5799	 */
5800
5801	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5802	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5803						MLXSW_SP_LPM_TREE_MIN + 1);
5804}
5805
5806static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5807				     struct mlxsw_sp_fib_node *fib_node)
5808{
5809	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5810
5811	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5812				 common.list) {
5813		bool do_break = &tmp->common.list == &fib_node->entry_list;
5814
5815		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5816		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5817		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5818		/* Break when entry list is empty and node was freed.
5819		 * Otherwise, we'll access freed memory in the next
5820		 * iteration.
5821		 */
5822		if (do_break)
5823			break;
5824	}
5825}
5826
5827static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5828				     struct mlxsw_sp_fib_node *fib_node)
5829{
5830	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5831
5832	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5833				 common.list) {
5834		bool do_break = &tmp->common.list == &fib_node->entry_list;
5835
5836		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5837		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5838		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5839		if (do_break)
5840			break;
5841	}
5842}
5843
5844static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5845				    struct mlxsw_sp_fib_node *fib_node)
5846{
5847	switch (fib_node->fib->proto) {
5848	case MLXSW_SP_L3_PROTO_IPV4:
5849		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5850		break;
5851	case MLXSW_SP_L3_PROTO_IPV6:
5852		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5853		break;
5854	}
5855}
5856
5857static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5858				  struct mlxsw_sp_vr *vr,
5859				  enum mlxsw_sp_l3proto proto)
5860{
5861	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5862	struct mlxsw_sp_fib_node *fib_node, *tmp;
5863
5864	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5865		bool do_break = &tmp->list == &fib->node_list;
5866
5867		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5868		if (do_break)
5869			break;
5870	}
5871}
5872
5873static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5874{
5875	int i, j;
5876
5877	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5878		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5879
5880		if (!mlxsw_sp_vr_is_used(vr))
5881			continue;
5882
5883		for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5884			mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5885		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5886
5887		/* If virtual router was only used for IPv4, then it's no
5888		 * longer used.
5889		 */
5890		if (!mlxsw_sp_vr_is_used(vr))
5891			continue;
5892		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5893	}
5894}
5895
5896static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5897{
5898	int err;
5899
5900	if (mlxsw_sp->router->aborted)
5901		return;
5902	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5903	mlxsw_sp_router_fib_flush(mlxsw_sp);
5904	mlxsw_sp->router->aborted = true;
5905	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5906	if (err)
5907		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5908}
5909
5910struct mlxsw_sp_fib6_event_work {
5911	struct fib6_info **rt_arr;
5912	unsigned int nrt6;
5913};
5914
5915struct mlxsw_sp_fib_event_work {
5916	struct work_struct work;
5917	union {
5918		struct mlxsw_sp_fib6_event_work fib6_work;
5919		struct fib_entry_notifier_info fen_info;
5920		struct fib_rule_notifier_info fr_info;
5921		struct fib_nh_notifier_info fnh_info;
5922		struct mfc_entry_notifier_info men_info;
5923		struct vif_entry_notifier_info ven_info;
5924	};
5925	struct mlxsw_sp *mlxsw_sp;
5926	unsigned long event;
5927};
5928
5929static int
5930mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work,
5931			       struct fib6_entry_notifier_info *fen6_info)
5932{
5933	struct fib6_info *rt = fen6_info->rt;
5934	struct fib6_info **rt_arr;
5935	struct fib6_info *iter;
5936	unsigned int nrt6;
5937	int i = 0;
5938
5939	nrt6 = fen6_info->nsiblings + 1;
5940
5941	rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
5942	if (!rt_arr)
5943		return -ENOMEM;
5944
5945	fib6_work->rt_arr = rt_arr;
5946	fib6_work->nrt6 = nrt6;
5947
5948	rt_arr[0] = rt;
5949	fib6_info_hold(rt);
5950
5951	if (!fen6_info->nsiblings)
5952		return 0;
5953
5954	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
5955		if (i == fen6_info->nsiblings)
5956			break;
5957
5958		rt_arr[i + 1] = iter;
5959		fib6_info_hold(iter);
5960		i++;
5961	}
5962	WARN_ON_ONCE(i != fen6_info->nsiblings);
5963
5964	return 0;
5965}
5966
5967static void
5968mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work)
5969{
5970	int i;
5971
5972	for (i = 0; i < fib6_work->nrt6; i++)
5973		mlxsw_sp_rt6_release(fib6_work->rt_arr[i]);
5974	kfree(fib6_work->rt_arr);
5975}
5976
5977static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5978{
5979	struct mlxsw_sp_fib_event_work *fib_work =
5980		container_of(work, struct mlxsw_sp_fib_event_work, work);
5981	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5982	bool replace, append;
5983	int err;
5984
5985	/* Protect internal structures from changes */
5986	rtnl_lock();
5987	mlxsw_sp_span_respin(mlxsw_sp);
5988
5989	switch (fib_work->event) {
5990	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5991	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5992	case FIB_EVENT_ENTRY_ADD:
5993		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5994		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5995		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5996					       replace, append);
5997		if (err)
5998			mlxsw_sp_router_fib_abort(mlxsw_sp);
5999		fib_info_put(fib_work->fen_info.fi);
6000		break;
6001	case FIB_EVENT_ENTRY_DEL:
6002		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
6003		fib_info_put(fib_work->fen_info.fi);
6004		break;
6005	case FIB_EVENT_RULE_ADD:
6006		/* if we get here, a rule was added that we do not support.
6007		 * just do the fib_abort
6008		 */
6009		mlxsw_sp_router_fib_abort(mlxsw_sp);
6010		break;
6011	case FIB_EVENT_NH_ADD: /* fall through */
6012	case FIB_EVENT_NH_DEL:
6013		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
6014					fib_work->fnh_info.fib_nh);
6015		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
6016		break;
6017	}
6018	rtnl_unlock();
6019	kfree(fib_work);
6020}
6021
6022static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
6023{
6024	struct mlxsw_sp_fib_event_work *fib_work =
6025		container_of(work, struct mlxsw_sp_fib_event_work, work);
6026	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
6027	bool replace;
6028	int err;
6029
6030	rtnl_lock();
6031	mlxsw_sp_span_respin(mlxsw_sp);
6032
6033	switch (fib_work->event) {
6034	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6035	case FIB_EVENT_ENTRY_ADD:
6036		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
6037		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
6038					       fib_work->fib6_work.rt_arr,
6039					       fib_work->fib6_work.nrt6,
6040					       replace);
6041		if (err)
6042			mlxsw_sp_router_fib_abort(mlxsw_sp);
6043		mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
6044		break;
6045	case FIB_EVENT_ENTRY_DEL:
6046		mlxsw_sp_router_fib6_del(mlxsw_sp,
6047					 fib_work->fib6_work.rt_arr,
6048					 fib_work->fib6_work.nrt6);
6049		mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
6050		break;
6051	case FIB_EVENT_RULE_ADD:
6052		/* if we get here, a rule was added that we do not support.
6053		 * just do the fib_abort
6054		 */
6055		mlxsw_sp_router_fib_abort(mlxsw_sp);
6056		break;
6057	}
6058	rtnl_unlock();
6059	kfree(fib_work);
6060}
6061
6062static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
6063{
6064	struct mlxsw_sp_fib_event_work *fib_work =
6065		container_of(work, struct mlxsw_sp_fib_event_work, work);
6066	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
6067	bool replace;
6068	int err;
6069
6070	rtnl_lock();
6071	switch (fib_work->event) {
6072	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6073	case FIB_EVENT_ENTRY_ADD:
6074		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
6075
6076		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
6077						replace);
6078		if (err)
6079			mlxsw_sp_router_fib_abort(mlxsw_sp);
6080		mr_cache_put(fib_work->men_info.mfc);
6081		break;
6082	case FIB_EVENT_ENTRY_DEL:
6083		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
6084		mr_cache_put(fib_work->men_info.mfc);
6085		break;
6086	case FIB_EVENT_VIF_ADD:
6087		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
6088						    &fib_work->ven_info);
6089		if (err)
6090			mlxsw_sp_router_fib_abort(mlxsw_sp);
6091		dev_put(fib_work->ven_info.dev);
6092		break;
6093	case FIB_EVENT_VIF_DEL:
6094		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
6095					      &fib_work->ven_info);
6096		dev_put(fib_work->ven_info.dev);
6097		break;
6098	case FIB_EVENT_RULE_ADD:
6099		/* if we get here, a rule was added that we do not support.
6100		 * just do the fib_abort
6101		 */
6102		mlxsw_sp_router_fib_abort(mlxsw_sp);
6103		break;
6104	}
6105	rtnl_unlock();
6106	kfree(fib_work);
6107}
6108
6109static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
6110				       struct fib_notifier_info *info)
6111{
6112	struct fib_entry_notifier_info *fen_info;
6113	struct fib_nh_notifier_info *fnh_info;
6114
6115	switch (fib_work->event) {
6116	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6117	case FIB_EVENT_ENTRY_APPEND: /* fall through */
6118	case FIB_EVENT_ENTRY_ADD: /* fall through */
6119	case FIB_EVENT_ENTRY_DEL:
6120		fen_info = container_of(info, struct fib_entry_notifier_info,
6121					info);
6122		fib_work->fen_info = *fen_info;
6123		/* Take reference on fib_info to prevent it from being
6124		 * freed while work is queued. Release it afterwards.
6125		 */
6126		fib_info_hold(fib_work->fen_info.fi);
6127		break;
6128	case FIB_EVENT_NH_ADD: /* fall through */
6129	case FIB_EVENT_NH_DEL:
6130		fnh_info = container_of(info, struct fib_nh_notifier_info,
6131					info);
6132		fib_work->fnh_info = *fnh_info;
6133		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
6134		break;
6135	}
6136}
6137
6138static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
6139				      struct fib_notifier_info *info)
6140{
6141	struct fib6_entry_notifier_info *fen6_info;
6142	int err;
6143
6144	switch (fib_work->event) {
6145	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6146	case FIB_EVENT_ENTRY_ADD: /* fall through */
6147	case FIB_EVENT_ENTRY_DEL:
6148		fen6_info = container_of(info, struct fib6_entry_notifier_info,
6149					 info);
6150		err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work,
6151						     fen6_info);
6152		if (err)
6153			return err;
6154		break;
6155	}
6156
6157	return 0;
6158}
6159
6160static void
6161mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
6162			    struct fib_notifier_info *info)
6163{
6164	switch (fib_work->event) {
6165	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6166	case FIB_EVENT_ENTRY_ADD: /* fall through */
6167	case FIB_EVENT_ENTRY_DEL:
6168		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
6169		mr_cache_hold(fib_work->men_info.mfc);
6170		break;
6171	case FIB_EVENT_VIF_ADD: /* fall through */
6172	case FIB_EVENT_VIF_DEL:
6173		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
6174		dev_hold(fib_work->ven_info.dev);
6175		break;
6176	}
6177}
6178
6179static int mlxsw_sp_router_fib_rule_event(unsigned long event,
6180					  struct fib_notifier_info *info,
6181					  struct mlxsw_sp *mlxsw_sp)
6182{
6183	struct netlink_ext_ack *extack = info->extack;
6184	struct fib_rule_notifier_info *fr_info;
6185	struct fib_rule *rule;
6186	int err = 0;
6187
6188	/* nothing to do at the moment */
6189	if (event == FIB_EVENT_RULE_DEL)
6190		return 0;
6191
6192	if (mlxsw_sp->router->aborted)
6193		return 0;
6194
6195	fr_info = container_of(info, struct fib_rule_notifier_info, info);
6196	rule = fr_info->rule;
6197
6198	/* Rule only affects locally generated traffic */
6199	if (rule->iifindex == info->net->loopback_dev->ifindex)
6200		return 0;
6201
6202	switch (info->family) {
6203	case AF_INET:
6204		if (!fib4_rule_default(rule) && !rule->l3mdev)
6205			err = -EOPNOTSUPP;
6206		break;
6207	case AF_INET6:
6208		if (!fib6_rule_default(rule) && !rule->l3mdev)
6209			err = -EOPNOTSUPP;
6210		break;
6211	case RTNL_FAMILY_IPMR:
6212		if (!ipmr_rule_default(rule) && !rule->l3mdev)
6213			err = -EOPNOTSUPP;
6214		break;
6215	case RTNL_FAMILY_IP6MR:
6216		if (!ip6mr_rule_default(rule) && !rule->l3mdev)
6217			err = -EOPNOTSUPP;
6218		break;
6219	}
6220
6221	if (err < 0)
6222		NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
6223
6224	return err;
6225}
6226
6227/* Called with rcu_read_lock() */
6228static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
6229				     unsigned long event, void *ptr)
6230{
6231	struct mlxsw_sp_fib_event_work *fib_work;
6232	struct fib_notifier_info *info = ptr;
6233	struct mlxsw_sp_router *router;
6234	int err;
6235
6236	if (!net_eq(info->net, &init_net) ||
6237	    (info->family != AF_INET && info->family != AF_INET6 &&
6238	     info->family != RTNL_FAMILY_IPMR &&
6239	     info->family != RTNL_FAMILY_IP6MR))
6240		return NOTIFY_DONE;
6241
6242	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6243
6244	switch (event) {
6245	case FIB_EVENT_RULE_ADD: /* fall through */
6246	case FIB_EVENT_RULE_DEL:
6247		err = mlxsw_sp_router_fib_rule_event(event, info,
6248						     router->mlxsw_sp);
6249		if (!err || info->extack)
6250			return notifier_from_errno(err);
6251		break;
6252	case FIB_EVENT_ENTRY_ADD:
6253	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6254	case FIB_EVENT_ENTRY_APPEND:  /* fall through */
6255		if (router->aborted) {
6256			NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
6257			return notifier_from_errno(-EINVAL);
6258		}
6259		if (info->family == AF_INET) {
6260			struct fib_entry_notifier_info *fen_info = ptr;
6261
6262			if (fen_info->fi->fib_nh_is_v6) {
6263				NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
6264				return notifier_from_errno(-EINVAL);
6265			}
6266			if (fen_info->fi->nh) {
6267				NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
6268				return notifier_from_errno(-EINVAL);
6269			}
6270		} else if (info->family == AF_INET6) {
6271			struct fib6_entry_notifier_info *fen6_info;
6272
6273			fen6_info = container_of(info,
6274						 struct fib6_entry_notifier_info,
6275						 info);
6276			if (fen6_info->rt->nh) {
6277				NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported");
6278				return notifier_from_errno(-EINVAL);
6279			}
6280		}
6281		break;
6282	}
6283
6284	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
6285	if (WARN_ON(!fib_work))
6286		return NOTIFY_BAD;
6287
6288	fib_work->mlxsw_sp = router->mlxsw_sp;
6289	fib_work->event = event;
6290
6291	switch (info->family) {
6292	case AF_INET:
6293		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
6294		mlxsw_sp_router_fib4_event(fib_work, info);
6295		break;
6296	case AF_INET6:
6297		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
6298		err = mlxsw_sp_router_fib6_event(fib_work, info);
6299		if (err)
6300			goto err_fib_event;
6301		break;
6302	case RTNL_FAMILY_IP6MR:
6303	case RTNL_FAMILY_IPMR:
6304		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
6305		mlxsw_sp_router_fibmr_event(fib_work, info);
6306		break;
6307	}
6308
6309	mlxsw_core_schedule_work(&fib_work->work);
6310
6311	return NOTIFY_DONE;
6312
6313err_fib_event:
6314	kfree(fib_work);
6315	return NOTIFY_BAD;
6316}
6317
6318struct mlxsw_sp_rif *
6319mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
6320			 const struct net_device *dev)
6321{
6322	int i;
6323
6324	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6325		if (mlxsw_sp->router->rifs[i] &&
6326		    mlxsw_sp->router->rifs[i]->dev == dev)
6327			return mlxsw_sp->router->rifs[i];
6328
6329	return NULL;
6330}
6331
6332static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
6333{
6334	char ritr_pl[MLXSW_REG_RITR_LEN];
6335	int err;
6336
6337	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
6338	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6339	if (err)
6340		return err;
6341
6342	mlxsw_reg_ritr_enable_set(ritr_pl, false);
6343	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6344}
6345
6346static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
6347					  struct mlxsw_sp_rif *rif)
6348{
6349	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
6350	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
6351	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
6352}
6353
6354static bool
6355mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6356			   unsigned long event)
6357{
6358	struct inet6_dev *inet6_dev;
6359	bool addr_list_empty = true;
6360	struct in_device *idev;
6361
6362	switch (event) {
6363	case NETDEV_UP:
6364		return rif == NULL;
6365	case NETDEV_DOWN:
6366		idev = __in_dev_get_rtnl(dev);
6367		if (idev && idev->ifa_list)
6368			addr_list_empty = false;
6369
6370		inet6_dev = __in6_dev_get(dev);
6371		if (addr_list_empty && inet6_dev &&
6372		    !list_empty(&inet6_dev->addr_list))
6373			addr_list_empty = false;
6374
6375		/* macvlans do not have a RIF, but rather piggy back on the
6376		 * RIF of their lower device.
6377		 */
6378		if (netif_is_macvlan(dev) && addr_list_empty)
6379			return true;
6380
6381		if (rif && addr_list_empty &&
6382		    !netif_is_l3_slave(rif->dev))
6383			return true;
6384		/* It is possible we already removed the RIF ourselves
6385		 * if it was assigned to a netdev that is now a bridge
6386		 * or LAG slave.
6387		 */
6388		return false;
6389	}
6390
6391	return false;
6392}
6393
6394static enum mlxsw_sp_rif_type
6395mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6396		      const struct net_device *dev)
6397{
6398	enum mlxsw_sp_fid_type type;
6399
6400	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6401		return MLXSW_SP_RIF_TYPE_IPIP_LB;
6402
6403	/* Otherwise RIF type is derived from the type of the underlying FID. */
6404	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6405		type = MLXSW_SP_FID_TYPE_8021Q;
6406	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6407		type = MLXSW_SP_FID_TYPE_8021Q;
6408	else if (netif_is_bridge_master(dev))
6409		type = MLXSW_SP_FID_TYPE_8021D;
6410	else
6411		type = MLXSW_SP_FID_TYPE_RFID;
6412
6413	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6414}
6415
6416static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6417{
6418	int i;
6419
6420	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6421		if (!mlxsw_sp->router->rifs[i]) {
6422			*p_rif_index = i;
6423			return 0;
6424		}
6425	}
6426
6427	return -ENOBUFS;
6428}
6429
6430static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6431					       u16 vr_id,
6432					       struct net_device *l3_dev)
6433{
6434	struct mlxsw_sp_rif *rif;
6435
6436	rif = kzalloc(rif_size, GFP_KERNEL);
6437	if (!rif)
6438		return NULL;
6439
6440	INIT_LIST_HEAD(&rif->nexthop_list);
6441	INIT_LIST_HEAD(&rif->neigh_list);
6442	if (l3_dev) {
6443		ether_addr_copy(rif->addr, l3_dev->dev_addr);
6444		rif->mtu = l3_dev->mtu;
6445		rif->dev = l3_dev;
6446	}
6447	rif->vr_id = vr_id;
6448	rif->rif_index = rif_index;
6449
6450	return rif;
6451}
6452
6453struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
6454					   u16 rif_index)
6455{
6456	return mlxsw_sp->router->rifs[rif_index];
6457}
6458
6459u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
6460{
6461	return rif->rif_index;
6462}
6463
6464u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6465{
6466	return lb_rif->common.rif_index;
6467}
6468
6469u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6470{
6471	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev);
6472	struct mlxsw_sp_vr *ul_vr;
6473
6474	ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
6475	if (WARN_ON(IS_ERR(ul_vr)))
6476		return 0;
6477
6478	return ul_vr->id;
6479}
6480
6481u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6482{
6483	return lb_rif->ul_rif_id;
6484}
6485
6486int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6487{
6488	return rif->dev->ifindex;
6489}
6490
6491const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6492{
6493	return rif->dev;
6494}
6495
6496struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
6497{
6498	return rif->fid;
6499}
6500
6501static struct mlxsw_sp_rif *
6502mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6503		    const struct mlxsw_sp_rif_params *params,
6504		    struct netlink_ext_ack *extack)
6505{
6506	u32 tb_id = l3mdev_fib_table(params->dev);
6507	const struct mlxsw_sp_rif_ops *ops;
6508	struct mlxsw_sp_fid *fid = NULL;
6509	enum mlxsw_sp_rif_type type;
6510	struct mlxsw_sp_rif *rif;
6511	struct mlxsw_sp_vr *vr;
6512	u16 rif_index;
6513	int i, err;
6514
6515	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6516	ops = mlxsw_sp->rif_ops_arr[type];
6517
6518	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6519	if (IS_ERR(vr))
6520		return ERR_CAST(vr);
6521	vr->rif_count++;
6522
6523	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6524	if (err) {
6525		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
6526		goto err_rif_index_alloc;
6527	}
6528
6529	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6530	if (!rif) {
6531		err = -ENOMEM;
6532		goto err_rif_alloc;
6533	}
6534	dev_hold(rif->dev);
6535	mlxsw_sp->router->rifs[rif_index] = rif;
6536	rif->mlxsw_sp = mlxsw_sp;
6537	rif->ops = ops;
6538
6539	if (ops->fid_get) {
6540		fid = ops->fid_get(rif, extack);
6541		if (IS_ERR(fid)) {
6542			err = PTR_ERR(fid);
6543			goto err_fid_get;
6544		}
6545		rif->fid = fid;
6546	}
6547
6548	if (ops->setup)
6549		ops->setup(rif, params);
6550
6551	err = ops->configure(rif);
6552	if (err)
6553		goto err_configure;
6554
6555	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
6556		err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
6557		if (err)
6558			goto err_mr_rif_add;
6559	}
6560
6561	mlxsw_sp_rif_counters_alloc(rif);
6562
6563	return rif;
6564
6565err_mr_rif_add:
6566	for (i--; i >= 0; i--)
6567		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6568	ops->deconfigure(rif);
6569err_configure:
6570	if (fid)
6571		mlxsw_sp_fid_put(fid);
6572err_fid_get:
6573	mlxsw_sp->router->rifs[rif_index] = NULL;
6574	dev_put(rif->dev);
6575	kfree(rif);
6576err_rif_alloc:
6577err_rif_index_alloc:
6578	vr->rif_count--;
6579	mlxsw_sp_vr_put(mlxsw_sp, vr);
6580	return ERR_PTR(err);
6581}
6582
6583static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6584{
6585	const struct mlxsw_sp_rif_ops *ops = rif->ops;
6586	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6587	struct mlxsw_sp_fid *fid = rif->fid;
6588	struct mlxsw_sp_vr *vr;
6589	int i;
6590
6591	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6592	vr = &mlxsw_sp->router->vrs[rif->vr_id];
6593
6594	mlxsw_sp_rif_counters_free(rif);
6595	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6596		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6597	ops->deconfigure(rif);
6598	if (fid)
6599		/* Loopback RIFs are not associated with a FID. */
6600		mlxsw_sp_fid_put(fid);
6601	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6602	dev_put(rif->dev);
6603	kfree(rif);
6604	vr->rif_count--;
6605	mlxsw_sp_vr_put(mlxsw_sp, vr);
6606}
6607
6608void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
6609				 struct net_device *dev)
6610{
6611	struct mlxsw_sp_rif *rif;
6612
6613	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6614	if (!rif)
6615		return;
6616	mlxsw_sp_rif_destroy(rif);
6617}
6618
6619static void
6620mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6621				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6622{
6623	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6624
6625	params->vid = mlxsw_sp_port_vlan->vid;
6626	params->lag = mlxsw_sp_port->lagged;
6627	if (params->lag)
6628		params->lag_id = mlxsw_sp_port->lag_id;
6629	else
6630		params->system_port = mlxsw_sp_port->local_port;
6631}
6632
6633static struct mlxsw_sp_rif_subport *
6634mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6635{
6636	return container_of(rif, struct mlxsw_sp_rif_subport, common);
6637}
6638
6639static struct mlxsw_sp_rif *
6640mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
6641			 const struct mlxsw_sp_rif_params *params,
6642			 struct netlink_ext_ack *extack)
6643{
6644	struct mlxsw_sp_rif_subport *rif_subport;
6645	struct mlxsw_sp_rif *rif;
6646
6647	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
6648	if (!rif)
6649		return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
6650
6651	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6652	refcount_inc(&rif_subport->ref_count);
6653	return rif;
6654}
6655
6656static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
6657{
6658	struct mlxsw_sp_rif_subport *rif_subport;
6659
6660	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6661	if (!refcount_dec_and_test(&rif_subport->ref_count))
6662		return;
6663
6664	mlxsw_sp_rif_destroy(rif);
6665}
6666
6667static int
6668mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6669			       struct net_device *l3_dev,
6670			       struct netlink_ext_ack *extack)
6671{
6672	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6673	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6674	struct mlxsw_sp_rif_params params = {
6675		.dev = l3_dev,
6676	};
6677	u16 vid = mlxsw_sp_port_vlan->vid;
6678	struct mlxsw_sp_rif *rif;
6679	struct mlxsw_sp_fid *fid;
6680	int err;
6681
6682	mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6683	rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
6684	if (IS_ERR(rif))
6685		return PTR_ERR(rif);
6686
6687	/* FID was already created, just take a reference */
6688	fid = rif->ops->fid_get(rif, extack);
6689	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6690	if (err)
6691		goto err_fid_port_vid_map;
6692
6693	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6694	if (err)
6695		goto err_port_vid_learning_set;
6696
6697	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6698					BR_STATE_FORWARDING);
6699	if (err)
6700		goto err_port_vid_stp_set;
6701
6702	mlxsw_sp_port_vlan->fid = fid;
6703
6704	return 0;
6705
6706err_port_vid_stp_set:
6707	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6708err_port_vid_learning_set:
6709	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6710err_fid_port_vid_map:
6711	mlxsw_sp_fid_put(fid);
6712	mlxsw_sp_rif_subport_put(rif);
6713	return err;
6714}
6715
6716void
6717mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6718{
6719	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6720	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6721	struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
6722	u16 vid = mlxsw_sp_port_vlan->vid;
6723
6724	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6725		return;
6726
6727	mlxsw_sp_port_vlan->fid = NULL;
6728	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6729	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6730	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6731	mlxsw_sp_fid_put(fid);
6732	mlxsw_sp_rif_subport_put(rif);
6733}
6734
6735static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6736					     struct net_device *port_dev,
6737					     unsigned long event, u16 vid,
6738					     struct netlink_ext_ack *extack)
6739{
6740	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6741	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6742
6743	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6744	if (WARN_ON(!mlxsw_sp_port_vlan))
6745		return -EINVAL;
6746
6747	switch (event) {
6748	case NETDEV_UP:
6749		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6750						      l3_dev, extack);
6751	case NETDEV_DOWN:
6752		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6753		break;
6754	}
6755
6756	return 0;
6757}
6758
6759static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6760					unsigned long event,
6761					struct netlink_ext_ack *extack)
6762{
6763	if (netif_is_bridge_port(port_dev) ||
6764	    netif_is_lag_port(port_dev) ||
6765	    netif_is_ovs_port(port_dev))
6766		return 0;
6767
6768	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
6769						 MLXSW_SP_DEFAULT_VID, extack);
6770}
6771
6772static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6773					 struct net_device *lag_dev,
6774					 unsigned long event, u16 vid,
6775					 struct netlink_ext_ack *extack)
6776{
6777	struct net_device *port_dev;
6778	struct list_head *iter;
6779	int err;
6780
6781	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6782		if (mlxsw_sp_port_dev_check(port_dev)) {
6783			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6784								port_dev,
6785								event, vid,
6786								extack);
6787			if (err)
6788				return err;
6789		}
6790	}
6791
6792	return 0;
6793}
6794
6795static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6796				       unsigned long event,
6797				       struct netlink_ext_ack *extack)
6798{
6799	if (netif_is_bridge_port(lag_dev))
6800		return 0;
6801
6802	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
6803					     MLXSW_SP_DEFAULT_VID, extack);
6804}
6805
6806static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
6807					  struct net_device *l3_dev,
6808					  unsigned long event,
6809					  struct netlink_ext_ack *extack)
6810{
6811	struct mlxsw_sp_rif_params params = {
6812		.dev = l3_dev,
6813	};
6814	struct mlxsw_sp_rif *rif;
6815
6816	switch (event) {
6817	case NETDEV_UP:
6818		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6819		if (IS_ERR(rif))
6820			return PTR_ERR(rif);
6821		break;
6822	case NETDEV_DOWN:
6823		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6824		mlxsw_sp_rif_destroy(rif);
6825		break;
6826	}
6827
6828	return 0;
6829}
6830
6831static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
6832					struct net_device *vlan_dev,
6833					unsigned long event,
6834					struct netlink_ext_ack *extack)
6835{
6836	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6837	u16 vid = vlan_dev_vlan_id(vlan_dev);
6838
6839	if (netif_is_bridge_port(vlan_dev))
6840		return 0;
6841
6842	if (mlxsw_sp_port_dev_check(real_dev))
6843		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6844							 event, vid, extack);
6845	else if (netif_is_lag_master(real_dev))
6846		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6847						     vid, extack);
6848	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6849		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
6850						      extack);
6851
6852	return 0;
6853}
6854
6855static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
6856{
6857	u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
6858	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6859
6860	return ether_addr_equal_masked(mac, vrrp4, mask);
6861}
6862
6863static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
6864{
6865	u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
6866	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6867
6868	return ether_addr_equal_masked(mac, vrrp6, mask);
6869}
6870
6871static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6872				const u8 *mac, bool adding)
6873{
6874	char ritr_pl[MLXSW_REG_RITR_LEN];
6875	u8 vrrp_id = adding ? mac[5] : 0;
6876	int err;
6877
6878	if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
6879	    !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
6880		return 0;
6881
6882	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6883	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6884	if (err)
6885		return err;
6886
6887	if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
6888		mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
6889	else
6890		mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
6891
6892	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6893}
6894
6895static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
6896				    const struct net_device *macvlan_dev,
6897				    struct netlink_ext_ack *extack)
6898{
6899	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6900	struct mlxsw_sp_rif *rif;
6901	int err;
6902
6903	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6904	if (!rif) {
6905		NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
6906		return -EOPNOTSUPP;
6907	}
6908
6909	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6910				  mlxsw_sp_fid_index(rif->fid), true);
6911	if (err)
6912		return err;
6913
6914	err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
6915				   macvlan_dev->dev_addr, true);
6916	if (err)
6917		goto err_rif_vrrp_add;
6918
6919	/* Make sure the bridge driver does not have this MAC pointing at
6920	 * some other port.
6921	 */
6922	if (rif->ops->fdb_del)
6923		rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
6924
6925	return 0;
6926
6927err_rif_vrrp_add:
6928	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6929			    mlxsw_sp_fid_index(rif->fid), false);
6930	return err;
6931}
6932
6933void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
6934			      const struct net_device *macvlan_dev)
6935{
6936	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6937	struct mlxsw_sp_rif *rif;
6938
6939	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6940	/* If we do not have a RIF, then we already took care of
6941	 * removing the macvlan's MAC during RIF deletion.
6942	 */
6943	if (!rif)
6944		return;
6945	mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
6946			     false);
6947	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6948			    mlxsw_sp_fid_index(rif->fid), false);
6949}
6950
6951static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
6952					   struct net_device *macvlan_dev,
6953					   unsigned long event,
6954					   struct netlink_ext_ack *extack)
6955{
6956	switch (event) {
6957	case NETDEV_UP:
6958		return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
6959	case NETDEV_DOWN:
6960		mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6961		break;
6962	}
6963
6964	return 0;
6965}
6966
6967static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
6968					       struct net_device *dev,
6969					       const unsigned char *dev_addr,
6970					       struct netlink_ext_ack *extack)
6971{
6972	struct mlxsw_sp_rif *rif;
6973	int i;
6974
6975	/* A RIF is not created for macvlan netdevs. Their MAC is used to
6976	 * populate the FDB
6977	 */
6978	if (netif_is_macvlan(dev) || netif_is_l3_master(dev))
6979		return 0;
6980
6981	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6982		rif = mlxsw_sp->router->rifs[i];
6983		if (rif && rif->dev && rif->dev != dev &&
6984		    !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
6985					     mlxsw_sp->mac_mask)) {
6986			NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix");
6987			return -EINVAL;
6988		}
6989	}
6990
6991	return 0;
6992}
6993
6994static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
6995				     struct net_device *dev,
6996				     unsigned long event,
6997				     struct netlink_ext_ack *extack)
6998{
6999	if (mlxsw_sp_port_dev_check(dev))
7000		return mlxsw_sp_inetaddr_port_event(dev, event, extack);
7001	else if (netif_is_lag_master(dev))
7002		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
7003	else if (netif_is_bridge_master(dev))
7004		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
7005						      extack);
7006	else if (is_vlan_dev(dev))
7007		return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
7008						    extack);
7009	else if (netif_is_macvlan(dev))
7010		return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
7011						       extack);
7012	else
7013		return 0;
7014}
7015
7016static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
7017				   unsigned long event, void *ptr)
7018{
7019	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
7020	struct net_device *dev = ifa->ifa_dev->dev;
7021	struct mlxsw_sp_router *router;
7022	struct mlxsw_sp_rif *rif;
7023	int err = 0;
7024
7025	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
7026	if (event == NETDEV_UP)
7027		goto out;
7028
7029	router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
7030	rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
7031	if (!mlxsw_sp_rif_should_config(rif, dev, event))
7032		goto out;
7033
7034	err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
7035out:
7036	return notifier_from_errno(err);
7037}
7038
7039int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
7040				  unsigned long event, void *ptr)
7041{
7042	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
7043	struct net_device *dev = ivi->ivi_dev->dev;
7044	struct mlxsw_sp *mlxsw_sp;
7045	struct mlxsw_sp_rif *rif;
7046	int err = 0;
7047
7048	mlxsw_sp = mlxsw_sp_lower_get(dev);
7049	if (!mlxsw_sp)
7050		goto out;
7051
7052	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7053	if (!mlxsw_sp_rif_should_config(rif, dev, event))
7054		goto out;
7055
7056	err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
7057						  ivi->extack);
7058	if (err)
7059		goto out;
7060
7061	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
7062out:
7063	return notifier_from_errno(err);
7064}
7065
7066struct mlxsw_sp_inet6addr_event_work {
7067	struct work_struct work;
7068	struct mlxsw_sp *mlxsw_sp;
7069	struct net_device *dev;
7070	unsigned long event;
7071};
7072
7073static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
7074{
7075	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
7076		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
7077	struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
7078	struct net_device *dev = inet6addr_work->dev;
7079	unsigned long event = inet6addr_work->event;
7080	struct mlxsw_sp_rif *rif;
7081
7082	rtnl_lock();
7083
7084	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7085	if (!mlxsw_sp_rif_should_config(rif, dev, event))
7086		goto out;
7087
7088	__mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
7089out:
7090	rtnl_unlock();
7091	dev_put(dev);
7092	kfree(inet6addr_work);
7093}
7094
7095/* Called with rcu_read_lock() */
7096static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
7097				    unsigned long event, void *ptr)
7098{
7099	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
7100	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
7101	struct net_device *dev = if6->idev->dev;
7102	struct mlxsw_sp_router *router;
7103
7104	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
7105	if (event == NETDEV_UP)
7106		return NOTIFY_DONE;
7107
7108	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
7109	if (!inet6addr_work)
7110		return NOTIFY_BAD;
7111
7112	router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
7113	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
7114	inet6addr_work->mlxsw_sp = router->mlxsw_sp;
7115	inet6addr_work->dev = dev;
7116	inet6addr_work->event = event;
7117	dev_hold(dev);
7118	mlxsw_core_schedule_work(&inet6addr_work->work);
7119
7120	return NOTIFY_DONE;
7121}
7122
7123int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
7124				   unsigned long event, void *ptr)
7125{
7126	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
7127	struct net_device *dev = i6vi->i6vi_dev->dev;
7128	struct mlxsw_sp *mlxsw_sp;
7129	struct mlxsw_sp_rif *rif;
7130	int err = 0;
7131
7132	mlxsw_sp = mlxsw_sp_lower_get(dev);
7133	if (!mlxsw_sp)
7134		goto out;
7135
7136	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7137	if (!mlxsw_sp_rif_should_config(rif, dev, event))
7138		goto out;
7139
7140	err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
7141						  i6vi->extack);
7142	if (err)
7143		goto out;
7144
7145	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
7146out:
7147	return notifier_from_errno(err);
7148}
7149
7150static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
7151			     const char *mac, int mtu)
7152{
7153	char ritr_pl[MLXSW_REG_RITR_LEN];
7154	int err;
7155
7156	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
7157	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7158	if (err)
7159		return err;
7160
7161	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
7162	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
7163	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
7164	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7165}
7166
7167static int
7168mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
7169				  struct mlxsw_sp_rif *rif)
7170{
7171	struct net_device *dev = rif->dev;
7172	u16 fid_index;
7173	int err;
7174
7175	fid_index = mlxsw_sp_fid_index(rif->fid);
7176
7177	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
7178	if (err)
7179		return err;
7180
7181	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
7182				dev->mtu);
7183	if (err)
7184		goto err_rif_edit;
7185
7186	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
7187	if (err)
7188		goto err_rif_fdb_op;
7189
7190	if (rif->mtu != dev->mtu) {
7191		struct mlxsw_sp_vr *vr;
7192		int i;
7193
7194		/* The RIF is relevant only to its mr_table instance, as unlike
7195		 * unicast routing, in multicast routing a RIF cannot be shared
7196		 * between several multicast routing tables.
7197		 */
7198		vr = &mlxsw_sp->router->vrs[rif->vr_id];
7199		for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
7200			mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
7201						   rif, dev->mtu);
7202	}
7203
7204	ether_addr_copy(rif->addr, dev->dev_addr);
7205	rif->mtu = dev->mtu;
7206
7207	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
7208
7209	return 0;
7210
7211err_rif_fdb_op:
7212	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
7213err_rif_edit:
7214	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
7215	return err;
7216}
7217
7218static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
7219			    struct netdev_notifier_pre_changeaddr_info *info)
7220{
7221	struct netlink_ext_ack *extack;
7222
7223	extack = netdev_notifier_info_to_extack(&info->info);
7224	return mlxsw_sp_router_port_check_rif_addr(rif->mlxsw_sp, rif->dev,
7225						   info->dev_addr, extack);
7226}
7227
7228int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
7229					 unsigned long event, void *ptr)
7230{
7231	struct mlxsw_sp *mlxsw_sp;
7232	struct mlxsw_sp_rif *rif;
7233
7234	mlxsw_sp = mlxsw_sp_lower_get(dev);
7235	if (!mlxsw_sp)
7236		return 0;
7237
7238	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7239	if (!rif)
7240		return 0;
7241
7242	switch (event) {
7243	case NETDEV_CHANGEMTU: /* fall through */
7244	case NETDEV_CHANGEADDR:
7245		return mlxsw_sp_router_port_change_event(mlxsw_sp, rif);
7246	case NETDEV_PRE_CHANGEADDR:
7247		return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
7248	}
7249
7250	return 0;
7251}
7252
7253static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
7254				  struct net_device *l3_dev,
7255				  struct netlink_ext_ack *extack)
7256{
7257	struct mlxsw_sp_rif *rif;
7258
7259	/* If netdev is already associated with a RIF, then we need to
7260	 * destroy it and create a new one with the new virtual router ID.
7261	 */
7262	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7263	if (rif)
7264		__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
7265					  extack);
7266
7267	return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
7268}
7269
7270static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
7271				    struct net_device *l3_dev)
7272{
7273	struct mlxsw_sp_rif *rif;
7274
7275	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7276	if (!rif)
7277		return;
7278	__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
7279}
7280
7281int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
7282				 struct netdev_notifier_changeupper_info *info)
7283{
7284	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
7285	int err = 0;
7286
7287	/* We do not create a RIF for a macvlan, but only use it to
7288	 * direct more MAC addresses to the router.
7289	 */
7290	if (!mlxsw_sp || netif_is_macvlan(l3_dev))
7291		return 0;
7292
7293	switch (event) {
7294	case NETDEV_PRECHANGEUPPER:
7295		return 0;
7296	case NETDEV_CHANGEUPPER:
7297		if (info->linking) {
7298			struct netlink_ext_ack *extack;
7299
7300			extack = netdev_notifier_info_to_extack(&info->info);
7301			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
7302		} else {
7303			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
7304		}
7305		break;
7306	}
7307
7308	return err;
7309}
7310
7311static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
7312{
7313	struct mlxsw_sp_rif *rif = data;
7314
7315	if (!netif_is_macvlan(dev))
7316		return 0;
7317
7318	return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
7319				   mlxsw_sp_fid_index(rif->fid), false);
7320}
7321
7322static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
7323{
7324	if (!netif_is_macvlan_port(rif->dev))
7325		return 0;
7326
7327	netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
7328	return netdev_walk_all_upper_dev_rcu(rif->dev,
7329					     __mlxsw_sp_rif_macvlan_flush, rif);
7330}
7331
7332static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
7333				       const struct mlxsw_sp_rif_params *params)
7334{
7335	struct mlxsw_sp_rif_subport *rif_subport;
7336
7337	rif_subport = mlxsw_sp_rif_subport_rif(rif);
7338	refcount_set(&rif_subport->ref_count, 1);
7339	rif_subport->vid = params->vid;
7340	rif_subport->lag = params->lag;
7341	if (params->lag)
7342		rif_subport->lag_id = params->lag_id;
7343	else
7344		rif_subport->system_port = params->system_port;
7345}
7346
7347static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
7348{
7349	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7350	struct mlxsw_sp_rif_subport *rif_subport;
7351	char ritr_pl[MLXSW_REG_RITR_LEN];
7352
7353	rif_subport = mlxsw_sp_rif_subport_rif(rif);
7354	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
7355			    rif->rif_index, rif->vr_id, rif->dev->mtu);
7356	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7357	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
7358				  rif_subport->lag ? rif_subport->lag_id :
7359						     rif_subport->system_port,
7360				  rif_subport->vid);
7361
7362	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7363}
7364
7365static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
7366{
7367	int err;
7368
7369	err = mlxsw_sp_rif_subport_op(rif, true);
7370	if (err)
7371		return err;
7372
7373	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7374				  mlxsw_sp_fid_index(rif->fid), true);
7375	if (err)
7376		goto err_rif_fdb_op;
7377
7378	mlxsw_sp_fid_rif_set(rif->fid, rif);
7379	return 0;
7380
7381err_rif_fdb_op:
7382	mlxsw_sp_rif_subport_op(rif, false);
7383	return err;
7384}
7385
7386static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
7387{
7388	struct mlxsw_sp_fid *fid = rif->fid;
7389
7390	mlxsw_sp_fid_rif_set(fid, NULL);
7391	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7392			    mlxsw_sp_fid_index(fid), false);
7393	mlxsw_sp_rif_macvlan_flush(rif);
7394	mlxsw_sp_rif_subport_op(rif, false);
7395}
7396
7397static struct mlxsw_sp_fid *
7398mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
7399			     struct netlink_ext_ack *extack)
7400{
7401	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
7402}
7403
7404static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
7405	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
7406	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
7407	.setup			= mlxsw_sp_rif_subport_setup,
7408	.configure		= mlxsw_sp_rif_subport_configure,
7409	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
7410	.fid_get		= mlxsw_sp_rif_subport_fid_get,
7411};
7412
7413static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
7414				    enum mlxsw_reg_ritr_if_type type,
7415				    u16 vid_fid, bool enable)
7416{
7417	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7418	char ritr_pl[MLXSW_REG_RITR_LEN];
7419
7420	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
7421			    rif->dev->mtu);
7422	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7423	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
7424
7425	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7426}
7427
7428u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
7429{
7430	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
7431}
7432
7433static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
7434{
7435	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7436	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7437	int err;
7438
7439	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
7440	if (err)
7441		return err;
7442
7443	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7444				     mlxsw_sp_router_port(mlxsw_sp), true);
7445	if (err)
7446		goto err_fid_mc_flood_set;
7447
7448	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7449				     mlxsw_sp_router_port(mlxsw_sp), true);
7450	if (err)
7451		goto err_fid_bc_flood_set;
7452
7453	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7454				  mlxsw_sp_fid_index(rif->fid), true);
7455	if (err)
7456		goto err_rif_fdb_op;
7457
7458	mlxsw_sp_fid_rif_set(rif->fid, rif);
7459	return 0;
7460
7461err_rif_fdb_op:
7462	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7463			       mlxsw_sp_router_port(mlxsw_sp), false);
7464err_fid_bc_flood_set:
7465	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7466			       mlxsw_sp_router_port(mlxsw_sp), false);
7467err_fid_mc_flood_set:
7468	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7469	return err;
7470}
7471
7472static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
7473{
7474	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7475	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7476	struct mlxsw_sp_fid *fid = rif->fid;
7477
7478	mlxsw_sp_fid_rif_set(fid, NULL);
7479	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7480			    mlxsw_sp_fid_index(fid), false);
7481	mlxsw_sp_rif_macvlan_flush(rif);
7482	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7483			       mlxsw_sp_router_port(mlxsw_sp), false);
7484	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7485			       mlxsw_sp_router_port(mlxsw_sp), false);
7486	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7487}
7488
7489static struct mlxsw_sp_fid *
7490mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
7491			  struct netlink_ext_ack *extack)
7492{
7493	struct net_device *br_dev = rif->dev;
7494	u16 vid;
7495	int err;
7496
7497	if (is_vlan_dev(rif->dev)) {
7498		vid = vlan_dev_vlan_id(rif->dev);
7499		br_dev = vlan_dev_real_dev(rif->dev);
7500		if (WARN_ON(!netif_is_bridge_master(br_dev)))
7501			return ERR_PTR(-EINVAL);
7502	} else {
7503		err = br_vlan_get_pvid(rif->dev, &vid);
7504		if (err < 0 || !vid) {
7505			NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
7506			return ERR_PTR(-EINVAL);
7507		}
7508	}
7509
7510	return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, br_dev, vid, extack);
7511}
7512
7513static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7514{
7515	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7516	struct switchdev_notifier_fdb_info info;
7517	struct net_device *br_dev;
7518	struct net_device *dev;
7519
7520	br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
7521	dev = br_fdb_find_port(br_dev, mac, vid);
7522	if (!dev)
7523		return;
7524
7525	info.addr = mac;
7526	info.vid = vid;
7527	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7528				 NULL);
7529}
7530
7531static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
7532	.type			= MLXSW_SP_RIF_TYPE_VLAN,
7533	.rif_size		= sizeof(struct mlxsw_sp_rif),
7534	.configure		= mlxsw_sp_rif_vlan_configure,
7535	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
7536	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
7537	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
7538};
7539
7540static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
7541{
7542	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7543	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7544	int err;
7545
7546	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
7547				       true);
7548	if (err)
7549		return err;
7550
7551	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7552				     mlxsw_sp_router_port(mlxsw_sp), true);
7553	if (err)
7554		goto err_fid_mc_flood_set;
7555
7556	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7557				     mlxsw_sp_router_port(mlxsw_sp), true);
7558	if (err)
7559		goto err_fid_bc_flood_set;
7560
7561	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7562				  mlxsw_sp_fid_index(rif->fid), true);
7563	if (err)
7564		goto err_rif_fdb_op;
7565
7566	mlxsw_sp_fid_rif_set(rif->fid, rif);
7567	return 0;
7568
7569err_rif_fdb_op:
7570	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7571			       mlxsw_sp_router_port(mlxsw_sp), false);
7572err_fid_bc_flood_set:
7573	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7574			       mlxsw_sp_router_port(mlxsw_sp), false);
7575err_fid_mc_flood_set:
7576	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7577	return err;
7578}
7579
7580static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
7581{
7582	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7583	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7584	struct mlxsw_sp_fid *fid = rif->fid;
7585
7586	mlxsw_sp_fid_rif_set(fid, NULL);
7587	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7588			    mlxsw_sp_fid_index(fid), false);
7589	mlxsw_sp_rif_macvlan_flush(rif);
7590	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7591			       mlxsw_sp_router_port(mlxsw_sp), false);
7592	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7593			       mlxsw_sp_router_port(mlxsw_sp), false);
7594	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7595}
7596
7597static struct mlxsw_sp_fid *
7598mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
7599			 struct netlink_ext_ack *extack)
7600{
7601	return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, rif->dev, 0, extack);
7602}
7603
7604static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7605{
7606	struct switchdev_notifier_fdb_info info;
7607	struct net_device *dev;
7608
7609	dev = br_fdb_find_port(rif->dev, mac, 0);
7610	if (!dev)
7611		return;
7612
7613	info.addr = mac;
7614	info.vid = 0;
7615	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7616				 NULL);
7617}
7618
7619static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
7620	.type			= MLXSW_SP_RIF_TYPE_FID,
7621	.rif_size		= sizeof(struct mlxsw_sp_rif),
7622	.configure		= mlxsw_sp_rif_fid_configure,
7623	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
7624	.fid_get		= mlxsw_sp_rif_fid_fid_get,
7625	.fdb_del		= mlxsw_sp_rif_fid_fdb_del,
7626};
7627
7628static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = {
7629	.type			= MLXSW_SP_RIF_TYPE_VLAN,
7630	.rif_size		= sizeof(struct mlxsw_sp_rif),
7631	.configure		= mlxsw_sp_rif_fid_configure,
7632	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
7633	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
7634	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
7635};
7636
7637static struct mlxsw_sp_rif_ipip_lb *
7638mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
7639{
7640	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
7641}
7642
7643static void
7644mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
7645			   const struct mlxsw_sp_rif_params *params)
7646{
7647	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
7648	struct mlxsw_sp_rif_ipip_lb *rif_lb;
7649
7650	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
7651				 common);
7652	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
7653	rif_lb->lb_config = params_lb->lb_config;
7654}
7655
7656static int
7657mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7658{
7659	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7660	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7661	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7662	struct mlxsw_sp_vr *ul_vr;
7663	int err;
7664
7665	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
7666	if (IS_ERR(ul_vr))
7667		return PTR_ERR(ul_vr);
7668
7669	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
7670	if (err)
7671		goto err_loopback_op;
7672
7673	lb_rif->ul_vr_id = ul_vr->id;
7674	lb_rif->ul_rif_id = 0;
7675	++ul_vr->rif_count;
7676	return 0;
7677
7678err_loopback_op:
7679	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7680	return err;
7681}
7682
7683static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7684{
7685	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7686	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7687	struct mlxsw_sp_vr *ul_vr;
7688
7689	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7690	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
7691
7692	--ul_vr->rif_count;
7693	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7694}
7695
7696static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
7697	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
7698	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
7699	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
7700	.configure		= mlxsw_sp1_rif_ipip_lb_configure,
7701	.deconfigure		= mlxsw_sp1_rif_ipip_lb_deconfigure,
7702};
7703
7704const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
7705	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
7706	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_emu_ops,
7707	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
7708	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp1_rif_ipip_lb_ops,
7709};
7710
7711static int
7712mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
7713{
7714	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7715	char ritr_pl[MLXSW_REG_RITR_LEN];
7716
7717	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
7718			    ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
7719	mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
7720					     MLXSW_REG_RITR_LOOPBACK_GENERIC);
7721
7722	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7723}
7724
7725static struct mlxsw_sp_rif *
7726mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
7727		       struct netlink_ext_ack *extack)
7728{
7729	struct mlxsw_sp_rif *ul_rif;
7730	u16 rif_index;
7731	int err;
7732
7733	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
7734	if (err) {
7735		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
7736		return ERR_PTR(err);
7737	}
7738
7739	ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
7740	if (!ul_rif)
7741		return ERR_PTR(-ENOMEM);
7742
7743	mlxsw_sp->router->rifs[rif_index] = ul_rif;
7744	ul_rif->mlxsw_sp = mlxsw_sp;
7745	err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
7746	if (err)
7747		goto ul_rif_op_err;
7748
7749	return ul_rif;
7750
7751ul_rif_op_err:
7752	mlxsw_sp->router->rifs[rif_index] = NULL;
7753	kfree(ul_rif);
7754	return ERR_PTR(err);
7755}
7756
7757static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
7758{
7759	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7760
7761	mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
7762	mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
7763	kfree(ul_rif);
7764}
7765
7766static struct mlxsw_sp_rif *
7767mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
7768		    struct netlink_ext_ack *extack)
7769{
7770	struct mlxsw_sp_vr *vr;
7771	int err;
7772
7773	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
7774	if (IS_ERR(vr))
7775		return ERR_CAST(vr);
7776
7777	if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
7778		return vr->ul_rif;
7779
7780	vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
7781	if (IS_ERR(vr->ul_rif)) {
7782		err = PTR_ERR(vr->ul_rif);
7783		goto err_ul_rif_create;
7784	}
7785
7786	vr->rif_count++;
7787	refcount_set(&vr->ul_rif_refcnt, 1);
7788
7789	return vr->ul_rif;
7790
7791err_ul_rif_create:
7792	mlxsw_sp_vr_put(mlxsw_sp, vr);
7793	return ERR_PTR(err);
7794}
7795
7796static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
7797{
7798	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7799	struct mlxsw_sp_vr *vr;
7800
7801	vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
7802
7803	if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
7804		return;
7805
7806	vr->rif_count--;
7807	mlxsw_sp_ul_rif_destroy(ul_rif);
7808	mlxsw_sp_vr_put(mlxsw_sp, vr);
7809}
7810
7811int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
7812			       u16 *ul_rif_index)
7813{
7814	struct mlxsw_sp_rif *ul_rif;
7815
7816	ASSERT_RTNL();
7817
7818	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7819	if (IS_ERR(ul_rif))
7820		return PTR_ERR(ul_rif);
7821	*ul_rif_index = ul_rif->rif_index;
7822
7823	return 0;
7824}
7825
7826void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
7827{
7828	struct mlxsw_sp_rif *ul_rif;
7829
7830	ASSERT_RTNL();
7831
7832	ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
7833	if (WARN_ON(!ul_rif))
7834		return;
7835
7836	mlxsw_sp_ul_rif_put(ul_rif);
7837}
7838
7839static int
7840mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7841{
7842	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7843	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7844	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7845	struct mlxsw_sp_rif *ul_rif;
7846	int err;
7847
7848	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7849	if (IS_ERR(ul_rif))
7850		return PTR_ERR(ul_rif);
7851
7852	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
7853	if (err)
7854		goto err_loopback_op;
7855
7856	lb_rif->ul_vr_id = 0;
7857	lb_rif->ul_rif_id = ul_rif->rif_index;
7858
7859	return 0;
7860
7861err_loopback_op:
7862	mlxsw_sp_ul_rif_put(ul_rif);
7863	return err;
7864}
7865
7866static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7867{
7868	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7869	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7870	struct mlxsw_sp_rif *ul_rif;
7871
7872	ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
7873	mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
7874	mlxsw_sp_ul_rif_put(ul_rif);
7875}
7876
7877static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
7878	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
7879	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
7880	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
7881	.configure		= mlxsw_sp2_rif_ipip_lb_configure,
7882	.deconfigure		= mlxsw_sp2_rif_ipip_lb_deconfigure,
7883};
7884
7885const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
7886	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
7887	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_emu_ops,
7888	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
7889	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp2_rif_ipip_lb_ops,
7890};
7891
7892static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7893{
7894	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7895
7896	mlxsw_sp->router->rifs = kcalloc(max_rifs,
7897					 sizeof(struct mlxsw_sp_rif *),
7898					 GFP_KERNEL);
7899	if (!mlxsw_sp->router->rifs)
7900		return -ENOMEM;
7901
7902	return 0;
7903}
7904
7905static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7906{
7907	int i;
7908
7909	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7910		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7911
7912	kfree(mlxsw_sp->router->rifs);
7913}
7914
7915static int
7916mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
7917{
7918	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
7919
7920	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
7921	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
7922}
7923
7924static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
7925{
7926	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
7927	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
7928	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
7929}
7930
7931static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
7932{
7933	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
7934}
7935
7936static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
7937{
7938	struct mlxsw_sp_router *router;
7939
7940	/* Flush pending FIB notifications and then flush the device's
7941	 * table before requesting another dump. The FIB notification
7942	 * block is unregistered, so no need to take RTNL.
7943	 */
7944	mlxsw_core_flush_owq();
7945	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7946	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
7947}
7948
7949#ifdef CONFIG_IP_ROUTE_MULTIPATH
7950static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
7951{
7952	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
7953}
7954
7955static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
7956{
7957	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
7958}
7959
7960static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
7961{
7962	bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
7963
7964	mlxsw_sp_mp_hash_header_set(recr2_pl,
7965				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7966	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7967	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7968	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7969	if (only_l3)
7970		return;
7971	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7972	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7973	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7974	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7975}
7976
7977static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
7978{
7979	bool only_l3 = !ip6_multipath_hash_policy(&init_net);
7980
7981	mlxsw_sp_mp_hash_header_set(recr2_pl,
7982				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7983	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7984	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7985	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7986	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7987	if (only_l3) {
7988		mlxsw_sp_mp_hash_field_set(recr2_pl,
7989					   MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7990	} else {
7991		mlxsw_sp_mp_hash_header_set(recr2_pl,
7992					    MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
7993		mlxsw_sp_mp_hash_field_set(recr2_pl,
7994					   MLXSW_REG_RECR2_TCP_UDP_SPORT);
7995		mlxsw_sp_mp_hash_field_set(recr2_pl,
7996					   MLXSW_REG_RECR2_TCP_UDP_DPORT);
7997	}
7998}
7999
8000static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
8001{
8002	char recr2_pl[MLXSW_REG_RECR2_LEN];
8003	u32 seed;
8004
8005	seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
8006	mlxsw_reg_recr2_pack(recr2_pl, seed);
8007	mlxsw_sp_mp4_hash_init(recr2_pl);
8008	mlxsw_sp_mp6_hash_init(recr2_pl);
8009
8010	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
8011}
8012#else
8013static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
8014{
8015	return 0;
8016}
8017#endif
8018
8019static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
8020{
8021	char rdpm_pl[MLXSW_REG_RDPM_LEN];
8022	unsigned int i;
8023
8024	MLXSW_REG_ZERO(rdpm, rdpm_pl);
8025
8026	/* HW is determining switch priority based on DSCP-bits, but the
8027	 * kernel is still doing that based on the ToS. Since there's a
8028	 * mismatch in bits we need to make sure to translate the right
8029	 * value ToS would observe, skipping the 2 least-significant ECN bits.
8030	 */
8031	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
8032		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
8033
8034	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
8035}
8036
8037static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
8038{
8039	bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority;
8040	char rgcr_pl[MLXSW_REG_RGCR_LEN];
8041	u64 max_rifs;
8042	int err;
8043
8044	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
8045		return -EIO;
8046	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
8047
8048	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
8049	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
8050	mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
8051	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
8052	if (err)
8053		return err;
8054	return 0;
8055}
8056
8057static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
8058{
8059	char rgcr_pl[MLXSW_REG_RGCR_LEN];
8060
8061	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
8062	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
8063}
8064
8065int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
8066{
8067	struct mlxsw_sp_router *router;
8068	int err;
8069
8070	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
8071	if (!router)
8072		return -ENOMEM;
8073	mlxsw_sp->router = router;
8074	router->mlxsw_sp = mlxsw_sp;
8075
8076	router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
8077	err = register_inetaddr_notifier(&router->inetaddr_nb);
8078	if (err)
8079		goto err_register_inetaddr_notifier;
8080
8081	router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
8082	err = register_inet6addr_notifier(&router->inet6addr_nb);
8083	if (err)
8084		goto err_register_inet6addr_notifier;
8085
8086	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
8087	err = __mlxsw_sp_router_init(mlxsw_sp);
8088	if (err)
8089		goto err_router_init;
8090
8091	err = mlxsw_sp_rifs_init(mlxsw_sp);
8092	if (err)
8093		goto err_rifs_init;
8094
8095	err = mlxsw_sp_ipips_init(mlxsw_sp);
8096	if (err)
8097		goto err_ipips_init;
8098
8099	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
8100			      &mlxsw_sp_nexthop_ht_params);
8101	if (err)
8102		goto err_nexthop_ht_init;
8103
8104	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
8105			      &mlxsw_sp_nexthop_group_ht_params);
8106	if (err)
8107		goto err_nexthop_group_ht_init;
8108
8109	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
8110	err = mlxsw_sp_lpm_init(mlxsw_sp);
8111	if (err)
8112		goto err_lpm_init;
8113
8114	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
8115	if (err)
8116		goto err_mr_init;
8117
8118	err = mlxsw_sp_vrs_init(mlxsw_sp);
8119	if (err)
8120		goto err_vrs_init;
8121
8122	err = mlxsw_sp_neigh_init(mlxsw_sp);
8123	if (err)
8124		goto err_neigh_init;
8125
8126	mlxsw_sp->router->netevent_nb.notifier_call =
8127		mlxsw_sp_router_netevent_event;
8128	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8129	if (err)
8130		goto err_register_netevent_notifier;
8131
8132	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
8133	if (err)
8134		goto err_mp_hash_init;
8135
8136	err = mlxsw_sp_dscp_init(mlxsw_sp);
8137	if (err)
8138		goto err_dscp_init;
8139
8140	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
8141	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
8142				    mlxsw_sp_router_fib_dump_flush);
8143	if (err)
8144		goto err_register_fib_notifier;
8145
8146	return 0;
8147
8148err_register_fib_notifier:
8149err_dscp_init:
8150err_mp_hash_init:
8151	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8152err_register_netevent_notifier:
8153	mlxsw_sp_neigh_fini(mlxsw_sp);
8154err_neigh_init:
8155	mlxsw_sp_vrs_fini(mlxsw_sp);
8156err_vrs_init:
8157	mlxsw_sp_mr_fini(mlxsw_sp);
8158err_mr_init:
8159	mlxsw_sp_lpm_fini(mlxsw_sp);
8160err_lpm_init:
8161	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
8162err_nexthop_group_ht_init:
8163	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
8164err_nexthop_ht_init:
8165	mlxsw_sp_ipips_fini(mlxsw_sp);
8166err_ipips_init:
8167	mlxsw_sp_rifs_fini(mlxsw_sp);
8168err_rifs_init:
8169	__mlxsw_sp_router_fini(mlxsw_sp);
8170err_router_init:
8171	unregister_inet6addr_notifier(&router->inet6addr_nb);
8172err_register_inet6addr_notifier:
8173	unregister_inetaddr_notifier(&router->inetaddr_nb);
8174err_register_inetaddr_notifier:
8175	kfree(mlxsw_sp->router);
8176	return err;
8177}
8178
8179void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
8180{
8181	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
8182	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8183	mlxsw_sp_neigh_fini(mlxsw_sp);
8184	mlxsw_sp_vrs_fini(mlxsw_sp);
8185	mlxsw_sp_mr_fini(mlxsw_sp);
8186	mlxsw_sp_lpm_fini(mlxsw_sp);
8187	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
8188	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
8189	mlxsw_sp_ipips_fini(mlxsw_sp);
8190	mlxsw_sp_rifs_fini(mlxsw_sp);
8191	__mlxsw_sp_router_fini(mlxsw_sp);
8192	unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
8193	unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
8194	kfree(mlxsw_sp->router);
8195}