Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *  SR-IPv6 implementation
   4 *
   5 *  Authors:
   6 *  David Lebrun <david.lebrun@uclouvain.be>
   7 *  eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
   8 */
   9
  10#include <linux/filter.h>
  11#include <linux/types.h>
  12#include <linux/skbuff.h>
  13#include <linux/net.h>
  14#include <linux/module.h>
  15#include <net/ip.h>
  16#include <net/lwtunnel.h>
  17#include <net/netevent.h>
  18#include <net/netns/generic.h>
  19#include <net/ip6_fib.h>
  20#include <net/route.h>
  21#include <net/seg6.h>
  22#include <linux/seg6.h>
  23#include <linux/seg6_local.h>
  24#include <net/addrconf.h>
  25#include <net/ip6_route.h>
  26#include <net/dst_cache.h>
  27#include <net/ip_tunnels.h>
  28#ifdef CONFIG_IPV6_SEG6_HMAC
  29#include <net/seg6_hmac.h>
  30#endif
  31#include <net/seg6_local.h>
  32#include <linux/etherdevice.h>
  33#include <linux/bpf.h>
  34#include <linux/netfilter.h>
  35
  36#define SEG6_F_ATTR(i)		BIT(i)
  37
  38struct seg6_local_lwt;
  39
  40/* callbacks used for customizing the creation and destruction of a behavior */
  41struct seg6_local_lwtunnel_ops {
  42	int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg,
  43			   struct netlink_ext_ack *extack);
  44	void (*destroy_state)(struct seg6_local_lwt *slwt);
  45};
  46
  47struct seg6_action_desc {
  48	int action;
  49	unsigned long attrs;
  50
  51	/* The optattrs field is used for specifying all the optional
  52	 * attributes supported by a specific behavior.
  53	 * It means that if one of these attributes is not provided in the
  54	 * netlink message during the behavior creation, no errors will be
  55	 * returned to the userspace.
  56	 *
  57	 * Each attribute can be only of two types (mutually exclusive):
  58	 * 1) required or 2) optional.
  59	 * Every user MUST obey to this rule! If you set an attribute as
  60	 * required the same attribute CANNOT be set as optional and vice
  61	 * versa.
  62	 */
  63	unsigned long optattrs;
  64
  65	int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
  66	int static_headroom;
  67
  68	struct seg6_local_lwtunnel_ops slwt_ops;
  69};
  70
  71struct bpf_lwt_prog {
  72	struct bpf_prog *prog;
  73	char *name;
  74};
  75
  76/* default length values (expressed in bits) for both Locator-Block and
  77 * Locator-Node Function.
  78 *
  79 * Both SEG6_LOCAL_LCBLOCK_DBITS and SEG6_LOCAL_LCNODE_FN_DBITS *must* be:
  80 *    i) greater than 0;
  81 *   ii) evenly divisible by 8. In other terms, the lengths of the
  82 *	 Locator-Block and Locator-Node Function must be byte-aligned (we can
  83 *	 relax this constraint in the future if really needed).
  84 *
  85 * Moreover, a third condition must hold:
  86 *  iii) SEG6_LOCAL_LCBLOCK_DBITS + SEG6_LOCAL_LCNODE_FN_DBITS <= 128.
  87 *
  88 * The correctness of SEG6_LOCAL_LCBLOCK_DBITS and SEG6_LOCAL_LCNODE_FN_DBITS
  89 * values are checked during the kernel compilation. If the compilation stops,
  90 * check the value of these parameters to see if they meet conditions (i), (ii)
  91 * and (iii).
  92 */
  93#define SEG6_LOCAL_LCBLOCK_DBITS	32
  94#define SEG6_LOCAL_LCNODE_FN_DBITS	16
  95
  96/* The following next_csid_chk_{cntr,lcblock,lcblock_fn}_bits macros can be
  97 * used directly to check whether the lengths (in bits) of Locator-Block and
  98 * Locator-Node Function are valid according to (i), (ii), (iii).
  99 */
 100#define next_csid_chk_cntr_bits(blen, flen)		\
 101	((blen) + (flen) > 128)
 102
 103#define next_csid_chk_lcblock_bits(blen)		\
 104({							\
 105	typeof(blen) __tmp = blen;			\
 106	(!__tmp || __tmp > 120 || (__tmp & 0x07));	\
 107})
 108
 109#define next_csid_chk_lcnode_fn_bits(flen)		\
 110	next_csid_chk_lcblock_bits(flen)
 111
 112/* Supported Flavor operations are reported in this bitmask */
 113#define SEG6_LOCAL_FLV_SUPP_OPS	(BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID))
 114
 115struct seg6_flavors_info {
 116	/* Flavor operations */
 117	__u32 flv_ops;
 118
 119	/* Locator-Block length, expressed in bits */
 120	__u8 lcblock_bits;
 121	/* Locator-Node Function length, expressed in bits*/
 122	__u8 lcnode_func_bits;
 123};
 124
 125enum seg6_end_dt_mode {
 126	DT_INVALID_MODE	= -EINVAL,
 127	DT_LEGACY_MODE	= 0,
 128	DT_VRF_MODE	= 1,
 129};
 130
 131struct seg6_end_dt_info {
 132	enum seg6_end_dt_mode mode;
 133
 134	struct net *net;
 135	/* VRF device associated to the routing table used by the SRv6
 136	 * End.DT4/DT6 behavior for routing IPv4/IPv6 packets.
 137	 */
 138	int vrf_ifindex;
 139	int vrf_table;
 140
 141	/* tunneled packet family (IPv4 or IPv6).
 142	 * Protocol and header length are inferred from family.
 143	 */
 144	u16 family;
 145};
 146
 147struct pcpu_seg6_local_counters {
 148	u64_stats_t packets;
 149	u64_stats_t bytes;
 150	u64_stats_t errors;
 151
 152	struct u64_stats_sync syncp;
 153};
 154
 155/* This struct groups all the SRv6 Behavior counters supported so far.
 156 *
 157 * put_nla_counters() makes use of this data structure to collect all counter
 158 * values after the per-CPU counter evaluation has been performed.
 159 * Finally, each counter value (in seg6_local_counters) is stored in the
 160 * corresponding netlink attribute and sent to user space.
 161 *
 162 * NB: we don't want to expose this structure to user space!
 163 */
 164struct seg6_local_counters {
 165	__u64 packets;
 166	__u64 bytes;
 167	__u64 errors;
 168};
 169
 170#define seg6_local_alloc_pcpu_counters(__gfp)				\
 171	__netdev_alloc_pcpu_stats(struct pcpu_seg6_local_counters,	\
 172				  ((__gfp) | __GFP_ZERO))
 173
 174#define SEG6_F_LOCAL_COUNTERS	SEG6_F_ATTR(SEG6_LOCAL_COUNTERS)
 175
 176struct seg6_local_lwt {
 177	int action;
 178	struct ipv6_sr_hdr *srh;
 179	int table;
 180	struct in_addr nh4;
 181	struct in6_addr nh6;
 182	int iif;
 183	int oif;
 184	struct bpf_lwt_prog bpf;
 185#ifdef CONFIG_NET_L3_MASTER_DEV
 186	struct seg6_end_dt_info dt_info;
 187#endif
 188	struct seg6_flavors_info flv_info;
 189
 190	struct pcpu_seg6_local_counters __percpu *pcpu_counters;
 191
 192	int headroom;
 193	struct seg6_action_desc *desc;
 194	/* unlike the required attrs, we have to track the optional attributes
 195	 * that have been effectively parsed.
 196	 */
 197	unsigned long parsed_optattrs;
 198};
 199
 200static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
 201{
 202	return (struct seg6_local_lwt *)lwt->data;
 203}
 204
 205static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
 206{
 207	struct ipv6_sr_hdr *srh;
 208
 209	srh = seg6_get_srh(skb, IP6_FH_F_SKIP_RH);
 210	if (!srh)
 211		return NULL;
 212
 213#ifdef CONFIG_IPV6_SEG6_HMAC
 214	if (!seg6_hmac_validate_skb(skb))
 215		return NULL;
 216#endif
 217
 218	return srh;
 219}
 220
 221static bool decap_and_validate(struct sk_buff *skb, int proto)
 222{
 223	struct ipv6_sr_hdr *srh;
 224	unsigned int off = 0;
 225
 226	srh = seg6_get_srh(skb, 0);
 227	if (srh && srh->segments_left > 0)
 228		return false;
 229
 230#ifdef CONFIG_IPV6_SEG6_HMAC
 231	if (srh && !seg6_hmac_validate_skb(skb))
 232		return false;
 233#endif
 234
 235	if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0)
 236		return false;
 237
 238	if (!pskb_pull(skb, off))
 239		return false;
 240
 241	skb_postpull_rcsum(skb, skb_network_header(skb), off);
 242
 243	skb_reset_network_header(skb);
 244	skb_reset_transport_header(skb);
 245	if (iptunnel_pull_offloads(skb))
 246		return false;
 247
 248	return true;
 249}
 250
 251static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
 252{
 253	struct in6_addr *addr;
 254
 255	srh->segments_left--;
 256	addr = srh->segments + srh->segments_left;
 257	*daddr = *addr;
 258}
 259
 260static int
 261seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
 262			u32 tbl_id, bool local_delivery)
 263{
 264	struct net *net = dev_net(skb->dev);
 265	struct ipv6hdr *hdr = ipv6_hdr(skb);
 266	int flags = RT6_LOOKUP_F_HAS_SADDR;
 267	struct dst_entry *dst = NULL;
 268	struct rt6_info *rt;
 269	struct flowi6 fl6;
 270	int dev_flags = 0;
 271
 272	memset(&fl6, 0, sizeof(fl6));
 273	fl6.flowi6_iif = skb->dev->ifindex;
 274	fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
 275	fl6.saddr = hdr->saddr;
 276	fl6.flowlabel = ip6_flowinfo(hdr);
 277	fl6.flowi6_mark = skb->mark;
 278	fl6.flowi6_proto = hdr->nexthdr;
 279
 280	if (nhaddr)
 281		fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
 282
 283	if (!tbl_id) {
 284		dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
 285	} else {
 286		struct fib6_table *table;
 287
 288		table = fib6_get_table(net, tbl_id);
 289		if (!table)
 290			goto out;
 291
 292		rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
 293		dst = &rt->dst;
 294	}
 295
 296	/* we want to discard traffic destined for local packet processing,
 297	 * if @local_delivery is set to false.
 298	 */
 299	if (!local_delivery)
 300		dev_flags |= IFF_LOOPBACK;
 301
 302	if (dst && (dst->dev->flags & dev_flags) && !dst->error) {
 303		dst_release(dst);
 304		dst = NULL;
 305	}
 306
 307out:
 308	if (!dst) {
 309		rt = net->ipv6.ip6_blk_hole_entry;
 310		dst = &rt->dst;
 311		dst_hold(dst);
 312	}
 313
 314	skb_dst_drop(skb);
 315	skb_dst_set(skb, dst);
 316	return dst->error;
 317}
 318
 319int seg6_lookup_nexthop(struct sk_buff *skb,
 320			struct in6_addr *nhaddr, u32 tbl_id)
 321{
 322	return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false);
 323}
 324
 325static __u8 seg6_flv_lcblock_octects(const struct seg6_flavors_info *finfo)
 326{
 327	return finfo->lcblock_bits >> 3;
 328}
 329
 330static __u8 seg6_flv_lcnode_func_octects(const struct seg6_flavors_info *finfo)
 331{
 332	return finfo->lcnode_func_bits >> 3;
 333}
 334
 335static bool seg6_next_csid_is_arg_zero(const struct in6_addr *addr,
 336				       const struct seg6_flavors_info *finfo)
 337{
 338	__u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo);
 339	__u8 blk_octects = seg6_flv_lcblock_octects(finfo);
 340	__u8 arg_octects;
 341	int i;
 342
 343	arg_octects = 16 - blk_octects - fnc_octects;
 344	for (i = 0; i < arg_octects; ++i) {
 345		if (addr->s6_addr[blk_octects + fnc_octects + i] != 0x00)
 346			return false;
 347	}
 348
 349	return true;
 350}
 351
 352/* assume that DA.Argument length > 0 */
 353static void seg6_next_csid_advance_arg(struct in6_addr *addr,
 354				       const struct seg6_flavors_info *finfo)
 355{
 356	__u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo);
 357	__u8 blk_octects = seg6_flv_lcblock_octects(finfo);
 358
 359	/* advance DA.Argument */
 360	memmove(&addr->s6_addr[blk_octects],
 361		&addr->s6_addr[blk_octects + fnc_octects],
 362		16 - blk_octects - fnc_octects);
 363
 364	memset(&addr->s6_addr[16 - fnc_octects], 0x00, fnc_octects);
 365}
 366
 367static int input_action_end_core(struct sk_buff *skb,
 368				 struct seg6_local_lwt *slwt)
 369{
 370	struct ipv6_sr_hdr *srh;
 371
 372	srh = get_and_validate_srh(skb);
 373	if (!srh)
 374		goto drop;
 375
 376	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
 377
 378	seg6_lookup_nexthop(skb, NULL, 0);
 379
 380	return dst_input(skb);
 381
 382drop:
 383	kfree_skb(skb);
 384	return -EINVAL;
 385}
 386
 387static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
 388{
 389	const struct seg6_flavors_info *finfo = &slwt->flv_info;
 390	struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
 391
 392	if (seg6_next_csid_is_arg_zero(daddr, finfo))
 393		return input_action_end_core(skb, slwt);
 394
 395	/* update DA */
 396	seg6_next_csid_advance_arg(daddr, finfo);
 397
 398	seg6_lookup_nexthop(skb, NULL, 0);
 399
 400	return dst_input(skb);
 401}
 402
 403static bool seg6_next_csid_enabled(__u32 fops)
 404{
 405	return fops & BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID);
 406}
 407
 408/* regular endpoint function */
 409static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
 410{
 411	const struct seg6_flavors_info *finfo = &slwt->flv_info;
 412
 413	if (seg6_next_csid_enabled(finfo->flv_ops))
 414		return end_next_csid_core(skb, slwt);
 415
 416	return input_action_end_core(skb, slwt);
 417}
 418
 419/* regular endpoint, and forward to specified nexthop */
 420static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
 421{
 422	struct ipv6_sr_hdr *srh;
 423
 424	srh = get_and_validate_srh(skb);
 425	if (!srh)
 426		goto drop;
 427
 428	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
 429
 430	seg6_lookup_nexthop(skb, &slwt->nh6, 0);
 431
 432	return dst_input(skb);
 433
 434drop:
 435	kfree_skb(skb);
 436	return -EINVAL;
 437}
 438
 439static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
 440{
 441	struct ipv6_sr_hdr *srh;
 442
 443	srh = get_and_validate_srh(skb);
 444	if (!srh)
 445		goto drop;
 446
 447	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
 448
 449	seg6_lookup_nexthop(skb, NULL, slwt->table);
 450
 451	return dst_input(skb);
 452
 453drop:
 454	kfree_skb(skb);
 455	return -EINVAL;
 456}
 457
 458/* decapsulate and forward inner L2 frame on specified interface */
 459static int input_action_end_dx2(struct sk_buff *skb,
 460				struct seg6_local_lwt *slwt)
 461{
 462	struct net *net = dev_net(skb->dev);
 463	struct net_device *odev;
 464	struct ethhdr *eth;
 465
 466	if (!decap_and_validate(skb, IPPROTO_ETHERNET))
 467		goto drop;
 468
 469	if (!pskb_may_pull(skb, ETH_HLEN))
 470		goto drop;
 471
 472	skb_reset_mac_header(skb);
 473	eth = (struct ethhdr *)skb->data;
 474
 475	/* To determine the frame's protocol, we assume it is 802.3. This avoids
 476	 * a call to eth_type_trans(), which is not really relevant for our
 477	 * use case.
 478	 */
 479	if (!eth_proto_is_802_3(eth->h_proto))
 480		goto drop;
 481
 482	odev = dev_get_by_index_rcu(net, slwt->oif);
 483	if (!odev)
 484		goto drop;
 485
 486	/* As we accept Ethernet frames, make sure the egress device is of
 487	 * the correct type.
 488	 */
 489	if (odev->type != ARPHRD_ETHER)
 490		goto drop;
 491
 492	if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
 493		goto drop;
 494
 495	skb_orphan(skb);
 496
 497	if (skb_warn_if_lro(skb))
 498		goto drop;
 499
 500	skb_forward_csum(skb);
 501
 502	if (skb->len - ETH_HLEN > odev->mtu)
 503		goto drop;
 504
 505	skb->dev = odev;
 506	skb->protocol = eth->h_proto;
 507
 508	return dev_queue_xmit(skb);
 509
 510drop:
 511	kfree_skb(skb);
 512	return -EINVAL;
 513}
 514
 515static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
 516				       struct sk_buff *skb)
 517{
 518	struct dst_entry *orig_dst = skb_dst(skb);
 519	struct in6_addr *nhaddr = NULL;
 520	struct seg6_local_lwt *slwt;
 521
 522	slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
 523
 524	/* The inner packet is not associated to any local interface,
 525	 * so we do not call netif_rx().
 526	 *
 527	 * If slwt->nh6 is set to ::, then lookup the nexthop for the
 528	 * inner packet's DA. Otherwise, use the specified nexthop.
 529	 */
 530	if (!ipv6_addr_any(&slwt->nh6))
 531		nhaddr = &slwt->nh6;
 532
 533	seg6_lookup_nexthop(skb, nhaddr, 0);
 534
 535	return dst_input(skb);
 536}
 537
 538/* decapsulate and forward to specified nexthop */
 539static int input_action_end_dx6(struct sk_buff *skb,
 540				struct seg6_local_lwt *slwt)
 541{
 542	/* this function accepts IPv6 encapsulated packets, with either
 543	 * an SRH with SL=0, or no SRH.
 544	 */
 545
 546	if (!decap_and_validate(skb, IPPROTO_IPV6))
 547		goto drop;
 548
 549	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 550		goto drop;
 551
 552	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
 553	nf_reset_ct(skb);
 554
 555	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
 556		return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
 557			       dev_net(skb->dev), NULL, skb, NULL,
 558			       skb_dst(skb)->dev, input_action_end_dx6_finish);
 559
 560	return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
 561drop:
 562	kfree_skb(skb);
 563	return -EINVAL;
 564}
 565
 566static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
 567				       struct sk_buff *skb)
 568{
 569	struct dst_entry *orig_dst = skb_dst(skb);
 570	struct seg6_local_lwt *slwt;
 571	struct iphdr *iph;
 572	__be32 nhaddr;
 573	int err;
 574
 575	slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
 576
 577	iph = ip_hdr(skb);
 578
 579	nhaddr = slwt->nh4.s_addr ?: iph->daddr;
 580
 581	skb_dst_drop(skb);
 582
 583	err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
 584	if (err) {
 585		kfree_skb(skb);
 586		return -EINVAL;
 587	}
 588
 589	return dst_input(skb);
 590}
 591
 592static int input_action_end_dx4(struct sk_buff *skb,
 593				struct seg6_local_lwt *slwt)
 594{
 595	if (!decap_and_validate(skb, IPPROTO_IPIP))
 596		goto drop;
 597
 598	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 599		goto drop;
 600
 601	skb->protocol = htons(ETH_P_IP);
 602	skb_set_transport_header(skb, sizeof(struct iphdr));
 603	nf_reset_ct(skb);
 604
 605	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
 606		return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
 607			       dev_net(skb->dev), NULL, skb, NULL,
 608			       skb_dst(skb)->dev, input_action_end_dx4_finish);
 609
 610	return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
 611drop:
 612	kfree_skb(skb);
 613	return -EINVAL;
 614}
 615
 616#ifdef CONFIG_NET_L3_MASTER_DEV
 617static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg)
 618{
 619	const struct nl_info *nli = &fib6_cfg->fc_nlinfo;
 620
 621	return nli->nl_net;
 622}
 623
 624static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg,
 625				   u16 family, struct netlink_ext_ack *extack)
 626{
 627	struct seg6_end_dt_info *info = &slwt->dt_info;
 628	int vrf_ifindex;
 629	struct net *net;
 630
 631	net = fib6_config_get_net(cfg);
 632
 633	/* note that vrf_table was already set by parse_nla_vrftable() */
 634	vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net,
 635							info->vrf_table);
 636	if (vrf_ifindex < 0) {
 637		if (vrf_ifindex == -EPERM) {
 638			NL_SET_ERR_MSG(extack,
 639				       "Strict mode for VRF is disabled");
 640		} else if (vrf_ifindex == -ENODEV) {
 641			NL_SET_ERR_MSG(extack,
 642				       "Table has no associated VRF device");
 643		} else {
 644			pr_debug("seg6local: SRv6 End.DT* creation error=%d\n",
 645				 vrf_ifindex);
 646		}
 647
 648		return vrf_ifindex;
 649	}
 650
 651	info->net = net;
 652	info->vrf_ifindex = vrf_ifindex;
 653
 654	info->family = family;
 655	info->mode = DT_VRF_MODE;
 656
 657	return 0;
 658}
 659
 660/* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and
 661 * routes the IPv4/IPv6 packet by looking at the configured routing table.
 662 *
 663 * In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment
 664 * Routing Header packets) from several interfaces and the outer IPv6
 665 * destination address (DA) is used for retrieving the specific instance of the
 666 * End.DT4/DT6 behavior that should process the packets.
 667 *
 668 * However, the inner IPv4/IPv6 packet is not really bound to any receiving
 669 * interface and thus the End.DT4/DT6 sets the VRF (associated with the
 670 * corresponding routing table) as the *receiving* interface.
 671 * In other words, the End.DT4/DT6 processes a packet as if it has been received
 672 * directly by the VRF (and not by one of its slave devices, if any).
 673 * In this way, the VRF interface is used for routing the IPv4/IPv6 packet in
 674 * according to the routing table configured by the End.DT4/DT6 instance.
 675 *
 676 * This design allows you to get some interesting features like:
 677 *  1) the statistics on rx packets;
 678 *  2) the possibility to install a packet sniffer on the receiving interface
 679 *     (the VRF one) for looking at the incoming packets;
 680 *  3) the possibility to leverage the netfilter prerouting hook for the inner
 681 *     IPv4 packet.
 682 *
 683 * This function returns:
 684 *  - the sk_buff* when the VRF rcv handler has processed the packet correctly;
 685 *  - NULL when the skb is consumed by the VRF rcv handler;
 686 *  - a pointer which encodes a negative error number in case of error.
 687 *    Note that in this case, the function takes care of freeing the skb.
 688 */
 689static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family,
 690				      struct net_device *dev)
 691{
 692	/* based on l3mdev_ip_rcv; we are only interested in the master */
 693	if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev)))
 694		goto drop;
 695
 696	if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv))
 697		goto drop;
 698
 699	/* the decap packet IPv4/IPv6 does not come with any mac header info.
 700	 * We must unset the mac header to allow the VRF device to rebuild it,
 701	 * just in case there is a sniffer attached on the device.
 702	 */
 703	skb_unset_mac_header(skb);
 704
 705	skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family);
 706	if (!skb)
 707		/* the skb buffer was consumed by the handler */
 708		return NULL;
 709
 710	/* when a packet is received by a VRF or by one of its slaves, the
 711	 * master device reference is set into the skb.
 712	 */
 713	if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex))
 714		goto drop;
 715
 716	return skb;
 717
 718drop:
 719	kfree_skb(skb);
 720	return ERR_PTR(-EINVAL);
 721}
 722
 723static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb,
 724					     struct seg6_end_dt_info *info)
 725{
 726	int vrf_ifindex = info->vrf_ifindex;
 727	struct net *net = info->net;
 728
 729	if (unlikely(vrf_ifindex < 0))
 730		goto error;
 731
 732	if (unlikely(!net_eq(dev_net(skb->dev), net)))
 733		goto error;
 734
 735	return dev_get_by_index_rcu(net, vrf_ifindex);
 736
 737error:
 738	return NULL;
 739}
 740
 741static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
 742				       struct seg6_local_lwt *slwt, u16 family)
 743{
 744	struct seg6_end_dt_info *info = &slwt->dt_info;
 745	struct net_device *vrf;
 746	__be16 protocol;
 747	int hdrlen;
 748
 749	vrf = end_dt_get_vrf_rcu(skb, info);
 750	if (unlikely(!vrf))
 751		goto drop;
 752
 753	switch (family) {
 754	case AF_INET:
 755		protocol = htons(ETH_P_IP);
 756		hdrlen = sizeof(struct iphdr);
 757		break;
 758	case AF_INET6:
 759		protocol = htons(ETH_P_IPV6);
 760		hdrlen = sizeof(struct ipv6hdr);
 761		break;
 762	case AF_UNSPEC:
 763		fallthrough;
 764	default:
 765		goto drop;
 766	}
 767
 768	if (unlikely(info->family != AF_UNSPEC && info->family != family)) {
 769		pr_warn_once("seg6local: SRv6 End.DT* family mismatch");
 770		goto drop;
 771	}
 772
 773	skb->protocol = protocol;
 774
 775	skb_dst_drop(skb);
 776
 777	skb_set_transport_header(skb, hdrlen);
 778	nf_reset_ct(skb);
 779
 780	return end_dt_vrf_rcv(skb, family, vrf);
 781
 782drop:
 783	kfree_skb(skb);
 784	return ERR_PTR(-EINVAL);
 785}
 786
 787static int input_action_end_dt4(struct sk_buff *skb,
 788				struct seg6_local_lwt *slwt)
 789{
 790	struct iphdr *iph;
 791	int err;
 792
 793	if (!decap_and_validate(skb, IPPROTO_IPIP))
 794		goto drop;
 795
 796	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 797		goto drop;
 798
 799	skb = end_dt_vrf_core(skb, slwt, AF_INET);
 800	if (!skb)
 801		/* packet has been processed and consumed by the VRF */
 802		return 0;
 803
 804	if (IS_ERR(skb))
 805		return PTR_ERR(skb);
 806
 807	iph = ip_hdr(skb);
 808
 809	err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
 810	if (unlikely(err))
 811		goto drop;
 812
 813	return dst_input(skb);
 814
 815drop:
 816	kfree_skb(skb);
 817	return -EINVAL;
 818}
 819
 820static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg,
 821			      struct netlink_ext_ack *extack)
 822{
 823	return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack);
 824}
 825
 826static enum
 827seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt)
 828{
 829	unsigned long parsed_optattrs = slwt->parsed_optattrs;
 830	bool legacy, vrfmode;
 831
 832	legacy	= !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE));
 833	vrfmode	= !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE));
 834
 835	if (!(legacy ^ vrfmode))
 836		/* both are absent or present: invalid DT6 mode */
 837		return DT_INVALID_MODE;
 838
 839	return legacy ? DT_LEGACY_MODE : DT_VRF_MODE;
 840}
 841
 842static enum seg6_end_dt_mode seg6_end_dt6_get_mode(struct seg6_local_lwt *slwt)
 843{
 844	struct seg6_end_dt_info *info = &slwt->dt_info;
 845
 846	return info->mode;
 847}
 848
 849static int seg6_end_dt6_build(struct seg6_local_lwt *slwt, const void *cfg,
 850			      struct netlink_ext_ack *extack)
 851{
 852	enum seg6_end_dt_mode mode = seg6_end_dt6_parse_mode(slwt);
 853	struct seg6_end_dt_info *info = &slwt->dt_info;
 854
 855	switch (mode) {
 856	case DT_LEGACY_MODE:
 857		info->mode = DT_LEGACY_MODE;
 858		return 0;
 859	case DT_VRF_MODE:
 860		return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET6, extack);
 861	default:
 862		NL_SET_ERR_MSG(extack, "table or vrftable must be specified");
 863		return -EINVAL;
 864	}
 865}
 866#endif
 867
 868static int input_action_end_dt6(struct sk_buff *skb,
 869				struct seg6_local_lwt *slwt)
 870{
 871	if (!decap_and_validate(skb, IPPROTO_IPV6))
 872		goto drop;
 873
 874	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 875		goto drop;
 876
 877#ifdef CONFIG_NET_L3_MASTER_DEV
 878	if (seg6_end_dt6_get_mode(slwt) == DT_LEGACY_MODE)
 879		goto legacy_mode;
 880
 881	/* DT6_VRF_MODE */
 882	skb = end_dt_vrf_core(skb, slwt, AF_INET6);
 883	if (!skb)
 884		/* packet has been processed and consumed by the VRF */
 885		return 0;
 886
 887	if (IS_ERR(skb))
 888		return PTR_ERR(skb);
 889
 890	/* note: this time we do not need to specify the table because the VRF
 891	 * takes care of selecting the correct table.
 892	 */
 893	seg6_lookup_any_nexthop(skb, NULL, 0, true);
 894
 895	return dst_input(skb);
 896
 897legacy_mode:
 898#endif
 899	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
 900
 901	seg6_lookup_any_nexthop(skb, NULL, slwt->table, true);
 902
 903	return dst_input(skb);
 904
 905drop:
 906	kfree_skb(skb);
 907	return -EINVAL;
 908}
 909
 910#ifdef CONFIG_NET_L3_MASTER_DEV
 911static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg,
 912			       struct netlink_ext_ack *extack)
 913{
 914	return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack);
 915}
 916
 917static int input_action_end_dt46(struct sk_buff *skb,
 918				 struct seg6_local_lwt *slwt)
 919{
 920	unsigned int off = 0;
 921	int nexthdr;
 922
 923	nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL);
 924	if (unlikely(nexthdr < 0))
 925		goto drop;
 926
 927	switch (nexthdr) {
 928	case IPPROTO_IPIP:
 929		return input_action_end_dt4(skb, slwt);
 930	case IPPROTO_IPV6:
 931		return input_action_end_dt6(skb, slwt);
 932	}
 933
 934drop:
 935	kfree_skb(skb);
 936	return -EINVAL;
 937}
 938#endif
 939
 940/* push an SRH on top of the current one */
 941static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
 942{
 943	struct ipv6_sr_hdr *srh;
 944	int err = -EINVAL;
 945
 946	srh = get_and_validate_srh(skb);
 947	if (!srh)
 948		goto drop;
 949
 950	err = seg6_do_srh_inline(skb, slwt->srh);
 951	if (err)
 952		goto drop;
 953
 954	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
 955
 956	seg6_lookup_nexthop(skb, NULL, 0);
 957
 958	return dst_input(skb);
 959
 960drop:
 961	kfree_skb(skb);
 962	return err;
 963}
 964
 965/* encapsulate within an outer IPv6 header and a specified SRH */
 966static int input_action_end_b6_encap(struct sk_buff *skb,
 967				     struct seg6_local_lwt *slwt)
 968{
 969	struct ipv6_sr_hdr *srh;
 970	int err = -EINVAL;
 971
 972	srh = get_and_validate_srh(skb);
 973	if (!srh)
 974		goto drop;
 975
 976	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
 977
 978	skb_reset_inner_headers(skb);
 979	skb->encapsulation = 1;
 980
 981	err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
 982	if (err)
 983		goto drop;
 984
 985	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
 986
 987	seg6_lookup_nexthop(skb, NULL, 0);
 988
 989	return dst_input(skb);
 990
 991drop:
 992	kfree_skb(skb);
 993	return err;
 994}
 995
 996DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
 997
 998bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
 999{
1000	struct seg6_bpf_srh_state *srh_state =
1001		this_cpu_ptr(&seg6_bpf_srh_states);
1002	struct ipv6_sr_hdr *srh = srh_state->srh;
1003
1004	if (unlikely(srh == NULL))
1005		return false;
1006
1007	if (unlikely(!srh_state->valid)) {
1008		if ((srh_state->hdrlen & 7) != 0)
1009			return false;
1010
1011		srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
1012		if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3, true))
1013			return false;
1014
1015		srh_state->valid = true;
1016	}
1017
1018	return true;
1019}
1020
1021static int input_action_end_bpf(struct sk_buff *skb,
1022				struct seg6_local_lwt *slwt)
1023{
1024	struct seg6_bpf_srh_state *srh_state =
1025		this_cpu_ptr(&seg6_bpf_srh_states);
1026	struct ipv6_sr_hdr *srh;
1027	int ret;
1028
1029	srh = get_and_validate_srh(skb);
1030	if (!srh) {
1031		kfree_skb(skb);
1032		return -EINVAL;
1033	}
1034	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
1035
1036	/* preempt_disable is needed to protect the per-CPU buffer srh_state,
1037	 * which is also accessed by the bpf_lwt_seg6_* helpers
1038	 */
1039	preempt_disable();
1040	srh_state->srh = srh;
1041	srh_state->hdrlen = srh->hdrlen << 3;
1042	srh_state->valid = true;
1043
1044	rcu_read_lock();
1045	bpf_compute_data_pointers(skb);
1046	ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
1047	rcu_read_unlock();
1048
1049	switch (ret) {
1050	case BPF_OK:
1051	case BPF_REDIRECT:
1052		break;
1053	case BPF_DROP:
1054		goto drop;
1055	default:
1056		pr_warn_once("bpf-seg6local: Illegal return value %u\n", ret);
1057		goto drop;
1058	}
1059
1060	if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
1061		goto drop;
1062
1063	preempt_enable();
1064	if (ret != BPF_REDIRECT)
1065		seg6_lookup_nexthop(skb, NULL, 0);
1066
1067	return dst_input(skb);
1068
1069drop:
1070	preempt_enable();
1071	kfree_skb(skb);
1072	return -EINVAL;
1073}
1074
1075static struct seg6_action_desc seg6_action_table[] = {
1076	{
1077		.action		= SEG6_LOCAL_ACTION_END,
1078		.attrs		= 0,
1079		.optattrs	= SEG6_F_LOCAL_COUNTERS |
1080				  SEG6_F_ATTR(SEG6_LOCAL_FLAVORS),
1081		.input		= input_action_end,
1082	},
1083	{
1084		.action		= SEG6_LOCAL_ACTION_END_X,
1085		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_NH6),
1086		.optattrs	= SEG6_F_LOCAL_COUNTERS,
1087		.input		= input_action_end_x,
1088	},
1089	{
1090		.action		= SEG6_LOCAL_ACTION_END_T,
1091		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_TABLE),
1092		.optattrs	= SEG6_F_LOCAL_COUNTERS,
1093		.input		= input_action_end_t,
1094	},
1095	{
1096		.action		= SEG6_LOCAL_ACTION_END_DX2,
1097		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_OIF),
1098		.optattrs	= SEG6_F_LOCAL_COUNTERS,
1099		.input		= input_action_end_dx2,
1100	},
1101	{
1102		.action		= SEG6_LOCAL_ACTION_END_DX6,
1103		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_NH6),
1104		.optattrs	= SEG6_F_LOCAL_COUNTERS,
1105		.input		= input_action_end_dx6,
1106	},
1107	{
1108		.action		= SEG6_LOCAL_ACTION_END_DX4,
1109		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_NH4),
1110		.optattrs	= SEG6_F_LOCAL_COUNTERS,
1111		.input		= input_action_end_dx4,
1112	},
1113	{
1114		.action		= SEG6_LOCAL_ACTION_END_DT4,
1115		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
1116		.optattrs	= SEG6_F_LOCAL_COUNTERS,
1117#ifdef CONFIG_NET_L3_MASTER_DEV
1118		.input		= input_action_end_dt4,
1119		.slwt_ops	= {
1120					.build_state = seg6_end_dt4_build,
1121				  },
1122#endif
1123	},
1124	{
1125		.action		= SEG6_LOCAL_ACTION_END_DT6,
1126#ifdef CONFIG_NET_L3_MASTER_DEV
1127		.attrs		= 0,
1128		.optattrs	= SEG6_F_LOCAL_COUNTERS		|
1129				  SEG6_F_ATTR(SEG6_LOCAL_TABLE) |
1130				  SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
1131		.slwt_ops	= {
1132					.build_state = seg6_end_dt6_build,
1133				  },
1134#else
1135		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_TABLE),
1136		.optattrs	= SEG6_F_LOCAL_COUNTERS,
1137#endif
1138		.input		= input_action_end_dt6,
1139	},
1140	{
1141		.action		= SEG6_LOCAL_ACTION_END_DT46,
1142		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
1143		.optattrs	= SEG6_F_LOCAL_COUNTERS,
1144#ifdef CONFIG_NET_L3_MASTER_DEV
1145		.input		= input_action_end_dt46,
1146		.slwt_ops	= {
1147					.build_state = seg6_end_dt46_build,
1148				  },
1149#endif
1150	},
1151	{
1152		.action		= SEG6_LOCAL_ACTION_END_B6,
1153		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_SRH),
1154		.optattrs	= SEG6_F_LOCAL_COUNTERS,
1155		.input		= input_action_end_b6,
1156	},
1157	{
1158		.action		= SEG6_LOCAL_ACTION_END_B6_ENCAP,
1159		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_SRH),
1160		.optattrs	= SEG6_F_LOCAL_COUNTERS,
1161		.input		= input_action_end_b6_encap,
1162		.static_headroom	= sizeof(struct ipv6hdr),
1163	},
1164	{
1165		.action		= SEG6_LOCAL_ACTION_END_BPF,
1166		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_BPF),
1167		.optattrs	= SEG6_F_LOCAL_COUNTERS,
1168		.input		= input_action_end_bpf,
1169	},
1170
1171};
1172
1173static struct seg6_action_desc *__get_action_desc(int action)
1174{
1175	struct seg6_action_desc *desc;
1176	int i, count;
1177
1178	count = ARRAY_SIZE(seg6_action_table);
1179	for (i = 0; i < count; i++) {
1180		desc = &seg6_action_table[i];
1181		if (desc->action == action)
1182			return desc;
1183	}
1184
1185	return NULL;
1186}
1187
1188static bool seg6_lwtunnel_counters_enabled(struct seg6_local_lwt *slwt)
1189{
1190	return slwt->parsed_optattrs & SEG6_F_LOCAL_COUNTERS;
1191}
1192
1193static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
1194				       unsigned int len, int err)
1195{
1196	struct pcpu_seg6_local_counters *pcounters;
1197
1198	pcounters = this_cpu_ptr(slwt->pcpu_counters);
1199	u64_stats_update_begin(&pcounters->syncp);
1200
1201	if (likely(!err)) {
1202		u64_stats_inc(&pcounters->packets);
1203		u64_stats_add(&pcounters->bytes, len);
1204	} else {
1205		u64_stats_inc(&pcounters->errors);
1206	}
1207
1208	u64_stats_update_end(&pcounters->syncp);
1209}
1210
1211static int seg6_local_input_core(struct net *net, struct sock *sk,
1212				 struct sk_buff *skb)
1213{
1214	struct dst_entry *orig_dst = skb_dst(skb);
1215	struct seg6_action_desc *desc;
1216	struct seg6_local_lwt *slwt;
1217	unsigned int len = skb->len;
1218	int rc;
1219
1220	slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
1221	desc = slwt->desc;
1222
1223	rc = desc->input(skb, slwt);
1224
1225	if (!seg6_lwtunnel_counters_enabled(slwt))
1226		return rc;
1227
1228	seg6_local_update_counters(slwt, len, rc);
1229
1230	return rc;
1231}
1232
1233static int seg6_local_input(struct sk_buff *skb)
1234{
1235	if (skb->protocol != htons(ETH_P_IPV6)) {
1236		kfree_skb(skb);
1237		return -EINVAL;
1238	}
1239
1240	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
1241		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
1242			       dev_net(skb->dev), NULL, skb, skb->dev, NULL,
1243			       seg6_local_input_core);
1244
1245	return seg6_local_input_core(dev_net(skb->dev), NULL, skb);
1246}
1247
1248static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
1249	[SEG6_LOCAL_ACTION]	= { .type = NLA_U32 },
1250	[SEG6_LOCAL_SRH]	= { .type = NLA_BINARY },
1251	[SEG6_LOCAL_TABLE]	= { .type = NLA_U32 },
1252	[SEG6_LOCAL_VRFTABLE]	= { .type = NLA_U32 },
1253	[SEG6_LOCAL_NH4]	= { .type = NLA_BINARY,
1254				    .len = sizeof(struct in_addr) },
1255	[SEG6_LOCAL_NH6]	= { .type = NLA_BINARY,
1256				    .len = sizeof(struct in6_addr) },
1257	[SEG6_LOCAL_IIF]	= { .type = NLA_U32 },
1258	[SEG6_LOCAL_OIF]	= { .type = NLA_U32 },
1259	[SEG6_LOCAL_BPF]	= { .type = NLA_NESTED },
1260	[SEG6_LOCAL_COUNTERS]	= { .type = NLA_NESTED },
1261	[SEG6_LOCAL_FLAVORS]	= { .type = NLA_NESTED },
1262};
1263
1264static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1265			 struct netlink_ext_ack *extack)
1266{
1267	struct ipv6_sr_hdr *srh;
1268	int len;
1269
1270	srh = nla_data(attrs[SEG6_LOCAL_SRH]);
1271	len = nla_len(attrs[SEG6_LOCAL_SRH]);
1272
1273	/* SRH must contain at least one segment */
1274	if (len < sizeof(*srh) + sizeof(struct in6_addr))
1275		return -EINVAL;
1276
1277	if (!seg6_validate_srh(srh, len, false))
1278		return -EINVAL;
1279
1280	slwt->srh = kmemdup(srh, len, GFP_KERNEL);
1281	if (!slwt->srh)
1282		return -ENOMEM;
1283
1284	slwt->headroom += len;
1285
1286	return 0;
1287}
1288
1289static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1290{
1291	struct ipv6_sr_hdr *srh;
1292	struct nlattr *nla;
1293	int len;
1294
1295	srh = slwt->srh;
1296	len = (srh->hdrlen + 1) << 3;
1297
1298	nla = nla_reserve(skb, SEG6_LOCAL_SRH, len);
1299	if (!nla)
1300		return -EMSGSIZE;
1301
1302	memcpy(nla_data(nla), srh, len);
1303
1304	return 0;
1305}
1306
1307static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1308{
1309	int len = (a->srh->hdrlen + 1) << 3;
1310
1311	if (len != ((b->srh->hdrlen + 1) << 3))
1312		return 1;
1313
1314	return memcmp(a->srh, b->srh, len);
1315}
1316
1317static void destroy_attr_srh(struct seg6_local_lwt *slwt)
1318{
1319	kfree(slwt->srh);
1320}
1321
1322static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1323			   struct netlink_ext_ack *extack)
1324{
1325	slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);
1326
1327	return 0;
1328}
1329
1330static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1331{
1332	if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table))
1333		return -EMSGSIZE;
1334
1335	return 0;
1336}
1337
1338static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1339{
1340	if (a->table != b->table)
1341		return 1;
1342
1343	return 0;
1344}
1345
1346static struct
1347seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt)
1348{
1349#ifdef CONFIG_NET_L3_MASTER_DEV
1350	return &slwt->dt_info;
1351#else
1352	return ERR_PTR(-EOPNOTSUPP);
1353#endif
1354}
1355
1356static int parse_nla_vrftable(struct nlattr **attrs,
1357			      struct seg6_local_lwt *slwt,
1358			      struct netlink_ext_ack *extack)
1359{
1360	struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
1361
1362	if (IS_ERR(info))
1363		return PTR_ERR(info);
1364
1365	info->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]);
1366
1367	return 0;
1368}
1369
1370static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1371{
1372	struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
1373
1374	if (IS_ERR(info))
1375		return PTR_ERR(info);
1376
1377	if (nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, info->vrf_table))
1378		return -EMSGSIZE;
1379
1380	return 0;
1381}
1382
1383static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1384{
1385	struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a);
1386	struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b);
1387
1388	if (info_a->vrf_table != info_b->vrf_table)
1389		return 1;
1390
1391	return 0;
1392}
1393
1394static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1395			 struct netlink_ext_ack *extack)
1396{
1397	memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
1398	       sizeof(struct in_addr));
1399
1400	return 0;
1401}
1402
1403static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1404{
1405	struct nlattr *nla;
1406
1407	nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr));
1408	if (!nla)
1409		return -EMSGSIZE;
1410
1411	memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr));
1412
1413	return 0;
1414}
1415
1416static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1417{
1418	return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr));
1419}
1420
1421static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1422			 struct netlink_ext_ack *extack)
1423{
1424	memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]),
1425	       sizeof(struct in6_addr));
1426
1427	return 0;
1428}
1429
1430static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1431{
1432	struct nlattr *nla;
1433
1434	nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr));
1435	if (!nla)
1436		return -EMSGSIZE;
1437
1438	memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr));
1439
1440	return 0;
1441}
1442
1443static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1444{
1445	return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr));
1446}
1447
1448static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1449			 struct netlink_ext_ack *extack)
1450{
1451	slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]);
1452
1453	return 0;
1454}
1455
1456static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1457{
1458	if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif))
1459		return -EMSGSIZE;
1460
1461	return 0;
1462}
1463
1464static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1465{
1466	if (a->iif != b->iif)
1467		return 1;
1468
1469	return 0;
1470}
1471
1472static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1473			 struct netlink_ext_ack *extack)
1474{
1475	slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]);
1476
1477	return 0;
1478}
1479
1480static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1481{
1482	if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif))
1483		return -EMSGSIZE;
1484
1485	return 0;
1486}
1487
1488static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1489{
1490	if (a->oif != b->oif)
1491		return 1;
1492
1493	return 0;
1494}
1495
1496#define MAX_PROG_NAME 256
1497static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = {
1498	[SEG6_LOCAL_BPF_PROG]	   = { .type = NLA_U32, },
1499	[SEG6_LOCAL_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
1500				       .len = MAX_PROG_NAME },
1501};
1502
1503static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1504			 struct netlink_ext_ack *extack)
1505{
1506	struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1];
1507	struct bpf_prog *p;
1508	int ret;
1509	u32 fd;
1510
1511	ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_BPF_PROG_MAX,
1512					  attrs[SEG6_LOCAL_BPF],
1513					  bpf_prog_policy, NULL);
1514	if (ret < 0)
1515		return ret;
1516
1517	if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME])
1518		return -EINVAL;
1519
1520	slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL);
1521	if (!slwt->bpf.name)
1522		return -ENOMEM;
1523
1524	fd = nla_get_u32(tb[SEG6_LOCAL_BPF_PROG]);
1525	p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL);
1526	if (IS_ERR(p)) {
1527		kfree(slwt->bpf.name);
1528		return PTR_ERR(p);
1529	}
1530
1531	slwt->bpf.prog = p;
1532	return 0;
1533}
1534
1535static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1536{
1537	struct nlattr *nest;
1538
1539	if (!slwt->bpf.prog)
1540		return 0;
1541
1542	nest = nla_nest_start_noflag(skb, SEG6_LOCAL_BPF);
1543	if (!nest)
1544		return -EMSGSIZE;
1545
1546	if (nla_put_u32(skb, SEG6_LOCAL_BPF_PROG, slwt->bpf.prog->aux->id))
1547		return -EMSGSIZE;
1548
1549	if (slwt->bpf.name &&
1550	    nla_put_string(skb, SEG6_LOCAL_BPF_PROG_NAME, slwt->bpf.name))
1551		return -EMSGSIZE;
1552
1553	return nla_nest_end(skb, nest);
1554}
1555
1556static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1557{
1558	if (!a->bpf.name && !b->bpf.name)
1559		return 0;
1560
1561	if (!a->bpf.name || !b->bpf.name)
1562		return 1;
1563
1564	return strcmp(a->bpf.name, b->bpf.name);
1565}
1566
1567static void destroy_attr_bpf(struct seg6_local_lwt *slwt)
1568{
1569	kfree(slwt->bpf.name);
1570	if (slwt->bpf.prog)
1571		bpf_prog_put(slwt->bpf.prog);
1572}
1573
1574static const struct
1575nla_policy seg6_local_counters_policy[SEG6_LOCAL_CNT_MAX + 1] = {
1576	[SEG6_LOCAL_CNT_PACKETS]	= { .type = NLA_U64 },
1577	[SEG6_LOCAL_CNT_BYTES]		= { .type = NLA_U64 },
1578	[SEG6_LOCAL_CNT_ERRORS]		= { .type = NLA_U64 },
1579};
1580
1581static int parse_nla_counters(struct nlattr **attrs,
1582			      struct seg6_local_lwt *slwt,
1583			      struct netlink_ext_ack *extack)
1584{
1585	struct pcpu_seg6_local_counters __percpu *pcounters;
1586	struct nlattr *tb[SEG6_LOCAL_CNT_MAX + 1];
1587	int ret;
1588
1589	ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_CNT_MAX,
1590					  attrs[SEG6_LOCAL_COUNTERS],
1591					  seg6_local_counters_policy, NULL);
1592	if (ret < 0)
1593		return ret;
1594
1595	/* basic support for SRv6 Behavior counters requires at least:
1596	 * packets, bytes and errors.
1597	 */
1598	if (!tb[SEG6_LOCAL_CNT_PACKETS] || !tb[SEG6_LOCAL_CNT_BYTES] ||
1599	    !tb[SEG6_LOCAL_CNT_ERRORS])
1600		return -EINVAL;
1601
1602	/* counters are always zero initialized */
1603	pcounters = seg6_local_alloc_pcpu_counters(GFP_KERNEL);
1604	if (!pcounters)
1605		return -ENOMEM;
1606
1607	slwt->pcpu_counters = pcounters;
1608
1609	return 0;
1610}
1611
1612static int seg6_local_fill_nla_counters(struct sk_buff *skb,
1613					struct seg6_local_counters *counters)
1614{
1615	if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_PACKETS, counters->packets,
1616			      SEG6_LOCAL_CNT_PAD))
1617		return -EMSGSIZE;
1618
1619	if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_BYTES, counters->bytes,
1620			      SEG6_LOCAL_CNT_PAD))
1621		return -EMSGSIZE;
1622
1623	if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_ERRORS, counters->errors,
1624			      SEG6_LOCAL_CNT_PAD))
1625		return -EMSGSIZE;
1626
1627	return 0;
1628}
1629
1630static int put_nla_counters(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1631{
1632	struct seg6_local_counters counters = { 0, 0, 0 };
1633	struct nlattr *nest;
1634	int rc, i;
1635
1636	nest = nla_nest_start(skb, SEG6_LOCAL_COUNTERS);
1637	if (!nest)
1638		return -EMSGSIZE;
1639
1640	for_each_possible_cpu(i) {
1641		struct pcpu_seg6_local_counters *pcounters;
1642		u64 packets, bytes, errors;
1643		unsigned int start;
1644
1645		pcounters = per_cpu_ptr(slwt->pcpu_counters, i);
1646		do {
1647			start = u64_stats_fetch_begin(&pcounters->syncp);
1648
1649			packets = u64_stats_read(&pcounters->packets);
1650			bytes = u64_stats_read(&pcounters->bytes);
1651			errors = u64_stats_read(&pcounters->errors);
1652
1653		} while (u64_stats_fetch_retry(&pcounters->syncp, start));
1654
1655		counters.packets += packets;
1656		counters.bytes += bytes;
1657		counters.errors += errors;
1658	}
1659
1660	rc = seg6_local_fill_nla_counters(skb, &counters);
1661	if (rc < 0) {
1662		nla_nest_cancel(skb, nest);
1663		return rc;
1664	}
1665
1666	return nla_nest_end(skb, nest);
1667}
1668
1669static int cmp_nla_counters(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1670{
1671	/* a and b are equal if both have pcpu_counters set or not */
1672	return (!!((unsigned long)a->pcpu_counters)) ^
1673		(!!((unsigned long)b->pcpu_counters));
1674}
1675
1676static void destroy_attr_counters(struct seg6_local_lwt *slwt)
1677{
1678	free_percpu(slwt->pcpu_counters);
1679}
1680
1681static const
1682struct nla_policy seg6_local_flavors_policy[SEG6_LOCAL_FLV_MAX + 1] = {
1683	[SEG6_LOCAL_FLV_OPERATION]	= { .type = NLA_U32 },
1684	[SEG6_LOCAL_FLV_LCBLOCK_BITS]	= { .type = NLA_U8 },
1685	[SEG6_LOCAL_FLV_LCNODE_FN_BITS]	= { .type = NLA_U8 },
1686};
1687
1688/* check whether the lengths of the Locator-Block and Locator-Node Function
1689 * are compatible with the dimension of a C-SID container.
1690 */
1691static int seg6_chk_next_csid_cfg(__u8 block_len, __u8 func_len)
1692{
1693	/* Locator-Block and Locator-Node Function cannot exceed 128 bits
1694	 * (i.e. C-SID container lenghts).
1695	 */
1696	if (next_csid_chk_cntr_bits(block_len, func_len))
1697		return -EINVAL;
1698
1699	/* Locator-Block length must be greater than zero and evenly divisible
1700	 * by 8. There must be room for a Locator-Node Function, at least.
1701	 */
1702	if (next_csid_chk_lcblock_bits(block_len))
1703		return -EINVAL;
1704
1705	/* Locator-Node Function length must be greater than zero and evenly
1706	 * divisible by 8. There must be room for the Locator-Block.
1707	 */
1708	if (next_csid_chk_lcnode_fn_bits(func_len))
1709		return -EINVAL;
1710
1711	return 0;
1712}
1713
1714static int seg6_parse_nla_next_csid_cfg(struct nlattr **tb,
1715					struct seg6_flavors_info *finfo,
1716					struct netlink_ext_ack *extack)
1717{
1718	__u8 func_len = SEG6_LOCAL_LCNODE_FN_DBITS;
1719	__u8 block_len = SEG6_LOCAL_LCBLOCK_DBITS;
1720	int rc;
1721
1722	if (tb[SEG6_LOCAL_FLV_LCBLOCK_BITS])
1723		block_len = nla_get_u8(tb[SEG6_LOCAL_FLV_LCBLOCK_BITS]);
1724
1725	if (tb[SEG6_LOCAL_FLV_LCNODE_FN_BITS])
1726		func_len = nla_get_u8(tb[SEG6_LOCAL_FLV_LCNODE_FN_BITS]);
1727
1728	rc = seg6_chk_next_csid_cfg(block_len, func_len);
1729	if (rc < 0) {
1730		NL_SET_ERR_MSG(extack,
1731			       "Invalid Locator Block/Node Function lengths");
1732		return rc;
1733	}
1734
1735	finfo->lcblock_bits = block_len;
1736	finfo->lcnode_func_bits = func_len;
1737
1738	return 0;
1739}
1740
1741static int parse_nla_flavors(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1742			     struct netlink_ext_ack *extack)
1743{
1744	struct seg6_flavors_info *finfo = &slwt->flv_info;
1745	struct nlattr *tb[SEG6_LOCAL_FLV_MAX + 1];
1746	unsigned long fops;
1747	int rc;
1748
1749	rc = nla_parse_nested_deprecated(tb, SEG6_LOCAL_FLV_MAX,
1750					 attrs[SEG6_LOCAL_FLAVORS],
1751					 seg6_local_flavors_policy, NULL);
1752	if (rc < 0)
1753		return rc;
1754
1755	/* this attribute MUST always be present since it represents the Flavor
1756	 * operation(s) to be carried out.
1757	 */
1758	if (!tb[SEG6_LOCAL_FLV_OPERATION])
1759		return -EINVAL;
1760
1761	fops = nla_get_u32(tb[SEG6_LOCAL_FLV_OPERATION]);
1762	if (fops & ~SEG6_LOCAL_FLV_SUPP_OPS) {
1763		NL_SET_ERR_MSG(extack, "Unsupported Flavor operation(s)");
1764		return -EOPNOTSUPP;
1765	}
1766
1767	finfo->flv_ops = fops;
1768
1769	if (seg6_next_csid_enabled(fops)) {
1770		/* Locator-Block and Locator-Node Function lengths can be
1771		 * provided by the user space. Otherwise, default values are
1772		 * applied.
1773		 */
1774		rc = seg6_parse_nla_next_csid_cfg(tb, finfo, extack);
1775		if (rc < 0)
1776			return rc;
1777	}
1778
1779	return 0;
1780}
1781
1782static int seg6_fill_nla_next_csid_cfg(struct sk_buff *skb,
1783				       struct seg6_flavors_info *finfo)
1784{
1785	if (nla_put_u8(skb, SEG6_LOCAL_FLV_LCBLOCK_BITS, finfo->lcblock_bits))
1786		return -EMSGSIZE;
1787
1788	if (nla_put_u8(skb, SEG6_LOCAL_FLV_LCNODE_FN_BITS,
1789		       finfo->lcnode_func_bits))
1790		return -EMSGSIZE;
1791
1792	return 0;
1793}
1794
1795static int put_nla_flavors(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1796{
1797	struct seg6_flavors_info *finfo = &slwt->flv_info;
1798	__u32 fops = finfo->flv_ops;
1799	struct nlattr *nest;
1800	int rc;
1801
1802	nest = nla_nest_start(skb, SEG6_LOCAL_FLAVORS);
1803	if (!nest)
1804		return -EMSGSIZE;
1805
1806	if (nla_put_u32(skb, SEG6_LOCAL_FLV_OPERATION, fops)) {
1807		rc = -EMSGSIZE;
1808		goto err;
1809	}
1810
1811	if (seg6_next_csid_enabled(fops)) {
1812		rc = seg6_fill_nla_next_csid_cfg(skb, finfo);
1813		if (rc < 0)
1814			goto err;
1815	}
1816
1817	return nla_nest_end(skb, nest);
1818
1819err:
1820	nla_nest_cancel(skb, nest);
1821	return rc;
1822}
1823
1824static int seg6_cmp_nla_next_csid_cfg(struct seg6_flavors_info *finfo_a,
1825				      struct seg6_flavors_info *finfo_b)
1826{
1827	if (finfo_a->lcblock_bits != finfo_b->lcblock_bits)
1828		return 1;
1829
1830	if (finfo_a->lcnode_func_bits != finfo_b->lcnode_func_bits)
1831		return 1;
1832
1833	return 0;
1834}
1835
1836static int cmp_nla_flavors(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1837{
1838	struct seg6_flavors_info *finfo_a = &a->flv_info;
1839	struct seg6_flavors_info *finfo_b = &b->flv_info;
1840
1841	if (finfo_a->flv_ops != finfo_b->flv_ops)
1842		return 1;
1843
1844	if (seg6_next_csid_enabled(finfo_a->flv_ops)) {
1845		if (seg6_cmp_nla_next_csid_cfg(finfo_a, finfo_b))
1846			return 1;
1847	}
1848
1849	return 0;
1850}
1851
1852static int encap_size_flavors(struct seg6_local_lwt *slwt)
1853{
1854	struct seg6_flavors_info *finfo = &slwt->flv_info;
1855	int nlsize;
1856
1857	nlsize = nla_total_size(0) +	/* nest SEG6_LOCAL_FLAVORS */
1858		 nla_total_size(4);	/* SEG6_LOCAL_FLV_OPERATION */
1859
1860	if (seg6_next_csid_enabled(finfo->flv_ops))
1861		nlsize += nla_total_size(1) + /* SEG6_LOCAL_FLV_LCBLOCK_BITS */
1862			  nla_total_size(1); /* SEG6_LOCAL_FLV_LCNODE_FN_BITS */
1863
1864	return nlsize;
1865}
1866
/* Per-attribute callbacks; seg6_action_params[] holds one instance for each
 * supported SEG6_LOCAL_* attribute.
 */
struct seg6_action_param {
	/* read the attribute from @attrs into @slwt; negative on error */
	int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt,
		     struct netlink_ext_ack *extack);
	/* dump the attribute stored in @slwt into @skb; negative on error */
	int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
	/* compare the attribute of @a and @b; non-zero means different */
	int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);

	/* optional destroy() callback useful for releasing resources which
	 * have been previously acquired in the corresponding parse()
	 * function.
	 */
	void (*destroy)(struct seg6_local_lwt *slwt);
};
1879
1880static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
1881	[SEG6_LOCAL_SRH]	= { .parse = parse_nla_srh,
1882				    .put = put_nla_srh,
1883				    .cmp = cmp_nla_srh,
1884				    .destroy = destroy_attr_srh },
1885
1886	[SEG6_LOCAL_TABLE]	= { .parse = parse_nla_table,
1887				    .put = put_nla_table,
1888				    .cmp = cmp_nla_table },
1889
1890	[SEG6_LOCAL_NH4]	= { .parse = parse_nla_nh4,
1891				    .put = put_nla_nh4,
1892				    .cmp = cmp_nla_nh4 },
1893
1894	[SEG6_LOCAL_NH6]	= { .parse = parse_nla_nh6,
1895				    .put = put_nla_nh6,
1896				    .cmp = cmp_nla_nh6 },
1897
1898	[SEG6_LOCAL_IIF]	= { .parse = parse_nla_iif,
1899				    .put = put_nla_iif,
1900				    .cmp = cmp_nla_iif },
1901
1902	[SEG6_LOCAL_OIF]	= { .parse = parse_nla_oif,
1903				    .put = put_nla_oif,
1904				    .cmp = cmp_nla_oif },
1905
1906	[SEG6_LOCAL_BPF]	= { .parse = parse_nla_bpf,
1907				    .put = put_nla_bpf,
1908				    .cmp = cmp_nla_bpf,
1909				    .destroy = destroy_attr_bpf },
1910
1911	[SEG6_LOCAL_VRFTABLE]	= { .parse = parse_nla_vrftable,
1912				    .put = put_nla_vrftable,
1913				    .cmp = cmp_nla_vrftable },
1914
1915	[SEG6_LOCAL_COUNTERS]	= { .parse = parse_nla_counters,
1916				    .put = put_nla_counters,
1917				    .cmp = cmp_nla_counters,
1918				    .destroy = destroy_attr_counters },
1919
1920	[SEG6_LOCAL_FLAVORS]	= { .parse = parse_nla_flavors,
1921				    .put = put_nla_flavors,
1922				    .cmp = cmp_nla_flavors },
1923};
1924
1925/* call the destroy() callback (if available) for each set attribute in
1926 * @parsed_attrs, starting from the first attribute up to the @max_parsed
1927 * (excluded) attribute.
1928 */
1929static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed,
1930			    struct seg6_local_lwt *slwt)
1931{
1932	struct seg6_action_param *param;
1933	int i;
1934
1935	/* Every required seg6local attribute is identified by an ID which is
1936	 * encoded as a flag (i.e: 1 << ID) in the 'attrs' bitmask;
1937	 *
1938	 * We scan the 'parsed_attrs' bitmask, starting from the first attribute
1939	 * up to the @max_parsed (excluded) attribute.
1940	 * For each set attribute, we retrieve the corresponding destroy()
1941	 * callback. If the callback is not available, then we skip to the next
1942	 * attribute; otherwise, we call the destroy() callback.
1943	 */
1944	for (i = SEG6_LOCAL_SRH; i < max_parsed; ++i) {
1945		if (!(parsed_attrs & SEG6_F_ATTR(i)))
1946			continue;
1947
1948		param = &seg6_action_params[i];
1949
1950		if (param->destroy)
1951			param->destroy(slwt);
1952	}
1953}
1954
1955/* release all the resources that may have been acquired during parsing
1956 * operations.
1957 */
1958static void destroy_attrs(struct seg6_local_lwt *slwt)
1959{
1960	unsigned long attrs = slwt->desc->attrs | slwt->parsed_optattrs;
1961
1962	__destroy_attrs(attrs, SEG6_LOCAL_MAX + 1, slwt);
1963}
1964
1965static int parse_nla_optional_attrs(struct nlattr **attrs,
1966				    struct seg6_local_lwt *slwt,
1967				    struct netlink_ext_ack *extack)
1968{
1969	struct seg6_action_desc *desc = slwt->desc;
1970	unsigned long parsed_optattrs = 0;
1971	struct seg6_action_param *param;
1972	int err, i;
1973
1974	for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; ++i) {
1975		if (!(desc->optattrs & SEG6_F_ATTR(i)) || !attrs[i])
1976			continue;
1977
1978		/* once here, the i-th attribute is provided by the
1979		 * userspace AND it is identified optional as well.
1980		 */
1981		param = &seg6_action_params[i];
1982
1983		err = param->parse(attrs, slwt, extack);
1984		if (err < 0)
1985			goto parse_optattrs_err;
1986
1987		/* current attribute has been correctly parsed */
1988		parsed_optattrs |= SEG6_F_ATTR(i);
1989	}
1990
1991	/* store in the tunnel state all the optional attributed successfully
1992	 * parsed.
1993	 */
1994	slwt->parsed_optattrs = parsed_optattrs;
1995
1996	return 0;
1997
1998parse_optattrs_err:
1999	__destroy_attrs(parsed_optattrs, i, slwt);
2000
2001	return err;
2002}
2003
2004/* call the custom constructor of the behavior during its initialization phase
2005 * and after that all its attributes have been parsed successfully.
2006 */
2007static int
2008seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg,
2009				struct netlink_ext_ack *extack)
2010{
2011	struct seg6_action_desc *desc = slwt->desc;
2012	struct seg6_local_lwtunnel_ops *ops;
2013
2014	ops = &desc->slwt_ops;
2015	if (!ops->build_state)
2016		return 0;
2017
2018	return ops->build_state(slwt, cfg, extack);
2019}
2020
2021/* call the custom destructor of the behavior which is invoked before the
2022 * tunnel is going to be destroyed.
2023 */
2024static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt)
2025{
2026	struct seg6_action_desc *desc = slwt->desc;
2027	struct seg6_local_lwtunnel_ops *ops;
2028
2029	ops = &desc->slwt_ops;
2030	if (!ops->destroy_state)
2031		return;
2032
2033	ops->destroy_state(slwt);
2034}
2035
2036static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt,
2037			    struct netlink_ext_ack *extack)
2038{
2039	struct seg6_action_param *param;
2040	struct seg6_action_desc *desc;
2041	unsigned long invalid_attrs;
2042	int i, err;
2043
2044	desc = __get_action_desc(slwt->action);
2045	if (!desc)
2046		return -EINVAL;
2047
2048	if (!desc->input)
2049		return -EOPNOTSUPP;
2050
2051	slwt->desc = desc;
2052	slwt->headroom += desc->static_headroom;
2053
2054	/* Forcing the desc->optattrs *set* and the desc->attrs *set* to be
2055	 * disjoined, this allow us to release acquired resources by optional
2056	 * attributes and by required attributes independently from each other
2057	 * without any interference.
2058	 * In other terms, we are sure that we do not release some the acquired
2059	 * resources twice.
2060	 *
2061	 * Note that if an attribute is configured both as required and as
2062	 * optional, it means that the user has messed something up in the
2063	 * seg6_action_table. Therefore, this check is required for SRv6
2064	 * behaviors to work properly.
2065	 */
2066	invalid_attrs = desc->attrs & desc->optattrs;
2067	if (invalid_attrs) {
2068		WARN_ONCE(1,
2069			  "An attribute cannot be both required AND optional");
2070		return -EINVAL;
2071	}
2072
2073	/* parse the required attributes */
2074	for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
2075		if (desc->attrs & SEG6_F_ATTR(i)) {
2076			if (!attrs[i])
2077				return -EINVAL;
2078
2079			param = &seg6_action_params[i];
2080
2081			err = param->parse(attrs, slwt, extack);
2082			if (err < 0)
2083				goto parse_attrs_err;
2084		}
2085	}
2086
2087	/* parse the optional attributes, if any */
2088	err = parse_nla_optional_attrs(attrs, slwt, extack);
2089	if (err < 0)
2090		goto parse_attrs_err;
2091
2092	return 0;
2093
2094parse_attrs_err:
2095	/* release any resource that may have been acquired during the i-1
2096	 * parse() operations.
2097	 */
2098	__destroy_attrs(desc->attrs, i, slwt);
2099
2100	return err;
2101}
2102
2103static int seg6_local_build_state(struct net *net, struct nlattr *nla,
2104				  unsigned int family, const void *cfg,
2105				  struct lwtunnel_state **ts,
2106				  struct netlink_ext_ack *extack)
2107{
2108	struct nlattr *tb[SEG6_LOCAL_MAX + 1];
2109	struct lwtunnel_state *newts;
2110	struct seg6_local_lwt *slwt;
2111	int err;
2112
2113	if (family != AF_INET6)
2114		return -EINVAL;
2115
2116	err = nla_parse_nested_deprecated(tb, SEG6_LOCAL_MAX, nla,
2117					  seg6_local_policy, extack);
2118
2119	if (err < 0)
2120		return err;
2121
2122	if (!tb[SEG6_LOCAL_ACTION])
2123		return -EINVAL;
2124
2125	newts = lwtunnel_state_alloc(sizeof(*slwt));
2126	if (!newts)
2127		return -ENOMEM;
2128
2129	slwt = seg6_local_lwtunnel(newts);
2130	slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]);
2131
2132	err = parse_nla_action(tb, slwt, extack);
2133	if (err < 0)
2134		goto out_free;
2135
2136	err = seg6_local_lwtunnel_build_state(slwt, cfg, extack);
2137	if (err < 0)
2138		goto out_destroy_attrs;
2139
2140	newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
2141	newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
2142	newts->headroom = slwt->headroom;
2143
2144	*ts = newts;
2145
2146	return 0;
2147
2148out_destroy_attrs:
2149	destroy_attrs(slwt);
2150out_free:
2151	kfree(newts);
2152	return err;
2153}
2154
/* lwtunnel destroy_state callback: run the behavior-specific destructor,
 * then release the resources held by the parsed attributes.
 */
static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
{
	struct seg6_local_lwt *state = seg6_local_lwtunnel(lwt);

	seg6_local_lwtunnel_destroy_state(state);
	destroy_attrs(state);
}
2165
2166static int seg6_local_fill_encap(struct sk_buff *skb,
2167				 struct lwtunnel_state *lwt)
2168{
2169	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
2170	struct seg6_action_param *param;
2171	unsigned long attrs;
2172	int i, err;
2173
2174	if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
2175		return -EMSGSIZE;
2176
2177	attrs = slwt->desc->attrs | slwt->parsed_optattrs;
2178
2179	for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
2180		if (attrs & SEG6_F_ATTR(i)) {
2181			param = &seg6_action_params[i];
2182			err = param->put(skb, slwt);
2183			if (err < 0)
2184				return err;
2185		}
2186	}
2187
2188	return 0;
2189}
2190
2191static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
2192{
2193	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
2194	unsigned long attrs;
2195	int nlsize;
2196
2197	nlsize = nla_total_size(4); /* action */
2198
2199	attrs = slwt->desc->attrs | slwt->parsed_optattrs;
2200
2201	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_SRH))
2202		nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);
2203
2204	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE))
2205		nlsize += nla_total_size(4);
2206
2207	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH4))
2208		nlsize += nla_total_size(4);
2209
2210	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH6))
2211		nlsize += nla_total_size(16);
2212
2213	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_IIF))
2214		nlsize += nla_total_size(4);
2215
2216	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_OIF))
2217		nlsize += nla_total_size(4);
2218
2219	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_BPF))
2220		nlsize += nla_total_size(sizeof(struct nlattr)) +
2221		       nla_total_size(MAX_PROG_NAME) +
2222		       nla_total_size(4);
2223
2224	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE))
2225		nlsize += nla_total_size(4);
2226
2227	if (attrs & SEG6_F_LOCAL_COUNTERS)
2228		nlsize += nla_total_size(0) + /* nest SEG6_LOCAL_COUNTERS */
2229			  /* SEG6_LOCAL_CNT_PACKETS */
2230			  nla_total_size_64bit(sizeof(__u64)) +
2231			  /* SEG6_LOCAL_CNT_BYTES */
2232			  nla_total_size_64bit(sizeof(__u64)) +
2233			  /* SEG6_LOCAL_CNT_ERRORS */
2234			  nla_total_size_64bit(sizeof(__u64));
2235
2236	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_FLAVORS))
2237		nlsize += encap_size_flavors(slwt);
2238
2239	return nlsize;
2240}
2241
2242static int seg6_local_cmp_encap(struct lwtunnel_state *a,
2243				struct lwtunnel_state *b)
2244{
2245	struct seg6_local_lwt *slwt_a, *slwt_b;
2246	struct seg6_action_param *param;
2247	unsigned long attrs_a, attrs_b;
2248	int i;
2249
2250	slwt_a = seg6_local_lwtunnel(a);
2251	slwt_b = seg6_local_lwtunnel(b);
2252
2253	if (slwt_a->action != slwt_b->action)
2254		return 1;
2255
2256	attrs_a = slwt_a->desc->attrs | slwt_a->parsed_optattrs;
2257	attrs_b = slwt_b->desc->attrs | slwt_b->parsed_optattrs;
2258
2259	if (attrs_a != attrs_b)
2260		return 1;
2261
2262	for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
2263		if (attrs_a & SEG6_F_ATTR(i)) {
2264			param = &seg6_action_params[i];
2265			if (param->cmp(slwt_a, slwt_b))
2266				return 1;
2267		}
2268	}
2269
2270	return 0;
2271}
2272
2273static const struct lwtunnel_encap_ops seg6_local_ops = {
2274	.build_state	= seg6_local_build_state,
2275	.destroy_state	= seg6_local_destroy_state,
2276	.input		= seg6_local_input,
2277	.fill_encap	= seg6_local_fill_encap,
2278	.get_encap_size	= seg6_local_get_encap_size,
2279	.cmp_encap	= seg6_local_cmp_encap,
2280	.owner		= THIS_MODULE,
2281};
2282
2283int __init seg6_local_init(void)
2284{
2285	/* If the max total number of defined attributes is reached, then your
2286	 * kernel build stops here.
2287	 *
2288	 * This check is required to avoid arithmetic overflows when processing
2289	 * behavior attributes and the maximum number of defined attributes
2290	 * exceeds the allowed value.
2291	 */
2292	BUILD_BUG_ON(SEG6_LOCAL_MAX + 1 > BITS_PER_TYPE(unsigned long));
2293
2294	/* If the default NEXT-C-SID Locator-Block/Node Function lengths (in
2295	 * bits) have been changed with invalid values, kernel build stops
2296	 * here.
2297	 */
2298	BUILD_BUG_ON(next_csid_chk_cntr_bits(SEG6_LOCAL_LCBLOCK_DBITS,
2299					     SEG6_LOCAL_LCNODE_FN_DBITS));
2300	BUILD_BUG_ON(next_csid_chk_lcblock_bits(SEG6_LOCAL_LCBLOCK_DBITS));
2301	BUILD_BUG_ON(next_csid_chk_lcnode_fn_bits(SEG6_LOCAL_LCNODE_FN_DBITS));
2302
2303	return lwtunnel_encap_add_ops(&seg6_local_ops,
2304				      LWTUNNEL_ENCAP_SEG6_LOCAL);
2305}
2306
/* Counterpart of seg6_local_init(): removes seg6_local_ops from the
 * LWTUNNEL_ENCAP_SEG6_LOCAL encap type.
 */
void seg6_local_exit(void)
{
	lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL);
}