Linux Audio

Check our new training course

Loading...
v6.9.4
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *	Linux IPv6 multicast routing support for BSD pim6sd
   4 *	Based on net/ipv4/ipmr.c.
   5 *
   6 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   7 *		LSIIT Laboratory, Strasbourg, France
   8 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   9 *		6WIND, Paris, France
  10 *	Copyright (C)2007,2008 USAGI/WIDE Project
  11 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  12 */
  13
  14#include <linux/uaccess.h>
  15#include <linux/types.h>
  16#include <linux/sched.h>
  17#include <linux/errno.h>
  18#include <linux/mm.h>
  19#include <linux/kernel.h>
  20#include <linux/fcntl.h>
  21#include <linux/stat.h>
  22#include <linux/socket.h>
  23#include <linux/inet.h>
  24#include <linux/netdevice.h>
  25#include <linux/inetdevice.h>
  26#include <linux/proc_fs.h>
  27#include <linux/seq_file.h>
  28#include <linux/init.h>
  29#include <linux/compat.h>
  30#include <linux/rhashtable.h>
  31#include <net/protocol.h>
  32#include <linux/skbuff.h>
  33#include <net/raw.h>
  34#include <linux/notifier.h>
  35#include <linux/if_arp.h>
  36#include <net/checksum.h>
  37#include <net/netlink.h>
  38#include <net/fib_rules.h>
  39
  40#include <net/ipv6.h>
  41#include <net/ip6_route.h>
  42#include <linux/mroute6.h>
  43#include <linux/pim.h>
  44#include <net/addrconf.h>
  45#include <linux/netfilter_ipv6.h>
  46#include <linux/export.h>
  47#include <net/ip6_checksum.h>
  48#include <linux/netconf.h>
  49#include <net/ip_tunnels.h>
  50
  51#include <linux/nospec.h>
  52
/* fib rule type used for IPv6 multicast routing.  It carries nothing
 * beyond the common fib_rule; its size is what ip6mr_rules_ops_template
 * advertises as .rule_size.
 */
struct ip6mr_rule {
	struct fib_rule		common;
};
  56
/* Result slot of a rule lookup: ip6mr_rule_action() stores the selected
 * table here and ip6mr_fib_lookup() hands it back to its caller.
 */
struct ip6mr_result {
	struct mr_table	*mrt;
};
  60
/* Big lock, protecting the vif table, the mrt cache and the mroute socket
 * state.  Note that changes are additionally serialized by rtnl_lock.
 */

static DEFINE_SPINLOCK(mrt_lock);
  66
  67static struct net_device *vif_dev_read(const struct vif_device *vif)
  68{
  69	return rcu_dereference(vif->dev);
  70}
  71
/* Multicast router control variables */

/* Dedicated spinlock for the queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme: the hash table of resolved
 * entries is changed only in process context and is protected by the
 * weak lock mrt_lock, while the queue of unresolved entries is protected
 * by the strong spinlock mfc_unres_lock.
 *
 * As a result the data path is free of exclusive locks altogether.
 */
  84
/* Slab cache from which struct mfc6_cache entries are allocated */
static struct kmem_cache *mrt_cachep __read_mostly;

/* Forward declarations of static helpers that are used before their
 * definitions.
 */
static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct net_device *dev, struct sk_buff *skb,
			   struct mfc6_cache *cache);
static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr_table *mrt, int flags);
static void ipmr_expire_process(struct timer_list *t);
 104
 105#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/* Iterate over every multicast routing table linked on
 * net->ipv6.mr6_tables.  The extra condition is handed to
 * list_for_each_entry_rcu() as its lockdep expression: RTNL being held or
 * the list being empty.
 */
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \
				lockdep_rtnl_is_held() || \
				list_empty(&net->ipv6.mr6_tables))
 110
 111static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 112					    struct mr_table *mrt)
 113{
 114	struct mr_table *ret;
 115
 116	if (!mrt)
 117		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
 118				     struct mr_table, list);
 119	else
 120		ret = list_entry_rcu(mrt->list.next,
 121				     struct mr_table, list);
 122
 123	if (&ret->list == &net->ipv6.mr6_tables)
 124		return NULL;
 125	return ret;
 126}
 127
 128static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 129{
 130	struct mr_table *mrt;
 131
 132	ip6mr_for_each_table(mrt, net) {
 133		if (mrt->id == id)
 134			return mrt;
 135	}
 136	return NULL;
 137}
 138
 139static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 140			    struct mr_table **mrt)
 141{
 142	int err;
 143	struct ip6mr_result res;
 144	struct fib_lookup_arg arg = {
 145		.result = &res,
 146		.flags = FIB_LOOKUP_NOREF,
 147	};
 148
 149	/* update flow if oif or iif point to device enslaved to l3mdev */
 150	l3mdev_update_flow(net, flowi6_to_flowi(flp6));
 151
 152	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
 153			       flowi6_to_flowi(flp6), 0, &arg);
 154	if (err < 0)
 155		return err;
 156	*mrt = res.mrt;
 157	return 0;
 158}
 159
 160static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
 161			     int flags, struct fib_lookup_arg *arg)
 162{
 163	struct ip6mr_result *res = arg->result;
 164	struct mr_table *mrt;
 165
 166	switch (rule->action) {
 167	case FR_ACT_TO_TBL:
 168		break;
 169	case FR_ACT_UNREACHABLE:
 170		return -ENETUNREACH;
 171	case FR_ACT_PROHIBIT:
 172		return -EACCES;
 173	case FR_ACT_BLACKHOLE:
 174	default:
 175		return -EINVAL;
 176	}
 177
 178	arg->table = fib_rule_get_table(rule, arg);
 179
 180	mrt = ip6mr_get_table(rule->fr_net, arg->table);
 181	if (!mrt)
 182		return -EAGAIN;
 183	res->mrt = mrt;
 184	return 0;
 185}
 186
 187static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
 188{
 189	return 1;
 190}
 191
 192static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 193				struct fib_rule_hdr *frh, struct nlattr **tb,
 194				struct netlink_ext_ack *extack)
 195{
 196	return 0;
 197}
 198
 199static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 200			      struct nlattr **tb)
 201{
 202	return 1;
 203}
 204
 205static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 206			   struct fib_rule_hdr *frh)
 207{
 208	frh->dst_len = 0;
 209	frh->src_len = 0;
 210	frh->tos     = 0;
 211	return 0;
 212}
 213
/* Template handed to fib_rules_register() by ip6mr_rules_init(); wires
 * the ip6mr_rule_*() callbacks above into the RTNL_FAMILY_IP6MR rule
 * family.
 */
static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.owner		= THIS_MODULE,
};
 226
 227static int __net_init ip6mr_rules_init(struct net *net)
 228{
 229	struct fib_rules_ops *ops;
 230	struct mr_table *mrt;
 231	int err;
 232
 233	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
 234	if (IS_ERR(ops))
 235		return PTR_ERR(ops);
 236
 237	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
 238
 239	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 240	if (IS_ERR(mrt)) {
 241		err = PTR_ERR(mrt);
 242		goto err1;
 243	}
 244
 245	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT);
 246	if (err < 0)
 247		goto err2;
 248
 249	net->ipv6.mr6_rules_ops = ops;
 250	return 0;
 251
 252err2:
 253	rtnl_lock();
 254	ip6mr_free_table(mrt);
 255	rtnl_unlock();
 256err1:
 257	fib_rules_unregister(ops);
 258	return err;
 259}
 260
 261static void __net_exit ip6mr_rules_exit(struct net *net)
 262{
 263	struct mr_table *mrt, *next;
 264
 265	ASSERT_RTNL();
 266	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 267		list_del(&mrt->list);
 268		ip6mr_free_table(mrt);
 269	}
 270	fib_rules_unregister(net->ipv6.mr6_rules_ops);
 271}
 272
 273static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
 274			    struct netlink_ext_ack *extack)
 275{
 276	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
 277}
 278
 279static unsigned int ip6mr_rules_seq_read(struct net *net)
 280{
 281	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
 282}
 283
 284bool ip6mr_rule_default(const struct fib_rule *rule)
 285{
 286	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
 287	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
 288}
 289EXPORT_SYMBOL(ip6mr_rule_default);
 290#else
/* Single-table variant: the loop body runs at most once, for
 * net->ipv6.mrt6, and not at all when that pointer is NULL.
 */
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 293
 294static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 295					    struct mr_table *mrt)
 296{
 297	if (!mrt)
 298		return net->ipv6.mrt6;
 299	return NULL;
 300}
 301
 302static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 303{
 304	return net->ipv6.mrt6;
 305}
 306
 307static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 308			    struct mr_table **mrt)
 309{
 310	*mrt = net->ipv6.mrt6;
 311	return 0;
 312}
 313
 314static int __net_init ip6mr_rules_init(struct net *net)
 315{
 316	struct mr_table *mrt;
 317
 318	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 319	if (IS_ERR(mrt))
 320		return PTR_ERR(mrt);
 321	net->ipv6.mrt6 = mrt;
 322	return 0;
 323}
 324
 325static void __net_exit ip6mr_rules_exit(struct net *net)
 326{
 327	ASSERT_RTNL();
 328	ip6mr_free_table(net->ipv6.mrt6);
 329	net->ipv6.mrt6 = NULL;
 330}
 331
 332static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
 333			    struct netlink_ext_ack *extack)
 334{
 335	return 0;
 336}
 337
 338static unsigned int ip6mr_rules_seq_read(struct net *net)
 339{
 340	return 0;
 341}
 342#endif
 343
 344static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
 345			  const void *ptr)
 346{
 347	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
 348	struct mfc6_cache *c = (struct mfc6_cache *)ptr;
 349
 350	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
 351	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
 352}
 353
/* rhashtable parameters for the MFC hash: entries are linked through
 * mr_mfc.mnode, keyed by the cmparg member of struct mfc6_cache and
 * compared with ip6mr_hash_cmp().
 */
static const struct rhashtable_params ip6mr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc6_cache, cmparg),
	.key_len = sizeof(struct mfc6_cache_cmp_arg),
	.nelem_hint = 3,
	.obj_cmpfn = ip6mr_hash_cmp,
	.automatic_shrinking = true,
};
 362
/* Callback passed to mr_table_alloc() by ip6mr_new_table().  With
 * multiple tables enabled it appends @mrt to the per-netns table list;
 * otherwise it does nothing.
 */
static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
}
 370
/* Compare key with both origin and group set to the unspecified address;
 * registered as .cmparg_any in ip6mr_mr_table_ops.
 */
static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
	.mf6c_origin = IN6ADDR_ANY_INIT,
	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};
 375
/* Table ops handed to mr_table_alloc(): the hash parameters and the
 * "any" compare key used for IPv6 tables.
 */
static struct mr_table_ops ip6mr_mr_table_ops = {
	.rht_params = &ip6mr_rht_params,
	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
};
 380
 381static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 382{
 383	struct mr_table *mrt;
 384
 385	mrt = ip6mr_get_table(net, id);
 386	if (mrt)
 387		return mrt;
 388
 389	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
 390			      ipmr_expire_process, ip6mr_new_table_set);
 391}
 392
 393static void ip6mr_free_table(struct mr_table *mrt)
 394{
 395	timer_shutdown_sync(&mrt->ipmr_expire_timer);
 396	mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
 397				 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
 398	rhltable_destroy(&mrt->mfc_hash);
 399	kfree(mrt);
 400}
 401
 402#ifdef CONFIG_PROC_FS
 403/* The /proc interfaces to multicast routing
 404 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 405 */
 406
 407static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 408	__acquires(RCU)
 409{
 410	struct mr_vif_iter *iter = seq->private;
 411	struct net *net = seq_file_net(seq);
 412	struct mr_table *mrt;
 413
 414	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 415	if (!mrt)
 416		return ERR_PTR(-ENOENT);
 417
 418	iter->mrt = mrt;
 419
 420	rcu_read_lock();
 421	return mr_vif_seq_start(seq, pos);
 422}
 423
/* seq_file .stop for the vif listing: drops the RCU read lock taken in
 * ip6mr_vif_seq_start().
 */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}
 429
 430static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 431{
 432	struct mr_vif_iter *iter = seq->private;
 433	struct mr_table *mrt = iter->mrt;
 434
 435	if (v == SEQ_START_TOKEN) {
 436		seq_puts(seq,
 437			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
 438	} else {
 439		const struct vif_device *vif = v;
 440		const struct net_device *vif_dev;
 441		const char *name;
 442
 443		vif_dev = vif_dev_read(vif);
 444		name = vif_dev ? vif_dev->name : "none";
 445
 446		seq_printf(seq,
 447			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
 448			   vif - mrt->vif_table,
 449			   name, vif->bytes_in, vif->pkt_in,
 450			   vif->bytes_out, vif->pkt_out,
 451			   vif->flags);
 452	}
 453	return 0;
 454}
 455
/* seq_file operations for the vif listing */
static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};
 462
 463static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 464{
 465	struct net *net = seq_file_net(seq);
 466	struct mr_table *mrt;
 467
 468	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 469	if (!mrt)
 470		return ERR_PTR(-ENOENT);
 471
 472	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
 473}
 474
 475static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 476{
 477	int n;
 478
 479	if (v == SEQ_START_TOKEN) {
 480		seq_puts(seq,
 481			 "Group                            "
 482			 "Origin                           "
 483			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
 484	} else {
 485		const struct mfc6_cache *mfc = v;
 486		const struct mr_mfc_iter *it = seq->private;
 487		struct mr_table *mrt = it->mrt;
 488
 489		seq_printf(seq, "%pI6 %pI6 %-3hd",
 490			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 491			   mfc->_c.mfc_parent);
 492
 493		if (it->cache != &mrt->mfc_unres_queue) {
 494			seq_printf(seq, " %8lu %8lu %8lu",
 495				   mfc->_c.mfc_un.res.pkt,
 496				   mfc->_c.mfc_un.res.bytes,
 497				   mfc->_c.mfc_un.res.wrong_if);
 498			for (n = mfc->_c.mfc_un.res.minvif;
 499			     n < mfc->_c.mfc_un.res.maxvif; n++) {
 500				if (VIF_EXISTS(mrt, n) &&
 501				    mfc->_c.mfc_un.res.ttls[n] < 255)
 502					seq_printf(seq,
 503						   " %2d:%-3d", n,
 504						   mfc->_c.mfc_un.res.ttls[n]);
 505			}
 506		} else {
 507			/* unresolved mfc_caches don't contain
 508			 * pkt, bytes and wrong_if values
 509			 */
 510			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
 511		}
 512		seq_putc(seq, '\n');
 513	}
 514	return 0;
 515}
 516
/* seq_file operations for the MFC listing */
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = mr_mfc_seq_next,
	.stop  = mr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
 523#endif
 524
 525#ifdef CONFIG_IPV6_PIMSM_V2
 526
/*
 * Receive handler installed through pim6_protocol.
 *
 * Accepts only PIM Register messages that are not null-registers, whose
 * checksum verifies either over the PIM header alone or over the whole
 * packet, and whose encapsulated IPv6 packet is addressed to a multicast
 * group with a plausible payload length.  For those, the outer headers
 * are pulled off, the skb is handed to skb_tunnel_rx() with the register
 * vif device of the table selected by the rule lookup, and the inner
 * packet is re-injected with netif_rx().
 *
 * Always returns 0; a packet failing any check is freed.
 */
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	/* both the PIM register header and the inner IPv6 header are read */
	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	/* drop only if BOTH the header-only and the full-packet checksum
	 * fail to verify
	 */
	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;

	/* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
	reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
	if (reg_vif_num >= 0)
		reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);

	/* no register vif configured for this table */
	if (!reg_dev)
		goto drop;

	/* strip everything in front of the encapsulated IPv6 header */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
 587
/* inet6 protocol descriptor whose receive handler is pim6_rcv() */
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
 591
 592/* Service routines creating virtual interfaces: PIMREG */
 593
 594static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 595				      struct net_device *dev)
 596{
 597	struct net *net = dev_net(dev);
 598	struct mr_table *mrt;
 599	struct flowi6 fl6 = {
 600		.flowi6_oif	= dev->ifindex,
 601		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
 602		.flowi6_mark	= skb->mark,
 603	};
 604
 605	if (!pskb_inet_may_pull(skb))
 606		goto tx_err;
 607
 608	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 609		goto tx_err;
 610
 611	DEV_STATS_ADD(dev, tx_bytes, skb->len);
 612	DEV_STATS_INC(dev, tx_packets);
 613	rcu_read_lock();
 614	ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
 615			   MRT6MSG_WHOLEPKT);
 616	rcu_read_unlock();
 617	kfree_skb(skb);
 618	return NETDEV_TX_OK;
 619
 620tx_err:
 621	DEV_STATS_INC(dev, tx_errors);
 622	kfree_skb(skb);
 623	return NETDEV_TX_OK;
 624}
 625
 626static int reg_vif_get_iflink(const struct net_device *dev)
 627{
 628	return 0;
 629}
 630
/* net_device operations of the PIM register device */
static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};
 635
 636static void reg_vif_setup(struct net_device *dev)
 637{
 638	dev->type		= ARPHRD_PIMREG;
 639	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
 640	dev->flags		= IFF_NOARP;
 641	dev->netdev_ops		= &reg_vif_netdev_ops;
 642	dev->needs_free_netdev	= true;
 643	dev->features		|= NETIF_F_NETNS_LOCAL;
 644}
 645
 646static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
 647{
 648	struct net_device *dev;
 649	char name[IFNAMSIZ];
 650
 651	if (mrt->id == RT6_TABLE_DFLT)
 652		sprintf(name, "pim6reg");
 653	else
 654		sprintf(name, "pim6reg%u", mrt->id);
 655
 656	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
 657	if (!dev)
 658		return NULL;
 659
 660	dev_net_set(dev, net);
 661
 662	if (register_netdevice(dev)) {
 663		free_netdev(dev);
 664		return NULL;
 665	}
 666
 667	if (dev_open(dev, NULL))
 668		goto failure;
 669
 670	dev_hold(dev);
 671	return dev;
 672
 673failure:
 674	unregister_netdevice(dev);
 675	return NULL;
 676}
 677#endif
 678
/* Thin wrapper around mr_call_vif_notifiers() that fills in the IP6MR
 * family and the per-netns ipmr_seq counter; returns its result.
 */
static int call_ip6mr_vif_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct vif_device *vif,
					  struct net_device *vif_dev,
					  mifi_t vif_index, u32 tb_id)
{
	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     vif, vif_dev, vif_index, tb_id,
				     &net->ipv6.ipmr_seq);
}
 689
/* Thin wrapper around mr_call_mfc_notifiers() that fills in the IP6MR
 * family, the generic part of @mfc and the per-netns ipmr_seq counter;
 * returns its result.
 */
static int call_ip6mr_mfc_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct mfc6_cache *mfc, u32 tb_id)
{
	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
}
 697
/*
 * Delete a VIF entry.
 *
 * @mrt:    table owning the vif
 * @vifi:   index of the vif to remove
 * @notify: when non-zero, a register vif's device is NOT queued for
 *          unregistration here
 * @head:   list passed to unregister_netdevice_queue() for a register vif
 *
 * Returns 0 on success, or -EADDRNOTAVAIL if @vifi is out of range or the
 * slot has no device.  v->dev is read with rtnl_dereference().
 */
static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
		       struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	dev = rtnl_dereference(v->dev);
	if (!dev)
		return -EADDRNOTAVAIL;

	/* notify listeners while the vif is still fully populated */
	call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
				       FIB_EVENT_VIF_DEL, v, dev,
				       vifi, mrt->id);
	spin_lock(&mrt_lock);
	RCU_INIT_POINTER(v->dev, NULL);

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num) {
		/* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */
		WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
	}
#endif

	/* the highest vif went away: shrink maxvif down to just above the
	 * highest vif that still exists (0 if none is left)
	 */
	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		WRITE_ONCE(mrt->maxvif, tmp + 1);
	}

	spin_unlock(&mrt_lock);

	/* undo the allmulti and mc_forwarding accounting done in mif6_add() */
	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		atomic_dec(&in6_dev->cnf.mc_forwarding);
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if ((v->flags & MIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	/* drop the tracked device reference held by the vif */
	netdev_put(dev, &v->dev_tracker);
	return 0;
}
 755
 756static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
 757{
 758	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
 759
 760	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
 761}
 762
 763static inline void ip6mr_cache_free(struct mfc6_cache *c)
 764{
 765	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
 766}
 767
/* Destroy an unresolved cache entry, killing queued skbs
 * and reporting error to netlink readers.
 *
 * Decrements the table's cache_resolve_queue_len, drains the entry's
 * queue of pending skbs and finally frees the entry through
 * ip6mr_cache_free().
 */

static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
		/* An IPv6 version field of 0 marks a queued netlink request
		 * rather than a data packet: rewrite it in place into an
		 * NLMSG_ERROR carrying -ETIMEDOUT and unicast it back to
		 * the requesting port.
		 */
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}
 794
 795
 796/* Timer process for all the unresolved queue. */
 797
 798static void ipmr_do_expire_process(struct mr_table *mrt)
 799{
 800	unsigned long now = jiffies;
 801	unsigned long expires = 10 * HZ;
 802	struct mr_mfc *c, *next;
 803
 804	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
 805		if (time_after(c->mfc_un.unres.expires, now)) {
 806			/* not yet... */
 807			unsigned long interval = c->mfc_un.unres.expires - now;
 808			if (interval < expires)
 809				expires = interval;
 810			continue;
 811		}
 812
 813		list_del(&c->list);
 814		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
 815		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
 816	}
 817
 818	if (!list_empty(&mrt->mfc_unres_queue))
 819		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 820}
 821
 822static void ipmr_expire_process(struct timer_list *t)
 823{
 824	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
 825
 826	if (!spin_trylock(&mfc_unres_lock)) {
 827		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 828		return;
 829	}
 830
 831	if (!list_empty(&mrt->mfc_unres_queue))
 832		ipmr_do_expire_process(mrt);
 833
 834	spin_unlock(&mfc_unres_lock);
 835}
 836
 837/* Fill oifs list. It is called under locked mrt_lock. */
 838
 839static void ip6mr_update_thresholds(struct mr_table *mrt,
 840				    struct mr_mfc *cache,
 841				    unsigned char *ttls)
 842{
 843	int vifi;
 844
 845	cache->mfc_un.res.minvif = MAXMIFS;
 846	cache->mfc_un.res.maxvif = 0;
 847	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 848
 849	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
 850		if (VIF_EXISTS(mrt, vifi) &&
 851		    ttls[vifi] && ttls[vifi] < 255) {
 852			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 853			if (cache->mfc_un.res.minvif > vifi)
 854				cache->mfc_un.res.minvif = vifi;
 855			if (cache->mfc_un.res.maxvif <= vifi)
 856				cache->mfc_un.res.maxvif = vifi + 1;
 857		}
 858	}
 859	cache->mfc_un.res.lastuse = jiffies;
 860}
 861
/*
 * Add a VIF entry.
 *
 * @net:     namespace in which the device is looked up or created
 * @mrt:     table receiving the vif
 * @vifc:    request: vif index, flags, physical ifindex, rate limit and
 *           threshold
 * @mrtsock: when zero the vif is additionally flagged VIFF_STATIC
 *
 * Returns 0 on success; -EADDRINUSE if the slot (or, for a register vif,
 * the table's register vif) is already taken; -ENOBUFS if the register
 * device cannot be created; -EADDRNOTAVAIL if the physical device does
 * not exist; -EINVAL for unsupported flags; or the error returned by
 * dev_set_allmulti().
 *
 * NOTE(review): vifc->mif6c_mifi indexes vif_table without a range check
 * here - presumably validated by the caller; confirm.
 */
static int mif6_add(struct net *net, struct mr_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			/* undo ip6mr_reg_vif(): unregister and drop its ref */
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	/* account the device as multicast-forwarding and announce it */
	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		atomic_inc(&in6_dev->cnf.mc_forwarding);
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
			MIFF_REGISTER);

	/* And finish update writing critical data */
	spin_lock(&mrt_lock);
	rcu_assign_pointer(v->dev, dev);
	netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
#endif
	/* grow maxvif so that it stays one past the highest vif in use */
	if (vifi + 1 > mrt->maxvif)
		WRITE_ONCE(mrt->maxvif, vifi + 1);
	spin_unlock(&mrt_lock);
	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
				       v, dev, vifi, mrt->id);
	return 0;
}
 937
 938static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
 939					   const struct in6_addr *origin,
 940					   const struct in6_addr *mcastgrp)
 941{
 942	struct mfc6_cache_cmp_arg arg = {
 943		.mf6c_origin = *origin,
 944		.mf6c_mcastgrp = *mcastgrp,
 945	};
 946
 947	return mr_mfc_find(mrt, &arg);
 948}
 949
 950/* Look for a (*,G) entry */
 951static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
 952					       struct in6_addr *mcastgrp,
 953					       mifi_t mifi)
 954{
 955	struct mfc6_cache_cmp_arg arg = {
 956		.mf6c_origin = in6addr_any,
 957		.mf6c_mcastgrp = *mcastgrp,
 958	};
 959
 960	if (ipv6_addr_any(mcastgrp))
 961		return mr_mfc_find_any_parent(mrt, mifi);
 962	return mr_mfc_find_any(mrt, mifi, &arg);
 963}
 964
 965/* Look for a (S,G,iif) entry if parent != -1 */
 966static struct mfc6_cache *
 967ip6mr_cache_find_parent(struct mr_table *mrt,
 968			const struct in6_addr *origin,
 969			const struct in6_addr *mcastgrp,
 970			int parent)
 971{
 972	struct mfc6_cache_cmp_arg arg = {
 973		.mf6c_origin = *origin,
 974		.mf6c_mcastgrp = *mcastgrp,
 975	};
 976
 977	return mr_mfc_find_parent(mrt, &arg, parent);
 978}
 979
 980/* Allocate a multicast cache entry */
 981static struct mfc6_cache *ip6mr_cache_alloc(void)
 982{
 983	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 984	if (!c)
 985		return NULL;
 986	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 987	c->_c.mfc_un.res.minvif = MAXMIFS;
 988	c->_c.free = ip6mr_cache_free_rcu;
 989	refcount_set(&c->_c.mfc_un.res.refcount, 1);
 990	return c;
 991}
 992
 993static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
 994{
 995	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 996	if (!c)
 997		return NULL;
 998	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
 999	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
1000	return c;
1001}
1002
/*
 *	A cache entry has gone into a resolved state from queued
 *
 *	@uc is the unresolved entry whose pending skbs are drained, @c the
 *	resolved entry that is used to answer or forward them.
 */

static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		/* An IPv6 version field of 0 marks a queued netlink request
		 * rather than a data packet.
		 */
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));

			/* answer with the route, or with -EMSGSIZE if
			 * mr_fill_mroute() does not report success
			 */
			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else {
			/* real data packet: forward it using the resolved entry */
			rcu_read_lock();
			ip6_mr_forward(net, mrt, skb->dev, skb, c);
			rcu_read_unlock();
		}
	}
}
1038
/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Called under rcu_read_lock()
 *
 *	@mrt:    table the report belongs to
 *	@pkt:    packet that triggered the report
 *	@mifi:   mif index to report
 *	@assert: MRT6MSG_* message type
 *
 *	For the WHOLEPKT/WRMIFWHOLE types the whole packet is copied and a
 *	struct mrt6msg is pushed in front of it; otherwise only the IPv6
 *	header followed by a struct mrt6msg is sent.
 *
 *	Returns -ENOBUFS if no skb can be allocated, -EINVAL if the table
 *	has no mroute socket, otherwise the result of sock_queue_rcv_skb().
 */

static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	/* whole-packet report: copy @pkt, requesting headroom of
	 * sizeof(*msg) minus the network header offset
	 */
	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		__skb_pull(skb, skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		/* WHOLEPKT reports always carry the register vif number */
		if (assert == MRT6MSG_WRMIFWHOLE)
			msg->im6_mif = mifi;
		else
			msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/* without a listening mroute socket there is nobody to report to */
	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);

	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
1139
1140/* Queue a packet for resolution. It gets locked cache entry! */
1141static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1142				  struct sk_buff *skb, struct net_device *dev)
1143{
1144	struct mfc6_cache *c;
1145	bool found = false;
1146	int err;
1147
1148	spin_lock_bh(&mfc_unres_lock);
1149	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1150		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1151		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1152			found = true;
1153			break;
1154		}
1155	}
1156
1157	if (!found) {
1158		/*
1159		 *	Create a new entry if allowable
1160		 */
1161
1162		c = ip6mr_cache_alloc_unres();
1163		if (!c) {
1164			spin_unlock_bh(&mfc_unres_lock);
1165
1166			kfree_skb(skb);
1167			return -ENOBUFS;
1168		}
1169
1170		/* Fill in the new cache entry */
1171		c->_c.mfc_parent = -1;
1172		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1173		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1174
1175		/*
1176		 *	Reflect first query at pim6sd
1177		 */
1178		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1179		if (err < 0) {
1180			/* If the report failed throw the cache entry
1181			   out - Brad Parker
1182			 */
1183			spin_unlock_bh(&mfc_unres_lock);
1184
1185			ip6mr_cache_free(c);
1186			kfree_skb(skb);
1187			return err;
1188		}
1189
1190		atomic_inc(&mrt->cache_resolve_queue_len);
1191		list_add(&c->_c.list, &mrt->mfc_unres_queue);
1192		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1193
1194		ipmr_do_expire_process(mrt);
1195	}
1196
1197	/* See if we can append the packet */
1198	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1199		kfree_skb(skb);
1200		err = -ENOBUFS;
1201	} else {
1202		if (dev) {
1203			skb->dev = dev;
1204			skb->skb_iif = dev->ifindex;
1205		}
1206		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1207		err = 0;
1208	}
1209
1210	spin_unlock_bh(&mfc_unres_lock);
1211	return err;
1212}
1213
1214/*
1215 *	MFC6 cache manipulation by user space
1216 */
1217
1218static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1219			    int parent)
1220{
1221	struct mfc6_cache *c;
1222
1223	/* The entries are added/deleted only under RTNL */
1224	rcu_read_lock();
1225	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1226				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1227	rcu_read_unlock();
1228	if (!c)
1229		return -ENOENT;
1230	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1231	list_del_rcu(&c->_c.list);
1232
1233	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1234				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1235	mr6_netlink_event(mrt, c, RTM_DELROUTE);
1236	mr_cache_put(&c->_c);
1237	return 0;
1238}
1239
1240static int ip6mr_device_event(struct notifier_block *this,
1241			      unsigned long event, void *ptr)
1242{
1243	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1244	struct net *net = dev_net(dev);
1245	struct mr_table *mrt;
1246	struct vif_device *v;
1247	int ct;
1248
1249	if (event != NETDEV_UNREGISTER)
1250		return NOTIFY_DONE;
1251
1252	ip6mr_for_each_table(mrt, net) {
1253		v = &mrt->vif_table[0];
1254		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1255			if (rcu_access_pointer(v->dev) == dev)
1256				mif6_delete(mrt, ct, 1, NULL);
1257		}
1258	}
1259
1260	return NOTIFY_DONE;
1261}
1262
1263static unsigned int ip6mr_seq_read(struct net *net)
1264{
1265	ASSERT_RTNL();
1266
1267	return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
1268}
1269
1270static int ip6mr_dump(struct net *net, struct notifier_block *nb,
1271		      struct netlink_ext_ack *extack)
1272{
1273	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1274		       ip6mr_mr_table_iter, extack);
1275}
1276
1277static struct notifier_block ip6_mr_notifier = {
1278	.notifier_call = ip6mr_device_event
1279};
1280
1281static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1282	.family		= RTNL_FAMILY_IP6MR,
1283	.fib_seq_read	= ip6mr_seq_read,
1284	.fib_dump	= ip6mr_dump,
1285	.owner		= THIS_MODULE,
1286};
1287
1288static int __net_init ip6mr_notifier_init(struct net *net)
1289{
1290	struct fib_notifier_ops *ops;
1291
1292	net->ipv6.ipmr_seq = 0;
1293
1294	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1295	if (IS_ERR(ops))
1296		return PTR_ERR(ops);
1297
1298	net->ipv6.ip6mr_notifier_ops = ops;
1299
1300	return 0;
1301}
1302
1303static void __net_exit ip6mr_notifier_exit(struct net *net)
1304{
1305	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1306	net->ipv6.ip6mr_notifier_ops = NULL;
1307}
1308
1309/* Setup for IP multicast routing */
1310static int __net_init ip6mr_net_init(struct net *net)
1311{
1312	int err;
1313
1314	err = ip6mr_notifier_init(net);
1315	if (err)
1316		return err;
1317
1318	err = ip6mr_rules_init(net);
1319	if (err < 0)
1320		goto ip6mr_rules_fail;
1321
1322#ifdef CONFIG_PROC_FS
1323	err = -ENOMEM;
1324	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1325			sizeof(struct mr_vif_iter)))
1326		goto proc_vif_fail;
1327	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1328			sizeof(struct mr_mfc_iter)))
1329		goto proc_cache_fail;
1330#endif
1331
1332	return 0;
1333
1334#ifdef CONFIG_PROC_FS
1335proc_cache_fail:
1336	remove_proc_entry("ip6_mr_vif", net->proc_net);
1337proc_vif_fail:
1338	rtnl_lock();
1339	ip6mr_rules_exit(net);
1340	rtnl_unlock();
1341#endif
1342ip6mr_rules_fail:
1343	ip6mr_notifier_exit(net);
1344	return err;
1345}
1346
1347static void __net_exit ip6mr_net_exit(struct net *net)
1348{
1349#ifdef CONFIG_PROC_FS
1350	remove_proc_entry("ip6_mr_cache", net->proc_net);
1351	remove_proc_entry("ip6_mr_vif", net->proc_net);
1352#endif
1353	ip6mr_notifier_exit(net);
1354}
1355
1356static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
1357{
1358	struct net *net;
1359
1360	rtnl_lock();
1361	list_for_each_entry(net, net_list, exit_list)
1362		ip6mr_rules_exit(net);
1363	rtnl_unlock();
1364}
1365
1366static struct pernet_operations ip6mr_net_ops = {
1367	.init = ip6mr_net_init,
1368	.exit = ip6mr_net_exit,
1369	.exit_batch = ip6mr_net_exit_batch,
1370};
1371
1372int __init ip6_mr_init(void)
1373{
1374	int err;
1375
1376	mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN);
 
 
 
1377	if (!mrt_cachep)
1378		return -ENOMEM;
1379
1380	err = register_pernet_subsys(&ip6mr_net_ops);
1381	if (err)
1382		goto reg_pernet_fail;
1383
1384	err = register_netdevice_notifier(&ip6_mr_notifier);
1385	if (err)
1386		goto reg_notif_fail;
1387#ifdef CONFIG_IPV6_PIMSM_V2
1388	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1389		pr_err("%s: can't add PIM protocol\n", __func__);
1390		err = -EAGAIN;
1391		goto add_proto_fail;
1392	}
1393#endif
1394	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1395				   ip6mr_rtm_getroute, ip6mr_rtm_dumproute, 0);
1396	if (err == 0)
1397		return 0;
1398
1399#ifdef CONFIG_IPV6_PIMSM_V2
1400	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1401add_proto_fail:
1402	unregister_netdevice_notifier(&ip6_mr_notifier);
1403#endif
1404reg_notif_fail:
1405	unregister_pernet_subsys(&ip6mr_net_ops);
1406reg_pernet_fail:
1407	kmem_cache_destroy(mrt_cachep);
1408	return err;
1409}
1410
/*
 * Module-level teardown.  Releases what ip6_mr_init() set up, in the reverse
 * order: the RTM_GETROUTE rtnetlink handler, the PIM protocol handler (only
 * with CONFIG_IPV6_PIMSM_V2), the netdevice notifier, the per-namespace
 * operations and finally the MFC slab cache.
 */
void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
1421
1422static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1423			 struct mf6cctl *mfc, int mrtsock, int parent)
1424{
1425	unsigned char ttls[MAXMIFS];
1426	struct mfc6_cache *uc, *c;
1427	struct mr_mfc *_uc;
1428	bool found;
1429	int i, err;
1430
1431	if (mfc->mf6cc_parent >= MAXMIFS)
1432		return -ENFILE;
1433
1434	memset(ttls, 255, MAXMIFS);
1435	for (i = 0; i < MAXMIFS; i++) {
1436		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1437			ttls[i] = 1;
1438	}
1439
1440	/* The entries are added/deleted only under RTNL */
1441	rcu_read_lock();
1442	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1443				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1444	rcu_read_unlock();
1445	if (c) {
1446		spin_lock(&mrt_lock);
1447		c->_c.mfc_parent = mfc->mf6cc_parent;
1448		ip6mr_update_thresholds(mrt, &c->_c, ttls);
1449		if (!mrtsock)
1450			c->_c.mfc_flags |= MFC_STATIC;
1451		spin_unlock(&mrt_lock);
1452		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1453					       c, mrt->id);
1454		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1455		return 0;
1456	}
1457
1458	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1459	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1460		return -EINVAL;
1461
1462	c = ip6mr_cache_alloc();
1463	if (!c)
1464		return -ENOMEM;
1465
1466	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1467	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1468	c->_c.mfc_parent = mfc->mf6cc_parent;
1469	ip6mr_update_thresholds(mrt, &c->_c, ttls);
1470	if (!mrtsock)
1471		c->_c.mfc_flags |= MFC_STATIC;
1472
1473	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1474				  ip6mr_rht_params);
1475	if (err) {
1476		pr_err("ip6mr: rhtable insert error %d\n", err);
1477		ip6mr_cache_free(c);
1478		return err;
1479	}
1480	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1481
1482	/* Check to see if we resolved a queued list. If so we
1483	 * need to send on the frames and tidy up.
1484	 */
1485	found = false;
1486	spin_lock_bh(&mfc_unres_lock);
1487	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1488		uc = (struct mfc6_cache *)_uc;
1489		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1490		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1491			list_del(&_uc->list);
1492			atomic_dec(&mrt->cache_resolve_queue_len);
1493			found = true;
1494			break;
1495		}
1496	}
1497	if (list_empty(&mrt->mfc_unres_queue))
1498		del_timer(&mrt->ipmr_expire_timer);
1499	spin_unlock_bh(&mfc_unres_lock);
1500
1501	if (found) {
1502		ip6mr_cache_resolve(net, mrt, uc, c);
1503		ip6mr_cache_free(uc);
1504	}
1505	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1506				       c, mrt->id);
1507	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1508	return 0;
1509}
1510
1511/*
1512 *	Close the multicast socket, and clear the vif tables etc
1513 */
1514
1515static void mroute_clean_tables(struct mr_table *mrt, int flags)
1516{
1517	struct mr_mfc *c, *tmp;
1518	LIST_HEAD(list);
1519	int i;
1520
1521	/* Shut down all active vif entries */
1522	if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
1523		for (i = 0; i < mrt->maxvif; i++) {
1524			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
1525			     !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
1526			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
1527				continue;
1528			mif6_delete(mrt, i, 0, &list);
1529		}
1530		unregister_netdevice_many(&list);
1531	}
1532
1533	/* Wipe the cache */
1534	if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
1535		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1536			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
1537			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
1538				continue;
1539			rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1540			list_del_rcu(&c->list);
1541			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1542						       FIB_EVENT_ENTRY_DEL,
1543						       (struct mfc6_cache *)c, mrt->id);
1544			mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1545			mr_cache_put(c);
1546		}
1547	}
1548
1549	if (flags & MRT6_FLUSH_MFC) {
1550		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1551			spin_lock_bh(&mfc_unres_lock);
1552			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1553				list_del(&c->list);
1554				mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1555						  RTM_DELROUTE);
1556				ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1557			}
1558			spin_unlock_bh(&mfc_unres_lock);
1559		}
1560	}
1561}
1562
1563static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1564{
1565	int err = 0;
1566	struct net *net = sock_net(sk);
1567
1568	rtnl_lock();
1569	spin_lock(&mrt_lock);
1570	if (rtnl_dereference(mrt->mroute_sk)) {
1571		err = -EADDRINUSE;
1572	} else {
1573		rcu_assign_pointer(mrt->mroute_sk, sk);
1574		sock_set_flag(sk, SOCK_RCU_FREE);
1575		atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
1576	}
1577	spin_unlock(&mrt_lock);
1578
1579	if (!err)
1580		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1581					     NETCONFA_MC_FORWARDING,
1582					     NETCONFA_IFINDEX_ALL,
1583					     net->ipv6.devconf_all);
1584	rtnl_unlock();
1585
1586	return err;
1587}
1588
1589int ip6mr_sk_done(struct sock *sk)
1590{
1591	struct net *net = sock_net(sk);
1592	struct ipv6_devconf *devconf;
1593	struct mr_table *mrt;
1594	int err = -EACCES;
1595
1596	if (sk->sk_type != SOCK_RAW ||
1597	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1598		return err;
1599
1600	devconf = net->ipv6.devconf_all;
1601	if (!devconf || !atomic_read(&devconf->mc_forwarding))
1602		return err;
1603
1604	rtnl_lock();
1605	ip6mr_for_each_table(mrt, net) {
1606		if (sk == rtnl_dereference(mrt->mroute_sk)) {
1607			spin_lock(&mrt_lock);
1608			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1609			/* Note that mroute_sk had SOCK_RCU_FREE set,
1610			 * so the RCU grace period before sk freeing
1611			 * is guaranteed by sk_destruct()
1612			 */
1613			atomic_dec(&devconf->mc_forwarding);
1614			spin_unlock(&mrt_lock);
1615			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1616						     NETCONFA_MC_FORWARDING,
1617						     NETCONFA_IFINDEX_ALL,
1618						     net->ipv6.devconf_all);
1619
1620			mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
1621			err = 0;
1622			break;
1623		}
1624	}
1625	rtnl_unlock();
1626
1627	return err;
1628}
1629
1630bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1631{
1632	struct mr_table *mrt;
1633	struct flowi6 fl6 = {
1634		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1635		.flowi6_oif	= skb->dev->ifindex,
1636		.flowi6_mark	= skb->mark,
1637	};
1638
1639	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1640		return NULL;
1641
1642	return rcu_access_pointer(mrt->mroute_sk);
1643}
1644EXPORT_SYMBOL(mroute6_is_socket);
1645
1646/*
1647 *	Socket options and virtual interface manipulation. The whole
1648 *	virtual interface system is a complete heap, but unfortunately
1649 *	that's how BSD mrouted happens to think. Maybe one day with a proper
1650 *	MOSPF/PIM router set up we can clean this up.
1651 */
1652
1653int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
1654			  unsigned int optlen)
1655{
1656	int ret, parent = 0;
1657	struct mif6ctl vif;
1658	struct mf6cctl mfc;
1659	mifi_t mifi;
1660	struct net *net = sock_net(sk);
1661	struct mr_table *mrt;
1662
1663	if (sk->sk_type != SOCK_RAW ||
1664	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1665		return -EOPNOTSUPP;
1666
1667	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1668	if (!mrt)
1669		return -ENOENT;
1670
1671	if (optname != MRT6_INIT) {
1672		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1673		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
1674			return -EACCES;
1675	}
1676
1677	switch (optname) {
1678	case MRT6_INIT:
1679		if (optlen < sizeof(int))
1680			return -EINVAL;
1681
1682		return ip6mr_sk_init(mrt, sk);
1683
1684	case MRT6_DONE:
1685		return ip6mr_sk_done(sk);
1686
1687	case MRT6_ADD_MIF:
1688		if (optlen < sizeof(vif))
1689			return -EINVAL;
1690		if (copy_from_sockptr(&vif, optval, sizeof(vif)))
1691			return -EFAULT;
1692		if (vif.mif6c_mifi >= MAXMIFS)
1693			return -ENFILE;
1694		rtnl_lock();
1695		ret = mif6_add(net, mrt, &vif,
1696			       sk == rtnl_dereference(mrt->mroute_sk));
1697		rtnl_unlock();
1698		return ret;
1699
1700	case MRT6_DEL_MIF:
1701		if (optlen < sizeof(mifi_t))
1702			return -EINVAL;
1703		if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
1704			return -EFAULT;
1705		rtnl_lock();
1706		ret = mif6_delete(mrt, mifi, 0, NULL);
1707		rtnl_unlock();
1708		return ret;
1709
1710	/*
1711	 *	Manipulate the forwarding caches. These live
1712	 *	in a sort of kernel/user symbiosis.
1713	 */
1714	case MRT6_ADD_MFC:
1715	case MRT6_DEL_MFC:
1716		parent = -1;
1717		fallthrough;
1718	case MRT6_ADD_MFC_PROXY:
1719	case MRT6_DEL_MFC_PROXY:
1720		if (optlen < sizeof(mfc))
1721			return -EINVAL;
1722		if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
1723			return -EFAULT;
1724		if (parent == 0)
1725			parent = mfc.mf6cc_parent;
1726		rtnl_lock();
1727		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1728			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1729		else
1730			ret = ip6mr_mfc_add(net, mrt, &mfc,
1731					    sk ==
1732					    rtnl_dereference(mrt->mroute_sk),
1733					    parent);
1734		rtnl_unlock();
1735		return ret;
1736
1737	case MRT6_FLUSH:
1738	{
1739		int flags;
1740
1741		if (optlen != sizeof(flags))
1742			return -EINVAL;
1743		if (copy_from_sockptr(&flags, optval, sizeof(flags)))
1744			return -EFAULT;
1745		rtnl_lock();
1746		mroute_clean_tables(mrt, flags);
1747		rtnl_unlock();
1748		return 0;
1749	}
1750
1751	/*
1752	 *	Control PIM assert (to activate pim will activate assert)
1753	 */
1754	case MRT6_ASSERT:
1755	{
1756		int v;
1757
1758		if (optlen != sizeof(v))
1759			return -EINVAL;
1760		if (copy_from_sockptr(&v, optval, sizeof(v)))
1761			return -EFAULT;
1762		mrt->mroute_do_assert = v;
1763		return 0;
1764	}
1765
1766#ifdef CONFIG_IPV6_PIMSM_V2
1767	case MRT6_PIM:
1768	{
1769		bool do_wrmifwhole;
1770		int v;
1771
1772		if (optlen != sizeof(v))
1773			return -EINVAL;
1774		if (copy_from_sockptr(&v, optval, sizeof(v)))
1775			return -EFAULT;
1776
1777		do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE);
1778		v = !!v;
1779		rtnl_lock();
1780		ret = 0;
1781		if (v != mrt->mroute_do_pim) {
1782			mrt->mroute_do_pim = v;
1783			mrt->mroute_do_assert = v;
1784			mrt->mroute_do_wrvifwhole = do_wrmifwhole;
1785		}
1786		rtnl_unlock();
1787		return ret;
1788	}
1789
1790#endif
1791#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1792	case MRT6_TABLE:
1793	{
1794		u32 v;
1795
1796		if (optlen != sizeof(u32))
1797			return -EINVAL;
1798		if (copy_from_sockptr(&v, optval, sizeof(v)))
1799			return -EFAULT;
1800		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1801		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1802			return -EINVAL;
1803		if (sk == rcu_access_pointer(mrt->mroute_sk))
1804			return -EBUSY;
1805
1806		rtnl_lock();
1807		ret = 0;
1808		mrt = ip6mr_new_table(net, v);
1809		if (IS_ERR(mrt))
1810			ret = PTR_ERR(mrt);
1811		else
1812			raw6_sk(sk)->ip6mr_table = v;
1813		rtnl_unlock();
1814		return ret;
1815	}
1816#endif
1817	/*
1818	 *	Spurious command, or MRT6_VERSION which you cannot
1819	 *	set.
1820	 */
1821	default:
1822		return -ENOPROTOOPT;
1823	}
1824}
1825
1826/*
1827 *	Getsock opt support for the multicast routing system.
1828 */
1829
1830int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
1831			  sockptr_t optlen)
1832{
1833	int olr;
1834	int val;
1835	struct net *net = sock_net(sk);
1836	struct mr_table *mrt;
1837
1838	if (sk->sk_type != SOCK_RAW ||
1839	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1840		return -EOPNOTSUPP;
1841
1842	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1843	if (!mrt)
1844		return -ENOENT;
1845
1846	switch (optname) {
1847	case MRT6_VERSION:
1848		val = 0x0305;
1849		break;
1850#ifdef CONFIG_IPV6_PIMSM_V2
1851	case MRT6_PIM:
1852		val = mrt->mroute_do_pim;
1853		break;
1854#endif
1855	case MRT6_ASSERT:
1856		val = mrt->mroute_do_assert;
1857		break;
1858	default:
1859		return -ENOPROTOOPT;
1860	}
1861
1862	if (copy_from_sockptr(&olr, optlen, sizeof(int)))
1863		return -EFAULT;
1864
1865	olr = min_t(int, olr, sizeof(int));
1866	if (olr < 0)
1867		return -EINVAL;
1868
1869	if (copy_to_sockptr(optlen, &olr, sizeof(int)))
1870		return -EFAULT;
1871	if (copy_to_sockptr(optval, &val, olr))
1872		return -EFAULT;
1873	return 0;
1874}
1875
1876/*
1877 *	The IP multicast ioctl support routines.
1878 */
1879int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
 
1880{
1881	struct sioc_sg_req6 *sr;
1882	struct sioc_mif_req6 *vr;
1883	struct vif_device *vif;
1884	struct mfc6_cache *c;
1885	struct net *net = sock_net(sk);
1886	struct mr_table *mrt;
1887
1888	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1889	if (!mrt)
1890		return -ENOENT;
1891
1892	switch (cmd) {
1893	case SIOCGETMIFCNT_IN6:
1894		vr = (struct sioc_mif_req6 *)arg;
1895		if (vr->mifi >= mrt->maxvif)
 
1896			return -EINVAL;
1897		vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif);
1898		rcu_read_lock();
1899		vif = &mrt->vif_table[vr->mifi];
1900		if (VIF_EXISTS(mrt, vr->mifi)) {
1901			vr->icount = READ_ONCE(vif->pkt_in);
1902			vr->ocount = READ_ONCE(vif->pkt_out);
1903			vr->ibytes = READ_ONCE(vif->bytes_in);
1904			vr->obytes = READ_ONCE(vif->bytes_out);
1905			rcu_read_unlock();
 
 
 
1906			return 0;
1907		}
1908		rcu_read_unlock();
1909		return -EADDRNOTAVAIL;
1910	case SIOCGETSGCNT_IN6:
1911		sr = (struct sioc_sg_req6 *)arg;
 
1912
1913		rcu_read_lock();
1914		c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
1915				     &sr->grp.sin6_addr);
1916		if (c) {
1917			sr->pktcnt = c->_c.mfc_un.res.pkt;
1918			sr->bytecnt = c->_c.mfc_un.res.bytes;
1919			sr->wrong_if = c->_c.mfc_un.res.wrong_if;
1920			rcu_read_unlock();
 
 
 
1921			return 0;
1922		}
1923		rcu_read_unlock();
1924		return -EADDRNOTAVAIL;
1925	default:
1926		return -ENOIOCTLCMD;
1927	}
1928}
1929
1930#ifdef CONFIG_COMPAT
/*
 * Request layout that ip6mr_compat_ioctl() copies from and back to user
 * space for SIOCGETSGCNT_IN6.  @src and @grp select the cache entry; the
 * three compat_ulong_t counters are filled from that entry's packet, byte
 * and wrong-interface statistics.
 */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};
1938
/*
 * Request layout that ip6mr_compat_ioctl() copies from and back to user
 * space for SIOCGETMIFCNT_IN6.  @mifi selects the MIF; the compat_ulong_t
 * counters are filled from that MIF's input/output packet and byte counts.
 */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};
1946
1947int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1948{
1949	struct compat_sioc_sg_req6 sr;
1950	struct compat_sioc_mif_req6 vr;
1951	struct vif_device *vif;
1952	struct mfc6_cache *c;
1953	struct net *net = sock_net(sk);
1954	struct mr_table *mrt;
1955
1956	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1957	if (!mrt)
1958		return -ENOENT;
1959
1960	switch (cmd) {
1961	case SIOCGETMIFCNT_IN6:
1962		if (copy_from_user(&vr, arg, sizeof(vr)))
1963			return -EFAULT;
1964		if (vr.mifi >= mrt->maxvif)
1965			return -EINVAL;
1966		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1967		rcu_read_lock();
1968		vif = &mrt->vif_table[vr.mifi];
1969		if (VIF_EXISTS(mrt, vr.mifi)) {
1970			vr.icount = READ_ONCE(vif->pkt_in);
1971			vr.ocount = READ_ONCE(vif->pkt_out);
1972			vr.ibytes = READ_ONCE(vif->bytes_in);
1973			vr.obytes = READ_ONCE(vif->bytes_out);
1974			rcu_read_unlock();
1975
1976			if (copy_to_user(arg, &vr, sizeof(vr)))
1977				return -EFAULT;
1978			return 0;
1979		}
1980		rcu_read_unlock();
1981		return -EADDRNOTAVAIL;
1982	case SIOCGETSGCNT_IN6:
1983		if (copy_from_user(&sr, arg, sizeof(sr)))
1984			return -EFAULT;
1985
1986		rcu_read_lock();
1987		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1988		if (c) {
1989			sr.pktcnt = c->_c.mfc_un.res.pkt;
1990			sr.bytecnt = c->_c.mfc_un.res.bytes;
1991			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1992			rcu_read_unlock();
1993
1994			if (copy_to_user(arg, &sr, sizeof(sr)))
1995				return -EFAULT;
1996			return 0;
1997		}
1998		rcu_read_unlock();
1999		return -EADDRNOTAVAIL;
2000	default:
2001		return -ENOIOCTLCMD;
2002	}
2003}
2004#endif
2005
2006static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
2007{
2008	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
2009		      IPSTATS_MIB_OUTFORWDATAGRAMS);
 
 
2010	return dst_output(net, sk, skb);
2011}
2012
2013/*
2014 *	Processing handlers for ip6mr_forward
2015 */
2016
2017static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
2018			  struct sk_buff *skb, int vifi)
2019{
2020	struct vif_device *vif = &mrt->vif_table[vifi];
2021	struct net_device *vif_dev;
2022	struct ipv6hdr *ipv6h;
2023	struct dst_entry *dst;
2024	struct flowi6 fl6;
2025
2026	vif_dev = vif_dev_read(vif);
2027	if (!vif_dev)
2028		goto out_free;
2029
2030#ifdef CONFIG_IPV6_PIMSM_V2
2031	if (vif->flags & MIFF_REGISTER) {
2032		WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2033		WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2034		DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
2035		DEV_STATS_INC(vif_dev, tx_packets);
2036		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2037		goto out_free;
2038	}
2039#endif
2040
2041	ipv6h = ipv6_hdr(skb);
2042
2043	fl6 = (struct flowi6) {
2044		.flowi6_oif = vif->link,
2045		.daddr = ipv6h->daddr,
2046	};
2047
2048	dst = ip6_route_output(net, NULL, &fl6);
2049	if (dst->error) {
2050		dst_release(dst);
2051		goto out_free;
2052	}
2053
2054	skb_dst_drop(skb);
2055	skb_dst_set(skb, dst);
2056
2057	/*
2058	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2059	 * not only before forwarding, but after forwarding on all output
2060	 * interfaces. It is clear, if mrouter runs a multicasting
2061	 * program, it should receive packets not depending to what interface
2062	 * program is joined.
2063	 * If we will not make it, the program will have to join on all
2064	 * interfaces. On the other hand, multihoming host (or router, but
2065	 * not mrouter) cannot join to more than one interface - it will
2066	 * result in receiving multiple packets.
2067	 */
2068	skb->dev = vif_dev;
2069	WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2070	WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2071
2072	/* We are about to write */
2073	/* XXX: extension headers? */
2074	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
2075		goto out_free;
2076
2077	ipv6h = ipv6_hdr(skb);
2078	ipv6h->hop_limit--;
2079
2080	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2081
2082	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2083		       net, NULL, skb, skb->dev, vif_dev,
2084		       ip6mr_forward2_finish);
2085
2086out_free:
2087	kfree_skb(skb);
2088	return 0;
2089}
2090
2091/* Called with rcu_read_lock() */
2092static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2093{
2094	int ct;
2095
2096	/* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
2097	for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
2098		if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
2099			break;
2100	}
2101	return ct;
2102}
2103
2104/* Called under rcu_read_lock() */
2105static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2106			   struct net_device *dev, struct sk_buff *skb,
2107			   struct mfc6_cache *c)
2108{
2109	int psend = -1;
2110	int vif, ct;
2111	int true_vifi = ip6mr_find_vif(mrt, dev);
2112
2113	vif = c->_c.mfc_parent;
2114	c->_c.mfc_un.res.pkt++;
2115	c->_c.mfc_un.res.bytes += skb->len;
2116	c->_c.mfc_un.res.lastuse = jiffies;
2117
2118	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2119		struct mfc6_cache *cache_proxy;
2120
2121		/* For an (*,G) entry, we only check that the incoming
2122		 * interface is part of the static tree.
2123		 */
2124		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2125		if (cache_proxy &&
2126		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
2127			goto forward;
2128	}
2129
2130	/*
2131	 * Wrong interface: drop packet and (maybe) send PIM assert.
2132	 */
2133	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
2134		c->_c.mfc_un.res.wrong_if++;
2135
2136		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2137		    /* pimsm uses asserts, when switching from RPT to SPT,
2138		       so that we cannot check that packet arrived on an oif.
2139		       It is bad, but otherwise we would need to move pretty
2140		       large chunk of pimd to kernel. Ough... --ANK
2141		     */
2142		    (mrt->mroute_do_pim ||
2143		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2144		    time_after(jiffies,
2145			       c->_c.mfc_un.res.last_assert +
2146			       MFC_ASSERT_THRESH)) {
2147			c->_c.mfc_un.res.last_assert = jiffies;
2148			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2149			if (mrt->mroute_do_wrvifwhole)
2150				ip6mr_cache_report(mrt, skb, true_vifi,
2151						   MRT6MSG_WRMIFWHOLE);
2152		}
2153		goto dont_forward;
2154	}
2155
2156forward:
2157	WRITE_ONCE(mrt->vif_table[vif].pkt_in,
2158		   mrt->vif_table[vif].pkt_in + 1);
2159	WRITE_ONCE(mrt->vif_table[vif].bytes_in,
2160		   mrt->vif_table[vif].bytes_in + skb->len);
2161
2162	/*
2163	 *	Forward the frame
2164	 */
2165	if (ipv6_addr_any(&c->mf6c_origin) &&
2166	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
2167		if (true_vifi >= 0 &&
2168		    true_vifi != c->_c.mfc_parent &&
2169		    ipv6_hdr(skb)->hop_limit >
2170				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2171			/* It's an (*,*) entry and the packet is not coming from
2172			 * the upstream: forward the packet to the upstream
2173			 * only.
2174			 */
2175			psend = c->_c.mfc_parent;
2176			goto last_forward;
2177		}
2178		goto dont_forward;
2179	}
2180	for (ct = c->_c.mfc_un.res.maxvif - 1;
2181	     ct >= c->_c.mfc_un.res.minvif; ct--) {
2182		/* For (*,G) entry, don't forward to the incoming interface */
2183		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2184		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2185			if (psend != -1) {
2186				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2187				if (skb2)
2188					ip6mr_forward2(net, mrt, skb2, psend);
2189			}
2190			psend = ct;
2191		}
2192	}
2193last_forward:
2194	if (psend != -1) {
2195		ip6mr_forward2(net, mrt, skb, psend);
2196		return;
2197	}
2198
2199dont_forward:
2200	kfree_skb(skb);
2201}
2202
2203
2204/*
2205 *	Multicast packets for forwarding arrive here
2206 */
2207
2208int ip6_mr_input(struct sk_buff *skb)
2209{
2210	struct mfc6_cache *cache;
2211	struct net *net = dev_net(skb->dev);
2212	struct mr_table *mrt;
2213	struct flowi6 fl6 = {
2214		.flowi6_iif	= skb->dev->ifindex,
2215		.flowi6_mark	= skb->mark,
2216	};
2217	int err;
2218	struct net_device *dev;
2219
2220	/* skb->dev passed in is the master dev for vrfs.
2221	 * Get the proper interface that does have a vif associated with it.
2222	 */
2223	dev = skb->dev;
2224	if (netif_is_l3_master(skb->dev)) {
2225		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2226		if (!dev) {
2227			kfree_skb(skb);
2228			return -ENODEV;
2229		}
2230	}
2231
2232	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2233	if (err < 0) {
2234		kfree_skb(skb);
2235		return err;
2236	}
2237
2238	cache = ip6mr_cache_find(mrt,
2239				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2240	if (!cache) {
2241		int vif = ip6mr_find_vif(mrt, dev);
2242
2243		if (vif >= 0)
2244			cache = ip6mr_cache_find_any(mrt,
2245						     &ipv6_hdr(skb)->daddr,
2246						     vif);
2247	}
2248
2249	/*
2250	 *	No usable cache entry
2251	 */
2252	if (!cache) {
2253		int vif;
2254
2255		vif = ip6mr_find_vif(mrt, dev);
2256		if (vif >= 0) {
2257			int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
2258
2259			return err;
2260		}
2261		kfree_skb(skb);
2262		return -ENODEV;
2263	}
2264
2265	ip6_mr_forward(net, mrt, dev, skb, cache);
2266
2267	return 0;
2268}
2269
2270int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2271		    u32 portid)
2272{
2273	int err;
2274	struct mr_table *mrt;
2275	struct mfc6_cache *cache;
2276	struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
2277
2278	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2279	if (!mrt)
2280		return -ENOENT;
2281
2282	rcu_read_lock();
2283	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2284	if (!cache && skb->dev) {
2285		int vif = ip6mr_find_vif(mrt, skb->dev);
2286
2287		if (vif >= 0)
2288			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2289						     vif);
2290	}
2291
2292	if (!cache) {
2293		struct sk_buff *skb2;
2294		struct ipv6hdr *iph;
2295		struct net_device *dev;
2296		int vif;
2297
2298		dev = skb->dev;
2299		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2300			rcu_read_unlock();
2301			return -ENODEV;
2302		}
2303
2304		/* really correct? */
2305		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2306		if (!skb2) {
2307			rcu_read_unlock();
2308			return -ENOMEM;
2309		}
2310
2311		NETLINK_CB(skb2).portid = portid;
2312		skb_reset_transport_header(skb2);
2313
2314		skb_put(skb2, sizeof(struct ipv6hdr));
2315		skb_reset_network_header(skb2);
2316
2317		iph = ipv6_hdr(skb2);
2318		iph->version = 0;
2319		iph->priority = 0;
2320		iph->flow_lbl[0] = 0;
2321		iph->flow_lbl[1] = 0;
2322		iph->flow_lbl[2] = 0;
2323		iph->payload_len = 0;
2324		iph->nexthdr = IPPROTO_NONE;
2325		iph->hop_limit = 0;
2326		iph->saddr = rt->rt6i_src.addr;
2327		iph->daddr = rt->rt6i_dst.addr;
2328
2329		err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
2330		rcu_read_unlock();
2331
2332		return err;
2333	}
2334
2335	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2336	rcu_read_unlock();
2337	return err;
2338}
2339
2340static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2341			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2342			     int flags)
2343{
2344	struct nlmsghdr *nlh;
2345	struct rtmsg *rtm;
2346	int err;
2347
2348	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2349	if (!nlh)
2350		return -EMSGSIZE;
2351
2352	rtm = nlmsg_data(nlh);
2353	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2354	rtm->rtm_dst_len  = 128;
2355	rtm->rtm_src_len  = 128;
2356	rtm->rtm_tos      = 0;
2357	rtm->rtm_table    = mrt->id;
2358	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2359		goto nla_put_failure;
2360	rtm->rtm_type = RTN_MULTICAST;
2361	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2362	if (c->_c.mfc_flags & MFC_STATIC)
2363		rtm->rtm_protocol = RTPROT_STATIC;
2364	else
2365		rtm->rtm_protocol = RTPROT_MROUTED;
2366	rtm->rtm_flags    = 0;
2367
2368	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2369	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2370		goto nla_put_failure;
2371	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2372	/* do not break the dump if cache is unresolved */
2373	if (err < 0 && err != -ENOENT)
2374		goto nla_put_failure;
2375
2376	nlmsg_end(skb, nlh);
2377	return 0;
2378
2379nla_put_failure:
2380	nlmsg_cancel(skb, nlh);
2381	return -EMSGSIZE;
2382}
2383
2384static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2385			      u32 portid, u32 seq, struct mr_mfc *c,
2386			      int cmd, int flags)
2387{
2388	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2389				 cmd, flags);
2390}
2391
2392static int mr6_msgsize(bool unresolved, int maxvif)
2393{
2394	size_t len =
2395		NLMSG_ALIGN(sizeof(struct rtmsg))
2396		+ nla_total_size(4)	/* RTA_TABLE */
2397		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2398		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2399		;
2400
2401	if (!unresolved)
2402		len = len
2403		      + nla_total_size(4)	/* RTA_IIF */
2404		      + nla_total_size(0)	/* RTA_MULTIPATH */
2405		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2406						/* RTA_MFC_STATS */
2407		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2408		;
2409
2410	return len;
2411}
2412
2413static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2414			      int cmd)
2415{
2416	struct net *net = read_pnet(&mrt->net);
2417	struct sk_buff *skb;
2418	int err = -ENOBUFS;
2419
2420	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2421			GFP_ATOMIC);
2422	if (!skb)
2423		goto errout;
2424
2425	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2426	if (err < 0)
2427		goto errout;
2428
2429	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2430	return;
2431
2432errout:
2433	kfree_skb(skb);
2434	if (err < 0)
2435		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2436}
2437
2438static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2439{
2440	size_t len =
2441		NLMSG_ALIGN(sizeof(struct rtgenmsg))
2442		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
2443		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
2444					/* IP6MRA_CREPORT_SRC_ADDR */
2445		+ nla_total_size(sizeof(struct in6_addr))
2446					/* IP6MRA_CREPORT_DST_ADDR */
2447		+ nla_total_size(sizeof(struct in6_addr))
2448					/* IP6MRA_CREPORT_PKT */
2449		+ nla_total_size(payloadlen)
2450		;
2451
2452	return len;
2453}
2454
2455static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
2456{
2457	struct net *net = read_pnet(&mrt->net);
2458	struct nlmsghdr *nlh;
2459	struct rtgenmsg *rtgenm;
2460	struct mrt6msg *msg;
2461	struct sk_buff *skb;
2462	struct nlattr *nla;
2463	int payloadlen;
2464
2465	payloadlen = pkt->len - sizeof(struct mrt6msg);
2466	msg = (struct mrt6msg *)skb_transport_header(pkt);
2467
2468	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2469	if (!skb)
2470		goto errout;
2471
2472	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2473			sizeof(struct rtgenmsg), 0);
2474	if (!nlh)
2475		goto errout;
2476	rtgenm = nlmsg_data(nlh);
2477	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2478	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2479	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2480	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2481			     &msg->im6_src) ||
2482	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2483			     &msg->im6_dst))
2484		goto nla_put_failure;
2485
2486	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2487	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2488				  nla_data(nla), payloadlen))
2489		goto nla_put_failure;
2490
2491	nlmsg_end(skb, nlh);
2492
2493	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2494	return;
2495
2496nla_put_failure:
2497	nlmsg_cancel(skb, nlh);
2498errout:
2499	kfree_skb(skb);
2500	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2501}
2502
2503static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = {
2504	[RTA_SRC]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2505	[RTA_DST]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2506	[RTA_TABLE]		= { .type = NLA_U32 },
2507};
2508
2509static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb,
2510					const struct nlmsghdr *nlh,
2511					struct nlattr **tb,
2512					struct netlink_ext_ack *extack)
2513{
2514	struct rtmsg *rtm;
2515	int err;
2516
2517	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy,
2518			  extack);
2519	if (err)
2520		return err;
2521
2522	rtm = nlmsg_data(nlh);
2523	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
2524	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
2525	    rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
2526	    rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
2527		NL_SET_ERR_MSG_MOD(extack,
2528				   "Invalid values in header for multicast route get request");
2529		return -EINVAL;
2530	}
2531
2532	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2533	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2534		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
2535		return -EINVAL;
2536	}
2537
2538	return 0;
2539}
2540
2541static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2542			      struct netlink_ext_ack *extack)
2543{
2544	struct net *net = sock_net(in_skb->sk);
2545	struct in6_addr src = {}, grp = {};
2546	struct nlattr *tb[RTA_MAX + 1];
2547	struct mfc6_cache *cache;
2548	struct mr_table *mrt;
2549	struct sk_buff *skb;
2550	u32 tableid;
2551	int err;
2552
2553	err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2554	if (err < 0)
2555		return err;
2556
2557	if (tb[RTA_SRC])
2558		src = nla_get_in6_addr(tb[RTA_SRC]);
2559	if (tb[RTA_DST])
2560		grp = nla_get_in6_addr(tb[RTA_DST]);
2561	tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
2562
2563	mrt = ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
2564	if (!mrt) {
2565		NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
2566		return -ENOENT;
2567	}
2568
2569	/* entries are added/deleted only under RTNL */
2570	rcu_read_lock();
2571	cache = ip6mr_cache_find(mrt, &src, &grp);
2572	rcu_read_unlock();
2573	if (!cache) {
2574		NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found");
2575		return -ENOENT;
2576	}
2577
2578	skb = nlmsg_new(mr6_msgsize(false, mrt->maxvif), GFP_KERNEL);
2579	if (!skb)
2580		return -ENOBUFS;
2581
2582	err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
2583				nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0);
2584	if (err < 0) {
2585		kfree_skb(skb);
2586		return err;
2587	}
2588
2589	return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2590}
2591
2592static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2593{
2594	const struct nlmsghdr *nlh = cb->nlh;
2595	struct fib_dump_filter filter = {
2596		.rtnl_held = true,
2597	};
2598	int err;
2599
2600	if (cb->strict_check) {
2601		err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2602					    &filter, cb);
2603		if (err < 0)
2604			return err;
2605	}
2606
2607	if (filter.table_id) {
2608		struct mr_table *mrt;
2609
2610		mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2611		if (!mrt) {
2612			if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
2613				return skb->len;
2614
2615			NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2616			return -ENOENT;
2617		}
2618		err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2619				    &mfc_unres_lock, &filter);
2620		return skb->len ? : err;
2621	}
2622
2623	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2624				_ip6mr_fill_mroute, &mfc_unres_lock, &filter);
2625}
v6.2
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *	Linux IPv6 multicast routing support for BSD pim6sd
   4 *	Based on net/ipv4/ipmr.c.
   5 *
   6 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   7 *		LSIIT Laboratory, Strasbourg, France
   8 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   9 *		6WIND, Paris, France
  10 *	Copyright (C)2007,2008 USAGI/WIDE Project
  11 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  12 */
  13
  14#include <linux/uaccess.h>
  15#include <linux/types.h>
  16#include <linux/sched.h>
  17#include <linux/errno.h>
  18#include <linux/mm.h>
  19#include <linux/kernel.h>
  20#include <linux/fcntl.h>
  21#include <linux/stat.h>
  22#include <linux/socket.h>
  23#include <linux/inet.h>
  24#include <linux/netdevice.h>
  25#include <linux/inetdevice.h>
  26#include <linux/proc_fs.h>
  27#include <linux/seq_file.h>
  28#include <linux/init.h>
  29#include <linux/compat.h>
  30#include <linux/rhashtable.h>
  31#include <net/protocol.h>
  32#include <linux/skbuff.h>
  33#include <net/raw.h>
  34#include <linux/notifier.h>
  35#include <linux/if_arp.h>
  36#include <net/checksum.h>
  37#include <net/netlink.h>
  38#include <net/fib_rules.h>
  39
  40#include <net/ipv6.h>
  41#include <net/ip6_route.h>
  42#include <linux/mroute6.h>
  43#include <linux/pim.h>
  44#include <net/addrconf.h>
  45#include <linux/netfilter_ipv6.h>
  46#include <linux/export.h>
  47#include <net/ip6_checksum.h>
  48#include <linux/netconf.h>
  49#include <net/ip_tunnels.h>
  50
  51#include <linux/nospec.h>
  52
  53struct ip6mr_rule {
  54	struct fib_rule		common;
  55};
  56
  57struct ip6mr_result {
  58	struct mr_table	*mrt;
  59};
  60
  61/* Big lock, protecting vif table, mrt cache and mroute socket state.
  62   Note that the changes are semaphored via rtnl_lock.
  63 */
  64
  65static DEFINE_SPINLOCK(mrt_lock);
  66
  67static struct net_device *vif_dev_read(const struct vif_device *vif)
  68{
  69	return rcu_dereference(vif->dev);
  70}
  71
  72/* Multicast router control variables */
  73
  74/* Special spinlock for queue of unresolved entries */
  75static DEFINE_SPINLOCK(mfc_unres_lock);
  76
  77/* We return to original Alan's scheme. Hash table of resolved
  78   entries is changed only in process context and protected
  79   with weak lock mrt_lock. Queue of unresolved entries is protected
  80   with strong spinlock mfc_unres_lock.
  81
  82   In this case data path is free of exclusive locks at all.
  83 */
  84
  85static struct kmem_cache *mrt_cachep __read_mostly;
  86
  87static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
  88static void ip6mr_free_table(struct mr_table *mrt);
  89
  90static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
  91			   struct net_device *dev, struct sk_buff *skb,
  92			   struct mfc6_cache *cache);
  93static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
  94			      mifi_t mifi, int assert);
  95static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
  96			      int cmd);
  97static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
  98static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
  99			      struct netlink_ext_ack *extack);
 100static int ip6mr_rtm_dumproute(struct sk_buff *skb,
 101			       struct netlink_callback *cb);
 102static void mroute_clean_tables(struct mr_table *mrt, int flags);
 103static void ipmr_expire_process(struct timer_list *t);
 104
 105#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 106#define ip6mr_for_each_table(mrt, net) \
 107	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \
 108				lockdep_rtnl_is_held() || \
 109				list_empty(&net->ipv6.mr6_tables))
 110
 111static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 112					    struct mr_table *mrt)
 113{
 114	struct mr_table *ret;
 115
 116	if (!mrt)
 117		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
 118				     struct mr_table, list);
 119	else
 120		ret = list_entry_rcu(mrt->list.next,
 121				     struct mr_table, list);
 122
 123	if (&ret->list == &net->ipv6.mr6_tables)
 124		return NULL;
 125	return ret;
 126}
 127
 128static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 129{
 130	struct mr_table *mrt;
 131
 132	ip6mr_for_each_table(mrt, net) {
 133		if (mrt->id == id)
 134			return mrt;
 135	}
 136	return NULL;
 137}
 138
 139static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 140			    struct mr_table **mrt)
 141{
 142	int err;
 143	struct ip6mr_result res;
 144	struct fib_lookup_arg arg = {
 145		.result = &res,
 146		.flags = FIB_LOOKUP_NOREF,
 147	};
 148
 149	/* update flow if oif or iif point to device enslaved to l3mdev */
 150	l3mdev_update_flow(net, flowi6_to_flowi(flp6));
 151
 152	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
 153			       flowi6_to_flowi(flp6), 0, &arg);
 154	if (err < 0)
 155		return err;
 156	*mrt = res.mrt;
 157	return 0;
 158}
 159
 160static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
 161			     int flags, struct fib_lookup_arg *arg)
 162{
 163	struct ip6mr_result *res = arg->result;
 164	struct mr_table *mrt;
 165
 166	switch (rule->action) {
 167	case FR_ACT_TO_TBL:
 168		break;
 169	case FR_ACT_UNREACHABLE:
 170		return -ENETUNREACH;
 171	case FR_ACT_PROHIBIT:
 172		return -EACCES;
 173	case FR_ACT_BLACKHOLE:
 174	default:
 175		return -EINVAL;
 176	}
 177
 178	arg->table = fib_rule_get_table(rule, arg);
 179
 180	mrt = ip6mr_get_table(rule->fr_net, arg->table);
 181	if (!mrt)
 182		return -EAGAIN;
 183	res->mrt = mrt;
 184	return 0;
 185}
 186
 187static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
 188{
 189	return 1;
 190}
 191
 192static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 193				struct fib_rule_hdr *frh, struct nlattr **tb,
 194				struct netlink_ext_ack *extack)
 195{
 196	return 0;
 197}
 198
 199static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 200			      struct nlattr **tb)
 201{
 202	return 1;
 203}
 204
 205static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 206			   struct fib_rule_hdr *frh)
 207{
 208	frh->dst_len = 0;
 209	frh->src_len = 0;
 210	frh->tos     = 0;
 211	return 0;
 212}
 213
 214static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 215	.family		= RTNL_FAMILY_IP6MR,
 216	.rule_size	= sizeof(struct ip6mr_rule),
 217	.addr_size	= sizeof(struct in6_addr),
 218	.action		= ip6mr_rule_action,
 219	.match		= ip6mr_rule_match,
 220	.configure	= ip6mr_rule_configure,
 221	.compare	= ip6mr_rule_compare,
 222	.fill		= ip6mr_rule_fill,
 223	.nlgroup	= RTNLGRP_IPV6_RULE,
 224	.owner		= THIS_MODULE,
 225};
 226
 227static int __net_init ip6mr_rules_init(struct net *net)
 228{
 229	struct fib_rules_ops *ops;
 230	struct mr_table *mrt;
 231	int err;
 232
 233	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
 234	if (IS_ERR(ops))
 235		return PTR_ERR(ops);
 236
 237	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
 238
 239	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 240	if (IS_ERR(mrt)) {
 241		err = PTR_ERR(mrt);
 242		goto err1;
 243	}
 244
 245	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
 246	if (err < 0)
 247		goto err2;
 248
 249	net->ipv6.mr6_rules_ops = ops;
 250	return 0;
 251
 252err2:
 253	rtnl_lock();
 254	ip6mr_free_table(mrt);
 255	rtnl_unlock();
 256err1:
 257	fib_rules_unregister(ops);
 258	return err;
 259}
 260
 261static void __net_exit ip6mr_rules_exit(struct net *net)
 262{
 263	struct mr_table *mrt, *next;
 264
 265	ASSERT_RTNL();
 266	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 267		list_del(&mrt->list);
 268		ip6mr_free_table(mrt);
 269	}
 270	fib_rules_unregister(net->ipv6.mr6_rules_ops);
 271}
 272
 273static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
 274			    struct netlink_ext_ack *extack)
 275{
 276	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
 277}
 278
 279static unsigned int ip6mr_rules_seq_read(struct net *net)
 280{
 281	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
 282}
 283
 284bool ip6mr_rule_default(const struct fib_rule *rule)
 285{
 286	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
 287	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
 288}
 289EXPORT_SYMBOL(ip6mr_rule_default);
 290#else
 291#define ip6mr_for_each_table(mrt, net) \
 292	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 293
 294static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 295					    struct mr_table *mrt)
 296{
 297	if (!mrt)
 298		return net->ipv6.mrt6;
 299	return NULL;
 300}
 301
 302static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 303{
 304	return net->ipv6.mrt6;
 305}
 306
 307static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 308			    struct mr_table **mrt)
 309{
 310	*mrt = net->ipv6.mrt6;
 311	return 0;
 312}
 313
 314static int __net_init ip6mr_rules_init(struct net *net)
 315{
 316	struct mr_table *mrt;
 317
 318	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 319	if (IS_ERR(mrt))
 320		return PTR_ERR(mrt);
 321	net->ipv6.mrt6 = mrt;
 322	return 0;
 323}
 324
 325static void __net_exit ip6mr_rules_exit(struct net *net)
 326{
 327	ASSERT_RTNL();
 328	ip6mr_free_table(net->ipv6.mrt6);
 329	net->ipv6.mrt6 = NULL;
 330}
 331
 332static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
 333			    struct netlink_ext_ack *extack)
 334{
 335	return 0;
 336}
 337
 338static unsigned int ip6mr_rules_seq_read(struct net *net)
 339{
 340	return 0;
 341}
 342#endif
 343
 344static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
 345			  const void *ptr)
 346{
 347	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
 348	struct mfc6_cache *c = (struct mfc6_cache *)ptr;
 349
 350	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
 351	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
 352}
 353
 354static const struct rhashtable_params ip6mr_rht_params = {
 355	.head_offset = offsetof(struct mr_mfc, mnode),
 356	.key_offset = offsetof(struct mfc6_cache, cmparg),
 357	.key_len = sizeof(struct mfc6_cache_cmp_arg),
 358	.nelem_hint = 3,
 359	.obj_cmpfn = ip6mr_hash_cmp,
 360	.automatic_shrinking = true,
 361};
 362
 363static void ip6mr_new_table_set(struct mr_table *mrt,
 364				struct net *net)
 365{
 366#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 367	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
 368#endif
 369}
 370
 371static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
 372	.mf6c_origin = IN6ADDR_ANY_INIT,
 373	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
 374};
 375
 376static struct mr_table_ops ip6mr_mr_table_ops = {
 377	.rht_params = &ip6mr_rht_params,
 378	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
 379};
 380
 381static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 382{
 383	struct mr_table *mrt;
 384
 385	mrt = ip6mr_get_table(net, id);
 386	if (mrt)
 387		return mrt;
 388
 389	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
 390			      ipmr_expire_process, ip6mr_new_table_set);
 391}
 392
 393static void ip6mr_free_table(struct mr_table *mrt)
 394{
 395	timer_shutdown_sync(&mrt->ipmr_expire_timer);
 396	mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
 397				 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
 398	rhltable_destroy(&mrt->mfc_hash);
 399	kfree(mrt);
 400}
 401
 402#ifdef CONFIG_PROC_FS
 403/* The /proc interfaces to multicast routing
 404 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 405 */
 406
 407static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 408	__acquires(RCU)
 409{
 410	struct mr_vif_iter *iter = seq->private;
 411	struct net *net = seq_file_net(seq);
 412	struct mr_table *mrt;
 413
 414	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 415	if (!mrt)
 416		return ERR_PTR(-ENOENT);
 417
 418	iter->mrt = mrt;
 419
 420	rcu_read_lock();
 421	return mr_vif_seq_start(seq, pos);
 422}
 423
 424static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 425	__releases(RCU)
 426{
 427	rcu_read_unlock();
 428}
 429
 430static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 431{
 432	struct mr_vif_iter *iter = seq->private;
 433	struct mr_table *mrt = iter->mrt;
 434
 435	if (v == SEQ_START_TOKEN) {
 436		seq_puts(seq,
 437			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
 438	} else {
 439		const struct vif_device *vif = v;
 440		const struct net_device *vif_dev;
 441		const char *name;
 442
 443		vif_dev = vif_dev_read(vif);
 444		name = vif_dev ? vif_dev->name : "none";
 445
 446		seq_printf(seq,
 447			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
 448			   vif - mrt->vif_table,
 449			   name, vif->bytes_in, vif->pkt_in,
 450			   vif->bytes_out, vif->pkt_out,
 451			   vif->flags);
 452	}
 453	return 0;
 454}
 455
 456static const struct seq_operations ip6mr_vif_seq_ops = {
 457	.start = ip6mr_vif_seq_start,
 458	.next  = mr_vif_seq_next,
 459	.stop  = ip6mr_vif_seq_stop,
 460	.show  = ip6mr_vif_seq_show,
 461};
 462
 463static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 464{
 465	struct net *net = seq_file_net(seq);
 466	struct mr_table *mrt;
 467
 468	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 469	if (!mrt)
 470		return ERR_PTR(-ENOENT);
 471
 472	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
 473}
 474
 475static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 476{
 477	int n;
 478
 479	if (v == SEQ_START_TOKEN) {
 480		seq_puts(seq,
 481			 "Group                            "
 482			 "Origin                           "
 483			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
 484	} else {
 485		const struct mfc6_cache *mfc = v;
 486		const struct mr_mfc_iter *it = seq->private;
 487		struct mr_table *mrt = it->mrt;
 488
 489		seq_printf(seq, "%pI6 %pI6 %-3hd",
 490			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 491			   mfc->_c.mfc_parent);
 492
 493		if (it->cache != &mrt->mfc_unres_queue) {
 494			seq_printf(seq, " %8lu %8lu %8lu",
 495				   mfc->_c.mfc_un.res.pkt,
 496				   mfc->_c.mfc_un.res.bytes,
 497				   mfc->_c.mfc_un.res.wrong_if);
 498			for (n = mfc->_c.mfc_un.res.minvif;
 499			     n < mfc->_c.mfc_un.res.maxvif; n++) {
 500				if (VIF_EXISTS(mrt, n) &&
 501				    mfc->_c.mfc_un.res.ttls[n] < 255)
 502					seq_printf(seq,
 503						   " %2d:%-3d", n,
 504						   mfc->_c.mfc_un.res.ttls[n]);
 505			}
 506		} else {
 507			/* unresolved mfc_caches don't contain
 508			 * pkt, bytes and wrong_if values
 509			 */
 510			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
 511		}
 512		seq_putc(seq, '\n');
 513	}
 514	return 0;
 515}
 516
 517static const struct seq_operations ipmr_mfc_seq_ops = {
 518	.start = ipmr_mfc_seq_start,
 519	.next  = mr_mfc_seq_next,
 520	.stop  = mr_mfc_seq_stop,
 521	.show  = ipmr_mfc_seq_show,
 522};
 523#endif
 524
 525#ifdef CONFIG_IPV6_PIMSM_V2
 526
 527static int pim6_rcv(struct sk_buff *skb)
 528{
 529	struct pimreghdr *pim;
 530	struct ipv6hdr   *encap;
 531	struct net_device  *reg_dev = NULL;
 532	struct net *net = dev_net(skb->dev);
 533	struct mr_table *mrt;
 534	struct flowi6 fl6 = {
 535		.flowi6_iif	= skb->dev->ifindex,
 536		.flowi6_mark	= skb->mark,
 537	};
 538	int reg_vif_num;
 539
 540	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 541		goto drop;
 542
 543	pim = (struct pimreghdr *)skb_transport_header(skb);
 544	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
 545	    (pim->flags & PIM_NULL_REGISTER) ||
 546	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 547			     sizeof(*pim), IPPROTO_PIM,
 548			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
 549	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 550		goto drop;
 551
 552	/* check if the inner packet is destined to mcast group */
 553	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
 554				   sizeof(*pim));
 555
 556	if (!ipv6_addr_is_multicast(&encap->daddr) ||
 557	    encap->payload_len == 0 ||
 558	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 559		goto drop;
 560
 561	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 562		goto drop;
 563
 564	/* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
 565	reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
 566	if (reg_vif_num >= 0)
 567		reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);
 568
 569	if (!reg_dev)
 570		goto drop;
 571
 572	skb->mac_header = skb->network_header;
 573	skb_pull(skb, (u8 *)encap - skb->data);
 574	skb_reset_network_header(skb);
 575	skb->protocol = htons(ETH_P_IPV6);
 576	skb->ip_summed = CHECKSUM_NONE;
 577
 578	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
 579
 580	netif_rx(skb);
 581
 582	return 0;
 583 drop:
 584	kfree_skb(skb);
 585	return 0;
 586}
 587
 588static const struct inet6_protocol pim6_protocol = {
 589	.handler	=	pim6_rcv,
 590};
 591
 592/* Service routines creating virtual interfaces: PIMREG */
 593
 594static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 595				      struct net_device *dev)
 596{
 597	struct net *net = dev_net(dev);
 598	struct mr_table *mrt;
 599	struct flowi6 fl6 = {
 600		.flowi6_oif	= dev->ifindex,
 601		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
 602		.flowi6_mark	= skb->mark,
 603	};
 604
 605	if (!pskb_inet_may_pull(skb))
 606		goto tx_err;
 607
 608	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 609		goto tx_err;
 610
 611	DEV_STATS_ADD(dev, tx_bytes, skb->len);
 612	DEV_STATS_INC(dev, tx_packets);
 613	rcu_read_lock();
 614	ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
 615			   MRT6MSG_WHOLEPKT);
 616	rcu_read_unlock();
 617	kfree_skb(skb);
 618	return NETDEV_TX_OK;
 619
 620tx_err:
 621	DEV_STATS_INC(dev, tx_errors);
 622	kfree_skb(skb);
 623	return NETDEV_TX_OK;
 624}
 625
 626static int reg_vif_get_iflink(const struct net_device *dev)
 627{
 628	return 0;
 629}
 630
 631static const struct net_device_ops reg_vif_netdev_ops = {
 632	.ndo_start_xmit	= reg_vif_xmit,
 633	.ndo_get_iflink = reg_vif_get_iflink,
 634};
 635
 636static void reg_vif_setup(struct net_device *dev)
 637{
 638	dev->type		= ARPHRD_PIMREG;
 639	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
 640	dev->flags		= IFF_NOARP;
 641	dev->netdev_ops		= &reg_vif_netdev_ops;
 642	dev->needs_free_netdev	= true;
 643	dev->features		|= NETIF_F_NETNS_LOCAL;
 644}
 645
 646static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
 647{
 648	struct net_device *dev;
 649	char name[IFNAMSIZ];
 650
 651	if (mrt->id == RT6_TABLE_DFLT)
 652		sprintf(name, "pim6reg");
 653	else
 654		sprintf(name, "pim6reg%u", mrt->id);
 655
 656	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
 657	if (!dev)
 658		return NULL;
 659
 660	dev_net_set(dev, net);
 661
 662	if (register_netdevice(dev)) {
 663		free_netdev(dev);
 664		return NULL;
 665	}
 666
 667	if (dev_open(dev, NULL))
 668		goto failure;
 669
 670	dev_hold(dev);
 671	return dev;
 672
 673failure:
 674	unregister_netdevice(dev);
 675	return NULL;
 676}
 677#endif
 678
 679static int call_ip6mr_vif_entry_notifiers(struct net *net,
 680					  enum fib_event_type event_type,
 681					  struct vif_device *vif,
 682					  struct net_device *vif_dev,
 683					  mifi_t vif_index, u32 tb_id)
 684{
 685	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
 686				     vif, vif_dev, vif_index, tb_id,
 687				     &net->ipv6.ipmr_seq);
 688}
 689
 690static int call_ip6mr_mfc_entry_notifiers(struct net *net,
 691					  enum fib_event_type event_type,
 692					  struct mfc6_cache *mfc, u32 tb_id)
 693{
 694	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
 695				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
 696}
 697
 698/* Delete a VIF entry */
 699static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
 700		       struct list_head *head)
 701{
 702	struct vif_device *v;
 703	struct net_device *dev;
 704	struct inet6_dev *in6_dev;
 705
 706	if (vifi < 0 || vifi >= mrt->maxvif)
 707		return -EADDRNOTAVAIL;
 708
 709	v = &mrt->vif_table[vifi];
 710
 711	dev = rtnl_dereference(v->dev);
 712	if (!dev)
 713		return -EADDRNOTAVAIL;
 714
 715	call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
 716				       FIB_EVENT_VIF_DEL, v, dev,
 717				       vifi, mrt->id);
 718	spin_lock(&mrt_lock);
 719	RCU_INIT_POINTER(v->dev, NULL);
 720
 721#ifdef CONFIG_IPV6_PIMSM_V2
 722	if (vifi == mrt->mroute_reg_vif_num) {
 723		/* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */
 724		WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
 725	}
 726#endif
 727
 728	if (vifi + 1 == mrt->maxvif) {
 729		int tmp;
 730		for (tmp = vifi - 1; tmp >= 0; tmp--) {
 731			if (VIF_EXISTS(mrt, tmp))
 732				break;
 733		}
 734		WRITE_ONCE(mrt->maxvif, tmp + 1);
 735	}
 736
 737	spin_unlock(&mrt_lock);
 738
 739	dev_set_allmulti(dev, -1);
 740
 741	in6_dev = __in6_dev_get(dev);
 742	if (in6_dev) {
 743		atomic_dec(&in6_dev->cnf.mc_forwarding);
 744		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
 745					     NETCONFA_MC_FORWARDING,
 746					     dev->ifindex, &in6_dev->cnf);
 747	}
 748
 749	if ((v->flags & MIFF_REGISTER) && !notify)
 750		unregister_netdevice_queue(dev, head);
 751
 752	netdev_put(dev, &v->dev_tracker);
 753	return 0;
 754}
 755
 756static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
 757{
 758	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
 759
 760	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
 761}
 762
 763static inline void ip6mr_cache_free(struct mfc6_cache *c)
 764{
 765	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
 766}
 767
 768/* Destroy an unresolved cache entry, killing queued skbs
 769   and reporting error to netlink readers.
 770 */
 771
 772static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
 773{
 774	struct net *net = read_pnet(&mrt->net);
 775	struct sk_buff *skb;
 776
 777	atomic_dec(&mrt->cache_resolve_queue_len);
 778
 779	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
 780		if (ipv6_hdr(skb)->version == 0) {
 781			struct nlmsghdr *nlh = skb_pull(skb,
 782							sizeof(struct ipv6hdr));
 783			nlh->nlmsg_type = NLMSG_ERROR;
 784			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 785			skb_trim(skb, nlh->nlmsg_len);
 786			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
 787			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 788		} else
 789			kfree_skb(skb);
 790	}
 791
 792	ip6mr_cache_free(c);
 793}
 794
 795
 796/* Timer process for all the unresolved queue. */
 797
 798static void ipmr_do_expire_process(struct mr_table *mrt)
 799{
 800	unsigned long now = jiffies;
 801	unsigned long expires = 10 * HZ;
 802	struct mr_mfc *c, *next;
 803
 804	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
 805		if (time_after(c->mfc_un.unres.expires, now)) {
 806			/* not yet... */
 807			unsigned long interval = c->mfc_un.unres.expires - now;
 808			if (interval < expires)
 809				expires = interval;
 810			continue;
 811		}
 812
 813		list_del(&c->list);
 814		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
 815		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
 816	}
 817
 818	if (!list_empty(&mrt->mfc_unres_queue))
 819		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 820}
 821
 822static void ipmr_expire_process(struct timer_list *t)
 823{
 824	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
 825
 826	if (!spin_trylock(&mfc_unres_lock)) {
 827		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 828		return;
 829	}
 830
 831	if (!list_empty(&mrt->mfc_unres_queue))
 832		ipmr_do_expire_process(mrt);
 833
 834	spin_unlock(&mfc_unres_lock);
 835}
 836
 837/* Fill oifs list. It is called under locked mrt_lock. */
 838
 839static void ip6mr_update_thresholds(struct mr_table *mrt,
 840				    struct mr_mfc *cache,
 841				    unsigned char *ttls)
 842{
 843	int vifi;
 844
 845	cache->mfc_un.res.minvif = MAXMIFS;
 846	cache->mfc_un.res.maxvif = 0;
 847	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 848
 849	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
 850		if (VIF_EXISTS(mrt, vifi) &&
 851		    ttls[vifi] && ttls[vifi] < 255) {
 852			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 853			if (cache->mfc_un.res.minvif > vifi)
 854				cache->mfc_un.res.minvif = vifi;
 855			if (cache->mfc_un.res.maxvif <= vifi)
 856				cache->mfc_un.res.maxvif = vifi + 1;
 857		}
 858	}
 859	cache->mfc_un.res.lastuse = jiffies;
 860}
 861
/*
 * Add the multicast interface described by @vifc to @mrt.
 *
 * @mrtsock: non-zero when the request comes from the table's own mroute
 *           socket; otherwise the VIF is flagged VIFF_STATIC.
 *
 * Returns 0 on success, -EADDRINUSE if the slot (or, for MIFF_REGISTER, the
 * register VIF) is already taken, -ENOBUFS if the register device could not
 * be created, -EADDRNOTAVAIL if mif6c_pifi names no device, -EINVAL for
 * unsupported flags, or the error from dev_set_allmulti().
 *
 * vif_table is indexed with mif6c_mifi before any check in this function,
 * so the caller must have range-checked it (ip6_mroute_setsockopt() rejects
 * values >= MAXMIFS).
 */
static int mif6_add(struct net *net, struct mr_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			/* Undo the register device created just above. */
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		/* Plain VIF bound to an existing device, looked up by ifindex. */
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		atomic_inc(&in6_dev->cnf.mc_forwarding);
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
			MIFF_REGISTER);

	/* And finish update writing critical data */
	spin_lock(&mrt_lock);
	rcu_assign_pointer(v->dev, dev);
	netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
#endif
	/* Grow maxvif so that it always covers the highest slot in use. */
	if (vifi + 1 > mrt->maxvif)
		WRITE_ONCE(mrt->maxvif, vifi + 1);
	spin_unlock(&mrt_lock);
	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
				       v, dev, vifi, mrt->id);
	return 0;
}
 937
 938static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
 939					   const struct in6_addr *origin,
 940					   const struct in6_addr *mcastgrp)
 941{
 942	struct mfc6_cache_cmp_arg arg = {
 943		.mf6c_origin = *origin,
 944		.mf6c_mcastgrp = *mcastgrp,
 945	};
 946
 947	return mr_mfc_find(mrt, &arg);
 948}
 949
 950/* Look for a (*,G) entry */
 951static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
 952					       struct in6_addr *mcastgrp,
 953					       mifi_t mifi)
 954{
 955	struct mfc6_cache_cmp_arg arg = {
 956		.mf6c_origin = in6addr_any,
 957		.mf6c_mcastgrp = *mcastgrp,
 958	};
 959
 960	if (ipv6_addr_any(mcastgrp))
 961		return mr_mfc_find_any_parent(mrt, mifi);
 962	return mr_mfc_find_any(mrt, mifi, &arg);
 963}
 964
 965/* Look for a (S,G,iif) entry if parent != -1 */
 966static struct mfc6_cache *
 967ip6mr_cache_find_parent(struct mr_table *mrt,
 968			const struct in6_addr *origin,
 969			const struct in6_addr *mcastgrp,
 970			int parent)
 971{
 972	struct mfc6_cache_cmp_arg arg = {
 973		.mf6c_origin = *origin,
 974		.mf6c_mcastgrp = *mcastgrp,
 975	};
 976
 977	return mr_mfc_find_parent(mrt, &arg, parent);
 978}
 979
 980/* Allocate a multicast cache entry */
 981static struct mfc6_cache *ip6mr_cache_alloc(void)
 982{
 983	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 984	if (!c)
 985		return NULL;
 986	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 987	c->_c.mfc_un.res.minvif = MAXMIFS;
 988	c->_c.free = ip6mr_cache_free_rcu;
 989	refcount_set(&c->_c.mfc_un.res.refcount, 1);
 990	return c;
 991}
 992
 993static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
 994{
 995	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 996	if (!c)
 997		return NULL;
 998	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
 999	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
1000	return c;
1001}
1002
1003/*
1004 *	A cache entry has gone into a resolved state from queued
1005 */
1006
1007static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1008				struct mfc6_cache *uc, struct mfc6_cache *c)
1009{
1010	struct sk_buff *skb;
1011
1012	/*
1013	 *	Play the pending entries through our router
1014	 */
1015
1016	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1017		if (ipv6_hdr(skb)->version == 0) {
1018			struct nlmsghdr *nlh = skb_pull(skb,
1019							sizeof(struct ipv6hdr));
1020
1021			if (mr_fill_mroute(mrt, skb, &c->_c,
1022					   nlmsg_data(nlh)) > 0) {
1023				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1024			} else {
1025				nlh->nlmsg_type = NLMSG_ERROR;
1026				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1027				skb_trim(skb, nlh->nlmsg_len);
1028				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1029			}
1030			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1031		} else {
1032			rcu_read_lock();
1033			ip6_mr_forward(net, mrt, skb->dev, skb, c);
1034			rcu_read_unlock();
1035		}
1036	}
1037}
1038
/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Builds a struct mrt6msg report of type @assert about @pkt and queues
 *	it on the table's mroute socket; @pkt itself is not consumed.
 *
 *	Returns the result of sock_queue_rcv_skb(), -ENOBUFS if no skb could
 *	be allocated, or -EINVAL if the table has no mroute socket.
 *
 *	Called under rcu_read_lock()
 */

static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	/* Whole-packet reports carry a copy of @pkt with extra headroom for
	 * the mrt6msg header; every other report gets a small fresh skb.
	 */
	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		/* WRMIFWHOLE reports the caller's mif, WHOLEPKT the register vif. */
		if (assert == MRT6MSG_WRMIFWHOLE)
			msg->im6_mif = mifi;
		else
			msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/* Without a listening mroute socket there is nobody to report to. */
	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);

	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
1139
/* Queue a packet for resolution. It gets locked cache entry!
 *
 * Looks for an unresolved entry matching the packet's (source, group); if
 * none exists one is created, reported to user space with MRT6MSG_NOCACHE
 * and added to mrt->mfc_unres_queue. The packet is then appended to the
 * entry's pending queue unless that queue already holds more than 3 skbs.
 *
 * @skb is consumed on every path: it is either queued or freed here.
 *
 * Returns 0 when the packet was queued, -ENOBUFS when allocation failed or
 * the pending queue is full, or the error from ip6mr_cache_report().
 */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb, struct net_device *dev)
{
	struct mfc6_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		c = ip6mr_cache_alloc_unres();
		if (!c) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		/* Runs an expiry pass, which also (re)arms the expiry timer. */
		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		/* Record the input device on the skb before parking it. */
		if (dev) {
			skb->dev = dev;
			skb->skb_iif = dev->ifindex;
		}
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
1213
1214/*
1215 *	MFC6 cache manipulation by user space
1216 */
1217
1218static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1219			    int parent)
1220{
1221	struct mfc6_cache *c;
1222
1223	/* The entries are added/deleted only under RTNL */
1224	rcu_read_lock();
1225	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1226				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1227	rcu_read_unlock();
1228	if (!c)
1229		return -ENOENT;
1230	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1231	list_del_rcu(&c->_c.list);
1232
1233	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1234				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1235	mr6_netlink_event(mrt, c, RTM_DELROUTE);
1236	mr_cache_put(&c->_c);
1237	return 0;
1238}
1239
1240static int ip6mr_device_event(struct notifier_block *this,
1241			      unsigned long event, void *ptr)
1242{
1243	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1244	struct net *net = dev_net(dev);
1245	struct mr_table *mrt;
1246	struct vif_device *v;
1247	int ct;
1248
1249	if (event != NETDEV_UNREGISTER)
1250		return NOTIFY_DONE;
1251
1252	ip6mr_for_each_table(mrt, net) {
1253		v = &mrt->vif_table[0];
1254		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1255			if (rcu_access_pointer(v->dev) == dev)
1256				mif6_delete(mrt, ct, 1, NULL);
1257		}
1258	}
1259
1260	return NOTIFY_DONE;
1261}
1262
1263static unsigned int ip6mr_seq_read(struct net *net)
1264{
1265	ASSERT_RTNL();
1266
1267	return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
1268}
1269
1270static int ip6mr_dump(struct net *net, struct notifier_block *nb,
1271		      struct netlink_ext_ack *extack)
1272{
1273	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1274		       ip6mr_mr_table_iter, extack);
1275}
1276
1277static struct notifier_block ip6_mr_notifier = {
1278	.notifier_call = ip6mr_device_event
1279};
1280
1281static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1282	.family		= RTNL_FAMILY_IP6MR,
1283	.fib_seq_read	= ip6mr_seq_read,
1284	.fib_dump	= ip6mr_dump,
1285	.owner		= THIS_MODULE,
1286};
1287
1288static int __net_init ip6mr_notifier_init(struct net *net)
1289{
1290	struct fib_notifier_ops *ops;
1291
1292	net->ipv6.ipmr_seq = 0;
1293
1294	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1295	if (IS_ERR(ops))
1296		return PTR_ERR(ops);
1297
1298	net->ipv6.ip6mr_notifier_ops = ops;
1299
1300	return 0;
1301}
1302
1303static void __net_exit ip6mr_notifier_exit(struct net *net)
1304{
1305	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1306	net->ipv6.ip6mr_notifier_ops = NULL;
1307}
1308
/* Setup for IP multicast routing
 *
 * Per-netns init: registers the FIB notifier ops, initialises the rules and,
 * with CONFIG_PROC_FS, creates the "ip6_mr_vif" and "ip6_mr_cache" proc
 * entries. On failure everything set up so far is undone in reverse order
 * and the error is returned; returns 0 on success.
 */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_notifier_init(net);
	if (err)
		return err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
	/* proc_create_net() only reports failure as NULL, so use -ENOMEM. */
	err = -ENOMEM;
	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
			sizeof(struct mr_vif_iter)))
		goto proc_vif_fail;
	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
			sizeof(struct mr_mfc_iter)))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	/* ip6mr_rules_exit() is called with RTNL held here. */
	rtnl_lock();
	ip6mr_rules_exit(net);
	rtnl_unlock();
#endif
ip6mr_rules_fail:
	ip6mr_notifier_exit(net);
	return err;
}
1346
1347static void __net_exit ip6mr_net_exit(struct net *net)
1348{
1349#ifdef CONFIG_PROC_FS
1350	remove_proc_entry("ip6_mr_cache", net->proc_net);
1351	remove_proc_entry("ip6_mr_vif", net->proc_net);
1352#endif
1353	ip6mr_notifier_exit(net);
1354}
1355
1356static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
1357{
1358	struct net *net;
1359
1360	rtnl_lock();
1361	list_for_each_entry(net, net_list, exit_list)
1362		ip6mr_rules_exit(net);
1363	rtnl_unlock();
1364}
1365
1366static struct pernet_operations ip6mr_net_ops = {
1367	.init = ip6mr_net_init,
1368	.exit = ip6mr_net_exit,
1369	.exit_batch = ip6mr_net_exit_batch,
1370};
1371
1372int __init ip6_mr_init(void)
1373{
1374	int err;
1375
1376	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1377				       sizeof(struct mfc6_cache),
1378				       0, SLAB_HWCACHE_ALIGN,
1379				       NULL);
1380	if (!mrt_cachep)
1381		return -ENOMEM;
1382
1383	err = register_pernet_subsys(&ip6mr_net_ops);
1384	if (err)
1385		goto reg_pernet_fail;
1386
1387	err = register_netdevice_notifier(&ip6_mr_notifier);
1388	if (err)
1389		goto reg_notif_fail;
1390#ifdef CONFIG_IPV6_PIMSM_V2
1391	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1392		pr_err("%s: can't add PIM protocol\n", __func__);
1393		err = -EAGAIN;
1394		goto add_proto_fail;
1395	}
1396#endif
1397	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1398				   ip6mr_rtm_getroute, ip6mr_rtm_dumproute, 0);
1399	if (err == 0)
1400		return 0;
1401
1402#ifdef CONFIG_IPV6_PIMSM_V2
1403	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1404add_proto_fail:
1405	unregister_netdevice_notifier(&ip6_mr_notifier);
1406#endif
1407reg_notif_fail:
1408	unregister_pernet_subsys(&ip6mr_net_ops);
1409reg_pernet_fail:
1410	kmem_cache_destroy(mrt_cachep);
1411	return err;
1412}
1413
/* Undo ip6_mr_init(): unregister everything in the reverse order of
 * registration, then destroy the MFC slab cache.
 */
void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
1424
/*
 * Add or update the resolved cache entry described by @mfc.
 *
 * @mrtsock: non-zero when the request comes from the table's own mroute
 *           socket; otherwise the entry is flagged MFC_STATIC.
 * @parent:  parent interface used for the lookup of an existing entry.
 *
 * If a matching entry already exists, its parent and thresholds are updated
 * in place and FIB_EVENT_ENTRY_REPLACE is emitted. Otherwise a new entry is
 * allocated and inserted, and any unresolved entry for the same
 * (origin, group) is resolved: its queued packets are replayed through
 * ip6mr_cache_resolve() and the unresolved entry is freed.
 *
 * Returns 0 on success, -ENFILE if mf6cc_parent >= MAXMIFS, -EINVAL if the
 * group is neither unspecified nor multicast, -ENOMEM on allocation failure,
 * or the error from the hash table insert.
 */
static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	unsigned char ttls[MAXMIFS];
	struct mfc6_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	/* Threshold 1 for every interface in mf6cc_ifset, 255 for the rest. */
	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		/* Existing entry: update it in place under mrt_lock. */
		spin_lock(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		spin_unlock(&mrt_lock);
		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
					       c, mrt->id);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	/* The new entry is filled in before it is made visible below. */
	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	/* Nothing left to expire: stop the expiry timer. */
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	/* uc is only meaningful when found is true. */
	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
				       c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}
1513
/*
 *	Close the multicast socket, and clear the vif tables etc
 *
 *	@flags selects what is flushed from @mrt:
 *	  MRT6_FLUSH_MIFS        - VIFs without VIFF_STATIC
 *	  MRT6_FLUSH_MIFS_STATIC - VIFs with VIFF_STATIC
 *	  MRT6_FLUSH_MFC         - cache entries without MFC_STATIC, plus
 *	                           the whole unresolved queue
 *	  MRT6_FLUSH_MFC_STATIC  - cache entries with MFC_STATIC
 */

static void mroute_clean_tables(struct mr_table *mrt, int flags)
{
	struct mr_mfc *c, *tmp;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
		for (i = 0; i < mrt->maxvif; i++) {
			/* Skip VIFs whose static/non-static class was not requested. */
			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
			     !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
				continue;
			mif6_delete(mrt, i, 0, &list);
		}
		/* Unregister the devices that mif6_delete() queued on the list. */
		unregister_netdevice_many(&list);
	}

	/* Wipe the cache */
	if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
			/* Skip entries whose static/non-static class was not requested. */
			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
				continue;
			rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
			list_del_rcu(&c->list);
			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
						       FIB_EVENT_ENTRY_DEL,
						       (struct mfc6_cache *)c, mrt->id);
			mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
			mr_cache_put(c);
		}
	}

	/* Drop every unresolved entry together with its queued packets. */
	if (flags & MRT6_FLUSH_MFC) {
		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
			spin_lock_bh(&mfc_unres_lock);
			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
				list_del(&c->list);
				mr6_netlink_event(mrt, (struct mfc6_cache *)c,
						  RTM_DELROUTE);
				ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
			}
			spin_unlock_bh(&mfc_unres_lock);
		}
	}
}
1565
1566static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1567{
1568	int err = 0;
1569	struct net *net = sock_net(sk);
1570
1571	rtnl_lock();
1572	spin_lock(&mrt_lock);
1573	if (rtnl_dereference(mrt->mroute_sk)) {
1574		err = -EADDRINUSE;
1575	} else {
1576		rcu_assign_pointer(mrt->mroute_sk, sk);
1577		sock_set_flag(sk, SOCK_RCU_FREE);
1578		atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
1579	}
1580	spin_unlock(&mrt_lock);
1581
1582	if (!err)
1583		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1584					     NETCONFA_MC_FORWARDING,
1585					     NETCONFA_IFINDEX_ALL,
1586					     net->ipv6.devconf_all);
1587	rtnl_unlock();
1588
1589	return err;
1590}
1591
1592int ip6mr_sk_done(struct sock *sk)
1593{
1594	struct net *net = sock_net(sk);
1595	struct ipv6_devconf *devconf;
1596	struct mr_table *mrt;
1597	int err = -EACCES;
1598
1599	if (sk->sk_type != SOCK_RAW ||
1600	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1601		return err;
1602
1603	devconf = net->ipv6.devconf_all;
1604	if (!devconf || !atomic_read(&devconf->mc_forwarding))
1605		return err;
1606
1607	rtnl_lock();
1608	ip6mr_for_each_table(mrt, net) {
1609		if (sk == rtnl_dereference(mrt->mroute_sk)) {
1610			spin_lock(&mrt_lock);
1611			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1612			/* Note that mroute_sk had SOCK_RCU_FREE set,
1613			 * so the RCU grace period before sk freeing
1614			 * is guaranteed by sk_destruct()
1615			 */
1616			atomic_dec(&devconf->mc_forwarding);
1617			spin_unlock(&mrt_lock);
1618			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1619						     NETCONFA_MC_FORWARDING,
1620						     NETCONFA_IFINDEX_ALL,
1621						     net->ipv6.devconf_all);
1622
1623			mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
1624			err = 0;
1625			break;
1626		}
1627	}
1628	rtnl_unlock();
1629
1630	return err;
1631}
1632
1633bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1634{
1635	struct mr_table *mrt;
1636	struct flowi6 fl6 = {
1637		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1638		.flowi6_oif	= skb->dev->ifindex,
1639		.flowi6_mark	= skb->mark,
1640	};
1641
1642	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1643		return NULL;
1644
1645	return rcu_access_pointer(mrt->mroute_sk);
1646}
1647EXPORT_SYMBOL(mroute6_is_socket);
1648
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 *
 *	Handles the MRT6_* set-options on a raw ICMPv6 socket. Every option
 *	except MRT6_INIT requires the caller to be the table's mroute socket
 *	or to have CAP_NET_ADMIN in the netns' user namespace.
 *
 *	Returns 0 or the option's own result on success; -EOPNOTSUPP for a
 *	wrong socket type, -ENOENT if the socket's table does not exist,
 *	-EACCES on a failed permission check, -EINVAL for a bad optlen,
 *	-EFAULT if the option value cannot be copied, and -ENOPROTOOPT for
 *	an unknown option.
 */

int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
			  unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	/* An unset (zero) ip6mr_table selects the default table. */
	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_sockptr(&vif, optval, sizeof(vif)))
			return -EFAULT;
		/* mif6_add() indexes vif_table with this value unchecked. */
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif,
			       sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		/* Non-proxy variants use parent -1. */
		parent = -1;
		fallthrough;
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		/* parent is still 0 only for the _PROXY variants. */
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk ==
					    rtnl_dereference(mrt->mroute_sk),
					    parent);
		rtnl_unlock();
		return ret;

	case MRT6_FLUSH:
	{
		int flags;

		if (optlen != sizeof(flags))
			return -EINVAL;
		if (copy_from_sockptr(&flags, optval, sizeof(flags)))
			return -EFAULT;
		rtnl_lock();
		mroute_clean_tables(mrt, flags);
		rtnl_unlock();
		return 0;
	}

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		bool do_wrmifwhole;
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;

		/* Remember the WRMIFWHOLE request before v is reduced to 0/1. */
		do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE);
		v = !!v;
		rtnl_lock();
		ret = 0;
		/* The settings only change when the on/off state itself changes. */
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
			mrt->mroute_do_wrvifwhole = do_wrmifwhole;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		/* A socket already serving as mroute socket cannot switch tables. */
		if (sk == rcu_access_pointer(mrt->mroute_sk))
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		mrt = ip6mr_new_table(net, v);
		if (IS_ERR(mrt))
			ret = PTR_ERR(mrt);
		else
			raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
1828
1829/*
1830 *	Getsock opt support for the multicast routing system.
1831 */
1832
1833int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
1834			  sockptr_t optlen)
1835{
1836	int olr;
1837	int val;
1838	struct net *net = sock_net(sk);
1839	struct mr_table *mrt;
1840
1841	if (sk->sk_type != SOCK_RAW ||
1842	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1843		return -EOPNOTSUPP;
1844
1845	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1846	if (!mrt)
1847		return -ENOENT;
1848
1849	switch (optname) {
1850	case MRT6_VERSION:
1851		val = 0x0305;
1852		break;
1853#ifdef CONFIG_IPV6_PIMSM_V2
1854	case MRT6_PIM:
1855		val = mrt->mroute_do_pim;
1856		break;
1857#endif
1858	case MRT6_ASSERT:
1859		val = mrt->mroute_do_assert;
1860		break;
1861	default:
1862		return -ENOPROTOOPT;
1863	}
1864
1865	if (copy_from_sockptr(&olr, optlen, sizeof(int)))
1866		return -EFAULT;
1867
1868	olr = min_t(int, olr, sizeof(int));
1869	if (olr < 0)
1870		return -EINVAL;
1871
1872	if (copy_to_sockptr(optlen, &olr, sizeof(int)))
1873		return -EFAULT;
1874	if (copy_to_sockptr(optval, &val, olr))
1875		return -EFAULT;
1876	return 0;
1877}
1878
1879/*
1880 *	The IP multicast ioctl support routines.
1881 */
1882
1883int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1884{
1885	struct sioc_sg_req6 sr;
1886	struct sioc_mif_req6 vr;
1887	struct vif_device *vif;
1888	struct mfc6_cache *c;
1889	struct net *net = sock_net(sk);
1890	struct mr_table *mrt;
1891
1892	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1893	if (!mrt)
1894		return -ENOENT;
1895
1896	switch (cmd) {
1897	case SIOCGETMIFCNT_IN6:
1898		if (copy_from_user(&vr, arg, sizeof(vr)))
1899			return -EFAULT;
1900		if (vr.mifi >= mrt->maxvif)
1901			return -EINVAL;
1902		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1903		rcu_read_lock();
1904		vif = &mrt->vif_table[vr.mifi];
1905		if (VIF_EXISTS(mrt, vr.mifi)) {
1906			vr.icount = READ_ONCE(vif->pkt_in);
1907			vr.ocount = READ_ONCE(vif->pkt_out);
1908			vr.ibytes = READ_ONCE(vif->bytes_in);
1909			vr.obytes = READ_ONCE(vif->bytes_out);
1910			rcu_read_unlock();
1911
1912			if (copy_to_user(arg, &vr, sizeof(vr)))
1913				return -EFAULT;
1914			return 0;
1915		}
1916		rcu_read_unlock();
1917		return -EADDRNOTAVAIL;
1918	case SIOCGETSGCNT_IN6:
1919		if (copy_from_user(&sr, arg, sizeof(sr)))
1920			return -EFAULT;
1921
1922		rcu_read_lock();
1923		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
 
1924		if (c) {
1925			sr.pktcnt = c->_c.mfc_un.res.pkt;
1926			sr.bytecnt = c->_c.mfc_un.res.bytes;
1927			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1928			rcu_read_unlock();
1929
1930			if (copy_to_user(arg, &sr, sizeof(sr)))
1931				return -EFAULT;
1932			return 0;
1933		}
1934		rcu_read_unlock();
1935		return -EADDRNOTAVAIL;
1936	default:
1937		return -ENOIOCTLCMD;
1938	}
1939}
1940
1941#ifdef CONFIG_COMPAT
1942struct compat_sioc_sg_req6 {
1943	struct sockaddr_in6 src;
1944	struct sockaddr_in6 grp;
1945	compat_ulong_t pktcnt;
1946	compat_ulong_t bytecnt;
1947	compat_ulong_t wrong_if;
1948};
1949
1950struct compat_sioc_mif_req6 {
1951	mifi_t	mifi;
1952	compat_ulong_t icount;
1953	compat_ulong_t ocount;
1954	compat_ulong_t ibytes;
1955	compat_ulong_t obytes;
1956};
1957
1958int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1959{
1960	struct compat_sioc_sg_req6 sr;
1961	struct compat_sioc_mif_req6 vr;
1962	struct vif_device *vif;
1963	struct mfc6_cache *c;
1964	struct net *net = sock_net(sk);
1965	struct mr_table *mrt;
1966
1967	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1968	if (!mrt)
1969		return -ENOENT;
1970
1971	switch (cmd) {
1972	case SIOCGETMIFCNT_IN6:
1973		if (copy_from_user(&vr, arg, sizeof(vr)))
1974			return -EFAULT;
1975		if (vr.mifi >= mrt->maxvif)
1976			return -EINVAL;
1977		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1978		rcu_read_lock();
1979		vif = &mrt->vif_table[vr.mifi];
1980		if (VIF_EXISTS(mrt, vr.mifi)) {
1981			vr.icount = READ_ONCE(vif->pkt_in);
1982			vr.ocount = READ_ONCE(vif->pkt_out);
1983			vr.ibytes = READ_ONCE(vif->bytes_in);
1984			vr.obytes = READ_ONCE(vif->bytes_out);
1985			rcu_read_unlock();
1986
1987			if (copy_to_user(arg, &vr, sizeof(vr)))
1988				return -EFAULT;
1989			return 0;
1990		}
1991		rcu_read_unlock();
1992		return -EADDRNOTAVAIL;
1993	case SIOCGETSGCNT_IN6:
1994		if (copy_from_user(&sr, arg, sizeof(sr)))
1995			return -EFAULT;
1996
1997		rcu_read_lock();
1998		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1999		if (c) {
2000			sr.pktcnt = c->_c.mfc_un.res.pkt;
2001			sr.bytecnt = c->_c.mfc_un.res.bytes;
2002			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
2003			rcu_read_unlock();
2004
2005			if (copy_to_user(arg, &sr, sizeof(sr)))
2006				return -EFAULT;
2007			return 0;
2008		}
2009		rcu_read_unlock();
2010		return -EADDRNOTAVAIL;
2011	default:
2012		return -ENOIOCTLCMD;
2013	}
2014}
2015#endif
2016
2017static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
2018{
2019	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
2020		      IPSTATS_MIB_OUTFORWDATAGRAMS);
2021	IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
2022		      IPSTATS_MIB_OUTOCTETS, skb->len);
2023	return dst_output(net, sk, skb);
2024}
2025
2026/*
2027 *	Processing handlers for ip6mr_forward
2028 */
2029
2030static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
2031			  struct sk_buff *skb, int vifi)
2032{
2033	struct vif_device *vif = &mrt->vif_table[vifi];
2034	struct net_device *vif_dev;
2035	struct ipv6hdr *ipv6h;
2036	struct dst_entry *dst;
2037	struct flowi6 fl6;
2038
2039	vif_dev = vif_dev_read(vif);
2040	if (!vif_dev)
2041		goto out_free;
2042
2043#ifdef CONFIG_IPV6_PIMSM_V2
2044	if (vif->flags & MIFF_REGISTER) {
2045		WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2046		WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2047		DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
2048		DEV_STATS_INC(vif_dev, tx_packets);
2049		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2050		goto out_free;
2051	}
2052#endif
2053
2054	ipv6h = ipv6_hdr(skb);
2055
2056	fl6 = (struct flowi6) {
2057		.flowi6_oif = vif->link,
2058		.daddr = ipv6h->daddr,
2059	};
2060
2061	dst = ip6_route_output(net, NULL, &fl6);
2062	if (dst->error) {
2063		dst_release(dst);
2064		goto out_free;
2065	}
2066
2067	skb_dst_drop(skb);
2068	skb_dst_set(skb, dst);
2069
2070	/*
2071	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2072	 * not only before forwarding, but after forwarding on all output
2073	 * interfaces. It is clear, if mrouter runs a multicasting
2074	 * program, it should receive packets not depending to what interface
2075	 * program is joined.
2076	 * If we will not make it, the program will have to join on all
2077	 * interfaces. On the other hand, multihoming host (or router, but
2078	 * not mrouter) cannot join to more than one interface - it will
2079	 * result in receiving multiple packets.
2080	 */
2081	skb->dev = vif_dev;
2082	WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2083	WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2084
2085	/* We are about to write */
2086	/* XXX: extension headers? */
2087	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
2088		goto out_free;
2089
2090	ipv6h = ipv6_hdr(skb);
2091	ipv6h->hop_limit--;
2092
2093	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2094
2095	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2096		       net, NULL, skb, skb->dev, vif_dev,
2097		       ip6mr_forward2_finish);
2098
2099out_free:
2100	kfree_skb(skb);
2101	return 0;
2102}
2103
2104/* Called with rcu_read_lock() */
2105static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2106{
2107	int ct;
2108
2109	/* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
2110	for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
2111		if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
2112			break;
2113	}
2114	return ct;
2115}
2116
2117/* Called under rcu_read_lock() */
2118static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2119			   struct net_device *dev, struct sk_buff *skb,
2120			   struct mfc6_cache *c)
2121{
2122	int psend = -1;
2123	int vif, ct;
2124	int true_vifi = ip6mr_find_vif(mrt, dev);
2125
2126	vif = c->_c.mfc_parent;
2127	c->_c.mfc_un.res.pkt++;
2128	c->_c.mfc_un.res.bytes += skb->len;
2129	c->_c.mfc_un.res.lastuse = jiffies;
2130
2131	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2132		struct mfc6_cache *cache_proxy;
2133
2134		/* For an (*,G) entry, we only check that the incoming
2135		 * interface is part of the static tree.
2136		 */
2137		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2138		if (cache_proxy &&
2139		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
2140			goto forward;
2141	}
2142
2143	/*
2144	 * Wrong interface: drop packet and (maybe) send PIM assert.
2145	 */
2146	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
2147		c->_c.mfc_un.res.wrong_if++;
2148
2149		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2150		    /* pimsm uses asserts, when switching from RPT to SPT,
2151		       so that we cannot check that packet arrived on an oif.
2152		       It is bad, but otherwise we would need to move pretty
2153		       large chunk of pimd to kernel. Ough... --ANK
2154		     */
2155		    (mrt->mroute_do_pim ||
2156		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2157		    time_after(jiffies,
2158			       c->_c.mfc_un.res.last_assert +
2159			       MFC_ASSERT_THRESH)) {
2160			c->_c.mfc_un.res.last_assert = jiffies;
2161			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2162			if (mrt->mroute_do_wrvifwhole)
2163				ip6mr_cache_report(mrt, skb, true_vifi,
2164						   MRT6MSG_WRMIFWHOLE);
2165		}
2166		goto dont_forward;
2167	}
2168
2169forward:
2170	WRITE_ONCE(mrt->vif_table[vif].pkt_in,
2171		   mrt->vif_table[vif].pkt_in + 1);
2172	WRITE_ONCE(mrt->vif_table[vif].bytes_in,
2173		   mrt->vif_table[vif].bytes_in + skb->len);
2174
2175	/*
2176	 *	Forward the frame
2177	 */
2178	if (ipv6_addr_any(&c->mf6c_origin) &&
2179	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
2180		if (true_vifi >= 0 &&
2181		    true_vifi != c->_c.mfc_parent &&
2182		    ipv6_hdr(skb)->hop_limit >
2183				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2184			/* It's an (*,*) entry and the packet is not coming from
2185			 * the upstream: forward the packet to the upstream
2186			 * only.
2187			 */
2188			psend = c->_c.mfc_parent;
2189			goto last_forward;
2190		}
2191		goto dont_forward;
2192	}
2193	for (ct = c->_c.mfc_un.res.maxvif - 1;
2194	     ct >= c->_c.mfc_un.res.minvif; ct--) {
2195		/* For (*,G) entry, don't forward to the incoming interface */
2196		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2197		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2198			if (psend != -1) {
2199				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2200				if (skb2)
2201					ip6mr_forward2(net, mrt, skb2, psend);
2202			}
2203			psend = ct;
2204		}
2205	}
2206last_forward:
2207	if (psend != -1) {
2208		ip6mr_forward2(net, mrt, skb, psend);
2209		return;
2210	}
2211
2212dont_forward:
2213	kfree_skb(skb);
2214}
2215
2216
2217/*
2218 *	Multicast packets for forwarding arrive here
2219 */
2220
2221int ip6_mr_input(struct sk_buff *skb)
2222{
2223	struct mfc6_cache *cache;
2224	struct net *net = dev_net(skb->dev);
2225	struct mr_table *mrt;
2226	struct flowi6 fl6 = {
2227		.flowi6_iif	= skb->dev->ifindex,
2228		.flowi6_mark	= skb->mark,
2229	};
2230	int err;
2231	struct net_device *dev;
2232
2233	/* skb->dev passed in is the master dev for vrfs.
2234	 * Get the proper interface that does have a vif associated with it.
2235	 */
2236	dev = skb->dev;
2237	if (netif_is_l3_master(skb->dev)) {
2238		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2239		if (!dev) {
2240			kfree_skb(skb);
2241			return -ENODEV;
2242		}
2243	}
2244
2245	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2246	if (err < 0) {
2247		kfree_skb(skb);
2248		return err;
2249	}
2250
2251	cache = ip6mr_cache_find(mrt,
2252				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2253	if (!cache) {
2254		int vif = ip6mr_find_vif(mrt, dev);
2255
2256		if (vif >= 0)
2257			cache = ip6mr_cache_find_any(mrt,
2258						     &ipv6_hdr(skb)->daddr,
2259						     vif);
2260	}
2261
2262	/*
2263	 *	No usable cache entry
2264	 */
2265	if (!cache) {
2266		int vif;
2267
2268		vif = ip6mr_find_vif(mrt, dev);
2269		if (vif >= 0) {
2270			int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
2271
2272			return err;
2273		}
2274		kfree_skb(skb);
2275		return -ENODEV;
2276	}
2277
2278	ip6_mr_forward(net, mrt, dev, skb, cache);
2279
2280	return 0;
2281}
2282
2283int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2284		    u32 portid)
2285{
2286	int err;
2287	struct mr_table *mrt;
2288	struct mfc6_cache *cache;
2289	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2290
2291	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2292	if (!mrt)
2293		return -ENOENT;
2294
2295	rcu_read_lock();
2296	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2297	if (!cache && skb->dev) {
2298		int vif = ip6mr_find_vif(mrt, skb->dev);
2299
2300		if (vif >= 0)
2301			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2302						     vif);
2303	}
2304
2305	if (!cache) {
2306		struct sk_buff *skb2;
2307		struct ipv6hdr *iph;
2308		struct net_device *dev;
2309		int vif;
2310
2311		dev = skb->dev;
2312		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2313			rcu_read_unlock();
2314			return -ENODEV;
2315		}
2316
2317		/* really correct? */
2318		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2319		if (!skb2) {
2320			rcu_read_unlock();
2321			return -ENOMEM;
2322		}
2323
2324		NETLINK_CB(skb2).portid = portid;
2325		skb_reset_transport_header(skb2);
2326
2327		skb_put(skb2, sizeof(struct ipv6hdr));
2328		skb_reset_network_header(skb2);
2329
2330		iph = ipv6_hdr(skb2);
2331		iph->version = 0;
2332		iph->priority = 0;
2333		iph->flow_lbl[0] = 0;
2334		iph->flow_lbl[1] = 0;
2335		iph->flow_lbl[2] = 0;
2336		iph->payload_len = 0;
2337		iph->nexthdr = IPPROTO_NONE;
2338		iph->hop_limit = 0;
2339		iph->saddr = rt->rt6i_src.addr;
2340		iph->daddr = rt->rt6i_dst.addr;
2341
2342		err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
2343		rcu_read_unlock();
2344
2345		return err;
2346	}
2347
2348	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2349	rcu_read_unlock();
2350	return err;
2351}
2352
2353static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2354			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2355			     int flags)
2356{
2357	struct nlmsghdr *nlh;
2358	struct rtmsg *rtm;
2359	int err;
2360
2361	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2362	if (!nlh)
2363		return -EMSGSIZE;
2364
2365	rtm = nlmsg_data(nlh);
2366	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2367	rtm->rtm_dst_len  = 128;
2368	rtm->rtm_src_len  = 128;
2369	rtm->rtm_tos      = 0;
2370	rtm->rtm_table    = mrt->id;
2371	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2372		goto nla_put_failure;
2373	rtm->rtm_type = RTN_MULTICAST;
2374	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2375	if (c->_c.mfc_flags & MFC_STATIC)
2376		rtm->rtm_protocol = RTPROT_STATIC;
2377	else
2378		rtm->rtm_protocol = RTPROT_MROUTED;
2379	rtm->rtm_flags    = 0;
2380
2381	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2382	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2383		goto nla_put_failure;
2384	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2385	/* do not break the dump if cache is unresolved */
2386	if (err < 0 && err != -ENOENT)
2387		goto nla_put_failure;
2388
2389	nlmsg_end(skb, nlh);
2390	return 0;
2391
2392nla_put_failure:
2393	nlmsg_cancel(skb, nlh);
2394	return -EMSGSIZE;
2395}
2396
2397static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2398			      u32 portid, u32 seq, struct mr_mfc *c,
2399			      int cmd, int flags)
2400{
2401	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2402				 cmd, flags);
2403}
2404
2405static int mr6_msgsize(bool unresolved, int maxvif)
2406{
2407	size_t len =
2408		NLMSG_ALIGN(sizeof(struct rtmsg))
2409		+ nla_total_size(4)	/* RTA_TABLE */
2410		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2411		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2412		;
2413
2414	if (!unresolved)
2415		len = len
2416		      + nla_total_size(4)	/* RTA_IIF */
2417		      + nla_total_size(0)	/* RTA_MULTIPATH */
2418		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2419						/* RTA_MFC_STATS */
2420		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2421		;
2422
2423	return len;
2424}
2425
2426static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2427			      int cmd)
2428{
2429	struct net *net = read_pnet(&mrt->net);
2430	struct sk_buff *skb;
2431	int err = -ENOBUFS;
2432
2433	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2434			GFP_ATOMIC);
2435	if (!skb)
2436		goto errout;
2437
2438	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2439	if (err < 0)
2440		goto errout;
2441
2442	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2443	return;
2444
2445errout:
2446	kfree_skb(skb);
2447	if (err < 0)
2448		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2449}
2450
2451static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2452{
2453	size_t len =
2454		NLMSG_ALIGN(sizeof(struct rtgenmsg))
2455		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
2456		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
2457					/* IP6MRA_CREPORT_SRC_ADDR */
2458		+ nla_total_size(sizeof(struct in6_addr))
2459					/* IP6MRA_CREPORT_DST_ADDR */
2460		+ nla_total_size(sizeof(struct in6_addr))
2461					/* IP6MRA_CREPORT_PKT */
2462		+ nla_total_size(payloadlen)
2463		;
2464
2465	return len;
2466}
2467
2468static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
2469{
2470	struct net *net = read_pnet(&mrt->net);
2471	struct nlmsghdr *nlh;
2472	struct rtgenmsg *rtgenm;
2473	struct mrt6msg *msg;
2474	struct sk_buff *skb;
2475	struct nlattr *nla;
2476	int payloadlen;
2477
2478	payloadlen = pkt->len - sizeof(struct mrt6msg);
2479	msg = (struct mrt6msg *)skb_transport_header(pkt);
2480
2481	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2482	if (!skb)
2483		goto errout;
2484
2485	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2486			sizeof(struct rtgenmsg), 0);
2487	if (!nlh)
2488		goto errout;
2489	rtgenm = nlmsg_data(nlh);
2490	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2491	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2492	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2493	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2494			     &msg->im6_src) ||
2495	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2496			     &msg->im6_dst))
2497		goto nla_put_failure;
2498
2499	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2500	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2501				  nla_data(nla), payloadlen))
2502		goto nla_put_failure;
2503
2504	nlmsg_end(skb, nlh);
2505
2506	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2507	return;
2508
2509nla_put_failure:
2510	nlmsg_cancel(skb, nlh);
2511errout:
2512	kfree_skb(skb);
2513	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2514}
2515
2516static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = {
2517	[RTA_SRC]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2518	[RTA_DST]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2519	[RTA_TABLE]		= { .type = NLA_U32 },
2520};
2521
2522static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb,
2523					const struct nlmsghdr *nlh,
2524					struct nlattr **tb,
2525					struct netlink_ext_ack *extack)
2526{
2527	struct rtmsg *rtm;
2528	int err;
2529
2530	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy,
2531			  extack);
2532	if (err)
2533		return err;
2534
2535	rtm = nlmsg_data(nlh);
2536	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
2537	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
2538	    rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
2539	    rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
2540		NL_SET_ERR_MSG_MOD(extack,
2541				   "Invalid values in header for multicast route get request");
2542		return -EINVAL;
2543	}
2544
2545	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2546	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2547		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
2548		return -EINVAL;
2549	}
2550
2551	return 0;
2552}
2553
2554static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2555			      struct netlink_ext_ack *extack)
2556{
2557	struct net *net = sock_net(in_skb->sk);
2558	struct in6_addr src = {}, grp = {};
2559	struct nlattr *tb[RTA_MAX + 1];
2560	struct mfc6_cache *cache;
2561	struct mr_table *mrt;
2562	struct sk_buff *skb;
2563	u32 tableid;
2564	int err;
2565
2566	err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2567	if (err < 0)
2568		return err;
2569
2570	if (tb[RTA_SRC])
2571		src = nla_get_in6_addr(tb[RTA_SRC]);
2572	if (tb[RTA_DST])
2573		grp = nla_get_in6_addr(tb[RTA_DST]);
2574	tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
2575
2576	mrt = ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
2577	if (!mrt) {
2578		NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
2579		return -ENOENT;
2580	}
2581
2582	/* entries are added/deleted only under RTNL */
2583	rcu_read_lock();
2584	cache = ip6mr_cache_find(mrt, &src, &grp);
2585	rcu_read_unlock();
2586	if (!cache) {
2587		NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found");
2588		return -ENOENT;
2589	}
2590
2591	skb = nlmsg_new(mr6_msgsize(false, mrt->maxvif), GFP_KERNEL);
2592	if (!skb)
2593		return -ENOBUFS;
2594
2595	err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
2596				nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0);
2597	if (err < 0) {
2598		kfree_skb(skb);
2599		return err;
2600	}
2601
2602	return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2603}
2604
2605static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2606{
2607	const struct nlmsghdr *nlh = cb->nlh;
2608	struct fib_dump_filter filter = {};
 
 
2609	int err;
2610
2611	if (cb->strict_check) {
2612		err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2613					    &filter, cb);
2614		if (err < 0)
2615			return err;
2616	}
2617
2618	if (filter.table_id) {
2619		struct mr_table *mrt;
2620
2621		mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2622		if (!mrt) {
2623			if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
2624				return skb->len;
2625
2626			NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2627			return -ENOENT;
2628		}
2629		err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2630				    &mfc_unres_lock, &filter);
2631		return skb->len ? : err;
2632	}
2633
2634	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2635				_ip6mr_fill_mroute, &mfc_unres_lock, &filter);
2636}