Linux Audio

Check our new training course

Loading...
v3.5.6
   1/*
   2 *	Linux IPv6 multicast routing support for BSD pim6sd
   3 *	Based on net/ipv4/ipmr.c.
   4 *
   5 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   6 *		LSIIT Laboratory, Strasbourg, France
   7 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   8 *		6WIND, Paris, France
   9 *	Copyright (C)2007,2008 USAGI/WIDE Project
  10 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  11 *
  12 *	This program is free software; you can redistribute it and/or
  13 *	modify it under the terms of the GNU General Public License
  14 *	as published by the Free Software Foundation; either version
  15 *	2 of the License, or (at your option) any later version.
  16 *
  17 */
  18
  19#include <asm/uaccess.h>
  20#include <linux/types.h>
  21#include <linux/sched.h>
  22#include <linux/errno.h>
  23#include <linux/timer.h>
  24#include <linux/mm.h>
  25#include <linux/kernel.h>
  26#include <linux/fcntl.h>
  27#include <linux/stat.h>
  28#include <linux/socket.h>
  29#include <linux/inet.h>
  30#include <linux/netdevice.h>
  31#include <linux/inetdevice.h>
  32#include <linux/proc_fs.h>
  33#include <linux/seq_file.h>
  34#include <linux/init.h>
  35#include <linux/slab.h>
  36#include <linux/compat.h>
  37#include <net/protocol.h>
  38#include <linux/skbuff.h>
  39#include <net/sock.h>
  40#include <net/raw.h>
  41#include <linux/notifier.h>
  42#include <linux/if_arp.h>
  43#include <net/checksum.h>
  44#include <net/netlink.h>
  45#include <net/fib_rules.h>
  46
  47#include <net/ipv6.h>
  48#include <net/ip6_route.h>
  49#include <linux/mroute6.h>
  50#include <linux/pim.h>
  51#include <net/addrconf.h>
  52#include <linux/netfilter_ipv6.h>
  53#include <linux/export.h>
  54#include <net/ip6_checksum.h>
 
  55
/* Per-namespace IPv6 multicast routing table state. */
struct mr6_table {
	struct list_head	list;		/* entry in net->ipv6.mr6_tables */
#ifdef CONFIG_NET_NS
	struct net		*net;		/* owning network namespace */
#endif
	u32			id;		/* table id, e.g. RT6_TABLE_DFLT */
	struct sock		*mroute6_sk;	/* userspace mroute control socket */
	struct timer_list	ipmr_expire_timer; /* ages mfc6_unres_queue entries */
	struct list_head	mfc6_unres_queue;  /* unresolved mfc6_cache entries */
	struct list_head	mfc6_cache_array[MFC6_LINES]; /* resolved entries, hashed */
	struct mif_device	vif6_table[MAXMIFS]; /* multicast virtual interfaces */
	int			maxvif;		/* highest in-use vif index + 1 */
	atomic_t		cache_resolve_queue_len; /* # entries on unres queue */
	int			mroute_do_assert; /* presumably MRT6_ASSERT setting (set elsewhere) */
	int			mroute_do_pim;	/* presumably MRT6_PIM setting (set elsewhere) */
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num; /* register vif index, or -1 if none */
#endif
};
  75
/* An ip6mr policy rule; no private fields beyond the generic fib_rule. */
struct ip6mr_rule {
	struct fib_rule		common;
};
  79
/* Result of an ip6mr fib-rules lookup: the matched routing table. */
struct ip6mr_result {
	struct mr6_table	*mrt;
};
  83
  84/* Big lock, protecting vif table, mrt cache and mroute socket state.
  85   Note that the changes are semaphored via rtnl_lock.
  86 */
  87
  88static DEFINE_RWLOCK(mrt_lock);
  89
  90/*
  91 *	Multicast router control variables
  92 */
  93
  94#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
  95
  96/* Special spinlock for queue of unresolved entries */
  97static DEFINE_SPINLOCK(mfc_unres_lock);
  98
  99/* We return to original Alan's scheme. Hash table of resolved
 100   entries is changed only in process context and protected
 101   with weak lock mrt_lock. Queue of unresolved entries is protected
 102   with strong spinlock mfc_unres_lock.
 103
 104   In this case data path is free of exclusive locks at all.
 105 */
 106
 107static struct kmem_cache *mrt_cachep __read_mostly;
 108
 109static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
 110static void ip6mr_free_table(struct mr6_table *mrt);
 111
 112static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
 113			  struct sk_buff *skb, struct mfc6_cache *cache);
 114static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 115			      mifi_t mifi, int assert);
 116static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 117			       struct mfc6_cache *c, struct rtmsg *rtm);
 
 
 118static int ip6mr_rtm_dumproute(struct sk_buff *skb,
 119			       struct netlink_callback *cb);
 120static void mroute_clean_tables(struct mr6_table *mrt);
 121static void ipmr_expire_process(unsigned long arg);
 122
 123#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 124#define ip6mr_for_each_table(mrt, net) \
 125	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 126
/*
 * Find the mr6_table with the given @id in @net, or NULL if absent.
 * NOTE(review): walks the list with the _rcu iterator; callers appear
 * to rely on RTNL or rcu_read_lock for safety -- confirm at call sites.
 */
static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}
 137
/*
 * Resolve flow @flp6 to a multicast routing table through the
 * fib-rules engine.  On success *mrt is set and 0 is returned;
 * otherwise the negative errno from fib_rules_lookup() is propagated.
 */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	struct ip6mr_result res;
	struct fib_lookup_arg arg = { .result = &res, };
	int err;

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}
 152
/*
 * fib_rules .action callback: map a matched rule to an mr6_table.
 * Returns 0 with arg->result filled in, or a negative errno.
 * -EAGAIN tells fib_rules_lookup() to continue with the next rule.
 */
static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr6_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (mrt == NULL)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}
 177
/* fib_rules .match callback: ip6mr rules carry no selectors, so every
 * rule matches every flow. */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}
 182
/* Netlink attribute policy: only the generic fib_rule attributes. */
static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};
 186
/* fib_rules .configure callback: nothing beyond the generic setup. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}
 192
/* fib_rules .compare callback: no private fields, so rules with equal
 * generic parts are always considered equal. */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}
 198
 199static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 200			   struct fib_rule_hdr *frh)
 201{
 202	frh->dst_len = 0;
 203	frh->src_len = 0;
 204	frh->tos     = 0;
 205	return 0;
 206}
 207
/* fib_rules ops template for RTNL_FAMILY_IP6MR; cloned per namespace
 * by ip6mr_rules_init(). */
static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};
 222
 223static int __net_init ip6mr_rules_init(struct net *net)
 224{
 225	struct fib_rules_ops *ops;
 226	struct mr6_table *mrt;
 227	int err;
 228
 229	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
 230	if (IS_ERR(ops))
 231		return PTR_ERR(ops);
 232
 233	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
 234
 235	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 236	if (mrt == NULL) {
 237		err = -ENOMEM;
 238		goto err1;
 239	}
 240
 241	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
 242	if (err < 0)
 243		goto err2;
 244
 245	net->ipv6.mr6_rules_ops = ops;
 246	return 0;
 247
 248err2:
 249	kfree(mrt);
 250err1:
 251	fib_rules_unregister(ops);
 252	return err;
 253}
 254
/*
 * Per-namespace teardown for the multi-table case: free every table,
 * then unregister the fib-rules ops.
 * NOTE(review): the list walk/mutation appears to rely on the pernet
 * teardown path for serialization -- confirm whether RTNL is held here.
 */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr6_table *mrt, *next;

	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
}
 265#else
 266#define ip6mr_for_each_table(mrt, net) \
 267	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 268
/* Single-table build: every id maps to the one per-namespace table. */
static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}
 273
/* Single-table build: trivially resolve any flow to the one table. */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}
 280
/* Single-table build: create the one default table for this namespace. */
static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}
 286
/* Single-table build: free the one per-namespace table. */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	ip6mr_free_table(net->ipv6.mrt6);
}
 291#endif
 292
/*
 * Return the table with id @id, creating it if it does not yet exist.
 * Returns NULL on allocation failure.  A new table starts with empty
 * resolved/unresolved caches and an initialized (but not armed)
 * expire timer.
 */
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;
	unsigned int i;

	mrt = ip6mr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	mrt->id = id;
	write_pnet(&mrt->net, net);

	/* Forwarding cache */
	for (i = 0; i < MFC6_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IPV6_PIMSM_V2
	mrt->mroute_reg_vif_num = -1;	/* no register vif yet */
#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
	return mrt;
}
 325
/* Stop the expire timer, flush all cache/vif state, and free @mrt. */
static void ip6mr_free_table(struct mr6_table *mrt)
{
	del_timer(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt);
	kfree(mrt);
}
 332
 333#ifdef CONFIG_PROC_FS
 334
/* seq_file iterator state for the mfc (/proc ip6_mr_cache) dump. */
struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;		/* table being dumped */
	struct list_head *cache;	/* current hash chain, unres queue, or NULL */
	int ct;				/* current hash bucket index */
};
 341
 342
/*
 * Position the mfc iterator at entry @pos, searching the resolved hash
 * buckets first and then the unresolved queue.
 *
 * Lock contract: on a non-NULL return the lock protecting the entry is
 * LEFT HELD (mrt_lock for resolved entries, mfc_unres_lock for
 * unresolved ones); ipmr_mfc_seq_stop() releases it based on it->cache.
 * On a NULL return all locks have been dropped and it->cache is NULL.
 */
static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
 368
 369/*
 370 *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
 371 */
 372
/* seq_file iterator state for the vif (/proc ip6_mr_vif) dump. */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;	/* table being dumped */
	int ct;			/* current vif index */
};
 378
 379static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
 380					    struct ipmr_vif_iter *iter,
 381					    loff_t pos)
 382{
 383	struct mr6_table *mrt = iter->mrt;
 384
 385	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
 386		if (!MIF_EXISTS(mrt, iter->ct))
 387			continue;
 388		if (pos-- == 0)
 389			return &mrt->vif6_table[iter->ct];
 390	}
 391	return NULL;
 392}
 393
/*
 * seq_file ->start for the vif dump: look up the default table, take
 * mrt_lock for reading (released by ip6mr_vif_seq_stop) and position
 * the iterator.  Returns SEQ_START_TOKEN for the header row.
 */
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}
 411
 412static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 413{
 414	struct ipmr_vif_iter *iter = seq->private;
 415	struct net *net = seq_file_net(seq);
 416	struct mr6_table *mrt = iter->mrt;
 417
 418	++*pos;
 419	if (v == SEQ_START_TOKEN)
 420		return ip6mr_vif_seq_idx(net, iter, 0);
 421
 422	while (++iter->ct < mrt->maxvif) {
 423		if (!MIF_EXISTS(mrt, iter->ct))
 424			continue;
 425		return &mrt->vif6_table[iter->ct];
 426	}
 427	return NULL;
 428}
 429
/* seq_file ->stop for the vif dump: drop the lock taken in ->start. */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
 435
/*
 * seq_file ->show for the vif dump: print the header row for
 * SEQ_START_TOKEN, otherwise one line of counters for the mif.
 */
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr6_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		/* vif - mrt->vif6_table == the mif index */
		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}
 457
/* seq_file operations for the vif /proc dump. */
static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};
 464
/* open() for the vif /proc file: per-net seq_file with vif iterator. */
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}
 470
/* file_operations for the vif /proc file. */
static const struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
 478
/*
 * seq_file ->start for the mfc dump.  Locking is delegated to
 * ipmr_mfc_seq_idx(), which leaves the appropriate lock held for the
 * returned entry; SEQ_START_TOKEN (header row) holds no lock.
 */
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}
 493
/*
 * seq_file ->next for the mfc dump.  Walks the current hash chain, then
 * subsequent buckets, then hands over from the resolved cache
 * (mrt_lock, read) to the unresolved queue (mfc_unres_lock, bh) --
 * note the explicit lock exchange below.  Returns NULL at end of dump
 * with all locks dropped and it->cache == NULL.
 */
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	/* more entries on the current chain/queue? */
	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
 536
 537static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 538{
 539	struct ipmr_mfc_iter *it = seq->private;
 540	struct mr6_table *mrt = it->mrt;
 541
 542	if (it->cache == &mrt->mfc6_unres_queue)
 543		spin_unlock_bh(&mfc_unres_lock);
 544	else if (it->cache == mrt->mfc6_cache_array)
 545		read_unlock(&mrt_lock);
 546}
 547
/*
 * seq_file ->show for the mfc dump: header row for SEQ_START_TOKEN,
 * otherwise one line per cache entry.  Resolved entries print real
 * counters and the per-oif TTL list; unresolved entries print zeros.
 */
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		struct mr6_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mrt->mfc6_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			/* only oifs with a real (non-255) TTL are forwarded to */
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
 589
/* seq_file operations for the mfc /proc dump. */
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
 596
/* open() for the mfc /proc file: per-net seq_file with mfc iterator. */
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}
 602
/* file_operations for the mfc /proc file. */
static const struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
 610#endif
 611
 612#ifdef CONFIG_IPV6_PIMSM_V2
 613
 614static int pim6_rcv(struct sk_buff *skb)
 615{
 616	struct pimreghdr *pim;
 617	struct ipv6hdr   *encap;
 618	struct net_device  *reg_dev = NULL;
 619	struct net *net = dev_net(skb->dev);
 620	struct mr6_table *mrt;
 621	struct flowi6 fl6 = {
 622		.flowi6_iif	= skb->dev->ifindex,
 623		.flowi6_mark	= skb->mark,
 624	};
 625	int reg_vif_num;
 626
 627	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 628		goto drop;
 629
 630	pim = (struct pimreghdr *)skb_transport_header(skb);
 631	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
 632	    (pim->flags & PIM_NULL_REGISTER) ||
 633	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 634			     sizeof(*pim), IPPROTO_PIM,
 635			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
 636	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 637		goto drop;
 638
 639	/* check if the inner packet is destined to mcast group */
 640	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
 641				   sizeof(*pim));
 642
 643	if (!ipv6_addr_is_multicast(&encap->daddr) ||
 644	    encap->payload_len == 0 ||
 645	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 646		goto drop;
 647
 648	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 649		goto drop;
 650	reg_vif_num = mrt->mroute_reg_vif_num;
 651
 652	read_lock(&mrt_lock);
 653	if (reg_vif_num >= 0)
 654		reg_dev = mrt->vif6_table[reg_vif_num].dev;
 655	if (reg_dev)
 656		dev_hold(reg_dev);
 657	read_unlock(&mrt_lock);
 658
 659	if (reg_dev == NULL)
 660		goto drop;
 661
 662	skb->mac_header = skb->network_header;
 663	skb_pull(skb, (u8 *)encap - skb->data);
 664	skb_reset_network_header(skb);
 665	skb->protocol = htons(ETH_P_IPV6);
 666	skb->ip_summed = CHECKSUM_NONE;
 667	skb->pkt_type = PACKET_HOST;
 668
 669	skb_tunnel_rx(skb, reg_dev);
 670
 671	netif_rx(skb);
 672
 673	dev_put(reg_dev);
 674	return 0;
 675 drop:
 676	kfree_skb(skb);
 677	return 0;
 678}
 679
/* inet6 protocol handler registration for IPPROTO_PIM. */
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
 683
 684/* Service routines creating virtual interfaces: PIMREG */
 685
 686static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 687				      struct net_device *dev)
 688{
 689	struct net *net = dev_net(dev);
 690	struct mr6_table *mrt;
 691	struct flowi6 fl6 = {
 692		.flowi6_oif	= dev->ifindex,
 693		.flowi6_iif	= skb->skb_iif,
 694		.flowi6_mark	= skb->mark,
 695	};
 696	int err;
 697
 698	err = ip6mr_fib_lookup(net, &fl6, &mrt);
 699	if (err < 0) {
 700		kfree_skb(skb);
 701		return err;
 702	}
 703
 704	read_lock(&mrt_lock);
 705	dev->stats.tx_bytes += skb->len;
 706	dev->stats.tx_packets++;
 707	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
 708	read_unlock(&mrt_lock);
 709	kfree_skb(skb);
 710	return NETDEV_TX_OK;
 711}
 712
 
 
 
 
 
/* netdev ops for the pim6reg pseudo-device; transmit-only. */
static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};
 716
/* alloc_netdev() setup callback for the pim6reg pseudo-device. */
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	/* leave room for outer IPv6 header + PIM register header */
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
 726
/*
 * Create, register and open the pim6reg device for table @mrt.
 * Returns the device with a reference held, or NULL on failure.
 * Called under RTNL (note the unlock/relock dance on the failure path).
 */
static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	/* default table keeps the historical name "pim6reg" */
	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, reg_vif_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
 763#endif
 764
 765/*
 766 *	Delete a VIF entry
 767 */
 768
/*
 * Delete vif @vifi from table @mrt.  Clears the table slot under the
 * write lock, shrinks maxvif if the top slot was removed, then drops
 * the allmulti/mc_forwarding accounting and the device reference.
 * Register vifs are queued for unregistration on @head.
 * Returns 0 or -EADDRNOTAVAIL if the slot is out of range or empty.
 */
static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	/* removed the highest slot: scan down for the new maxvif */
	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev)
		in6_dev->cnf.mc_forwarding--;

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}
 817
/* Return an mfc6_cache entry to the slab cache. */
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}
 822
 823/* Destroy an unresolved cache entry, killing queued skbs
 824   and reporting error to netlink readers.
 825 */
 826
/*
 * Destroy an unresolved cache entry: drain its queued skbs, answering
 * any pending netlink route requests with -ETIMEDOUT (skbs whose
 * "IPv6" version nibble is 0 are netlink request skbs, not packets),
 * then free the entry.  Caller holds mfc_unres_lock.
 */
static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}
 848
 849
 850/* Timer process for all the unresolved queue. */
 851
/*
 * Expire timed-out entries on the unresolved queue and re-arm the
 * timer for the earliest remaining expiry (at most 10s out).
 * Caller holds mfc_unres_lock.
 */
static void ipmr_do_expire_process(struct mr6_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ip6mr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}
 874
/*
 * Expire-timer callback (timer context).  Uses trylock: if someone
 * else holds mfc_unres_lock, retry one jiffy later instead of spinning
 * in softirq context.
 */
static void ipmr_expire_process(unsigned long arg)
{
	struct mr6_table *mrt = (struct mr6_table *)arg;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}
 889
 890/* Fill oifs list. It is called under write locked mrt_lock. */
 891
/*
 * Rebuild the per-oif TTL thresholds and the [minvif, maxvif) window
 * of a cache entry from the user-supplied @ttls array.  A ttl of 0 or
 * 255 means "do not forward to this vif".
 * Called under write-locked mrt_lock.
 */
static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (MIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
 912
/*
 * Add vif described by @vifc to table @mrt.  @mrtsock is non-zero when
 * the request came through the mroute control socket (entries added
 * otherwise are marked VIFF_STATIC).  Returns 0 or a negative errno.
 * NOTE(review): vifi is indexed into vif6_table before any bounds
 * check -- the setsockopt caller (not shown here) is presumably
 * validating vifi < MAXMIFS; confirm.
 */
static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev)
		in6_dev->cnf.mc_forwarding++;

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
 992
/*
 * Look up a resolved cache entry by (origin, group) in the hash
 * bucket given by MFC6_HASH; returns NULL if not cached.
 * NOTE(review): walks the bucket list without taking mrt_lock here --
 * callers are presumably holding it; confirm at call sites.
 */
static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			return c;
	}
	return NULL;
}
1007
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1008/*
1009 *	Allocate a multicast cache entry
1010 */
/* Allocate a zeroed resolved-cache entry (process context, GFP_KERNEL);
 * minvif starts at MAXMIFS meaning "no oifs yet". */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXMIFS;
	return c;
}
1019
/* Allocate an unresolved-cache entry (atomic context): empty pending
 * skb queue and a 10 second expiry for the resolution timer. */
static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}
1029
1030/*
1031 *	A cache entry has gone into a resolved state from queued
1032 */
1033
/*
 * An entry moved from the unresolved queue (@uc) to the resolved
 * cache (@c): replay every skb that was queued while unresolved.
 * Netlink request skbs (version nibble 0) are answered with the now
 * known route or -EMSGSIZE; real packets are forwarded.
 */
static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}
1060
1061/*
1062 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1063 *	expects the following bizarre scheme.
1064 *
1065 *	Called under mrt_lock.
1066 */
1067
/*
 * Build an mrt6msg for packet @pkt (seen on mif @mifi, of type
 * @assert, e.g. MRT6MSG_NOCACHE/WHOLEPKT) and queue it on the mroute
 * control socket.  Returns 0 or a negative errno.  Called under
 * mrt_lock (see the comment above this function).
 */
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/* no daemon listening: nothing to report to */
	if (mrt->mroute6_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
1156
1157/*
1158 *	Queue a packet for resolution. It gets locked cache entry!
1159 */
1160
1161static int
1162ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1163{
1164	bool found = false;
1165	int err;
1166	struct mfc6_cache *c;
1167
1168	spin_lock_bh(&mfc_unres_lock);
1169	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1170		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1171		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1172			found = true;
1173			break;
1174		}
1175	}
1176
1177	if (!found) {
1178		/*
1179		 *	Create a new entry if allowable
1180		 */
1181
1182		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1183		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1184			spin_unlock_bh(&mfc_unres_lock);
1185
1186			kfree_skb(skb);
1187			return -ENOBUFS;
1188		}
1189
1190		/*
1191		 *	Fill in the new cache entry
1192		 */
1193		c->mf6c_parent = -1;
1194		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1195		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1196
1197		/*
1198		 *	Reflect first query at pim6sd
1199		 */
1200		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1201		if (err < 0) {
1202			/* If the report failed throw the cache entry
1203			   out - Brad Parker
1204			 */
1205			spin_unlock_bh(&mfc_unres_lock);
1206
1207			ip6mr_cache_free(c);
1208			kfree_skb(skb);
1209			return err;
1210		}
1211
1212		atomic_inc(&mrt->cache_resolve_queue_len);
1213		list_add(&c->list, &mrt->mfc6_unres_queue);
 
1214
1215		ipmr_do_expire_process(mrt);
1216	}
1217
1218	/*
1219	 *	See if we can append the packet
1220	 */
1221	if (c->mfc_un.unres.unresolved.qlen > 3) {
1222		kfree_skb(skb);
1223		err = -ENOBUFS;
1224	} else {
1225		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1226		err = 0;
1227	}
1228
1229	spin_unlock_bh(&mfc_unres_lock);
1230	return err;
1231}
1232
1233/*
1234 *	MFC6 cache manipulation by user space
1235 */
1236
1237static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
 
1238{
1239	int line;
1240	struct mfc6_cache *c, *next;
1241
1242	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1243
1244	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1245		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1246		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
 
 
1247			write_lock_bh(&mrt_lock);
1248			list_del(&c->list);
1249			write_unlock_bh(&mrt_lock);
1250
 
1251			ip6mr_cache_free(c);
1252			return 0;
1253		}
1254	}
1255	return -ENOENT;
1256}
1257
1258static int ip6mr_device_event(struct notifier_block *this,
1259			      unsigned long event, void *ptr)
1260{
1261	struct net_device *dev = ptr;
1262	struct net *net = dev_net(dev);
1263	struct mr6_table *mrt;
1264	struct mif_device *v;
1265	int ct;
1266	LIST_HEAD(list);
1267
1268	if (event != NETDEV_UNREGISTER)
1269		return NOTIFY_DONE;
1270
1271	ip6mr_for_each_table(mrt, net) {
1272		v = &mrt->vif6_table[0];
1273		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1274			if (v->dev == dev)
1275				mif6_delete(mrt, ct, &list);
1276		}
1277	}
1278	unregister_netdevice_many(&list);
1279
1280	return NOTIFY_DONE;
1281}
1282
1283static struct notifier_block ip6_mr_notifier = {
1284	.notifier_call = ip6mr_device_event
1285};
1286
1287/*
1288 *	Setup for IP multicast routing
1289 */
1290
1291static int __net_init ip6mr_net_init(struct net *net)
1292{
1293	int err;
1294
1295	err = ip6mr_rules_init(net);
1296	if (err < 0)
1297		goto fail;
1298
1299#ifdef CONFIG_PROC_FS
1300	err = -ENOMEM;
1301	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1302		goto proc_vif_fail;
1303	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1304		goto proc_cache_fail;
1305#endif
1306
1307	return 0;
1308
1309#ifdef CONFIG_PROC_FS
1310proc_cache_fail:
1311	proc_net_remove(net, "ip6_mr_vif");
1312proc_vif_fail:
1313	ip6mr_rules_exit(net);
1314#endif
1315fail:
1316	return err;
1317}
1318
1319static void __net_exit ip6mr_net_exit(struct net *net)
1320{
1321#ifdef CONFIG_PROC_FS
1322	proc_net_remove(net, "ip6_mr_cache");
1323	proc_net_remove(net, "ip6_mr_vif");
1324#endif
1325	ip6mr_rules_exit(net);
1326}
1327
1328static struct pernet_operations ip6mr_net_ops = {
1329	.init = ip6mr_net_init,
1330	.exit = ip6mr_net_exit,
1331};
1332
1333int __init ip6_mr_init(void)
1334{
1335	int err;
1336
1337	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1338				       sizeof(struct mfc6_cache),
1339				       0, SLAB_HWCACHE_ALIGN,
1340				       NULL);
1341	if (!mrt_cachep)
1342		return -ENOMEM;
1343
1344	err = register_pernet_subsys(&ip6mr_net_ops);
1345	if (err)
1346		goto reg_pernet_fail;
1347
1348	err = register_netdevice_notifier(&ip6_mr_notifier);
1349	if (err)
1350		goto reg_notif_fail;
1351#ifdef CONFIG_IPV6_PIMSM_V2
1352	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1353		pr_err("%s: can't add PIM protocol\n", __func__);
1354		err = -EAGAIN;
1355		goto add_proto_fail;
1356	}
1357#endif
1358	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1359		      ip6mr_rtm_dumproute, NULL);
1360	return 0;
1361#ifdef CONFIG_IPV6_PIMSM_V2
1362add_proto_fail:
1363	unregister_netdevice_notifier(&ip6_mr_notifier);
1364#endif
1365reg_notif_fail:
1366	unregister_pernet_subsys(&ip6mr_net_ops);
1367reg_pernet_fail:
1368	kmem_cache_destroy(mrt_cachep);
1369	return err;
1370}
1371
1372void ip6_mr_cleanup(void)
1373{
 
 
 
 
1374	unregister_netdevice_notifier(&ip6_mr_notifier);
1375	unregister_pernet_subsys(&ip6mr_net_ops);
1376	kmem_cache_destroy(mrt_cachep);
1377}
1378
1379static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1380			 struct mf6cctl *mfc, int mrtsock)
1381{
1382	bool found = false;
1383	int line;
1384	struct mfc6_cache *uc, *c;
1385	unsigned char ttls[MAXMIFS];
1386	int i;
1387
1388	if (mfc->mf6cc_parent >= MAXMIFS)
1389		return -ENFILE;
1390
1391	memset(ttls, 255, MAXMIFS);
1392	for (i = 0; i < MAXMIFS; i++) {
1393		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1394			ttls[i] = 1;
1395
1396	}
1397
1398	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1399
1400	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1401		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1402		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
 
 
1403			found = true;
1404			break;
1405		}
1406	}
1407
1408	if (found) {
1409		write_lock_bh(&mrt_lock);
1410		c->mf6c_parent = mfc->mf6cc_parent;
1411		ip6mr_update_thresholds(mrt, c, ttls);
1412		if (!mrtsock)
1413			c->mfc_flags |= MFC_STATIC;
1414		write_unlock_bh(&mrt_lock);
 
1415		return 0;
1416	}
1417
1418	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
 
1419		return -EINVAL;
1420
1421	c = ip6mr_cache_alloc();
1422	if (c == NULL)
1423		return -ENOMEM;
1424
1425	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1426	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1427	c->mf6c_parent = mfc->mf6cc_parent;
1428	ip6mr_update_thresholds(mrt, c, ttls);
1429	if (!mrtsock)
1430		c->mfc_flags |= MFC_STATIC;
1431
1432	write_lock_bh(&mrt_lock);
1433	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1434	write_unlock_bh(&mrt_lock);
1435
1436	/*
1437	 *	Check to see if we resolved a queued list. If so we
1438	 *	need to send on the frames and tidy up.
1439	 */
1440	found = false;
1441	spin_lock_bh(&mfc_unres_lock);
1442	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1443		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1444		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1445			list_del(&uc->list);
1446			atomic_dec(&mrt->cache_resolve_queue_len);
1447			found = true;
1448			break;
1449		}
1450	}
1451	if (list_empty(&mrt->mfc6_unres_queue))
1452		del_timer(&mrt->ipmr_expire_timer);
1453	spin_unlock_bh(&mfc_unres_lock);
1454
1455	if (found) {
1456		ip6mr_cache_resolve(net, mrt, uc, c);
1457		ip6mr_cache_free(uc);
1458	}
 
1459	return 0;
1460}
1461
1462/*
1463 *	Close the multicast socket, and clear the vif tables etc
1464 */
1465
1466static void mroute_clean_tables(struct mr6_table *mrt)
1467{
1468	int i;
1469	LIST_HEAD(list);
1470	struct mfc6_cache *c, *next;
1471
1472	/*
1473	 *	Shut down all active vif entries
1474	 */
1475	for (i = 0; i < mrt->maxvif; i++) {
1476		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1477			mif6_delete(mrt, i, &list);
 
1478	}
1479	unregister_netdevice_many(&list);
1480
1481	/*
1482	 *	Wipe the cache
1483	 */
1484	for (i = 0; i < MFC6_LINES; i++) {
1485		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1486			if (c->mfc_flags & MFC_STATIC)
1487				continue;
1488			write_lock_bh(&mrt_lock);
1489			list_del(&c->list);
1490			write_unlock_bh(&mrt_lock);
1491
 
1492			ip6mr_cache_free(c);
1493		}
1494	}
1495
1496	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1497		spin_lock_bh(&mfc_unres_lock);
1498		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1499			list_del(&c->list);
 
1500			ip6mr_destroy_unres(mrt, c);
1501		}
1502		spin_unlock_bh(&mfc_unres_lock);
1503	}
1504}
1505
1506static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1507{
1508	int err = 0;
1509	struct net *net = sock_net(sk);
1510
1511	rtnl_lock();
1512	write_lock_bh(&mrt_lock);
1513	if (likely(mrt->mroute6_sk == NULL)) {
1514		mrt->mroute6_sk = sk;
1515		net->ipv6.devconf_all->mc_forwarding++;
 
 
 
1516	}
1517	else
1518		err = -EADDRINUSE;
1519	write_unlock_bh(&mrt_lock);
1520
1521	rtnl_unlock();
1522
1523	return err;
1524}
1525
1526int ip6mr_sk_done(struct sock *sk)
1527{
1528	int err = -EACCES;
1529	struct net *net = sock_net(sk);
1530	struct mr6_table *mrt;
1531
1532	rtnl_lock();
1533	ip6mr_for_each_table(mrt, net) {
1534		if (sk == mrt->mroute6_sk) {
1535			write_lock_bh(&mrt_lock);
1536			mrt->mroute6_sk = NULL;
1537			net->ipv6.devconf_all->mc_forwarding--;
 
 
 
 
1538			write_unlock_bh(&mrt_lock);
1539
1540			mroute_clean_tables(mrt);
1541			err = 0;
1542			break;
1543		}
1544	}
1545	rtnl_unlock();
1546
1547	return err;
1548}
1549
1550struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1551{
1552	struct mr6_table *mrt;
1553	struct flowi6 fl6 = {
1554		.flowi6_iif	= skb->skb_iif,
1555		.flowi6_oif	= skb->dev->ifindex,
1556		.flowi6_mark	= skb->mark,
1557	};
1558
1559	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1560		return NULL;
1561
1562	return mrt->mroute6_sk;
1563}
1564
1565/*
1566 *	Socket options and virtual interface manipulation. The whole
1567 *	virtual interface system is a complete heap, but unfortunately
1568 *	that's how BSD mrouted happens to think. Maybe one day with a proper
1569 *	MOSPF/PIM router set up we can clean this up.
1570 */
1571
1572int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1573{
1574	int ret;
1575	struct mif6ctl vif;
1576	struct mf6cctl mfc;
1577	mifi_t mifi;
1578	struct net *net = sock_net(sk);
1579	struct mr6_table *mrt;
1580
1581	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1582	if (mrt == NULL)
1583		return -ENOENT;
1584
1585	if (optname != MRT6_INIT) {
1586		if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
1587			return -EACCES;
1588	}
1589
1590	switch (optname) {
1591	case MRT6_INIT:
1592		if (sk->sk_type != SOCK_RAW ||
1593		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1594			return -EOPNOTSUPP;
1595		if (optlen < sizeof(int))
1596			return -EINVAL;
1597
1598		return ip6mr_sk_init(mrt, sk);
1599
1600	case MRT6_DONE:
1601		return ip6mr_sk_done(sk);
1602
1603	case MRT6_ADD_MIF:
1604		if (optlen < sizeof(vif))
1605			return -EINVAL;
1606		if (copy_from_user(&vif, optval, sizeof(vif)))
1607			return -EFAULT;
1608		if (vif.mif6c_mifi >= MAXMIFS)
1609			return -ENFILE;
1610		rtnl_lock();
1611		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1612		rtnl_unlock();
1613		return ret;
1614
1615	case MRT6_DEL_MIF:
1616		if (optlen < sizeof(mifi_t))
1617			return -EINVAL;
1618		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1619			return -EFAULT;
1620		rtnl_lock();
1621		ret = mif6_delete(mrt, mifi, NULL);
1622		rtnl_unlock();
1623		return ret;
1624
1625	/*
1626	 *	Manipulate the forwarding caches. These live
1627	 *	in a sort of kernel/user symbiosis.
1628	 */
1629	case MRT6_ADD_MFC:
1630	case MRT6_DEL_MFC:
 
 
 
1631		if (optlen < sizeof(mfc))
1632			return -EINVAL;
1633		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1634			return -EFAULT;
 
 
1635		rtnl_lock();
1636		if (optname == MRT6_DEL_MFC)
1637			ret = ip6mr_mfc_delete(mrt, &mfc);
1638		else
1639			ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
 
1640		rtnl_unlock();
1641		return ret;
1642
1643	/*
1644	 *	Control PIM assert (to activate pim will activate assert)
1645	 */
1646	case MRT6_ASSERT:
1647	{
1648		int v;
 
 
 
1649		if (get_user(v, (int __user *)optval))
1650			return -EFAULT;
1651		mrt->mroute_do_assert = !!v;
1652		return 0;
1653	}
1654
1655#ifdef CONFIG_IPV6_PIMSM_V2
1656	case MRT6_PIM:
1657	{
1658		int v;
 
 
 
1659		if (get_user(v, (int __user *)optval))
1660			return -EFAULT;
1661		v = !!v;
1662		rtnl_lock();
1663		ret = 0;
1664		if (v != mrt->mroute_do_pim) {
1665			mrt->mroute_do_pim = v;
1666			mrt->mroute_do_assert = v;
1667		}
1668		rtnl_unlock();
1669		return ret;
1670	}
1671
1672#endif
1673#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1674	case MRT6_TABLE:
1675	{
1676		u32 v;
1677
1678		if (optlen != sizeof(u32))
1679			return -EINVAL;
1680		if (get_user(v, (u32 __user *)optval))
1681			return -EFAULT;
 
 
 
1682		if (sk == mrt->mroute6_sk)
1683			return -EBUSY;
1684
1685		rtnl_lock();
1686		ret = 0;
1687		if (!ip6mr_new_table(net, v))
1688			ret = -ENOMEM;
1689		raw6_sk(sk)->ip6mr_table = v;
1690		rtnl_unlock();
1691		return ret;
1692	}
1693#endif
1694	/*
1695	 *	Spurious command, or MRT6_VERSION which you cannot
1696	 *	set.
1697	 */
1698	default:
1699		return -ENOPROTOOPT;
1700	}
1701}
1702
1703/*
1704 *	Getsock opt support for the multicast routing system.
1705 */
1706
1707int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1708			  int __user *optlen)
1709{
1710	int olr;
1711	int val;
1712	struct net *net = sock_net(sk);
1713	struct mr6_table *mrt;
1714
1715	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1716	if (mrt == NULL)
1717		return -ENOENT;
1718
1719	switch (optname) {
1720	case MRT6_VERSION:
1721		val = 0x0305;
1722		break;
1723#ifdef CONFIG_IPV6_PIMSM_V2
1724	case MRT6_PIM:
1725		val = mrt->mroute_do_pim;
1726		break;
1727#endif
1728	case MRT6_ASSERT:
1729		val = mrt->mroute_do_assert;
1730		break;
1731	default:
1732		return -ENOPROTOOPT;
1733	}
1734
1735	if (get_user(olr, optlen))
1736		return -EFAULT;
1737
1738	olr = min_t(int, olr, sizeof(int));
1739	if (olr < 0)
1740		return -EINVAL;
1741
1742	if (put_user(olr, optlen))
1743		return -EFAULT;
1744	if (copy_to_user(optval, &val, olr))
1745		return -EFAULT;
1746	return 0;
1747}
1748
1749/*
1750 *	The IP multicast ioctl support routines.
1751 */
1752
1753int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1754{
1755	struct sioc_sg_req6 sr;
1756	struct sioc_mif_req6 vr;
1757	struct mif_device *vif;
1758	struct mfc6_cache *c;
1759	struct net *net = sock_net(sk);
1760	struct mr6_table *mrt;
1761
1762	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1763	if (mrt == NULL)
1764		return -ENOENT;
1765
1766	switch (cmd) {
1767	case SIOCGETMIFCNT_IN6:
1768		if (copy_from_user(&vr, arg, sizeof(vr)))
1769			return -EFAULT;
1770		if (vr.mifi >= mrt->maxvif)
1771			return -EINVAL;
1772		read_lock(&mrt_lock);
1773		vif = &mrt->vif6_table[vr.mifi];
1774		if (MIF_EXISTS(mrt, vr.mifi)) {
1775			vr.icount = vif->pkt_in;
1776			vr.ocount = vif->pkt_out;
1777			vr.ibytes = vif->bytes_in;
1778			vr.obytes = vif->bytes_out;
1779			read_unlock(&mrt_lock);
1780
1781			if (copy_to_user(arg, &vr, sizeof(vr)))
1782				return -EFAULT;
1783			return 0;
1784		}
1785		read_unlock(&mrt_lock);
1786		return -EADDRNOTAVAIL;
1787	case SIOCGETSGCNT_IN6:
1788		if (copy_from_user(&sr, arg, sizeof(sr)))
1789			return -EFAULT;
1790
1791		read_lock(&mrt_lock);
1792		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1793		if (c) {
1794			sr.pktcnt = c->mfc_un.res.pkt;
1795			sr.bytecnt = c->mfc_un.res.bytes;
1796			sr.wrong_if = c->mfc_un.res.wrong_if;
1797			read_unlock(&mrt_lock);
1798
1799			if (copy_to_user(arg, &sr, sizeof(sr)))
1800				return -EFAULT;
1801			return 0;
1802		}
1803		read_unlock(&mrt_lock);
1804		return -EADDRNOTAVAIL;
1805	default:
1806		return -ENOIOCTLCMD;
1807	}
1808}
1809
1810#ifdef CONFIG_COMPAT
1811struct compat_sioc_sg_req6 {
1812	struct sockaddr_in6 src;
1813	struct sockaddr_in6 grp;
1814	compat_ulong_t pktcnt;
1815	compat_ulong_t bytecnt;
1816	compat_ulong_t wrong_if;
1817};
1818
1819struct compat_sioc_mif_req6 {
1820	mifi_t	mifi;
1821	compat_ulong_t icount;
1822	compat_ulong_t ocount;
1823	compat_ulong_t ibytes;
1824	compat_ulong_t obytes;
1825};
1826
1827int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1828{
1829	struct compat_sioc_sg_req6 sr;
1830	struct compat_sioc_mif_req6 vr;
1831	struct mif_device *vif;
1832	struct mfc6_cache *c;
1833	struct net *net = sock_net(sk);
1834	struct mr6_table *mrt;
1835
1836	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1837	if (mrt == NULL)
1838		return -ENOENT;
1839
1840	switch (cmd) {
1841	case SIOCGETMIFCNT_IN6:
1842		if (copy_from_user(&vr, arg, sizeof(vr)))
1843			return -EFAULT;
1844		if (vr.mifi >= mrt->maxvif)
1845			return -EINVAL;
1846		read_lock(&mrt_lock);
1847		vif = &mrt->vif6_table[vr.mifi];
1848		if (MIF_EXISTS(mrt, vr.mifi)) {
1849			vr.icount = vif->pkt_in;
1850			vr.ocount = vif->pkt_out;
1851			vr.ibytes = vif->bytes_in;
1852			vr.obytes = vif->bytes_out;
1853			read_unlock(&mrt_lock);
1854
1855			if (copy_to_user(arg, &vr, sizeof(vr)))
1856				return -EFAULT;
1857			return 0;
1858		}
1859		read_unlock(&mrt_lock);
1860		return -EADDRNOTAVAIL;
1861	case SIOCGETSGCNT_IN6:
1862		if (copy_from_user(&sr, arg, sizeof(sr)))
1863			return -EFAULT;
1864
1865		read_lock(&mrt_lock);
1866		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1867		if (c) {
1868			sr.pktcnt = c->mfc_un.res.pkt;
1869			sr.bytecnt = c->mfc_un.res.bytes;
1870			sr.wrong_if = c->mfc_un.res.wrong_if;
1871			read_unlock(&mrt_lock);
1872
1873			if (copy_to_user(arg, &sr, sizeof(sr)))
1874				return -EFAULT;
1875			return 0;
1876		}
1877		read_unlock(&mrt_lock);
1878		return -EADDRNOTAVAIL;
1879	default:
1880		return -ENOIOCTLCMD;
1881	}
1882}
1883#endif
1884
1885static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1886{
1887	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1888			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1889	IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1890			 IPSTATS_MIB_OUTOCTETS, skb->len);
1891	return dst_output(skb);
1892}
1893
1894/*
1895 *	Processing handlers for ip6mr_forward
1896 */
1897
1898static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1899			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1900{
1901	struct ipv6hdr *ipv6h;
1902	struct mif_device *vif = &mrt->vif6_table[vifi];
1903	struct net_device *dev;
1904	struct dst_entry *dst;
1905	struct flowi6 fl6;
1906
1907	if (vif->dev == NULL)
1908		goto out_free;
1909
1910#ifdef CONFIG_IPV6_PIMSM_V2
1911	if (vif->flags & MIFF_REGISTER) {
1912		vif->pkt_out++;
1913		vif->bytes_out += skb->len;
1914		vif->dev->stats.tx_bytes += skb->len;
1915		vif->dev->stats.tx_packets++;
1916		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1917		goto out_free;
1918	}
1919#endif
1920
1921	ipv6h = ipv6_hdr(skb);
1922
1923	fl6 = (struct flowi6) {
1924		.flowi6_oif = vif->link,
1925		.daddr = ipv6h->daddr,
1926	};
1927
1928	dst = ip6_route_output(net, NULL, &fl6);
1929	if (dst->error) {
1930		dst_release(dst);
1931		goto out_free;
1932	}
1933
1934	skb_dst_drop(skb);
1935	skb_dst_set(skb, dst);
1936
1937	/*
1938	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1939	 * not only before forwarding, but after forwarding on all output
1940	 * interfaces. It is clear, if mrouter runs a multicasting
1941	 * program, it should receive packets not depending to what interface
1942	 * program is joined.
1943	 * If we will not make it, the program will have to join on all
1944	 * interfaces. On the other hand, multihoming host (or router, but
1945	 * not mrouter) cannot join to more than one interface - it will
1946	 * result in receiving multiple packets.
1947	 */
1948	dev = vif->dev;
1949	skb->dev = dev;
1950	vif->pkt_out++;
1951	vif->bytes_out += skb->len;
1952
1953	/* We are about to write */
1954	/* XXX: extension headers? */
1955	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1956		goto out_free;
1957
1958	ipv6h = ipv6_hdr(skb);
1959	ipv6h->hop_limit--;
1960
1961	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1962
1963	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
 
1964		       ip6mr_forward2_finish);
1965
1966out_free:
1967	kfree_skb(skb);
1968	return 0;
1969}
1970
1971static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
1972{
1973	int ct;
1974
1975	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
1976		if (mrt->vif6_table[ct].dev == dev)
1977			break;
1978	}
1979	return ct;
1980}
1981
1982static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
1983			  struct sk_buff *skb, struct mfc6_cache *cache)
1984{
1985	int psend = -1;
1986	int vif, ct;
 
1987
1988	vif = cache->mf6c_parent;
1989	cache->mfc_un.res.pkt++;
1990	cache->mfc_un.res.bytes += skb->len;
1991
 
 
 
 
 
 
 
 
 
 
 
 
1992	/*
1993	 * Wrong interface: drop packet and (maybe) send PIM assert.
1994	 */
1995	if (mrt->vif6_table[vif].dev != skb->dev) {
1996		int true_vifi;
1997
1998		cache->mfc_un.res.wrong_if++;
1999		true_vifi = ip6mr_find_vif(mrt, skb->dev);
2000
2001		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2002		    /* pimsm uses asserts, when switching from RPT to SPT,
2003		       so that we cannot check that packet arrived on an oif.
2004		       It is bad, but otherwise we would need to move pretty
2005		       large chunk of pimd to kernel. Ough... --ANK
2006		     */
2007		    (mrt->mroute_do_pim ||
2008		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2009		    time_after(jiffies,
2010			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2011			cache->mfc_un.res.last_assert = jiffies;
2012			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2013		}
2014		goto dont_forward;
2015	}
2016
 
2017	mrt->vif6_table[vif].pkt_in++;
2018	mrt->vif6_table[vif].bytes_in += skb->len;
2019
2020	/*
2021	 *	Forward the frame
2022	 */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2023	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2024		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
 
 
2025			if (psend != -1) {
2026				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2027				if (skb2)
2028					ip6mr_forward2(net, mrt, skb2, cache, psend);
2029			}
2030			psend = ct;
2031		}
2032	}
 
2033	if (psend != -1) {
2034		ip6mr_forward2(net, mrt, skb, cache, psend);
2035		return 0;
2036	}
2037
2038dont_forward:
2039	kfree_skb(skb);
2040	return 0;
2041}
2042
2043
2044/*
2045 *	Multicast packets for forwarding arrive here
2046 */
2047
2048int ip6_mr_input(struct sk_buff *skb)
2049{
2050	struct mfc6_cache *cache;
2051	struct net *net = dev_net(skb->dev);
2052	struct mr6_table *mrt;
2053	struct flowi6 fl6 = {
2054		.flowi6_iif	= skb->dev->ifindex,
2055		.flowi6_mark	= skb->mark,
2056	};
2057	int err;
2058
2059	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2060	if (err < 0) {
2061		kfree_skb(skb);
2062		return err;
2063	}
2064
2065	read_lock(&mrt_lock);
2066	cache = ip6mr_cache_find(mrt,
2067				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
 
 
 
 
 
 
 
 
2068
2069	/*
2070	 *	No usable cache entry
2071	 */
2072	if (cache == NULL) {
2073		int vif;
2074
2075		vif = ip6mr_find_vif(mrt, skb->dev);
2076		if (vif >= 0) {
2077			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2078			read_unlock(&mrt_lock);
2079
2080			return err;
2081		}
2082		read_unlock(&mrt_lock);
2083		kfree_skb(skb);
2084		return -ENODEV;
2085	}
2086
2087	ip6_mr_forward(net, mrt, skb, cache);
2088
2089	read_unlock(&mrt_lock);
2090
2091	return 0;
2092}
2093
2094
2095static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2096			       struct mfc6_cache *c, struct rtmsg *rtm)
2097{
2098	int ct;
2099	struct rtnexthop *nhp;
2100	u8 *b = skb_tail_pointer(skb);
2101	struct rtattr *mp_head;
2102
2103	/* If cache is unresolved, don't try to parse IIF and OIF */
2104	if (c->mf6c_parent >= MAXMIFS)
2105		return -ENOENT;
2106
2107	if (MIF_EXISTS(mrt, c->mf6c_parent))
2108		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
2109
2110	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
 
 
2111
2112	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2113		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2114			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
2115				goto rtattr_failure;
2116			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
 
 
 
2117			nhp->rtnh_flags = 0;
2118			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2119			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2120			nhp->rtnh_len = sizeof(*nhp);
2121		}
2122	}
2123	mp_head->rta_type = RTA_MULTIPATH;
2124	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
 
 
 
 
 
 
 
2125	rtm->rtm_type = RTN_MULTICAST;
2126	return 1;
2127
2128rtattr_failure:
2129	nlmsg_trim(skb, b);
2130	return -EMSGSIZE;
2131}
2132
2133int ip6mr_get_route(struct net *net,
2134		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2135{
2136	int err;
2137	struct mr6_table *mrt;
2138	struct mfc6_cache *cache;
2139	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2140
2141	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2142	if (mrt == NULL)
2143		return -ENOENT;
2144
2145	read_lock(&mrt_lock);
2146	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
 
 
 
 
 
 
 
2147
2148	if (!cache) {
2149		struct sk_buff *skb2;
2150		struct ipv6hdr *iph;
2151		struct net_device *dev;
2152		int vif;
2153
2154		if (nowait) {
2155			read_unlock(&mrt_lock);
2156			return -EAGAIN;
2157		}
2158
2159		dev = skb->dev;
2160		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2161			read_unlock(&mrt_lock);
2162			return -ENODEV;
2163		}
2164
2165		/* really correct? */
2166		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2167		if (!skb2) {
2168			read_unlock(&mrt_lock);
2169			return -ENOMEM;
2170		}
2171
2172		skb_reset_transport_header(skb2);
2173
2174		skb_put(skb2, sizeof(struct ipv6hdr));
2175		skb_reset_network_header(skb2);
2176
2177		iph = ipv6_hdr(skb2);
2178		iph->version = 0;
2179		iph->priority = 0;
2180		iph->flow_lbl[0] = 0;
2181		iph->flow_lbl[1] = 0;
2182		iph->flow_lbl[2] = 0;
2183		iph->payload_len = 0;
2184		iph->nexthdr = IPPROTO_NONE;
2185		iph->hop_limit = 0;
2186		iph->saddr = rt->rt6i_src.addr;
2187		iph->daddr = rt->rt6i_dst.addr;
2188
2189		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2190		read_unlock(&mrt_lock);
2191
2192		return err;
2193	}
2194
2195	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2196		cache->mfc_flags |= MFC_NOTIFY;
2197
2198	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2199	read_unlock(&mrt_lock);
2200	return err;
2201}
2202
2203static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2204			     u32 pid, u32 seq, struct mfc6_cache *c)
 
2205{
2206	struct nlmsghdr *nlh;
2207	struct rtmsg *rtm;
 
2208
2209	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2210	if (nlh == NULL)
2211		return -EMSGSIZE;
2212
2213	rtm = nlmsg_data(nlh);
2214	rtm->rtm_family   = RTNL_FAMILY_IPMR;
2215	rtm->rtm_dst_len  = 128;
2216	rtm->rtm_src_len  = 128;
2217	rtm->rtm_tos      = 0;
2218	rtm->rtm_table    = mrt->id;
2219	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2220		goto nla_put_failure;
 
2221	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2222	rtm->rtm_protocol = RTPROT_UNSPEC;
 
 
 
2223	rtm->rtm_flags    = 0;
2224
2225	if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
2226	    nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
2227		goto nla_put_failure;
2228	if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
 
 
2229		goto nla_put_failure;
2230
2231	return nlmsg_end(skb, nlh);
 
2232
2233nla_put_failure:
2234	nlmsg_cancel(skb, nlh);
2235	return -EMSGSIZE;
2236}
2237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2238static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2239{
2240	struct net *net = sock_net(skb->sk);
2241	struct mr6_table *mrt;
2242	struct mfc6_cache *mfc;
2243	unsigned int t = 0, s_t;
2244	unsigned int h = 0, s_h;
2245	unsigned int e = 0, s_e;
2246
2247	s_t = cb->args[0];
2248	s_h = cb->args[1];
2249	s_e = cb->args[2];
2250
2251	read_lock(&mrt_lock);
2252	ip6mr_for_each_table(mrt, net) {
2253		if (t < s_t)
2254			goto next_table;
2255		if (t > s_t)
2256			s_h = 0;
2257		for (h = s_h; h < MFC6_LINES; h++) {
2258			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2259				if (e < s_e)
2260					goto next_entry;
2261				if (ip6mr_fill_mroute(mrt, skb,
2262						      NETLINK_CB(cb->skb).pid,
2263						      cb->nlh->nlmsg_seq,
2264						      mfc) < 0)
 
2265					goto done;
2266next_entry:
2267				e++;
2268			}
2269			e = s_e = 0;
2270		}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2271		s_h = 0;
2272next_table:
2273		t++;
2274	}
2275done:
2276	read_unlock(&mrt_lock);
2277
2278	cb->args[2] = e;
2279	cb->args[1] = h;
2280	cb->args[0] = t;
2281
2282	return skb->len;
2283}
/* ===== v4.6: a second copy of this file, as of kernel v4.6, begins below ===== */
   1/*
   2 *	Linux IPv6 multicast routing support for BSD pim6sd
   3 *	Based on net/ipv4/ipmr.c.
   4 *
   5 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   6 *		LSIIT Laboratory, Strasbourg, France
   7 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   8 *		6WIND, Paris, France
   9 *	Copyright (C)2007,2008 USAGI/WIDE Project
  10 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  11 *
  12 *	This program is free software; you can redistribute it and/or
  13 *	modify it under the terms of the GNU General Public License
  14 *	as published by the Free Software Foundation; either version
  15 *	2 of the License, or (at your option) any later version.
  16 *
  17 */
  18
  19#include <asm/uaccess.h>
  20#include <linux/types.h>
  21#include <linux/sched.h>
  22#include <linux/errno.h>
  23#include <linux/timer.h>
  24#include <linux/mm.h>
  25#include <linux/kernel.h>
  26#include <linux/fcntl.h>
  27#include <linux/stat.h>
  28#include <linux/socket.h>
  29#include <linux/inet.h>
  30#include <linux/netdevice.h>
  31#include <linux/inetdevice.h>
  32#include <linux/proc_fs.h>
  33#include <linux/seq_file.h>
  34#include <linux/init.h>
  35#include <linux/slab.h>
  36#include <linux/compat.h>
  37#include <net/protocol.h>
  38#include <linux/skbuff.h>
  39#include <net/sock.h>
  40#include <net/raw.h>
  41#include <linux/notifier.h>
  42#include <linux/if_arp.h>
  43#include <net/checksum.h>
  44#include <net/netlink.h>
  45#include <net/fib_rules.h>
  46
  47#include <net/ipv6.h>
  48#include <net/ip6_route.h>
  49#include <linux/mroute6.h>
  50#include <linux/pim.h>
  51#include <net/addrconf.h>
  52#include <linux/netfilter_ipv6.h>
  53#include <linux/export.h>
  54#include <net/ip6_checksum.h>
  55#include <linux/netconf.h>
  56
/* Per-namespace IPv6 multicast routing table.  One instance per table id
 * (several when CONFIG_IPV6_MROUTE_MULTIPLE_TABLES is enabled).
 */
struct mr6_table {
	struct list_head	list;		/* link in net->ipv6.mr6_tables */
	possible_net_t		net;		/* owning network namespace */
	u32			id;		/* table identifier */
	struct sock		*mroute6_sk;	/* userspace routing daemon socket */
	struct timer_list	ipmr_expire_timer; /* ages out unresolved entries */
	struct list_head	mfc6_unres_queue;  /* entries awaiting resolution */
	struct list_head	mfc6_cache_array[MFC6_LINES]; /* resolved cache hash */
	struct mif_device	vif6_table[MAXMIFS]; /* multicast virtual interfaces */
	int			maxvif;		/* one past highest mif index in use */
	atomic_t		cache_resolve_queue_len; /* # entries on unres queue */
	bool			mroute_do_assert; /* report wrong-iif to daemon */
	bool			mroute_do_pim;	/* PIM operation enabled */
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num; /* PIM register vif index, -1 if none */
#endif
};

/* fib_rule wrapper used to steer lookups to a particular mr6_table */
struct ip6mr_rule {
	struct fib_rule		common;
};

/* lookup result cookie filled in by ip6mr_rule_action() */
struct ip6mr_result {
	struct mr6_table	*mrt;
};
  82
/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

/* True when the mif slot has a live device bound to it */
#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

/* slab cache for struct mfc6_cache allocations */
static struct kmem_cache *mrt_cachep __read_mostly;

/* Forward declarations for routines defined later in this file */
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm);
static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr6_table *mrt, bool all);
static void ipmr_expire_process(unsigned long arg);
 123
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

/* Find the multicast routing table with the given id in this namespace.
 * Returns NULL when no such table exists.
 */
static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}
 138
/* Resolve which mr6_table is responsible for a flow, via the fib-rules
 * framework.  Returns 0 with *mrt set on success, negative errno otherwise.
 */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,	/* no refcount taken on the rule */
	};

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}
 156
/* fib_rules .action callback: translate a matching rule into a table
 * lookup result.  -EAGAIN makes fib_rules_lookup() try the next rule.
 */
static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr6_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}
 181
/* .match: ip6mr rules carry no selectors, so every flow matches. */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

/* .configure: no private netlink attributes to parse. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

/* .compare: rules have no private data, so any two compare equal. */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

/* .fill: dump a rule; no selectors are used, report zeroed fields. */
static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}
 211
/* fib_rules ops template, cloned per-namespace by fib_rules_register(). */
static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};
 225
/* Per-namespace init (multi-table): register the fib-rules ops, create
 * the default table and install the catch-all default rule.
 * Unwinds in reverse order on failure.
 */
static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr6_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		err = -ENOMEM;
		goto err1;
	}

	/* lowest priority rule pointing at the default table */
	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	ip6mr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}
 257
/* Per-namespace teardown (multi-table): free every table and unregister
 * the rules ops, all under RTNL.
 */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr6_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}
#else
/* Single-table build: exactly one mr6_table per namespace (net->ipv6.mrt6);
 * the helpers below collapse to trivial accessors.
 */
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	/* id is ignored: there is only the one table */
	return net->ipv6.mrt6;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}
#endif
 300
/* Return the table with the given id, creating and initializing it if it
 * does not exist yet.  Returns NULL on allocation failure.
 */
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;
	unsigned int i;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (!mrt)
		return NULL;
	mrt->id = id;
	write_pnet(&mrt->net, net);

	/* Forwarding cache */
	for (i = 0; i < MFC6_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IPV6_PIMSM_V2
	mrt->mroute_reg_vif_num = -1;	/* no PIM register vif yet */
#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
	return mrt;
}
 333
/* Destroy a table: stop the expiry timer, flush all cache entries and
 * vifs, then free the structure.  Caller holds RTNL.
 */
static void ip6mr_free_table(struct mr6_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	kfree(mrt);
}
 340
#ifdef CONFIG_PROC_FS

/* Iterator state for /proc/net/ip6_mr_cache */
struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	struct list_head *cache;	/* hash chain or unres queue being walked */
	int ct;				/* current hash bucket index */
};


/* Position the mfc iterator on the pos'th cache entry, walking the
 * resolved hash buckets first and then the unresolved queue.
 *
 * Locking: returns with mrt_lock read-held when the entry came from the
 * resolved cache, or with mfc_unres_lock held when it came from the
 * unresolved queue; the matching seq stop/next callbacks release it.
 */
static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
 376
/*
 *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
 */

/* Iterator state for /proc/net/ip6_mr_vif */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	int ct;				/* current vif index */
};

/* Position the vif iterator on the pos'th live vif, skipping empty
 * slots.  Called with mrt_lock read-held.
 */
static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
					    struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	struct mr6_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}
 401
/* seq_file .start for the vif table: takes mrt_lock for the whole walk
 * (released in ip6mr_vif_seq_stop).
 */
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}
 419
 420static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 421{
 422	struct ipmr_vif_iter *iter = seq->private;
 423	struct net *net = seq_file_net(seq);
 424	struct mr6_table *mrt = iter->mrt;
 425
 426	++*pos;
 427	if (v == SEQ_START_TOKEN)
 428		return ip6mr_vif_seq_idx(net, iter, 0);
 429
 430	while (++iter->ct < mrt->maxvif) {
 431		if (!MIF_EXISTS(mrt, iter->ct))
 432			continue;
 433		return &mrt->vif6_table[iter->ct];
 434	}
 435	return NULL;
 436}
 437
/* seq_file .stop for the vif table: drop the lock taken in .start. */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
 443
/* seq_file .show for the vif table: header line, then one line of
 * counters per live vif.
 */
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr6_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif6_table,	/* index by pointer arithmetic */
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}
 465
static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

/* open handler for /proc/net/ip6_mr_vif */
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
 486
/* seq_file .start for the mfc cache; ipmr_mfc_seq_idx() takes whichever
 * lock covers the returned entry.
 */
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}
 501
/* seq_file .next for the mfc cache.  Walks the current hash chain, then
 * subsequent buckets, then hands over from mrt_lock to mfc_unres_lock to
 * walk the unresolved queue.  The lock matching the returned entry stays
 * held; ipmr_mfc_seq_stop() releases it.
 */
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	/* more entries on the current chain? */
	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
 544
/* seq_file .stop for the mfc cache: release whichever lock the iterator
 * still holds (see ipmr_mfc_seq_idx/next); nothing if iteration finished.
 */
static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr6_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc6_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc6_cache_array[it->ct])
		read_unlock(&mrt_lock);
}
 555
/* seq_file .show for the mfc cache: header line, then one line per entry
 * with per-oif TTL thresholds for resolved entries.
 */
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		struct mr6_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mrt->mfc6_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			/* list each forwarding mif with its TTL threshold */
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
 597
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

/* open handler for /proc/net/ip6_mr_cache */
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif
 619
#ifdef CONFIG_IPV6_PIMSM_V2

/* Receive handler for IPPROTO_PIM register packets: validate the PIM
 * header and checksum, then decapsulate the inner multicast packet and
 * re-inject it as if received on the PIM register vif.
 * Always consumes the skb and returns 0.
 */
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	/* must be a PIM v2 Register, not a null register, with a valid
	 * checksum over either the header alone or the whole packet
	 */
	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	/* grab a reference on the register vif's device under mrt_lock */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (!reg_dev)
		goto drop;

	/* strip the outer headers and re-inject the inner packet */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
 686
/* inet6 protocol handler registered for IPPROTO_PIM */
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
 690
 691/* Service routines creating virtual interfaces: PIMREG */
 692
/* Transmit handler of the pim6reg device: packets routed to the register
 * vif are not sent on the wire but reported whole to the PIM daemon, then
 * freed.
 */
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}
 719
/* The register vif has no underlying link device. */
static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

/* alloc_netdev() setup callback for the pim6reg pseudo-device */
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	/* leave room for an outer IPv6 header plus PIM register header */
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
 739
 740static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
 741{
 742	struct net_device *dev;
 743	char name[IFNAMSIZ];
 744
 745	if (mrt->id == RT6_TABLE_DFLT)
 746		sprintf(name, "pim6reg");
 747	else
 748		sprintf(name, "pim6reg%u", mrt->id);
 749
 750	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
 751	if (!dev)
 752		return NULL;
 753
 754	dev_net_set(dev, net);
 755
 756	if (register_netdevice(dev)) {
 757		free_netdev(dev);
 758		return NULL;
 759	}
 
 760
 761	if (dev_open(dev))
 762		goto failure;
 763
 764	dev_hold(dev);
 765	return dev;
 766
 767failure:
 
 
 
 
 768	unregister_netdevice(dev);
 769	return NULL;
 770}
 771#endif
 772
/*
 *	Delete a VIF entry
 */

/* Remove vif 'vifi' from the table.  The detached device is put on
 * 'head' for batched unregistration when it was a register vif.
 * Returns 0 or -EADDRNOTAVAIL if the slot is out of range or empty.
 */
static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif6_table[vifi];

	/* detach the device under the write lock */
	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	/* shrink maxvif down to the highest remaining live vif */
	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	/* undo the mc_forwarding bump done in mif6_add() and notify */
	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}
 829
/* Return an mfc6_cache entry to the slab cache. */
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}
 834
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		/* version 0 marks a queued netlink request skb (a netlink
		 * header hides behind a fake IPv6 header); answer it with
		 * an -ETIMEDOUT error instead of forwarding
		 */
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}
 860
 861
/* Timer process for all the unresolved queue. */

/* Drop unresolved entries whose timeout elapsed and re-arm the timer for
 * the soonest remaining expiry.  Called with mfc_unres_lock held.
 */
static void ipmr_do_expire_process(struct mr6_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;	/* upper bound for next re-arm */
	struct mfc6_cache *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}
 887
/* Timer callback: try to take mfc_unres_lock without spinning in timer
 * context; if contended, retry one jiffy later.
 */
static void ipmr_expire_process(unsigned long arg)
{
	struct mr6_table *mrt = (struct mr6_table *)arg;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}
 902
 903/* Fill oifs list. It is called under write locked mrt_lock. */
 904
 905static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
 906				    unsigned char *ttls)
 907{
 908	int vifi;
 909
 910	cache->mfc_un.res.minvif = MAXMIFS;
 911	cache->mfc_un.res.maxvif = 0;
 912	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 913
 914	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
 915		if (MIF_EXISTS(mrt, vifi) &&
 916		    ttls[vifi] && ttls[vifi] < 255) {
 917			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 918			if (cache->mfc_un.res.minvif > vifi)
 919				cache->mfc_un.res.minvif = vifi;
 920			if (cache->mfc_un.res.maxvif <= vifi)
 921				cache->mfc_un.res.maxvif = vifi + 1;
 922		}
 923	}
 924}
 925
/* Install a new virtual interface described by 'vifc' into the table.
 * 'mrtsock' is non-zero when the request comes from the mroute socket
 * (otherwise the vif is flagged static).  Returns 0 or a negative errno.
 */
static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		/* ordinary vif bound to an existing device */
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	/* mark the device as multicast-forwarding and notify via netconf */
	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev_get_iflink(dev);

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
1009
1010static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1011					   const struct in6_addr *origin,
1012					   const struct in6_addr *mcastgrp)
1013{
1014	int line = MFC6_HASH(mcastgrp, origin);
1015	struct mfc6_cache *c;
1016
1017	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1018		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1019		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1020			return c;
1021	}
1022	return NULL;
1023}
1024
/* Look for a (*,*,oif) entry */
/* Returns the wildcard entry that forwards out of 'mifi', or NULL. */
static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
						      mifi_t mifi)
{
	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
	struct mfc6_cache *c;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
		if (ipv6_addr_any(&c->mf6c_origin) &&
		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
		    (c->mfc_un.res.ttls[mifi] < 255))	/* forwards on mifi */
			return c;

	return NULL;
}
1040
/* Look for a (*,G) entry */
/* Find a (*,G) entry usable via 'mifi', falling back to the (*,*,oif)
 * wildcard when none matches.
 */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	int line = MFC6_HASH(mcastgrp, &in6addr_any);
	struct mfc6_cache *c, *proxy;

	if (ipv6_addr_any(mcastgrp))
		goto skip;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
		if (ipv6_addr_any(&c->mf6c_origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
			if (c->mfc_un.res.ttls[mifi] < 255)
				return c;

			/* It's ok if the mifi is part of the static tree */
			proxy = ip6mr_cache_find_any_parent(mrt,
							    c->mf6c_parent);
			if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
				return c;
		}

skip:
	return ip6mr_cache_find_any_parent(mrt, mifi);
}
1068
/*
 *	Allocate a multicast cache entry
 */
/* GFP_KERNEL allocation for resolved entries added from process context. */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (!c)
		return NULL;
	c->mfc_un.res.minvif = MAXMIFS;	/* empty forwarding range */
	return c;
}
1080
/* Atomic allocation for unresolved entries created on the receive path;
 * entry expires 10 seconds from now unless resolved.
 */
static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (!c)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}
1090
/*
 *	A cache entry has gone into a resolved state from queued
 */

/* Flush skbs queued on the unresolved entry 'uc' through the freshly
 * resolved entry 'c': queued netlink requests (version == 0 marker) are
 * answered, data packets are forwarded.
 */
static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				/* route didn't fit: report -EMSGSIZE back */
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}
1121
/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

/* Build an upcall message (struct mrt6msg, optionally preceded by a copy
 * of the packet's IPv6 header) and queue it on the daemon's socket.
 * 'assert' selects the message type (MRT6MSG_NOCACHE/WRONGMIF/WHOLEPKT).
 */
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		/* whole packet is relayed; make room for the mrt6msg header */
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/* no daemon listening: drop the upcall */
	if (!mrt->mroute6_sk) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
1217
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

/* Park 'skb' on the unresolved entry matching its (S,G), creating the
 * entry and sending a MRT6MSG_NOCACHE upcall on first sight.  At most
 * 4 packets (qlen > 3) are queued per entry; excess packets are dropped
 * with -ENOBUFS.
 */
static int
ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		/* bound the number of pending entries to 10 */
		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc6_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		/* arm/re-arm the expiry timer for the new entry */
		ipmr_do_expire_process(mrt);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
1294
1295/*
1296 *	MFC6 cache manipulation by user space
1297 */
1298
/*
 *	Delete the resolved MFC entry matching @mfc (and @parent, unless
 *	parent == -1 which matches any parent).  Returns 0 on success or
 *	-ENOENT if no matching entry exists in the hash bucket.
 */
static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	int line;
	struct mfc6_cache *c, *next;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp,
				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
		    (parent == -1 || parent == c->mf6c_parent)) {
			/* Unlink under the writer lock so the forwarding
			 * path (readers of mrt_lock) never sees a half
			 * removed entry.
			 */
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}
1323
1324static int ip6mr_device_event(struct notifier_block *this,
1325			      unsigned long event, void *ptr)
1326{
1327	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1328	struct net *net = dev_net(dev);
1329	struct mr6_table *mrt;
1330	struct mif_device *v;
1331	int ct;
1332	LIST_HEAD(list);
1333
1334	if (event != NETDEV_UNREGISTER)
1335		return NOTIFY_DONE;
1336
1337	ip6mr_for_each_table(mrt, net) {
1338		v = &mrt->vif6_table[0];
1339		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1340			if (v->dev == dev)
1341				mif6_delete(mrt, ct, &list);
1342		}
1343	}
1344	unregister_netdevice_many(&list);
1345
1346	return NOTIFY_DONE;
1347}
1348
/* Notifier block used to track netdevice unregistration */
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
1352
1353/*
1354 *	Setup for IP multicast routing
1355 */
1356
/*
 *	Per-namespace initialisation: set up the routing-rule tables and
 *	the /proc diagnostic files.  On failure, already-acquired
 *	resources are released in reverse order via the goto ladder.
 */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
fail:
	return err;
}
1384
/* Per-namespace teardown: mirror of ip6mr_net_init() */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
}
1393
/* Registered with register_pernet_subsys() in ip6_mr_init() */
static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};
1398
/*
 *	Module init: create the mfc6_cache slab, register per-netns ops,
 *	the netdevice notifier, the PIM protocol handler (if configured)
 *	and the rtnetlink dump handler.  Errors unwind in reverse order.
 */
int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
		      ip6mr_rtm_dumproute, NULL);
	return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
1437
/* Module teardown: exact reverse of ip6_mr_init() */
void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
1448
/*
 *	Add (or update) an MFC entry from a user-space MRT6_ADD_MFC[_PROXY]
 *	request.  @mrtsock is true when the request came from the routing
 *	daemon's own socket (entries from elsewhere are marked MFC_STATIC).
 *	@parent == -1 means "match any parent" when looking for an existing
 *	entry.  If a matching unresolved entry was queued, its pending
 *	packets are flushed through the new route.
 */
static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	bool found = false;
	int line;
	struct mfc6_cache *uc, *c;
	unsigned char ttls[MAXMIFS];
	int i;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	/* 255 = "do not forward on this MIF", 1 = forward */
	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;

	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp,
				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
		    (parent == -1 || parent == mfc->mf6cc_parent)) {
			found = true;
			break;
		}
	}

	/* Existing entry: just update parent/thresholds in place */
	if (found) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	/* New entries must be for a multicast group (or the (*,*) any-address) */
	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc6_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	/* No unresolved entries left: the expiry timer is no longer needed */
	if (list_empty(&mrt->mfc6_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		/* Forward the packets that were queued while unresolved */
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}
1536
1537/*
1538 *	Close the multicast socket, and clear the vif tables etc
1539 */
1540
/*
 *	Close the multicast socket, and clear the vif tables etc
 *
 *	@all: when false, entries/MIFs flagged STATIC are preserved
 *	(daemon shutdown); when true everything is wiped (table destroy).
 */
static void mroute_clean_tables(struct mr6_table *mrt, bool all)
{
	int i;
	LIST_HEAD(list);
	struct mfc6_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
			continue;
		mif6_delete(mrt, i, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
			if (!all && (c->mfc_flags & MFC_STATIC))
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_cache_free(c);
		}
	}

	/* Drop any still-unresolved entries and their queued packets */
	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
			list_del(&c->list);
			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
1583
/*
 *	Bind @sk as the single control socket of table @mrt (MRT6_INIT).
 *	Bumps the all-devices mc_forwarding counter and emits a netconf
 *	notification.  Returns -EADDRINUSE if the table already has an
 *	owner.
 */
static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(mrt->mroute6_sk == NULL)) {
		mrt->mroute6_sk = sk;
		net->ipv6.devconf_all->mc_forwarding++;
		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	}
	else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}
1606
/*
 *	Release @sk's ownership of whichever table it controls (MRT6_DONE
 *	or socket close).  Non-static entries and MIFs are cleaned up.
 *	Returns -EACCES if @sk owns no table.
 */
int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == mrt->mroute6_sk) {
			write_lock_bh(&mrt_lock);
			mrt->mroute6_sk = NULL;
			net->ipv6.devconf_all->mc_forwarding--;
			inet6_netconf_notify_devconf(net,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);
			write_unlock_bh(&mrt_lock);

			/* false: keep MFC_STATIC entries and static MIFs */
			mroute_clean_tables(mrt, false);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}
1634
1635struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1636{
1637	struct mr6_table *mrt;
1638	struct flowi6 fl6 = {
1639		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1640		.flowi6_oif	= skb->dev->ifindex,
1641		.flowi6_mark	= skb->mark,
1642	};
1643
1644	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1645		return NULL;
1646
1647	return mrt->mroute6_sk;
1648}
1649
1650/*
1651 *	Socket options and virtual interface manipulation. The whole
1652 *	virtual interface system is a complete heap, but unfortunately
1653 *	that's how BSD mrouted happens to think. Maybe one day with a proper
1654 *	MOSPF/PIM router set up we can clean this up.
1655 */
1656
/*
 *	setsockopt() handler for the MRT6_* options.  Only MRT6_INIT may be
 *	issued by a socket that is not the table owner (subject to
 *	CAP_NET_ADMIN); everything else requires ownership or the
 *	capability.
 */
int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		/* Only a raw ICMPv6 socket may become the mroute daemon */
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		parent = -1;
		/* fall through: shared body; parent stays 0 for the PROXY
		 * variants and is filled from the request below.
		 */
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk == mrt->mroute6_sk, parent);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		/* Enabling PIM implies enabling asserts as well */
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		/* Cannot retarget a socket that already owns a table */
		if (sk == mrt->mroute6_sk)
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ip6mr_new_table(net, v))
			ret = -ENOMEM;
		raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
1802
1803/*
1804 *	Getsock opt support for the multicast routing system.
1805 */
1806
/*
 *	Getsock opt support for the multicast routing system.
 *	Supports MRT6_VERSION, MRT6_PIM (if configured) and MRT6_ASSERT;
 *	copies at most sizeof(int) bytes back, truncating to the
 *	caller-supplied length.
 */
int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	/* Clamp to sizeof(int); a negative user length is rejected */
	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
1848
1849/*
1850 *	The IP multicast ioctl support routines.
1851 */
1852
/*
 *	The IP multicast ioctl support routines.
 *	SIOCGETMIFCNT_IN6 returns per-MIF packet/byte counters;
 *	SIOCGETSGCNT_IN6 returns per-(S,G) route counters.
 */
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			/* drop the lock before touching user memory */
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1909
1910#ifdef CONFIG_COMPAT
/* 32-bit-userland layout of struct sioc_sg_req6 (counters are
 * compat_ulong_t instead of native unsigned long).
 */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};
1918
/* 32-bit-userland layout of struct sioc_mif_req6 */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};
1926
/*
 *	compat (32-bit userland) twin of ip6mr_ioctl(): identical logic,
 *	but using the compat_* request layouts above.
 */
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			/* drop the lock before touching user memory */
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1983#endif
1984
/* Final hop of ip6mr_forward2(): bump forwarding stats, then hand the
 * packet to dst_output() for transmission.
 */
static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
			 IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
			 IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output(net, sk, skb);
}
1993
1994/*
1995 *	Processing handlers for ip6mr_forward
1996 */
1997
/*
 *	Transmit one copy of @skb out of MIF @vifi.  Consumes @skb on
 *	every path (transmitted, wrapped to user space, or freed).
 */
static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	/* A register MIF never transmits: the packet is encapsulated and
	 * reported to the user-space daemon as MRT6MSG_WHOLEPKT instead.
	 */
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	/* re-read after skb_cow() may have reallocated the header */
	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
2071
2072static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2073{
2074	int ct;
2075
2076	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2077		if (mrt->vif6_table[ct].dev == dev)
2078			break;
2079	}
2080	return ct;
2081}
2082
/*
 *	Forward @skb according to resolved cache entry @cache: replicate
 *	to every MIF whose TTL threshold the packet's hop limit exceeds.
 *	Consumes @skb.  Called with mrt_lock held for reading.
 */
static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif6_table[vif].dev != skb->dev) {
		cache->mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif6_table[vif].pkt_in++;
	mrt->vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&cache->mf6c_origin) &&
	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != cache->mf6c_parent &&
		    ipv6_hdr(skb)->hop_limit >
				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = cache->mf6c_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			/* Clone for every MIF except the last one, which
			 * gets the original skb (avoids one extra copy).
			 */
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2, cache, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, cache, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}
2171
2172
2173/*
2174 *	Multicast packets for forwarding arrive here
2175 */
2176
/*
 *	Multicast packets for forwarding arrive here
 *
 *	Looks up the routing table via fib rules, finds (or queues for
 *	resolution) the (S,G) cache entry, then forwards.  Consumes @skb.
 */
int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
	if (!cache) {
		/* No exact (S,G) entry: try the (*,G)/(*,*) proxy entries */
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt,
						     &ipv6_hdr(skb)->daddr,
						     vif);
	}

	/*
	 *	No usable cache entry
	 */
	if (!cache) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			/* Queue the packet and ask the daemon to resolve */
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}
2230
2231
/*
 *	Fill the route-specific netlink attributes (RTA_IIF,
 *	RTA_MULTIPATH next-hops, RTA_MFC_STATS) for cache entry @c.
 *	Returns 1 on success, -ENOENT for unresolved entries, -EMSGSIZE
 *	when @skb runs out of room.
 */
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct nlattr *mp_attr;
	struct rta_mfc_stats mfcs;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mf6c_parent >= MAXMIFS)
		return -ENOENT;

	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
		return -EMSGSIZE;
	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
	if (!mp_attr)
		return -EMSGSIZE;

	/* One rtnexthop per MIF the entry actually forwards to */
	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
			if (!nhp) {
				nla_nest_cancel(skb, mp_attr);
				return -EMSGSIZE;
			}

			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}

	nla_nest_end(skb, mp_attr);

	mfcs.mfcs_packets = c->mfc_un.res.pkt;
	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
	if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
		return -EMSGSIZE;

	rtm->rtm_type = RTN_MULTICAST;
	return 1;
}
2277
/*
 *	RTM_GETROUTE helper: describe the multicast route behind @skb's
 *	dst in @rtm.  If no cache entry exists and @nowait is clear, a
 *	dummy header-only packet is queued for resolution instead.
 */
int ip6mr_get_route(struct net *net,
		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr6_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		/* fall back to (*,G)/(*,*) proxy entries */
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		/* Build a minimal IPv6-header-only skb carrying just the
		 * (src, dst) pair so the unresolved queue can report it.
		 */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
2354
/*
 *	Build one complete RTM_NEWROUTE/RTM_DELROUTE netlink message for
 *	cache entry @c into @skb.  Returns 0 on success, -EMSGSIZE when
 *	the message does not fit.
 */
static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
			     int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len  = 128;
	rtm->rtm_src_len  = 128;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	if (c->mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags    = 0;

	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
		goto nla_put_failure;
	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2398
2399static int mr6_msgsize(bool unresolved, int maxvif)
2400{
2401	size_t len =
2402		NLMSG_ALIGN(sizeof(struct rtmsg))
2403		+ nla_total_size(4)	/* RTA_TABLE */
2404		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2405		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2406		;
2407
2408	if (!unresolved)
2409		len = len
2410		      + nla_total_size(4)	/* RTA_IIF */
2411		      + nla_total_size(0)	/* RTA_MULTIPATH */
2412		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2413						/* RTA_MFC_STATS */
2414		      + nla_total_size(sizeof(struct rta_mfc_stats))
2415		;
2416
2417	return len;
2418}
2419
/*
 *	Broadcast an RTM_NEWROUTE/RTM_DELROUTE notification for @mfc to
 *	RTNLGRP_IPV6_MROUTE listeners; on failure the socket error is
 *	recorded instead.
 */
static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
			      int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	/* mf6c_parent >= MAXMIFS identifies an unresolved entry */
	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}
2444
/*
 * RTM_GETROUTE dump callback for RTNL_FAMILY_IP6MR: walk every mr6_table
 * in the netns and emit one RTM_NEWROUTE message per MFC cache entry
 * (resolved entries first, then the per-table unresolved queue).
 *
 * Resume state between dump invocations is kept in cb->args[]:
 *   args[0] = table index (t), args[1] = hash line (h), args[2] = entry (e).
 * When the skb fills up, ip6mr_fill_mroute() returns < 0 and we bail to
 * "done" with the current (t, h, e) saved; the next call skips forward to
 * that position before emitting anything.
 */
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr6_table *mrt;
	struct mfc6_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	/* mrt_lock protects the table list and resolved cache arrays. */
	read_lock(&mrt_lock);
	ip6mr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		/* Past the table we stopped in: restart hash scan from 0. */
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC6_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
				/* Skip entries already emitted last call. */
				if (e < s_e)
					goto next_entry;
				if (ip6mr_fill_mroute(mrt, skb,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      mfc, RTM_NEWROUTE,
						      NLM_F_MULTI) < 0)
					goto done;
next_entry:
				e++;
			}
			/* Hash line fully dumped: reset entry cursor. */
			e = s_e = 0;
		}
		/* Unresolved queue is guarded by its own spinlock, not
		 * mrt_lock; h == MFC6_LINES here, which is how a resumed
		 * dump knows to skip straight to this queue.
		 */
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
			if (e < s_e)
				goto next_entry2;
			if (ip6mr_fill_mroute(mrt, skb,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      mfc, RTM_NEWROUTE,
					      NLM_F_MULTI) < 0) {
				/* Must drop the spinlock before bailing. */
				spin_unlock_bh(&mfc_unres_lock);
				goto done;
			}
next_entry2:
			e++;
		}
		spin_unlock_bh(&mfc_unres_lock);
		/* Table fully dumped: reset cursors for the next table. */
		e = s_e = 0;
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	/* Save the position to resume from on the next invocation. */
	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	/* Non-zero skb->len tells netlink more data may follow. */
	return skb->len;
}