Linux Audio

Check our new training course

Loading...
v5.4
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *	Linux INET6 implementation
   4 *	FIB front-end.
   5 *
   6 *	Authors:
   7 *	Pedro Roque		<roque@di.fc.ul.pt>
 
 
 
 
 
   8 */
   9
  10/*	Changes:
  11 *
  12 *	YOSHIFUJI Hideaki @USAGI
  13 *		reworked default router selection.
  14 *		- respect outgoing interface
  15 *		- select from (probably) reachable routers (i.e.
  16 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  17 *		- always select the same router if it is (probably)
  18 *		reachable.  otherwise, round-robin the list.
  19 *	Ville Nuorvala
  20 *		Fixed routing subtrees.
  21 */
  22
  23#define pr_fmt(fmt) "IPv6: " fmt
  24
  25#include <linux/capability.h>
  26#include <linux/errno.h>
  27#include <linux/export.h>
  28#include <linux/types.h>
  29#include <linux/times.h>
  30#include <linux/socket.h>
  31#include <linux/sockios.h>
  32#include <linux/net.h>
  33#include <linux/route.h>
  34#include <linux/netdevice.h>
  35#include <linux/in6.h>
  36#include <linux/mroute6.h>
  37#include <linux/init.h>
  38#include <linux/if_arp.h>
  39#include <linux/proc_fs.h>
  40#include <linux/seq_file.h>
  41#include <linux/nsproxy.h>
  42#include <linux/slab.h>
  43#include <linux/jhash.h>
  44#include <net/net_namespace.h>
  45#include <net/snmp.h>
  46#include <net/ipv6.h>
  47#include <net/ip6_fib.h>
  48#include <net/ip6_route.h>
  49#include <net/ndisc.h>
  50#include <net/addrconf.h>
  51#include <net/tcp.h>
  52#include <linux/rtnetlink.h>
  53#include <net/dst.h>
  54#include <net/dst_metadata.h>
  55#include <net/xfrm.h>
  56#include <net/netevent.h>
  57#include <net/netlink.h>
  58#include <net/rtnh.h>
  59#include <net/lwtunnel.h>
  60#include <net/ip_tunnels.h>
  61#include <net/l3mdev.h>
  62#include <net/ip.h>
  63#include <linux/uaccess.h>
  64
  65#ifdef CONFIG_SYSCTL
  66#include <linux/sysctl.h>
  67#endif
  68
  69static int ip6_rt_type_to_error(u8 fib6_type);
 
  70
  71#define CREATE_TRACE_POINTS
  72#include <trace/events/fib6.h>
  73EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
  74#undef CREATE_TRACE_POINTS
  75
/*
 * Result codes produced while scoring a next hop against neighbour state.
 * Every failure value is negative; rt6_score_route() passes any negative
 * result of rt6_check_neigh() straight back to its caller.
 */
  76enum rt6_nud_state {
	/* find_match() rejects the next hop outright on this value */
  77	RT6_NUD_FAIL_HARD = -3,
	/* returned by rt6_check_neigh() for a NUD_FAILED neighbour (ROUTER_PREF builds) */
  78	RT6_NUD_FAIL_PROBE = -2,
	/* find_match() turns this into score 0 and requests round-robin */
  79	RT6_NUD_FAIL_DO_RR = -1,
  80	RT6_NUD_SUCCEED = 1
  81};
  82
 
 
  83static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  84static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  85static unsigned int	 ip6_mtu(const struct dst_entry *dst);
  86static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  87static void		ip6_dst_destroy(struct dst_entry *);
  88static void		ip6_dst_ifdown(struct dst_entry *,
  89				       struct net_device *dev, int how);
  90static int		 ip6_dst_gc(struct dst_ops *ops);
  91
  92static int		ip6_pkt_discard(struct sk_buff *skb);
  93static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  94static int		ip6_pkt_prohibit(struct sk_buff *skb);
  95static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  96static void		ip6_link_failure(struct sk_buff *skb);
  97static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
  98					   struct sk_buff *skb, u32 mtu);
  99static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
 100					struct sk_buff *skb);
 101static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
 102			   int strict);
 103static size_t rt6_nlmsg_size(struct fib6_info *f6i);
 104static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 105			 struct fib6_info *rt, struct dst_entry *dst,
 106			 struct in6_addr *dest, struct in6_addr *src,
 107			 int iif, int type, u32 portid, u32 seq,
 108			 unsigned int flags);
 109static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
 110					   const struct in6_addr *daddr,
 111					   const struct in6_addr *saddr);
 112
 113#ifdef CONFIG_IPV6_ROUTE_INFO
 114static struct fib6_info *rt6_add_route_info(struct net *net,
 115					   const struct in6_addr *prefix, int prefixlen,
 116					   const struct in6_addr *gwaddr,
 117					   struct net_device *dev,
 118					   unsigned int pref);
 119static struct fib6_info *rt6_get_route_info(struct net *net,
 120					   const struct in6_addr *prefix, int prefixlen,
 121					   const struct in6_addr *gwaddr,
 122					   struct net_device *dev);
 123#endif
 124
/*
 * A list of rt6_info entries (linked through rt6i_uncached) together with
 * the spinlock that the add/del/flush helpers in this file take, with
 * bottom halves disabled, around every access to the list.
 */
 125struct uncached_list {
 126	spinlock_t		lock;
 127	struct list_head	head;
 128};
 129
 130static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
 131
 132void rt6_uncached_list_add(struct rt6_info *rt)
 133{
 134	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
 135
 136	rt->rt6i_uncached_list = ul;
 137
 138	spin_lock_bh(&ul->lock);
 139	list_add_tail(&rt->rt6i_uncached, &ul->head);
 140	spin_unlock_bh(&ul->lock);
 141}
 142
 143void rt6_uncached_list_del(struct rt6_info *rt)
 144{
 145	if (!list_empty(&rt->rt6i_uncached)) {
 146		struct uncached_list *ul = rt->rt6i_uncached_list;
 147		struct net *net = dev_net(rt->dst.dev);
 148
 149		spin_lock_bh(&ul->lock);
 150		list_del(&rt->rt6i_uncached);
 151		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
 152		spin_unlock_bh(&ul->lock);
 153	}
 154}
 155
 156static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 157{
 158	struct net_device *loopback_dev = net->loopback_dev;
 159	int cpu;
 160
 161	if (dev == loopback_dev)
 162		return;
 
 163
 164	for_each_possible_cpu(cpu) {
 165		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 166		struct rt6_info *rt;
 167
 168		spin_lock_bh(&ul->lock);
 169		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
 170			struct inet6_dev *rt_idev = rt->rt6i_idev;
 171			struct net_device *rt_dev = rt->dst.dev;
 172
 173			if (rt_idev->dev == dev) {
 174				rt->rt6i_idev = in6_dev_get(loopback_dev);
 175				in6_dev_put(rt_idev);
 176			}
 177
 178			if (rt_dev == dev) {
 179				rt->dst.dev = blackhole_netdev;
 180				dev_hold(rt->dst.dev);
 181				dev_put(rt_dev);
 182			}
 183		}
 184		spin_unlock_bh(&ul->lock);
 185	}
 
 186}
 187
 188static inline const void *choose_neigh_daddr(const struct in6_addr *p,
 189					     struct sk_buff *skb,
 190					     const void *daddr)
 191{
 192	if (!ipv6_addr_any(p))
 193		return (const void *) p;
 194	else if (skb)
 195		return &ipv6_hdr(skb)->daddr;
 196	return daddr;
 197}
 198
 199struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
 200				   struct net_device *dev,
 201				   struct sk_buff *skb,
 202				   const void *daddr)
 203{
 204	struct neighbour *n;
 205
 206	daddr = choose_neigh_daddr(gw, skb, daddr);
 207	n = __ipv6_neigh_lookup(dev, daddr);
 208	if (n)
 209		return n;
 210
 211	n = neigh_create(&nd_tbl, daddr, dev);
 212	return IS_ERR(n) ? NULL : n;
 213}
 214
 215static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
 216					      struct sk_buff *skb,
 217					      const void *daddr)
 218{
 219	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
 220
 221	return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
 222				dst->dev, skb, daddr);
 223}
 224
 225static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 226{
 227	struct net_device *dev = dst->dev;
 228	struct rt6_info *rt = (struct rt6_info *)dst;
 229
 230	daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
 231	if (!daddr)
 232		return;
 233	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
 234		return;
 235	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
 236		return;
 237	__ipv6_confirm_neigh(dev, daddr);
 238}
 239
/*
 * dst_ops table for regular IPv6 routes. It wires the ip6_* callbacks that
 * are forward-declared near the top of this file, sets the family to
 * AF_INET6 and the garbage-collection threshold to 1024.
 * NOTE(review): presumably copied into each namespace's
 * net->ipv6.ip6_dst_ops (the ops ip6_dst_alloc() uses) - confirm.
 */
 240static struct dst_ops ip6_dst_ops_template = {
 241	.family			=	AF_INET6,

 242	.gc			=	ip6_dst_gc,
 243	.gc_thresh		=	1024,
 244	.check			=	ip6_dst_check,
 245	.default_advmss		=	ip6_default_advmss,
 246	.mtu			=	ip6_mtu,
 247	.cow_metrics		=	dst_cow_metrics_generic,
 248	.destroy		=	ip6_dst_destroy,
 249	.ifdown			=	ip6_dst_ifdown,
 250	.negative_advice	=	ip6_negative_advice,
 251	.link_failure		=	ip6_link_failure,
 252	.update_pmtu		=	ip6_rt_update_pmtu,
 253	.redirect		=	rt6_do_redirect,
 254	.local_out		=	__ip6_local_out,
 255	.neigh_lookup		=	ip6_dst_neigh_lookup,
 256	.confirm_neigh		=	ip6_confirm_neigh,
 257};
 258
 259static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 260{
 261	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 262
 263	return mtu ? : dst->dev->mtu;
 264}
 265
/*
 * update_pmtu callback used by ip6_dst_blackhole_ops. Intentionally a
 * no-op: path-MTU updates are ignored for blackhole dsts.
 */
 266static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
 267					 struct sk_buff *skb, u32 mtu)
 268{
 269}
 270
/*
 * redirect callback used by ip6_dst_blackhole_ops. Intentionally a no-op:
 * redirects are ignored for blackhole dsts.
 */
 271static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
 272				      struct sk_buff *skb)
 273{

 274}
 275
/*
 * dst_ops table for IPv6 blackhole dsts. It shares destroy/check/advmss/
 * neigh_lookup with the regular ops, but uses ip6_blackhole_mtu() and the
 * empty update_pmtu/redirect callbacks defined just above.
 */
 276static struct dst_ops ip6_dst_blackhole_ops = {
 277	.family			=	AF_INET6,

 278	.destroy		=	ip6_dst_destroy,
 279	.check			=	ip6_dst_check,
 280	.mtu			=	ip6_blackhole_mtu,
 281	.default_advmss		=	ip6_default_advmss,
 282	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 283	.redirect		=	ip6_rt_blackhole_redirect,
 284	.cow_metrics		=	dst_cow_metrics_generic,
 285	.neigh_lookup		=	ip6_dst_neigh_lookup,
 286};
 287
/*
 * Metrics array of RTAX_MAX entries. The only slot named explicitly is the
 * hop-limit one (index RTAX_HOPLIMIT - 1), set to 0; every other slot is
 * zero through default initialisation.
 */
 288static const u32 ip6_template_metrics[RTAX_MAX] = {
 289	[RTAX_HOPLIMIT - 1] = 0,
 290};
 291
/*
 * Template for the FIB "null" entry: a kernel-owned reject route without a
 * next hop, of type RTN_UNREACHABLE, carrying the largest possible metric
 * (~0) and the default dst metrics, with an initial reference count of 1.
 * NOTE(review): presumably the source of net->ipv6.fib6_null_entry, which
 * the lookup helpers in this file fall back to - confirm.
 */
 292static const struct fib6_info fib6_null_entry_template = {
 293	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 294	.fib6_protocol  = RTPROT_KERNEL,
 295	.fib6_metric	= ~(u32)0,
 296	.fib6_ref	= REFCOUNT_INIT(1),
 297	.fib6_type	= RTN_UNREACHABLE,
 298	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
 299};
 300
/*
 * Template rt6_info for the "null" route: a reject route without a next
 * hop whose dst reports -ENETUNREACH and hands packets to
 * ip6_pkt_discard() / ip6_pkt_discard_out(). The dst starts with one
 * reference and is marked DST_OBSOLETE_FORCE_CHK.
 */
 301static const struct rt6_info ip6_null_entry_template = {
 302	.dst = {
 303		.__refcnt	= ATOMIC_INIT(1),
 304		.__use		= 1,
 305		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 306		.error		= -ENETUNREACH,
 307		.input		= ip6_pkt_discard,
 308		.output		= ip6_pkt_discard_out,
 309	},
 310	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),



 311};
 312
 313#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 314
/*
 * Template rt6_info for the "prohibit" route (only built with
 * CONFIG_IPV6_MULTIPLE_TABLES): same shape as the null entry, but the dst
 * reports -EACCES and uses ip6_pkt_prohibit() / ip6_pkt_prohibit_out().
 */
 315static const struct rt6_info ip6_prohibit_entry_template = {



 316	.dst = {
 317		.__refcnt	= ATOMIC_INIT(1),
 318		.__use		= 1,
 319		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 320		.error		= -EACCES,
 321		.input		= ip6_pkt_prohibit,
 322		.output		= ip6_pkt_prohibit_out,
 323	},
 324	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),



 325};
 326
/*
 * Template rt6_info for the "blackhole" route (only built with
 * CONFIG_IPV6_MULTIPLE_TABLES): the dst reports -EINVAL and drops packets
 * through the generic dst_discard() / dst_discard_out() handlers.
 */
 327static const struct rt6_info ip6_blk_hole_entry_template = {
 328	.dst = {
 329		.__refcnt	= ATOMIC_INIT(1),
 330		.__use		= 1,
 331		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 332		.error		= -EINVAL,
 333		.input		= dst_discard,
 334		.output		= dst_discard_out,
 335	},
 336	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),



 337};
 338
 339#endif
 340
 341static void rt6_info_init(struct rt6_info *rt)
 342{
 343	struct dst_entry *dst = &rt->dst;
 344
 345	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 346	INIT_LIST_HEAD(&rt->rt6i_uncached);
 347}
 348
 349/* allocate dst with ip6_dst_ops */
 350struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
 351			       int flags)
 
 352{
 353	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 354					1, DST_OBSOLETE_FORCE_CHK, flags);
 355
 356	if (rt) {
 357		rt6_info_init(rt);
 358		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
 359	}
 360
 361	return rt;
 362}
 363EXPORT_SYMBOL(ip6_dst_alloc);
 364
 365static void ip6_dst_destroy(struct dst_entry *dst)
 366{
 367	struct rt6_info *rt = (struct rt6_info *)dst;
 368	struct fib6_info *from;
 369	struct inet6_dev *idev;
 370
 371	ip_dst_metrics_put(dst);
 372	rt6_uncached_list_del(rt);
 373
 374	idev = rt->rt6i_idev;
 375	if (idev) {
 376		rt->rt6i_idev = NULL;
 377		in6_dev_put(idev);
 378	}
 
 
 
 
 
 379
 380	from = xchg((__force struct fib6_info **)&rt->from, NULL);
 381	fib6_info_release(from);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 382}
 383
 384static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 385			   int how)
 386{
 387	struct rt6_info *rt = (struct rt6_info *)dst;
 388	struct inet6_dev *idev = rt->rt6i_idev;
 389	struct net_device *loopback_dev =
 390		dev_net(dev)->loopback_dev;
 391
 392	if (idev && idev->dev != loopback_dev) {
 393		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
 394		if (loopback_idev) {
 
 395			rt->rt6i_idev = loopback_idev;
 396			in6_dev_put(idev);
 397		}
 398	}
 399}
 400
 401static bool __rt6_check_expired(const struct rt6_info *rt)
 402{
 403	if (rt->rt6i_flags & RTF_EXPIRES)
 404		return time_after(jiffies, rt->dst.expires);
 405	else
 406		return false;
 407}
 408
 409static bool rt6_check_expired(const struct rt6_info *rt)
 410{
 411	struct fib6_info *from;
 412
 413	from = rcu_dereference(rt->from);
 414
 415	if (rt->rt6i_flags & RTF_EXPIRES) {
 416		if (time_after(jiffies, rt->dst.expires))
 417			return true;
 418	} else if (from) {
 419		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
 420			fib6_check_expired(from);
 421	}
 422	return false;
 423}
 424
 425void fib6_select_path(const struct net *net, struct fib6_result *res,
 426		      struct flowi6 *fl6, int oif, bool have_oif_match,
 427		      const struct sk_buff *skb, int strict)
 428{
 429	struct fib6_info *sibling, *next_sibling;
 430	struct fib6_info *match = res->f6i;
 431
 432	if ((!match->fib6_nsiblings && !match->nh) || have_oif_match)
 433		goto out;
 434
 435	/* We might have already computed the hash for ICMPv6 errors. In such
 436	 * case it will always be non-zero. Otherwise now is the time to do it.
 437	 */
 438	if (!fl6->mp_hash &&
 439	    (!match->nh || nexthop_is_multipath(match->nh)))
 440		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
 441
 442	if (unlikely(match->nh)) {
 443		nexthop_path_fib6_result(res, fl6->mp_hash);
 444		return;
 445	}
 446
 447	if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
 448		goto out;
 449
 450	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
 451				 fib6_siblings) {
 452		const struct fib6_nh *nh = sibling->fib6_nh;
 453		int nh_upper_bound;
 454
 455		nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
 456		if (fl6->mp_hash > nh_upper_bound)
 457			continue;
 458		if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
 459			break;
 460		match = sibling;
 461		break;
 462	}
 463
 464out:
 465	res->f6i = match;
 466	res->nh = match->fib6_nh;
 467}
 468
 469/*
 470 *	Route lookup. rcu_read_lock() should be held.
 471 */
 472
 473static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
 474			       const struct in6_addr *saddr, int oif, int flags)
 475{
 476	const struct net_device *dev;
 477
 478	if (nh->fib_nh_flags & RTNH_F_DEAD)
 479		return false;
 480
 481	dev = nh->fib_nh_dev;
 482	if (oif) {
 483		if (dev->ifindex == oif)
 484			return true;
 485	} else {
 486		if (ipv6_chk_addr(net, saddr, dev,
 487				  flags & RT6_LOOKUP_F_IFACE))
 488			return true;
 489	}
 490
 491	return false;
 492}
 493
/*
 * Argument bundle passed through nexthop_for_each_fib6_nh() to
 * __rt6_nh_dev_match(): the first four fields are the lookup inputs, and
 * @nh is written by the callback with the fib6_nh it examined last.
 */
 494struct fib6_nh_dm_arg {
 495	struct net		*net;
 496	const struct in6_addr	*saddr;
 497	int			oif;
 498	int			flags;
 499	struct fib6_nh		*nh;
 500};
 501
 502static int __rt6_nh_dev_match(struct fib6_nh *nh, void *_arg)
 503{
 504	struct fib6_nh_dm_arg *arg = _arg;
 505
 506	arg->nh = nh;
 507	return __rt6_device_match(arg->net, nh, arg->saddr, arg->oif,
 508				  arg->flags);
 509}
 510
 511/* returns fib6_nh from nexthop or NULL */
 512static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh,
 513					struct fib6_result *res,
 514					const struct in6_addr *saddr,
 515					int oif, int flags)
 516{
 517	struct fib6_nh_dm_arg arg = {
 518		.net   = net,
 519		.saddr = saddr,
 520		.oif   = oif,
 521		.flags = flags,
 522	};
 523
 524	if (nexthop_is_blackhole(nh))
 525		return NULL;
 526
 527	if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg))
 528		return arg.nh;
 529
 530	return NULL;
 531}
 532
/*
 * Narrow res->f6i down to the route whose next hop fits the lookup's
 * output interface (@oif) or source address (@saddr), then fill in
 * res->nh, res->fib6_type and res->fib6_flags.
 *
 * Candidates are res->f6i on entry plus the routes chained behind it via
 * fib6_next. When none fits, the outcome is the first route (provided its
 * next hop is not RTNH_F_DEAD) or net->ipv6.fib6_null_entry. A blackhole
 * nexthop object on the first route leaves through out_blackhole, which
 * marks the result RTF_REJECT / RTN_BLACKHOLE without touching res->f6i.
 */
 533static void rt6_device_match(struct net *net, struct fib6_result *res,
 534			     const struct in6_addr *saddr, int oif, int flags)
 535{
 536	struct fib6_info *f6i = res->f6i;
 537	struct fib6_info *spf6i;
 538	struct fib6_nh *nh;
 539
	/* No constraints at all: take the first route if its next hop is alive. */
 540	if (!oif && ipv6_addr_any(saddr)) {
 541		if (unlikely(f6i->nh)) {
 542			nh = nexthop_fib6_nh(f6i->nh);
 543			if (nexthop_is_blackhole(f6i->nh))
 544				goto out_blackhole;
 545		} else {
 546			nh = f6i->fib6_nh;
 547		}
 548		if (!(nh->fib_nh_flags & RTNH_F_DEAD))
 549			goto out;
 550	}
 551
	/* Walk the chain and stop at the first route with a matching next hop. */
 552	for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
 553		bool matched = false;
 554
 555		if (unlikely(spf6i->nh)) {
 556			nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr,
 557					      oif, flags);
 558			if (nh)
 559				matched = true;
 560		} else {
 561			nh = spf6i->fib6_nh;
 562			if (__rt6_device_match(net, nh, saddr, oif, flags))
 563				matched = true;
 564		}
 565		if (matched) {
 566			res->f6i = spf6i;
 567			goto out;
 568		}
 569	}
 570
	/* Strict interface lookup with nothing matching: answer with the null entry. */
 571	if (oif && flags & RT6_LOOKUP_F_IFACE) {
 572		res->f6i = net->ipv6.fib6_null_entry;
 573		nh = res->f6i->fib6_nh;
 574		goto out;
 575	}
 576
	/* Non-strict: fall back to the first route ... */
 577	if (unlikely(f6i->nh)) {
 578		nh = nexthop_fib6_nh(f6i->nh);
 579		if (nexthop_is_blackhole(f6i->nh))
 580			goto out_blackhole;
 581	} else {
 582		nh = f6i->fib6_nh;
 583	}
 584
	/* ... unless its next hop is dead, in which case use the null entry. */
 585	if (nh->fib_nh_flags & RTNH_F_DEAD) {
 586		res->f6i = net->ipv6.fib6_null_entry;
 587		nh = res->f6i->fib6_nh;
 588	}
 589out:
 590	res->nh = nh;
 591	res->fib6_type = res->f6i->fib6_type;
 592	res->fib6_flags = res->f6i->fib6_flags;
 593	return;
 594
 595out_blackhole:
	/* NOTE(review): ORs into whatever res->fib6_flags already holds - confirm callers initialise it. */
 596	res->fib6_flags |= RTF_REJECT;
 597	res->fib6_type = RTN_BLACKHOLE;
 598	res->nh = nh;
 599}
 600
 601#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Deferred router-probe request queued by rt6_probe() and consumed by
 * rt6_probe_deferred(): the work item itself, the gateway address to
 * solicit, and the device to send on (a reference is held while queued).
 */
 602struct __rt6_probe_work {
 603	struct work_struct work;
 604	struct in6_addr target;
 605	struct net_device *dev;
 606};
 607
 608static void rt6_probe_deferred(struct work_struct *w)
 609{
 610	struct in6_addr mcaddr;
 611	struct __rt6_probe_work *work =
 612		container_of(w, struct __rt6_probe_work, work);
 613
 614	addrconf_addr_solict_mult(&work->target, &mcaddr);
 615	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
 616	dev_put(work->dev);
 617	kfree(work);
 618}
 619
 620static void rt6_probe(struct fib6_nh *fib6_nh)
 621{
 622	struct __rt6_probe_work *work = NULL;
 623	const struct in6_addr *nh_gw;
 624	unsigned long last_probe;
 625	struct neighbour *neigh;
 626	struct net_device *dev;
 627	struct inet6_dev *idev;
 628
 629	/*
 630	 * Okay, this does not seem to be appropriate
 631	 * for now, however, we need to check if it
 632	 * is really so; aka Router Reachability Probing.
 633	 *
 634	 * Router Reachability Probe MUST be rate-limited
 635	 * to no more than one per minute.
 636	 */
 637	if (!fib6_nh->fib_nh_gw_family)
 638		return;
 639
 640	nh_gw = &fib6_nh->fib_nh_gw6;
 641	dev = fib6_nh->fib_nh_dev;
 642	rcu_read_lock_bh();
 643	last_probe = READ_ONCE(fib6_nh->last_probe);
 644	idev = __in6_dev_get(dev);
 645	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
 646	if (neigh) {
 647		if (neigh->nud_state & NUD_VALID)
 648			goto out;
 649
 650		write_lock(&neigh->lock);
 651		if (!(neigh->nud_state & NUD_VALID) &&
 652		    time_after(jiffies,
 653			       neigh->updated + idev->cnf.rtr_probe_interval)) {
 654			work = kmalloc(sizeof(*work), GFP_ATOMIC);
 655			if (work)
 656				__neigh_set_probe_once(neigh);
 657		}
 658		write_unlock(&neigh->lock);
 659	} else if (time_after(jiffies, last_probe +
 660				       idev->cnf.rtr_probe_interval)) {
 661		work = kmalloc(sizeof(*work), GFP_ATOMIC);
 662	}
 663
 664	if (!work || cmpxchg(&fib6_nh->last_probe,
 665			     last_probe, jiffies) != last_probe) {
 666		kfree(work);
 667	} else {
 668		INIT_WORK(&work->work, rt6_probe_deferred);
 669		work->target = *nh_gw;
 670		dev_hold(dev);
 671		work->dev = dev;
 672		schedule_work(&work->work);
 673	}
 674
 675out:
 676	rcu_read_unlock_bh();
 677}
 678#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF: no-op. */
 679static inline void rt6_probe(struct fib6_nh *fib6_nh)
 680{
 681}
 682#endif
 683
 684/*
 685 * Default Router Selection (RFC 2461 6.3.6)
 686 */
 687static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
 
 
 
 
 
 
 
 
 
 
 
 688{
 689	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 690	struct neighbour *neigh;
 
 691
 692	rcu_read_lock_bh();
 693	neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
 694					  &fib6_nh->fib_nh_gw6);
 695	if (neigh) {
 696		read_lock(&neigh->lock);
 
 
 697		if (neigh->nud_state & NUD_VALID)
 698			ret = RT6_NUD_SUCCEED;
 699#ifdef CONFIG_IPV6_ROUTER_PREF
 700		else if (!(neigh->nud_state & NUD_FAILED))
 701			ret = RT6_NUD_SUCCEED;
 702		else
 703			ret = RT6_NUD_FAIL_PROBE;
 704#endif
 705		read_unlock(&neigh->lock);
 706	} else {
 707		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
 708		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
 709	}
 710	rcu_read_unlock_bh();
 711
 712	return ret;
 713}
 714
 715static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
 716			   int strict)
 717{
 718	int m = 0;
 719
 720	if (!oif || nh->fib_nh_dev->ifindex == oif)
 721		m = 2;
 722
 
 723	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 724		return RT6_NUD_FAIL_HARD;
 725#ifdef CONFIG_IPV6_ROUTER_PREF
 726	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
 727#endif
 728	if ((strict & RT6_LOOKUP_F_REACHABLE) &&
 729	    !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
 730		int n = rt6_check_neigh(nh);
 731		if (n < 0)
 732			return n;
 733	}
 734	return m;
 735}
 736
 737static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
 738		       int oif, int strict, int *mpri, bool *do_rr)
 739{
 740	bool match_do_rr = false;
 741	bool rc = false;
 742	int m;
 743
 744	if (nh->fib_nh_flags & RTNH_F_DEAD)
 745		goto out;
 746
 747	if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
 748	    nh->fib_nh_flags & RTNH_F_LINKDOWN &&
 749	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
 750		goto out;
 751
 752	m = rt6_score_route(nh, fib6_flags, oif, strict);
 753	if (m == RT6_NUD_FAIL_DO_RR) {
 754		match_do_rr = true;
 755		m = 0; /* lowest valid score */
 756	} else if (m == RT6_NUD_FAIL_HARD) {
 757		goto out;
 758	}
 759
 760	if (strict & RT6_LOOKUP_F_REACHABLE)
 761		rt6_probe(nh);
 762
 763	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
 764	if (m > *mpri) {
 765		*do_rr = match_do_rr;
 
 766		*mpri = m;
 767		rc = true;
 
 
 768	}
 769out:
 770	return rc;
 771}
 772
/*
 * Argument bundle passed through nexthop_for_each_fib6_nh() to
 * rt6_nh_find_match(): the route flags and lookup parameters forwarded to
 * find_match(), pointers to the running best score and round-robin flag,
 * and @nh, which the callback sets to the fib6_nh it examined last.
 */
 773struct fib6_nh_frl_arg {
 774	u32		flags;
 775	int		oif;
 776	int		strict;
 777	int		*mpri;
 778	bool		*do_rr;
 779	struct fib6_nh	*nh;
 780};
 781
 782static int rt6_nh_find_match(struct fib6_nh *nh, void *_arg)
 783{
 784	struct fib6_nh_frl_arg *arg = _arg;
 785
 786	arg->nh = nh;
 787	return find_match(nh, arg->flags, arg->oif, arg->strict,
 788			  arg->mpri, arg->do_rr);
 789}
 790
/*
 * Scan routes from @f6i_start along fib6_next, stopping before @nomatch
 * (or at the end of the chain), and leave the best-scoring one in @res.
 *
 * find_match() only reports a match when the score beats *mpri, so every
 * accepted route overwrites the previous result. When @cont is non-NULL,
 * the first route whose metric differs from @metric is stored in *cont
 * and the scan ends there. Expired routes are skipped. A route backed by
 * a blackhole nexthop object ends the scan at once with an
 * RTF_REJECT / RTN_BLACKHOLE result.
 */
 791static void __find_rr_leaf(struct fib6_info *f6i_start,
 792			   struct fib6_info *nomatch, u32 metric,
 793			   struct fib6_result *res, struct fib6_info **cont,
 794			   int oif, int strict, bool *do_rr, int *mpri)
 795{
 796	struct fib6_info *f6i;
 797
 798	for (f6i = f6i_start;
 799	     f6i && f6i != nomatch;
 800	     f6i = rcu_dereference(f6i->fib6_next)) {
 801		bool matched = false;
 802		struct fib6_nh *nh;
 803
		/* crossed into a different metric: hand the position back */
 804		if (cont && f6i->fib6_metric != metric) {
 805			*cont = f6i;
 806			return;
 807		}
 808
 809		if (fib6_check_expired(f6i))
 810			continue;
 811
 812		if (unlikely(f6i->nh)) {
 813			struct fib6_nh_frl_arg arg = {
 814				.flags  = f6i->fib6_flags,
 815				.oif    = oif,
 816				.strict = strict,
 817				.mpri   = mpri,
 818				.do_rr  = do_rr
 819			};
 820
 821			if (nexthop_is_blackhole(f6i->nh)) {
 822				res->fib6_flags = RTF_REJECT;
 823				res->fib6_type = RTN_BLACKHOLE;
 824				res->f6i = f6i;
 825				res->nh = nexthop_fib6_nh(f6i->nh);
 826				return;
 827			}
			/* arg.nh is the fib6_nh the callback stopped on */
 828			if (nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_find_match,
 829						     &arg)) {
 830				matched = true;
 831				nh = arg.nh;
 832			}
 833		} else {
 834			nh = f6i->fib6_nh;
 835			if (find_match(nh, f6i->fib6_flags, oif, strict,
 836				       mpri, do_rr))
 837				matched = true;
 838		}
 839		if (matched) {
 840			res->f6i = f6i;
 841			res->nh = nh;
 842			res->fib6_flags = f6i->fib6_flags;
 843			res->fib6_type = f6i->fib6_type;
 844		}
 845	}
 846}
 847
 848static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
 849			 struct fib6_info *rr_head, int oif, int strict,
 850			 bool *do_rr, struct fib6_result *res)
 851{
 852	u32 metric = rr_head->fib6_metric;
 853	struct fib6_info *cont = NULL;
 854	int mpri = -1;
 855
 856	__find_rr_leaf(rr_head, NULL, metric, res, &cont,
 857		       oif, strict, do_rr, &mpri);
 
 
 
 
 
 858
 859	__find_rr_leaf(leaf, rr_head, metric, res, &cont,
 860		       oif, strict, do_rr, &mpri);
 861
 862	if (res->f6i || !cont)
 863		return;
 864
 865	__find_rr_leaf(cont, NULL, metric, res, NULL,
 866		       oif, strict, do_rr, &mpri);
 867}
 868
/*
 * Select the best route stored at FIB node @fn for output interface @oif
 * and place it in @res; when nothing qualifies, @res is filled from
 * net->ipv6.fib6_null_entry.
 *
 * The search starts at fn->rr_ptr (or fn->leaf when rr_ptr is unset). If
 * find_rr_leaf() reports do_rr, fn->rr_ptr is advanced to the next route
 * of the same metric (wrapping around to the leaf) under the table's
 * tb6_lock.
 *
 * NOTE(review): relies on rcu_dereference(), so presumably must be called
 * inside an RCU read-side critical section - confirm against callers.
 */
 869static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
 870		       struct fib6_result *res, int strict)
 871{
 872	struct fib6_info *leaf = rcu_dereference(fn->leaf);
 873	struct fib6_info *rt0;
 874	bool do_rr = false;
 875	int key_plen;
 876
 877	/* make sure this function or its helpers sets f6i */
 878	res->f6i = NULL;
 879
 880	if (!leaf || leaf == net->ipv6.fib6_null_entry)
 881		goto out;
 882
 883	rt0 = rcu_dereference(fn->rr_ptr);
 884	if (!rt0)
 885		rt0 = leaf;
 886
 887	/* Double check to make sure fn is not an intermediate node
 888	 * and fn->leaf does not point to its child's leaf
 889	 * (This might happen if all routes under fn are deleted from
 890	 * the tree and fib6_repair_tree() is called on the node.)
 891	 */
 892	key_plen = rt0->fib6_dst.plen;
 893#ifdef CONFIG_IPV6_SUBTREES
 894	if (rt0->fib6_src.plen)
 895		key_plen = rt0->fib6_src.plen;
 896#endif
 897	if (fn->fn_bit != key_plen)
 898		goto out;
 899
 900	find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
 901	if (do_rr) {
 902		struct fib6_info *next = rcu_dereference(rt0->fib6_next);
 903
 904		/* no entries matched; do round-robin */
 905		if (!next || next->fib6_metric != rt0->fib6_metric)
 906			next = leaf;
 907
 908		if (next != rt0) {
 909			spin_lock_bh(&leaf->fib6_table->tb6_lock);
 910			/* make sure next is not being deleted from the tree */
 911			if (next->fib6_node)
 912				rcu_assign_pointer(fn->rr_ptr, next);
 913			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
 914		}
 915	}
 916
 917out:
	/* nothing selected: report the null entry */
 918	if (!res->f6i) {
 919		res->f6i = net->ipv6.fib6_null_entry;
 920		res->nh = res->f6i->fib6_nh;
 921		res->fib6_flags = res->f6i->fib6_flags;
 922		res->fib6_type = res->f6i->fib6_type;
 923	}
 924}
 925
 926static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
 927{
 928	return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
 929	       res->nh->fib_nh_gw_family;
 930}
 931
 932#ifdef CONFIG_IPV6_ROUTE_INFO
 933int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 934		  const struct in6_addr *gwaddr)
 935{
 936	struct net *net = dev_net(dev);
 937	struct route_info *rinfo = (struct route_info *) opt;
 938	struct in6_addr prefix_buf, *prefix;
 939	unsigned int pref;
 940	unsigned long lifetime;
 941	struct fib6_info *rt;
 942
 943	if (len < sizeof(struct route_info)) {
 944		return -EINVAL;
 945	}
 946
 947	/* Sanity check for prefix_len and length */
 948	if (rinfo->length > 3) {
 949		return -EINVAL;
 950	} else if (rinfo->prefix_len > 128) {
 951		return -EINVAL;
 952	} else if (rinfo->prefix_len > 64) {
 953		if (rinfo->length < 2) {
 954			return -EINVAL;
 955		}
 956	} else if (rinfo->prefix_len > 0) {
 957		if (rinfo->length < 1) {
 958			return -EINVAL;
 959		}
 960	}
 961
 962	pref = rinfo->route_pref;
 963	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 964		return -EINVAL;
 965
 966	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 967
 968	if (rinfo->length == 3)
 969		prefix = (struct in6_addr *)rinfo->prefix;
 970	else {
 971		/* this function is safe */
 972		ipv6_addr_prefix(&prefix_buf,
 973				 (struct in6_addr *)rinfo->prefix,
 974				 rinfo->prefix_len);
 975		prefix = &prefix_buf;
 976	}
 977
 978	if (rinfo->prefix_len == 0)
 979		rt = rt6_get_dflt_router(net, gwaddr, dev);
 980	else
 981		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
 982					gwaddr, dev);
 983
 984	if (rt && !lifetime) {
 985		ip6_del_rt(net, rt);
 986		rt = NULL;
 987	}
 988
 989	if (!rt && lifetime)
 990		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 991					dev, pref);
 992	else if (rt)
 993		rt->fib6_flags = RTF_ROUTEINFO |
 994				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 995
 996	if (rt) {
 997		if (!addrconf_finite_timeout(lifetime))
 998			fib6_clean_expires(rt);
 999		else
1000			fib6_set_expires(rt, jiffies + HZ * lifetime);
1001
1002		fib6_info_release(rt);
 
1003	}
1004	return 0;
1005}
1006#endif
1007
1008/*
1009 *	Misc support functions
1010 */
1011
1012/* called with rcu_lock held */
1013static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
1014{
1015	struct net_device *dev = res->nh->fib_nh_dev;
1016
1017	if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
1018		/* for copies of local routes, dst->dev needs to be the
1019		 * device if it is a master device, the master device if
1020		 * device is enslaved, and the loopback as the default
1021		 */
1022		if (netif_is_l3_slave(dev) &&
1023		    !rt6_need_strict(&res->f6i->fib6_dst.addr))
1024			dev = l3mdev_master_dev_rcu(dev);
1025		else if (!netif_is_l3_master(dev))
1026			dev = dev_net(dev)->loopback_dev;
1027		/* last case is netif_is_l3_master(dev) is true in which
1028		 * case we want dev returned to be dev
1029		 */
1030	}
1031
1032	return dev;
1033}
1034
/*
 * dst error code for each route type, indexed by RTN_* value (entries
 * 0..RTN_MAX). Types that deliver or forward traffic map to 0; the
 * reject-style types map to a negative errno.
 */
1035static const int fib6_prop[RTN_MAX + 1] = {
1036	[RTN_UNSPEC]	= 0,
1037	[RTN_UNICAST]	= 0,
1038	[RTN_LOCAL]	= 0,
1039	[RTN_BROADCAST]	= 0,
1040	[RTN_ANYCAST]	= 0,
1041	[RTN_MULTICAST]	= 0,
1042	[RTN_BLACKHOLE]	= -EINVAL,
1043	[RTN_UNREACHABLE] = -EHOSTUNREACH,
1044	[RTN_PROHIBIT]	= -EACCES,
1045	[RTN_THROW]	= -EAGAIN,
1046	[RTN_NAT]	= -EINVAL,
1047	[RTN_XRESOLVE]	= -EINVAL,
1048};
1049
/*
 * Translate a route type (RTN_*) into the dst error stored in fib6_prop.
 * NOTE(review): the index is not range-checked here; this presumably
 * relies on callers only passing values <= RTN_MAX - confirm.
 */
1050static int ip6_rt_type_to_error(u8 fib6_type)
1051{
1052	return fib6_prop[fib6_type];
1053}
1054
1055static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
1056{
1057	unsigned short flags = 0;
1058
1059	if (rt->dst_nocount)
1060		flags |= DST_NOCOUNT;
1061	if (rt->dst_nopolicy)
1062		flags |= DST_NOPOLICY;
1063	if (rt->dst_host)
1064		flags |= DST_HOST;
1065
1066	return flags;
1067}
1068
1069static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
1070{
1071	rt->dst.error = ip6_rt_type_to_error(fib6_type);
1072
1073	switch (fib6_type) {
1074	case RTN_BLACKHOLE:
1075		rt->dst.output = dst_discard_out;
1076		rt->dst.input = dst_discard;
1077		break;
1078	case RTN_PROHIBIT:
1079		rt->dst.output = ip6_pkt_prohibit_out;
1080		rt->dst.input = ip6_pkt_prohibit;
1081		break;
1082	case RTN_THROW:
1083	case RTN_UNREACHABLE:
1084	default:
1085		rt->dst.output = ip6_pkt_discard_out;
1086		rt->dst.input = ip6_pkt_discard;
1087		break;
1088	}
1089}
1090
1091static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
1092{
1093	struct fib6_info *f6i = res->f6i;
1094
1095	if (res->fib6_flags & RTF_REJECT) {
1096		ip6_rt_init_dst_reject(rt, res->fib6_type);
1097		return;
1098	}
1099
1100	rt->dst.error = 0;
1101	rt->dst.output = ip6_output;
1102
1103	if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
1104		rt->dst.input = ip6_input;
1105	} else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
1106		rt->dst.input = ip6_mc_input;
1107	} else {
1108		rt->dst.input = ip6_forward;
1109	}
1110
1111	if (res->nh->fib_nh_lws) {
1112		rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
1113		lwtunnel_set_redirect(&rt->dst);
1114	}
1115
1116	rt->dst.lastuse = jiffies;
1117}
1118
1119/* Caller must already hold reference to @from */
/*
 * Bind @rt to the fib entry it derives from: clear RTF_EXPIRES on the
 * route, publish @from through rt->from and initialise the dst metrics
 * from the fib entry's metrics. The caller's reference on @from is the one
 * later dropped by ip6_dst_destroy().
 */
1120static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
1121{
1122	rt->rt6i_flags &= ~RTF_EXPIRES;
1123	rcu_assign_pointer(rt->from, from);
1124	ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
1125}
1126
1127/* Caller must already hold reference to f6i in result */
1128static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
1129{
1130	const struct fib6_nh *nh = res->nh;
1131	const struct net_device *dev = nh->fib_nh_dev;
1132	struct fib6_info *f6i = res->f6i;
1133
1134	ip6_rt_init_dst(rt, res);
1135
1136	rt->rt6i_dst = f6i->fib6_dst;
1137	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
1138	rt->rt6i_flags = res->fib6_flags;
1139	if (nh->fib_nh_gw_family) {
1140		rt->rt6i_gateway = nh->fib_nh_gw6;
1141		rt->rt6i_flags |= RTF_GATEWAY;
1142	}
1143	rt6_set_from(rt, f6i);
1144#ifdef CONFIG_IPV6_SUBTREES
1145	rt->rt6i_src = f6i->fib6_src;
1146#endif
1147}
1148
1149static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1150					struct in6_addr *saddr)
1151{
1152	struct fib6_node *pn, *sn;
1153	while (1) {
1154		if (fn->fn_flags & RTN_TL_ROOT)
1155			return NULL;
1156		pn = rcu_dereference(fn->parent);
1157		sn = FIB6_SUBTREE(pn);
1158		if (sn && sn != fn)
1159			fn = fib6_node_lookup(sn, NULL, saddr);
1160		else
1161			fn = pn;
1162		if (fn->fn_flags & RTN_RTINFO)
1163			return fn;
1164	}
1165}
1166
1167static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
1168{
1169	struct rt6_info *rt = *prt;
1170
1171	if (dst_hold_safe(&rt->dst))
1172		return true;
1173	if (net) {
1174		rt = net->ipv6.ip6_null_entry;
1175		dst_hold(&rt->dst);
1176	} else {
1177		rt = NULL;
1178	}
1179	*prt = rt;
1180	return false;
1181}
1182
1183/* called with rcu_lock held */
1184static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
1185{
1186	struct net_device *dev = res->nh->fib_nh_dev;
1187	struct fib6_info *f6i = res->f6i;
1188	unsigned short flags;
1189	struct rt6_info *nrt;
1190
1191	if (!fib6_info_hold_safe(f6i))
1192		goto fallback;
1193
1194	flags = fib6_info_dst_flags(f6i);
1195	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1196	if (!nrt) {
1197		fib6_info_release(f6i);
1198		goto fallback;
1199	}
1200
1201	ip6_rt_copy_init(nrt, res);
1202	return nrt;
1203
1204fallback:
1205	nrt = dev_net(dev)->ipv6.ip6_null_entry;
1206	dst_hold(&nrt->dst);
1207	return nrt;
1208}
1209
1210static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1211					     struct fib6_table *table,
1212					     struct flowi6 *fl6,
1213					     const struct sk_buff *skb,
1214					     int flags)
1215{
1216	struct fib6_result res = {};
1217	struct fib6_node *fn;
1218	struct rt6_info *rt;
1219
1220	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1221		flags &= ~RT6_LOOKUP_F_IFACE;
1222
1223	rcu_read_lock();
1224	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1225restart:
1226	res.f6i = rcu_dereference(fn->leaf);
1227	if (!res.f6i)
1228		res.f6i = net->ipv6.fib6_null_entry;
1229	else
1230		rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
1231				 flags);
1232
1233	if (res.f6i == net->ipv6.fib6_null_entry) {
1234		fn = fib6_backtrack(fn, &fl6->saddr);
1235		if (fn)
1236			goto restart;
1237
1238		rt = net->ipv6.ip6_null_entry;
1239		dst_hold(&rt->dst);
1240		goto out;
1241	} else if (res.fib6_flags & RTF_REJECT) {
1242		goto do_create;
1243	}
1244
1245	fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
1246			 fl6->flowi6_oif != 0, skb, flags);
1247
1248	/* Search through exception table */
1249	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
1250	if (rt) {
1251		if (ip6_hold_safe(net, &rt))
1252			dst_use_noref(&rt->dst, jiffies);
1253	} else {
1254do_create:
1255		rt = ip6_create_rt_rcu(&res);
1256	}
1257
1258out:
1259	trace_fib6_table_lookup(net, &res, table, fl6);
1260
1261	rcu_read_unlock();
1262
1263	return rt;
1264}
1265
1266struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1267				   const struct sk_buff *skb, int flags)
1268{
1269	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1270}
1271EXPORT_SYMBOL_GPL(ip6_route_lookup);
1272
1273struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1274			    const struct in6_addr *saddr, int oif,
1275			    const struct sk_buff *skb, int strict)
1276{
1277	struct flowi6 fl6 = {
1278		.flowi6_oif = oif,
1279		.daddr = *daddr,
1280	};
1281	struct dst_entry *dst;
1282	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1283
1284	if (saddr) {
1285		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1286		flags |= RT6_LOOKUP_F_HAS_SADDR;
1287	}
1288
1289	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1290	if (dst->error == 0)
1291		return (struct rt6_info *) dst;
1292
1293	dst_release(dst);
1294
1295	return NULL;
1296}
 
1297EXPORT_SYMBOL(rt6_lookup);
1298
/* ip6_ins_rt is called with table->tb6_lock NOT held.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is released.
 * The caller must hold the dst before calling it.
 */
1304
1305static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1306			struct netlink_ext_ack *extack)
1307{
1308	int err;
1309	struct fib6_table *table;
1310
1311	table = rt->fib6_table;
1312	spin_lock_bh(&table->tb6_lock);
1313	err = fib6_add(&table->tb6_root, rt, info, extack);
1314	spin_unlock_bh(&table->tb6_lock);
1315
1316	return err;
1317}
1318
1319int ip6_ins_rt(struct net *net, struct fib6_info *rt)
1320{
1321	struct nl_info info = {	.nl_net = net, };
1322
1323	return __ip6_ins_rt(rt, &info, NULL);
 
1324}
1325
1326static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
1327					   const struct in6_addr *daddr,
1328					   const struct in6_addr *saddr)
1329{
1330	struct fib6_info *f6i = res->f6i;
1331	struct net_device *dev;
1332	struct rt6_info *rt;
1333
1334	/*
1335	 *	Clone the route.
1336	 */
1337
1338	if (!fib6_info_hold_safe(f6i))
1339		return NULL;
1340
1341	dev = ip6_rt_get_dev_rcu(res);
1342	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1343	if (!rt) {
1344		fib6_info_release(f6i);
1345		return NULL;
1346	}
1347
1348	ip6_rt_copy_init(rt, res);
1349	rt->rt6i_flags |= RTF_CACHE;
1350	rt->dst.flags |= DST_HOST;
1351	rt->rt6i_dst.addr = *daddr;
1352	rt->rt6i_dst.plen = 128;
 
 
 
1353
1354	if (!rt6_is_gw_or_nonexthop(res)) {
1355		if (f6i->fib6_dst.plen != 128 &&
1356		    ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
1357			rt->rt6i_flags |= RTF_ANYCAST;
1358#ifdef CONFIG_IPV6_SUBTREES
1359		if (rt->rt6i_src.plen && saddr) {
1360			rt->rt6i_src.addr = *saddr;
1361			rt->rt6i_src.plen = 128;
1362		}
1363#endif
1364	}
1365
1366	return rt;
1367}
1368
1369static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
1370{
1371	struct fib6_info *f6i = res->f6i;
1372	unsigned short flags = fib6_info_dst_flags(f6i);
1373	struct net_device *dev;
1374	struct rt6_info *pcpu_rt;
1375
1376	if (!fib6_info_hold_safe(f6i))
1377		return NULL;
1378
1379	rcu_read_lock();
1380	dev = ip6_rt_get_dev_rcu(res);
1381	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
1382	rcu_read_unlock();
1383	if (!pcpu_rt) {
1384		fib6_info_release(f6i);
1385		return NULL;
1386	}
1387	ip6_rt_copy_init(pcpu_rt, res);
1388	pcpu_rt->rt6i_flags |= RTF_PCPU;
1389	return pcpu_rt;
1390}
1391
1392/* It should be called with rcu_read_lock() acquired */
1393static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
1394{
1395	struct rt6_info *pcpu_rt;
1396
1397	pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
1398
1399	return pcpu_rt;
1400}
1401
1402static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1403					    const struct fib6_result *res)
1404{
1405	struct rt6_info *pcpu_rt, *prev, **p;
1406
1407	pcpu_rt = ip6_rt_pcpu_alloc(res);
1408	if (!pcpu_rt)
1409		return NULL;
1410
1411	p = this_cpu_ptr(res->nh->rt6i_pcpu);
1412	prev = cmpxchg(p, NULL, pcpu_rt);
1413	BUG_ON(prev);
1414
1415	if (res->f6i->fib6_destroying) {
1416		struct fib6_info *from;
1417
1418		from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
1419		fib6_info_release(from);
1420	}
1421
1422	return pcpu_rt;
1423}
1424
1425/* exception hash table implementation
1426 */
1427static DEFINE_SPINLOCK(rt6_exception_lock);
1428
1429/* Remove rt6_ex from hash table and free the memory
1430 * Caller must hold rt6_exception_lock
1431 */
1432static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1433				 struct rt6_exception *rt6_ex)
1434{
1435	struct fib6_info *from;
1436	struct net *net;
1437
1438	if (!bucket || !rt6_ex)
1439		return;
1440
1441	net = dev_net(rt6_ex->rt6i->dst.dev);
1442	net->ipv6.rt6_stats->fib_rt_cache--;
1443
1444	/* purge completely the exception to allow releasing the held resources:
1445	 * some [sk] cache may keep the dst around for unlimited time
1446	 */
1447	from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
1448	fib6_info_release(from);
1449	dst_dev_put(&rt6_ex->rt6i->dst);
1450
1451	hlist_del_rcu(&rt6_ex->hlist);
1452	dst_release(&rt6_ex->rt6i->dst);
1453	kfree_rcu(rt6_ex, rcu);
1454	WARN_ON_ONCE(!bucket->depth);
1455	bucket->depth--;
1456}
1457
1458/* Remove oldest rt6_ex in bucket and free the memory
1459 * Caller must hold rt6_exception_lock
1460 */
1461static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1462{
1463	struct rt6_exception *rt6_ex, *oldest = NULL;
1464
1465	if (!bucket)
1466		return;
1467
1468	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1469		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1470			oldest = rt6_ex;
1471	}
1472	rt6_remove_exception(bucket, oldest);
1473}
1474
1475static u32 rt6_exception_hash(const struct in6_addr *dst,
1476			      const struct in6_addr *src)
1477{
1478	static u32 seed __read_mostly;
1479	u32 val;
1480
1481	net_get_random_once(&seed, sizeof(seed));
1482	val = jhash(dst, sizeof(*dst), seed);
1483
1484#ifdef CONFIG_IPV6_SUBTREES
1485	if (src)
1486		val = jhash(src, sizeof(*src), val);
1487#endif
1488	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1489}
1490
1491/* Helper function to find the cached rt in the hash table
1492 * and update bucket pointer to point to the bucket for this
1493 * (daddr, saddr) pair
1494 * Caller must hold rt6_exception_lock
1495 */
1496static struct rt6_exception *
1497__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1498			      const struct in6_addr *daddr,
1499			      const struct in6_addr *saddr)
1500{
1501	struct rt6_exception *rt6_ex;
1502	u32 hval;
1503
1504	if (!(*bucket) || !daddr)
1505		return NULL;
1506
1507	hval = rt6_exception_hash(daddr, saddr);
1508	*bucket += hval;
1509
1510	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1511		struct rt6_info *rt6 = rt6_ex->rt6i;
1512		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1513
1514#ifdef CONFIG_IPV6_SUBTREES
1515		if (matched && saddr)
1516			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1517#endif
1518		if (matched)
1519			return rt6_ex;
1520	}
1521	return NULL;
1522}
1523
1524/* Helper function to find the cached rt in the hash table
1525 * and update bucket pointer to point to the bucket for this
1526 * (daddr, saddr) pair
1527 * Caller must hold rcu_read_lock()
1528 */
1529static struct rt6_exception *
1530__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1531			 const struct in6_addr *daddr,
1532			 const struct in6_addr *saddr)
1533{
1534	struct rt6_exception *rt6_ex;
1535	u32 hval;
1536
1537	WARN_ON_ONCE(!rcu_read_lock_held());
1538
1539	if (!(*bucket) || !daddr)
1540		return NULL;
1541
1542	hval = rt6_exception_hash(daddr, saddr);
1543	*bucket += hval;
1544
1545	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1546		struct rt6_info *rt6 = rt6_ex->rt6i;
1547		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1548
1549#ifdef CONFIG_IPV6_SUBTREES
1550		if (matched && saddr)
1551			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1552#endif
1553		if (matched)
1554			return rt6_ex;
1555	}
1556	return NULL;
1557}
1558
1559static unsigned int fib6_mtu(const struct fib6_result *res)
1560{
1561	const struct fib6_nh *nh = res->nh;
1562	unsigned int mtu;
1563
1564	if (res->f6i->fib6_pmtu) {
1565		mtu = res->f6i->fib6_pmtu;
1566	} else {
1567		struct net_device *dev = nh->fib_nh_dev;
1568		struct inet6_dev *idev;
1569
1570		rcu_read_lock();
1571		idev = __in6_dev_get(dev);
1572		mtu = idev->cnf.mtu6;
1573		rcu_read_unlock();
1574	}
1575
1576	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1577
1578	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
1579}
1580
1581#define FIB6_EXCEPTION_BUCKET_FLUSHED  0x1UL
1582
1583/* used when the flushed bit is not relevant, only access to the bucket
1584 * (ie., all bucket users except rt6_insert_exception);
1585 *
1586 * called under rcu lock; sometimes called with rt6_exception_lock held
1587 */
1588static
1589struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh,
1590						       spinlock_t *lock)
1591{
1592	struct rt6_exception_bucket *bucket;
1593
1594	if (lock)
1595		bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1596						   lockdep_is_held(lock));
1597	else
1598		bucket = rcu_dereference(nh->rt6i_exception_bucket);
1599
1600	/* remove bucket flushed bit if set */
1601	if (bucket) {
1602		unsigned long p = (unsigned long)bucket;
1603
1604		p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED;
1605		bucket = (struct rt6_exception_bucket *)p;
1606	}
1607
1608	return bucket;
1609}
1610
1611static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket)
1612{
1613	unsigned long p = (unsigned long)bucket;
1614
1615	return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED);
1616}
1617
1618/* called with rt6_exception_lock held */
1619static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh,
1620					      spinlock_t *lock)
1621{
1622	struct rt6_exception_bucket *bucket;
1623	unsigned long p;
1624
1625	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1626					   lockdep_is_held(lock));
1627
1628	p = (unsigned long)bucket;
1629	p |= FIB6_EXCEPTION_BUCKET_FLUSHED;
1630	bucket = (struct rt6_exception_bucket *)p;
1631	rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1632}
1633
1634static int rt6_insert_exception(struct rt6_info *nrt,
1635				const struct fib6_result *res)
1636{
1637	struct net *net = dev_net(nrt->dst.dev);
1638	struct rt6_exception_bucket *bucket;
1639	struct fib6_info *f6i = res->f6i;
1640	struct in6_addr *src_key = NULL;
1641	struct rt6_exception *rt6_ex;
1642	struct fib6_nh *nh = res->nh;
1643	int err = 0;
1644
1645	spin_lock_bh(&rt6_exception_lock);
1646
1647	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1648					  lockdep_is_held(&rt6_exception_lock));
1649	if (!bucket) {
1650		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1651				 GFP_ATOMIC);
1652		if (!bucket) {
1653			err = -ENOMEM;
1654			goto out;
1655		}
1656		rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1657	} else if (fib6_nh_excptn_bucket_flushed(bucket)) {
1658		err = -EINVAL;
1659		goto out;
1660	}
1661
1662#ifdef CONFIG_IPV6_SUBTREES
1663	/* fib6_src.plen != 0 indicates f6i is in subtree
1664	 * and exception table is indexed by a hash of
1665	 * both fib6_dst and fib6_src.
1666	 * Otherwise, the exception table is indexed by
1667	 * a hash of only fib6_dst.
1668	 */
1669	if (f6i->fib6_src.plen)
1670		src_key = &nrt->rt6i_src.addr;
1671#endif
1672	/* rt6_mtu_change() might lower mtu on f6i.
1673	 * Only insert this exception route if its mtu
1674	 * is less than f6i's mtu value.
1675	 */
1676	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
1677		err = -EINVAL;
1678		goto out;
1679	}
1680
1681	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1682					       src_key);
1683	if (rt6_ex)
1684		rt6_remove_exception(bucket, rt6_ex);
1685
1686	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1687	if (!rt6_ex) {
1688		err = -ENOMEM;
1689		goto out;
1690	}
1691	rt6_ex->rt6i = nrt;
1692	rt6_ex->stamp = jiffies;
1693	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1694	bucket->depth++;
1695	net->ipv6.rt6_stats->fib_rt_cache++;
1696
1697	if (bucket->depth > FIB6_MAX_DEPTH)
1698		rt6_exception_remove_oldest(bucket);
1699
1700out:
1701	spin_unlock_bh(&rt6_exception_lock);
1702
1703	/* Update fn->fn_sernum to invalidate all cached dst */
1704	if (!err) {
1705		spin_lock_bh(&f6i->fib6_table->tb6_lock);
1706		fib6_update_sernum(net, f6i);
1707		spin_unlock_bh(&f6i->fib6_table->tb6_lock);
1708		fib6_force_start_gc(net);
1709	}
1710
1711	return err;
1712}
1713
1714static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
1715{
1716	struct rt6_exception_bucket *bucket;
1717	struct rt6_exception *rt6_ex;
1718	struct hlist_node *tmp;
1719	int i;
1720
1721	spin_lock_bh(&rt6_exception_lock);
1722
1723	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
1724	if (!bucket)
1725		goto out;
1726
1727	/* Prevent rt6_insert_exception() to recreate the bucket list */
1728	if (!from)
1729		fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock);
1730
1731	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1732		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
1733			if (!from ||
1734			    rcu_access_pointer(rt6_ex->rt6i->from) == from)
1735				rt6_remove_exception(bucket, rt6_ex);
1736		}
1737		WARN_ON_ONCE(!from && bucket->depth);
1738		bucket++;
1739	}
1740out:
1741	spin_unlock_bh(&rt6_exception_lock);
1742}
1743
/* nexthop_for_each_fib6_nh() callback: flush the exceptions of @nh that
 * belong to the fib6_info passed as @arg.  Always returns 0.
 */
static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)
{
	fib6_nh_flush_exceptions(nh, arg);

	return 0;
}
1752
1753void rt6_flush_exceptions(struct fib6_info *f6i)
 
1754{
1755	if (f6i->nh)
1756		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions,
1757					 f6i);
1758	else
1759		fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
1760}
1761
1762/* Find cached rt in the hash table inside passed in rt
1763 * Caller has to hold rcu_read_lock()
1764 */
1765static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
1766					   const struct in6_addr *daddr,
1767					   const struct in6_addr *saddr)
1768{
1769	const struct in6_addr *src_key = NULL;
1770	struct rt6_exception_bucket *bucket;
1771	struct rt6_exception *rt6_ex;
1772	struct rt6_info *ret = NULL;
1773
1774#ifdef CONFIG_IPV6_SUBTREES
1775	/* fib6i_src.plen != 0 indicates f6i is in subtree
1776	 * and exception table is indexed by a hash of
1777	 * both fib6_dst and fib6_src.
1778	 * However, the src addr used to create the hash
1779	 * might not be exactly the passed in saddr which
1780	 * is a /128 addr from the flow.
1781	 * So we need to use f6i->fib6_src to redo lookup
1782	 * if the passed in saddr does not find anything.
1783	 * (See the logic in ip6_rt_cache_alloc() on how
1784	 * rt->rt6i_src is updated.)
1785	 */
1786	if (res->f6i->fib6_src.plen)
1787		src_key = saddr;
1788find_ex:
1789#endif
1790	bucket = fib6_nh_get_excptn_bucket(res->nh, NULL);
1791	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1792
1793	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1794		ret = rt6_ex->rt6i;
1795
1796#ifdef CONFIG_IPV6_SUBTREES
1797	/* Use fib6_src as src_key and redo lookup */
1798	if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
1799		src_key = &res->f6i->fib6_src.addr;
1800		goto find_ex;
1801	}
1802#endif
1803
1804	return ret;
1805}
1806
1807/* Remove the passed in cached rt from the hash table that contains it */
1808static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
1809				    const struct rt6_info *rt)
1810{
1811	const struct in6_addr *src_key = NULL;
1812	struct rt6_exception_bucket *bucket;
1813	struct rt6_exception *rt6_ex;
1814	int err;
1815
1816	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
1817		return -ENOENT;
1818
1819	spin_lock_bh(&rt6_exception_lock);
1820	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
1821
1822#ifdef CONFIG_IPV6_SUBTREES
1823	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1824	 * and exception table is indexed by a hash of
1825	 * both rt6i_dst and rt6i_src.
1826	 * Otherwise, the exception table is indexed by
1827	 * a hash of only rt6i_dst.
1828	 */
1829	if (plen)
1830		src_key = &rt->rt6i_src.addr;
1831#endif
1832	rt6_ex = __rt6_find_exception_spinlock(&bucket,
1833					       &rt->rt6i_dst.addr,
1834					       src_key);
1835	if (rt6_ex) {
1836		rt6_remove_exception(bucket, rt6_ex);
1837		err = 0;
1838	} else {
1839		err = -ENOENT;
1840	}
1841
1842	spin_unlock_bh(&rt6_exception_lock);
1843	return err;
1844}
1845
/* argument block handed to rt6_nh_remove_exception_rt() */
struct fib6_nh_excptn_arg {
	struct rt6_info	*rt;	/* cached route to remove */
	int		plen;	/* source prefix length of its fib entry */
};

/* nexthop_for_each_fib6_nh() callback: try to remove arg->rt from the
 * exception table of @nh.  Returns 1 when an entry was removed, else 0.
 */
static int rt6_nh_remove_exception_rt(struct fib6_nh *nh, void *_arg)
{
	struct fib6_nh_excptn_arg *arg = _arg;

	return fib6_nh_remove_exception(nh, arg->plen, arg->rt) == 0;
}
1862
1863static int rt6_remove_exception_rt(struct rt6_info *rt)
1864{
1865	struct fib6_info *from;
1866
1867	from = rcu_dereference(rt->from);
1868	if (!from || !(rt->rt6i_flags & RTF_CACHE))
1869		return -EINVAL;
1870
1871	if (from->nh) {
1872		struct fib6_nh_excptn_arg arg = {
1873			.rt = rt,
1874			.plen = from->fib6_src.plen
1875		};
1876		int rc;
1877
1878		/* rc = 1 means an entry was found */
1879		rc = nexthop_for_each_fib6_nh(from->nh,
1880					      rt6_nh_remove_exception_rt,
1881					      &arg);
1882		return rc ? 0 : -ENOENT;
1883	}
1884
1885	return fib6_nh_remove_exception(from->fib6_nh,
1886					from->fib6_src.plen, rt);
1887}
1888
1889/* Find rt6_ex which contains the passed in rt cache and
1890 * refresh its stamp
1891 */
1892static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
1893				     const struct rt6_info *rt)
1894{
1895	const struct in6_addr *src_key = NULL;
1896	struct rt6_exception_bucket *bucket;
1897	struct rt6_exception *rt6_ex;
1898
1899	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
1900#ifdef CONFIG_IPV6_SUBTREES
1901	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1902	 * and exception table is indexed by a hash of
1903	 * both rt6i_dst and rt6i_src.
1904	 * Otherwise, the exception table is indexed by
1905	 * a hash of only rt6i_dst.
1906	 */
1907	if (plen)
1908		src_key = &rt->rt6i_src.addr;
1909#endif
1910	rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
1911	if (rt6_ex)
1912		rt6_ex->stamp = jiffies;
1913}
1914
1915struct fib6_nh_match_arg {
1916	const struct net_device *dev;
1917	const struct in6_addr	*gw;
1918	struct fib6_nh		*match;
1919};
1920
1921/* determine if fib6_nh has given device and gateway */
1922static int fib6_nh_find_match(struct fib6_nh *nh, void *_arg)
1923{
1924	struct fib6_nh_match_arg *arg = _arg;
1925
1926	if (arg->dev != nh->fib_nh_dev ||
1927	    (arg->gw && !nh->fib_nh_gw_family) ||
1928	    (!arg->gw && nh->fib_nh_gw_family) ||
1929	    (arg->gw && !ipv6_addr_equal(arg->gw, &nh->fib_nh_gw6)))
1930		return 0;
1931
1932	arg->match = nh;
1933
1934	/* found a match, break the loop */
1935	return 1;
1936}
1937
1938static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1939{
1940	struct fib6_info *from;
1941	struct fib6_nh *fib6_nh;
1942
1943	rcu_read_lock();
1944
1945	from = rcu_dereference(rt->from);
1946	if (!from || !(rt->rt6i_flags & RTF_CACHE))
1947		goto unlock;
1948
1949	if (from->nh) {
1950		struct fib6_nh_match_arg arg = {
1951			.dev = rt->dst.dev,
1952			.gw = &rt->rt6i_gateway,
1953		};
1954
1955		nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg);
1956
1957		if (!arg.match)
1958			goto unlock;
1959		fib6_nh = arg.match;
1960	} else {
1961		fib6_nh = from->fib6_nh;
1962	}
1963	fib6_nh_update_exception(fib6_nh, from->fib6_src.plen, rt);
1964unlock:
1965	rcu_read_unlock();
1966}
1967
1968static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1969					 struct rt6_info *rt, int mtu)
1970{
1971	/* If the new MTU is lower than the route PMTU, this new MTU will be the
1972	 * lowest MTU in the path: always allow updating the route PMTU to
1973	 * reflect PMTU decreases.
1974	 *
1975	 * If the new MTU is higher, and the route PMTU is equal to the local
1976	 * MTU, this means the old MTU is the lowest in the path, so allow
1977	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1978	 * handle this.
1979	 */
1980
1981	if (dst_mtu(&rt->dst) >= mtu)
1982		return true;
1983
1984	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1985		return true;
1986
1987	return false;
1988}
1989
1990static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1991				       const struct fib6_nh *nh, int mtu)
1992{
1993	struct rt6_exception_bucket *bucket;
1994	struct rt6_exception *rt6_ex;
1995	int i;
1996
1997	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
1998	if (!bucket)
1999		return;
2000
2001	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2002		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
2003			struct rt6_info *entry = rt6_ex->rt6i;
2004
2005			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
2006			 * route), the metrics of its rt->from have already
2007			 * been updated.
2008			 */
2009			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
2010			    rt6_mtu_change_route_allowed(idev, entry, mtu))
2011				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
2012		}
2013		bucket++;
2014	}
2015}
2016
2017#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
 
2018
2019static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh,
2020					    const struct in6_addr *gateway)
2021{
2022	struct rt6_exception_bucket *bucket;
2023	struct rt6_exception *rt6_ex;
2024	struct hlist_node *tmp;
2025	int i;
2026
2027	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
2028		return;
2029
2030	spin_lock_bh(&rt6_exception_lock);
2031	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2032	if (bucket) {
2033		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2034			hlist_for_each_entry_safe(rt6_ex, tmp,
2035						  &bucket->chain, hlist) {
2036				struct rt6_info *entry = rt6_ex->rt6i;
2037
2038				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
2039				    RTF_CACHE_GATEWAY &&
2040				    ipv6_addr_equal(gateway,
2041						    &entry->rt6i_gateway)) {
2042					rt6_remove_exception(bucket, rt6_ex);
2043				}
2044			}
2045			bucket++;
2046		}
2047	}
2048
2049	spin_unlock_bh(&rt6_exception_lock);
2050}
2051
2052static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
2053				      struct rt6_exception *rt6_ex,
2054				      struct fib6_gc_args *gc_args,
2055				      unsigned long now)
2056{
2057	struct rt6_info *rt = rt6_ex->rt6i;
2058
2059	/* we are pruning and obsoleting aged-out and non gateway exceptions
2060	 * even if others have still references to them, so that on next
2061	 * dst_check() such references can be dropped.
2062	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
2063	 * expired, independently from their aging, as per RFC 8201 section 4
2064	 */
2065	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
2066		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
2067			RT6_TRACE("aging clone %p\n", rt);
2068			rt6_remove_exception(bucket, rt6_ex);
2069			return;
2070		}
2071	} else if (time_after(jiffies, rt->dst.expires)) {
2072		RT6_TRACE("purging expired route %p\n", rt);
2073		rt6_remove_exception(bucket, rt6_ex);
2074		return;
2075	}
2076
2077	if (rt->rt6i_flags & RTF_GATEWAY) {
2078		struct neighbour *neigh;
2079		__u8 neigh_flags = 0;
2080
2081		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
2082		if (neigh)
2083			neigh_flags = neigh->flags;
2084
2085		if (!(neigh_flags & NTF_ROUTER)) {
2086			RT6_TRACE("purging route %p via non-router but gateway\n",
2087				  rt);
2088			rt6_remove_exception(bucket, rt6_ex);
2089			return;
2090		}
2091	}
2092
2093	gc_args->more++;
2094}
2095
2096static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
2097				   struct fib6_gc_args *gc_args,
2098				   unsigned long now)
2099{
2100	struct rt6_exception_bucket *bucket;
2101	struct rt6_exception *rt6_ex;
2102	struct hlist_node *tmp;
2103	int i;
2104
2105	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
2106		return;
2107
2108	rcu_read_lock_bh();
2109	spin_lock(&rt6_exception_lock);
2110	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2111	if (bucket) {
2112		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2113			hlist_for_each_entry_safe(rt6_ex, tmp,
2114						  &bucket->chain, hlist) {
2115				rt6_age_examine_exception(bucket, rt6_ex,
2116							  gc_args, now);
2117			}
2118			bucket++;
2119		}
2120	}
2121	spin_unlock(&rt6_exception_lock);
2122	rcu_read_unlock_bh();
2123}
2124
/* argument block handed to rt6_nh_age_exceptions() */
struct fib6_nh_age_excptn_arg {
	struct fib6_gc_args	*gc_args;
	unsigned long		now;
};

/* nexthop_for_each_fib6_nh() callback: age the exceptions of @nh with
 * the gc arguments and timestamp from @_arg.  Always returns 0.
 */
static int rt6_nh_age_exceptions(struct fib6_nh *nh, void *_arg)
{
	const struct fib6_nh_age_excptn_arg *age = _arg;

	fib6_nh_age_exceptions(nh, age->gc_args, age->now);

	return 0;
}
2137
2138void rt6_age_exceptions(struct fib6_info *f6i,
2139			struct fib6_gc_args *gc_args,
2140			unsigned long now)
2141{
2142	if (f6i->nh) {
2143		struct fib6_nh_age_excptn_arg arg = {
2144			.gc_args = gc_args,
2145			.now = now
2146		};
2147
2148		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_age_exceptions,
2149					 &arg);
2150	} else {
2151		fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
2152	}
2153}
2154
2155/* must be called with rcu lock held */
2156int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
2157		      struct flowi6 *fl6, struct fib6_result *res, int strict)
2158{
2159	struct fib6_node *fn, *saved_fn;
2160
2161	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2162	saved_fn = fn;
2163
2164	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
2165		oif = 0;
2166
2167redo_rt6_select:
2168	rt6_select(net, fn, oif, res, strict);
2169	if (res->f6i == net->ipv6.fib6_null_entry) {
2170		fn = fib6_backtrack(fn, &fl6->saddr);
2171		if (fn)
2172			goto redo_rt6_select;
2173		else if (strict & RT6_LOOKUP_F_REACHABLE) {
2174			/* also consider unreachable route */
2175			strict &= ~RT6_LOOKUP_F_REACHABLE;
2176			fn = saved_fn;
2177			goto redo_rt6_select;
2178		}
2179	}
2180
2181	trace_fib6_table_lookup(net, res, table, fl6);
2182
2183	return 0;
2184}
2185
2186struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
2187			       int oif, struct flowi6 *fl6,
2188			       const struct sk_buff *skb, int flags)
2189{
2190	struct fib6_result res = {};
2191	struct rt6_info *rt = NULL;
2192	int strict = 0;
2193
2194	WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) &&
2195		     !rcu_read_lock_held());
2196
2197	strict |= flags & RT6_LOOKUP_F_IFACE;
2198	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
2199	if (net->ipv6.devconf_all->forwarding == 0)
2200		strict |= RT6_LOOKUP_F_REACHABLE;
2201
2202	rcu_read_lock();
2203
2204	fib6_table_lookup(net, table, oif, fl6, &res, strict);
2205	if (res.f6i == net->ipv6.fib6_null_entry)
2206		goto out;
2207
2208	fib6_select_path(net, &res, fl6, oif, false, skb, strict);
2209
2210	/*Search through exception table */
2211	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
2212	if (rt) {
2213		goto out;
2214	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
2215			    !res.nh->fib_nh_gw_family)) {
2216		/* Create a RTF_CACHE clone which will not be
2217		 * owned by the fib6 tree.  It is for the special case where
2218		 * the daddr in the skb during the neighbor look-up is different
2219		 * from the fl6->daddr used to look-up route here.
2220		 */
2221		rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
2222
2223		if (rt) {
2224			/* 1 refcnt is taken during ip6_rt_cache_alloc().
2225			 * As rt6_uncached_list_add() does not consume refcnt,
2226			 * this refcnt is always returned to the caller even
2227			 * if caller sets RT6_LOOKUP_F_DST_NOREF flag.
2228			 */
2229			rt6_uncached_list_add(rt);
2230			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2231			rcu_read_unlock();
2232
2233			return rt;
2234		}
2235	} else {
2236		/* Get a percpu copy */
2237		local_bh_disable();
2238		rt = rt6_get_pcpu_route(&res);
2239
2240		if (!rt)
2241			rt = rt6_make_pcpu_route(net, &res);
2242
2243		local_bh_enable();
2244	}
2245out:
2246	if (!rt)
2247		rt = net->ipv6.ip6_null_entry;
2248	if (!(flags & RT6_LOOKUP_F_DST_NOREF))
2249		ip6_hold_safe(net, &rt);
2250	rcu_read_unlock();
 
 
 
 
2251
2252	return rt;
2253}
2254EXPORT_SYMBOL_GPL(ip6_pol_route);
2255
2256static struct rt6_info *ip6_pol_route_input(struct net *net,
2257					    struct fib6_table *table,
2258					    struct flowi6 *fl6,
2259					    const struct sk_buff *skb,
2260					    int flags)
2261{
2262	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
2263}
2264
2265struct dst_entry *ip6_route_input_lookup(struct net *net,
2266					 struct net_device *dev,
2267					 struct flowi6 *fl6,
2268					 const struct sk_buff *skb,
2269					 int flags)
2270{
2271	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
2272		flags |= RT6_LOOKUP_F_IFACE;
2273
2274	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
2275}
2276EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
2277
2278static void ip6_multipath_l3_keys(const struct sk_buff *skb,
2279				  struct flow_keys *keys,
2280				  struct flow_keys *flkeys)
2281{
2282	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
2283	const struct ipv6hdr *key_iph = outer_iph;
2284	struct flow_keys *_flkeys = flkeys;
2285	const struct ipv6hdr *inner_iph;
2286	const struct icmp6hdr *icmph;
2287	struct ipv6hdr _inner_iph;
2288	struct icmp6hdr _icmph;
2289
2290	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
2291		goto out;
2292
2293	icmph = skb_header_pointer(skb, skb_transport_offset(skb),
2294				   sizeof(_icmph), &_icmph);
2295	if (!icmph)
2296		goto out;
2297
2298	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
2299	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
2300	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
2301	    icmph->icmp6_type != ICMPV6_PARAMPROB)
2302		goto out;
2303
2304	inner_iph = skb_header_pointer(skb,
2305				       skb_transport_offset(skb) + sizeof(*icmph),
2306				       sizeof(_inner_iph), &_inner_iph);
2307	if (!inner_iph)
2308		goto out;
2309
2310	key_iph = inner_iph;
2311	_flkeys = NULL;
2312out:
2313	if (_flkeys) {
2314		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2315		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2316		keys->tags.flow_label = _flkeys->tags.flow_label;
2317		keys->basic.ip_proto = _flkeys->basic.ip_proto;
2318	} else {
2319		keys->addrs.v6addrs.src = key_iph->saddr;
2320		keys->addrs.v6addrs.dst = key_iph->daddr;
2321		keys->tags.flow_label = ip6_flowlabel(key_iph);
2322		keys->basic.ip_proto = key_iph->nexthdr;
2323	}
2324}
2325
2326/* if skb is set it will be used and fl6 can be NULL */
2327u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2328		       const struct sk_buff *skb, struct flow_keys *flkeys)
2329{
2330	struct flow_keys hash_keys;
2331	u32 mhash;
2332
2333	switch (ip6_multipath_hash_policy(net)) {
2334	case 0:
2335		memset(&hash_keys, 0, sizeof(hash_keys));
2336		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2337		if (skb) {
2338			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2339		} else {
2340			hash_keys.addrs.v6addrs.src = fl6->saddr;
2341			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2342			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2343			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2344		}
2345		break;
2346	case 1:
2347		if (skb) {
2348			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2349			struct flow_keys keys;
2350
2351			/* short-circuit if we already have L4 hash present */
2352			if (skb->l4_hash)
2353				return skb_get_hash_raw(skb) >> 1;
2354
2355			memset(&hash_keys, 0, sizeof(hash_keys));
2356
2357                        if (!flkeys) {
2358				skb_flow_dissect_flow_keys(skb, &keys, flag);
2359				flkeys = &keys;
2360			}
2361			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2362			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2363			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2364			hash_keys.ports.src = flkeys->ports.src;
2365			hash_keys.ports.dst = flkeys->ports.dst;
2366			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2367		} else {
2368			memset(&hash_keys, 0, sizeof(hash_keys));
2369			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2370			hash_keys.addrs.v6addrs.src = fl6->saddr;
2371			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2372			hash_keys.ports.src = fl6->fl6_sport;
2373			hash_keys.ports.dst = fl6->fl6_dport;
2374			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2375		}
2376		break;
2377	case 2:
2378		memset(&hash_keys, 0, sizeof(hash_keys));
2379		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2380		if (skb) {
2381			struct flow_keys keys;
2382
2383			if (!flkeys) {
2384				skb_flow_dissect_flow_keys(skb, &keys, 0);
2385				flkeys = &keys;
2386			}
2387
2388			/* Inner can be v4 or v6 */
2389			if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2390				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2391				hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
2392				hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
2393			} else if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2394				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2395				hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2396				hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2397				hash_keys.tags.flow_label = flkeys->tags.flow_label;
2398				hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2399			} else {
2400				/* Same as case 0 */
2401				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2402				ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2403			}
2404		} else {
2405			/* Same as case 0 */
2406			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2407			hash_keys.addrs.v6addrs.src = fl6->saddr;
2408			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2409			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2410			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2411		}
2412		break;
2413	}
2414	mhash = flow_hash_from_keys(&hash_keys);
2415
2416	return mhash >> 1;
2417}
2418
2419/* Called with rcu held */
2420void ip6_route_input(struct sk_buff *skb)
2421{
2422	const struct ipv6hdr *iph = ipv6_hdr(skb);
2423	struct net *net = dev_net(skb->dev);
2424	int flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_DST_NOREF;
2425	struct ip_tunnel_info *tun_info;
2426	struct flowi6 fl6 = {
2427		.flowi6_iif = skb->dev->ifindex,
2428		.daddr = iph->daddr,
2429		.saddr = iph->saddr,
2430		.flowlabel = ip6_flowinfo(iph),
2431		.flowi6_mark = skb->mark,
2432		.flowi6_proto = iph->nexthdr,
2433	};
2434	struct flow_keys *flkeys = NULL, _flkeys;
2435
2436	tun_info = skb_tunnel_info(skb);
2437	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2438		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
2439
2440	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2441		flkeys = &_flkeys;
2442
2443	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2444		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2445	skb_dst_drop(skb);
2446	skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev,
2447						      &fl6, skb, flags));
2448}
2449
2450static struct rt6_info *ip6_pol_route_output(struct net *net,
2451					     struct fib6_table *table,
2452					     struct flowi6 *fl6,
2453					     const struct sk_buff *skb,
2454					     int flags)
2455{
2456	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2457}
2458
2459struct dst_entry *ip6_route_output_flags_noref(struct net *net,
2460					       const struct sock *sk,
2461					       struct flowi6 *fl6, int flags)
2462{
2463	bool any_src;
2464
2465	if (ipv6_addr_type(&fl6->daddr) &
2466	    (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
2467		struct dst_entry *dst;
2468
2469		/* This function does not take refcnt on the dst */
2470		dst = l3mdev_link_scope_lookup(net, fl6);
2471		if (dst)
2472			return dst;
2473	}
2474
2475	fl6->flowi6_iif = LOOPBACK_IFINDEX;
2476
2477	flags |= RT6_LOOKUP_F_DST_NOREF;
2478	any_src = ipv6_addr_any(&fl6->saddr);
2479	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2480	    (fl6->flowi6_oif && any_src))
2481		flags |= RT6_LOOKUP_F_IFACE;
2482
2483	if (!any_src)
2484		flags |= RT6_LOOKUP_F_HAS_SADDR;
2485	else if (sk)
2486		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2487
2488	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2489}
2490EXPORT_SYMBOL_GPL(ip6_route_output_flags_noref);
2491
2492struct dst_entry *ip6_route_output_flags(struct net *net,
2493					 const struct sock *sk,
2494					 struct flowi6 *fl6,
2495					 int flags)
2496{
2497        struct dst_entry *dst;
2498        struct rt6_info *rt6;
2499
2500        rcu_read_lock();
2501        dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
2502        rt6 = (struct rt6_info *)dst;
2503        /* For dst cached in uncached_list, refcnt is already taken. */
2504        if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
2505                dst = &net->ipv6.ip6_null_entry->dst;
2506                dst_hold(dst);
2507        }
2508        rcu_read_unlock();
2509
2510        return dst;
2511}
2512EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2513
2514struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2515{
2516	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
2517	struct net_device *loopback_dev = net->loopback_dev;
2518	struct dst_entry *new = NULL;
2519
2520	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
2521		       DST_OBSOLETE_DEAD, 0);
2522	if (rt) {
2523		rt6_info_init(rt);
2524		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
2525
2526		new = &rt->dst;
 
2527		new->__use = 1;
2528		new->input = dst_discard;
2529		new->output = dst_discard_out;
2530
2531		dst_copy_metrics(new, &ort->dst);
2532
2533		rt->rt6i_idev = in6_dev_get(loopback_dev);
2534		rt->rt6i_gateway = ort->rt6i_gateway;
2535		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
 
 
 
 
 
 
 
 
 
2536
2537		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2538#ifdef CONFIG_IPV6_SUBTREES
2539		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2540#endif
 
 
2541	}
2542
2543	dst_release(dst_orig);
2544	return new ? new : ERR_PTR(-ENOMEM);
2545}
2546
2547/*
2548 *	Destination cache support functions
2549 */
2550
2551static bool fib6_check(struct fib6_info *f6i, u32 cookie)
2552{
2553	u32 rt_cookie = 0;
2554
2555	if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
2556		return false;
2557
2558	if (fib6_check_expired(f6i))
2559		return false;
2560
2561	return true;
2562}
2563
2564static struct dst_entry *rt6_check(struct rt6_info *rt,
2565				   struct fib6_info *from,
2566				   u32 cookie)
2567{
2568	u32 rt_cookie = 0;
2569
2570	if (!from || !fib6_get_cookie_safe(from, &rt_cookie) ||
2571	    rt_cookie != cookie)
2572		return NULL;
2573
2574	if (rt6_check_expired(rt))
2575		return NULL;
2576
2577	return &rt->dst;
2578}
2579
2580static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2581					    struct fib6_info *from,
2582					    u32 cookie)
2583{
2584	if (!__rt6_check_expired(rt) &&
2585	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2586	    fib6_check(from, cookie))
2587		return &rt->dst;
2588	else
2589		return NULL;
2590}
2591
2592static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2593{
2594	struct dst_entry *dst_ret;
2595	struct fib6_info *from;
2596	struct rt6_info *rt;
2597
2598	rt = container_of(dst, struct rt6_info, dst);
2599
2600	rcu_read_lock();
2601
2602	/* All IPV6 dsts are created with ->obsolete set to the value
2603	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2604	 * into this function always.
2605	 */
2606
2607	from = rcu_dereference(rt->from);
2608
2609	if (from && (rt->rt6i_flags & RTF_PCPU ||
2610	    unlikely(!list_empty(&rt->rt6i_uncached))))
2611		dst_ret = rt6_dst_from_check(rt, from, cookie);
2612	else
2613		dst_ret = rt6_check(rt, from, cookie);
2614
2615	rcu_read_unlock();
2616
2617	return dst_ret;
 
 
 
 
 
 
 
 
2618}
2619
2620static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2621{
2622	struct rt6_info *rt = (struct rt6_info *) dst;
2623
2624	if (rt) {
2625		if (rt->rt6i_flags & RTF_CACHE) {
2626			rcu_read_lock();
2627			if (rt6_check_expired(rt)) {
2628				rt6_remove_exception_rt(rt);
2629				dst = NULL;
2630			}
2631			rcu_read_unlock();
2632		} else {
2633			dst_release(dst);
2634			dst = NULL;
2635		}
2636	}
2637	return dst;
2638}
2639
2640static void ip6_link_failure(struct sk_buff *skb)
2641{
2642	struct rt6_info *rt;
2643
2644	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
2645
2646	rt = (struct rt6_info *) skb_dst(skb);
2647	if (rt) {
2648		rcu_read_lock();
2649		if (rt->rt6i_flags & RTF_CACHE) {
2650			rt6_remove_exception_rt(rt);
2651		} else {
2652			struct fib6_info *from;
2653			struct fib6_node *fn;
2654
2655			from = rcu_dereference(rt->from);
2656			if (from) {
2657				fn = rcu_dereference(from->fib6_node);
2658				if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2659					fn->fn_sernum = -1;
2660			}
2661		}
2662		rcu_read_unlock();
2663	}
2664}
2665
2666static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2667{
2668	if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2669		struct fib6_info *from;
2670
2671		rcu_read_lock();
2672		from = rcu_dereference(rt0->from);
2673		if (from)
2674			rt0->dst.expires = from->expires;
2675		rcu_read_unlock();
2676	}
2677
2678	dst_set_expires(&rt0->dst, timeout);
2679	rt0->rt6i_flags |= RTF_EXPIRES;
2680}
2681
2682static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2683{
2684	struct net *net = dev_net(rt->dst.dev);
2685
2686	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
2687	rt->rt6i_flags |= RTF_MODIFIED;
2688	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2689}
2690
2691static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2692{
2693	return !(rt->rt6i_flags & RTF_CACHE) &&
2694		(rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
2695}
2696
2697static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2698				 const struct ipv6hdr *iph, u32 mtu)
2699{
2700	const struct in6_addr *daddr, *saddr;
2701	struct rt6_info *rt6 = (struct rt6_info *)dst;
2702
2703	if (dst_metric_locked(dst, RTAX_MTU))
2704		return;
2705
2706	if (iph) {
2707		daddr = &iph->daddr;
2708		saddr = &iph->saddr;
2709	} else if (sk) {
2710		daddr = &sk->sk_v6_daddr;
2711		saddr = &inet6_sk(sk)->saddr;
2712	} else {
2713		daddr = NULL;
2714		saddr = NULL;
2715	}
2716	dst_confirm_neigh(dst, daddr);
2717	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2718	if (mtu >= dst_mtu(dst))
2719		return;
2720
2721	if (!rt6_cache_allowed_for_pmtu(rt6)) {
2722		rt6_do_update_pmtu(rt6, mtu);
2723		/* update rt6_ex->stamp for cache */
2724		if (rt6->rt6i_flags & RTF_CACHE)
2725			rt6_update_exception_stamp_rt(rt6);
2726	} else if (daddr) {
2727		struct fib6_result res = {};
2728		struct rt6_info *nrt6;
2729
2730		rcu_read_lock();
2731		res.f6i = rcu_dereference(rt6->from);
2732		if (!res.f6i)
2733			goto out_unlock;
2734
2735		res.fib6_flags = res.f6i->fib6_flags;
2736		res.fib6_type = res.f6i->fib6_type;
2737
2738		if (res.f6i->nh) {
2739			struct fib6_nh_match_arg arg = {
2740				.dev = dst->dev,
2741				.gw = &rt6->rt6i_gateway,
2742			};
2743
2744			nexthop_for_each_fib6_nh(res.f6i->nh,
2745						 fib6_nh_find_match, &arg);
2746
2747			/* fib6_info uses a nexthop that does not have fib6_nh
2748			 * using the dst->dev + gw. Should be impossible.
2749			 */
2750			if (!arg.match)
2751				goto out_unlock;
2752
2753			res.nh = arg.match;
2754		} else {
2755			res.nh = res.f6i->fib6_nh;
2756		}
2757
2758		nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
2759		if (nrt6) {
2760			rt6_do_update_pmtu(nrt6, mtu);
2761			if (rt6_insert_exception(nrt6, &res))
2762				dst_release_immediate(&nrt6->dst);
2763		}
2764out_unlock:
2765		rcu_read_unlock();
2766	}
2767}
2768
2769static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2770			       struct sk_buff *skb, u32 mtu)
2771{
2772	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2773}
2774
2775void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2776		     int oif, u32 mark, kuid_t uid)
2777{
2778	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2779	struct dst_entry *dst;
2780	struct flowi6 fl6 = {
2781		.flowi6_oif = oif,
2782		.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2783		.daddr = iph->daddr,
2784		.saddr = iph->saddr,
2785		.flowlabel = ip6_flowinfo(iph),
2786		.flowi6_uid = uid,
2787	};
2788
2789	dst = ip6_route_output(net, NULL, &fl6);
2790	if (!dst->error)
2791		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
2792	dst_release(dst);
2793}
2794EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2795
2796void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2797{
2798	int oif = sk->sk_bound_dev_if;
2799	struct dst_entry *dst;
2800
2801	if (!oif && skb->dev)
2802		oif = l3mdev_master_ifindex(skb->dev);
2803
2804	ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
2805
2806	dst = __sk_dst_get(sk);
2807	if (!dst || !dst->obsolete ||
2808	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2809		return;
2810
2811	bh_lock_sock(sk);
2812	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2813		ip6_datagram_dst_update(sk, false);
2814	bh_unlock_sock(sk);
2815}
2816EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2817
2818void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2819			   const struct flowi6 *fl6)
2820{
2821#ifdef CONFIG_IPV6_SUBTREES
2822	struct ipv6_pinfo *np = inet6_sk(sk);
2823#endif
2824
2825	ip6_dst_store(sk, dst,
2826		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2827		      &sk->sk_v6_daddr : NULL,
2828#ifdef CONFIG_IPV6_SUBTREES
2829		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2830		      &np->saddr :
2831#endif
2832		      NULL);
2833}
2834
2835static bool ip6_redirect_nh_match(const struct fib6_result *res,
2836				  struct flowi6 *fl6,
2837				  const struct in6_addr *gw,
2838				  struct rt6_info **ret)
2839{
2840	const struct fib6_nh *nh = res->nh;
2841
2842	if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
2843	    fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
2844		return false;
2845
2846	/* rt_cache's gateway might be different from its 'parent'
2847	 * in the case of an ip redirect.
2848	 * So we keep searching in the exception table if the gateway
2849	 * is different.
2850	 */
2851	if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
2852		struct rt6_info *rt_cache;
2853
2854		rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
2855		if (rt_cache &&
2856		    ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
2857			*ret = rt_cache;
2858			return true;
2859		}
2860		return false;
2861	}
2862	return true;
2863}
2864
2865struct fib6_nh_rd_arg {
2866	struct fib6_result	*res;
2867	struct flowi6		*fl6;
2868	const struct in6_addr	*gw;
2869	struct rt6_info		**ret;
2870};
2871
2872static int fib6_nh_redirect_match(struct fib6_nh *nh, void *_arg)
2873{
2874	struct fib6_nh_rd_arg *arg = _arg;
2875
2876	arg->res->nh = nh;
2877	return ip6_redirect_nh_match(arg->res, arg->fl6, arg->gw, arg->ret);
2878}
2879
2880/* Handle redirects */
2881struct ip6rd_flowi {
2882	struct flowi6 fl6;
2883	struct in6_addr gateway;
2884};
2885
2886static struct rt6_info *__ip6_route_redirect(struct net *net,
2887					     struct fib6_table *table,
2888					     struct flowi6 *fl6,
2889					     const struct sk_buff *skb,
2890					     int flags)
2891{
2892	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2893	struct rt6_info *ret = NULL;
2894	struct fib6_result res = {};
2895	struct fib6_nh_rd_arg arg = {
2896		.res = &res,
2897		.fl6 = fl6,
2898		.gw  = &rdfl->gateway,
2899		.ret = &ret
2900	};
2901	struct fib6_info *rt;
2902	struct fib6_node *fn;
2903
2904	/* l3mdev_update_flow overrides oif if the device is enslaved; in
2905	 * this case we must match on the real ingress device, so reset it
2906	 */
2907	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
2908		fl6->flowi6_oif = skb->dev->ifindex;
2909
2910	/* Get the "current" route for this destination and
2911	 * check if the redirect has come from appropriate router.
2912	 *
2913	 * RFC 4861 specifies that redirects should only be
2914	 * accepted if they come from the nexthop to the target.
2915	 * Due to the way the routes are chosen, this notion
2916	 * is a bit fuzzy and one might need to check all possible
2917	 * routes.
2918	 */
2919
2920	rcu_read_lock();
2921	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2922restart:
2923	for_each_fib6_node_rt_rcu(fn) {
2924		res.f6i = rt;
2925		if (fib6_check_expired(rt))
2926			continue;
2927		if (rt->fib6_flags & RTF_REJECT)
2928			break;
2929		if (unlikely(rt->nh)) {
2930			if (nexthop_is_blackhole(rt->nh))
2931				continue;
2932			/* on match, res->nh is filled in and potentially ret */
2933			if (nexthop_for_each_fib6_nh(rt->nh,
2934						     fib6_nh_redirect_match,
2935						     &arg))
2936				goto out;
2937		} else {
2938			res.nh = rt->fib6_nh;
2939			if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway,
2940						  &ret))
2941				goto out;
2942		}
2943	}
2944
2945	if (!rt)
2946		rt = net->ipv6.fib6_null_entry;
2947	else if (rt->fib6_flags & RTF_REJECT) {
2948		ret = net->ipv6.ip6_null_entry;
2949		goto out;
2950	}
2951
2952	if (rt == net->ipv6.fib6_null_entry) {
2953		fn = fib6_backtrack(fn, &fl6->saddr);
2954		if (fn)
2955			goto restart;
2956	}
2957
2958	res.f6i = rt;
2959	res.nh = rt->fib6_nh;
2960out:
2961	if (ret) {
2962		ip6_hold_safe(net, &ret);
2963	} else {
2964		res.fib6_flags = res.f6i->fib6_flags;
2965		res.fib6_type = res.f6i->fib6_type;
2966		ret = ip6_create_rt_rcu(&res);
2967	}
2968
2969	rcu_read_unlock();
2970
2971	trace_fib6_table_lookup(net, &res, table, fl6);
2972	return ret;
2973};
2974
2975static struct dst_entry *ip6_route_redirect(struct net *net,
2976					    const struct flowi6 *fl6,
2977					    const struct sk_buff *skb,
2978					    const struct in6_addr *gateway)
2979{
2980	int flags = RT6_LOOKUP_F_HAS_SADDR;
2981	struct ip6rd_flowi rdfl;
2982
2983	rdfl.fl6 = *fl6;
2984	rdfl.gateway = *gateway;
2985
2986	return fib6_rule_lookup(net, &rdfl.fl6, skb,
2987				flags, __ip6_route_redirect);
2988}
2989
2990void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2991		  kuid_t uid)
2992{
2993	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2994	struct dst_entry *dst;
2995	struct flowi6 fl6 = {
2996		.flowi6_iif = LOOPBACK_IFINDEX,
2997		.flowi6_oif = oif,
2998		.flowi6_mark = mark,
2999		.daddr = iph->daddr,
3000		.saddr = iph->saddr,
3001		.flowlabel = ip6_flowinfo(iph),
3002		.flowi6_uid = uid,
3003	};
3004
3005	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
3006	rt6_do_redirect(dst, NULL, skb);
3007	dst_release(dst);
3008}
3009EXPORT_SYMBOL_GPL(ip6_redirect);
3010
3011void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
3012{
3013	const struct ipv6hdr *iph = ipv6_hdr(skb);
3014	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
3015	struct dst_entry *dst;
3016	struct flowi6 fl6 = {
3017		.flowi6_iif = LOOPBACK_IFINDEX,
3018		.flowi6_oif = oif,
3019		.daddr = msg->dest,
3020		.saddr = iph->daddr,
3021		.flowi6_uid = sock_net_uid(net, NULL),
3022	};
3023
3024	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
3025	rt6_do_redirect(dst, NULL, skb);
3026	dst_release(dst);
3027}
3028
3029void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
3030{
3031	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
3032		     sk->sk_uid);
3033}
3034EXPORT_SYMBOL_GPL(ip6_sk_redirect);
3035
3036static unsigned int ip6_default_advmss(const struct dst_entry *dst)
3037{
3038	struct net_device *dev = dst->dev;
3039	unsigned int mtu = dst_mtu(dst);
3040	struct net *net = dev_net(dev);
3041
3042	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
3043
3044	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
3045		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
3046
3047	/*
3048	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
3049	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
3050	 * IPV6_MAXPLEN is also valid and means: "any MSS,
3051	 * rely only on pmtu discovery"
3052	 */
3053	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
3054		mtu = IPV6_MAXPLEN;
3055	return mtu;
3056}
3057
3058static unsigned int ip6_mtu(const struct dst_entry *dst)
3059{
 
3060	struct inet6_dev *idev;
3061	unsigned int mtu;
3062
3063	mtu = dst_metric_raw(dst, RTAX_MTU);
3064	if (mtu)
3065		goto out;
3066
3067	mtu = IPV6_MIN_MTU;
3068
3069	rcu_read_lock();
3070	idev = __in6_dev_get(dst->dev);
3071	if (idev)
3072		mtu = idev->cnf.mtu6;
3073	rcu_read_unlock();
3074
3075out:
3076	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3077
3078	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
3079}
3080
3081/* MTU selection:
3082 * 1. mtu on route is locked - use it
3083 * 2. mtu from nexthop exception
3084 * 3. mtu from egress device
3085 *
3086 * based on ip6_dst_mtu_forward and exception logic of
3087 * rt6_find_cached_rt; called with rcu_read_lock
3088 */
3089u32 ip6_mtu_from_fib6(const struct fib6_result *res,
3090		      const struct in6_addr *daddr,
3091		      const struct in6_addr *saddr)
3092{
3093	const struct fib6_nh *nh = res->nh;
3094	struct fib6_info *f6i = res->f6i;
3095	struct inet6_dev *idev;
3096	struct rt6_info *rt;
3097	u32 mtu = 0;
3098
3099	if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
3100		mtu = f6i->fib6_pmtu;
3101		if (mtu)
3102			goto out;
3103	}
3104
3105	rt = rt6_find_cached_rt(res, daddr, saddr);
3106	if (unlikely(rt)) {
3107		mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
3108	} else {
3109		struct net_device *dev = nh->fib_nh_dev;
3110
3111		mtu = IPV6_MIN_MTU;
3112		idev = __in6_dev_get(dev);
3113		if (idev && idev->cnf.mtu6 > mtu)
3114			mtu = idev->cnf.mtu6;
3115	}
3116
3117	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3118out:
3119	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
3120}
3121
3122struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
3123				  struct flowi6 *fl6)
 
3124{
3125	struct dst_entry *dst;
3126	struct rt6_info *rt;
3127	struct inet6_dev *idev = in6_dev_get(dev);
3128	struct net *net = dev_net(dev);
3129
3130	if (unlikely(!idev))
3131		return ERR_PTR(-ENODEV);
3132
3133	rt = ip6_dst_alloc(net, dev, 0);
3134	if (unlikely(!rt)) {
3135		in6_dev_put(idev);
3136		dst = ERR_PTR(-ENOMEM);
3137		goto out;
3138	}
3139
 
 
 
 
 
 
 
 
3140	rt->dst.flags |= DST_HOST;
3141	rt->dst.input = ip6_input;
3142	rt->dst.output  = ip6_output;
3143	rt->rt6i_gateway  = fl6->daddr;
3144	rt->rt6i_dst.addr = fl6->daddr;
 
 
 
3145	rt->rt6i_dst.plen = 128;
3146	rt->rt6i_idev     = idev;
3147	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
3148
3149	/* Add this dst into uncached_list so that rt6_disable_ip() can
3150	 * do proper release of the net_device
3151	 */
3152	rt6_uncached_list_add(rt);
3153	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
3154
3155	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
3156
3157out:
3158	return dst;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3159}
3160
3161static int ip6_dst_gc(struct dst_ops *ops)
3162{
 
3163	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
3164	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
3165	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
3166	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
3167	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
3168	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
3169	int entries;
3170
3171	entries = dst_entries_get_fast(ops);
3172	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
3173	    entries <= rt_max_size)
3174		goto out;
3175
3176	net->ipv6.ip6_rt_gc_expire++;
3177	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
 
3178	entries = dst_entries_get_slow(ops);
3179	if (entries < ops->gc_thresh)
3180		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
3181out:
3182	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
3183	return entries > rt_max_size;
3184}
3185
3186static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg,
3187			       const struct in6_addr *gw_addr, u32 tbid,
3188			       int flags, struct fib6_result *res)
3189{
3190	struct flowi6 fl6 = {
3191		.flowi6_oif = cfg->fc_ifindex,
3192		.daddr = *gw_addr,
3193		.saddr = cfg->fc_prefsrc,
3194	};
3195	struct fib6_table *table;
3196	int err;
3197
3198	table = fib6_get_table(net, tbid);
3199	if (!table)
3200		return -EINVAL;
3201
3202	if (!ipv6_addr_any(&cfg->fc_prefsrc))
3203		flags |= RT6_LOOKUP_F_HAS_SADDR;
3204
3205	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
3206
3207	err = fib6_table_lookup(net, table, cfg->fc_ifindex, &fl6, res, flags);
3208	if (!err && res->f6i != net->ipv6.fib6_null_entry)
3209		fib6_select_path(net, res, &fl6, cfg->fc_ifindex,
3210				 cfg->fc_ifindex != 0, NULL, flags);
3211
3212	return err;
3213}
3214
3215static int ip6_route_check_nh_onlink(struct net *net,
3216				     struct fib6_config *cfg,
3217				     const struct net_device *dev,
3218				     struct netlink_ext_ack *extack)
3219{
3220	u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
3221	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3222	struct fib6_result res = {};
3223	int err;
3224
3225	err = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0, &res);
3226	if (!err && !(res.fib6_flags & RTF_REJECT) &&
3227	    /* ignore match if it is the default route */
3228	    !ipv6_addr_any(&res.f6i->fib6_dst.addr) &&
3229	    (res.fib6_type != RTN_UNICAST || dev != res.nh->fib_nh_dev)) {
3230		NL_SET_ERR_MSG(extack,
3231			       "Nexthop has invalid gateway or device mismatch");
3232		err = -EINVAL;
3233	}
3234
3235	return err;
3236}
3237
3238static int ip6_route_check_nh(struct net *net,
3239			      struct fib6_config *cfg,
3240			      struct net_device **_dev,
3241			      struct inet6_dev **idev)
3242{
3243	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3244	struct net_device *dev = _dev ? *_dev : NULL;
3245	int flags = RT6_LOOKUP_F_IFACE;
3246	struct fib6_result res = {};
3247	int err = -EHOSTUNREACH;
3248
3249	if (cfg->fc_table) {
3250		err = ip6_nh_lookup_table(net, cfg, gw_addr,
3251					  cfg->fc_table, flags, &res);
3252		/* gw_addr can not require a gateway or resolve to a reject
3253		 * route. If a device is given, it must match the result.
3254		 */
3255		if (err || res.fib6_flags & RTF_REJECT ||
3256		    res.nh->fib_nh_gw_family ||
3257		    (dev && dev != res.nh->fib_nh_dev))
3258			err = -EHOSTUNREACH;
3259	}
3260
3261	if (err < 0) {
3262		struct flowi6 fl6 = {
3263			.flowi6_oif = cfg->fc_ifindex,
3264			.daddr = *gw_addr,
3265		};
3266
3267		err = fib6_lookup(net, cfg->fc_ifindex, &fl6, &res, flags);
3268		if (err || res.fib6_flags & RTF_REJECT ||
3269		    res.nh->fib_nh_gw_family)
3270			err = -EHOSTUNREACH;
3271
3272		if (err)
3273			return err;
3274
3275		fib6_select_path(net, &res, &fl6, cfg->fc_ifindex,
3276				 cfg->fc_ifindex != 0, NULL, flags);
3277	}
3278
3279	err = 0;
3280	if (dev) {
3281		if (dev != res.nh->fib_nh_dev)
3282			err = -EHOSTUNREACH;
3283	} else {
3284		*_dev = dev = res.nh->fib_nh_dev;
3285		dev_hold(dev);
3286		*idev = in6_dev_get(dev);
3287	}
3288
3289	return err;
3290}
3291
3292static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
3293			   struct net_device **_dev, struct inet6_dev **idev,
3294			   struct netlink_ext_ack *extack)
3295{
3296	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3297	int gwa_type = ipv6_addr_type(gw_addr);
3298	bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
3299	const struct net_device *dev = *_dev;
3300	bool need_addr_check = !dev;
3301	int err = -EINVAL;
3302
3303	/* if gw_addr is local we will fail to detect this in case
3304	 * address is still TENTATIVE (DAD in progress). rt6_lookup()
3305	 * will return already-added prefix route via interface that
3306	 * prefix route was assigned to, which might be non-loopback.
3307	 */
3308	if (dev &&
3309	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3310		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
3311		goto out;
3312	}
3313
3314	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
3315		/* IPv6 strictly inhibits using not link-local
3316		 * addresses as nexthop address.
3317		 * Otherwise, router will not able to send redirects.
3318		 * It is very good, but in some (rare!) circumstances
3319		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
3320		 * some exceptions. --ANK
3321		 * We allow IPv4-mapped nexthops to support RFC4798-type
3322		 * addressing
3323		 */
3324		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
3325			NL_SET_ERR_MSG(extack, "Invalid gateway address");
3326			goto out;
3327		}
3328
3329		rcu_read_lock();
3330
3331		if (cfg->fc_flags & RTNH_F_ONLINK)
3332			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
3333		else
3334			err = ip6_route_check_nh(net, cfg, _dev, idev);
3335
3336		rcu_read_unlock();
3337
3338		if (err)
3339			goto out;
3340	}
3341
3342	/* reload in case device was changed */
3343	dev = *_dev;
3344
3345	err = -EINVAL;
3346	if (!dev) {
3347		NL_SET_ERR_MSG(extack, "Egress device not specified");
3348		goto out;
3349	} else if (dev->flags & IFF_LOOPBACK) {
3350		NL_SET_ERR_MSG(extack,
3351			       "Egress device can not be loopback device for this route");
3352		goto out;
3353	}
3354
3355	/* if we did not check gw_addr above, do so now that the
3356	 * egress device has been resolved.
3357	 */
3358	if (need_addr_check &&
3359	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3360		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
3361		goto out;
3362	}
3363
3364	err = 0;
3365out:
3366	return err;
3367}
 
3368
3369static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
3370{
3371	if ((flags & RTF_REJECT) ||
3372	    (dev && (dev->flags & IFF_LOOPBACK) &&
3373	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
3374	     !(flags & RTF_LOCAL)))
3375		return true;
3376
3377	return false;
3378}
3379
3380int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
3381		 struct fib6_config *cfg, gfp_t gfp_flags,
3382		 struct netlink_ext_ack *extack)
3383{
 
 
 
3384	struct net_device *dev = NULL;
3385	struct inet6_dev *idev = NULL;
 
3386	int addr_type;
3387	int err;
3388
3389	fib6_nh->fib_nh_family = AF_INET6;
3390#ifdef CONFIG_IPV6_ROUTER_PREF
3391	fib6_nh->last_probe = jiffies;
 
 
3392#endif
3393
3394	err = -ENODEV;
3395	if (cfg->fc_ifindex) {
 
3396		dev = dev_get_by_index(net, cfg->fc_ifindex);
3397		if (!dev)
3398			goto out;
3399		idev = in6_dev_get(dev);
3400		if (!idev)
3401			goto out;
3402	}
3403
3404	if (cfg->fc_flags & RTNH_F_ONLINK) {
3405		if (!dev) {
3406			NL_SET_ERR_MSG(extack,
3407				       "Nexthop device required for onlink");
3408			goto out;
3409		}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3410
3411		if (!(dev->flags & IFF_UP)) {
3412			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3413			err = -ENETDOWN;
 
 
 
 
 
 
 
 
3414			goto out;
3415		}
3416
3417		fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
3418	}
 
 
 
 
3419
3420	fib6_nh->fib_nh_weight = 1;
3421
3422	/* We cannot add true routes via loopback here,
3423	 * they would result in kernel looping; promote them to reject routes
3424	 */
3425	addr_type = ipv6_addr_type(&cfg->fc_dst);
3426	if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
 
3427		/* hold loopback dev/idev if we haven't done so. */
3428		if (dev != net->loopback_dev) {
3429			if (dev) {
3430				dev_put(dev);
3431				in6_dev_put(idev);
3432			}
3433			dev = net->loopback_dev;
3434			dev_hold(dev);
3435			idev = in6_dev_get(dev);
3436			if (!idev) {
3437				err = -ENODEV;
3438				goto out;
3439			}
3440		}
3441		goto pcpu_alloc;
 
 
 
 
3442	}
3443
3444	if (cfg->fc_flags & RTF_GATEWAY) {
3445		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3446		if (err)
3447			goto out;
3448
3449		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
3450		fib6_nh->fib_nh_gw_family = AF_INET6;
3451	}
3452
3453	err = -ENODEV;
3454	if (!dev)
3455		goto out;
3456
3457	if (idev->cnf.disable_ipv6) {
3458		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3459		err = -EACCES;
3460		goto out;
3461	}
3462
3463	if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
3464		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3465		err = -ENETDOWN;
3466		goto out;
3467	}
3468
3469	if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3470	    !netif_carrier_ok(dev))
3471		fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
3472
3473	err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
3474				 cfg->fc_encap_type, cfg, gfp_flags, extack);
3475	if (err)
3476		goto out;
3477
3478pcpu_alloc:
3479	fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
3480	if (!fib6_nh->rt6i_pcpu) {
3481		err = -ENOMEM;
3482		goto out;
3483	}
3484
3485	fib6_nh->fib_nh_dev = dev;
3486	fib6_nh->fib_nh_oif = dev->ifindex;
3487	err = 0;
3488out:
3489	if (idev)
3490		in6_dev_put(idev);
3491
3492	if (err) {
3493		lwtstate_put(fib6_nh->fib_nh_lws);
3494		fib6_nh->fib_nh_lws = NULL;
3495		if (dev)
3496			dev_put(dev);
3497	}
3498
3499	return err;
3500}
3501
3502void fib6_nh_release(struct fib6_nh *fib6_nh)
3503{
3504	struct rt6_exception_bucket *bucket;
3505
3506	rcu_read_lock();
3507
3508	fib6_nh_flush_exceptions(fib6_nh, NULL);
3509	bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL);
3510	if (bucket) {
3511		rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL);
3512		kfree(bucket);
3513	}
3514
3515	rcu_read_unlock();
3516
3517	if (fib6_nh->rt6i_pcpu) {
3518		int cpu;
3519
3520		for_each_possible_cpu(cpu) {
3521			struct rt6_info **ppcpu_rt;
3522			struct rt6_info *pcpu_rt;
3523
3524			ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
3525			pcpu_rt = *ppcpu_rt;
3526			if (pcpu_rt) {
3527				dst_dev_put(&pcpu_rt->dst);
3528				dst_release(&pcpu_rt->dst);
3529				*ppcpu_rt = NULL;
3530			}
3531		}
3532
3533		free_percpu(fib6_nh->rt6i_pcpu);
3534	}
3535
3536	fib_nh_common_release(&fib6_nh->nh_common);
3537}
3538
3539static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
3540					      gfp_t gfp_flags,
3541					      struct netlink_ext_ack *extack)
3542{
3543	struct net *net = cfg->fc_nlinfo.nl_net;
3544	struct fib6_info *rt = NULL;
3545	struct nexthop *nh = NULL;
3546	struct fib6_table *table;
3547	struct fib6_nh *fib6_nh;
3548	int err = -EINVAL;
3549	int addr_type;
3550
3551	/* RTF_PCPU is an internal flag; can not be set by userspace */
3552	if (cfg->fc_flags & RTF_PCPU) {
3553		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
3554		goto out;
3555	}
3556
3557	/* RTF_CACHE is an internal flag; can not be set by userspace */
3558	if (cfg->fc_flags & RTF_CACHE) {
3559		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3560		goto out;
3561	}
3562
3563	if (cfg->fc_type > RTN_MAX) {
3564		NL_SET_ERR_MSG(extack, "Invalid route type");
3565		goto out;
3566	}
 
 
 
 
 
 
 
 
 
 
 
 
 
3567
3568	if (cfg->fc_dst_len > 128) {
3569		NL_SET_ERR_MSG(extack, "Invalid prefix length");
3570		goto out;
3571	}
3572	if (cfg->fc_src_len > 128) {
3573		NL_SET_ERR_MSG(extack, "Invalid source address length");
3574		goto out;
3575	}
3576#ifndef CONFIG_IPV6_SUBTREES
3577	if (cfg->fc_src_len) {
3578		NL_SET_ERR_MSG(extack,
3579			       "Specifying source address requires IPV6_SUBTREES to be enabled");
3580		goto out;
3581	}
3582#endif
3583	if (cfg->fc_nh_id) {
3584		nh = nexthop_find_by_id(net, cfg->fc_nh_id);
3585		if (!nh) {
3586			NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
3587			goto out;
3588		}
3589		err = fib6_check_nexthop(nh, cfg, extack);
3590		if (err)
3591			goto out;
3592	}
3593
3594	err = -ENOBUFS;
3595	if (cfg->fc_nlinfo.nlh &&
3596	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
3597		table = fib6_get_table(net, cfg->fc_table);
3598		if (!table) {
3599			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
3600			table = fib6_new_table(net, cfg->fc_table);
3601		}
3602	} else {
3603		table = fib6_new_table(net, cfg->fc_table);
3604	}
3605
3606	if (!table)
3607		goto out;
3608
3609	err = -ENOMEM;
3610	rt = fib6_info_alloc(gfp_flags, !nh);
3611	if (!rt)
3612		goto out;
3613
3614	rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3615					       extack);
3616	if (IS_ERR(rt->fib6_metrics)) {
3617		err = PTR_ERR(rt->fib6_metrics);
3618		/* Do not leave garbage there. */
3619		rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
3620		goto out;
3621	}
3622
3623	if (cfg->fc_flags & RTF_ADDRCONF)
3624		rt->dst_nocount = true;
3625
3626	if (cfg->fc_flags & RTF_EXPIRES)
3627		fib6_set_expires(rt, jiffies +
3628				clock_t_to_jiffies(cfg->fc_expires));
3629	else
3630		fib6_clean_expires(rt);
3631
3632	if (cfg->fc_protocol == RTPROT_UNSPEC)
3633		cfg->fc_protocol = RTPROT_BOOT;
3634	rt->fib6_protocol = cfg->fc_protocol;
3635
3636	rt->fib6_table = table;
3637	rt->fib6_metric = cfg->fc_metric;
3638	rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
3639	rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
3640
3641	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3642	rt->fib6_dst.plen = cfg->fc_dst_len;
3643	if (rt->fib6_dst.plen == 128)
3644		rt->dst_host = true;
3645
3646#ifdef CONFIG_IPV6_SUBTREES
3647	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3648	rt->fib6_src.plen = cfg->fc_src_len;
3649#endif
3650	if (nh) {
3651		if (!nexthop_get(nh)) {
3652			NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
3653			goto out;
3654		}
3655		if (rt->fib6_src.plen) {
3656			NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
3657			goto out;
3658		}
3659		rt->nh = nh;
3660		fib6_nh = nexthop_fib6_nh(rt->nh);
3661	} else {
3662		err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
3663		if (err)
3664			goto out;
3665
3666		fib6_nh = rt->fib6_nh;
3667
3668		/* We cannot add true routes via loopback here, they would
3669		 * result in kernel looping; promote them to reject routes
3670		 */
3671		addr_type = ipv6_addr_type(&cfg->fc_dst);
3672		if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev,
3673				   addr_type))
3674			rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
3675	}
3676
3677	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3678		struct net_device *dev = fib6_nh->fib_nh_dev;
3679
3680		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3681			NL_SET_ERR_MSG(extack, "Invalid source address");
3682			err = -EINVAL;
3683			goto out;
3684		}
3685		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3686		rt->fib6_prefsrc.plen = 128;
3687	} else
3688		rt->fib6_prefsrc.plen = 0;
3689
3690	return rt;
3691out:
3692	fib6_info_release(rt);
3693	return ERR_PTR(err);
3694}
3695
3696int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3697		  struct netlink_ext_ack *extack)
3698{
3699	struct fib6_info *rt;
3700	int err;
3701
3702	rt = ip6_route_info_create(cfg, gfp_flags, extack);
3703	if (IS_ERR(rt))
3704		return PTR_ERR(rt);
3705
3706	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
3707	fib6_info_release(rt);
3708
3709	return err;
3710}
3711
3712static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3713{
3714	struct net *net = info->nl_net;
3715	struct fib6_table *table;
3716	int err;
3717
3718	if (rt == net->ipv6.fib6_null_entry) {
3719		err = -ENOENT;
3720		goto out;
3721	}
3722
3723	table = rt->fib6_table;
3724	spin_lock_bh(&table->tb6_lock);
3725	err = fib6_del(rt, info);
3726	spin_unlock_bh(&table->tb6_lock);
3727
3728out:
3729	fib6_info_release(rt);
3730	return err;
3731}
3732
3733int ip6_del_rt(struct net *net, struct fib6_info *rt)
3734{
3735	struct nl_info info = { .nl_net = net };
3736
3737	return __ip6_del_rt(rt, &info);
3738}
 
 
3739
3740static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
3741{
3742	struct nl_info *info = &cfg->fc_nlinfo;
3743	struct net *net = info->nl_net;
3744	struct sk_buff *skb = NULL;
3745	struct fib6_table *table;
3746	int err = -ENOENT;
3747
3748	if (rt == net->ipv6.fib6_null_entry)
3749		goto out_put;
3750	table = rt->fib6_table;
3751	spin_lock_bh(&table->tb6_lock);
3752
3753	if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
3754		struct fib6_info *sibling, *next_sibling;
3755
3756		/* prefer to send a single notification with all hops */
3757		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3758		if (skb) {
3759			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3760
3761			if (rt6_fill_node(net, skb, rt, NULL,
3762					  NULL, NULL, 0, RTM_DELROUTE,
3763					  info->portid, seq, 0) < 0) {
3764				kfree_skb(skb);
3765				skb = NULL;
3766			} else
3767				info->skip_notify = 1;
3768		}
3769
3770		info->skip_notify_kernel = 1;
3771		call_fib6_multipath_entry_notifiers(net,
3772						    FIB_EVENT_ENTRY_DEL,
3773						    rt,
3774						    rt->fib6_nsiblings,
3775						    NULL);
3776		list_for_each_entry_safe(sibling, next_sibling,
3777					 &rt->fib6_siblings,
3778					 fib6_siblings) {
3779			err = fib6_del(sibling, info);
3780			if (err)
3781				goto out_unlock;
3782		}
3783	}
3784
3785	err = fib6_del(rt, info);
3786out_unlock:
3787	spin_unlock_bh(&table->tb6_lock);
3788out_put:
3789	fib6_info_release(rt);
3790
3791	if (skb) {
3792		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3793			    info->nlh, gfp_any());
3794	}
3795	return err;
3796}
3797
3798static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3799{
3800	int rc = -ESRCH;
3801
3802	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3803		goto out;
3804
3805	if (cfg->fc_flags & RTF_GATEWAY &&
3806	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3807		goto out;
3808
3809	rc = rt6_remove_exception_rt(rt);
3810out:
3811	return rc;
 
 
 
 
 
 
3812}
3813
3814static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
3815			     struct fib6_nh *nh)
3816{
3817	struct fib6_result res = {
3818		.f6i = rt,
3819		.nh = nh,
3820	};
3821	struct rt6_info *rt_cache;
3822
3823	rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src);
3824	if (rt_cache)
3825		return __ip6_del_cached_rt(rt_cache, cfg);
3826
3827	return 0;
3828}
3829
3830struct fib6_nh_del_cached_rt_arg {
3831	struct fib6_config *cfg;
3832	struct fib6_info *f6i;
3833};
3834
3835static int fib6_nh_del_cached_rt(struct fib6_nh *nh, void *_arg)
3836{
3837	struct fib6_nh_del_cached_rt_arg *arg = _arg;
3838	int rc;
3839
3840	rc = ip6_del_cached_rt(arg->cfg, arg->f6i, nh);
3841	return rc != -ESRCH ? rc : 0;
3842}
3843
3844static int ip6_del_cached_rt_nh(struct fib6_config *cfg, struct fib6_info *f6i)
3845{
3846	struct fib6_nh_del_cached_rt_arg arg = {
3847		.cfg = cfg,
3848		.f6i = f6i
3849	};
3850
3851	return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_del_cached_rt, &arg);
3852}
3853
3854static int ip6_route_del(struct fib6_config *cfg,
3855			 struct netlink_ext_ack *extack)
3856{
3857	struct fib6_table *table;
3858	struct fib6_info *rt;
3859	struct fib6_node *fn;
 
3860	int err = -ESRCH;
3861
3862	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3863	if (!table) {
3864		NL_SET_ERR_MSG(extack, "FIB table does not exist");
3865		return err;
3866	}
3867
3868	rcu_read_lock();
3869
3870	fn = fib6_locate(&table->tb6_root,
3871			 &cfg->fc_dst, cfg->fc_dst_len,
3872			 &cfg->fc_src, cfg->fc_src_len,
3873			 !(cfg->fc_flags & RTF_CACHE));
3874
3875	if (fn) {
3876		for_each_fib6_node_rt_rcu(fn) {
3877			struct fib6_nh *nh;
3878
3879			if (rt->nh && cfg->fc_nh_id &&
3880			    rt->nh->id != cfg->fc_nh_id)
3881				continue;
3882
3883			if (cfg->fc_flags & RTF_CACHE) {
3884				int rc = 0;
3885
3886				if (rt->nh) {
3887					rc = ip6_del_cached_rt_nh(cfg, rt);
3888				} else if (cfg->fc_nh_id) {
3889					continue;
3890				} else {
3891					nh = rt->fib6_nh;
3892					rc = ip6_del_cached_rt(cfg, rt, nh);
3893				}
3894				if (rc != -ESRCH) {
3895					rcu_read_unlock();
3896					return rc;
3897				}
3898				continue;
3899			}
3900
3901			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
3902				continue;
3903			if (cfg->fc_protocol &&
3904			    cfg->fc_protocol != rt->fib6_protocol)
3905				continue;
3906
3907			if (rt->nh) {
3908				if (!fib6_info_hold_safe(rt))
3909					continue;
3910				rcu_read_unlock();
3911
3912				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3913			}
3914			if (cfg->fc_nh_id)
3915				continue;
3916
3917			nh = rt->fib6_nh;
3918			if (cfg->fc_ifindex &&
3919			    (!nh->fib_nh_dev ||
3920			     nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
3921				continue;
3922			if (cfg->fc_flags & RTF_GATEWAY &&
3923			    !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
3924				continue;
3925			if (!fib6_info_hold_safe(rt))
3926				continue;
3927			rcu_read_unlock();
 
3928
3929			/* if gateway was specified only delete the one hop */
3930			if (cfg->fc_flags & RTF_GATEWAY)
3931				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3932
3933			return __ip6_del_rt_siblings(rt, cfg);
3934		}
3935	}
3936	rcu_read_unlock();
3937
3938	return err;
3939}
3940
3941static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
 
 
 
 
 
 
 
 
 
 
 
3942{
3943	struct netevent_redirect netevent;
3944	struct rt6_info *rt, *nrt = NULL;
3945	struct fib6_result res = {};
3946	struct ndisc_options ndopts;
3947	struct inet6_dev *in6_dev;
3948	struct neighbour *neigh;
3949	struct rd_msg *msg;
3950	int optlen, on_link;
3951	u8 *lladdr;
3952
3953	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
3954	optlen -= sizeof(*msg);
 
 
 
 
 
 
 
 
3955
3956	if (optlen < 0) {
3957		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3958		return;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3959	}
3960
3961	msg = (struct rd_msg *)icmp6_hdr(skb);
 
 
 
 
3962
3963	if (ipv6_addr_is_multicast(&msg->dest)) {
3964		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3965		return;
3966	}
3967
3968	on_link = 0;
3969	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3970		on_link = 1;
3971	} else if (ipv6_addr_type(&msg->target) !=
3972		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
3973		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3974		return;
3975	}
3976
3977	in6_dev = __in6_dev_get(skb->dev);
3978	if (!in6_dev)
3979		return;
3980	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3981		return;
 
 
 
 
 
 
 
 
 
3982
3983	/* RFC2461 8.1:
3984	 *	The IP source address of the Redirect MUST be the same as the current
3985	 *	first-hop router for the specified ICMP Destination Address.
3986	 */
3987
3988	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3989		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3990		return;
3991	}
3992
3993	lladdr = NULL;
3994	if (ndopts.nd_opts_tgt_lladdr) {
3995		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3996					     skb->dev);
3997		if (!lladdr) {
3998			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3999			return;
4000		}
4001	}
4002
4003	rt = (struct rt6_info *) dst;
4004	if (rt->rt6i_flags & RTF_REJECT) {
4005		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
4006		return;
4007	}
 
 
4008
4009	/* Redirect received -> path was valid.
4010	 * Look, redirects are sent only in response to data packets,
4011	 * so that this nexthop apparently is reachable. --ANK
4012	 */
4013	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
4014
4015	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
4016	if (!neigh)
4017		return;
 
 
 
4018
4019	/*
4020	 *	We have finally decided to accept it.
4021	 */
4022
4023	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
4024		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
4025		     NEIGH_UPDATE_F_OVERRIDE|
4026		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
4027				     NEIGH_UPDATE_F_ISROUTER)),
4028		     NDISC_REDIRECT, &ndopts);
4029
4030	rcu_read_lock();
4031	res.f6i = rcu_dereference(rt->from);
4032	if (!res.f6i)
4033		goto out;
4034
4035	if (res.f6i->nh) {
4036		struct fib6_nh_match_arg arg = {
4037			.dev = dst->dev,
4038			.gw = &rt->rt6i_gateway,
4039		};
4040
4041		nexthop_for_each_fib6_nh(res.f6i->nh,
4042					 fib6_nh_find_match, &arg);
4043
4044		/* fib6_info uses a nexthop that does not have fib6_nh
4045		 * using the dst->dev. Should be impossible
4046		 */
4047		if (!arg.match)
4048			goto out;
4049		res.nh = arg.match;
4050	} else {
4051		res.nh = res.f6i->fib6_nh;
4052	}
4053
4054	res.fib6_flags = res.f6i->fib6_flags;
4055	res.fib6_type = res.f6i->fib6_type;
4056	nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
4057	if (!nrt)
4058		goto out;
4059
4060	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
4061	if (on_link)
4062		nrt->rt6i_flags &= ~RTF_GATEWAY;
4063
4064	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
 
4065
4066	/* rt6_insert_exception() will take care of duplicated exceptions */
4067	if (rt6_insert_exception(nrt, &res)) {
4068		dst_release_immediate(&nrt->dst);
4069		goto out;
4070	}
4071
4072	netevent.old = &rt->dst;
4073	netevent.new = &nrt->dst;
4074	netevent.daddr = &msg->dest;
4075	netevent.neigh = neigh;
4076	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
4077
 
 
 
 
 
4078out:
4079	rcu_read_unlock();
4080	neigh_release(neigh);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4081}
4082
4083#ifdef CONFIG_IPV6_ROUTE_INFO
4084static struct fib6_info *rt6_get_route_info(struct net *net,
4085					   const struct in6_addr *prefix, int prefixlen,
4086					   const struct in6_addr *gwaddr,
4087					   struct net_device *dev)
4088{
4089	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
4090	int ifindex = dev->ifindex;
4091	struct fib6_node *fn;
4092	struct fib6_info *rt = NULL;
4093	struct fib6_table *table;
4094
4095	table = fib6_get_table(net, tb_id);
4096	if (!table)
4097		return NULL;
4098
4099	rcu_read_lock();
4100	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
4101	if (!fn)
4102		goto out;
4103
4104	for_each_fib6_node_rt_rcu(fn) {
4105		/* these routes do not use nexthops */
4106		if (rt->nh)
4107			continue;
4108		if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex)
4109			continue;
4110		if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
4111		    !rt->fib6_nh->fib_nh_gw_family)
4112			continue;
4113		if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr))
4114			continue;
4115		if (!fib6_info_hold_safe(rt))
4116			continue;
 
4117		break;
4118	}
4119out:
4120	rcu_read_unlock();
4121	return rt;
4122}
4123
4124static struct fib6_info *rt6_add_route_info(struct net *net,
4125					   const struct in6_addr *prefix, int prefixlen,
4126					   const struct in6_addr *gwaddr,
4127					   struct net_device *dev,
4128					   unsigned int pref)
4129{
4130	struct fib6_config cfg = {
 
4131		.fc_metric	= IP6_RT_PRIO_USER,
4132		.fc_ifindex	= dev->ifindex,
4133		.fc_dst_len	= prefixlen,
4134		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
4135				  RTF_UP | RTF_PREF(pref),
4136		.fc_protocol = RTPROT_RA,
4137		.fc_type = RTN_UNICAST,
4138		.fc_nlinfo.portid = 0,
4139		.fc_nlinfo.nlh = NULL,
4140		.fc_nlinfo.nl_net = net,
4141	};
4142
4143	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4144	cfg.fc_dst = *prefix;
4145	cfg.fc_gateway = *gwaddr;
4146
4147	/* We should treat it as a default route if prefix length is 0. */
4148	if (!prefixlen)
4149		cfg.fc_flags |= RTF_DEFAULT;
4150
4151	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
4152
4153	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
4154}
4155#endif
4156
4157struct fib6_info *rt6_get_dflt_router(struct net *net,
4158				     const struct in6_addr *addr,
4159				     struct net_device *dev)
4160{
4161	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
4162	struct fib6_info *rt;
4163	struct fib6_table *table;
4164
4165	table = fib6_get_table(net, tb_id);
4166	if (!table)
4167		return NULL;
4168
4169	rcu_read_lock();
4170	for_each_fib6_node_rt_rcu(&table->tb6_root) {
4171		struct fib6_nh *nh;
4172
4173		/* RA routes do not use nexthops */
4174		if (rt->nh)
4175			continue;
4176
4177		nh = rt->fib6_nh;
4178		if (dev == nh->fib_nh_dev &&
4179		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
4180		    ipv6_addr_equal(&nh->fib_nh_gw6, addr))
4181			break;
4182	}
4183	if (rt && !fib6_info_hold_safe(rt))
4184		rt = NULL;
4185	rcu_read_unlock();
4186	return rt;
4187}
4188
4189struct fib6_info *rt6_add_dflt_router(struct net *net,
4190				     const struct in6_addr *gwaddr,
4191				     struct net_device *dev,
4192				     unsigned int pref)
4193{
4194	struct fib6_config cfg = {
4195		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
4196		.fc_metric	= IP6_RT_PRIO_USER,
4197		.fc_ifindex	= dev->ifindex,
4198		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
4199				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
4200		.fc_protocol = RTPROT_RA,
4201		.fc_type = RTN_UNICAST,
4202		.fc_nlinfo.portid = 0,
4203		.fc_nlinfo.nlh = NULL,
4204		.fc_nlinfo.nl_net = net,
4205	};
4206
4207	cfg.fc_gateway = *gwaddr;
4208
4209	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
4210		struct fib6_table *table;
4211
4212		table = fib6_get_table(dev_net(dev), cfg.fc_table);
4213		if (table)
4214			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
4215	}
4216
4217	return rt6_get_dflt_router(net, gwaddr, dev);
4218}
4219
4220static void __rt6_purge_dflt_routers(struct net *net,
4221				     struct fib6_table *table)
4222{
4223	struct fib6_info *rt;
4224
4225restart:
4226	rcu_read_lock();
4227	for_each_fib6_node_rt_rcu(&table->tb6_root) {
4228		struct net_device *dev = fib6_info_nh_dev(rt);
4229		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
4230
4231		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
4232		    (!idev || idev->cnf.accept_ra != 2) &&
4233		    fib6_info_hold_safe(rt)) {
4234			rcu_read_unlock();
4235			ip6_del_rt(net, rt);
4236			goto restart;
4237		}
4238	}
4239	rcu_read_unlock();
4240
4241	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
4242}
4243
4244void rt6_purge_dflt_routers(struct net *net)
4245{
 
4246	struct fib6_table *table;
4247	struct hlist_head *head;
4248	unsigned int h;
4249
4250	rcu_read_lock();
 
 
 
4251
4252	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
4253		head = &net->ipv6.fib_table_hash[h];
4254		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
4255			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
4256				__rt6_purge_dflt_routers(net, table);
 
 
 
4257		}
4258	}
4259
4260	rcu_read_unlock();
4261}
4262
4263static void rtmsg_to_fib6_config(struct net *net,
4264				 struct in6_rtmsg *rtmsg,
4265				 struct fib6_config *cfg)
4266{
4267	*cfg = (struct fib6_config){
4268		.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
4269			 : RT6_TABLE_MAIN,
4270		.fc_ifindex = rtmsg->rtmsg_ifindex,
4271		.fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
4272		.fc_expires = rtmsg->rtmsg_info,
4273		.fc_dst_len = rtmsg->rtmsg_dst_len,
4274		.fc_src_len = rtmsg->rtmsg_src_len,
4275		.fc_flags = rtmsg->rtmsg_flags,
4276		.fc_type = rtmsg->rtmsg_type,
4277
4278		.fc_nlinfo.nl_net = net,
4279
4280		.fc_dst = rtmsg->rtmsg_dst,
4281		.fc_src = rtmsg->rtmsg_src,
4282		.fc_gateway = rtmsg->rtmsg_gateway,
4283	};
 
 
 
 
 
 
 
4284}
4285
4286int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4287{
4288	struct fib6_config cfg;
4289	struct in6_rtmsg rtmsg;
4290	int err;
4291
4292	switch (cmd) {
4293	case SIOCADDRT:		/* Add a route */
4294	case SIOCDELRT:		/* Delete a route */
4295		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
4296			return -EPERM;
4297		err = copy_from_user(&rtmsg, arg,
4298				     sizeof(struct in6_rtmsg));
4299		if (err)
4300			return -EFAULT;
4301
4302		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
4303
4304		rtnl_lock();
4305		switch (cmd) {
4306		case SIOCADDRT:
4307			err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
4308			break;
4309		case SIOCDELRT:
4310			err = ip6_route_del(&cfg, NULL);
4311			break;
4312		default:
4313			err = -EINVAL;
4314		}
4315		rtnl_unlock();
4316
4317		return err;
4318	}
4319
4320	return -EINVAL;
4321}
4322
4323/*
4324 *	Drop the packet on the floor
4325 */
4326
4327static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
4328{
4329	struct dst_entry *dst = skb_dst(skb);
4330	struct net *net = dev_net(dst->dev);
4331	struct inet6_dev *idev;
4332	int type;
4333
4334	if (netif_is_l3_master(skb->dev) &&
4335	    dst->dev == net->loopback_dev)
4336		idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
4337	else
4338		idev = ip6_dst_idev(dst);
4339
4340	switch (ipstats_mib_noroutes) {
4341	case IPSTATS_MIB_INNOROUTES:
4342		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
4343		if (type == IPV6_ADDR_ANY) {
4344			IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
 
4345			break;
4346		}
4347		/* FALLTHROUGH */
4348	case IPSTATS_MIB_OUTNOROUTES:
4349		IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
 
4350		break;
4351	}
4352
4353	/* Start over by dropping the dst for l3mdev case */
4354	if (netif_is_l3_master(skb->dev))
4355		skb_dst_drop(skb);
4356
4357	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
4358	kfree_skb(skb);
4359	return 0;
4360}
4361
4362static int ip6_pkt_discard(struct sk_buff *skb)
4363{
4364	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
4365}
4366
4367static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
4368{
4369	skb->dev = skb_dst(skb)->dev;
4370	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
4371}
4372
 
 
4373static int ip6_pkt_prohibit(struct sk_buff *skb)
4374{
4375	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
4376}
4377
4378static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
4379{
4380	skb->dev = skb_dst(skb)->dev;
4381	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
4382}
4383
 
 
4384/*
4385 *	Allocate a dst for local (unicast / anycast) address.
4386 */
4387
4388struct fib6_info *addrconf_f6i_alloc(struct net *net,
4389				     struct inet6_dev *idev,
4390				     const struct in6_addr *addr,
4391				     bool anycast, gfp_t gfp_flags)
4392{
4393	struct fib6_config cfg = {
4394		.fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
4395		.fc_ifindex = idev->dev->ifindex,
4396		.fc_flags = RTF_UP | RTF_NONEXTHOP,
4397		.fc_dst = *addr,
4398		.fc_dst_len = 128,
4399		.fc_protocol = RTPROT_KERNEL,
4400		.fc_nlinfo.nl_net = net,
4401		.fc_ignore_dev_down = true,
4402	};
4403	struct fib6_info *f6i;
4404
4405	if (anycast) {
4406		cfg.fc_type = RTN_ANYCAST;
4407		cfg.fc_flags |= RTF_ANYCAST;
4408	} else {
4409		cfg.fc_type = RTN_LOCAL;
4410		cfg.fc_flags |= RTF_LOCAL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4411	}
 
 
 
 
 
 
 
4412
4413	f6i = ip6_route_info_create(&cfg, gfp_flags, NULL);
4414	if (!IS_ERR(f6i))
4415		f6i->dst_nocount = true;
4416	return f6i;
 
 
 
 
 
 
 
 
 
 
 
 
 
4417}
4418
4419/* remove deleted ip from prefsrc entries */
4420struct arg_dev_net_ip {
4421	struct net_device *dev;
4422	struct net *net;
4423	struct in6_addr *addr;
4424};
4425
4426static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
4427{
4428	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
4429	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
4430	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
4431
4432	if (!rt->nh &&
4433	    ((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
4434	    rt != net->ipv6.fib6_null_entry &&
4435	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
4436		spin_lock_bh(&rt6_exception_lock);
4437		/* remove prefsrc entry */
4438		rt->fib6_prefsrc.plen = 0;
4439		spin_unlock_bh(&rt6_exception_lock);
4440	}
4441	return 0;
4442}
4443
4444void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
4445{
4446	struct net *net = dev_net(ifp->idev->dev);
4447	struct arg_dev_net_ip adni = {
4448		.dev = ifp->idev->dev,
4449		.net = net,
4450		.addr = &ifp->addr,
4451	};
4452	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
4453}
4454
/*
 * Flag pair tested by fib6_clean_tohost(): a route counts as an "RA router"
 * only when both RTF_ADDRCONF and RTF_DEFAULT are set.
 * NOTE(review): presumably this marks default routers learned from Router
 * Advertisements - confirm against the code that sets these flags.
 */
4455#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT)
4456
4457/* Remove routers and update dst entries when gateway turn into host. */
4458static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
4459{
4460	struct in6_addr *gateway = (struct in6_addr *)arg;
4461	struct fib6_nh *nh;
4462
4463	/* RA routes do not use nexthops */
4464	if (rt->nh)
4465		return 0;
4466
4467	nh = rt->fib6_nh;
4468	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
4469	    nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
4470		return -1;
4471
4472	/* Further clean up cached routes in exception table.
4473	 * This is needed because cached route may have a different
4474	 * gateway than its 'parent' in the case of an ip redirect.
4475	 */
4476	fib6_nh_exceptions_clean_tohost(nh, gateway);
4477
4478	return 0;
4479}
4480
4481void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
4482{
4483	fib6_clean_all(net, fib6_clean_tohost, gateway);
4484}
4485
/*
 * Argument handed through fib6_clean_all() to the netdev event walkers.
 * fib6_ifup() reads @nh_flags (the nexthop flags to clear), fib6_ifdown()
 * reads @event (a NETDEV_* value).  Both live in one union, so only the
 * member matching the walker in use is meaningful.
 */
4486struct arg_netdev_event {
	/* device the event refers to */
4487	const struct net_device *dev;
4488	union {
4489		unsigned char nh_flags;
4490		unsigned long event;
4491	};
4492};
4493
4494static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
4495{
4496	struct fib6_info *iter;
4497	struct fib6_node *fn;
4498
4499	fn = rcu_dereference_protected(rt->fib6_node,
4500			lockdep_is_held(&rt->fib6_table->tb6_lock));
4501	iter = rcu_dereference_protected(fn->leaf,
4502			lockdep_is_held(&rt->fib6_table->tb6_lock));
4503	while (iter) {
4504		if (iter->fib6_metric == rt->fib6_metric &&
4505		    rt6_qualify_for_ecmp(iter))
4506			return iter;
4507		iter = rcu_dereference_protected(iter->fib6_next,
4508				lockdep_is_held(&rt->fib6_table->tb6_lock));
4509	}
4510
4511	return NULL;
4512}
4513
4514/* only called for fib entries with builtin fib6_nh */
4515static bool rt6_is_dead(const struct fib6_info *rt)
4516{
4517	if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD ||
4518	    (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN &&
4519	     ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev)))
4520		return true;
4521
4522	return false;
4523}
4524
4525static int rt6_multipath_total_weight(const struct fib6_info *rt)
4526{
4527	struct fib6_info *iter;
4528	int total = 0;
4529
4530	if (!rt6_is_dead(rt))
4531		total += rt->fib6_nh->fib_nh_weight;
4532
4533	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
4534		if (!rt6_is_dead(iter))
4535			total += iter->fib6_nh->fib_nh_weight;
4536	}
4537
4538	return total;
4539}
4540
4541static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
4542{
4543	int upper_bound = -1;
4544
4545	if (!rt6_is_dead(rt)) {
4546		*weight += rt->fib6_nh->fib_nh_weight;
4547		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
4548						    total) - 1;
4549	}
4550	atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound);
4551}
4552
4553static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
4554{
4555	struct fib6_info *iter;
4556	int weight = 0;
4557
4558	rt6_upper_bound_set(rt, &weight, total);
4559
4560	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4561		rt6_upper_bound_set(iter, &weight, total);
4562}
4563
4564void rt6_multipath_rebalance(struct fib6_info *rt)
4565{
4566	struct fib6_info *first;
4567	int total;
4568
4569	/* In case the entire multipath route was marked for flushing,
4570	 * then there is no need to rebalance upon the removal of every
4571	 * sibling route.
4572	 */
4573	if (!rt->fib6_nsiblings || rt->should_flush)
4574		return;
4575
4576	/* During lookup routes are evaluated in order, so we need to
4577	 * make sure upper bounds are assigned from the first sibling
4578	 * onwards.
4579	 */
4580	first = rt6_multipath_first_sibling(rt);
4581	if (WARN_ON_ONCE(!first))
4582		return;
4583
4584	total = rt6_multipath_total_weight(first);
4585	rt6_multipath_upper_bound_set(first, total);
4586}
4587
4588static int fib6_ifup(struct fib6_info *rt, void *p_arg)
4589{
4590	const struct arg_netdev_event *arg = p_arg;
4591	struct net *net = dev_net(arg->dev);
4592
4593	if (rt != net->ipv6.fib6_null_entry && !rt->nh &&
4594	    rt->fib6_nh->fib_nh_dev == arg->dev) {
4595		rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags;
4596		fib6_update_sernum_upto_root(net, rt);
4597		rt6_multipath_rebalance(rt);
4598	}
4599
4600	return 0;
4601}
4602
4603void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
4604{
4605	struct arg_netdev_event arg = {
4606		.dev = dev,
4607		{
4608			.nh_flags = nh_flags,
4609		},
4610	};
4611
4612	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
4613		arg.nh_flags |= RTNH_F_LINKDOWN;
4614
4615	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
4616}
4617
4618/* only called for fib entries with inline fib6_nh */
4619static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
4620				   const struct net_device *dev)
4621{
4622	struct fib6_info *iter;
4623
4624	if (rt->fib6_nh->fib_nh_dev == dev)
4625		return true;
4626	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4627		if (iter->fib6_nh->fib_nh_dev == dev)
4628			return true;
4629
4630	return false;
4631}
4632
4633static void rt6_multipath_flush(struct fib6_info *rt)
4634{
4635	struct fib6_info *iter;
4636
4637	rt->should_flush = 1;
4638	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4639		iter->should_flush = 1;
4640}
4641
4642static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
4643					     const struct net_device *down_dev)
4644{
4645	struct fib6_info *iter;
4646	unsigned int dead = 0;
4647
4648	if (rt->fib6_nh->fib_nh_dev == down_dev ||
4649	    rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
4650		dead++;
4651	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4652		if (iter->fib6_nh->fib_nh_dev == down_dev ||
4653		    iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
4654			dead++;
4655
4656	return dead;
4657}
4658
4659static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
4660				       const struct net_device *dev,
4661				       unsigned char nh_flags)
4662{
4663	struct fib6_info *iter;
4664
4665	if (rt->fib6_nh->fib_nh_dev == dev)
4666		rt->fib6_nh->fib_nh_flags |= nh_flags;
4667	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4668		if (iter->fib6_nh->fib_nh_dev == dev)
4669			iter->fib6_nh->fib_nh_flags |= nh_flags;
4670}
4671
4672/* called with write lock held for table with rt */
4673static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
4674{
4675	const struct arg_netdev_event *arg = p_arg;
4676	const struct net_device *dev = arg->dev;
4677	struct net *net = dev_net(dev);
4678
4679	if (rt == net->ipv6.fib6_null_entry || rt->nh)
4680		return 0;
4681
4682	switch (arg->event) {
4683	case NETDEV_UNREGISTER:
4684		return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
4685	case NETDEV_DOWN:
4686		if (rt->should_flush)
4687			return -1;
4688		if (!rt->fib6_nsiblings)
4689			return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
4690		if (rt6_multipath_uses_dev(rt, dev)) {
4691			unsigned int count;
4692
4693			count = rt6_multipath_dead_count(rt, dev);
4694			if (rt->fib6_nsiblings + 1 == count) {
4695				rt6_multipath_flush(rt);
4696				return -1;
4697			}
4698			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4699						   RTNH_F_LINKDOWN);
4700			fib6_update_sernum(net, rt);
4701			rt6_multipath_rebalance(rt);
4702		}
4703		return -2;
4704	case NETDEV_CHANGE:
4705		if (rt->fib6_nh->fib_nh_dev != dev ||
4706		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
4707			break;
4708		rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
4709		rt6_multipath_rebalance(rt);
4710		break;
4711	}
4712
4713	return 0;
4714}
4715
4716void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
4717{
4718	struct arg_netdev_event arg = {
4719		.dev = dev,
4720		{
4721			.event = event,
4722		},
4723	};
4724	struct net *net = dev_net(dev);
4725
4726	if (net->ipv6.sysctl.skip_notify_on_dev_down)
4727		fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4728	else
4729		fib6_clean_all(net, fib6_ifdown, &arg);
4730}
4731
4732void rt6_disable_ip(struct net_device *dev, unsigned long event)
4733{
4734	rt6_sync_down_dev(dev, event);
4735	rt6_uncached_list_flush_dev(dev_net(dev), dev);
4736	neigh_ifdown(&nd_tbl, dev);
4737}
4738
/*
 * Argument passed from rt6_mtu_change() through fib6_clean_all() to
 * rt6_mtu_change_route() and on to fib6_nh_mtu_change().
 */
4739struct rt6_mtu_change_arg {
	/* device whose MTU changed */
4740	struct net_device *dev;
	/* the new MTU value */
4741	unsigned int mtu;
	/* route being visited; filled in by rt6_mtu_change_route() */
4742	struct fib6_info *f6i;
4743};
4744
4745static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg)
4746{
4747	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
4748	struct fib6_info *f6i = arg->f6i;
4749
4750	/* For administrative MTU increase, there is no way to discover
4751	 * IPv6 PMTU increase, so PMTU increase should be updated here.
4752	 * Since RFC 1981 doesn't include administrative MTU increase
4753	 * update PMTU increase is a MUST. (i.e. jumbo frame)
4754	 */
4755	if (nh->fib_nh_dev == arg->dev) {
4756		struct inet6_dev *idev = __in6_dev_get(arg->dev);
4757		u32 mtu = f6i->fib6_pmtu;
4758
4759		if (mtu >= arg->mtu ||
4760		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4761			fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
4762
4763		spin_lock_bh(&rt6_exception_lock);
4764		rt6_exceptions_update_pmtu(idev, nh, arg->mtu);
4765		spin_unlock_bh(&rt6_exception_lock);
4766	}
4767
4768	return 0;
4769}
4770
4771static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
4772{
4773	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4774	struct inet6_dev *idev;
4775
4776	/* In IPv6 pmtu discovery is not optional,
4777	   so that RTAX_MTU lock cannot disable it.
4778	   We still use this lock to block changes
4779	   caused by addrconf/ndisc.
4780	*/
4781
4782	idev = __in6_dev_get(arg->dev);
4783	if (!idev)
4784		return 0;
4785
4786	if (fib6_metric_locked(f6i, RTAX_MTU))
4787		return 0;
4788
4789	arg->f6i = f6i;
4790	if (f6i->nh) {
4791		/* fib6_nh_mtu_change only returns 0, so this is safe */
4792		return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_mtu_change,
4793						arg);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4794	}
4795
4796	return fib6_nh_mtu_change(f6i->fib6_nh, arg);
4797}
4798
4799void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
4800{
4801	struct rt6_mtu_change_arg arg = {
4802		.dev = dev,
4803		.mtu = mtu,
4804	};
4805
4806	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
4807}
4808
/*
 * Netlink attribute policy for IPv6 route messages; rtm_to_fib6_config()
 * passes it to nlmsg_parse_deprecated().  Address attributes are described
 * by length (sizeof(struct in6_addr)), RTA_MULTIPATH by the size of one
 * struct rtnexthop.  RTA_DST and RTA_SRC have no entry here; their lengths
 * are checked by hand in rtm_to_fib6_config().
 * NOTE(review): strict_start_type presumably makes attributes from
 * RTA_DPORT + 1 onwards subject to strict validation - confirm against the
 * netlink policy documentation.
 */
4809static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
4810	[RTA_UNSPEC]		= { .strict_start_type = RTA_DPORT + 1 },
4811	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
4812	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
4813	[RTA_OIF]               = { .type = NLA_U32 },
4814	[RTA_IIF]		= { .type = NLA_U32 },
4815	[RTA_PRIORITY]          = { .type = NLA_U32 },
4816	[RTA_METRICS]           = { .type = NLA_NESTED },
4817	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
4818	[RTA_PREF]              = { .type = NLA_U8 },
4819	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
4820	[RTA_ENCAP]		= { .type = NLA_NESTED },
4821	[RTA_EXPIRES]		= { .type = NLA_U32 },
4822	[RTA_UID]		= { .type = NLA_U32 },
4823	[RTA_MARK]		= { .type = NLA_U32 },
4824	[RTA_TABLE]		= { .type = NLA_U32 },
4825	[RTA_IP_PROTO]		= { .type = NLA_U8 },
4826	[RTA_SPORT]		= { .type = NLA_U16 },
4827	[RTA_DPORT]		= { .type = NLA_U16 },
4828	[RTA_NH_ID]		= { .type = NLA_U32 },
4829};
4830
4831static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4832			      struct fib6_config *cfg,
4833			      struct netlink_ext_ack *extack)
4834{
4835	struct rtmsg *rtm;
4836	struct nlattr *tb[RTA_MAX+1];
4837	unsigned int pref;
4838	int err;
4839
4840	err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
4841				     rtm_ipv6_policy, extack);
4842	if (err < 0)
4843		goto errout;
4844
4845	err = -EINVAL;
4846	rtm = nlmsg_data(nlh);
 
4847
4848	*cfg = (struct fib6_config){
4849		.fc_table = rtm->rtm_table,
4850		.fc_dst_len = rtm->rtm_dst_len,
4851		.fc_src_len = rtm->rtm_src_len,
4852		.fc_flags = RTF_UP,
4853		.fc_protocol = rtm->rtm_protocol,
4854		.fc_type = rtm->rtm_type,
4855
4856		.fc_nlinfo.portid = NETLINK_CB(skb).portid,
4857		.fc_nlinfo.nlh = nlh,
4858		.fc_nlinfo.nl_net = sock_net(skb->sk),
4859	};
4860
4861	if (rtm->rtm_type == RTN_UNREACHABLE ||
4862	    rtm->rtm_type == RTN_BLACKHOLE ||
4863	    rtm->rtm_type == RTN_PROHIBIT ||
4864	    rtm->rtm_type == RTN_THROW)
4865		cfg->fc_flags |= RTF_REJECT;
4866
4867	if (rtm->rtm_type == RTN_LOCAL)
4868		cfg->fc_flags |= RTF_LOCAL;
4869
4870	if (rtm->rtm_flags & RTM_F_CLONED)
4871		cfg->fc_flags |= RTF_CACHE;
4872
4873	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4874
4875	if (tb[RTA_NH_ID]) {
4876		if (tb[RTA_GATEWAY]   || tb[RTA_OIF] ||
4877		    tb[RTA_MULTIPATH] || tb[RTA_ENCAP]) {
4878			NL_SET_ERR_MSG(extack,
4879				       "Nexthop specification and nexthop id are mutually exclusive");
4880			goto errout;
4881		}
4882		cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]);
4883	}
4884
4885	if (tb[RTA_GATEWAY]) {
4886		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
4887		cfg->fc_flags |= RTF_GATEWAY;
4888	}
4889	if (tb[RTA_VIA]) {
4890		NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4891		goto errout;
4892	}
4893
4894	if (tb[RTA_DST]) {
4895		int plen = (rtm->rtm_dst_len + 7) >> 3;
4896
4897		if (nla_len(tb[RTA_DST]) < plen)
4898			goto errout;
4899
4900		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
4901	}
4902
4903	if (tb[RTA_SRC]) {
4904		int plen = (rtm->rtm_src_len + 7) >> 3;
4905
4906		if (nla_len(tb[RTA_SRC]) < plen)
4907			goto errout;
4908
4909		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
4910	}
4911
4912	if (tb[RTA_PREFSRC])
4913		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4914
4915	if (tb[RTA_OIF])
4916		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4917
4918	if (tb[RTA_PRIORITY])
4919		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4920
4921	if (tb[RTA_METRICS]) {
4922		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4923		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
4924	}
4925
4926	if (tb[RTA_TABLE])
4927		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4928
4929	if (tb[RTA_MULTIPATH]) {
4930		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4931		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
4932
4933		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4934						     cfg->fc_mp_len, extack);
4935		if (err < 0)
4936			goto errout;
4937	}
4938
4939	if (tb[RTA_PREF]) {
4940		pref = nla_get_u8(tb[RTA_PREF]);
4941		if (pref != ICMPV6_ROUTER_PREF_LOW &&
4942		    pref != ICMPV6_ROUTER_PREF_HIGH)
4943			pref = ICMPV6_ROUTER_PREF_MEDIUM;
4944		cfg->fc_flags |= RTF_PREF(pref);
4945	}
4946
4947	if (tb[RTA_ENCAP])
4948		cfg->fc_encap = tb[RTA_ENCAP];
4949
4950	if (tb[RTA_ENCAP_TYPE]) {
4951		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4952
4953		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
4954		if (err < 0)
4955			goto errout;
4956	}
4957
4958	if (tb[RTA_EXPIRES]) {
4959		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4960
4961		if (addrconf_finite_timeout(timeout)) {
4962			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4963			cfg->fc_flags |= RTF_EXPIRES;
4964		}
4965	}
4966
4967	err = 0;
4968errout:
4969	return err;
4970}
4971
/*
 * One queued nexthop of a multipath add request.  Entries are allocated by
 * ip6_route_info_append() and consumed by ip6_route_multipath_add().
 */
4972struct rt6_nh {
	/* route created for this nexthop */
4973	struct fib6_info *fib6_info;
	/* copy of the per-nexthop config; reused for ip6_route_del() on rollback */
4974	struct fib6_config r_cfg;
	/* linkage on the caller's local list */
4975	struct list_head next;
4976};
4977
4978static int ip6_route_info_append(struct net *net,
4979				 struct list_head *rt6_nh_list,
4980				 struct fib6_info *rt,
4981				 struct fib6_config *r_cfg)
4982{
4983	struct rt6_nh *nh;
4984	int err = -EEXIST;
4985
4986	list_for_each_entry(nh, rt6_nh_list, next) {
4987		/* check if fib6_info already exists */
4988		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
4989			return err;
4990	}
4991
4992	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4993	if (!nh)
4994		return -ENOMEM;
4995	nh->fib6_info = rt;
4996	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4997	list_add_tail(&nh->next, rt6_nh_list);
4998
4999	return 0;
5000}
5001
5002static void ip6_route_mpath_notify(struct fib6_info *rt,
5003				   struct fib6_info *rt_last,
5004				   struct nl_info *info,
5005				   __u16 nlflags)
5006{
5007	/* if this is an APPEND route, then rt points to the first route
5008	 * inserted and rt_last points to last route inserted. Userspace
5009	 * wants a consistent dump of the route which starts at the first
5010	 * nexthop. Since sibling routes are always added at the end of
5011	 * the list, find the first sibling of the last route appended
5012	 */
5013	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
5014		rt = list_first_entry(&rt_last->fib6_siblings,
5015				      struct fib6_info,
5016				      fib6_siblings);
5017	}
5018
5019	if (rt)
5020		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
5021}
5022
5023static int ip6_route_multipath_add(struct fib6_config *cfg,
5024				   struct netlink_ext_ack *extack)
5025{
5026	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
5027	struct nl_info *info = &cfg->fc_nlinfo;
5028	enum fib_event_type event_type;
5029	struct fib6_config r_cfg;
5030	struct rtnexthop *rtnh;
5031	struct fib6_info *rt;
5032	struct rt6_nh *err_nh;
5033	struct rt6_nh *nh, *nh_safe;
5034	__u16 nlflags;
5035	int remaining;
5036	int attrlen;
5037	int err = 1;
5038	int nhn = 0;
5039	int replace = (cfg->fc_nlinfo.nlh &&
5040		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
5041	LIST_HEAD(rt6_nh_list);
5042
5043	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
5044	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
5045		nlflags |= NLM_F_APPEND;
5046
5047	remaining = cfg->fc_mp_len;
5048	rtnh = (struct rtnexthop *)cfg->fc_mp;
5049
5050	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
5051	 * fib6_info structs per nexthop
5052	 */
5053	while (rtnh_ok(rtnh, remaining)) {
5054		memcpy(&r_cfg, cfg, sizeof(*cfg));
5055		if (rtnh->rtnh_ifindex)
5056			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
5057
5058		attrlen = rtnh_attrlen(rtnh);
5059		if (attrlen > 0) {
5060			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
5061
5062			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
5063			if (nla) {
5064				r_cfg.fc_gateway = nla_get_in6_addr(nla);
5065				r_cfg.fc_flags |= RTF_GATEWAY;
5066			}
5067			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
5068			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
5069			if (nla)
5070				r_cfg.fc_encap_type = nla_get_u16(nla);
5071		}
5072
5073		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
5074		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
5075		if (IS_ERR(rt)) {
5076			err = PTR_ERR(rt);
5077			rt = NULL;
5078			goto cleanup;
5079		}
5080		if (!rt6_qualify_for_ecmp(rt)) {
5081			err = -EINVAL;
5082			NL_SET_ERR_MSG(extack,
5083				       "Device only routes can not be added for IPv6 using the multipath API.");
5084			fib6_info_release(rt);
5085			goto cleanup;
5086		}
5087
5088		rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
5089
5090		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
5091					    rt, &r_cfg);
5092		if (err) {
5093			fib6_info_release(rt);
5094			goto cleanup;
5095		}
5096
5097		rtnh = rtnh_next(rtnh, &remaining);
5098	}
5099
5100	if (list_empty(&rt6_nh_list)) {
5101		NL_SET_ERR_MSG(extack,
5102			       "Invalid nexthop configuration - no valid nexthops");
5103		return -EINVAL;
5104	}
5105
5106	/* for add and replace send one notification with all nexthops.
5107	 * Skip the notification in fib6_add_rt2node and send one with
5108	 * the full route when done
5109	 */
5110	info->skip_notify = 1;
5111
5112	/* For add and replace, send one notification with all nexthops. For
5113	 * append, send one notification with all appended nexthops.
5114	 */
5115	info->skip_notify_kernel = 1;
5116
5117	err_nh = NULL;
5118	list_for_each_entry(nh, &rt6_nh_list, next) {
5119		err = __ip6_ins_rt(nh->fib6_info, info, extack);
5120		fib6_info_release(nh->fib6_info);
5121
5122		if (!err) {
5123			/* save reference to last route successfully inserted */
5124			rt_last = nh->fib6_info;
5125
5126			/* save reference to first route for notification */
5127			if (!rt_notif)
5128				rt_notif = nh->fib6_info;
5129		}
5130
5131		/* nh->fib6_info is used or freed at this point, reset to NULL*/
5132		nh->fib6_info = NULL;
5133		if (err) {
5134			if (replace && nhn)
5135				NL_SET_ERR_MSG_MOD(extack,
5136						   "multipath route replace failed (check consistency of installed routes)");
5137			err_nh = nh;
5138			goto add_errout;
5139		}
5140
5141		/* Because each route is added like a single route we remove
5142		 * these flags after the first nexthop: if there is a collision,
5143		 * we have already failed to add the first nexthop:
5144		 * fib6_add_rt2node() has rejected it; when replacing, old
5145		 * nexthops have been replaced by first new, the rest should
5146		 * be added to it.
5147		 */
5148		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
5149						     NLM_F_REPLACE);
5150		nhn++;
5151	}
5152
5153	event_type = replace ? FIB_EVENT_ENTRY_REPLACE : FIB_EVENT_ENTRY_ADD;
5154	err = call_fib6_multipath_entry_notifiers(info->nl_net, event_type,
5155						  rt_notif, nhn - 1, extack);
5156	if (err) {
5157		/* Delete all the siblings that were just added */
5158		err_nh = NULL;
5159		goto add_errout;
5160	}
5161
5162	/* success ... tell user about new route */
5163	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
5164	goto cleanup;
5165
5166add_errout:
5167	/* send notification for routes that were added so that
5168	 * the delete notifications sent by ip6_route_del are
5169	 * coherent
5170	 */
5171	if (rt_notif)
5172		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
5173
5174	/* Delete routes that were already added */
5175	list_for_each_entry(nh, &rt6_nh_list, next) {
5176		if (err_nh == nh)
5177			break;
5178		ip6_route_del(&nh->r_cfg, extack);
5179	}
5180
5181cleanup:
5182	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
5183		if (nh->fib6_info)
5184			fib6_info_release(nh->fib6_info);
5185		list_del(&nh->next);
5186		kfree(nh);
5187	}
5188
5189	return err;
5190}
5191
5192static int ip6_route_multipath_del(struct fib6_config *cfg,
5193				   struct netlink_ext_ack *extack)
5194{
5195	struct fib6_config r_cfg;
5196	struct rtnexthop *rtnh;
5197	int remaining;
5198	int attrlen;
5199	int err = 1, last_err = 0;
5200
5201	remaining = cfg->fc_mp_len;
5202	rtnh = (struct rtnexthop *)cfg->fc_mp;
5203
5204	/* Parse a Multipath Entry */
5205	while (rtnh_ok(rtnh, remaining)) {
5206		memcpy(&r_cfg, cfg, sizeof(*cfg));
5207		if (rtnh->rtnh_ifindex)
5208			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
5209
5210		attrlen = rtnh_attrlen(rtnh);
5211		if (attrlen > 0) {
5212			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
5213
5214			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
5215			if (nla) {
5216				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
5217				r_cfg.fc_flags |= RTF_GATEWAY;
5218			}
5219		}
5220		err = ip6_route_del(&r_cfg, extack);
5221		if (err)
5222			last_err = err;
5223
5224		rtnh = rtnh_next(rtnh, &remaining);
5225	}
5226
5227	return last_err;
5228}
5229
5230static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
5231			      struct netlink_ext_ack *extack)
5232{
5233	struct fib6_config cfg;
5234	int err;
5235
5236	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
5237	if (err < 0)
5238		return err;
5239
5240	if (cfg.fc_nh_id &&
5241	    !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) {
5242		NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
5243		return -EINVAL;
5244	}
5245
5246	if (cfg.fc_mp)
5247		return ip6_route_multipath_del(&cfg, extack);
5248	else {
5249		cfg.fc_delete_all_nh = 1;
5250		return ip6_route_del(&cfg, extack);
5251	}
5252}
5253
5254static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
5255			      struct netlink_ext_ack *extack)
5256{
5257	struct fib6_config cfg;
5258	int err;
5259
5260	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
5261	if (err < 0)
5262		return err;
5263
5264	if (cfg.fc_metric == 0)
5265		cfg.fc_metric = IP6_RT_PRIO_USER;
5266
5267	if (cfg.fc_mp)
5268		return ip6_route_multipath_add(&cfg, extack);
5269	else
5270		return ip6_route_add(&cfg, GFP_KERNEL, extack);
5271}
5272
5273/* add the overhead of this fib6_nh to nexthop_len */
5274static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
5275{
5276	int *nexthop_len = arg;
5277
5278	*nexthop_len += nla_total_size(0)	 /* RTA_MULTIPATH */
5279		     + NLA_ALIGN(sizeof(struct rtnexthop))
5280		     + nla_total_size(16); /* RTA_GATEWAY */
5281
5282	if (nh->fib_nh_lws) {
5283		/* RTA_ENCAP_TYPE */
5284		*nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
5285		/* RTA_ENCAP */
5286		*nexthop_len += nla_total_size(2);
5287	}
5288
5289	return 0;
5290}
5291
5292static size_t rt6_nlmsg_size(struct fib6_info *f6i)
5293{
5294	int nexthop_len;
5295
5296	if (f6i->nh) {
5297		nexthop_len = nla_total_size(4); /* RTA_NH_ID */
5298		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
5299					 &nexthop_len);
5300	} else {
5301		struct fib6_nh *nh = f6i->fib6_nh;
5302
5303		nexthop_len = 0;
5304		if (f6i->fib6_nsiblings) {
5305			nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
5306				    + NLA_ALIGN(sizeof(struct rtnexthop))
5307				    + nla_total_size(16) /* RTA_GATEWAY */
5308				    + lwtunnel_get_encap_size(nh->fib_nh_lws);
5309
5310			nexthop_len *= f6i->fib6_nsiblings;
5311		}
5312		nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
5313	}
5314
5315	return NLMSG_ALIGN(sizeof(struct rtmsg))
5316	       + nla_total_size(16) /* RTA_SRC */
5317	       + nla_total_size(16) /* RTA_DST */
5318	       + nla_total_size(16) /* RTA_GATEWAY */
5319	       + nla_total_size(16) /* RTA_PREFSRC */
5320	       + nla_total_size(4) /* RTA_TABLE */
5321	       + nla_total_size(4) /* RTA_IIF */
5322	       + nla_total_size(4) /* RTA_OIF */
5323	       + nla_total_size(4) /* RTA_PRIORITY */
5324	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
5325	       + nla_total_size(sizeof(struct rta_cacheinfo))
5326	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
5327	       + nla_total_size(1) /* RTA_PREF */
5328	       + nexthop_len;
5329}
5330
5331static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
5332				 unsigned char *flags)
 
 
 
5333{
5334	if (nexthop_is_multipath(nh)) {
5335		struct nlattr *mp;
5336
5337		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
5338		if (!mp)
5339			goto nla_put_failure;
5340
5341		if (nexthop_mpath_fill_node(skb, nh, AF_INET6))
5342			goto nla_put_failure;
5343
5344		nla_nest_end(skb, mp);
5345	} else {
5346		struct fib6_nh *fib6_nh;
5347
5348		fib6_nh = nexthop_fib6_nh(nh);
5349		if (fib_nexthop_info(skb, &fib6_nh->nh_common, AF_INET6,
5350				     flags, false) < 0)
5351			goto nla_put_failure;
 
5352	}
5353
5354	return 0;
5355
5356nla_put_failure:
5357	return -EMSGSIZE;
5358}
5359
/*
 * Build one rtnetlink route message of type @type in @skb.
 *
 * @rt is the fib entry being described.  When @dst is non-NULL it is treated
 * as a struct rt6_info and the destination/source keys, the flags, the
 * metrics, the gateway and the output device are taken from it instead of
 * from @rt.  A non-NULL @dest (and, with CONFIG_IPV6_SUBTREES, @src) is
 * reported in place of the stored prefix, with a prefix length of 128.
 * A non-zero @iif is reported as RTA_IIF, except for multicast destinations
 * with CONFIG_IPV6_MROUTE, which are handed to ip6mr_get_route().
 *
 * Returns 0 on success.  When an attribute does not fit, the message is
 * cancelled and -EMSGSIZE is returned; the same happens when
 * ip6mr_get_route() reports a negative error.  If nlmsg_put() itself fails,
 * -EMSGSIZE is returned with nothing to cancel.
 */
5360static int rt6_fill_node(struct net *net, struct sk_buff *skb,
5361			 struct fib6_info *rt, struct dst_entry *dst,
5362			 struct in6_addr *dest, struct in6_addr *src,
5363			 int iif, int type, u32 portid, u32 seq,
5364			 unsigned int flags)
5365{
5366	struct rt6_info *rt6 = (struct rt6_info *)dst;
5367	struct rt6key *rt6_dst, *rt6_src;
5368	u32 *pmetrics, table, rt6_flags;
5369	unsigned char nh_flags = 0;
5370	struct nlmsghdr *nlh;
5371	struct rtmsg *rtm;
5372	long expires = 0;
5373
5374	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
5375	if (!nlh)
5376		return -EMSGSIZE;
5377
	/* keys and flags come from the cached dst when one was given */
5378	if (rt6) {
5379		rt6_dst = &rt6->rt6i_dst;
5380		rt6_src = &rt6->rt6i_src;
5381		rt6_flags = rt6->rt6i_flags;
5382	} else {
5383		rt6_dst = &rt->fib6_dst;
5384		rt6_src = &rt->fib6_src;
5385		rt6_flags = rt->fib6_flags;
5386	}
5387
5388	rtm = nlmsg_data(nlh);
5389	rtm->rtm_family = AF_INET6;
5390	rtm->rtm_dst_len = rt6_dst->plen;
5391	rtm->rtm_src_len = rt6_src->plen;
5392	rtm->rtm_tos = 0;
5393	if (rt->fib6_table)
5394		table = rt->fib6_table->tb6_id;
5395	else
5396		table = RT6_TABLE_UNSPEC;
	/* ids of 256 and above are reported as RT_TABLE_COMPAT in the header; RTA_TABLE always carries the id */
5397	rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
5398	if (nla_put_u32(skb, RTA_TABLE, table))
5399		goto nla_put_failure;
5400
5401	rtm->rtm_type = rt->fib6_type;
 
 
 
 
 
5402	rtm->rtm_flags = 0;
5403	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
5404	rtm->rtm_protocol = rt->fib6_protocol;
 
 
 
 
 
 
5405
5406	if (rt6_flags & RTF_CACHE)
5407		rtm->rtm_flags |= RTM_F_CLONED;
5408
5409	if (dest) {
5410		if (nla_put_in6_addr(skb, RTA_DST, dest))
5411			goto nla_put_failure;
5412		rtm->rtm_dst_len = 128;
5413	} else if (rtm->rtm_dst_len)
5414		if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
5415			goto nla_put_failure;
5416#ifdef CONFIG_IPV6_SUBTREES
5417	if (src) {
5418		if (nla_put_in6_addr(skb, RTA_SRC, src))
5419			goto nla_put_failure;
5420		rtm->rtm_src_len = 128;
5421	} else if (rtm->rtm_src_len &&
5422		   nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
5423		goto nla_put_failure;
5424#endif
5425	if (iif) {
5426#ifdef CONFIG_IPV6_MROUTE
5427		if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
5428			int err = ip6mr_get_route(net, skb, rtm, portid);
5429
			/* err == 0 returns straight away, without reaching nlmsg_end() below */
5430			if (err == 0)
5431				return 0;
5432			if (err < 0)
5433				goto nla_put_failure;
 
 
 
 
 
			/* the else below pairs with the multicast test above only when CONFIG_IPV6_MROUTE is set */
5434		} else
5435#endif
5436			if (nla_put_u32(skb, RTA_IIF, iif))
5437				goto nla_put_failure;
5438	} else if (dest) {
5439		struct in6_addr saddr_buf;
5440		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
5441		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
5442			goto nla_put_failure;
5443	}
5444
5445	if (rt->fib6_prefsrc.plen) {
5446		struct in6_addr saddr_buf;
5447		saddr_buf = rt->fib6_prefsrc.addr;
5448		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
5449			goto nla_put_failure;
5450	}
5451
5452	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
5453	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
5454		goto nla_put_failure;
5455
5456	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
5457		goto nla_put_failure;
5458
5459	/* For multipath routes, walk the siblings list and add
5460	 * each as a nexthop within RTA_MULTIPATH.
5461	 */
	/* four cases follow: cached dst, sibling list, nexthop object, single inline nexthop */
5462	if (rt6) {
5463		if (rt6_flags & RTF_GATEWAY &&
5464		    nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
5465			goto nla_put_failure;
5466
5467		if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
5468			goto nla_put_failure;
5469	} else if (rt->fib6_nsiblings) {
5470		struct fib6_info *sibling, *next_sibling;
5471		struct nlattr *mp;
5472
5473		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
5474		if (!mp)
5475			goto nla_put_failure;
5476
		/* this route's own nexthop goes first, then one entry per sibling */
5477		if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
5478				    rt->fib6_nh->fib_nh_weight, AF_INET6) < 0)
5479			goto nla_put_failure;
5480
5481		list_for_each_entry_safe(sibling, next_sibling,
5482					 &rt->fib6_siblings, fib6_siblings) {
5483			if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
5484					    sibling->fib6_nh->fib_nh_weight,
5485					    AF_INET6) < 0)
5486				goto nla_put_failure;
5487		}
5488
5489		nla_nest_end(skb, mp);
5490	} else if (rt->nh) {
5491		if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
5492			goto nla_put_failure;
5493
		/* a blackhole nexthop object overrides the route type reported above */
5494		if (nexthop_is_blackhole(rt->nh))
5495			rtm->rtm_type = RTN_BLACKHOLE;
5496
5497		if (rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
5498			goto nla_put_failure;
5499
5500		rtm->rtm_flags |= nh_flags;
5501	} else {
5502		if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, AF_INET6,
5503				     &nh_flags, false) < 0)
5504			goto nla_put_failure;
5505
5506		rtm->rtm_flags |= nh_flags;
5507	}
5508
	/* expiry is reported relative to the current jiffies; 0 when RTF_EXPIRES is not set */
5509	if (rt6_flags & RTF_EXPIRES) {
5510		expires = dst ? dst->expires : rt->expires;
5511		expires -= jiffies;
5512	}
5513
5514	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
5515		goto nla_put_failure;
 
 
 
 
5516
5517	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
 
5518		goto nla_put_failure;
5519
5520
5521	nlmsg_end(skb, nlh);
5522	return 0;
5523
5524nla_put_failure:
5525	nlmsg_cancel(skb, nlh);
5526	return -EMSGSIZE;
5527}
5528
5529static int fib6_info_nh_uses_dev(struct fib6_nh *nh, void *arg)
5530{
5531	const struct net_device *dev = arg;
5532
5533	if (nh->fib_nh_dev == dev)
5534		return 1;
5535
5536	return 0;
5537}
5538
5539static bool fib6_info_uses_dev(const struct fib6_info *f6i,
5540			       const struct net_device *dev)
5541{
5542	if (f6i->nh) {
5543		struct net_device *_dev = (struct net_device *)dev;
5544
5545		return !!nexthop_for_each_fib6_nh(f6i->nh,
5546						  fib6_info_nh_uses_dev,
5547						  _dev);
5548	}
5549
5550	if (f6i->fib6_nh->fib_nh_dev == dev)
5551		return true;
5552
5553	if (f6i->fib6_nsiblings) {
5554		struct fib6_info *sibling, *next_sibling;
5555
5556		list_for_each_entry_safe(sibling, next_sibling,
5557					 &f6i->fib6_siblings, fib6_siblings) {
5558			if (sibling->fib6_nh->fib_nh_dev == dev)
5559				return true;
5560		}
5561	}
5562
5563	return false;
5564}
5565
/* Cookie handed to rt6_nh_dump_exceptions() while dumping the exception
 * (cached) routes hanging off one fib6_info.
 */
struct fib6_nh_exception_dump_walker {
	struct rt6_rtnl_dump_arg *dump;	/* dump context: net, skb, callback */
	struct fib6_info *rt;		/* route passed to rt6_fill_node() */
	unsigned int flags;		/* netlink message flags for each entry */
	unsigned int skip;		/* entries still to be skipped */
	unsigned int count;		/* entries handled so far */
};
5573
5574static int rt6_nh_dump_exceptions(struct fib6_nh *nh, void *arg)
5575{
5576	struct fib6_nh_exception_dump_walker *w = arg;
5577	struct rt6_rtnl_dump_arg *dump = w->dump;
5578	struct rt6_exception_bucket *bucket;
5579	struct rt6_exception *rt6_ex;
5580	int i, err;
5581
5582	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
5583	if (!bucket)
5584		return 0;
5585
5586	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
5587		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
5588			if (w->skip) {
5589				w->skip--;
5590				continue;
5591			}
5592
5593			/* Expiration of entries doesn't bump sernum, insertion
5594			 * does. Removal is triggered by insertion, so we can
5595			 * rely on the fact that if entries change between two
5596			 * partial dumps, this node is scanned again completely,
5597			 * see rt6_insert_exception() and fib6_dump_table().
5598			 *
5599			 * Count expired entries we go through as handled
5600			 * entries that we'll skip next time, in case of partial
5601			 * node dump. Otherwise, if entries expire meanwhile,
5602			 * we'll skip the wrong amount.
5603			 */
5604			if (rt6_check_expired(rt6_ex->rt6i)) {
5605				w->count++;
5606				continue;
5607			}
5608
5609			err = rt6_fill_node(dump->net, dump->skb, w->rt,
5610					    &rt6_ex->rt6i->dst, NULL, NULL, 0,
5611					    RTM_NEWROUTE,
5612					    NETLINK_CB(dump->cb->skb).portid,
5613					    dump->cb->nlh->nlmsg_seq, w->flags);
5614			if (err)
5615				return err;
5616
5617			w->count++;
5618		}
5619		bucket++;
5620	}
5621
5622	return 0;
5623}
5624
5625/* Return -1 if done with node, number of handled routes on partial dump */
5626int rt6_dump_route(struct fib6_info *rt, void *p_arg, unsigned int skip)
5627{
5628	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
5629	struct fib_dump_filter *filter = &arg->filter;
5630	unsigned int flags = NLM_F_MULTI;
5631	struct net *net = arg->net;
5632	int count = 0;
5633
5634	if (rt == net->ipv6.fib6_null_entry)
5635		return -1;
5636
5637	if ((filter->flags & RTM_F_PREFIX) &&
5638	    !(rt->fib6_flags & RTF_PREFIX_RT)) {
5639		/* success since this is not a prefix route */
5640		return -1;
5641	}
5642	if (filter->filter_set &&
5643	    ((filter->rt_type  && rt->fib6_type != filter->rt_type) ||
5644	     (filter->dev      && !fib6_info_uses_dev(rt, filter->dev)) ||
5645	     (filter->protocol && rt->fib6_protocol != filter->protocol))) {
5646		return -1;
5647	}
5648
5649	if (filter->filter_set ||
5650	    !filter->dump_routes || !filter->dump_exceptions) {
5651		flags |= NLM_F_DUMP_FILTERED;
5652	}
5653
5654	if (filter->dump_routes) {
5655		if (skip) {
5656			skip--;
5657		} else {
5658			if (rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL,
5659					  0, RTM_NEWROUTE,
5660					  NETLINK_CB(arg->cb->skb).portid,
5661					  arg->cb->nlh->nlmsg_seq, flags)) {
5662				return 0;
5663			}
5664			count++;
5665		}
5666	}
5667
5668	if (filter->dump_exceptions) {
5669		struct fib6_nh_exception_dump_walker w = { .dump = arg,
5670							   .rt = rt,
5671							   .flags = flags,
5672							   .skip = skip,
5673							   .count = 0 };
5674		int err;
5675
5676		rcu_read_lock();
5677		if (rt->nh) {
5678			err = nexthop_for_each_fib6_nh(rt->nh,
5679						       rt6_nh_dump_exceptions,
5680						       &w);
5681		} else {
5682			err = rt6_nh_dump_exceptions(rt->fib6_nh, &w);
5683		}
5684		rcu_read_unlock();
5685
5686		if (err)
5687			return count += w.count;
5688	}
5689
5690	return -1;
5691}
5692
5693static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
5694					const struct nlmsghdr *nlh,
5695					struct nlattr **tb,
5696					struct netlink_ext_ack *extack)
5697{
5698	struct rtmsg *rtm;
5699	int i, err;
5700
5701	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
5702		NL_SET_ERR_MSG_MOD(extack,
5703				   "Invalid header for get route request");
5704		return -EINVAL;
5705	}
5706
5707	if (!netlink_strict_get_check(skb))
5708		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
5709					      rtm_ipv6_policy, extack);
5710
5711	rtm = nlmsg_data(nlh);
5712	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
5713	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
5714	    rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
5715	    rtm->rtm_type) {
5716		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
5717		return -EINVAL;
5718	}
5719	if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
5720		NL_SET_ERR_MSG_MOD(extack,
5721				   "Invalid flags for get route request");
5722		return -EINVAL;
5723	}
5724
5725	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
5726					    rtm_ipv6_policy, extack);
5727	if (err)
5728		return err;
5729
5730	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
5731	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
5732		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
5733		return -EINVAL;
5734	}
5735
5736	for (i = 0; i <= RTA_MAX; i++) {
5737		if (!tb[i])
5738			continue;
5739
5740		switch (i) {
5741		case RTA_SRC:
5742		case RTA_DST:
5743		case RTA_IIF:
5744		case RTA_OIF:
5745		case RTA_MARK:
5746		case RTA_UID:
5747		case RTA_SPORT:
5748		case RTA_DPORT:
5749		case RTA_IP_PROTO:
5750			break;
5751		default:
5752			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
5753			return -EINVAL;
5754		}
5755	}
5756
5757	return 0;
 
 
 
5758}
5759
5760static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
5761			      struct netlink_ext_ack *extack)
5762{
5763	struct net *net = sock_net(in_skb->sk);
5764	struct nlattr *tb[RTA_MAX+1];
5765	int err, iif = 0, oif = 0;
5766	struct fib6_info *from;
5767	struct dst_entry *dst;
5768	struct rt6_info *rt;
5769	struct sk_buff *skb;
5770	struct rtmsg *rtm;
5771	struct flowi6 fl6 = {};
5772	bool fibmatch;
5773
5774	err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
5775	if (err < 0)
5776		goto errout;
5777
5778	err = -EINVAL;
5779	rtm = nlmsg_data(nlh);
5780	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
5781	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
5782
5783	if (tb[RTA_SRC]) {
5784		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
5785			goto errout;
5786
5787		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
5788	}
5789
5790	if (tb[RTA_DST]) {
5791		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
5792			goto errout;
5793
5794		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
5795	}
5796
5797	if (tb[RTA_IIF])
5798		iif = nla_get_u32(tb[RTA_IIF]);
5799
5800	if (tb[RTA_OIF])
5801		oif = nla_get_u32(tb[RTA_OIF]);
5802
5803	if (tb[RTA_MARK])
5804		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
5805
5806	if (tb[RTA_UID])
5807		fl6.flowi6_uid = make_kuid(current_user_ns(),
5808					   nla_get_u32(tb[RTA_UID]));
5809	else
5810		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
5811
5812	if (tb[RTA_SPORT])
5813		fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
5814
5815	if (tb[RTA_DPORT])
5816		fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
5817
5818	if (tb[RTA_IP_PROTO]) {
5819		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
5820						  &fl6.flowi6_proto, AF_INET6,
5821						  extack);
5822		if (err)
5823			goto errout;
5824	}
5825
5826	if (iif) {
5827		struct net_device *dev;
5828		int flags = 0;
5829
5830		rcu_read_lock();
5831
5832		dev = dev_get_by_index_rcu(net, iif);
5833		if (!dev) {
5834			rcu_read_unlock();
5835			err = -ENODEV;
5836			goto errout;
5837		}
5838
5839		fl6.flowi6_iif = iif;
5840
5841		if (!ipv6_addr_any(&fl6.saddr))
5842			flags |= RT6_LOOKUP_F_HAS_SADDR;
5843
5844		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
5845
5846		rcu_read_unlock();
5847	} else {
5848		fl6.flowi6_oif = oif;
5849
5850		dst = ip6_route_output(net, NULL, &fl6);
5851	}
5852
5853
5854	rt = container_of(dst, struct rt6_info, dst);
5855	if (rt->dst.error) {
5856		err = rt->dst.error;
5857		ip6_rt_put(rt);
5858		goto errout;
5859	}
5860
5861	if (rt == net->ipv6.ip6_null_entry) {
5862		err = rt->dst.error;
5863		ip6_rt_put(rt);
5864		goto errout;
5865	}
5866
5867	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
5868	if (!skb) {
5869		ip6_rt_put(rt);
5870		err = -ENOBUFS;
5871		goto errout;
5872	}
5873
5874	skb_dst_set(skb, &rt->dst);
 
 
 
 
5875
5876	rcu_read_lock();
5877	from = rcu_dereference(rt->from);
5878	if (from) {
5879		if (fibmatch)
5880			err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
5881					    iif, RTM_NEWROUTE,
5882					    NETLINK_CB(in_skb).portid,
5883					    nlh->nlmsg_seq, 0);
5884		else
5885			err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5886					    &fl6.saddr, iif, RTM_NEWROUTE,
5887					    NETLINK_CB(in_skb).portid,
5888					    nlh->nlmsg_seq, 0);
5889	} else {
5890		err = -ENETUNREACH;
5891	}
5892	rcu_read_unlock();
5893
 
 
 
5894	if (err < 0) {
5895		kfree_skb(skb);
5896		goto errout;
5897	}
5898
5899	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
5900errout:
5901	return err;
5902}
5903
5904void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
5905		     unsigned int nlm_flags)
5906{
5907	struct sk_buff *skb;
5908	struct net *net = info->nl_net;
5909	u32 seq;
5910	int err;
5911
5912	err = -ENOBUFS;
5913	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
5914
5915	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
5916	if (!skb)
5917		goto errout;
5918
5919	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5920			    event, info->portid, seq, nlm_flags);
5921	if (err < 0) {
5922		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5923		WARN_ON(err == -EMSGSIZE);
5924		kfree_skb(skb);
5925		goto errout;
5926	}
5927	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
5928		    info->nlh, gfp_any());
5929	return;
5930errout:
5931	if (err < 0)
5932		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
5933}
5934
5935void fib6_rt_update(struct net *net, struct fib6_info *rt,
5936		    struct nl_info *info)
5937{
5938	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
5939	struct sk_buff *skb;
5940	int err = -ENOBUFS;
5941
5942	/* call_fib6_entry_notifiers will be removed when in-kernel notifier
5943	 * is implemented and supported for nexthop objects
5944	 */
5945	call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL);
5946
5947	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
5948	if (!skb)
5949		goto errout;
5950
5951	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5952			    RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE);
5953	if (err < 0) {
5954		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5955		WARN_ON(err == -EMSGSIZE);
5956		kfree_skb(skb);
5957		goto errout;
5958	}
5959	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
5960		    info->nlh, gfp_any());
5961	return;
5962errout:
5963	if (err < 0)
5964		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
5965}
5966
5967static int ip6_route_dev_notify(struct notifier_block *this,
5968				unsigned long event, void *ptr)
5969{
5970	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
5971	struct net *net = dev_net(dev);
5972
5973	if (!(dev->flags & IFF_LOOPBACK))
5974		return NOTIFY_OK;
5975
5976	if (event == NETDEV_REGISTER) {
5977		net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev;
5978		net->ipv6.ip6_null_entry->dst.dev = dev;
5979		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5980#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5981		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
5982		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
5983		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
5984		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
5985#endif
5986	 } else if (event == NETDEV_UNREGISTER &&
5987		    dev->reg_state != NETREG_UNREGISTERED) {
5988		/* NETDEV_UNREGISTER could be fired for multiple times by
5989		 * netdev_wait_allrefs(). Make sure we only call this once.
5990		 */
5991		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
5992#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5993		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5994		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
5995#endif
5996	}
5997
5998	return NOTIFY_OK;
5999}
6000
6001/*
6002 *	/proc
6003 */
6004
6005#ifdef CONFIG_PROC_FS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6006static int rt6_stats_seq_show(struct seq_file *seq, void *v)
6007{
6008	struct net *net = (struct net *)seq->private;
6009	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
6010		   net->ipv6.rt6_stats->fib_nodes,
6011		   net->ipv6.rt6_stats->fib_route_nodes,
6012		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
6013		   net->ipv6.rt6_stats->fib_rt_entries,
6014		   net->ipv6.rt6_stats->fib_rt_cache,
6015		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
6016		   net->ipv6.rt6_stats->fib_discarded_routes);
6017
6018	return 0;
6019}
 
 
 
 
 
 
 
 
 
 
 
 
 
6020#endif	/* CONFIG_PROC_FS */
6021
6022#ifdef CONFIG_SYSCTL
6023
6024static
6025int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
6026			      void __user *buffer, size_t *lenp, loff_t *ppos)
6027{
6028	struct net *net;
6029	int delay;
6030	int ret;
6031	if (!write)
6032		return -EINVAL;
6033
6034	net = (struct net *)ctl->extra1;
6035	delay = net->ipv6.sysctl.flush_delay;
6036	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
6037	if (ret)
6038		return ret;
6039
6040	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
6041	return 0;
6042}
6043
6044static struct ctl_table ipv6_route_table_template[] = {
6045	{
6046		.procname	=	"flush",
6047		.data		=	&init_net.ipv6.sysctl.flush_delay,
6048		.maxlen		=	sizeof(int),
6049		.mode		=	0200,
6050		.proc_handler	=	ipv6_sysctl_rtcache_flush
6051	},
6052	{
6053		.procname	=	"gc_thresh",
6054		.data		=	&ip6_dst_ops_template.gc_thresh,
6055		.maxlen		=	sizeof(int),
6056		.mode		=	0644,
6057		.proc_handler	=	proc_dointvec,
6058	},
6059	{
6060		.procname	=	"max_size",
6061		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
6062		.maxlen		=	sizeof(int),
6063		.mode		=	0644,
6064		.proc_handler	=	proc_dointvec,
6065	},
6066	{
6067		.procname	=	"gc_min_interval",
6068		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
6069		.maxlen		=	sizeof(int),
6070		.mode		=	0644,
6071		.proc_handler	=	proc_dointvec_jiffies,
6072	},
6073	{
6074		.procname	=	"gc_timeout",
6075		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
6076		.maxlen		=	sizeof(int),
6077		.mode		=	0644,
6078		.proc_handler	=	proc_dointvec_jiffies,
6079	},
6080	{
6081		.procname	=	"gc_interval",
6082		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
6083		.maxlen		=	sizeof(int),
6084		.mode		=	0644,
6085		.proc_handler	=	proc_dointvec_jiffies,
6086	},
6087	{
6088		.procname	=	"gc_elasticity",
6089		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
6090		.maxlen		=	sizeof(int),
6091		.mode		=	0644,
6092		.proc_handler	=	proc_dointvec,
6093	},
6094	{
6095		.procname	=	"mtu_expires",
6096		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
6097		.maxlen		=	sizeof(int),
6098		.mode		=	0644,
6099		.proc_handler	=	proc_dointvec_jiffies,
6100	},
6101	{
6102		.procname	=	"min_adv_mss",
6103		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
6104		.maxlen		=	sizeof(int),
6105		.mode		=	0644,
6106		.proc_handler	=	proc_dointvec,
6107	},
6108	{
6109		.procname	=	"gc_min_interval_ms",
6110		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
6111		.maxlen		=	sizeof(int),
6112		.mode		=	0644,
6113		.proc_handler	=	proc_dointvec_ms_jiffies,
6114	},
6115	{
6116		.procname	=	"skip_notify_on_dev_down",
6117		.data		=	&init_net.ipv6.sysctl.skip_notify_on_dev_down,
6118		.maxlen		=	sizeof(int),
6119		.mode		=	0644,
6120		.proc_handler	=	proc_dointvec_minmax,
6121		.extra1		=	SYSCTL_ZERO,
6122		.extra2		=	SYSCTL_ONE,
6123	},
6124	{ }
6125};
6126
6127struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
6128{
6129	struct ctl_table *table;
6130
6131	table = kmemdup(ipv6_route_table_template,
6132			sizeof(ipv6_route_table_template),
6133			GFP_KERNEL);
6134
6135	if (table) {
6136		table[0].data = &net->ipv6.sysctl.flush_delay;
6137		table[0].extra1 = net;
6138		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
6139		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
6140		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
6141		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
6142		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
6143		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
6144		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
6145		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
6146		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
6147		table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
6148
6149		/* Don't export sysctls to unprivileged users */
6150		if (net->user_ns != &init_user_ns)
6151			table[0].procname = NULL;
6152	}
6153
6154	return table;
6155}
6156#endif
6157
6158static int __net_init ip6_route_net_init(struct net *net)
6159{
6160	int ret = -ENOMEM;
6161
6162	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
6163	       sizeof(net->ipv6.ip6_dst_ops));
6164
6165	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
6166		goto out_ip6_dst_ops;
6167
6168	net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true);
6169	if (!net->ipv6.fib6_null_entry)
6170		goto out_ip6_dst_entries;
6171	memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template,
6172	       sizeof(*net->ipv6.fib6_null_entry));
6173
6174	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
6175					   sizeof(*net->ipv6.ip6_null_entry),
6176					   GFP_KERNEL);
6177	if (!net->ipv6.ip6_null_entry)
6178		goto out_fib6_null_entry;
 
 
6179	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6180	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
6181			 ip6_template_metrics, true);
6182	INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->rt6i_uncached);
6183
6184#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6185	net->ipv6.fib6_has_custom_rules = false;
6186	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
6187					       sizeof(*net->ipv6.ip6_prohibit_entry),
6188					       GFP_KERNEL);
6189	if (!net->ipv6.ip6_prohibit_entry)
6190		goto out_ip6_null_entry;
 
 
6191	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6192	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
6193			 ip6_template_metrics, true);
6194	INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->rt6i_uncached);
6195
6196	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
6197					       sizeof(*net->ipv6.ip6_blk_hole_entry),
6198					       GFP_KERNEL);
6199	if (!net->ipv6.ip6_blk_hole_entry)
6200		goto out_ip6_prohibit_entry;
 
 
6201	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6202	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
6203			 ip6_template_metrics, true);
6204	INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached);
6205#endif
6206
6207	net->ipv6.sysctl.flush_delay = 0;
6208	net->ipv6.sysctl.ip6_rt_max_size = 4096;
6209	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
6210	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
6211	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
6212	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
6213	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
6214	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
6215	net->ipv6.sysctl.skip_notify_on_dev_down = 0;
6216
 
 
 
 
6217	net->ipv6.ip6_rt_gc_expire = 30*HZ;
6218
6219	ret = 0;
6220out:
6221	return ret;
6222
6223#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6224out_ip6_prohibit_entry:
6225	kfree(net->ipv6.ip6_prohibit_entry);
6226out_ip6_null_entry:
6227	kfree(net->ipv6.ip6_null_entry);
6228#endif
6229out_fib6_null_entry:
6230	kfree(net->ipv6.fib6_null_entry);
6231out_ip6_dst_entries:
6232	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
6233out_ip6_dst_ops:
6234	goto out;
6235}
6236
6237static void __net_exit ip6_route_net_exit(struct net *net)
6238{
6239	kfree(net->ipv6.fib6_null_entry);
 
 
 
6240	kfree(net->ipv6.ip6_null_entry);
6241#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6242	kfree(net->ipv6.ip6_prohibit_entry);
6243	kfree(net->ipv6.ip6_blk_hole_entry);
6244#endif
6245	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
6246}
6247
6248static int __net_init ip6_route_net_init_late(struct net *net)
6249{
6250#ifdef CONFIG_PROC_FS
6251	proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
6252			sizeof(struct ipv6_route_iter));
6253	proc_create_net_single("rt6_stats", 0444, net->proc_net,
6254			rt6_stats_seq_show, NULL);
6255#endif
6256	return 0;
6257}
6258
6259static void __net_exit ip6_route_net_exit_late(struct net *net)
6260{
6261#ifdef CONFIG_PROC_FS
6262	remove_proc_entry("ipv6_route", net->proc_net);
6263	remove_proc_entry("rt6_stats", net->proc_net);
6264#endif
6265}
6266
6267static struct pernet_operations ip6_route_net_ops = {
6268	.init = ip6_route_net_init,
6269	.exit = ip6_route_net_exit,
6270};
6271
6272static int __net_init ipv6_inetpeer_init(struct net *net)
6273{
6274	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
6275
6276	if (!bp)
6277		return -ENOMEM;
6278	inet_peer_base_init(bp);
6279	net->ipv6.peers = bp;
6280	return 0;
6281}
6282
6283static void __net_exit ipv6_inetpeer_exit(struct net *net)
6284{
6285	struct inet_peer_base *bp = net->ipv6.peers;
6286
6287	net->ipv6.peers = NULL;
6288	inetpeer_invalidate_tree(bp);
6289	kfree(bp);
6290}
6291
6292static struct pernet_operations ipv6_inetpeer_ops = {
6293	.init	=	ipv6_inetpeer_init,
6294	.exit	=	ipv6_inetpeer_exit,
6295};
6296
6297static struct pernet_operations ip6_route_net_late_ops = {
6298	.init = ip6_route_net_init_late,
6299	.exit = ip6_route_net_exit_late,
6300};
6301
6302static struct notifier_block ip6_route_dev_notifier = {
6303	.notifier_call = ip6_route_dev_notify,
6304	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
6305};
6306
6307void __init ip6_route_init_special_entries(void)
6308{
6309	/* Registering of the loopback is done before this portion of code,
6310	 * the loopback reference in rt6_info will not be taken, do it
6311	 * manually for init_net */
6312	init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev;
6313	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
6314	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6315  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
6316	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
6317	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6318	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
6319	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6320  #endif
6321}
6322
6323int __init ip6_route_init(void)
6324{
6325	int ret;
6326	int cpu;
6327
6328	ret = -ENOMEM;
6329	ip6_dst_ops_template.kmem_cachep =
6330		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
6331				  SLAB_HWCACHE_ALIGN, NULL);
6332	if (!ip6_dst_ops_template.kmem_cachep)
6333		goto out;
6334
6335	ret = dst_entries_init(&ip6_dst_blackhole_ops);
6336	if (ret)
6337		goto out_kmem_cache;
6338
6339	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
6340	if (ret)
6341		goto out_dst_entries;
6342
6343	ret = register_pernet_subsys(&ip6_route_net_ops);
6344	if (ret)
6345		goto out_register_inetpeer;
6346
6347	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
6348
 
 
 
 
 
 
 
 
 
 
 
6349	ret = fib6_init();
6350	if (ret)
6351		goto out_register_subsys;
6352
6353	ret = xfrm6_init();
6354	if (ret)
6355		goto out_fib6_init;
6356
6357	ret = fib6_rules_init();
6358	if (ret)
6359		goto xfrm6_init;
6360
6361	ret = register_pernet_subsys(&ip6_route_net_late_ops);
6362	if (ret)
 
 
6363		goto fib6_rules_init;
6364
6365	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
6366				   inet6_rtm_newroute, NULL, 0);
6367	if (ret < 0)
6368		goto out_register_late_subsys;
6369
6370	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
6371				   inet6_rtm_delroute, NULL, 0);
6372	if (ret < 0)
6373		goto out_register_late_subsys;
6374
6375	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
6376				   inet6_rtm_getroute, NULL,
6377				   RTNL_FLAG_DOIT_UNLOCKED);
6378	if (ret < 0)
6379		goto out_register_late_subsys;
6380
6381	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
6382	if (ret)
6383		goto out_register_late_subsys;
6384
6385	for_each_possible_cpu(cpu) {
6386		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
6387
6388		INIT_LIST_HEAD(&ul->head);
6389		spin_lock_init(&ul->lock);
6390	}
6391
6392out:
6393	return ret;
6394
6395out_register_late_subsys:
6396	rtnl_unregister_all(PF_INET6);
6397	unregister_pernet_subsys(&ip6_route_net_late_ops);
6398fib6_rules_init:
6399	fib6_rules_cleanup();
6400xfrm6_init:
6401	xfrm6_fini();
6402out_fib6_init:
6403	fib6_gc_cleanup();
6404out_register_subsys:
6405	unregister_pernet_subsys(&ip6_route_net_ops);
6406out_register_inetpeer:
6407	unregister_pernet_subsys(&ipv6_inetpeer_ops);
6408out_dst_entries:
6409	dst_entries_destroy(&ip6_dst_blackhole_ops);
6410out_kmem_cache:
6411	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
6412	goto out;
6413}
6414
6415void ip6_route_cleanup(void)
6416{
6417	unregister_netdevice_notifier(&ip6_route_dev_notifier);
6418	unregister_pernet_subsys(&ip6_route_net_late_ops);
6419	fib6_rules_cleanup();
6420	xfrm6_fini();
6421	fib6_gc_cleanup();
6422	unregister_pernet_subsys(&ipv6_inetpeer_ops);
6423	unregister_pernet_subsys(&ip6_route_net_ops);
6424	dst_entries_destroy(&ip6_dst_blackhole_ops);
6425	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
6426}
v3.1
 
   1/*
   2 *	Linux INET6 implementation
   3 *	FIB front-end.
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*	Changes:
  15 *
  16 *	YOSHIFUJI Hideaki @USAGI
  17 *		reworked default router selection.
  18 *		- respect outgoing interface
  19 *		- select from (probably) reachable routers (i.e.
  20 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *		- always select the same router if it is (probably)
  22 *		reachable.  otherwise, round-robin the list.
  23 *	Ville Nuorvala
  24 *		Fixed routing subtrees.
  25 */
  26
 
 
  27#include <linux/capability.h>
  28#include <linux/errno.h>
 
  29#include <linux/types.h>
  30#include <linux/times.h>
  31#include <linux/socket.h>
  32#include <linux/sockios.h>
  33#include <linux/net.h>
  34#include <linux/route.h>
  35#include <linux/netdevice.h>
  36#include <linux/in6.h>
  37#include <linux/mroute6.h>
  38#include <linux/init.h>
  39#include <linux/if_arp.h>
  40#include <linux/proc_fs.h>
  41#include <linux/seq_file.h>
  42#include <linux/nsproxy.h>
  43#include <linux/slab.h>
 
  44#include <net/net_namespace.h>
  45#include <net/snmp.h>
  46#include <net/ipv6.h>
  47#include <net/ip6_fib.h>
  48#include <net/ip6_route.h>
  49#include <net/ndisc.h>
  50#include <net/addrconf.h>
  51#include <net/tcp.h>
  52#include <linux/rtnetlink.h>
  53#include <net/dst.h>
 
  54#include <net/xfrm.h>
  55#include <net/netevent.h>
  56#include <net/netlink.h>
  57
  58#include <asm/uaccess.h>
 
 
 
 
  59
  60#ifdef CONFIG_SYSCTL
  61#include <linux/sysctl.h>
  62#endif
  63
  64/* Set to 3 to get tracing. */
  65#define RT6_DEBUG 2
  66
  67#if RT6_DEBUG >= 3
  68#define RDBG(x) printk x
  69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
  70#else
  71#define RDBG(x)
  72#define RT6_TRACE(x...) do { ; } while (0)
  73#endif
 
 
 
 
  74
  75static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
  76				    const struct in6_addr *dest);
  77static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  78static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  79static unsigned int	 ip6_default_mtu(const struct dst_entry *dst);
  80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  81static void		ip6_dst_destroy(struct dst_entry *);
  82static void		ip6_dst_ifdown(struct dst_entry *,
  83				       struct net_device *dev, int how);
  84static int		 ip6_dst_gc(struct dst_ops *ops);
  85
  86static int		ip6_pkt_discard(struct sk_buff *skb);
  87static int		ip6_pkt_discard_out(struct sk_buff *skb);
 
 
  88static void		ip6_link_failure(struct sk_buff *skb);
  89static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  90
  91#ifdef CONFIG_IPV6_ROUTE_INFO
  92static struct rt6_info *rt6_add_route_info(struct net *net,
  93					   const struct in6_addr *prefix, int prefixlen,
  94					   const struct in6_addr *gwaddr, int ifindex,
  95					   unsigned pref);
  96static struct rt6_info *rt6_get_route_info(struct net *net,
 
  97					   const struct in6_addr *prefix, int prefixlen,
  98					   const struct in6_addr *gwaddr, int ifindex);
 
  99#endif
 100
 101static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 
 
 
 
 
 
 
 102{
 103	struct rt6_info *rt = (struct rt6_info *) dst;
 104	struct inet_peer *peer;
 105	u32 *p = NULL;
 
 
 
 
 
 106
 107	if (!(rt->dst.flags & DST_HOST))
 108		return NULL;
 
 
 
 109
 110	if (!rt->rt6i_peer)
 111		rt6_bind_peer(rt, 1);
 
 
 
 
 112
 113	peer = rt->rt6i_peer;
 114	if (peer) {
 115		u32 *old_p = __DST_METRICS_PTR(old);
 116		unsigned long prev, new;
 117
 118		p = peer->metrics;
 119		if (inet_metrics_new(peer))
 120			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
 121
 122		new = (unsigned long) p;
 123		prev = cmpxchg(&dst->_metrics, old, new);
 
 
 
 
 
 
 
 
 
 
 
 124
 125		if (prev != old) {
 126			p = __DST_METRICS_PTR(prev);
 127			if (prev & DST_METRICS_READ_ONLY)
 128				p = NULL;
 
 129		}
 
 130	}
 131	return p;
 132}
 133
 134static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 135{
 136	return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
 
 
 
 
 
 
 
 
 
 
 137}
 138
 139static struct dst_ops ip6_dst_ops_template = {
 140	.family			=	AF_INET6,
 141	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 142	.gc			=	ip6_dst_gc,
 143	.gc_thresh		=	1024,
 144	.check			=	ip6_dst_check,
 145	.default_advmss		=	ip6_default_advmss,
 146	.default_mtu		=	ip6_default_mtu,
 147	.cow_metrics		=	ipv6_cow_metrics,
 148	.destroy		=	ip6_dst_destroy,
 149	.ifdown			=	ip6_dst_ifdown,
 150	.negative_advice	=	ip6_negative_advice,
 151	.link_failure		=	ip6_link_failure,
 152	.update_pmtu		=	ip6_rt_update_pmtu,
 
 153	.local_out		=	__ip6_local_out,
 154	.neigh_lookup		=	ip6_neigh_lookup,
 
 155};
 156
 157static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
 158{
 159	return 0;
 
 
 160}
 161
 162static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 
 163{
 164}
 165
 166static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
 167					 unsigned long old)
 168{
 169	return NULL;
 170}
 171
 172static struct dst_ops ip6_dst_blackhole_ops = {
 173	.family			=	AF_INET6,
 174	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 175	.destroy		=	ip6_dst_destroy,
 176	.check			=	ip6_dst_check,
 177	.default_mtu		=	ip6_blackhole_default_mtu,
 178	.default_advmss		=	ip6_default_advmss,
 179	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 180	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
 181	.neigh_lookup		=	ip6_neigh_lookup,
 
 182};
 183
 184static const u32 ip6_template_metrics[RTAX_MAX] = {
 185	[RTAX_HOPLIMIT - 1] = 255,
 186};
 187
 188static struct rt6_info ip6_null_entry_template = {
 
 
 
 
 
 
 
 
 
 189	.dst = {
 190		.__refcnt	= ATOMIC_INIT(1),
 191		.__use		= 1,
 192		.obsolete	= -1,
 193		.error		= -ENETUNREACH,
 194		.input		= ip6_pkt_discard,
 195		.output		= ip6_pkt_discard_out,
 196	},
 197	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 198	.rt6i_protocol  = RTPROT_KERNEL,
 199	.rt6i_metric	= ~(u32) 0,
 200	.rt6i_ref	= ATOMIC_INIT(1),
 201};
 202
 203#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 204
 205static int ip6_pkt_prohibit(struct sk_buff *skb);
 206static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 207
 208static struct rt6_info ip6_prohibit_entry_template = {
 209	.dst = {
 210		.__refcnt	= ATOMIC_INIT(1),
 211		.__use		= 1,
 212		.obsolete	= -1,
 213		.error		= -EACCES,
 214		.input		= ip6_pkt_prohibit,
 215		.output		= ip6_pkt_prohibit_out,
 216	},
 217	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 218	.rt6i_protocol  = RTPROT_KERNEL,
 219	.rt6i_metric	= ~(u32) 0,
 220	.rt6i_ref	= ATOMIC_INIT(1),
 221};
 222
 223static struct rt6_info ip6_blk_hole_entry_template = {
 224	.dst = {
 225		.__refcnt	= ATOMIC_INIT(1),
 226		.__use		= 1,
 227		.obsolete	= -1,
 228		.error		= -EINVAL,
 229		.input		= dst_discard,
 230		.output		= dst_discard,
 231	},
 232	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 233	.rt6i_protocol  = RTPROT_KERNEL,
 234	.rt6i_metric	= ~(u32) 0,
 235	.rt6i_ref	= ATOMIC_INIT(1),
 236};
 237
 238#endif
 239
 
 
 
 
 
 
 
 
 240/* allocate dst with ip6_dst_ops */
 241static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
 242					     struct net_device *dev,
 243					     int flags)
 244{
 245	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
 
 246
 247	if (rt != NULL)
 248		memset(&rt->rt6i_table, 0,
 249			sizeof(*rt) - sizeof(struct dst_entry));
 
 250
 251	return rt;
 252}
 
 253
 254static void ip6_dst_destroy(struct dst_entry *dst)
 255{
 256	struct rt6_info *rt = (struct rt6_info *)dst;
 257	struct inet6_dev *idev = rt->rt6i_idev;
 258	struct inet_peer *peer = rt->rt6i_peer;
 259
 260	if (!(rt->dst.flags & DST_HOST))
 261		dst_destroy_metrics_generic(dst);
 262
 263	if (idev != NULL) {
 
 264		rt->rt6i_idev = NULL;
 265		in6_dev_put(idev);
 266	}
 267	if (peer) {
 268		rt->rt6i_peer = NULL;
 269		inet_putpeer(peer);
 270	}
 271}
 272
 273static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
 274
 275static u32 rt6_peer_genid(void)
 276{
 277	return atomic_read(&__rt6_peer_genid);
 278}
 279
 280void rt6_bind_peer(struct rt6_info *rt, int create)
 281{
 282	struct inet_peer *peer;
 283
 284	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
 285	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
 286		inet_putpeer(peer);
 287	else
 288		rt->rt6i_peer_genid = rt6_peer_genid();
 289}
 290
 291static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 292			   int how)
 293{
 294	struct rt6_info *rt = (struct rt6_info *)dst;
 295	struct inet6_dev *idev = rt->rt6i_idev;
 296	struct net_device *loopback_dev =
 297		dev_net(dev)->loopback_dev;
 298
 299	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
 300		struct inet6_dev *loopback_idev =
 301			in6_dev_get(loopback_dev);
 302		if (loopback_idev != NULL) {
 303			rt->rt6i_idev = loopback_idev;
 304			in6_dev_put(idev);
 305		}
 306	}
 307}
 308
 309static __inline__ int rt6_check_expired(const struct rt6_info *rt)
 
 
 
 
 
 
 
 
 310{
 311	return (rt->rt6i_flags & RTF_EXPIRES) &&
 312		time_after(jiffies, rt->rt6i_expires);
 
 
 
 
 
 
 
 
 
 
 313}
 314
 315static inline int rt6_need_strict(const struct in6_addr *daddr)
 
 
 316{
 317	return ipv6_addr_type(daddr) &
 318		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 319}
 320
 321/*
 322 *	Route lookup. Any table->tb6_lock is implied.
 323 */
 324
 325static inline struct rt6_info *rt6_device_match(struct net *net,
 326						    struct rt6_info *rt,
 327						    const struct in6_addr *saddr,
 328						    int oif,
 329						    int flags)
 330{
 331	struct rt6_info *local = NULL;
 332	struct rt6_info *sprt;
 333
 334	if (!oif && ipv6_addr_any(saddr))
 335		goto out;
 336
 337	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 338		struct net_device *dev = sprt->rt6i_dev;
 339
 340		if (oif) {
 341			if (dev->ifindex == oif)
 342				return sprt;
 343			if (dev->flags & IFF_LOOPBACK) {
 344				if (sprt->rt6i_idev == NULL ||
 345				    sprt->rt6i_idev->dev->ifindex != oif) {
 346					if (flags & RT6_LOOKUP_F_IFACE && oif)
 347						continue;
 348					if (local && (!oif ||
 349						      local->rt6i_idev->dev->ifindex == oif))
 350						continue;
 351				}
 352				local = sprt;
 353			}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 354		} else {
 355			if (ipv6_chk_addr(net, saddr, dev,
 356					  flags & RT6_LOOKUP_F_IFACE))
 357				return sprt;
 358		}
 
 
 
 
 
 
 
 
 
 
 359	}
 360
 361	if (oif) {
 362		if (local)
 363			return local;
 
 
 
 
 364
 365		if (flags & RT6_LOOKUP_F_IFACE)
 366			return net->ipv6.ip6_null_entry;
 
 367	}
 368out:
 369	return rt;
 
 
 
 
 
 
 
 
 370}
 371
 372#ifdef CONFIG_IPV6_ROUTER_PREF
 373static void rt6_probe(struct rt6_info *rt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 374{
 
 
 
 375	struct neighbour *neigh;
 
 
 
 376	/*
 377	 * Okay, this does not seem to be appropriate
 378	 * for now, however, we need to check if it
 379	 * is really so; aka Router Reachability Probing.
 380	 *
 381	 * Router Reachability Probe MUST be rate-limited
 382	 * to no more than one per minute.
 383	 */
 384	rcu_read_lock();
 385	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
 386	if (!neigh || (neigh->nud_state & NUD_VALID))
 387		goto out;
 388	read_lock_bh(&neigh->lock);
 389	if (!(neigh->nud_state & NUD_VALID) &&
 390	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
 391		struct in6_addr mcaddr;
 392		struct in6_addr *target;
 393
 394		neigh->updated = jiffies;
 395		read_unlock_bh(&neigh->lock);
 396
 397		target = (struct in6_addr *)&neigh->primary_key;
 398		addrconf_addr_solict_mult(target, &mcaddr);
 399		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 400	} else {
 401		read_unlock_bh(&neigh->lock);
 
 
 
 
 402	}
 
 403out:
 404	rcu_read_unlock();
 405}
 406#else
 407static inline void rt6_probe(struct rt6_info *rt)
 408{
 409}
 410#endif
 411
 412/*
 413 * Default Router Selection (RFC 2461 6.3.6)
 414 */
 415static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 416{
 417	struct net_device *dev = rt->rt6i_dev;
 418	if (!oif || dev->ifindex == oif)
 419		return 2;
 420	if ((dev->flags & IFF_LOOPBACK) &&
 421	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 422		return 1;
 423	return 0;
 424}
 425
 426static inline int rt6_check_neigh(struct rt6_info *rt)
 427{
 
 428	struct neighbour *neigh;
 429	int m;
 430
 431	rcu_read_lock();
 432	neigh = dst_get_neighbour(&rt->dst);
 433	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 434	    !(rt->rt6i_flags & RTF_GATEWAY))
 435		m = 1;
 436	else if (neigh) {
 437		read_lock_bh(&neigh->lock);
 438		if (neigh->nud_state & NUD_VALID)
 439			m = 2;
 440#ifdef CONFIG_IPV6_ROUTER_PREF
 441		else if (neigh->nud_state & NUD_FAILED)
 442			m = 0;
 
 
 443#endif
 444		else
 445			m = 1;
 446		read_unlock_bh(&neigh->lock);
 447	} else
 448		m = 0;
 449	rcu_read_unlock();
 450	return m;
 
 451}
 452
 453static int rt6_score_route(struct rt6_info *rt, int oif,
 454			   int strict)
 455{
 456	int m, n;
 
 
 
 457
 458	m = rt6_check_dev(rt, oif);
 459	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 460		return -1;
 461#ifdef CONFIG_IPV6_ROUTER_PREF
 462	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 463#endif
 464	n = rt6_check_neigh(rt);
 465	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
 466		return -1;
 
 
 
 467	return m;
 468}
 469
 470static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 471				   int *mpri, struct rt6_info *match)
 472{
 
 
 473	int m;
 474
 475	if (rt6_check_expired(rt))
 
 
 
 
 
 476		goto out;
 477
 478	m = rt6_score_route(rt, oif, strict);
 479	if (m < 0)
 
 
 
 480		goto out;
 
 
 
 
 481
 
 482	if (m > *mpri) {
 483		if (strict & RT6_LOOKUP_F_REACHABLE)
 484			rt6_probe(match);
 485		*mpri = m;
 486		match = rt;
 487	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
 488		rt6_probe(rt);
 489	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 490
 491out:
 492	return match;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 493}
 494
 495static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 496				     struct rt6_info *rr_head,
 497				     u32 metric, int oif, int strict)
 498{
 499	struct rt6_info *rt, *match;
 
 500	int mpri = -1;
 501
 502	match = NULL;
 503	for (rt = rr_head; rt && rt->rt6i_metric == metric;
 504	     rt = rt->dst.rt6_next)
 505		match = find_match(rt, oif, strict, &mpri, match);
 506	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 507	     rt = rt->dst.rt6_next)
 508		match = find_match(rt, oif, strict, &mpri, match);
 509
 510	return match;
 
 
 
 
 
 
 
 511}
 512
 513static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 
 514{
 515	struct rt6_info *match, *rt0;
 516	struct net *net;
 
 
 
 
 
 517
 518	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
 519		  __func__, fn->leaf, oif);
 520
 521	rt0 = fn->rr_ptr;
 522	if (!rt0)
 523		fn->rr_ptr = rt0 = fn->leaf;
 524
 525	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
 
 
 
 
 
 
 
 
 
 
 
 526
 527	if (!match &&
 528	    (strict & RT6_LOOKUP_F_REACHABLE)) {
 529		struct rt6_info *next = rt0->dst.rt6_next;
 530
 531		/* no entries matched; do round-robin */
 532		if (!next || next->rt6i_metric != rt0->rt6i_metric)
 533			next = fn->leaf;
 534
 535		if (next != rt0)
 536			fn->rr_ptr = next;
 
 
 
 
 
 537	}
 538
 539	RT6_TRACE("%s() => %p\n",
 540		  __func__, match);
 
 
 
 
 
 
 541
 542	net = dev_net(rt0->rt6i_dev);
 543	return match ? match : net->ipv6.ip6_null_entry;
 
 
 544}
 545
 546#ifdef CONFIG_IPV6_ROUTE_INFO
 547int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 548		  const struct in6_addr *gwaddr)
 549{
 550	struct net *net = dev_net(dev);
 551	struct route_info *rinfo = (struct route_info *) opt;
 552	struct in6_addr prefix_buf, *prefix;
 553	unsigned int pref;
 554	unsigned long lifetime;
 555	struct rt6_info *rt;
 556
 557	if (len < sizeof(struct route_info)) {
 558		return -EINVAL;
 559	}
 560
 561	/* Sanity check for prefix_len and length */
 562	if (rinfo->length > 3) {
 563		return -EINVAL;
 564	} else if (rinfo->prefix_len > 128) {
 565		return -EINVAL;
 566	} else if (rinfo->prefix_len > 64) {
 567		if (rinfo->length < 2) {
 568			return -EINVAL;
 569		}
 570	} else if (rinfo->prefix_len > 0) {
 571		if (rinfo->length < 1) {
 572			return -EINVAL;
 573		}
 574	}
 575
 576	pref = rinfo->route_pref;
 577	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 578		return -EINVAL;
 579
 580	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 581
 582	if (rinfo->length == 3)
 583		prefix = (struct in6_addr *)rinfo->prefix;
 584	else {
 585		/* this function is safe */
 586		ipv6_addr_prefix(&prefix_buf,
 587				 (struct in6_addr *)rinfo->prefix,
 588				 rinfo->prefix_len);
 589		prefix = &prefix_buf;
 590	}
 591
 592	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 593				dev->ifindex);
 
 
 
 594
 595	if (rt && !lifetime) {
 596		ip6_del_rt(rt);
 597		rt = NULL;
 598	}
 599
 600	if (!rt && lifetime)
 601		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 602					pref);
 603	else if (rt)
 604		rt->rt6i_flags = RTF_ROUTEINFO |
 605				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 606
 607	if (rt) {
 608		if (!addrconf_finite_timeout(lifetime)) {
 609			rt->rt6i_flags &= ~RTF_EXPIRES;
 610		} else {
 611			rt->rt6i_expires = jiffies + HZ * lifetime;
 612			rt->rt6i_flags |= RTF_EXPIRES;
 613		}
 614		dst_release(&rt->dst);
 615	}
 616	return 0;
 617}
 618#endif
 619
 620#define BACKTRACK(__net, saddr)			\
 621do { \
 622	if (rt == __net->ipv6.ip6_null_entry) {	\
 623		struct fib6_node *pn; \
 624		while (1) { \
 625			if (fn->fn_flags & RTN_TL_ROOT) \
 626				goto out; \
 627			pn = fn->parent; \
 628			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
 629				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
 630			else \
 631				fn = pn; \
 632			if (fn->fn_flags & RTN_RTINFO) \
 633				goto restart; \
 634		} \
 635	} \
 636} while(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 637
 638static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 639					     struct fib6_table *table,
 640					     struct flowi6 *fl6, int flags)
 
 
 641{
 
 642	struct fib6_node *fn;
 643	struct rt6_info *rt;
 644
 645	read_lock_bh(&table->tb6_lock);
 646	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 
 
 
 647restart:
 648	rt = fn->leaf;
 649	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 650	BACKTRACK(net, &fl6->saddr);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 651out:
 652	dst_use(&rt->dst, jiffies);
 653	read_unlock_bh(&table->tb6_lock);
 
 
 654	return rt;
 
 655
 
 
 
 
 656}
 
 657
 658struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 659			    const struct in6_addr *saddr, int oif, int strict)
 
 660{
 661	struct flowi6 fl6 = {
 662		.flowi6_oif = oif,
 663		.daddr = *daddr,
 664	};
 665	struct dst_entry *dst;
 666	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 667
 668	if (saddr) {
 669		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 670		flags |= RT6_LOOKUP_F_HAS_SADDR;
 671	}
 672
 673	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 674	if (dst->error == 0)
 675		return (struct rt6_info *) dst;
 676
 677	dst_release(dst);
 678
 679	return NULL;
 680}
 681
 682EXPORT_SYMBOL(rt6_lookup);
 683
 684/* ip6_ins_rt is called with FREE table->tb6_lock.
 685   It takes new route entry, the addition fails by any reason the
 686   route is freed. In any case, if caller does not hold it, it may
 687   be destroyed.
 688 */
 689
 690static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 
 691{
 692	int err;
 693	struct fib6_table *table;
 694
 695	table = rt->rt6i_table;
 696	write_lock_bh(&table->tb6_lock);
 697	err = fib6_add(&table->tb6_root, rt, info);
 698	write_unlock_bh(&table->tb6_lock);
 699
 700	return err;
 701}
 702
 703int ip6_ins_rt(struct rt6_info *rt)
 704{
 705	struct nl_info info = {
 706		.nl_net = dev_net(rt->rt6i_dev),
 707	};
 708	return __ip6_ins_rt(rt, &info);
 709}
 710
 711static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
 712				      const struct in6_addr *daddr,
 713				      const struct in6_addr *saddr)
 714{
 
 
 715	struct rt6_info *rt;
 716
 717	/*
 718	 *	Clone the route.
 719	 */
 720
 721	rt = ip6_rt_copy(ort, daddr);
 
 722
 723	if (rt) {
 724		struct neighbour *neigh;
 725		int attempts = !in_softirq();
 
 
 
 726
 727		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
 728			if (rt->rt6i_dst.plen != 128 &&
 729			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 730				rt->rt6i_flags |= RTF_ANYCAST;
 731			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
 732		}
 733
 734		rt->rt6i_flags |= RTF_CACHE;
 735
 
 
 
 
 736#ifdef CONFIG_IPV6_SUBTREES
 737		if (rt->rt6i_src.plen && saddr) {
 738			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
 739			rt->rt6i_src.plen = 128;
 740		}
 741#endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 742
 743	retry:
 744		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
 745		if (IS_ERR(neigh)) {
 746			struct net *net = dev_net(rt->rt6i_dev);
 747			int saved_rt_min_interval =
 748				net->ipv6.sysctl.ip6_rt_gc_min_interval;
 749			int saved_rt_elasticity =
 750				net->ipv6.sysctl.ip6_rt_gc_elasticity;
 751
 752			if (attempts-- > 0) {
 753				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
 754				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
 755
 756				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
 757
 758				net->ipv6.sysctl.ip6_rt_gc_elasticity =
 759					saved_rt_elasticity;
 760				net->ipv6.sysctl.ip6_rt_gc_min_interval =
 761					saved_rt_min_interval;
 762				goto retry;
 763			}
 764
 765			if (net_ratelimit())
 766				printk(KERN_WARNING
 767				       "ipv6: Neighbour table overflow.\n");
 768			dst_free(&rt->dst);
 769			return NULL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 770		}
 771		dst_set_neighbour(&rt->dst, neigh);
 
 
 
 
 772
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 773	}
 774
 775	return rt;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 776}
 777
 778static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 779					const struct in6_addr *daddr)
 780{
 781	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
 
 
 
 
 
 782
 783	if (rt) {
 784		rt->rt6i_flags |= RTF_CACHE;
 785		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 786	}
 787	return rt;
 
 
 788}
 789
 790static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
 791				      struct flowi6 *fl6, int flags)
 
 
 
 
 792{
 793	struct fib6_node *fn;
 794	struct rt6_info *rt, *nrt;
 795	int strict = 0;
 796	int attempts = 3;
 797	int err;
 798	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
 799
 800	strict |= flags & RT6_LOOKUP_F_IFACE;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 801
 802relookup:
 803	read_lock_bh(&table->tb6_lock);
 804
 805restart_2:
 806	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 
 
 
 
 807
 808restart:
 809	rt = rt6_select(fn, oif, strict | reachable);
 
 810
 811	BACKTRACK(net, &fl6->saddr);
 812	if (rt == net->ipv6.ip6_null_entry ||
 813	    rt->rt6i_flags & RTF_CACHE)
 814		goto out;
 
 
 
 
 
 
 
 
 
 
 
 815
 816	dst_hold(&rt->dst);
 817	read_unlock_bh(&table->tb6_lock);
 818
 819	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
 820		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
 821	else if (!(rt->dst.flags & DST_HOST))
 822		nrt = rt6_alloc_clone(rt, &fl6->daddr);
 823	else
 824		goto out2;
 
 825
 826	dst_release(&rt->dst);
 827	rt = nrt ? : net->ipv6.ip6_null_entry;
 828
 829	dst_hold(&rt->dst);
 830	if (nrt) {
 831		err = ip6_ins_rt(nrt);
 832		if (!err)
 833			goto out2;
 
 
 
 
 
 
 
 
 
 
 
 
 834	}
 835
 836	if (--attempts <= 0)
 837		goto out2;
 
 
 
 
 
 
 
 838
 839	/*
 840	 * Race condition! In the gap, when table->tb6_lock was
 841	 * released someone could insert this route.  Relookup.
 
 
 842	 */
 843	dst_release(&rt->dst);
 844	goto relookup;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 845
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 846out:
 847	if (reachable) {
 848		reachable = 0;
 849		goto restart_2;
 850	}
 851	dst_hold(&rt->dst);
 852	read_unlock_bh(&table->tb6_lock);
 853out2:
 854	rt->dst.lastuse = jiffies;
 855	rt->dst.__use++;
 856
 857	return rt;
 858}
 
 
 
 
 
 
 
 
 
 
 859
 860static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
 861					    struct flowi6 *fl6, int flags)
 
 
 
 862{
 863	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 864}
 865
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 866void ip6_route_input(struct sk_buff *skb)
 867{
 868	const struct ipv6hdr *iph = ipv6_hdr(skb);
 869	struct net *net = dev_net(skb->dev);
 870	int flags = RT6_LOOKUP_F_HAS_SADDR;
 
 871	struct flowi6 fl6 = {
 872		.flowi6_iif = skb->dev->ifindex,
 873		.daddr = iph->daddr,
 874		.saddr = iph->saddr,
 875		.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
 876		.flowi6_mark = skb->mark,
 877		.flowi6_proto = iph->nexthdr,
 878	};
 
 
 
 
 
 879
 880	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
 881		flags |= RT6_LOOKUP_F_IFACE;
 882
 883	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
 
 
 
 
 884}
 885
 886static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
 887					     struct flowi6 *fl6, int flags)
 
 
 
 888{
 889	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 890}
 891
 892struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 893				    struct flowi6 *fl6)
 
 894{
 895	int flags = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 896
 897	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
 
 
 
 898		flags |= RT6_LOOKUP_F_IFACE;
 899
 900	if (!ipv6_addr_any(&fl6->saddr))
 901		flags |= RT6_LOOKUP_F_HAS_SADDR;
 902	else if (sk)
 903		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 904
 905	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 906}
 
 907
 908EXPORT_SYMBOL(ip6_route_output);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 909
 910struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 911{
 912	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
 
 913	struct dst_entry *new = NULL;
 914
 915	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
 
 916	if (rt) {
 917		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
 
 918
 919		new = &rt->dst;
 920
 921		new->__use = 1;
 922		new->input = dst_discard;
 923		new->output = dst_discard;
 
 
 924
 925		if (dst_metrics_read_only(&ort->dst))
 926			new->_metrics = ort->dst._metrics;
 927		else
 928			dst_copy_metrics(new, &ort->dst);
 929		rt->rt6i_idev = ort->rt6i_idev;
 930		if (rt->rt6i_idev)
 931			in6_dev_hold(rt->rt6i_idev);
 932		rt->rt6i_expires = 0;
 933
 934		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
 935		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
 936		rt->rt6i_metric = 0;
 937
 938		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
 939#ifdef CONFIG_IPV6_SUBTREES
 940		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
 941#endif
 942
 943		dst_free(new);
 944	}
 945
 946	dst_release(dst_orig);
 947	return new ? new : ERR_PTR(-ENOMEM);
 948}
 949
 950/*
 951 *	Destination cache support functions
 952 */
 953
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 954static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 955{
 
 
 956	struct rt6_info *rt;
 957
 958	rt = (struct rt6_info *) dst;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 959
 960	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
 961		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
 962			if (!rt->rt6i_peer)
 963				rt6_bind_peer(rt, 0);
 964			rt->rt6i_peer_genid = rt6_peer_genid();
 965		}
 966		return dst;
 967	}
 968	return NULL;
 969}
 970
 971static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 972{
 973	struct rt6_info *rt = (struct rt6_info *) dst;
 974
 975	if (rt) {
 976		if (rt->rt6i_flags & RTF_CACHE) {
 
 977			if (rt6_check_expired(rt)) {
 978				ip6_del_rt(rt);
 979				dst = NULL;
 980			}
 
 981		} else {
 982			dst_release(dst);
 983			dst = NULL;
 984		}
 985	}
 986	return dst;
 987}
 988
 989static void ip6_link_failure(struct sk_buff *skb)
 990{
 991	struct rt6_info *rt;
 992
 993	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
 994
 995	rt = (struct rt6_info *) skb_dst(skb);
 996	if (rt) {
 997		if (rt->rt6i_flags&RTF_CACHE) {
 998			dst_set_expires(&rt->dst, 0);
 999			rt->rt6i_flags |= RTF_EXPIRES;
1000		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1001			rt->rt6i_node->fn_sernum = -1;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1002	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1003}
1004
1005static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 
1006{
1007	struct rt6_info *rt6 = (struct rt6_info*)dst;
 
1008
1009	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1010		rt6->rt6i_flags |= RTF_MODIFIED;
1011		if (mtu < IPV6_MIN_MTU) {
1012			u32 features = dst_metric(dst, RTAX_FEATURES);
1013			mtu = IPV6_MIN_MTU;
1014			features |= RTAX_FEATURE_ALLFRAG;
1015			dst_metric_set(dst, RTAX_FEATURES, features);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1016		}
1017		dst_metric_set(dst, RTAX_MTU, mtu);
 
1018	}
1019}
1020
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1021static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1022{
1023	struct net_device *dev = dst->dev;
1024	unsigned int mtu = dst_mtu(dst);
1025	struct net *net = dev_net(dev);
1026
1027	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1028
1029	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1030		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1031
1032	/*
1033	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1034	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1035	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1036	 * rely only on pmtu discovery"
1037	 */
1038	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1039		mtu = IPV6_MAXPLEN;
1040	return mtu;
1041}
1042
1043static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1044{
1045	unsigned int mtu = IPV6_MIN_MTU;
1046	struct inet6_dev *idev;
 
 
 
 
 
 
 
1047
1048	rcu_read_lock();
1049	idev = __in6_dev_get(dst->dev);
1050	if (idev)
1051		mtu = idev->cnf.mtu6;
1052	rcu_read_unlock();
1053
1054	return mtu;
 
 
 
1055}
1056
1057static struct dst_entry *icmp6_dst_gc_list;
1058static DEFINE_SPINLOCK(icmp6_dst_lock);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1059
1060struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1061				  struct neighbour *neigh,
1062				  const struct in6_addr *addr)
1063{
 
1064	struct rt6_info *rt;
1065	struct inet6_dev *idev = in6_dev_get(dev);
1066	struct net *net = dev_net(dev);
1067
1068	if (unlikely(idev == NULL))
1069		return NULL;
1070
1071	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1072	if (unlikely(rt == NULL)) {
1073		in6_dev_put(idev);
 
1074		goto out;
1075	}
1076
1077	if (neigh)
1078		neigh_hold(neigh);
1079	else {
1080		neigh = ndisc_get_neigh(dev, addr);
1081		if (IS_ERR(neigh))
1082			neigh = NULL;
1083	}
1084
1085	rt->dst.flags |= DST_HOST;
 
1086	rt->dst.output  = ip6_output;
1087	dst_set_neighbour(&rt->dst, neigh);
1088	atomic_set(&rt->dst.__refcnt, 1);
1089	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1090
1091	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1092	rt->rt6i_dst.plen = 128;
1093	rt->rt6i_idev     = idev;
 
1094
1095	spin_lock_bh(&icmp6_dst_lock);
1096	rt->dst.next = icmp6_dst_gc_list;
1097	icmp6_dst_gc_list = &rt->dst;
1098	spin_unlock_bh(&icmp6_dst_lock);
 
1099
1100	fib6_force_start_gc(net);
1101
1102out:
1103	return &rt->dst;
1104}
1105
1106int icmp6_dst_gc(void)
1107{
1108	struct dst_entry *dst, **pprev;
1109	int more = 0;
1110
1111	spin_lock_bh(&icmp6_dst_lock);
1112	pprev = &icmp6_dst_gc_list;
1113
1114	while ((dst = *pprev) != NULL) {
1115		if (!atomic_read(&dst->__refcnt)) {
1116			*pprev = dst->next;
1117			dst_free(dst);
1118		} else {
1119			pprev = &dst->next;
1120			++more;
1121		}
1122	}
1123
1124	spin_unlock_bh(&icmp6_dst_lock);
1125
1126	return more;
1127}
1128
1129static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1130			    void *arg)
1131{
1132	struct dst_entry *dst, **pprev;
1133
1134	spin_lock_bh(&icmp6_dst_lock);
1135	pprev = &icmp6_dst_gc_list;
1136	while ((dst = *pprev) != NULL) {
1137		struct rt6_info *rt = (struct rt6_info *) dst;
1138		if (func(rt, arg)) {
1139			*pprev = dst->next;
1140			dst_free(dst);
1141		} else {
1142			pprev = &dst->next;
1143		}
1144	}
1145	spin_unlock_bh(&icmp6_dst_lock);
1146}
1147
1148static int ip6_dst_gc(struct dst_ops *ops)
1149{
1150	unsigned long now = jiffies;
1151	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1152	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1153	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1154	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1155	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1156	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1157	int entries;
1158
1159	entries = dst_entries_get_fast(ops);
1160	if (time_after(rt_last_gc + rt_min_interval, now) &&
1161	    entries <= rt_max_size)
1162		goto out;
1163
1164	net->ipv6.ip6_rt_gc_expire++;
1165	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1166	net->ipv6.ip6_rt_last_gc = now;
1167	entries = dst_entries_get_slow(ops);
1168	if (entries < ops->gc_thresh)
1169		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1170out:
1171	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1172	return entries > rt_max_size;
1173}
1174
1175/* Clean host part of a prefix. Not necessary in radix tree,
1176   but results in cleaner routing tables.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1177
1178   Remove it only when all the things will work!
1179 */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1180
1181int ip6_dst_hoplimit(struct dst_entry *dst)
1182{
1183	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1184	if (hoplimit == 0) {
1185		struct net_device *dev = dst->dev;
1186		struct inet6_dev *idev;
 
 
 
 
 
 
 
 
1187
1188		rcu_read_lock();
1189		idev = __in6_dev_get(dev);
1190		if (idev)
1191			hoplimit = idev->cnf.hop_limit;
1192		else
1193			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
 
1194		rcu_read_unlock();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1195	}
1196	return hoplimit;
 
 
 
 
 
 
 
 
 
 
 
 
1197}
1198EXPORT_SYMBOL(ip6_dst_hoplimit);
1199
1200/*
1201 *
1202 */
 
 
 
 
 
 
 
1203
/*
 * Build a route from @cfg and insert it into the FIB.
 *
 * @cfg is modified in place: a zero fc_metric becomes IP6_RT_PRIO_USER,
 * an RTPROT_UNSPEC fc_protocol becomes RTPROT_BOOT, and fc_nlinfo.nl_net
 * is overwritten with the namespace of the chosen device.
 *
 * On success the device and inet6_dev references taken here are handed to
 * the new route and the result of __ip6_ins_rt() is returned.  On failure
 * (label "out") every reference taken so far is dropped, the partially
 * built route is freed, and a negative errno is returned: -EINVAL,
 * -ENODEV, -ENOBUFS, -ENOMEM, -EHOSTUNREACH, or the error from the
 * neighbour lookup.
 */
1204int ip6_route_add(struct fib6_config *cfg)
1205{
1206	int err;
1207	struct net *net = cfg->fc_nlinfo.nl_net;
1208	struct rt6_info *rt = NULL;
1209	struct net_device *dev = NULL;
1210	struct inet6_dev *idev = NULL;
1211	struct fib6_table *table;
1212	int addr_type;
1213
	/* Prefix lengths cannot exceed the 128 bits of an IPv6 address. */
1214	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1215		return -EINVAL;
1216#ifndef CONFIG_IPV6_SUBTREES
	/* Source-prefix routes are rejected without subtree support. */
1217	if (cfg->fc_src_len)
1218		return -EINVAL;
1219#endif
	/* An explicit interface must exist and have an inet6_dev. */
1220	if (cfg->fc_ifindex) {
1221		err = -ENODEV;
1222		dev = dev_get_by_index(net, cfg->fc_ifindex);
1223		if (!dev)
1224			goto out;
1225		idev = in6_dev_get(dev);
1226		if (!idev)
1227			goto out;
1228	}
1229
1230	if (cfg->fc_metric == 0)
1231		cfg->fc_metric = IP6_RT_PRIO_USER;
1232
1233	table = fib6_new_table(net, cfg->fc_table);
1234	if (table == NULL) {
1235		err = -ENOBUFS;
1236		goto out;
1237	}
1238
	/* The device is attached to the route later, just before insertion. */
1239	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1240
1241	if (rt == NULL) {
1242		err = -ENOMEM;
1243		goto out;
1244	}
1245
1246	rt->dst.obsolete = -1;
	/* fc_expires is in clock_t units; 0 is stored when RTF_EXPIRES is unset. */
1247	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1248				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1249				0;
1250
1251	if (cfg->fc_protocol == RTPROT_UNSPEC)
1252		cfg->fc_protocol = RTPROT_BOOT;
1253	rt->rt6i_protocol = cfg->fc_protocol;
1254
1255	addr_type = ipv6_addr_type(&cfg->fc_dst);
1256
	/* Pick the input handler from the destination type and RTF_LOCAL. */
1257	if (addr_type & IPV6_ADDR_MULTICAST)
1258		rt->dst.input = ip6_mc_input;
1259	else if (cfg->fc_flags & RTF_LOCAL)
1260		rt->dst.input = ip6_input;
1261	else
1262		rt->dst.input = ip6_forward;
1263
1264	rt->dst.output = ip6_output;
1265
	/* Store only the prefix bits of the destination; /128 is a host route. */
1266	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1267	rt->rt6i_dst.plen = cfg->fc_dst_len;
1268	if (rt->rt6i_dst.plen == 128)
1269	       rt->dst.flags |= DST_HOST;
1270
	/* Non-host routes that carry metrics get a zeroed metrics array. */
1271	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1272		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1273		if (!metrics) {
1274			err = -ENOMEM;
1275			goto out;
1276		}
1277		dst_init_metrics(&rt->dst, metrics, 0);
1278	}
1279#ifdef CONFIG_IPV6_SUBTREES
1280	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1281	rt->rt6i_src.plen = cfg->fc_src_len;
1282#endif
1283
1284	rt->rt6i_metric = cfg->fc_metric;
1285
1286	/* We cannot add true routes via loopback here,
1287	   they would result in kernel looping; promote them to reject routes
1288	 */
1289	if ((cfg->fc_flags & RTF_REJECT) ||
1290	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1291					      && !(cfg->fc_flags&RTF_LOCAL))) {
1292		/* hold loopback dev/idev if we haven't done so. */
1293		if (dev != net->loopback_dev) {
			/* Swap the references over to the loopback device. */
1294			if (dev) {
1295				dev_put(dev);
1296				in6_dev_put(idev);
1297			}
1298			dev = net->loopback_dev;
1299			dev_hold(dev);
1300			idev = in6_dev_get(dev);
1301			if (!idev) {
1302				err = -ENODEV;
1303				goto out;
1304			}
1305		}
		/* Reject routes discard in both directions with -ENETUNREACH. */
1306		rt->dst.output = ip6_pkt_discard_out;
1307		rt->dst.input = ip6_pkt_discard;
1308		rt->dst.error = -ENETUNREACH;
1309		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1310		goto install_route;
1311	}
1312
1313	if (cfg->fc_flags & RTF_GATEWAY) {
1314		const struct in6_addr *gw_addr;
1315		int gwa_type;
1316
1317		gw_addr = &cfg->fc_gateway;
1318		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1319		gwa_type = ipv6_addr_type(gw_addr);
1320
1321		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1322			struct rt6_info *grt;
1323
1324			/* IPv6 strictly inhibits using not link-local
1325			   addresses as nexthop address.
1326			   Otherwise, router will not able to send redirects.
1327			   It is very good, but in some (rare!) circumstances
1328			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1329			   some exceptions. --ANK
1330			 */
1331			err = -EINVAL;
1332			if (!(gwa_type&IPV6_ADDR_UNICAST))
1333				goto out;
1334
			/* The gateway itself must be reachable via an existing route. */
1335			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1336
1337			err = -EHOSTUNREACH;
1338			if (grt == NULL)
1339				goto out;
1340			if (dev) {
				/* A requested device must match the gateway's route. */
1341				if (dev != grt->rt6i_dev) {
1342					dst_release(&grt->dst);
1343					goto out;
1344				}
1345			} else {
				/* No device given: take the one from the gateway's route. */
1346				dev = grt->rt6i_dev;
1347				idev = grt->rt6i_idev;
1348				dev_hold(dev);
1349				in6_dev_hold(grt->rt6i_idev);
1350			}
			/* err stays -EHOSTUNREACH if that route is itself a gateway route. */
1351			if (!(grt->rt6i_flags&RTF_GATEWAY))
1352				err = 0;
1353			dst_release(&grt->dst);
1354
1355			if (err)
1356				goto out;
1357		}
		/* Gateway routes need a real, non-loopback device. */
1358		err = -EINVAL;
1359		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1360			goto out;
1361	}
1362
1363	err = -ENODEV;
1364	if (dev == NULL)
1365		goto out;
1366
	/* A preferred source address is only accepted if ipv6_chk_addr() approves it for dev. */
1367	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1368		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1369			err = -EINVAL;
1370			goto out;
1371		}
1372		ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1373		rt->rt6i_prefsrc.plen = 128;
1374	} else
1375		rt->rt6i_prefsrc.plen = 0;
1376
	/* Attach a neighbour entry for the gateway address on dev. */
1377	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1378		struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1379		if (IS_ERR(n)) {
1380			err = PTR_ERR(n);
1381			goto out;
1382		}
1383		dst_set_neighbour(&rt->dst, n);
1384	}
1385
1386	rt->rt6i_flags = cfg->fc_flags;
1387
1388install_route:
	/* Copy metrics from the netlink attributes; type 0 is skipped. */
1389	if (cfg->fc_mx) {
1390		struct nlattr *nla;
1391		int remaining;
1392
1393		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1394			int type = nla_type(nla);
1395
1396			if (type) {
1397				if (type > RTAX_MAX) {
1398					err = -EINVAL;
1399					goto out;
1400				}
1401
1402				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1403			}
1404		}
1405	}
1406
	/* The dev/idev references held above now belong to the route. */
1407	rt->dst.dev = dev;
1408	rt->rt6i_idev = idev;
1409	rt->rt6i_table = table;
1410
1411	cfg->fc_nlinfo.nl_net = dev_net(dev);
1412
1413	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1414
1415out:
	/* Error path: undo whatever was acquired before the failure. */
1416	if (dev)
1417		dev_put(dev);
1418	if (idev)
1419		in6_dev_put(idev);
1420	if (rt)
1421		dst_free(&rt->dst);
1422	return err;
1423}
1424
1425static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 
1426{
1427	int err;
1428	struct fib6_table *table;
1429	struct net *net = dev_net(rt->rt6i_dev);
 
 
1430
1431	if (rt == net->ipv6.ip6_null_entry)
1432		return -ENOENT;
 
1433
1434	table = rt->rt6i_table;
1435	write_lock_bh(&table->tb6_lock);
1436
1437	err = fib6_del(rt, info);
1438	dst_release(&rt->dst);
 
 
1439
1440	write_unlock_bh(&table->tb6_lock);
 
 
 
1441
1442	return err;
 
1443}
1444
1445int ip6_del_rt(struct rt6_info *rt)
1446{
1447	struct nl_info info = {
1448		.nl_net = dev_net(rt->rt6i_dev),
 
1449	};
1450	return __ip6_del_rt(rt, &info);
 
1451}
1452
1453static int ip6_route_del(struct fib6_config *cfg)
 
1454{
1455	struct fib6_table *table;
 
1456	struct fib6_node *fn;
1457	struct rt6_info *rt;
1458	int err = -ESRCH;
1459
1460	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1461	if (table == NULL)
 
1462		return err;
 
1463
1464	read_lock_bh(&table->tb6_lock);
1465
1466	fn = fib6_locate(&table->tb6_root,
1467			 &cfg->fc_dst, cfg->fc_dst_len,
1468			 &cfg->fc_src, cfg->fc_src_len);
 
1469
1470	if (fn) {
1471		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1472			if (cfg->fc_ifindex &&
1473			    (rt->rt6i_dev == NULL ||
1474			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1475				continue;
1476			if (cfg->fc_flags & RTF_GATEWAY &&
1477			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1478				continue;
1479			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1480				continue;
1481			dst_hold(&rt->dst);
1482			read_unlock_bh(&table->tb6_lock);
1483
1484			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
 
 
 
 
1485		}
1486	}
1487	read_unlock_bh(&table->tb6_lock);
1488
1489	return err;
1490}
1491
1492/*
1493 *	Handle redirects
1494 */
1495struct ip6rd_flowi {
1496	struct flowi6 fl6;
1497	struct in6_addr gateway;
1498};
1499
/*
 * Per-table lookup callback used by ip6_route_redirect().
 *
 * @fl6 is the first member of a struct ip6rd_flowi, so it is cast back to
 * reach the gateway address that sent the redirect.  The routes of the
 * node found for the flow are scanned for an unexpired gateway route on
 * the flow's outgoing interface whose gateway equals that address.
 *
 * Returns the matching route, or the namespace's null entry when nothing
 * matches; a reference is taken on whichever route is returned.
 *
 * NOTE(review): BACKTRACK is defined outside this block; it presumably
 * may jump to the "restart" and "out" labels and update fn -- confirm
 * before renaming any local or label here.
 */
1500static struct rt6_info *__ip6_route_redirect(struct net *net,
1501					     struct fib6_table *table,
1502					     struct flowi6 *fl6,
1503					     int flags)
1504{
1505	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1506	struct rt6_info *rt;
1507	struct fib6_node *fn;
1508
1509	/*
1510	 * Get the "current" route for this destination and
1511	 * check if the redirect has come from appropriate router.
1512	 *
1513	 * RFC 2461 specifies that redirects should only be
1514	 * accepted if they come from the nexthop to the target.
1515	 * Due to the way the routes are chosen, this notion
1516	 * is a bit fuzzy and one might need to check all possible
1517	 * routes.
1518	 */
1519
1520	read_lock_bh(&table->tb6_lock);
1521	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1522restart:
1523	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1524		/*
1525		 * Current route is on-link; redirect is always invalid.
1526		 *
1527		 * Seems, previous statement is not true. It could
1528		 * be node, which looks for us as on-link (f.e. proxy ndisc)
1529		 * But then router serving it might decide, that we should
1530		 * know truth 8)8) --ANK (980726).
1531		 */
1532		if (rt6_check_expired(rt))
1533			continue;
1534		if (!(rt->rt6i_flags & RTF_GATEWAY))
1535			continue;
1536		if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1537			continue;
1538		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1539			continue;
1540		break;
1541	}
1542
	/* No candidate on this node: fall back to the null entry. */
1543	if (!rt)
1544		rt = net->ipv6.ip6_null_entry;
1545	BACKTRACK(net, &fl6->saddr);
1546out:
	/* Take the reference while the table lock is still held. */
1547	dst_hold(&rt->dst);
1548
1549	read_unlock_bh(&table->tb6_lock);
1550
1551	return rt;
1552};
 
 
 
 
 
 
1553
1554static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1555					   const struct in6_addr *src,
1556					   const struct in6_addr *gateway,
1557					   struct net_device *dev)
1558{
1559	int flags = RT6_LOOKUP_F_HAS_SADDR;
1560	struct net *net = dev_net(dev);
1561	struct ip6rd_flowi rdfl = {
1562		.fl6 = {
1563			.flowi6_oif = dev->ifindex,
1564			.daddr = *dest,
1565			.saddr = *src,
1566		},
1567	};
1568
1569	ipv6_addr_copy(&rdfl.gateway, gateway);
 
 
 
1570
1571	if (rt6_need_strict(dest))
1572		flags |= RT6_LOOKUP_F_IFACE;
 
 
1573
1574	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1575						   flags, __ip6_route_redirect);
1576}
 
 
 
 
 
 
1577
1578void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1579		  const struct in6_addr *saddr,
1580		  struct neighbour *neigh, u8 *lladdr, int on_link)
1581{
1582	struct rt6_info *rt, *nrt = NULL;
1583	struct netevent_redirect netevent;
1584	struct net *net = dev_net(neigh->dev);
1585
1586	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
 
 
 
 
1587
1588	if (rt == net->ipv6.ip6_null_entry) {
1589		if (net_ratelimit())
1590			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1591			       "for redirect target\n");
1592		goto out;
1593	}
1594
1595	/*
1596	 *	We have finally decided to accept it.
1597	 */
1598
1599	neigh_update(neigh, lladdr, NUD_STALE,
1600		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1601		     NEIGH_UPDATE_F_OVERRIDE|
1602		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1603				     NEIGH_UPDATE_F_ISROUTER))
1604		     );
 
 
 
 
 
1605
1606	/*
1607	 * Redirect received -> path was valid.
1608	 * Look, redirects are sent only in response to data packets,
1609	 * so that this nexthop apparently is reachable. --ANK
1610	 */
1611	dst_confirm(&rt->dst);
 
 
1612
1613	/* Duplicate redirect: silently ignore. */
1614	if (neigh == dst_get_neighbour_raw(&rt->dst))
1615		goto out;
 
 
 
 
 
 
1616
1617	nrt = ip6_rt_copy(rt, dest);
1618	if (nrt == NULL)
 
 
1619		goto out;
1620
1621	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1622	if (on_link)
1623		nrt->rt6i_flags &= ~RTF_GATEWAY;
1624
1625	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1626	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1627
1628	if (ip6_ins_rt(nrt))
 
 
1629		goto out;
 
1630
1631	netevent.old = &rt->dst;
1632	netevent.new = &nrt->dst;
 
 
1633	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1634
1635	if (rt->rt6i_flags&RTF_CACHE) {
1636		ip6_del_rt(rt);
1637		return;
1638	}
1639
1640out:
1641	dst_release(&rt->dst);
1642}
1643
1644/*
1645 *	Handle ICMP "packet too big" messages
1646 *	i.e. Path MTU discovery
1647 */
1648
1649static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1650			     struct net *net, u32 pmtu, int ifindex)
1651{
1652	struct rt6_info *rt, *nrt;
1653	int allfrag = 0;
1654again:
1655	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1656	if (rt == NULL)
1657		return;
1658
1659	if (rt6_check_expired(rt)) {
1660		ip6_del_rt(rt);
1661		goto again;
1662	}
1663
1664	if (pmtu >= dst_mtu(&rt->dst))
1665		goto out;
1666
1667	if (pmtu < IPV6_MIN_MTU) {
1668		/*
1669		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1670		 * MTU (1280) and a fragment header should always be included
1671		 * after a node receiving Too Big message reporting PMTU is
1672		 * less than the IPv6 Minimum Link MTU.
1673		 */
1674		pmtu = IPV6_MIN_MTU;
1675		allfrag = 1;
1676	}
1677
1678	/* New mtu received -> path was valid.
1679	   They are sent only in response to data packets,
1680	   so that this nexthop apparently is reachable. --ANK
1681	 */
1682	dst_confirm(&rt->dst);
1683
1684	/* Host route. If it is static, it would be better
1685	   not to override it, but add new one, so that
1686	   when cache entry will expire old pmtu
1687	   would return automatically.
1688	 */
1689	if (rt->rt6i_flags & RTF_CACHE) {
1690		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1691		if (allfrag) {
1692			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1693			features |= RTAX_FEATURE_ALLFRAG;
1694			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1695		}
1696		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1697		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1698		goto out;
1699	}
1700
1701	/* Network route.
1702	   Two cases are possible:
1703	   1. It is connected route. Action: COW
1704	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1705	 */
1706	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1707		nrt = rt6_alloc_cow(rt, daddr, saddr);
1708	else
1709		nrt = rt6_alloc_clone(rt, daddr);
1710
1711	if (nrt) {
1712		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1713		if (allfrag) {
1714			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1715			features |= RTAX_FEATURE_ALLFRAG;
1716			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1717		}
1718
1719		/* According to RFC 1981, detecting PMTU increase shouldn't be
1720		 * happened within 5 mins, the recommended timer is 10 mins.
1721		 * Here this route expiration time is set to ip6_rt_mtu_expires
1722		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1723		 * and detecting PMTU increase will be automatically happened.
1724		 */
1725		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1726		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1727
1728		ip6_ins_rt(nrt);
1729	}
1730out:
1731	dst_release(&rt->dst);
1732}
1733
1734void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1735			struct net_device *dev, u32 pmtu)
1736{
1737	struct net *net = dev_net(dev);
1738
1739	/*
1740	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1741	 * is sending along the path" that caused the Packet Too Big message.
1742	 * Since it's not possible in the general case to determine which
1743	 * interface was used to send the original packet, we update the MTU
1744	 * on the interface that will be used to send future packets. We also
1745	 * update the MTU on the interface that received the Packet Too Big in
1746	 * case the original packet was forced out that interface with
1747	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1748	 * correct behaviour, which would be to update the MTU on all
1749	 * interfaces.
1750	 */
1751	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1752	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1753}
1754
1755/*
1756 *	Misc support functions
1757 */
1758
1759static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1760				    const struct in6_addr *dest)
1761{
1762	struct net *net = dev_net(ort->rt6i_dev);
1763	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1764					    ort->dst.dev, 0);
1765
1766	if (rt) {
1767		rt->dst.input = ort->dst.input;
1768		rt->dst.output = ort->dst.output;
1769		rt->dst.flags |= DST_HOST;
1770
1771		ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1772		rt->rt6i_dst.plen = 128;
1773		dst_copy_metrics(&rt->dst, &ort->dst);
1774		rt->dst.error = ort->dst.error;
1775		rt->rt6i_idev = ort->rt6i_idev;
1776		if (rt->rt6i_idev)
1777			in6_dev_hold(rt->rt6i_idev);
1778		rt->dst.lastuse = jiffies;
1779		rt->rt6i_expires = 0;
1780
1781		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1782		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1783		rt->rt6i_metric = 0;
1784
1785#ifdef CONFIG_IPV6_SUBTREES
1786		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1787#endif
1788		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1789		rt->rt6i_table = ort->rt6i_table;
1790	}
1791	return rt;
1792}
1793
1794#ifdef CONFIG_IPV6_ROUTE_INFO
1795static struct rt6_info *rt6_get_route_info(struct net *net,
1796					   const struct in6_addr *prefix, int prefixlen,
1797					   const struct in6_addr *gwaddr, int ifindex)
 
1798{
 
 
1799	struct fib6_node *fn;
1800	struct rt6_info *rt = NULL;
1801	struct fib6_table *table;
1802
1803	table = fib6_get_table(net, RT6_TABLE_INFO);
1804	if (table == NULL)
1805		return NULL;
1806
1807	write_lock_bh(&table->tb6_lock);
1808	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1809	if (!fn)
1810		goto out;
1811
1812	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1813		if (rt->rt6i_dev->ifindex != ifindex)
 
1814			continue;
1815		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1816			continue;
1817		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
 
 
 
 
 
1818			continue;
1819		dst_hold(&rt->dst);
1820		break;
1821	}
1822out:
1823	write_unlock_bh(&table->tb6_lock);
1824	return rt;
1825}
1826
1827static struct rt6_info *rt6_add_route_info(struct net *net,
1828					   const struct in6_addr *prefix, int prefixlen,
1829					   const struct in6_addr *gwaddr, int ifindex,
1830					   unsigned pref)
 
1831{
1832	struct fib6_config cfg = {
1833		.fc_table	= RT6_TABLE_INFO,
1834		.fc_metric	= IP6_RT_PRIO_USER,
1835		.fc_ifindex	= ifindex,
1836		.fc_dst_len	= prefixlen,
1837		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1838				  RTF_UP | RTF_PREF(pref),
1839		.fc_nlinfo.pid = 0,
 
 
1840		.fc_nlinfo.nlh = NULL,
1841		.fc_nlinfo.nl_net = net,
1842	};
1843
1844	ipv6_addr_copy(&cfg.fc_dst, prefix);
1845	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
 
1846
1847	/* We should treat it as a default route if prefix length is 0. */
1848	if (!prefixlen)
1849		cfg.fc_flags |= RTF_DEFAULT;
1850
1851	ip6_route_add(&cfg);
1852
1853	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1854}
1855#endif
1856
1857struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
 
 
1858{
1859	struct rt6_info *rt;
 
1860	struct fib6_table *table;
1861
1862	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1863	if (table == NULL)
1864		return NULL;
1865
1866	write_lock_bh(&table->tb6_lock);
1867	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1868		if (dev == rt->rt6i_dev &&
1869		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1870		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
 
 
 
 
 
 
 
1871			break;
1872	}
1873	if (rt)
1874		dst_hold(&rt->dst);
1875	write_unlock_bh(&table->tb6_lock);
1876	return rt;
1877}
1878
1879struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
 
1880				     struct net_device *dev,
1881				     unsigned int pref)
1882{
1883	struct fib6_config cfg = {
1884		.fc_table	= RT6_TABLE_DFLT,
1885		.fc_metric	= IP6_RT_PRIO_USER,
1886		.fc_ifindex	= dev->ifindex,
1887		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1888				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1889		.fc_nlinfo.pid = 0,
 
 
1890		.fc_nlinfo.nlh = NULL,
1891		.fc_nlinfo.nl_net = dev_net(dev),
1892	};
1893
1894	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1895
1896	ip6_route_add(&cfg);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1897
1898	return rt6_get_dflt_router(gwaddr, dev);
1899}
1900
1901void rt6_purge_dflt_routers(struct net *net)
1902{
1903	struct rt6_info *rt;
1904	struct fib6_table *table;
 
 
1905
1906	/* NOTE: Keep consistent with rt6_get_dflt_router */
1907	table = fib6_get_table(net, RT6_TABLE_DFLT);
1908	if (table == NULL)
1909		return;
1910
1911restart:
1912	read_lock_bh(&table->tb6_lock);
1913	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1914		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1915			dst_hold(&rt->dst);
1916			read_unlock_bh(&table->tb6_lock);
1917			ip6_del_rt(rt);
1918			goto restart;
1919		}
1920	}
1921	read_unlock_bh(&table->tb6_lock);
 
1922}
1923
1924static void rtmsg_to_fib6_config(struct net *net,
1925				 struct in6_rtmsg *rtmsg,
1926				 struct fib6_config *cfg)
1927{
1928	memset(cfg, 0, sizeof(*cfg));
 
 
 
 
 
 
 
 
 
1929
1930	cfg->fc_table = RT6_TABLE_MAIN;
1931	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1932	cfg->fc_metric = rtmsg->rtmsg_metric;
1933	cfg->fc_expires = rtmsg->rtmsg_info;
1934	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1935	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1936	cfg->fc_flags = rtmsg->rtmsg_flags;
1937
1938	cfg->fc_nlinfo.nl_net = net;
1939
1940	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1941	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1942	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1943}
1944
1945int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1946{
1947	struct fib6_config cfg;
1948	struct in6_rtmsg rtmsg;
1949	int err;
1950
1951	switch(cmd) {
1952	case SIOCADDRT:		/* Add a route */
1953	case SIOCDELRT:		/* Delete a route */
1954		if (!capable(CAP_NET_ADMIN))
1955			return -EPERM;
1956		err = copy_from_user(&rtmsg, arg,
1957				     sizeof(struct in6_rtmsg));
1958		if (err)
1959			return -EFAULT;
1960
1961		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1962
1963		rtnl_lock();
1964		switch (cmd) {
1965		case SIOCADDRT:
1966			err = ip6_route_add(&cfg);
1967			break;
1968		case SIOCDELRT:
1969			err = ip6_route_del(&cfg);
1970			break;
1971		default:
1972			err = -EINVAL;
1973		}
1974		rtnl_unlock();
1975
1976		return err;
1977	}
1978
1979	return -EINVAL;
1980}
1981
1982/*
1983 *	Drop the packet on the floor
1984 */
1985
1986static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1987{
 
 
 
1988	int type;
1989	struct dst_entry *dst = skb_dst(skb);
 
 
 
 
 
 
1990	switch (ipstats_mib_noroutes) {
1991	case IPSTATS_MIB_INNOROUTES:
1992		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1993		if (type == IPV6_ADDR_ANY) {
1994			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1995				      IPSTATS_MIB_INADDRERRORS);
1996			break;
1997		}
1998		/* FALLTHROUGH */
1999	case IPSTATS_MIB_OUTNOROUTES:
2000		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2001			      ipstats_mib_noroutes);
2002		break;
2003	}
 
 
 
 
 
2004	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2005	kfree_skb(skb);
2006	return 0;
2007}
2008
2009static int ip6_pkt_discard(struct sk_buff *skb)
2010{
2011	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2012}
2013
2014static int ip6_pkt_discard_out(struct sk_buff *skb)
2015{
2016	skb->dev = skb_dst(skb)->dev;
2017	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2018}
2019
2020#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2021
2022static int ip6_pkt_prohibit(struct sk_buff *skb)
2023{
2024	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2025}
2026
2027static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2028{
2029	skb->dev = skb_dst(skb)->dev;
2030	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2031}
2032
2033#endif
2034
2035/*
2036 *	Allocate a dst for local (unicast / anycast) address.
2037 */
2038
2039struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2040				    const struct in6_addr *addr,
2041				    int anycast)
2042{
2043	struct net *net = dev_net(idev->dev);
2044	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2045					    net->loopback_dev, 0);
2046	struct neighbour *neigh;
 
 
 
 
 
 
 
 
2047
2048	if (rt == NULL) {
2049		if (net_ratelimit())
2050			pr_warning("IPv6:  Maximum number of routes reached,"
2051				   " consider increasing route/max_size.\n");
2052		return ERR_PTR(-ENOMEM);
2053	}
2054
2055	in6_dev_hold(idev);
2056
2057	rt->dst.flags |= DST_HOST;
2058	rt->dst.input = ip6_input;
2059	rt->dst.output = ip6_output;
2060	rt->rt6i_idev = idev;
2061	rt->dst.obsolete = -1;
2062
2063	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2064	if (anycast)
2065		rt->rt6i_flags |= RTF_ANYCAST;
2066	else
2067		rt->rt6i_flags |= RTF_LOCAL;
2068	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2069	if (IS_ERR(neigh)) {
2070		dst_free(&rt->dst);
2071
2072		return ERR_CAST(neigh);
2073	}
2074	dst_set_neighbour(&rt->dst, neigh);
2075
2076	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2077	rt->rt6i_dst.plen = 128;
2078	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2079
2080	atomic_set(&rt->dst.__refcnt, 1);
2081
2082	return rt;
2083}
2084
2085int ip6_route_get_saddr(struct net *net,
2086			struct rt6_info *rt,
2087			const struct in6_addr *daddr,
2088			unsigned int prefs,
2089			struct in6_addr *saddr)
2090{
2091	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2092	int err = 0;
2093	if (rt->rt6i_prefsrc.plen)
2094		ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2095	else
2096		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2097					 daddr, prefs, saddr);
2098	return err;
2099}
2100
2101/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to restrict to, or NULL for any */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address being removed */
};
2107
2108static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2109{
2110	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2111	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2112	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2113
2114	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2115	    rt != net->ipv6.ip6_null_entry &&
2116	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
 
 
2117		/* remove prefsrc entry */
2118		rt->rt6i_prefsrc.plen = 0;
 
2119	}
2120	return 0;
2121}
2122
2123void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2124{
2125	struct net *net = dev_net(ifp->idev->dev);
2126	struct arg_dev_net_ip adni = {
2127		.dev = ifp->idev->dev,
2128		.net = net,
2129		.addr = &ifp->addr,
2130	};
2131	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2132}
2133
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down, or NULL for all */
	struct net *net;	/* namespace whose null entry is kept */
};
2138
2139static int fib6_ifdown(struct rt6_info *rt, void *arg)
2140{
2141	const struct arg_dev_net *adn = arg;
2142	const struct net_device *dev = adn->dev;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2143
2144	if ((rt->rt6i_dev == dev || dev == NULL) &&
2145	    rt != adn->net->ipv6.ip6_null_entry) {
2146		RT6_TRACE("deleted by ifdown %p\n", rt);
2147		return -1;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2148	}
 
2149	return 0;
2150}
2151
2152void rt6_ifdown(struct net *net, struct net_device *dev)
2153{
2154	struct arg_dev_net adn = {
2155		.dev = dev,
2156		.net = net,
 
 
2157	};
 
2158
2159	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2160	icmp6_clean_all(fib6_ifdown, &adn);
 
 
2161}
2162
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2168
2169static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2170{
2171	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2172	struct inet6_dev *idev;
2173
2174	/* In IPv6 pmtu discovery is not optional,
2175	   so that RTAX_MTU lock cannot disable it.
2176	   We still use this lock to block changes
2177	   caused by addrconf/ndisc.
2178	*/
2179
2180	idev = __in6_dev_get(arg->dev);
2181	if (idev == NULL)
 
 
 
2182		return 0;
2183
2184	/* For administrative MTU increase, there is no way to discover
2185	   IPv6 PMTU increase, so PMTU increase should be updated here.
2186	   Since RFC 1981 doesn't include administrative MTU increase
2187	   update PMTU increase is a MUST. (i.e. jumbo frame)
2188	 */
2189	/*
2190	   If new MTU is less than route PMTU, this new MTU will be the
2191	   lowest MTU in the path, update the route PMTU to reflect PMTU
2192	   decreases; if new MTU is greater than route PMTU, and the
2193	   old MTU is the lowest MTU in the path, update the route PMTU
2194	   to reflect the increase. In this case if the other nodes' MTU
2195	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2196	   PMTU discouvery.
2197	 */
2198	if (rt->rt6i_dev == arg->dev &&
2199	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2200	    (dst_mtu(&rt->dst) >= arg->mtu ||
2201	     (dst_mtu(&rt->dst) < arg->mtu &&
2202	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2203		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2204	}
2205	return 0;
 
2206}
2207
2208void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2209{
2210	struct rt6_mtu_change_arg arg = {
2211		.dev = dev,
2212		.mtu = mtu,
2213	};
2214
2215	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2216}
2217
2218static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
 
2219	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
 
2220	[RTA_OIF]               = { .type = NLA_U32 },
2221	[RTA_IIF]		= { .type = NLA_U32 },
2222	[RTA_PRIORITY]          = { .type = NLA_U32 },
2223	[RTA_METRICS]           = { .type = NLA_NESTED },
 
 
 
 
 
 
 
 
 
 
 
 
2224};
2225
2226static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2227			      struct fib6_config *cfg)
 
2228{
2229	struct rtmsg *rtm;
2230	struct nlattr *tb[RTA_MAX+1];
 
2231	int err;
2232
2233	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
 
2234	if (err < 0)
2235		goto errout;
2236
2237	err = -EINVAL;
2238	rtm = nlmsg_data(nlh);
2239	memset(cfg, 0, sizeof(*cfg));
2240
2241	cfg->fc_table = rtm->rtm_table;
2242	cfg->fc_dst_len = rtm->rtm_dst_len;
2243	cfg->fc_src_len = rtm->rtm_src_len;
2244	cfg->fc_flags = RTF_UP;
2245	cfg->fc_protocol = rtm->rtm_protocol;
 
 
 
 
 
 
 
2246
2247	if (rtm->rtm_type == RTN_UNREACHABLE)
 
 
 
2248		cfg->fc_flags |= RTF_REJECT;
2249
2250	if (rtm->rtm_type == RTN_LOCAL)
2251		cfg->fc_flags |= RTF_LOCAL;
2252
2253	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2254	cfg->fc_nlinfo.nlh = nlh;
2255	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
 
 
 
 
 
 
 
 
 
 
 
2256
2257	if (tb[RTA_GATEWAY]) {
2258		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2259		cfg->fc_flags |= RTF_GATEWAY;
2260	}
 
 
 
 
2261
2262	if (tb[RTA_DST]) {
2263		int plen = (rtm->rtm_dst_len + 7) >> 3;
2264
2265		if (nla_len(tb[RTA_DST]) < plen)
2266			goto errout;
2267
2268		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2269	}
2270
2271	if (tb[RTA_SRC]) {
2272		int plen = (rtm->rtm_src_len + 7) >> 3;
2273
2274		if (nla_len(tb[RTA_SRC]) < plen)
2275			goto errout;
2276
2277		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2278	}
2279
2280	if (tb[RTA_PREFSRC])
2281		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2282
2283	if (tb[RTA_OIF])
2284		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2285
2286	if (tb[RTA_PRIORITY])
2287		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2288
2289	if (tb[RTA_METRICS]) {
2290		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2291		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2292	}
2293
2294	if (tb[RTA_TABLE])
2295		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2296
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2297	err = 0;
2298errout:
2299	return err;
2300}
2301
2302static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2303{
2304	struct fib6_config cfg;
2305	int err;
2306
2307	err = rtm_to_fib6_config(skb, nlh, &cfg);
2308	if (err < 0)
2309		return err;
2310
2311	return ip6_route_del(&cfg);
 
 
 
 
 
 
 
 
 
 
 
2312}
2313
2314static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
2315{
2316	struct fib6_config cfg;
2317	int err;
2318
2319	err = rtm_to_fib6_config(skb, nlh, &cfg);
2320	if (err < 0)
2321		return err;
2322
2323	return ip6_route_add(&cfg);
 
 
 
 
 
 
2324}
2325
2326static inline size_t rt6_nlmsg_size(void)
 
2327{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2328	return NLMSG_ALIGN(sizeof(struct rtmsg))
2329	       + nla_total_size(16) /* RTA_SRC */
2330	       + nla_total_size(16) /* RTA_DST */
2331	       + nla_total_size(16) /* RTA_GATEWAY */
2332	       + nla_total_size(16) /* RTA_PREFSRC */
2333	       + nla_total_size(4) /* RTA_TABLE */
2334	       + nla_total_size(4) /* RTA_IIF */
2335	       + nla_total_size(4) /* RTA_OIF */
2336	       + nla_total_size(4) /* RTA_PRIORITY */
2337	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2338	       + nla_total_size(sizeof(struct rta_cacheinfo));
 
 
 
2339}
2340
2341static int rt6_fill_node(struct net *net,
2342			 struct sk_buff *skb, struct rt6_info *rt,
2343			 struct in6_addr *dst, struct in6_addr *src,
2344			 int iif, int type, u32 pid, u32 seq,
2345			 int prefix, int nowait, unsigned int flags)
2346{
2347	struct rtmsg *rtm;
2348	struct nlmsghdr *nlh;
2349	long expires;
2350	u32 table;
2351	struct neighbour *n;
 
 
 
 
 
 
 
 
2352
2353	if (prefix) {	/* user wants prefix routes only */
2354		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2355			/* success since this is not a prefix route */
2356			return 1;
2357		}
2358	}
2359
2360	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2361	if (nlh == NULL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2362		return -EMSGSIZE;
2363
 
 
 
 
 
 
 
 
 
 
2364	rtm = nlmsg_data(nlh);
2365	rtm->rtm_family = AF_INET6;
2366	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2367	rtm->rtm_src_len = rt->rt6i_src.plen;
2368	rtm->rtm_tos = 0;
2369	if (rt->rt6i_table)
2370		table = rt->rt6i_table->tb6_id;
2371	else
2372		table = RT6_TABLE_UNSPEC;
2373	rtm->rtm_table = table;
2374	NLA_PUT_U32(skb, RTA_TABLE, table);
2375	if (rt->rt6i_flags&RTF_REJECT)
2376		rtm->rtm_type = RTN_UNREACHABLE;
2377	else if (rt->rt6i_flags&RTF_LOCAL)
2378		rtm->rtm_type = RTN_LOCAL;
2379	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2380		rtm->rtm_type = RTN_LOCAL;
2381	else
2382		rtm->rtm_type = RTN_UNICAST;
2383	rtm->rtm_flags = 0;
2384	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2385	rtm->rtm_protocol = rt->rt6i_protocol;
2386	if (rt->rt6i_flags&RTF_DYNAMIC)
2387		rtm->rtm_protocol = RTPROT_REDIRECT;
2388	else if (rt->rt6i_flags & RTF_ADDRCONF)
2389		rtm->rtm_protocol = RTPROT_KERNEL;
2390	else if (rt->rt6i_flags&RTF_DEFAULT)
2391		rtm->rtm_protocol = RTPROT_RA;
2392
2393	if (rt->rt6i_flags&RTF_CACHE)
2394		rtm->rtm_flags |= RTM_F_CLONED;
2395
2396	if (dst) {
2397		NLA_PUT(skb, RTA_DST, 16, dst);
 
2398		rtm->rtm_dst_len = 128;
2399	} else if (rtm->rtm_dst_len)
2400		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
 
2401#ifdef CONFIG_IPV6_SUBTREES
2402	if (src) {
2403		NLA_PUT(skb, RTA_SRC, 16, src);
 
2404		rtm->rtm_src_len = 128;
2405	} else if (rtm->rtm_src_len)
2406		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
 
2407#endif
2408	if (iif) {
2409#ifdef CONFIG_IPV6_MROUTE
2410		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2411			int err = ip6mr_get_route(net, skb, rtm, nowait);
2412			if (err <= 0) {
2413				if (!nowait) {
2414					if (err == 0)
2415						return 0;
2416					goto nla_put_failure;
2417				} else {
2418					if (err == -EMSGSIZE)
2419						goto nla_put_failure;
2420				}
2421			}
2422		} else
2423#endif
2424			NLA_PUT_U32(skb, RTA_IIF, iif);
2425	} else if (dst) {
 
2426		struct in6_addr saddr_buf;
2427		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2428			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 
2429	}
2430
2431	if (rt->rt6i_prefsrc.plen) {
2432		struct in6_addr saddr_buf;
2433		ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2434		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 
2435	}
2436
2437	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
 
 
 
 
2438		goto nla_put_failure;
2439
2440	rcu_read_lock();
2441	n = dst_get_neighbour(&rt->dst);
2442	if (n)
2443		NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2444	rcu_read_unlock();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2445
2446	if (rt->dst.dev)
2447		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2448
2449	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
 
 
 
2450
2451	if (!(rt->rt6i_flags & RTF_EXPIRES))
2452		expires = 0;
2453	else if (rt->rt6i_expires - jiffies < INT_MAX)
2454		expires = rt->rt6i_expires - jiffies;
2455	else
2456		expires = INT_MAX;
2457
2458	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2459			       expires, rt->dst.error) < 0)
2460		goto nla_put_failure;
2461
2462	return nlmsg_end(skb, nlh);
 
 
2463
2464nla_put_failure:
2465	nlmsg_cancel(skb, nlh);
2466	return -EMSGSIZE;
2467}
2468
2469int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2470{
2471	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2472	int prefix;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2473
2474	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2475		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2476		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2477	} else
2478		prefix = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2479
2480	return rt6_fill_node(arg->net,
2481		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2482		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2483		     prefix, 0, NLM_F_MULTI);
2484}
2485
2486static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 
2487{
2488	struct net *net = sock_net(in_skb->sk);
2489	struct nlattr *tb[RTA_MAX+1];
 
 
 
2490	struct rt6_info *rt;
2491	struct sk_buff *skb;
2492	struct rtmsg *rtm;
2493	struct flowi6 fl6;
2494	int err, iif = 0;
2495
2496	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2497	if (err < 0)
2498		goto errout;
2499
2500	err = -EINVAL;
2501	memset(&fl6, 0, sizeof(fl6));
 
 
2502
2503	if (tb[RTA_SRC]) {
2504		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2505			goto errout;
2506
2507		ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2508	}
2509
2510	if (tb[RTA_DST]) {
2511		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2512			goto errout;
2513
2514		ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2515	}
2516
2517	if (tb[RTA_IIF])
2518		iif = nla_get_u32(tb[RTA_IIF]);
2519
2520	if (tb[RTA_OIF])
2521		fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2522
2523	if (iif) {
2524		struct net_device *dev;
2525		dev = __dev_get_by_index(net, iif);
 
 
 
 
2526		if (!dev) {
 
2527			err = -ENODEV;
2528			goto errout;
2529		}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2530	}
2531
2532	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2533	if (skb == NULL) {
 
2534		err = -ENOBUFS;
2535		goto errout;
2536	}
2537
2538	/* Reserve room for dummy headers, this skb can pass
2539	   through good chunk of routing engine.
2540	 */
2541	skb_reset_mac_header(skb);
2542	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2543
2544	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2545	skb_dst_set(skb, &rt->dst);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2546
2547	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2548			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2549			    nlh->nlmsg_seq, 0, 0, 0);
2550	if (err < 0) {
2551		kfree_skb(skb);
2552		goto errout;
2553	}
2554
2555	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2556errout:
2557	return err;
2558}
2559
2560void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
 
2561{
2562	struct sk_buff *skb;
2563	struct net *net = info->nl_net;
2564	u32 seq;
2565	int err;
2566
2567	err = -ENOBUFS;
2568	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2569
2570	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2571	if (skb == NULL)
2572		goto errout;
2573
2574	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2575				event, info->pid, seq, 0, 0, 0);
2576	if (err < 0) {
2577		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2578		WARN_ON(err == -EMSGSIZE);
2579		kfree_skb(skb);
2580		goto errout;
2581	}
2582	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2583		    info->nlh, gfp_any());
2584	return;
2585errout:
2586	if (err < 0)
2587		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2588}
2589
2590static int ip6_route_dev_notify(struct notifier_block *this,
2591				unsigned long event, void *data)
2592{
2593	struct net_device *dev = (struct net_device *)data;
2594	struct net *net = dev_net(dev);
2595
2596	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
 
 
 
 
2597		net->ipv6.ip6_null_entry->dst.dev = dev;
2598		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2599#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2600		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2601		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2602		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2603		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2604#endif
 
 
 
 
 
 
 
 
 
 
2605	}
2606
2607	return NOTIFY_OK;
2608}
2609
2610/*
2611 *	/proc
2612 */
2613
2614#ifdef CONFIG_PROC_FS
2615
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file references this
 * structure any more - confirm whether it can be dropped.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2624
2625static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2626{
2627	struct seq_file *m = p_arg;
2628	struct neighbour *n;
2629
2630	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2631
2632#ifdef CONFIG_IPV6_SUBTREES
2633	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2634#else
2635	seq_puts(m, "00000000000000000000000000000000 00 ");
2636#endif
2637	rcu_read_lock();
2638	n = dst_get_neighbour(&rt->dst);
2639	if (n) {
2640		seq_printf(m, "%pi6", n->primary_key);
2641	} else {
2642		seq_puts(m, "00000000000000000000000000000000");
2643	}
2644	rcu_read_unlock();
2645	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2646		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2647		   rt->dst.__use, rt->rt6i_flags,
2648		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2649	return 0;
2650}
2651
2652static int ipv6_route_show(struct seq_file *m, void *v)
2653{
2654	struct net *net = (struct net *)m->private;
2655	fib6_clean_all(net, rt6_info_route, 0, m);
2656	return 0;
2657}
2658
2659static int ipv6_route_open(struct inode *inode, struct file *file)
2660{
2661	return single_open_net(inode, file, ipv6_route_show);
2662}
2663
2664static const struct file_operations ipv6_route_proc_fops = {
2665	.owner		= THIS_MODULE,
2666	.open		= ipv6_route_open,
2667	.read		= seq_read,
2668	.llseek		= seq_lseek,
2669	.release	= single_release_net,
2670};
2671
2672static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2673{
2674	struct net *net = (struct net *)seq->private;
2675	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2676		   net->ipv6.rt6_stats->fib_nodes,
2677		   net->ipv6.rt6_stats->fib_route_nodes,
2678		   net->ipv6.rt6_stats->fib_rt_alloc,
2679		   net->ipv6.rt6_stats->fib_rt_entries,
2680		   net->ipv6.rt6_stats->fib_rt_cache,
2681		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2682		   net->ipv6.rt6_stats->fib_discarded_routes);
2683
2684	return 0;
2685}
2686
2687static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2688{
2689	return single_open_net(inode, file, rt6_stats_seq_show);
2690}
2691
2692static const struct file_operations rt6_stats_seq_fops = {
2693	.owner	 = THIS_MODULE,
2694	.open	 = rt6_stats_seq_open,
2695	.read	 = seq_read,
2696	.llseek	 = seq_lseek,
2697	.release = single_release_net,
2698};
2699#endif	/* CONFIG_PROC_FS */
2700
2701#ifdef CONFIG_SYSCTL
2702
2703static
2704int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2705			      void __user *buffer, size_t *lenp, loff_t *ppos)
2706{
2707	struct net *net;
2708	int delay;
 
2709	if (!write)
2710		return -EINVAL;
2711
2712	net = (struct net *)ctl->extra1;
2713	delay = net->ipv6.sysctl.flush_delay;
2714	proc_dointvec(ctl, write, buffer, lenp, ppos);
2715	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
 
 
 
2716	return 0;
2717}
2718
2719ctl_table ipv6_route_table_template[] = {
2720	{
2721		.procname	=	"flush",
2722		.data		=	&init_net.ipv6.sysctl.flush_delay,
2723		.maxlen		=	sizeof(int),
2724		.mode		=	0200,
2725		.proc_handler	=	ipv6_sysctl_rtcache_flush
2726	},
2727	{
2728		.procname	=	"gc_thresh",
2729		.data		=	&ip6_dst_ops_template.gc_thresh,
2730		.maxlen		=	sizeof(int),
2731		.mode		=	0644,
2732		.proc_handler	=	proc_dointvec,
2733	},
2734	{
2735		.procname	=	"max_size",
2736		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2737		.maxlen		=	sizeof(int),
2738		.mode		=	0644,
2739		.proc_handler	=	proc_dointvec,
2740	},
2741	{
2742		.procname	=	"gc_min_interval",
2743		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2744		.maxlen		=	sizeof(int),
2745		.mode		=	0644,
2746		.proc_handler	=	proc_dointvec_jiffies,
2747	},
2748	{
2749		.procname	=	"gc_timeout",
2750		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2751		.maxlen		=	sizeof(int),
2752		.mode		=	0644,
2753		.proc_handler	=	proc_dointvec_jiffies,
2754	},
2755	{
2756		.procname	=	"gc_interval",
2757		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2758		.maxlen		=	sizeof(int),
2759		.mode		=	0644,
2760		.proc_handler	=	proc_dointvec_jiffies,
2761	},
2762	{
2763		.procname	=	"gc_elasticity",
2764		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2765		.maxlen		=	sizeof(int),
2766		.mode		=	0644,
2767		.proc_handler	=	proc_dointvec,
2768	},
2769	{
2770		.procname	=	"mtu_expires",
2771		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2772		.maxlen		=	sizeof(int),
2773		.mode		=	0644,
2774		.proc_handler	=	proc_dointvec_jiffies,
2775	},
2776	{
2777		.procname	=	"min_adv_mss",
2778		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2779		.maxlen		=	sizeof(int),
2780		.mode		=	0644,
2781		.proc_handler	=	proc_dointvec,
2782	},
2783	{
2784		.procname	=	"gc_min_interval_ms",
2785		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2786		.maxlen		=	sizeof(int),
2787		.mode		=	0644,
2788		.proc_handler	=	proc_dointvec_ms_jiffies,
2789	},
 
 
 
 
 
 
 
 
 
2790	{ }
2791};
2792
2793struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2794{
2795	struct ctl_table *table;
2796
2797	table = kmemdup(ipv6_route_table_template,
2798			sizeof(ipv6_route_table_template),
2799			GFP_KERNEL);
2800
2801	if (table) {
2802		table[0].data = &net->ipv6.sysctl.flush_delay;
2803		table[0].extra1 = net;
2804		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2805		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2806		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2807		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2808		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2809		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2810		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2811		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2812		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
 
 
 
 
 
2813	}
2814
2815	return table;
2816}
2817#endif
2818
2819static int __net_init ip6_route_net_init(struct net *net)
2820{
2821	int ret = -ENOMEM;
2822
2823	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2824	       sizeof(net->ipv6.ip6_dst_ops));
2825
2826	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2827		goto out_ip6_dst_ops;
2828
 
 
 
 
 
 
2829	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2830					   sizeof(*net->ipv6.ip6_null_entry),
2831					   GFP_KERNEL);
2832	if (!net->ipv6.ip6_null_entry)
2833		goto out_ip6_dst_entries;
2834	net->ipv6.ip6_null_entry->dst.path =
2835		(struct dst_entry *)net->ipv6.ip6_null_entry;
2836	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2837	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2838			 ip6_template_metrics, true);
 
2839
2840#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 
2841	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2842					       sizeof(*net->ipv6.ip6_prohibit_entry),
2843					       GFP_KERNEL);
2844	if (!net->ipv6.ip6_prohibit_entry)
2845		goto out_ip6_null_entry;
2846	net->ipv6.ip6_prohibit_entry->dst.path =
2847		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2848	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2849	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2850			 ip6_template_metrics, true);
 
2851
2852	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2853					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2854					       GFP_KERNEL);
2855	if (!net->ipv6.ip6_blk_hole_entry)
2856		goto out_ip6_prohibit_entry;
2857	net->ipv6.ip6_blk_hole_entry->dst.path =
2858		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2859	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2860	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2861			 ip6_template_metrics, true);
 
2862#endif
2863
2864	net->ipv6.sysctl.flush_delay = 0;
2865	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2866	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2867	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2868	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2869	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2870	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2871	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
 
2872
2873#ifdef CONFIG_PROC_FS
2874	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2875	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2876#endif
2877	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2878
2879	ret = 0;
2880out:
2881	return ret;
2882
2883#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2884out_ip6_prohibit_entry:
2885	kfree(net->ipv6.ip6_prohibit_entry);
2886out_ip6_null_entry:
2887	kfree(net->ipv6.ip6_null_entry);
2888#endif
 
 
2889out_ip6_dst_entries:
2890	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2891out_ip6_dst_ops:
2892	goto out;
2893}
2894
2895static void __net_exit ip6_route_net_exit(struct net *net)
2896{
2897#ifdef CONFIG_PROC_FS
2898	proc_net_remove(net, "ipv6_route");
2899	proc_net_remove(net, "rt6_stats");
2900#endif
2901	kfree(net->ipv6.ip6_null_entry);
2902#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2903	kfree(net->ipv6.ip6_prohibit_entry);
2904	kfree(net->ipv6.ip6_blk_hole_entry);
2905#endif
2906	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2907}
2908
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2909static struct pernet_operations ip6_route_net_ops = {
2910	.init = ip6_route_net_init,
2911	.exit = ip6_route_net_exit,
2912};
2913
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2914static struct notifier_block ip6_route_dev_notifier = {
2915	.notifier_call = ip6_route_dev_notify,
2916	.priority = 0,
2917};
2918
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2919int __init ip6_route_init(void)
2920{
2921	int ret;
 
2922
2923	ret = -ENOMEM;
2924	ip6_dst_ops_template.kmem_cachep =
2925		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2926				  SLAB_HWCACHE_ALIGN, NULL);
2927	if (!ip6_dst_ops_template.kmem_cachep)
2928		goto out;
2929
2930	ret = dst_entries_init(&ip6_dst_blackhole_ops);
2931	if (ret)
2932		goto out_kmem_cache;
2933
 
 
 
 
2934	ret = register_pernet_subsys(&ip6_route_net_ops);
2935	if (ret)
2936		goto out_dst_entries;
2937
2938	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2939
2940	/* Registering of the loopback is done before this portion of code,
2941	 * the loopback reference in rt6_info will not be taken, do it
2942	 * manually for init_net */
2943	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2944	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2945  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2946	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2947	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2948	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2949	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2950  #endif
2951	ret = fib6_init();
2952	if (ret)
2953		goto out_register_subsys;
2954
2955	ret = xfrm6_init();
2956	if (ret)
2957		goto out_fib6_init;
2958
2959	ret = fib6_rules_init();
2960	if (ret)
2961		goto xfrm6_init;
2962
2963	ret = -ENOBUFS;
2964	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2965	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2966	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
2967		goto fib6_rules_init;
2968
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2969	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2970	if (ret)
2971		goto fib6_rules_init;
 
 
 
 
 
 
 
2972
2973out:
2974	return ret;
2975
 
 
 
2976fib6_rules_init:
2977	fib6_rules_cleanup();
2978xfrm6_init:
2979	xfrm6_fini();
2980out_fib6_init:
2981	fib6_gc_cleanup();
2982out_register_subsys:
2983	unregister_pernet_subsys(&ip6_route_net_ops);
 
 
2984out_dst_entries:
2985	dst_entries_destroy(&ip6_dst_blackhole_ops);
2986out_kmem_cache:
2987	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2988	goto out;
2989}
2990
2991void ip6_route_cleanup(void)
2992{
2993	unregister_netdevice_notifier(&ip6_route_dev_notifier);
 
2994	fib6_rules_cleanup();
2995	xfrm6_fini();
2996	fib6_gc_cleanup();
 
2997	unregister_pernet_subsys(&ip6_route_net_ops);
2998	dst_entries_destroy(&ip6_dst_blackhole_ops);
2999	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3000}