Linux Audio

Check our new training course

Loading...
v4.6
   1/*
   2 *	Linux INET6 implementation
   3 *	FIB front-end.
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*	Changes:
  15 *
  16 *	YOSHIFUJI Hideaki @USAGI
  17 *		reworked default router selection.
  18 *		- respect outgoing interface
  19 *		- select from (probably) reachable routers (i.e.
  20 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *		- always select the same router if it is (probably)
  22 *		reachable.  otherwise, round-robin the list.
  23 *	Ville Nuorvala
  24 *		Fixed routing subtrees.
  25 */
  26
  27#define pr_fmt(fmt) "IPv6: " fmt
  28
  29#include <linux/capability.h>
  30#include <linux/errno.h>
  31#include <linux/export.h>
  32#include <linux/types.h>
  33#include <linux/times.h>
  34#include <linux/socket.h>
  35#include <linux/sockios.h>
  36#include <linux/net.h>
  37#include <linux/route.h>
  38#include <linux/netdevice.h>
  39#include <linux/in6.h>
  40#include <linux/mroute6.h>
  41#include <linux/init.h>
  42#include <linux/if_arp.h>
  43#include <linux/proc_fs.h>
  44#include <linux/seq_file.h>
  45#include <linux/nsproxy.h>
  46#include <linux/slab.h>
  47#include <net/net_namespace.h>
  48#include <net/snmp.h>
  49#include <net/ipv6.h>
  50#include <net/ip6_fib.h>
  51#include <net/ip6_route.h>
  52#include <net/ndisc.h>
  53#include <net/addrconf.h>
  54#include <net/tcp.h>
  55#include <linux/rtnetlink.h>
  56#include <net/dst.h>
  57#include <net/dst_metadata.h>
  58#include <net/xfrm.h>
  59#include <net/netevent.h>
  60#include <net/netlink.h>
  61#include <net/nexthop.h>
  62#include <net/lwtunnel.h>
  63#include <net/ip_tunnels.h>
  64#include <net/l3mdev.h>
  65#include <trace/events/fib6.h>
  66
  67#include <asm/uaccess.h>
  68
  69#ifdef CONFIG_SYSCTL
  70#include <linux/sysctl.h>
  71#endif
  72
  73enum rt6_nud_state {
  74	RT6_NUD_FAIL_HARD = -3,
  75	RT6_NUD_FAIL_PROBE = -2,
  76	RT6_NUD_FAIL_DO_RR = -1,
  77	RT6_NUD_SUCCEED = 1
  78};
  79
  80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
  81static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  82static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  83static unsigned int	 ip6_mtu(const struct dst_entry *dst);
  84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  85static void		ip6_dst_destroy(struct dst_entry *);
  86static void		ip6_dst_ifdown(struct dst_entry *,
  87				       struct net_device *dev, int how);
  88static int		 ip6_dst_gc(struct dst_ops *ops);
  89
  90static int		ip6_pkt_discard(struct sk_buff *skb);
  91static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  92static int		ip6_pkt_prohibit(struct sk_buff *skb);
  93static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  94static void		ip6_link_failure(struct sk_buff *skb);
  95static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
  96					   struct sk_buff *skb, u32 mtu);
  97static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
  98					struct sk_buff *skb);
  99static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
 101
 102#ifdef CONFIG_IPV6_ROUTE_INFO
 103static struct rt6_info *rt6_add_route_info(struct net *net,
 104					   const struct in6_addr *prefix, int prefixlen,
 105					   const struct in6_addr *gwaddr, int ifindex,
 
 106					   unsigned int pref);
 107static struct rt6_info *rt6_get_route_info(struct net *net,
 108					   const struct in6_addr *prefix, int prefixlen,
 109					   const struct in6_addr *gwaddr, int ifindex);
 
 110#endif
 111
 112struct uncached_list {
 113	spinlock_t		lock;
 114	struct list_head	head;
 115};
 116
 117static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
 118
 119static void rt6_uncached_list_add(struct rt6_info *rt)
 120{
 121	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
 122
 123	rt->dst.flags |= DST_NOCACHE;
 124	rt->rt6i_uncached_list = ul;
 125
 126	spin_lock_bh(&ul->lock);
 127	list_add_tail(&rt->rt6i_uncached, &ul->head);
 128	spin_unlock_bh(&ul->lock);
 129}
 130
 131static void rt6_uncached_list_del(struct rt6_info *rt)
 132{
 133	if (!list_empty(&rt->rt6i_uncached)) {
 134		struct uncached_list *ul = rt->rt6i_uncached_list;
 135
 136		spin_lock_bh(&ul->lock);
 137		list_del(&rt->rt6i_uncached);
 138		spin_unlock_bh(&ul->lock);
 139	}
 140}
 141
 142static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 143{
 144	struct net_device *loopback_dev = net->loopback_dev;
 145	int cpu;
 146
 147	if (dev == loopback_dev)
 148		return;
 149
 150	for_each_possible_cpu(cpu) {
 151		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 152		struct rt6_info *rt;
 153
 154		spin_lock_bh(&ul->lock);
 155		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
 156			struct inet6_dev *rt_idev = rt->rt6i_idev;
 157			struct net_device *rt_dev = rt->dst.dev;
 158
 159			if (rt_idev->dev == dev) {
 160				rt->rt6i_idev = in6_dev_get(loopback_dev);
 161				in6_dev_put(rt_idev);
 162			}
 163
 164			if (rt_dev == dev) {
 165				rt->dst.dev = loopback_dev;
 166				dev_hold(rt->dst.dev);
 167				dev_put(rt_dev);
 168			}
 169		}
 170		spin_unlock_bh(&ul->lock);
 171	}
 172}
 173
 174static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
 175{
 176	return dst_metrics_write_ptr(rt->dst.from);
 177}
 178
 179static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 180{
 181	struct rt6_info *rt = (struct rt6_info *)dst;
 182
 183	if (rt->rt6i_flags & RTF_PCPU)
 184		return rt6_pcpu_cow_metrics(rt);
 185	else if (rt->rt6i_flags & RTF_CACHE)
 186		return NULL;
 187	else
 188		return dst_cow_metrics_generic(dst, old);
 189}
 190
 191static inline const void *choose_neigh_daddr(struct rt6_info *rt,
 192					     struct sk_buff *skb,
 193					     const void *daddr)
 194{
 195	struct in6_addr *p = &rt->rt6i_gateway;
 196
 197	if (!ipv6_addr_any(p))
 198		return (const void *) p;
 199	else if (skb)
 200		return &ipv6_hdr(skb)->daddr;
 201	return daddr;
 202}
 203
 204static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
 205					  struct sk_buff *skb,
 206					  const void *daddr)
 207{
 208	struct rt6_info *rt = (struct rt6_info *) dst;
 209	struct neighbour *n;
 210
 211	daddr = choose_neigh_daddr(rt, skb, daddr);
 212	n = __ipv6_neigh_lookup(dst->dev, daddr);
 213	if (n)
 214		return n;
 215	return neigh_create(&nd_tbl, daddr, dst->dev);
 216}
 217
 218static struct dst_ops ip6_dst_ops_template = {
 219	.family			=	AF_INET6,
 220	.gc			=	ip6_dst_gc,
 221	.gc_thresh		=	1024,
 222	.check			=	ip6_dst_check,
 223	.default_advmss		=	ip6_default_advmss,
 224	.mtu			=	ip6_mtu,
 225	.cow_metrics		=	ipv6_cow_metrics,
 226	.destroy		=	ip6_dst_destroy,
 227	.ifdown			=	ip6_dst_ifdown,
 228	.negative_advice	=	ip6_negative_advice,
 229	.link_failure		=	ip6_link_failure,
 230	.update_pmtu		=	ip6_rt_update_pmtu,
 231	.redirect		=	rt6_do_redirect,
 232	.local_out		=	__ip6_local_out,
 233	.neigh_lookup		=	ip6_neigh_lookup,
 234};
 235
 236static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 237{
 238	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 239
 240	return mtu ? : dst->dev->mtu;
 241}
 242
 243static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
 244					 struct sk_buff *skb, u32 mtu)
 245{
 246}
 247
 248static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
 249				      struct sk_buff *skb)
 250{
 251}
 252
 253static struct dst_ops ip6_dst_blackhole_ops = {
 254	.family			=	AF_INET6,
 255	.destroy		=	ip6_dst_destroy,
 256	.check			=	ip6_dst_check,
 257	.mtu			=	ip6_blackhole_mtu,
 258	.default_advmss		=	ip6_default_advmss,
 259	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 260	.redirect		=	ip6_rt_blackhole_redirect,
 261	.cow_metrics		=	dst_cow_metrics_generic,
 262	.neigh_lookup		=	ip6_neigh_lookup,
 263};
 264
 265static const u32 ip6_template_metrics[RTAX_MAX] = {
 266	[RTAX_HOPLIMIT - 1] = 0,
 267};
 268
 269static const struct rt6_info ip6_null_entry_template = {
 270	.dst = {
 271		.__refcnt	= ATOMIC_INIT(1),
 272		.__use		= 1,
 273		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 274		.error		= -ENETUNREACH,
 275		.input		= ip6_pkt_discard,
 276		.output		= ip6_pkt_discard_out,
 277	},
 278	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 279	.rt6i_protocol  = RTPROT_KERNEL,
 280	.rt6i_metric	= ~(u32) 0,
 281	.rt6i_ref	= ATOMIC_INIT(1),
 282};
 283
 284#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 285
 286static const struct rt6_info ip6_prohibit_entry_template = {
 287	.dst = {
 288		.__refcnt	= ATOMIC_INIT(1),
 289		.__use		= 1,
 290		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 291		.error		= -EACCES,
 292		.input		= ip6_pkt_prohibit,
 293		.output		= ip6_pkt_prohibit_out,
 294	},
 295	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 296	.rt6i_protocol  = RTPROT_KERNEL,
 297	.rt6i_metric	= ~(u32) 0,
 298	.rt6i_ref	= ATOMIC_INIT(1),
 299};
 300
 301static const struct rt6_info ip6_blk_hole_entry_template = {
 302	.dst = {
 303		.__refcnt	= ATOMIC_INIT(1),
 304		.__use		= 1,
 305		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 306		.error		= -EINVAL,
 307		.input		= dst_discard,
 308		.output		= dst_discard_out,
 309	},
 310	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 311	.rt6i_protocol  = RTPROT_KERNEL,
 312	.rt6i_metric	= ~(u32) 0,
 313	.rt6i_ref	= ATOMIC_INIT(1),
 314};
 315
 316#endif
 317
 318static void rt6_info_init(struct rt6_info *rt)
 319{
 320	struct dst_entry *dst = &rt->dst;
 321
 322	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 323	INIT_LIST_HEAD(&rt->rt6i_siblings);
 324	INIT_LIST_HEAD(&rt->rt6i_uncached);
 325}
 326
 327/* allocate dst with ip6_dst_ops */
 328static struct rt6_info *__ip6_dst_alloc(struct net *net,
 329					struct net_device *dev,
 330					int flags)
 331{
 332	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 333					0, DST_OBSOLETE_FORCE_CHK, flags);
 334
 335	if (rt)
 336		rt6_info_init(rt);
 337
 338	return rt;
 339}
 340
 341struct rt6_info *ip6_dst_alloc(struct net *net,
 342			       struct net_device *dev,
 343			       int flags)
 344{
 345	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
 346
 347	if (rt) {
 348		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
 349		if (rt->rt6i_pcpu) {
 350			int cpu;
 351
 352			for_each_possible_cpu(cpu) {
 353				struct rt6_info **p;
 354
 355				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
 356				/* no one shares rt */
 357				*p =  NULL;
 358			}
 359		} else {
 360			dst_destroy((struct dst_entry *)rt);
 361			return NULL;
 362		}
 363	}
 364
 365	return rt;
 366}
 367EXPORT_SYMBOL(ip6_dst_alloc);
 368
 369static void ip6_dst_destroy(struct dst_entry *dst)
 370{
 371	struct rt6_info *rt = (struct rt6_info *)dst;
 372	struct dst_entry *from = dst->from;
 373	struct inet6_dev *idev;
 374
 375	dst_destroy_metrics_generic(dst);
 376	free_percpu(rt->rt6i_pcpu);
 377	rt6_uncached_list_del(rt);
 378
 379	idev = rt->rt6i_idev;
 380	if (idev) {
 381		rt->rt6i_idev = NULL;
 382		in6_dev_put(idev);
 383	}
 384
 385	dst->from = NULL;
 386	dst_release(from);
 387}
 388
 389static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 390			   int how)
 391{
 392	struct rt6_info *rt = (struct rt6_info *)dst;
 393	struct inet6_dev *idev = rt->rt6i_idev;
 394	struct net_device *loopback_dev =
 395		dev_net(dev)->loopback_dev;
 396
 397	if (dev != loopback_dev) {
 398		if (idev && idev->dev == dev) {
 399			struct inet6_dev *loopback_idev =
 400				in6_dev_get(loopback_dev);
 401			if (loopback_idev) {
 402				rt->rt6i_idev = loopback_idev;
 403				in6_dev_put(idev);
 404			}
 405		}
 406	}
 407}
 408
 409static bool __rt6_check_expired(const struct rt6_info *rt)
 410{
 411	if (rt->rt6i_flags & RTF_EXPIRES)
 412		return time_after(jiffies, rt->dst.expires);
 413	else
 414		return false;
 415}
 416
 417static bool rt6_check_expired(const struct rt6_info *rt)
 418{
 419	if (rt->rt6i_flags & RTF_EXPIRES) {
 420		if (time_after(jiffies, rt->dst.expires))
 421			return true;
 422	} else if (rt->dst.from) {
 423		return rt6_check_expired((struct rt6_info *) rt->dst.from);
 424	}
 425	return false;
 426}
 427
 428/* Multipath route selection:
 429 *   Hash based function using packet header and flowlabel.
 430 * Adapted from fib_info_hashfn()
 431 */
 432static int rt6_info_hash_nhsfn(unsigned int candidate_count,
 433			       const struct flowi6 *fl6)
 434{
 435	return get_hash_from_flowi6(fl6) % candidate_count;
 436}
 437
 438static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 439					     struct flowi6 *fl6, int oif,
 440					     int strict)
 441{
 442	struct rt6_info *sibling, *next_sibling;
 443	int route_choosen;
 444
 445	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
 446	/* Don't change the route, if route_choosen == 0
 447	 * (siblings does not include ourself)
 448	 */
 449	if (route_choosen)
 450		list_for_each_entry_safe(sibling, next_sibling,
 451				&match->rt6i_siblings, rt6i_siblings) {
 452			route_choosen--;
 453			if (route_choosen == 0) {
 454				if (rt6_score_route(sibling, oif, strict) < 0)
 455					break;
 456				match = sibling;
 457				break;
 458			}
 459		}
 460	return match;
 461}
 462
 463/*
 464 *	Route lookup. Any table->tb6_lock is implied.
 465 */
 466
 467static inline struct rt6_info *rt6_device_match(struct net *net,
 468						    struct rt6_info *rt,
 469						    const struct in6_addr *saddr,
 470						    int oif,
 471						    int flags)
 472{
 473	struct rt6_info *local = NULL;
 474	struct rt6_info *sprt;
 475
 476	if (!oif && ipv6_addr_any(saddr))
 477		goto out;
 478
 479	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 480		struct net_device *dev = sprt->dst.dev;
 481
 482		if (oif) {
 483			if (dev->ifindex == oif)
 484				return sprt;
 485			if (dev->flags & IFF_LOOPBACK) {
 486				if (!sprt->rt6i_idev ||
 487				    sprt->rt6i_idev->dev->ifindex != oif) {
 488					if (flags & RT6_LOOKUP_F_IFACE)
 489						continue;
 490					if (local &&
 491					    local->rt6i_idev->dev->ifindex == oif)
 492						continue;
 493				}
 494				local = sprt;
 495			}
 496		} else {
 497			if (ipv6_chk_addr(net, saddr, dev,
 498					  flags & RT6_LOOKUP_F_IFACE))
 499				return sprt;
 500		}
 501	}
 502
 503	if (oif) {
 504		if (local)
 505			return local;
 506
 507		if (flags & RT6_LOOKUP_F_IFACE)
 508			return net->ipv6.ip6_null_entry;
 509	}
 510out:
 511	return rt;
 512}
 513
 514#ifdef CONFIG_IPV6_ROUTER_PREF
 515struct __rt6_probe_work {
 516	struct work_struct work;
 517	struct in6_addr target;
 518	struct net_device *dev;
 519};
 520
 521static void rt6_probe_deferred(struct work_struct *w)
 522{
 523	struct in6_addr mcaddr;
 524	struct __rt6_probe_work *work =
 525		container_of(w, struct __rt6_probe_work, work);
 526
 527	addrconf_addr_solict_mult(&work->target, &mcaddr);
 528	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
 529	dev_put(work->dev);
 530	kfree(work);
 531}
 532
 533static void rt6_probe(struct rt6_info *rt)
 534{
 535	struct __rt6_probe_work *work;
 536	struct neighbour *neigh;
 537	/*
 538	 * Okay, this does not seem to be appropriate
 539	 * for now, however, we need to check if it
 540	 * is really so; aka Router Reachability Probing.
 541	 *
 542	 * Router Reachability Probe MUST be rate-limited
 543	 * to no more than one per minute.
 544	 */
 545	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
 546		return;
 547	rcu_read_lock_bh();
 548	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 549	if (neigh) {
 550		if (neigh->nud_state & NUD_VALID)
 551			goto out;
 552
 553		work = NULL;
 554		write_lock(&neigh->lock);
 555		if (!(neigh->nud_state & NUD_VALID) &&
 556		    time_after(jiffies,
 557			       neigh->updated +
 558			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
 559			work = kmalloc(sizeof(*work), GFP_ATOMIC);
 560			if (work)
 561				__neigh_set_probe_once(neigh);
 562		}
 563		write_unlock(&neigh->lock);
 564	} else {
 565		work = kmalloc(sizeof(*work), GFP_ATOMIC);
 566	}
 567
 568	if (work) {
 569		INIT_WORK(&work->work, rt6_probe_deferred);
 570		work->target = rt->rt6i_gateway;
 571		dev_hold(rt->dst.dev);
 572		work->dev = rt->dst.dev;
 573		schedule_work(&work->work);
 574	}
 575
 576out:
 577	rcu_read_unlock_bh();
 578}
 579#else
 580static inline void rt6_probe(struct rt6_info *rt)
 581{
 582}
 583#endif
 584
 585/*
 586 * Default Router Selection (RFC 2461 6.3.6)
 587 */
 588static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 589{
 590	struct net_device *dev = rt->dst.dev;
 591	if (!oif || dev->ifindex == oif)
 592		return 2;
 593	if ((dev->flags & IFF_LOOPBACK) &&
 594	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 595		return 1;
 596	return 0;
 597}
 598
 599static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
 600{
 601	struct neighbour *neigh;
 602	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 603
 604	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 605	    !(rt->rt6i_flags & RTF_GATEWAY))
 606		return RT6_NUD_SUCCEED;
 607
 608	rcu_read_lock_bh();
 609	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 610	if (neigh) {
 611		read_lock(&neigh->lock);
 612		if (neigh->nud_state & NUD_VALID)
 613			ret = RT6_NUD_SUCCEED;
 614#ifdef CONFIG_IPV6_ROUTER_PREF
 615		else if (!(neigh->nud_state & NUD_FAILED))
 616			ret = RT6_NUD_SUCCEED;
 617		else
 618			ret = RT6_NUD_FAIL_PROBE;
 619#endif
 620		read_unlock(&neigh->lock);
 621	} else {
 622		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
 623		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
 624	}
 625	rcu_read_unlock_bh();
 626
 627	return ret;
 628}
 629
 630static int rt6_score_route(struct rt6_info *rt, int oif,
 631			   int strict)
 632{
 633	int m;
 634
 635	m = rt6_check_dev(rt, oif);
 636	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 637		return RT6_NUD_FAIL_HARD;
 638#ifdef CONFIG_IPV6_ROUTER_PREF
 639	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 640#endif
 641	if (strict & RT6_LOOKUP_F_REACHABLE) {
 642		int n = rt6_check_neigh(rt);
 643		if (n < 0)
 644			return n;
 645	}
 646	return m;
 647}
 648
 649static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 650				   int *mpri, struct rt6_info *match,
 651				   bool *do_rr)
 652{
 653	int m;
 654	bool match_do_rr = false;
 655	struct inet6_dev *idev = rt->rt6i_idev;
 656	struct net_device *dev = rt->dst.dev;
 657
 658	if (dev && !netif_carrier_ok(dev) &&
 659	    idev->cnf.ignore_routes_with_linkdown)
 
 660		goto out;
 661
 662	if (rt6_check_expired(rt))
 663		goto out;
 664
 665	m = rt6_score_route(rt, oif, strict);
 666	if (m == RT6_NUD_FAIL_DO_RR) {
 667		match_do_rr = true;
 668		m = 0; /* lowest valid score */
 669	} else if (m == RT6_NUD_FAIL_HARD) {
 670		goto out;
 671	}
 672
 673	if (strict & RT6_LOOKUP_F_REACHABLE)
 674		rt6_probe(rt);
 675
 676	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
 677	if (m > *mpri) {
 678		*do_rr = match_do_rr;
 679		*mpri = m;
 680		match = rt;
 681	}
 682out:
 683	return match;
 684}
 685
 686static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 687				     struct rt6_info *rr_head,
 688				     u32 metric, int oif, int strict,
 689				     bool *do_rr)
 690{
 691	struct rt6_info *rt, *match, *cont;
 692	int mpri = -1;
 693
 694	match = NULL;
 695	cont = NULL;
 696	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
 697		if (rt->rt6i_metric != metric) {
 698			cont = rt;
 699			break;
 700		}
 701
 702		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 703	}
 704
 705	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
 706		if (rt->rt6i_metric != metric) {
 707			cont = rt;
 708			break;
 709		}
 710
 711		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 712	}
 713
 714	if (match || !cont)
 715		return match;
 716
 717	for (rt = cont; rt; rt = rt->dst.rt6_next)
 718		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 719
 720	return match;
 721}
 722
 723static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 724{
 725	struct rt6_info *match, *rt0;
 726	struct net *net;
 727	bool do_rr = false;
 728
 729	rt0 = fn->rr_ptr;
 730	if (!rt0)
 731		fn->rr_ptr = rt0 = fn->leaf;
 732
 733	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
 734			     &do_rr);
 735
 736	if (do_rr) {
 737		struct rt6_info *next = rt0->dst.rt6_next;
 738
 739		/* no entries matched; do round-robin */
 740		if (!next || next->rt6i_metric != rt0->rt6i_metric)
 741			next = fn->leaf;
 742
 743		if (next != rt0)
 744			fn->rr_ptr = next;
 745	}
 746
 747	net = dev_net(rt0->dst.dev);
 748	return match ? match : net->ipv6.ip6_null_entry;
 749}
 750
 751static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
 752{
 753	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
 754}
 755
 756#ifdef CONFIG_IPV6_ROUTE_INFO
 757int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 758		  const struct in6_addr *gwaddr)
 759{
 760	struct net *net = dev_net(dev);
 761	struct route_info *rinfo = (struct route_info *) opt;
 762	struct in6_addr prefix_buf, *prefix;
 763	unsigned int pref;
 764	unsigned long lifetime;
 765	struct rt6_info *rt;
 766
 767	if (len < sizeof(struct route_info)) {
 768		return -EINVAL;
 769	}
 770
 771	/* Sanity check for prefix_len and length */
 772	if (rinfo->length > 3) {
 773		return -EINVAL;
 774	} else if (rinfo->prefix_len > 128) {
 775		return -EINVAL;
 776	} else if (rinfo->prefix_len > 64) {
 777		if (rinfo->length < 2) {
 778			return -EINVAL;
 779		}
 780	} else if (rinfo->prefix_len > 0) {
 781		if (rinfo->length < 1) {
 782			return -EINVAL;
 783		}
 784	}
 785
 786	pref = rinfo->route_pref;
 787	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 788		return -EINVAL;
 789
 790	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 791
 792	if (rinfo->length == 3)
 793		prefix = (struct in6_addr *)rinfo->prefix;
 794	else {
 795		/* this function is safe */
 796		ipv6_addr_prefix(&prefix_buf,
 797				 (struct in6_addr *)rinfo->prefix,
 798				 rinfo->prefix_len);
 799		prefix = &prefix_buf;
 800	}
 801
 802	if (rinfo->prefix_len == 0)
 803		rt = rt6_get_dflt_router(gwaddr, dev);
 804	else
 805		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
 806					gwaddr, dev->ifindex);
 807
 808	if (rt && !lifetime) {
 809		ip6_del_rt(rt);
 810		rt = NULL;
 811	}
 812
 813	if (!rt && lifetime)
 814		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 815					pref);
 816	else if (rt)
 817		rt->rt6i_flags = RTF_ROUTEINFO |
 818				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 819
 820	if (rt) {
 821		if (!addrconf_finite_timeout(lifetime))
 822			rt6_clean_expires(rt);
 823		else
 824			rt6_set_expires(rt, jiffies + HZ * lifetime);
 825
 826		ip6_rt_put(rt);
 827	}
 828	return 0;
 829}
 830#endif
 831
 832static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
 833					struct in6_addr *saddr)
 834{
 835	struct fib6_node *pn;
 836	while (1) {
 837		if (fn->fn_flags & RTN_TL_ROOT)
 838			return NULL;
 839		pn = fn->parent;
 840		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
 841			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
 842		else
 843			fn = pn;
 844		if (fn->fn_flags & RTN_RTINFO)
 845			return fn;
 846	}
 847}
 848
 849static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 850					     struct fib6_table *table,
 851					     struct flowi6 *fl6, int flags)
 852{
 853	struct fib6_node *fn;
 854	struct rt6_info *rt;
 855
 856	read_lock_bh(&table->tb6_lock);
 857	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 858restart:
 859	rt = fn->leaf;
 860	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 861	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
 862		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
 863	if (rt == net->ipv6.ip6_null_entry) {
 864		fn = fib6_backtrack(fn, &fl6->saddr);
 865		if (fn)
 866			goto restart;
 867	}
 868	dst_use(&rt->dst, jiffies);
 869	read_unlock_bh(&table->tb6_lock);
 870
 871	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
 872
 873	return rt;
 874
 875}
 876
 877struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 878				    int flags)
 879{
 880	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
 881}
 882EXPORT_SYMBOL_GPL(ip6_route_lookup);
 883
 884struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 885			    const struct in6_addr *saddr, int oif, int strict)
 886{
 887	struct flowi6 fl6 = {
 888		.flowi6_oif = oif,
 889		.daddr = *daddr,
 890	};
 891	struct dst_entry *dst;
 892	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 893
 894	if (saddr) {
 895		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 896		flags |= RT6_LOOKUP_F_HAS_SADDR;
 897	}
 898
 899	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 900	if (dst->error == 0)
 901		return (struct rt6_info *) dst;
 902
 903	dst_release(dst);
 904
 905	return NULL;
 906}
 907EXPORT_SYMBOL(rt6_lookup);
 908
 909/* ip6_ins_rt is called with FREE table->tb6_lock.
 910   It takes new route entry, the addition fails by any reason the
 911   route is freed. In any case, if caller does not hold it, it may
 912   be destroyed.
 913 */
 914
 915static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
 916			struct mx6_config *mxc)
 917{
 918	int err;
 919	struct fib6_table *table;
 920
 921	table = rt->rt6i_table;
 922	write_lock_bh(&table->tb6_lock);
 923	err = fib6_add(&table->tb6_root, rt, info, mxc);
 924	write_unlock_bh(&table->tb6_lock);
 925
 926	return err;
 927}
 928
 929int ip6_ins_rt(struct rt6_info *rt)
 930{
 931	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
 932	struct mx6_config mxc = { .mx = NULL, };
 933
 934	return __ip6_ins_rt(rt, &info, &mxc);
 935}
 936
 937static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 938					   const struct in6_addr *daddr,
 939					   const struct in6_addr *saddr)
 940{
 941	struct rt6_info *rt;
 942
 943	/*
 944	 *	Clone the route.
 945	 */
 946
 947	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
 948		ort = (struct rt6_info *)ort->dst.from;
 949
 950	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
 951
 952	if (!rt)
 953		return NULL;
 954
 955	ip6_rt_copy_init(rt, ort);
 956	rt->rt6i_flags |= RTF_CACHE;
 957	rt->rt6i_metric = 0;
 958	rt->dst.flags |= DST_HOST;
 959	rt->rt6i_dst.addr = *daddr;
 960	rt->rt6i_dst.plen = 128;
 961
 962	if (!rt6_is_gw_or_nonexthop(ort)) {
 963		if (ort->rt6i_dst.plen != 128 &&
 964		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 965			rt->rt6i_flags |= RTF_ANYCAST;
 966#ifdef CONFIG_IPV6_SUBTREES
 967		if (rt->rt6i_src.plen && saddr) {
 968			rt->rt6i_src.addr = *saddr;
 969			rt->rt6i_src.plen = 128;
 970		}
 971#endif
 972	}
 973
 974	return rt;
 975}
 976
 977static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
 978{
 979	struct rt6_info *pcpu_rt;
 980
 981	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
 982				  rt->dst.dev, rt->dst.flags);
 983
 984	if (!pcpu_rt)
 985		return NULL;
 986	ip6_rt_copy_init(pcpu_rt, rt);
 987	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
 988	pcpu_rt->rt6i_flags |= RTF_PCPU;
 989	return pcpu_rt;
 990}
 991
 992/* It should be called with read_lock_bh(&tb6_lock) acquired */
 993static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
 994{
 995	struct rt6_info *pcpu_rt, **p;
 996
 997	p = this_cpu_ptr(rt->rt6i_pcpu);
 998	pcpu_rt = *p;
 999
1000	if (pcpu_rt) {
1001		dst_hold(&pcpu_rt->dst);
1002		rt6_dst_from_metrics_check(pcpu_rt);
1003	}
1004	return pcpu_rt;
1005}
1006
1007static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1008{
1009	struct fib6_table *table = rt->rt6i_table;
1010	struct rt6_info *pcpu_rt, *prev, **p;
1011
1012	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1013	if (!pcpu_rt) {
1014		struct net *net = dev_net(rt->dst.dev);
1015
1016		dst_hold(&net->ipv6.ip6_null_entry->dst);
1017		return net->ipv6.ip6_null_entry;
1018	}
1019
1020	read_lock_bh(&table->tb6_lock);
1021	if (rt->rt6i_pcpu) {
1022		p = this_cpu_ptr(rt->rt6i_pcpu);
1023		prev = cmpxchg(p, NULL, pcpu_rt);
1024		if (prev) {
1025			/* If someone did it before us, return prev instead */
1026			dst_destroy(&pcpu_rt->dst);
1027			pcpu_rt = prev;
1028		}
1029	} else {
1030		/* rt has been removed from the fib6 tree
1031		 * before we have a chance to acquire the read_lock.
1032		 * In this case, don't brother to create a pcpu rt
1033		 * since rt is going away anyway.  The next
1034		 * dst_check() will trigger a re-lookup.
1035		 */
1036		dst_destroy(&pcpu_rt->dst);
1037		pcpu_rt = rt;
1038	}
1039	dst_hold(&pcpu_rt->dst);
1040	rt6_dst_from_metrics_check(pcpu_rt);
1041	read_unlock_bh(&table->tb6_lock);
1042	return pcpu_rt;
1043}
1044
1045static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1046				      struct flowi6 *fl6, int flags)
1047{
1048	struct fib6_node *fn, *saved_fn;
1049	struct rt6_info *rt;
1050	int strict = 0;
1051
1052	strict |= flags & RT6_LOOKUP_F_IFACE;
 
1053	if (net->ipv6.devconf_all->forwarding == 0)
1054		strict |= RT6_LOOKUP_F_REACHABLE;
1055
1056	read_lock_bh(&table->tb6_lock);
1057
1058	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1059	saved_fn = fn;
1060
1061	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1062		oif = 0;
1063
1064redo_rt6_select:
1065	rt = rt6_select(fn, oif, strict);
1066	if (rt->rt6i_nsiblings)
1067		rt = rt6_multipath_select(rt, fl6, oif, strict);
1068	if (rt == net->ipv6.ip6_null_entry) {
1069		fn = fib6_backtrack(fn, &fl6->saddr);
1070		if (fn)
1071			goto redo_rt6_select;
1072		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1073			/* also consider unreachable route */
1074			strict &= ~RT6_LOOKUP_F_REACHABLE;
1075			fn = saved_fn;
1076			goto redo_rt6_select;
1077		}
1078	}
1079
1080
1081	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1082		dst_use(&rt->dst, jiffies);
1083		read_unlock_bh(&table->tb6_lock);
1084
1085		rt6_dst_from_metrics_check(rt);
1086
1087		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1088		return rt;
1089	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1090			    !(rt->rt6i_flags & RTF_GATEWAY))) {
1091		/* Create a RTF_CACHE clone which will not be
1092		 * owned by the fib6 tree.  It is for the special case where
1093		 * the daddr in the skb during the neighbor look-up is different
1094		 * from the fl6->daddr used to look-up route here.
1095		 */
1096
1097		struct rt6_info *uncached_rt;
1098
1099		dst_use(&rt->dst, jiffies);
1100		read_unlock_bh(&table->tb6_lock);
1101
1102		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1103		dst_release(&rt->dst);
1104
1105		if (uncached_rt)
1106			rt6_uncached_list_add(uncached_rt);
1107		else
1108			uncached_rt = net->ipv6.ip6_null_entry;
1109
1110		dst_hold(&uncached_rt->dst);
1111
1112		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
1113		return uncached_rt;
1114
1115	} else {
1116		/* Get a percpu copy */
1117
1118		struct rt6_info *pcpu_rt;
1119
1120		rt->dst.lastuse = jiffies;
1121		rt->dst.__use++;
1122		pcpu_rt = rt6_get_pcpu_route(rt);
1123
1124		if (pcpu_rt) {
1125			read_unlock_bh(&table->tb6_lock);
1126		} else {
1127			/* We have to do the read_unlock first
1128			 * because rt6_make_pcpu_route() may trigger
1129			 * ip6_dst_gc() which will take the write_lock.
1130			 */
1131			dst_hold(&rt->dst);
1132			read_unlock_bh(&table->tb6_lock);
1133			pcpu_rt = rt6_make_pcpu_route(rt);
1134			dst_release(&rt->dst);
1135		}
1136
1137		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
1138		return pcpu_rt;
1139
1140	}
1141}
 
1142
1143static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1144					    struct flowi6 *fl6, int flags)
1145{
1146	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1147}
1148
1149static struct dst_entry *ip6_route_input_lookup(struct net *net,
1150						struct net_device *dev,
1151						struct flowi6 *fl6, int flags)
1152{
1153	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1154		flags |= RT6_LOOKUP_F_IFACE;
1155
1156	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1157}
 
1158
1159void ip6_route_input(struct sk_buff *skb)
1160{
1161	const struct ipv6hdr *iph = ipv6_hdr(skb);
1162	struct net *net = dev_net(skb->dev);
1163	int flags = RT6_LOOKUP_F_HAS_SADDR;
1164	struct ip_tunnel_info *tun_info;
1165	struct flowi6 fl6 = {
1166		.flowi6_iif = l3mdev_fib_oif(skb->dev),
1167		.daddr = iph->daddr,
1168		.saddr = iph->saddr,
1169		.flowlabel = ip6_flowinfo(iph),
1170		.flowi6_mark = skb->mark,
1171		.flowi6_proto = iph->nexthdr,
1172	};
1173
1174	tun_info = skb_tunnel_info(skb);
1175	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1176		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1177	skb_dst_drop(skb);
1178	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1179}
1180
1181static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1182					     struct flowi6 *fl6, int flags)
1183{
1184	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1185}
1186
1187struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1188					 struct flowi6 *fl6, int flags)
1189{
1190	struct dst_entry *dst;
1191	bool any_src;
1192
1193	dst = l3mdev_rt6_dst_by_oif(net, fl6);
1194	if (dst)
1195		return dst;
 
 
 
 
1196
1197	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1198
1199	any_src = ipv6_addr_any(&fl6->saddr);
1200	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1201	    (fl6->flowi6_oif && any_src))
1202		flags |= RT6_LOOKUP_F_IFACE;
1203
1204	if (!any_src)
1205		flags |= RT6_LOOKUP_F_HAS_SADDR;
1206	else if (sk)
1207		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1208
1209	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1210}
1211EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1212
1213struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1214{
1215	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1216	struct dst_entry *new = NULL;
1217
1218	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1219	if (rt) {
1220		rt6_info_init(rt);
1221
1222		new = &rt->dst;
1223		new->__use = 1;
1224		new->input = dst_discard;
1225		new->output = dst_discard_out;
1226
1227		dst_copy_metrics(new, &ort->dst);
1228		rt->rt6i_idev = ort->rt6i_idev;
1229		if (rt->rt6i_idev)
1230			in6_dev_hold(rt->rt6i_idev);
1231
1232		rt->rt6i_gateway = ort->rt6i_gateway;
1233		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1234		rt->rt6i_metric = 0;
1235
1236		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1237#ifdef CONFIG_IPV6_SUBTREES
1238		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1239#endif
1240
1241		dst_free(new);
1242	}
1243
1244	dst_release(dst_orig);
1245	return new ? new : ERR_PTR(-ENOMEM);
1246}
1247
1248/*
1249 *	Destination cache support functions
1250 */
1251
1252static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1253{
1254	if (rt->dst.from &&
1255	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1256		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1257}
1258
1259static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1260{
1261	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1262		return NULL;
1263
1264	if (rt6_check_expired(rt))
1265		return NULL;
1266
1267	return &rt->dst;
1268}
1269
1270static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1271{
1272	if (!__rt6_check_expired(rt) &&
1273	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1274	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1275		return &rt->dst;
1276	else
1277		return NULL;
1278}
1279
1280static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1281{
1282	struct rt6_info *rt;
1283
1284	rt = (struct rt6_info *) dst;
1285
1286	/* All IPV6 dsts are created with ->obsolete set to the value
1287	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1288	 * into this function always.
1289	 */
1290
1291	rt6_dst_from_metrics_check(rt);
1292
1293	if (rt->rt6i_flags & RTF_PCPU ||
1294	    (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1295		return rt6_dst_from_check(rt, cookie);
1296	else
1297		return rt6_check(rt, cookie);
1298}
1299
1300static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1301{
1302	struct rt6_info *rt = (struct rt6_info *) dst;
1303
1304	if (rt) {
1305		if (rt->rt6i_flags & RTF_CACHE) {
1306			if (rt6_check_expired(rt)) {
1307				ip6_del_rt(rt);
1308				dst = NULL;
1309			}
1310		} else {
1311			dst_release(dst);
1312			dst = NULL;
1313		}
1314	}
1315	return dst;
1316}
1317
1318static void ip6_link_failure(struct sk_buff *skb)
1319{
1320	struct rt6_info *rt;
1321
1322	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1323
1324	rt = (struct rt6_info *) skb_dst(skb);
1325	if (rt) {
1326		if (rt->rt6i_flags & RTF_CACHE) {
1327			dst_hold(&rt->dst);
1328			ip6_del_rt(rt);
1329		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1330			rt->rt6i_node->fn_sernum = -1;
1331		}
1332	}
1333}
1334
1335static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1336{
1337	struct net *net = dev_net(rt->dst.dev);
1338
1339	rt->rt6i_flags |= RTF_MODIFIED;
1340	rt->rt6i_pmtu = mtu;
1341	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1342}
1343
1344static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1345{
1346	return !(rt->rt6i_flags & RTF_CACHE) &&
1347		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1348}
1349
1350static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1351				 const struct ipv6hdr *iph, u32 mtu)
1352{
1353	struct rt6_info *rt6 = (struct rt6_info *)dst;
1354
1355	if (rt6->rt6i_flags & RTF_LOCAL)
1356		return;
1357
 
 
 
1358	dst_confirm(dst);
1359	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1360	if (mtu >= dst_mtu(dst))
1361		return;
1362
1363	if (!rt6_cache_allowed_for_pmtu(rt6)) {
1364		rt6_do_update_pmtu(rt6, mtu);
1365	} else {
1366		const struct in6_addr *daddr, *saddr;
1367		struct rt6_info *nrt6;
1368
1369		if (iph) {
1370			daddr = &iph->daddr;
1371			saddr = &iph->saddr;
1372		} else if (sk) {
1373			daddr = &sk->sk_v6_daddr;
1374			saddr = &inet6_sk(sk)->saddr;
1375		} else {
1376			return;
1377		}
1378		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1379		if (nrt6) {
1380			rt6_do_update_pmtu(nrt6, mtu);
1381
1382			/* ip6_ins_rt(nrt6) will bump the
1383			 * rt6->rt6i_node->fn_sernum
1384			 * which will fail the next rt6_check() and
1385			 * invalidate the sk->sk_dst_cache.
1386			 */
1387			ip6_ins_rt(nrt6);
1388		}
1389	}
1390}
1391
1392static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1393			       struct sk_buff *skb, u32 mtu)
1394{
1395	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1396}
1397
1398void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1399		     int oif, u32 mark)
1400{
1401	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1402	struct dst_entry *dst;
1403	struct flowi6 fl6;
1404
1405	memset(&fl6, 0, sizeof(fl6));
1406	fl6.flowi6_oif = oif;
1407	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1408	fl6.daddr = iph->daddr;
1409	fl6.saddr = iph->saddr;
1410	fl6.flowlabel = ip6_flowinfo(iph);
 
1411
1412	dst = ip6_route_output(net, NULL, &fl6);
1413	if (!dst->error)
1414		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1415	dst_release(dst);
1416}
1417EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1418
1419void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1420{
1421	struct dst_entry *dst;
1422
1423	ip6_update_pmtu(skb, sock_net(sk), mtu,
1424			sk->sk_bound_dev_if, sk->sk_mark);
1425
1426	dst = __sk_dst_get(sk);
1427	if (!dst || !dst->obsolete ||
1428	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1429		return;
1430
1431	bh_lock_sock(sk);
1432	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1433		ip6_datagram_dst_update(sk, false);
1434	bh_unlock_sock(sk);
1435}
1436EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1437
1438/* Handle redirects */
/* Lookup key used when validating a redirect: the flow plus the address of
 * the router the redirect came from.  __ip6_route_redirect() receives a
 * struct flowi6 pointer and casts it back to this type, so fl6 has to stay
 * the first member.
 */
1439struct ip6rd_flowi {
1440	struct flowi6 fl6;
	/* compared against rt6i_gateway of candidate routes */
1441	struct in6_addr gateway;
1442};
1443
1444static struct rt6_info *__ip6_route_redirect(struct net *net,
1445					     struct fib6_table *table,
1446					     struct flowi6 *fl6,
1447					     int flags)
1448{
1449	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1450	struct rt6_info *rt;
1451	struct fib6_node *fn;
1452
1453	/* Get the "current" route for this destination and
1454	 * check if the redirect has come from approriate router.
1455	 *
1456	 * RFC 4861 specifies that redirects should only be
1457	 * accepted if they come from the nexthop to the target.
1458	 * Due to the way the routes are chosen, this notion
1459	 * is a bit fuzzy and one might need to check all possible
1460	 * routes.
1461	 */
1462
1463	read_lock_bh(&table->tb6_lock);
1464	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1465restart:
1466	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1467		if (rt6_check_expired(rt))
1468			continue;
1469		if (rt->dst.error)
1470			break;
1471		if (!(rt->rt6i_flags & RTF_GATEWAY))
1472			continue;
1473		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1474			continue;
1475		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1476			continue;
1477		break;
1478	}
1479
1480	if (!rt)
1481		rt = net->ipv6.ip6_null_entry;
1482	else if (rt->dst.error) {
1483		rt = net->ipv6.ip6_null_entry;
1484		goto out;
1485	}
1486
1487	if (rt == net->ipv6.ip6_null_entry) {
1488		fn = fib6_backtrack(fn, &fl6->saddr);
1489		if (fn)
1490			goto restart;
1491	}
1492
1493out:
1494	dst_hold(&rt->dst);
1495
1496	read_unlock_bh(&table->tb6_lock);
1497
1498	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1499	return rt;
1500};
1501
1502static struct dst_entry *ip6_route_redirect(struct net *net,
1503					const struct flowi6 *fl6,
1504					const struct in6_addr *gateway)
1505{
1506	int flags = RT6_LOOKUP_F_HAS_SADDR;
1507	struct ip6rd_flowi rdfl;
1508
1509	rdfl.fl6 = *fl6;
1510	rdfl.gateway = *gateway;
1511
1512	return fib6_rule_lookup(net, &rdfl.fl6,
1513				flags, __ip6_route_redirect);
1514}
1515
1516void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
 
1517{
1518	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1519	struct dst_entry *dst;
1520	struct flowi6 fl6;
1521
1522	memset(&fl6, 0, sizeof(fl6));
1523	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1524	fl6.flowi6_oif = oif;
1525	fl6.flowi6_mark = mark;
1526	fl6.daddr = iph->daddr;
1527	fl6.saddr = iph->saddr;
1528	fl6.flowlabel = ip6_flowinfo(iph);
 
1529
1530	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1531	rt6_do_redirect(dst, NULL, skb);
1532	dst_release(dst);
1533}
1534EXPORT_SYMBOL_GPL(ip6_redirect);
1535
1536void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1537			    u32 mark)
1538{
1539	const struct ipv6hdr *iph = ipv6_hdr(skb);
1540	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1541	struct dst_entry *dst;
1542	struct flowi6 fl6;
1543
1544	memset(&fl6, 0, sizeof(fl6));
1545	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1546	fl6.flowi6_oif = oif;
1547	fl6.flowi6_mark = mark;
1548	fl6.daddr = msg->dest;
1549	fl6.saddr = iph->daddr;
 
1550
1551	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1552	rt6_do_redirect(dst, NULL, skb);
1553	dst_release(dst);
1554}
1555
1556void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1557{
1558	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
 
1559}
1560EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1561
1562static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1563{
1564	struct net_device *dev = dst->dev;
1565	unsigned int mtu = dst_mtu(dst);
1566	struct net *net = dev_net(dev);
1567
1568	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1569
1570	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1571		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1572
1573	/*
1574	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1575	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1576	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1577	 * rely only on pmtu discovery"
1578	 */
1579	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1580		mtu = IPV6_MAXPLEN;
1581	return mtu;
1582}
1583
1584static unsigned int ip6_mtu(const struct dst_entry *dst)
1585{
1586	const struct rt6_info *rt = (const struct rt6_info *)dst;
1587	unsigned int mtu = rt->rt6i_pmtu;
1588	struct inet6_dev *idev;
1589
1590	if (mtu)
1591		goto out;
1592
1593	mtu = dst_metric_raw(dst, RTAX_MTU);
1594	if (mtu)
1595		goto out;
1596
1597	mtu = IPV6_MIN_MTU;
1598
1599	rcu_read_lock();
1600	idev = __in6_dev_get(dst->dev);
1601	if (idev)
1602		mtu = idev->cnf.mtu6;
1603	rcu_read_unlock();
1604
1605out:
1606	return min_t(unsigned int, mtu, IP6_MAX_MTU);
 
 
1607}
1608
/* Head of a singly linked list (chained through dst_entry.next) holding the
 * dsts created by icmp6_dst_alloc(); entries are unlinked and freed by
 * icmp6_dst_gc() and icmp6_clean_all().
 */
1609static struct dst_entry *icmp6_dst_gc_list;
/* Taken (bh-safe) around every access to icmp6_dst_gc_list in this file. */
1610static DEFINE_SPINLOCK(icmp6_dst_lock);
1611
1612struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1613				  struct flowi6 *fl6)
1614{
1615	struct dst_entry *dst;
1616	struct rt6_info *rt;
1617	struct inet6_dev *idev = in6_dev_get(dev);
1618	struct net *net = dev_net(dev);
1619
1620	if (unlikely(!idev))
1621		return ERR_PTR(-ENODEV);
1622
1623	rt = ip6_dst_alloc(net, dev, 0);
1624	if (unlikely(!rt)) {
1625		in6_dev_put(idev);
1626		dst = ERR_PTR(-ENOMEM);
1627		goto out;
1628	}
1629
1630	rt->dst.flags |= DST_HOST;
1631	rt->dst.output  = ip6_output;
1632	atomic_set(&rt->dst.__refcnt, 1);
1633	rt->rt6i_gateway  = fl6->daddr;
1634	rt->rt6i_dst.addr = fl6->daddr;
1635	rt->rt6i_dst.plen = 128;
1636	rt->rt6i_idev     = idev;
1637	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1638
1639	spin_lock_bh(&icmp6_dst_lock);
1640	rt->dst.next = icmp6_dst_gc_list;
1641	icmp6_dst_gc_list = &rt->dst;
1642	spin_unlock_bh(&icmp6_dst_lock);
1643
1644	fib6_force_start_gc(net);
1645
1646	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1647
1648out:
1649	return dst;
1650}
1651
1652int icmp6_dst_gc(void)
1653{
1654	struct dst_entry *dst, **pprev;
1655	int more = 0;
1656
1657	spin_lock_bh(&icmp6_dst_lock);
1658	pprev = &icmp6_dst_gc_list;
1659
1660	while ((dst = *pprev) != NULL) {
1661		if (!atomic_read(&dst->__refcnt)) {
1662			*pprev = dst->next;
1663			dst_free(dst);
1664		} else {
1665			pprev = &dst->next;
1666			++more;
1667		}
1668	}
1669
1670	spin_unlock_bh(&icmp6_dst_lock);
1671
1672	return more;
1673}
1674
1675static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1676			    void *arg)
1677{
1678	struct dst_entry *dst, **pprev;
1679
1680	spin_lock_bh(&icmp6_dst_lock);
1681	pprev = &icmp6_dst_gc_list;
1682	while ((dst = *pprev) != NULL) {
1683		struct rt6_info *rt = (struct rt6_info *) dst;
1684		if (func(rt, arg)) {
1685			*pprev = dst->next;
1686			dst_free(dst);
1687		} else {
1688			pprev = &dst->next;
1689		}
1690	}
1691	spin_unlock_bh(&icmp6_dst_lock);
1692}
1693
1694static int ip6_dst_gc(struct dst_ops *ops)
1695{
1696	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1697	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1698	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1699	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1700	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1701	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1702	int entries;
1703
1704	entries = dst_entries_get_fast(ops);
1705	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1706	    entries <= rt_max_size)
1707		goto out;
1708
1709	net->ipv6.ip6_rt_gc_expire++;
1710	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1711	entries = dst_entries_get_slow(ops);
1712	if (entries < ops->gc_thresh)
1713		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1714out:
1715	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1716	return entries > rt_max_size;
1717}
1718
1719static int ip6_convert_metrics(struct mx6_config *mxc,
1720			       const struct fib6_config *cfg)
1721{
1722	bool ecn_ca = false;
1723	struct nlattr *nla;
1724	int remaining;
1725	u32 *mp;
1726
1727	if (!cfg->fc_mx)
1728		return 0;
1729
1730	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1731	if (unlikely(!mp))
1732		return -ENOMEM;
1733
1734	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1735		int type = nla_type(nla);
1736		u32 val;
1737
1738		if (!type)
1739			continue;
1740		if (unlikely(type > RTAX_MAX))
1741			goto err;
1742
1743		if (type == RTAX_CC_ALGO) {
1744			char tmp[TCP_CA_NAME_MAX];
1745
1746			nla_strlcpy(tmp, nla, sizeof(tmp));
1747			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1748			if (val == TCP_CA_UNSPEC)
1749				goto err;
1750		} else {
1751			val = nla_get_u32(nla);
1752		}
1753		if (type == RTAX_HOPLIMIT && val > 255)
1754			val = 255;
1755		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1756			goto err;
1757
1758		mp[type - 1] = val;
1759		__set_bit(type - 1, mxc->mx_valid);
1760	}
1761
1762	if (ecn_ca) {
1763		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1764		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1765	}
1766
1767	mxc->mx = mp;
1768	return 0;
1769 err:
1770	kfree(mp);
1771	return -EINVAL;
1772}
1773
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Build a struct rt6_info from the route configuration in @cfg without
 * inserting it into the FIB.
 *
 * On success the new route is returned with rt->dst.dev, rt->rt6i_idev and
 * rt->rt6i_table filled in; the device and inet6_dev references obtained
 * here are not dropped on that path.  As side effects on @cfg, a zero
 * fc_metric becomes IP6_RT_PRIO_USER, an RTPROT_UNSPEC fc_protocol becomes
 * RTPROT_BOOT, and fc_nlinfo.nl_net is overwritten with the netns of the
 * chosen device.
 *
 * On failure every reference taken so far is dropped, a partially built
 * route is released with dst_free(), and ERR_PTR(err) is returned.
 *
 * Note that "err" is preset before each group of checks so that a bare
 * "goto out" reports the right code.
 */
1774static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1775{
1776	struct net *net = cfg->fc_nlinfo.nl_net;
1777	struct rt6_info *rt = NULL;
1778	struct net_device *dev = NULL;
1779	struct inet6_dev *idev = NULL;
1780	struct fib6_table *table;
1781	int addr_type;
1782	int err = -EINVAL;
1783
1784	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1785		goto out;
1786#ifndef CONFIG_IPV6_SUBTREES
	/* source-specific routes need subtree support */
1787	if (cfg->fc_src_len)
1788		goto out;
1789#endif
1790	if (cfg->fc_ifindex) {
1791		err = -ENODEV;
1792		dev = dev_get_by_index(net, cfg->fc_ifindex);
1793		if (!dev)
1794			goto out;
1795		idev = in6_dev_get(dev);
1796		if (!idev)
1797			goto out;
1798	}
1799
1800	if (cfg->fc_metric == 0)
1801		cfg->fc_metric = IP6_RT_PRIO_USER;
1802
	/* -ENOBUFS is what a failed table lookup/creation below reports */
1803	err = -ENOBUFS;
	/* Netlink request without NLM_F_CREATE: use the table if it already
	 * exists, otherwise warn and create it anyway.
	 */
1804	if (cfg->fc_nlinfo.nlh &&
1805	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1806		table = fib6_get_table(net, cfg->fc_table);
1807		if (!table) {
1808			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1809			table = fib6_new_table(net, cfg->fc_table);
1810		}
1811	} else {
1812		table = fib6_new_table(net, cfg->fc_table);
1813	}
1814
1815	if (!table)
1816		goto out;
1817
1818	rt = ip6_dst_alloc(net, NULL,
1819			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1820
1821	if (!rt) {
1822		err = -ENOMEM;
1823		goto out;
1824	}
1825
1826	if (cfg->fc_flags & RTF_EXPIRES)
1827		rt6_set_expires(rt, jiffies +
1828				clock_t_to_jiffies(cfg->fc_expires));
1829	else
1830		rt6_clean_expires(rt);
1831
1832	if (cfg->fc_protocol == RTPROT_UNSPEC)
1833		cfg->fc_protocol = RTPROT_BOOT;
1834	rt->rt6i_protocol = cfg->fc_protocol;
1835
1836	addr_type = ipv6_addr_type(&cfg->fc_dst);
1837
	/* pick the input handler from the destination type / RTF_LOCAL */
1838	if (addr_type & IPV6_ADDR_MULTICAST)
1839		rt->dst.input = ip6_mc_input;
1840	else if (cfg->fc_flags & RTF_LOCAL)
1841		rt->dst.input = ip6_input;
1842	else
1843		rt->dst.input = ip6_forward;
1844
1845	rt->dst.output = ip6_output;
1846
	/* Optional lightweight-tunnel encapsulation: the tunnel state may
	 * take over the output and/or input handler, remembering the
	 * original one in orig_output / orig_input.
	 */
1847	if (cfg->fc_encap) {
1848		struct lwtunnel_state *lwtstate;
1849
1850		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1851					   cfg->fc_encap, AF_INET6, cfg,
1852					   &lwtstate);
1853		if (err)
1854			goto out;
1855		rt->dst.lwtstate = lwtstate_get(lwtstate);
1856		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1857			rt->dst.lwtstate->orig_output = rt->dst.output;
1858			rt->dst.output = lwtunnel_output;
1859		}
1860		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1861			rt->dst.lwtstate->orig_input = rt->dst.input;
1862			rt->dst.input = lwtunnel_input;
1863		}
1864	}
1865
1866	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1867	rt->rt6i_dst.plen = cfg->fc_dst_len;
1868	if (rt->rt6i_dst.plen == 128)
1869		rt->dst.flags |= DST_HOST;
1870
1871#ifdef CONFIG_IPV6_SUBTREES
1872	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1873	rt->rt6i_src.plen = cfg->fc_src_len;
1874#endif
1875
1876	rt->rt6i_metric = cfg->fc_metric;
1877
1878	/* We cannot add true routes via loopback here,
1879	   they would result in kernel looping; promote them to reject routes
1880	 */
1881	if ((cfg->fc_flags & RTF_REJECT) ||
1882	    (dev && (dev->flags & IFF_LOOPBACK) &&
1883	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1884	     !(cfg->fc_flags & RTF_LOCAL))) {
1885		/* hold loopback dev/idev if we haven't done so. */
1886		if (dev != net->loopback_dev) {
1887			if (dev) {
1888				dev_put(dev);
1889				in6_dev_put(idev);
1890			}
1891			dev = net->loopback_dev;
1892			dev_hold(dev);
1893			idev = in6_dev_get(dev);
1894			if (!idev) {
1895				err = -ENODEV;
1896				goto out;
1897			}
1898		}
1899		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* error code and drop handlers depend on the route type */
1900		switch (cfg->fc_type) {
1901		case RTN_BLACKHOLE:
1902			rt->dst.error = -EINVAL;
1903			rt->dst.output = dst_discard_out;
1904			rt->dst.input = dst_discard;
1905			break;
1906		case RTN_PROHIBIT:
1907			rt->dst.error = -EACCES;
1908			rt->dst.output = ip6_pkt_prohibit_out;
1909			rt->dst.input = ip6_pkt_prohibit;
1910			break;
1911		case RTN_THROW:
1912		case RTN_UNREACHABLE:
1913		default:
1914			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1915					: (cfg->fc_type == RTN_UNREACHABLE)
1916					? -EHOSTUNREACH : -ENETUNREACH;
1917			rt->dst.output = ip6_pkt_discard_out;
1918			rt->dst.input = ip6_pkt_discard;
1919			break;
1920		}
		/* reject routes skip the gateway and prefsrc validation */
1921		goto install_route;
1922	}
1923
1924	if (cfg->fc_flags & RTF_GATEWAY) {
1925		const struct in6_addr *gw_addr;
1926		int gwa_type;
1927
1928		gw_addr = &cfg->fc_gateway;
1929		gwa_type = ipv6_addr_type(gw_addr);
1930
1931		/* if gw_addr is local we will fail to detect this in case
1932		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1933		 * will return already-added prefix route via interface that
1934		 * prefix route was assigned to, which might be non-loopback.
1935		 */
1936		err = -EINVAL;
1937		if (ipv6_chk_addr_and_flags(net, gw_addr,
1938					    gwa_type & IPV6_ADDR_LINKLOCAL ?
1939					    dev : NULL, 0, 0))
1940			goto out;
1941
1942		rt->rt6i_gateway = *gw_addr;
1943
1944		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1945			struct rt6_info *grt;
1946
1947			/* IPv6 strictly inhibits using not link-local
1948			   addresses as nexthop address.
1949			   Otherwise, router will not able to send redirects.
1950			   It is very good, but in some (rare!) circumstances
1951			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1952			   some exceptions. --ANK


1953			 */
1954			if (!(gwa_type & IPV6_ADDR_UNICAST))

1955				goto out;
1956
1957			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);














1958
1959			err = -EHOSTUNREACH;
1960			if (!grt)
1961				goto out;
1962			if (dev) {
1963				if (dev != grt->dst.dev) {
1964					ip6_rt_put(grt);
1965					goto out;
1966				}
1967			} else {
				/* no device requested: adopt the one the
				 * gateway is reached through, with our own
				 * references
				 */
1968				dev = grt->dst.dev;
1969				idev = grt->rt6i_idev;
1970				dev_hold(dev);
1971				in6_dev_hold(grt->rt6i_idev);
1972			}
			/* err stays -EHOSTUNREACH when the route to the
			 * gateway is itself a gateway route
			 */
1973			if (!(grt->rt6i_flags & RTF_GATEWAY))
1974				err = 0;
1975			ip6_rt_put(grt);
1976
1977			if (err)
1978				goto out;
1979		}
1980		err = -EINVAL;
1981		if (!dev || (dev->flags & IFF_LOOPBACK))
1982			goto out;
1983	}
1984
1985	err = -ENODEV;
1986	if (!dev)
1987		goto out;
1988
	/* a preferred source address must be usable on the chosen device */
1989	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1990		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1991			err = -EINVAL;
1992			goto out;
1993		}
1994		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1995		rt->rt6i_prefsrc.plen = 128;
1996	} else
1997		rt->rt6i_prefsrc.plen = 0;
1998
1999	rt->rt6i_flags = cfg->fc_flags;
2000
2001install_route:
2002	rt->dst.dev = dev;
2003	rt->rt6i_idev = idev;
2004	rt->rt6i_table = table;
2005
2006	cfg->fc_nlinfo.nl_net = dev_net(dev);
2007
2008	return rt;
2009out:
2010	if (dev)
2011		dev_put(dev);
2012	if (idev)
2013		in6_dev_put(idev);
2014	if (rt)
2015		dst_free(&rt->dst);
2016
2017	return ERR_PTR(err);
2018}
2019
2020int ip6_route_add(struct fib6_config *cfg)
2021{
2022	struct mx6_config mxc = { .mx = NULL, };
2023	struct rt6_info *rt;
2024	int err;
2025
2026	rt = ip6_route_info_create(cfg);
2027	if (IS_ERR(rt)) {
2028		err = PTR_ERR(rt);
2029		rt = NULL;
2030		goto out;
2031	}
2032
2033	err = ip6_convert_metrics(&mxc, cfg);
2034	if (err)
2035		goto out;
2036
2037	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2038
2039	kfree(mxc.mx);
2040
2041	return err;
2042out:
2043	if (rt)
2044		dst_free(&rt->dst);
2045
2046	return err;
2047}
2048
2049static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2050{
2051	int err;
2052	struct fib6_table *table;
2053	struct net *net = dev_net(rt->dst.dev);
2054
2055	if (rt == net->ipv6.ip6_null_entry ||
2056	    rt->dst.flags & DST_NOCACHE) {
2057		err = -ENOENT;
2058		goto out;
2059	}
2060
2061	table = rt->rt6i_table;
2062	write_lock_bh(&table->tb6_lock);
2063	err = fib6_del(rt, info);
2064	write_unlock_bh(&table->tb6_lock);
2065
2066out:
2067	ip6_rt_put(rt);
2068	return err;
2069}
2070
2071int ip6_del_rt(struct rt6_info *rt)
2072{
2073	struct nl_info info = {
2074		.nl_net = dev_net(rt->dst.dev),
2075	};
2076	return __ip6_del_rt(rt, &info);
2077}
2078
2079static int ip6_route_del(struct fib6_config *cfg)
2080{
2081	struct fib6_table *table;
2082	struct fib6_node *fn;
2083	struct rt6_info *rt;
2084	int err = -ESRCH;
2085
2086	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2087	if (!table)
2088		return err;
2089
2090	read_lock_bh(&table->tb6_lock);
2091
2092	fn = fib6_locate(&table->tb6_root,
2093			 &cfg->fc_dst, cfg->fc_dst_len,
2094			 &cfg->fc_src, cfg->fc_src_len);
2095
2096	if (fn) {
2097		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2098			if ((rt->rt6i_flags & RTF_CACHE) &&
2099			    !(cfg->fc_flags & RTF_CACHE))
2100				continue;
2101			if (cfg->fc_ifindex &&
2102			    (!rt->dst.dev ||
2103			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2104				continue;
2105			if (cfg->fc_flags & RTF_GATEWAY &&
2106			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2107				continue;
2108			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2109				continue;
 
 
2110			dst_hold(&rt->dst);
2111			read_unlock_bh(&table->tb6_lock);
2112
2113			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2114		}
2115	}
2116	read_unlock_bh(&table->tb6_lock);
2117
2118	return err;
2119}
2120
/*
 * Process a received ICMPv6 Redirect.
 *
 * @dst is the route the callers in this file looked up for the redirected
 * flow, @skb is the redirect packet; @sk is not used by this function.
 *
 * The message is validated (length, non-multicast destination, link-local
 * unicast target unless target == destination, redirects accepted on the
 * receiving interface, well-formed ND options, @dst not a reject route).
 * If it passes, the neighbour entry for the target is updated and a
 * RTF_CACHE clone of @dst for the redirected destination, pointing at the
 * new next hop, is inserted; NETEVENT_REDIRECT is then raised and, when
 * @dst was itself a cache entry, it is deleted.
 *
 * Every failure simply returns; nothing is reported to the caller.
 */
2121static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2122{
2123	struct netevent_redirect netevent;
2124	struct rt6_info *rt, *nrt = NULL;
2125	struct ndisc_options ndopts;
2126	struct inet6_dev *in6_dev;
2127	struct neighbour *neigh;
2128	struct rd_msg *msg;
2129	int optlen, on_link;
2130	u8 *lladdr;
2131
	/* bytes that follow the fixed redirect header, i.e. the ND options */
2132	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2133	optlen -= sizeof(*msg);
2134
2135	if (optlen < 0) {
2136		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2137		return;
2138	}
2139
2140	msg = (struct rd_msg *)icmp6_hdr(skb);
2141
2142	if (ipv6_addr_is_multicast(&msg->dest)) {
2143		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2144		return;
2145	}
2146
	/* target == destination means the destination itself is on-link;
	 * otherwise the target is a router and must be link-local unicast
	 */
2147	on_link = 0;
2148	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2149		on_link = 1;
2150	} else if (ipv6_addr_type(&msg->target) !=
2151		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2152		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2153		return;
2154	}
2155
2156	in6_dev = __in6_dev_get(skb->dev);
2157	if (!in6_dev)
2158		return;
	/* ignore redirects when forwarding or when they are disabled */
2159	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2160		return;
2161
2162	/* RFC2461 8.1:
2163	 *	The IP source address of the Redirect MUST be the same as the current
2164	 *	first-hop router for the specified ICMP Destination Address.
2165	 */
2166
2167	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2168		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2169		return;
2170	}
2171
	/* the target link-layer address option is optional */
2172	lladdr = NULL;
2173	if (ndopts.nd_opts_tgt_lladdr) {
2174		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2175					     skb->dev);
2176		if (!lladdr) {
2177			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2178			return;
2179		}
2180	}
2181
2182	rt = (struct rt6_info *) dst;
2183	if (rt->rt6i_flags & RTF_REJECT) {
2184		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2185		return;
2186	}
2187
2188	/* Redirect received -> path was valid.
2189	 * Look, redirects are sent only in response to data packets,
2190	 * so that this nexthop apparently is reachable. --ANK
2191	 */
2192	dst_confirm(&rt->dst);
2193
2194	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2195	if (!neigh)
2196		return;
2197
2198	/*
2199	 *	We have finally decided to accept it.
2200	 */
2201
	/* the ISROUTER flags are only passed when the target is a router */
2202	neigh_update(neigh, lladdr, NUD_STALE,
2203		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
2204		     NEIGH_UPDATE_F_OVERRIDE|
2205		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2206				     NEIGH_UPDATE_F_ISROUTER))
2207		     );
2208
2209	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2210	if (!nrt)
2211		goto out;
2212
2213	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2214	if (on_link)
2215		nrt->rt6i_flags &= ~RTF_GATEWAY;
2216
2217	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2218
2219	if (ip6_ins_rt(nrt))
2220		goto out;
2221
2222	netevent.old = &rt->dst;
2223	netevent.new = &nrt->dst;
2224	netevent.daddr = &msg->dest;
2225	netevent.neigh = neigh;
2226	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2227
	/* the old cache entry is superseded; an extra reference is taken
	 * for ip6_del_rt() via dst_clone()
	 */
2228	if (rt->rt6i_flags & RTF_CACHE) {
2229		rt = (struct rt6_info *) dst_clone(&rt->dst);
2230		ip6_del_rt(rt);
2231	}
2232
2233out:
2234	neigh_release(neigh);
2235}
2236
2237/*
2238 *	Misc support functions
2239 */
2240
2241static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2242{
2243	BUG_ON(from->dst.from);
2244
2245	rt->rt6i_flags &= ~RTF_EXPIRES;
2246	dst_hold(&from->dst);
2247	rt->dst.from = &from->dst;
2248	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2249}
2250
2251static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2252{
2253	rt->dst.input = ort->dst.input;
2254	rt->dst.output = ort->dst.output;
2255	rt->rt6i_dst = ort->rt6i_dst;
2256	rt->dst.error = ort->dst.error;
2257	rt->rt6i_idev = ort->rt6i_idev;
2258	if (rt->rt6i_idev)
2259		in6_dev_hold(rt->rt6i_idev);
2260	rt->dst.lastuse = jiffies;
2261	rt->rt6i_gateway = ort->rt6i_gateway;
2262	rt->rt6i_flags = ort->rt6i_flags;
2263	rt6_set_from(rt, ort);
2264	rt->rt6i_metric = ort->rt6i_metric;
2265#ifdef CONFIG_IPV6_SUBTREES
2266	rt->rt6i_src = ort->rt6i_src;
2267#endif
2268	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2269	rt->rt6i_table = ort->rt6i_table;
2270	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2271}
2272
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Find the route-information route for @prefix/@prefixlen via @gwaddr on
 * interface @ifindex.
 *
 * The table searched is the l3mdev table of @ifindex when there is one,
 * otherwise RT6_TABLE_INFO - the same choice rt6_add_route_info() makes
 * when installing such a route, so that a route added for an l3mdev-bound
 * interface can be found again.
 *
 * Returns the route with a reference held, or NULL if none matches.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex)
{
	u32 tb_id = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, tb_id);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		/* only gateway routes learned from route information count */
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}

/* Install a route-information route for @prefix/@prefixlen via @gwaddr on
 * interface @ifindex with router preference @pref, then look it up again.
 *
 * The result of ip6_route_add() is not checked; if the add failed the
 * lookup simply finds nothing.
 *
 * Returns the route with a reference held, or NULL.
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	/* keep in sync with the table choice in rt6_get_route_info() */
	cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
	cfg.fc_dst = *prefix;
	cfg.fc_gateway = *gwaddr;

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
#endif
2335
2336struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2337{
 
2338	struct rt6_info *rt;
2339	struct fib6_table *table;
2340
2341	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2342	if (!table)
2343		return NULL;
2344
2345	read_lock_bh(&table->tb6_lock);
2346	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2347		if (dev == rt->dst.dev &&
2348		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2349		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2350			break;
2351	}
2352	if (rt)
2353		dst_hold(&rt->dst);
2354	read_unlock_bh(&table->tb6_lock);
2355	return rt;
2356}
2357
2358struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2359				     struct net_device *dev,
2360				     unsigned int pref)
2361{
2362	struct fib6_config cfg = {
2363		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2364		.fc_metric	= IP6_RT_PRIO_USER,
2365		.fc_ifindex	= dev->ifindex,
2366		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2367				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2368		.fc_nlinfo.portid = 0,
2369		.fc_nlinfo.nlh = NULL,
2370		.fc_nlinfo.nl_net = dev_net(dev),
2371	};
2372
2373	cfg.fc_gateway = *gwaddr;
2374
2375	ip6_route_add(&cfg);
 
 
 
 
 
 
2376
2377	return rt6_get_dflt_router(gwaddr, dev);
2378}
2379
2380void rt6_purge_dflt_routers(struct net *net)
2381{
2382	struct rt6_info *rt;
2383	struct fib6_table *table;
2384
2385	/* NOTE: Keep consistent with rt6_get_dflt_router */
2386	table = fib6_get_table(net, RT6_TABLE_DFLT);
2387	if (!table)
2388		return;
2389
2390restart:
2391	read_lock_bh(&table->tb6_lock);
2392	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2393		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2394		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2395			dst_hold(&rt->dst);
2396			read_unlock_bh(&table->tb6_lock);
2397			ip6_del_rt(rt);
2398			goto restart;
2399		}
2400	}
2401	read_unlock_bh(&table->tb6_lock);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2402}
2403
2404static void rtmsg_to_fib6_config(struct net *net,
2405				 struct in6_rtmsg *rtmsg,
2406				 struct fib6_config *cfg)
2407{
2408	memset(cfg, 0, sizeof(*cfg));
2409
2410	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2411			 : RT6_TABLE_MAIN;
2412	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2413	cfg->fc_metric = rtmsg->rtmsg_metric;
2414	cfg->fc_expires = rtmsg->rtmsg_info;
2415	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2416	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2417	cfg->fc_flags = rtmsg->rtmsg_flags;
2418
2419	cfg->fc_nlinfo.nl_net = net;
2420
2421	cfg->fc_dst = rtmsg->rtmsg_dst;
2422	cfg->fc_src = rtmsg->rtmsg_src;
2423	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2424}
2425
2426int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2427{
2428	struct fib6_config cfg;
2429	struct in6_rtmsg rtmsg;
2430	int err;
2431
2432	switch (cmd) {
2433	case SIOCADDRT:		/* Add a route */
2434	case SIOCDELRT:		/* Delete a route */
2435		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2436			return -EPERM;
2437		err = copy_from_user(&rtmsg, arg,
2438				     sizeof(struct in6_rtmsg));
2439		if (err)
2440			return -EFAULT;
2441
2442		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2443
2444		rtnl_lock();
2445		switch (cmd) {
2446		case SIOCADDRT:
2447			err = ip6_route_add(&cfg);
2448			break;
2449		case SIOCDELRT:
2450			err = ip6_route_del(&cfg);
2451			break;
2452		default:
2453			err = -EINVAL;
2454		}
2455		rtnl_unlock();
2456
2457		return err;
2458	}
2459
2460	return -EINVAL;
2461}
2462
2463/*
2464 *	Drop the packet on the floor
2465 */
2466
2467static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2468{
2469	int type;
2470	struct dst_entry *dst = skb_dst(skb);
2471	switch (ipstats_mib_noroutes) {
2472	case IPSTATS_MIB_INNOROUTES:
2473		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2474		if (type == IPV6_ADDR_ANY) {
2475			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2476				      IPSTATS_MIB_INADDRERRORS);
2477			break;
2478		}
2479		/* FALLTHROUGH */
2480	case IPSTATS_MIB_OUTNOROUTES:
2481		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2482			      ipstats_mib_noroutes);
2483		break;
2484	}
2485	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2486	kfree_skb(skb);
2487	return 0;
2488}
2489
2490static int ip6_pkt_discard(struct sk_buff *skb)
2491{
2492	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2493}
2494
2495static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2496{
2497	skb->dev = skb_dst(skb)->dev;
2498	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2499}
2500
2501static int ip6_pkt_prohibit(struct sk_buff *skb)
2502{
2503	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2504}
2505
2506static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2507{
2508	skb->dev = skb_dst(skb)->dev;
2509	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2510}
2511
2512/*
2513 *	Allocate a dst for local (unicast / anycast) address.
2514 */
2515
2516struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2517				    const struct in6_addr *addr,
2518				    bool anycast)
2519{
2520	u32 tb_id;
2521	struct net *net = dev_net(idev->dev);
2522	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2523					    DST_NOCOUNT);
 
 
 
 
 
 
 
 
2524	if (!rt)
2525		return ERR_PTR(-ENOMEM);
2526
2527	in6_dev_hold(idev);
2528
2529	rt->dst.flags |= DST_HOST;
2530	rt->dst.input = ip6_input;
2531	rt->dst.output = ip6_output;
2532	rt->rt6i_idev = idev;
2533
2534	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2535	if (anycast)
2536		rt->rt6i_flags |= RTF_ANYCAST;
2537	else
2538		rt->rt6i_flags |= RTF_LOCAL;
2539
2540	rt->rt6i_gateway  = *addr;
2541	rt->rt6i_dst.addr = *addr;
2542	rt->rt6i_dst.plen = 128;
2543	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2544	rt->rt6i_table = fib6_get_table(net, tb_id);
2545	rt->dst.flags |= DST_NOCACHE;
2546
2547	atomic_set(&rt->dst.__refcnt, 1);
2548
2549	return rt;
2550}
2551
2552int ip6_route_get_saddr(struct net *net,
2553			struct rt6_info *rt,
2554			const struct in6_addr *daddr,
2555			unsigned int prefs,
2556			struct in6_addr *saddr)
2557{
2558	struct inet6_dev *idev =
2559		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2560	int err = 0;
2561	if (rt && rt->rt6i_prefsrc.plen)
2562		*saddr = rt->rt6i_prefsrc.addr;
2563	else
2564		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2565					 daddr, prefs, saddr);
2566	return err;
2567}
2568
2569/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
2570struct arg_dev_net_ip {
2571	struct net_device *dev;	/* only routes on this device; NULL matches any */
2572	struct net *net;	/* namespace whose ip6_null_entry is skipped */
2573	struct in6_addr *addr;	/* address compared against rt6i_prefsrc.addr */
2574};
2575
2576static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2577{
2578	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2579	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2580	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2581
2582	if (((void *)rt->dst.dev == dev || !dev) &&
2583	    rt != net->ipv6.ip6_null_entry &&
2584	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2585		/* remove prefsrc entry */
2586		rt->rt6i_prefsrc.plen = 0;
2587	}
2588	return 0;
2589}
2590
2591void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2592{
2593	struct net *net = dev_net(ifp->idev->dev);
2594	struct arg_dev_net_ip adni = {
2595		.dev = ifp->idev->dev,
2596		.net = net,
2597		.addr = &ifp->addr,
2598	};
2599	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2600}
2601
2602#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2603#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2604
2605/* Remove routers and update dst entries when gateway turn into host. */
2606static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2607{
2608	struct in6_addr *gateway = (struct in6_addr *)arg;
2609
2610	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2611	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2612	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2613		return -1;
2614	}
2615	return 0;
2616}
2617
2618void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2619{
2620	fib6_clean_all(net, fib6_clean_tohost, gateway);
2621}
2622
/* Argument bundle handed to fib6_ifdown(). */
2623struct arg_dev_net {
2624	struct net_device *dev;	/* device to match; NULL matches every route */
2625	struct net *net;	/* namespace whose ip6_null_entry is spared */
2626};
2627
2628static int fib6_ifdown(struct rt6_info *rt, void *arg)
2629{
2630	const struct arg_dev_net *adn = arg;
2631	const struct net_device *dev = adn->dev;
2632
2633	if ((rt->dst.dev == dev || !dev) &&
2634	    rt != adn->net->ipv6.ip6_null_entry)
2635		return -1;
2636
2637	return 0;
2638}
2639
2640void rt6_ifdown(struct net *net, struct net_device *dev)
2641{
2642	struct arg_dev_net adn = {
2643		.dev = dev,
2644		.net = net,
2645	};
2646
2647	fib6_clean_all(net, fib6_ifdown, &adn);
2648	icmp6_clean_all(fib6_ifdown, &adn);
2649	if (dev)
2650		rt6_uncached_list_flush_dev(net, dev);
2651}
2652
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
2653struct rt6_mtu_change_arg {
2654	struct net_device *dev;	/* device whose MTU changed */
2655	unsigned int mtu;	/* the new MTU */
2656};
2657
2658static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2659{
2660	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2661	struct inet6_dev *idev;
2662
2663	/* In IPv6 pmtu discovery is not optional,
2664	   so that RTAX_MTU lock cannot disable it.
2665	   We still use this lock to block changes
2666	   caused by addrconf/ndisc.
2667	*/
2668
2669	idev = __in6_dev_get(arg->dev);
2670	if (!idev)
2671		return 0;
2672
2673	/* For administrative MTU increase, there is no way to discover
2674	   IPv6 PMTU increase, so PMTU increase should be updated here.
2675	   Since RFC 1981 doesn't include administrative MTU increase
2676	   update PMTU increase is a MUST. (i.e. jumbo frame)
2677	 */
2678	/*
2679	   If new MTU is less than route PMTU, this new MTU will be the
2680	   lowest MTU in the path, update the route PMTU to reflect PMTU
2681	   decreases; if new MTU is greater than route PMTU, and the
2682	   old MTU is the lowest MTU in the path, update the route PMTU
2683	   to reflect the increase. In this case if the other nodes' MTU
2684	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2685	   PMTU discouvery.
2686	 */
2687	if (rt->dst.dev == arg->dev &&
 
2688	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2689		if (rt->rt6i_flags & RTF_CACHE) {
2690			/* For RTF_CACHE with rt6i_pmtu == 0
2691			 * (i.e. a redirected route),
2692			 * the metrics of its rt->dst.from has already
2693			 * been updated.
2694			 */
2695			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2696				rt->rt6i_pmtu = arg->mtu;
2697		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2698			   (dst_mtu(&rt->dst) < arg->mtu &&
2699			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2700			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2701		}
2702	}
2703	return 0;
2704}
2705
2706void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2707{
2708	struct rt6_mtu_change_arg arg = {
2709		.dev = dev,
2710		.mtu = mtu,
2711	};
2712
2713	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2714}
2715
2716static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2717	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2718	[RTA_OIF]               = { .type = NLA_U32 },
2719	[RTA_IIF]		= { .type = NLA_U32 },
2720	[RTA_PRIORITY]          = { .type = NLA_U32 },
2721	[RTA_METRICS]           = { .type = NLA_NESTED },
2722	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2723	[RTA_PREF]              = { .type = NLA_U8 },
2724	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2725	[RTA_ENCAP]		= { .type = NLA_NESTED },
2726	[RTA_EXPIRES]		= { .type = NLA_U32 },
 
2727};
2728
2729static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2730			      struct fib6_config *cfg)
2731{
2732	struct rtmsg *rtm;
2733	struct nlattr *tb[RTA_MAX+1];
2734	unsigned int pref;
2735	int err;
2736
2737	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2738	if (err < 0)
2739		goto errout;
2740
2741	err = -EINVAL;
2742	rtm = nlmsg_data(nlh);
2743	memset(cfg, 0, sizeof(*cfg));
2744
2745	cfg->fc_table = rtm->rtm_table;
2746	cfg->fc_dst_len = rtm->rtm_dst_len;
2747	cfg->fc_src_len = rtm->rtm_src_len;
2748	cfg->fc_flags = RTF_UP;
2749	cfg->fc_protocol = rtm->rtm_protocol;
2750	cfg->fc_type = rtm->rtm_type;
2751
2752	if (rtm->rtm_type == RTN_UNREACHABLE ||
2753	    rtm->rtm_type == RTN_BLACKHOLE ||
2754	    rtm->rtm_type == RTN_PROHIBIT ||
2755	    rtm->rtm_type == RTN_THROW)
2756		cfg->fc_flags |= RTF_REJECT;
2757
2758	if (rtm->rtm_type == RTN_LOCAL)
2759		cfg->fc_flags |= RTF_LOCAL;
2760
2761	if (rtm->rtm_flags & RTM_F_CLONED)
2762		cfg->fc_flags |= RTF_CACHE;
2763
2764	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2765	cfg->fc_nlinfo.nlh = nlh;
2766	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2767
2768	if (tb[RTA_GATEWAY]) {
2769		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2770		cfg->fc_flags |= RTF_GATEWAY;
2771	}
2772
2773	if (tb[RTA_DST]) {
2774		int plen = (rtm->rtm_dst_len + 7) >> 3;
2775
2776		if (nla_len(tb[RTA_DST]) < plen)
2777			goto errout;
2778
2779		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2780	}
2781
2782	if (tb[RTA_SRC]) {
2783		int plen = (rtm->rtm_src_len + 7) >> 3;
2784
2785		if (nla_len(tb[RTA_SRC]) < plen)
2786			goto errout;
2787
2788		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2789	}
2790
2791	if (tb[RTA_PREFSRC])
2792		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2793
2794	if (tb[RTA_OIF])
2795		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2796
2797	if (tb[RTA_PRIORITY])
2798		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2799
2800	if (tb[RTA_METRICS]) {
2801		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2802		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2803	}
2804
2805	if (tb[RTA_TABLE])
2806		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2807
2808	if (tb[RTA_MULTIPATH]) {
2809		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2810		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
 
 
 
 
 
2811	}
2812
2813	if (tb[RTA_PREF]) {
2814		pref = nla_get_u8(tb[RTA_PREF]);
2815		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2816		    pref != ICMPV6_ROUTER_PREF_HIGH)
2817			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2818		cfg->fc_flags |= RTF_PREF(pref);
2819	}
2820
2821	if (tb[RTA_ENCAP])
2822		cfg->fc_encap = tb[RTA_ENCAP];
2823
2824	if (tb[RTA_ENCAP_TYPE])
2825		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2826
 
 
 
 
 
2827	if (tb[RTA_EXPIRES]) {
2828		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2829
2830		if (addrconf_finite_timeout(timeout)) {
2831			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2832			cfg->fc_flags |= RTF_EXPIRES;
2833		}
2834	}
2835
2836	err = 0;
2837errout:
2838	return err;
2839}
2840
/*
 * One nexthop of a multipath request while it is being processed by
 * ip6_route_multipath_add().
 */
2841struct rt6_nh {
2842	struct rt6_info *rt6_info;	/* route built for this nexthop; reset to NULL once insertion was attempted */
2843	struct fib6_config r_cfg;	/* per-nexthop config copy, reused for ip6_route_del() on rollback */
2844	struct mx6_config mxc;		/* filled by ip6_convert_metrics(); mxc.mx is kfree'd at cleanup */
2845	struct list_head next;		/* linkage in the caller's rt6_nh_list */
2846};
2847
2848static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2849{
2850	struct rt6_nh *nh;
2851
2852	list_for_each_entry(nh, rt6_nh_list, next) {
2853		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2854		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2855		        nh->r_cfg.fc_ifindex);
2856	}
2857}
2858
2859static int ip6_route_info_append(struct list_head *rt6_nh_list,
2860				 struct rt6_info *rt, struct fib6_config *r_cfg)
2861{
2862	struct rt6_nh *nh;
2863	struct rt6_info *rtnh;
2864	int err = -EEXIST;
2865
2866	list_for_each_entry(nh, rt6_nh_list, next) {
2867		/* check if rt6_info already exists */
2868		rtnh = nh->rt6_info;
2869
2870		if (rtnh->dst.dev == rt->dst.dev &&
2871		    rtnh->rt6i_idev == rt->rt6i_idev &&
2872		    ipv6_addr_equal(&rtnh->rt6i_gateway,
2873				    &rt->rt6i_gateway))
2874			return err;
2875	}
2876
2877	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2878	if (!nh)
2879		return -ENOMEM;
2880	nh->rt6_info = rt;
2881	err = ip6_convert_metrics(&nh->mxc, r_cfg);
2882	if (err) {
2883		kfree(nh);
2884		return err;
2885	}
2886	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2887	list_add_tail(&nh->next, rt6_nh_list);
2888
2889	return 0;
2890}
2891
2892static int ip6_route_multipath_add(struct fib6_config *cfg)
2893{
2894	struct fib6_config r_cfg;
2895	struct rtnexthop *rtnh;
2896	struct rt6_info *rt;
2897	struct rt6_nh *err_nh;
2898	struct rt6_nh *nh, *nh_safe;
2899	int remaining;
2900	int attrlen;
2901	int err = 1;
2902	int nhn = 0;
2903	int replace = (cfg->fc_nlinfo.nlh &&
2904		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2905	LIST_HEAD(rt6_nh_list);
2906
2907	remaining = cfg->fc_mp_len;
2908	rtnh = (struct rtnexthop *)cfg->fc_mp;
2909
2910	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
2911	 * rt6_info structs per nexthop
2912	 */
2913	while (rtnh_ok(rtnh, remaining)) {
2914		memcpy(&r_cfg, cfg, sizeof(*cfg));
2915		if (rtnh->rtnh_ifindex)
2916			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2917
2918		attrlen = rtnh_attrlen(rtnh);
2919		if (attrlen > 0) {
2920			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2921
2922			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2923			if (nla) {
2924				r_cfg.fc_gateway = nla_get_in6_addr(nla);
2925				r_cfg.fc_flags |= RTF_GATEWAY;
2926			}
2927			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2928			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2929			if (nla)
2930				r_cfg.fc_encap_type = nla_get_u16(nla);
2931		}
2932
2933		rt = ip6_route_info_create(&r_cfg);
2934		if (IS_ERR(rt)) {
2935			err = PTR_ERR(rt);
2936			rt = NULL;
2937			goto cleanup;
2938		}
2939
2940		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2941		if (err) {
2942			dst_free(&rt->dst);
2943			goto cleanup;
2944		}
2945
2946		rtnh = rtnh_next(rtnh, &remaining);
2947	}
2948
2949	err_nh = NULL;
2950	list_for_each_entry(nh, &rt6_nh_list, next) {
2951		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2952		/* nh->rt6_info is used or freed at this point, reset to NULL*/
2953		nh->rt6_info = NULL;
2954		if (err) {
2955			if (replace && nhn)
2956				ip6_print_replace_route_err(&rt6_nh_list);
2957			err_nh = nh;
2958			goto add_errout;
2959		}
2960
2961		/* Because each route is added like a single route we remove
2962		 * these flags after the first nexthop: if there is a collision,
2963		 * we have already failed to add the first nexthop:
2964		 * fib6_add_rt2node() has rejected it; when replacing, old
2965		 * nexthops have been replaced by first new, the rest should
2966		 * be added to it.
2967		 */
2968		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2969						     NLM_F_REPLACE);
2970		nhn++;
2971	}
2972
2973	goto cleanup;
2974
2975add_errout:
2976	/* Delete routes that were already added */
2977	list_for_each_entry(nh, &rt6_nh_list, next) {
2978		if (err_nh == nh)
2979			break;
2980		ip6_route_del(&nh->r_cfg);
2981	}
2982
2983cleanup:
2984	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2985		if (nh->rt6_info)
2986			dst_free(&nh->rt6_info->dst);
2987		kfree(nh->mxc.mx);
2988		list_del(&nh->next);
2989		kfree(nh);
2990	}
2991
2992	return err;
2993}
2994
2995static int ip6_route_multipath_del(struct fib6_config *cfg)
2996{
2997	struct fib6_config r_cfg;
2998	struct rtnexthop *rtnh;
2999	int remaining;
3000	int attrlen;
3001	int err = 1, last_err = 0;
3002
3003	remaining = cfg->fc_mp_len;
3004	rtnh = (struct rtnexthop *)cfg->fc_mp;
3005
3006	/* Parse a Multipath Entry */
3007	while (rtnh_ok(rtnh, remaining)) {
3008		memcpy(&r_cfg, cfg, sizeof(*cfg));
3009		if (rtnh->rtnh_ifindex)
3010			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3011
3012		attrlen = rtnh_attrlen(rtnh);
3013		if (attrlen > 0) {
3014			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3015
3016			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3017			if (nla) {
3018				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3019				r_cfg.fc_flags |= RTF_GATEWAY;
3020			}
3021		}
3022		err = ip6_route_del(&r_cfg);
3023		if (err)
3024			last_err = err;
3025
3026		rtnh = rtnh_next(rtnh, &remaining);
3027	}
3028
3029	return last_err;
3030}
3031
3032static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3033{
3034	struct fib6_config cfg;
3035	int err;
3036
3037	err = rtm_to_fib6_config(skb, nlh, &cfg);
3038	if (err < 0)
3039		return err;
3040
3041	if (cfg.fc_mp)
3042		return ip6_route_multipath_del(&cfg);
3043	else
3044		return ip6_route_del(&cfg);
3045}
3046
3047static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3048{
3049	struct fib6_config cfg;
3050	int err;
3051
3052	err = rtm_to_fib6_config(skb, nlh, &cfg);
3053	if (err < 0)
3054		return err;
3055
3056	if (cfg.fc_mp)
3057		return ip6_route_multipath_add(&cfg);
3058	else
3059		return ip6_route_add(&cfg);
3060}
3061
3062static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3063{
3064	return NLMSG_ALIGN(sizeof(struct rtmsg))
3065	       + nla_total_size(16) /* RTA_SRC */
3066	       + nla_total_size(16) /* RTA_DST */
3067	       + nla_total_size(16) /* RTA_GATEWAY */
3068	       + nla_total_size(16) /* RTA_PREFSRC */
3069	       + nla_total_size(4) /* RTA_TABLE */
3070	       + nla_total_size(4) /* RTA_IIF */
3071	       + nla_total_size(4) /* RTA_OIF */
3072	       + nla_total_size(4) /* RTA_PRIORITY */
3073	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3074	       + nla_total_size(sizeof(struct rta_cacheinfo))
3075	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3076	       + nla_total_size(1) /* RTA_PREF */
3077	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
3078}
3079
/*
 * Append one route netlink message of type @type describing @rt to @skb.
 *
 * @dst, @src: when non-NULL they are reported as /128 RTA_DST / RTA_SRC
 *	in place of the route's own prefixes (RTA_SRC is only emitted with
 *	CONFIG_IPV6_SUBTREES).
 * @iif: when non-zero, RTA_IIF is emitted; with CONFIG_IPV6_MROUTE and a
 *	multicast route destination, ip6mr_get_route() is called instead.
 * @portid, @seq, @flags: passed to nlmsg_put() for the message header.
 * @prefix: when non-zero, only RTF_PREFIX_RT routes are reported.
 * @nowait: passed to ip6mr_get_route(); also selects how its non-positive
 *	results are treated.
 *
 * Returns 0 when the message was added, 1 when @rt was skipped by the
 * @prefix filter, and -EMSGSIZE when the header or an attribute did not fit
 * (the partial message is cancelled).  In the multicast branch, a 0 result
 * from ip6mr_get_route() with @nowait clear returns 0 straight away,
 * without reaching nlmsg_end().
 */
3080static int rt6_fill_node(struct net *net,
3081			 struct sk_buff *skb, struct rt6_info *rt,
3082			 struct in6_addr *dst, struct in6_addr *src,
3083			 int iif, int type, u32 portid, u32 seq,
3084			 int prefix, int nowait, unsigned int flags)
3085{
3086	u32 metrics[RTAX_MAX];
3087	struct rtmsg *rtm;
3088	struct nlmsghdr *nlh;
3089	long expires;
3090	u32 table;
3091
3092	if (prefix) {	/* user wants prefix routes only */
3093		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3094			/* success since this is not a prefix route */
3095			return 1;
3096		}
3097	}
3098
3099	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3100	if (!nlh)
3101		return -EMSGSIZE;
3102
3103	rtm = nlmsg_data(nlh);
3104	rtm->rtm_family = AF_INET6;
3105	rtm->rtm_dst_len = rt->rt6i_dst.plen;
3106	rtm->rtm_src_len = rt->rt6i_src.plen;
3107	rtm->rtm_tos = 0;
3108	if (rt->rt6i_table)
3109		table = rt->rt6i_table->tb6_id;
3110	else
3111		table = RT6_TABLE_UNSPEC;
	/* The table id goes both into the header and into RTA_TABLE. */
3112	rtm->rtm_table = table;
3113	if (nla_put_u32(skb, RTA_TABLE, table))
3114		goto nla_put_failure;
	/* Route type: reject routes are classified by their dst.error. */
3115	if (rt->rt6i_flags & RTF_REJECT) {
3116		switch (rt->dst.error) {
3117		case -EINVAL:
3118			rtm->rtm_type = RTN_BLACKHOLE;
3119			break;
3120		case -EACCES:
3121			rtm->rtm_type = RTN_PROHIBIT;
3122			break;
3123		case -EAGAIN:
3124			rtm->rtm_type = RTN_THROW;
3125			break;
3126		default:
3127			rtm->rtm_type = RTN_UNREACHABLE;
3128			break;
3129		}
3130	}
3131	else if (rt->rt6i_flags & RTF_LOCAL)
3132		rtm->rtm_type = RTN_LOCAL;
3133	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3134		rtm->rtm_type = RTN_LOCAL;
3135	else
3136		rtm->rtm_type = RTN_UNICAST;
3137	rtm->rtm_flags = 0;
	/*
	 * NOTE(review): rt->dst.dev and rt->rt6i_idev are dereferenced here
	 * without the NULL check used for rt->dst.dev elsewhere in this
	 * function - confirm both are always set for routes reaching here.
	 */
3138	if (!netif_carrier_ok(rt->dst.dev)) {
3139		rtm->rtm_flags |= RTNH_F_LINKDOWN;
3140		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3141			rtm->rtm_flags |= RTNH_F_DEAD;
3142	}
3143	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* Protocol: the stored value unless the flags say otherwise. */
3144	rtm->rtm_protocol = rt->rt6i_protocol;
3145	if (rt->rt6i_flags & RTF_DYNAMIC)
3146		rtm->rtm_protocol = RTPROT_REDIRECT;
3147	else if (rt->rt6i_flags & RTF_ADDRCONF) {
3148		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3149			rtm->rtm_protocol = RTPROT_RA;
3150		else
3151			rtm->rtm_protocol = RTPROT_KERNEL;
3152	}
3153
3154	if (rt->rt6i_flags & RTF_CACHE)
3155		rtm->rtm_flags |= RTM_F_CLONED;
3156
	/* An explicit @dst is always reported as a host (/128) destination. */
3157	if (dst) {
3158		if (nla_put_in6_addr(skb, RTA_DST, dst))
3159			goto nla_put_failure;
3160		rtm->rtm_dst_len = 128;
3161	} else if (rtm->rtm_dst_len)
3162		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3163			goto nla_put_failure;
3164#ifdef CONFIG_IPV6_SUBTREES
3165	if (src) {
3166		if (nla_put_in6_addr(skb, RTA_SRC, src))
3167			goto nla_put_failure;
3168		rtm->rtm_src_len = 128;
3169	} else if (rtm->rtm_src_len &&
3170		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3171		goto nla_put_failure;
3172#endif
3173	if (iif) {
3174#ifdef CONFIG_IPV6_MROUTE
3175		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3176			int err = ip6mr_get_route(net, skb, rtm, nowait);


			/*
			 * A positive result falls through.  With @nowait set,
			 * only -EMSGSIZE is treated as a failure.
			 */
3177			if (err <= 0) {
3178				if (!nowait) {
3179					if (err == 0)
3180						return 0;
3181					goto nla_put_failure;
3182				} else {
3183					if (err == -EMSGSIZE)
3184						goto nla_put_failure;
3185				}
3186			}
3187		} else
3188#endif
3189			if (nla_put_u32(skb, RTA_IIF, iif))
3190				goto nla_put_failure;
3191	} else if (dst) {
		/* Output lookup: report the source address that would be used. */
3192		struct in6_addr saddr_buf;
3193		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3194		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3195			goto nla_put_failure;
3196	}
3197
	/*
	 * NOTE(review): when the branch above already emitted RTA_PREFSRC
	 * and the route has a prefsrc, a second RTA_PREFSRC is added here.
	 */
3198	if (rt->rt6i_prefsrc.plen) {
3199		struct in6_addr saddr_buf;
3200		saddr_buf = rt->rt6i_prefsrc.addr;
3201		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3202			goto nla_put_failure;
3203	}
3204
	/* Report a copy of the metrics, with a non-zero rt6i_pmtu as the MTU. */
3205	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3206	if (rt->rt6i_pmtu)
3207		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3208	if (rtnetlink_put_metrics(skb, metrics) < 0)
3209		goto nla_put_failure;
3210
3211	if (rt->rt6i_flags & RTF_GATEWAY) {
3212		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3213			goto nla_put_failure;
3214	}
3215
3216	if (rt->dst.dev &&
3217	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3218		goto nla_put_failure;
3219	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3220		goto nla_put_failure;
3221
	/* Remaining lifetime in jiffies; 0 for routes without RTF_EXPIRES. */
3222	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3223
3224	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3225		goto nla_put_failure;
3226
3227	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3228		goto nla_put_failure;
3229
	/* NOTE(review): the return value of lwtunnel_fill_encap() is ignored. */
3230	lwtunnel_fill_encap(skb, rt->dst.lwtstate);

3231
3232	nlmsg_end(skb, nlh);
3233	return 0;
3234
3235nla_put_failure:
3236	nlmsg_cancel(skb, nlh);
3237	return -EMSGSIZE;
3238}
3239
3240int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3241{
3242	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3243	int prefix;
3244
3245	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3246		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3247		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3248	} else
3249		prefix = 0;
3250
3251	return rt6_fill_node(arg->net,
3252		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3253		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3254		     prefix, 0, NLM_F_MULTI);
3255}
3256
/*
 * RTM_GETROUTE handler: resolve the route for the flow described by the
 * request and unicast the result back to the requester.
 *
 * RTA_SRC / RTA_DST (each at least sizeof(struct in6_addr) long), RTA_IIF,
 * RTA_OIF and RTA_MARK are read from the request.  With RTA_IIF an input
 * lookup is done on that device, otherwise an output lookup.
 *
 * Returns the nlmsg_parse() error, -EINVAL for a short RTA_SRC/RTA_DST,
 * -ENODEV when the RTA_IIF device does not exist, -ENOBUFS when the reply
 * skb cannot be allocated, the rt6_fill_node() error, or the result of
 * rtnl_unicast().
 */
3257static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3258{
3259	struct net *net = sock_net(in_skb->sk);
3260	struct nlattr *tb[RTA_MAX+1];
3261	struct rt6_info *rt;
3262	struct sk_buff *skb;
3263	struct rtmsg *rtm;
3264	struct flowi6 fl6;
3265	int err, iif = 0, oif = 0;
3266
3267	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3268	if (err < 0)
3269		goto errout;
3270
	/* Default error for the attribute length checks below. */
3271	err = -EINVAL;
3272	memset(&fl6, 0, sizeof(fl6));


3273
3274	if (tb[RTA_SRC]) {
3275		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3276			goto errout;
3277
3278		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3279	}
3280
3281	if (tb[RTA_DST]) {
3282		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3283			goto errout;
3284
3285		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3286	}
3287
3288	if (tb[RTA_IIF])
3289		iif = nla_get_u32(tb[RTA_IIF]);
3290
3291	if (tb[RTA_OIF])
3292		oif = nla_get_u32(tb[RTA_OIF]);
3293
3294	if (tb[RTA_MARK])
3295		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3296






	/* Input lookup when an incoming interface was given, output otherwise. */
3297	if (iif) {
3298		struct net_device *dev;
3299		int flags = 0;
3300
3301		dev = __dev_get_by_index(net, iif);
3302		if (!dev) {
3303			err = -ENODEV;
3304			goto errout;
3305		}
3306
3307		fl6.flowi6_iif = iif;
3308
3309		if (!ipv6_addr_any(&fl6.saddr))
3310			flags |= RT6_LOOKUP_F_HAS_SADDR;
3311
3312		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3313							       flags);
3314	} else {
3315		fl6.flowi6_oif = oif;
3316
3317		if (netif_index_is_l3_master(net, oif)) {
3318			fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3319					   FLOWI_FLAG_SKIP_NH_OIF;
3320		}
3321
3322		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3323	}
3324
	/*
	 * NOTE(review): the lookup result is used as is; neither a NULL
	 * result nor rt->dst.error is checked here.
	 */
3325	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3326	if (!skb) {
3327		ip6_rt_put(rt);
3328		err = -ENOBUFS;
3329		goto errout;
3330	}
3331
3332	/* Reserve room for dummy headers, this skb can pass
3333	   through good chunk of routing engine.
3334	 */
3335	skb_reset_mac_header(skb);
3336	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3337
	/*
	 * The route is attached to the reply skb; after this point only the
	 * skb is freed on error (presumably releasing the route with it).
	 */
3338	skb_dst_set(skb, &rt->dst);
3339
3340	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3341			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3342			    nlh->nlmsg_seq, 0, 0, 0);
3343	if (err < 0) {
3344		kfree_skb(skb);
3345		goto errout;
3346	}
3347
3348	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3349errout:
3350	return err;
3351}
3352
3353void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3354		     unsigned int nlm_flags)
3355{
3356	struct sk_buff *skb;
3357	struct net *net = info->nl_net;
3358	u32 seq;
3359	int err;
3360
3361	err = -ENOBUFS;
3362	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3363
3364	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3365	if (!skb)
3366		goto errout;
3367
3368	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3369				event, info->portid, seq, 0, 0, nlm_flags);
3370	if (err < 0) {
3371		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3372		WARN_ON(err == -EMSGSIZE);
3373		kfree_skb(skb);
3374		goto errout;
3375	}
3376	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3377		    info->nlh, gfp_any());
3378	return;
3379errout:
3380	if (err < 0)
3381		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3382}
3383
3384static int ip6_route_dev_notify(struct notifier_block *this,
3385				unsigned long event, void *ptr)
3386{
3387	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3388	struct net *net = dev_net(dev);
3389
3390	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3391		net->ipv6.ip6_null_entry->dst.dev = dev;
3392		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3393#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3394		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3395		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3396		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3397		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3398#endif
3399	}
3400
3401	return NOTIFY_OK;
3402}
3403
3404/*
3405 *	/proc
3406 */
3407
3408#ifdef CONFIG_PROC_FS
3409
3410static const struct file_operations ipv6_route_proc_fops = {
3411	.owner		= THIS_MODULE,
3412	.open		= ipv6_route_open,
3413	.read		= seq_read,
3414	.llseek		= seq_lseek,
3415	.release	= seq_release_net,
3416};
3417
3418static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3419{
3420	struct net *net = (struct net *)seq->private;
3421	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3422		   net->ipv6.rt6_stats->fib_nodes,
3423		   net->ipv6.rt6_stats->fib_route_nodes,
3424		   net->ipv6.rt6_stats->fib_rt_alloc,
3425		   net->ipv6.rt6_stats->fib_rt_entries,
3426		   net->ipv6.rt6_stats->fib_rt_cache,
3427		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3428		   net->ipv6.rt6_stats->fib_discarded_routes);
3429
3430	return 0;
3431}
3432
3433static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3434{
3435	return single_open_net(inode, file, rt6_stats_seq_show);
3436}
3437
3438static const struct file_operations rt6_stats_seq_fops = {
3439	.owner	 = THIS_MODULE,
3440	.open	 = rt6_stats_seq_open,
3441	.read	 = seq_read,
3442	.llseek	 = seq_lseek,
3443	.release = single_release_net,
3444};
3445#endif	/* CONFIG_PROC_FS */
3446
3447#ifdef CONFIG_SYSCTL
3448
3449static
3450int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3451			      void __user *buffer, size_t *lenp, loff_t *ppos)
3452{
3453	struct net *net;
3454	int delay;
3455	if (!write)
3456		return -EINVAL;
3457
3458	net = (struct net *)ctl->extra1;
3459	delay = net->ipv6.sysctl.flush_delay;
3460	proc_dointvec(ctl, write, buffer, lenp, ppos);
3461	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3462	return 0;
3463}
3464
3465struct ctl_table ipv6_route_table_template[] = {
3466	{
3467		.procname	=	"flush",
3468		.data		=	&init_net.ipv6.sysctl.flush_delay,
3469		.maxlen		=	sizeof(int),
3470		.mode		=	0200,
3471		.proc_handler	=	ipv6_sysctl_rtcache_flush
3472	},
3473	{
3474		.procname	=	"gc_thresh",
3475		.data		=	&ip6_dst_ops_template.gc_thresh,
3476		.maxlen		=	sizeof(int),
3477		.mode		=	0644,
3478		.proc_handler	=	proc_dointvec,
3479	},
3480	{
3481		.procname	=	"max_size",
3482		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
3483		.maxlen		=	sizeof(int),
3484		.mode		=	0644,
3485		.proc_handler	=	proc_dointvec,
3486	},
3487	{
3488		.procname	=	"gc_min_interval",
3489		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3490		.maxlen		=	sizeof(int),
3491		.mode		=	0644,
3492		.proc_handler	=	proc_dointvec_jiffies,
3493	},
3494	{
3495		.procname	=	"gc_timeout",
3496		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3497		.maxlen		=	sizeof(int),
3498		.mode		=	0644,
3499		.proc_handler	=	proc_dointvec_jiffies,
3500	},
3501	{
3502		.procname	=	"gc_interval",
3503		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
3504		.maxlen		=	sizeof(int),
3505		.mode		=	0644,
3506		.proc_handler	=	proc_dointvec_jiffies,
3507	},
3508	{
3509		.procname	=	"gc_elasticity",
3510		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3511		.maxlen		=	sizeof(int),
3512		.mode		=	0644,
3513		.proc_handler	=	proc_dointvec,
3514	},
3515	{
3516		.procname	=	"mtu_expires",
3517		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3518		.maxlen		=	sizeof(int),
3519		.mode		=	0644,
3520		.proc_handler	=	proc_dointvec_jiffies,
3521	},
3522	{
3523		.procname	=	"min_adv_mss",
3524		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
3525		.maxlen		=	sizeof(int),
3526		.mode		=	0644,
3527		.proc_handler	=	proc_dointvec,
3528	},
3529	{
3530		.procname	=	"gc_min_interval_ms",
3531		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3532		.maxlen		=	sizeof(int),
3533		.mode		=	0644,
3534		.proc_handler	=	proc_dointvec_ms_jiffies,
3535	},
3536	{ }
3537};
3538
3539struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3540{
3541	struct ctl_table *table;
3542
3543	table = kmemdup(ipv6_route_table_template,
3544			sizeof(ipv6_route_table_template),
3545			GFP_KERNEL);
3546
3547	if (table) {
3548		table[0].data = &net->ipv6.sysctl.flush_delay;
3549		table[0].extra1 = net;
3550		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3551		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3552		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3553		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3554		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3555		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3556		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3557		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3558		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3559
3560		/* Don't export sysctls to unprivileged users */
3561		if (net->user_ns != &init_user_ns)
3562			table[0].procname = NULL;
3563	}
3564
3565	return table;
3566}
3567#endif
3568
3569static int __net_init ip6_route_net_init(struct net *net)
3570{
3571	int ret = -ENOMEM;
3572
3573	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3574	       sizeof(net->ipv6.ip6_dst_ops));
3575
3576	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3577		goto out_ip6_dst_ops;
3578
3579	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3580					   sizeof(*net->ipv6.ip6_null_entry),
3581					   GFP_KERNEL);
3582	if (!net->ipv6.ip6_null_entry)
3583		goto out_ip6_dst_entries;
3584	net->ipv6.ip6_null_entry->dst.path =
3585		(struct dst_entry *)net->ipv6.ip6_null_entry;
3586	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3587	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3588			 ip6_template_metrics, true);
3589
3590#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3591	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3592					       sizeof(*net->ipv6.ip6_prohibit_entry),
3593					       GFP_KERNEL);
3594	if (!net->ipv6.ip6_prohibit_entry)
3595		goto out_ip6_null_entry;
3596	net->ipv6.ip6_prohibit_entry->dst.path =
3597		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3598	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3599	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3600			 ip6_template_metrics, true);
3601
3602	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3603					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3604					       GFP_KERNEL);
3605	if (!net->ipv6.ip6_blk_hole_entry)
3606		goto out_ip6_prohibit_entry;
3607	net->ipv6.ip6_blk_hole_entry->dst.path =
3608		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3609	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3610	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3611			 ip6_template_metrics, true);
3612#endif
3613
3614	net->ipv6.sysctl.flush_delay = 0;
3615	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3616	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3617	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3618	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3619	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3620	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3621	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3622
3623	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3624
3625	ret = 0;
3626out:
3627	return ret;
3628
3629#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3630out_ip6_prohibit_entry:
3631	kfree(net->ipv6.ip6_prohibit_entry);
3632out_ip6_null_entry:
3633	kfree(net->ipv6.ip6_null_entry);
3634#endif
3635out_ip6_dst_entries:
3636	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3637out_ip6_dst_ops:
3638	goto out;
3639}
3640
3641static void __net_exit ip6_route_net_exit(struct net *net)
3642{
3643	kfree(net->ipv6.ip6_null_entry);
3644#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3645	kfree(net->ipv6.ip6_prohibit_entry);
3646	kfree(net->ipv6.ip6_blk_hole_entry);
3647#endif
3648	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3649}
3650
3651static int __net_init ip6_route_net_init_late(struct net *net)
3652{
3653#ifdef CONFIG_PROC_FS
3654	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3655	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3656#endif
3657	return 0;
3658}
3659
3660static void __net_exit ip6_route_net_exit_late(struct net *net)
3661{
3662#ifdef CONFIG_PROC_FS
3663	remove_proc_entry("ipv6_route", net->proc_net);
3664	remove_proc_entry("rt6_stats", net->proc_net);
3665#endif
3666}
3667
3668static struct pernet_operations ip6_route_net_ops = {
3669	.init = ip6_route_net_init,
3670	.exit = ip6_route_net_exit,
3671};
3672
3673static int __net_init ipv6_inetpeer_init(struct net *net)
3674{
3675	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3676
3677	if (!bp)
3678		return -ENOMEM;
3679	inet_peer_base_init(bp);
3680	net->ipv6.peers = bp;
3681	return 0;
3682}
3683
3684static void __net_exit ipv6_inetpeer_exit(struct net *net)
3685{
3686	struct inet_peer_base *bp = net->ipv6.peers;
3687
3688	net->ipv6.peers = NULL;
3689	inetpeer_invalidate_tree(bp);
3690	kfree(bp);
3691}
3692
3693static struct pernet_operations ipv6_inetpeer_ops = {
3694	.init	=	ipv6_inetpeer_init,
3695	.exit	=	ipv6_inetpeer_exit,
3696};
3697
3698static struct pernet_operations ip6_route_net_late_ops = {
3699	.init = ip6_route_net_init_late,
3700	.exit = ip6_route_net_exit_late,
3701};
3702
3703static struct notifier_block ip6_route_dev_notifier = {
3704	.notifier_call = ip6_route_dev_notify,
3705	.priority = 0,
3706};
3707
3708int __init ip6_route_init(void)
3709{
3710	int ret;
3711	int cpu;
3712
3713	ret = -ENOMEM;
3714	ip6_dst_ops_template.kmem_cachep =
3715		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3716				  SLAB_HWCACHE_ALIGN, NULL);
3717	if (!ip6_dst_ops_template.kmem_cachep)
3718		goto out;
3719
3720	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3721	if (ret)
3722		goto out_kmem_cache;
3723
3724	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3725	if (ret)
3726		goto out_dst_entries;
3727
3728	ret = register_pernet_subsys(&ip6_route_net_ops);
3729	if (ret)
3730		goto out_register_inetpeer;
3731
3732	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3733
3734	/* Registering of the loopback is done before this portion of code,
3735	 * the loopback reference in rt6_info will not be taken, do it
3736	 * manually for init_net */
3737	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3738	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3739  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3740	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3741	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3742	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3743	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3744  #endif
3745	ret = fib6_init();
3746	if (ret)
3747		goto out_register_subsys;
3748
3749	ret = xfrm6_init();
3750	if (ret)
3751		goto out_fib6_init;
3752
3753	ret = fib6_rules_init();
3754	if (ret)
3755		goto xfrm6_init;
3756
3757	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3758	if (ret)
3759		goto fib6_rules_init;
3760
3761	ret = -ENOBUFS;
3762	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3763	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3764	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3765		goto out_register_late_subsys;
3766
3767	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3768	if (ret)
3769		goto out_register_late_subsys;
3770
3771	for_each_possible_cpu(cpu) {
3772		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3773
3774		INIT_LIST_HEAD(&ul->head);
3775		spin_lock_init(&ul->lock);
3776	}
3777
3778out:
3779	return ret;
3780
3781out_register_late_subsys:
3782	unregister_pernet_subsys(&ip6_route_net_late_ops);
3783fib6_rules_init:
3784	fib6_rules_cleanup();
3785xfrm6_init:
3786	xfrm6_fini();
3787out_fib6_init:
3788	fib6_gc_cleanup();
3789out_register_subsys:
3790	unregister_pernet_subsys(&ip6_route_net_ops);
3791out_register_inetpeer:
3792	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3793out_dst_entries:
3794	dst_entries_destroy(&ip6_dst_blackhole_ops);
3795out_kmem_cache:
3796	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3797	goto out;
3798}
3799
3800void ip6_route_cleanup(void)
3801{
3802	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3803	unregister_pernet_subsys(&ip6_route_net_late_ops);
3804	fib6_rules_cleanup();
3805	xfrm6_fini();
3806	fib6_gc_cleanup();
3807	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3808	unregister_pernet_subsys(&ip6_route_net_ops);
3809	dst_entries_destroy(&ip6_dst_blackhole_ops);
3810	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3811}
v4.10.11
   1/*
   2 *	Linux INET6 implementation
   3 *	FIB front-end.
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*	Changes:
  15 *
  16 *	YOSHIFUJI Hideaki @USAGI
  17 *		reworked default router selection.
  18 *		- respect outgoing interface
  19 *		- select from (probably) reachable routers (i.e.
  20 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *		- always select the same router if it is (probably)
  22 *		reachable.  otherwise, round-robin the list.
  23 *	Ville Nuorvala
  24 *		Fixed routing subtrees.
  25 */
  26
  27#define pr_fmt(fmt) "IPv6: " fmt
  28
  29#include <linux/capability.h>
  30#include <linux/errno.h>
  31#include <linux/export.h>
  32#include <linux/types.h>
  33#include <linux/times.h>
  34#include <linux/socket.h>
  35#include <linux/sockios.h>
  36#include <linux/net.h>
  37#include <linux/route.h>
  38#include <linux/netdevice.h>
  39#include <linux/in6.h>
  40#include <linux/mroute6.h>
  41#include <linux/init.h>
  42#include <linux/if_arp.h>
  43#include <linux/proc_fs.h>
  44#include <linux/seq_file.h>
  45#include <linux/nsproxy.h>
  46#include <linux/slab.h>
  47#include <net/net_namespace.h>
  48#include <net/snmp.h>
  49#include <net/ipv6.h>
  50#include <net/ip6_fib.h>
  51#include <net/ip6_route.h>
  52#include <net/ndisc.h>
  53#include <net/addrconf.h>
  54#include <net/tcp.h>
  55#include <linux/rtnetlink.h>
  56#include <net/dst.h>
  57#include <net/dst_metadata.h>
  58#include <net/xfrm.h>
  59#include <net/netevent.h>
  60#include <net/netlink.h>
  61#include <net/nexthop.h>
  62#include <net/lwtunnel.h>
  63#include <net/ip_tunnels.h>
  64#include <net/l3mdev.h>
  65#include <trace/events/fib6.h>
  66
  67#include <linux/uaccess.h>
  68
  69#ifdef CONFIG_SYSCTL
  70#include <linux/sysctl.h>
  71#endif
  72
  73enum rt6_nud_state {
  74	RT6_NUD_FAIL_HARD = -3,
  75	RT6_NUD_FAIL_PROBE = -2,
  76	RT6_NUD_FAIL_DO_RR = -1,
  77	RT6_NUD_SUCCEED = 1
  78};
  79
  80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
  81static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  82static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  83static unsigned int	 ip6_mtu(const struct dst_entry *dst);
  84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  85static void		ip6_dst_destroy(struct dst_entry *);
  86static void		ip6_dst_ifdown(struct dst_entry *,
  87				       struct net_device *dev, int how);
  88static int		 ip6_dst_gc(struct dst_ops *ops);
  89
  90static int		ip6_pkt_discard(struct sk_buff *skb);
  91static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  92static int		ip6_pkt_prohibit(struct sk_buff *skb);
  93static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  94static void		ip6_link_failure(struct sk_buff *skb);
  95static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
  96					   struct sk_buff *skb, u32 mtu);
  97static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
  98					struct sk_buff *skb);
  99static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
 101
 102#ifdef CONFIG_IPV6_ROUTE_INFO
 103static struct rt6_info *rt6_add_route_info(struct net *net,
 104					   const struct in6_addr *prefix, int prefixlen,
 105					   const struct in6_addr *gwaddr,
 106					   struct net_device *dev,
 107					   unsigned int pref);
 108static struct rt6_info *rt6_get_route_info(struct net *net,
 109					   const struct in6_addr *prefix, int prefixlen,
 110					   const struct in6_addr *gwaddr,
 111					   struct net_device *dev);
 112#endif
 113
 114struct uncached_list {
 115	spinlock_t		lock;
 116	struct list_head	head;
 117};
 118
 119static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
 120
 121static void rt6_uncached_list_add(struct rt6_info *rt)
 122{
 123	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
 124
 125	rt->dst.flags |= DST_NOCACHE;
 126	rt->rt6i_uncached_list = ul;
 127
 128	spin_lock_bh(&ul->lock);
 129	list_add_tail(&rt->rt6i_uncached, &ul->head);
 130	spin_unlock_bh(&ul->lock);
 131}
 132
 133static void rt6_uncached_list_del(struct rt6_info *rt)
 134{
 135	if (!list_empty(&rt->rt6i_uncached)) {
 136		struct uncached_list *ul = rt->rt6i_uncached_list;
 137
 138		spin_lock_bh(&ul->lock);
 139		list_del(&rt->rt6i_uncached);
 140		spin_unlock_bh(&ul->lock);
 141	}
 142}
 143
 144static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 145{
 146	struct net_device *loopback_dev = net->loopback_dev;
 147	int cpu;
 148
 149	if (dev == loopback_dev)
 150		return;
 151
 152	for_each_possible_cpu(cpu) {
 153		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 154		struct rt6_info *rt;
 155
 156		spin_lock_bh(&ul->lock);
 157		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
 158			struct inet6_dev *rt_idev = rt->rt6i_idev;
 159			struct net_device *rt_dev = rt->dst.dev;
 160
 161			if (rt_idev->dev == dev) {
 162				rt->rt6i_idev = in6_dev_get(loopback_dev);
 163				in6_dev_put(rt_idev);
 164			}
 165
 166			if (rt_dev == dev) {
 167				rt->dst.dev = loopback_dev;
 168				dev_hold(rt->dst.dev);
 169				dev_put(rt_dev);
 170			}
 171		}
 172		spin_unlock_bh(&ul->lock);
 173	}
 174}
 175
 176static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
 177{
 178	return dst_metrics_write_ptr(rt->dst.from);
 179}
 180
 181static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 182{
 183	struct rt6_info *rt = (struct rt6_info *)dst;
 184
 185	if (rt->rt6i_flags & RTF_PCPU)
 186		return rt6_pcpu_cow_metrics(rt);
 187	else if (rt->rt6i_flags & RTF_CACHE)
 188		return NULL;
 189	else
 190		return dst_cow_metrics_generic(dst, old);
 191}
 192
 193static inline const void *choose_neigh_daddr(struct rt6_info *rt,
 194					     struct sk_buff *skb,
 195					     const void *daddr)
 196{
 197	struct in6_addr *p = &rt->rt6i_gateway;
 198
 199	if (!ipv6_addr_any(p))
 200		return (const void *) p;
 201	else if (skb)
 202		return &ipv6_hdr(skb)->daddr;
 203	return daddr;
 204}
 205
 206static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
 207					  struct sk_buff *skb,
 208					  const void *daddr)
 209{
 210	struct rt6_info *rt = (struct rt6_info *) dst;
 211	struct neighbour *n;
 212
 213	daddr = choose_neigh_daddr(rt, skb, daddr);
 214	n = __ipv6_neigh_lookup(dst->dev, daddr);
 215	if (n)
 216		return n;
 217	return neigh_create(&nd_tbl, daddr, dst->dev);
 218}
 219
 220static struct dst_ops ip6_dst_ops_template = {
 221	.family			=	AF_INET6,
 222	.gc			=	ip6_dst_gc,
 223	.gc_thresh		=	1024,
 224	.check			=	ip6_dst_check,
 225	.default_advmss		=	ip6_default_advmss,
 226	.mtu			=	ip6_mtu,
 227	.cow_metrics		=	ipv6_cow_metrics,
 228	.destroy		=	ip6_dst_destroy,
 229	.ifdown			=	ip6_dst_ifdown,
 230	.negative_advice	=	ip6_negative_advice,
 231	.link_failure		=	ip6_link_failure,
 232	.update_pmtu		=	ip6_rt_update_pmtu,
 233	.redirect		=	rt6_do_redirect,
 234	.local_out		=	__ip6_local_out,
 235	.neigh_lookup		=	ip6_neigh_lookup,
 236};
 237
 238static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 239{
 240	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 241
 242	return mtu ? : dst->dev->mtu;
 243}
 244
 245static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
 246					 struct sk_buff *skb, u32 mtu)
 247{
 248}
 249
 250static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
 251				      struct sk_buff *skb)
 252{
 253}
 254
 255static struct dst_ops ip6_dst_blackhole_ops = {
 256	.family			=	AF_INET6,
 257	.destroy		=	ip6_dst_destroy,
 258	.check			=	ip6_dst_check,
 259	.mtu			=	ip6_blackhole_mtu,
 260	.default_advmss		=	ip6_default_advmss,
 261	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 262	.redirect		=	ip6_rt_blackhole_redirect,
 263	.cow_metrics		=	dst_cow_metrics_generic,
 264	.neigh_lookup		=	ip6_neigh_lookup,
 265};
 266
 267static const u32 ip6_template_metrics[RTAX_MAX] = {
 268	[RTAX_HOPLIMIT - 1] = 0,
 269};
 270
 271static const struct rt6_info ip6_null_entry_template = {
 272	.dst = {
 273		.__refcnt	= ATOMIC_INIT(1),
 274		.__use		= 1,
 275		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 276		.error		= -ENETUNREACH,
 277		.input		= ip6_pkt_discard,
 278		.output		= ip6_pkt_discard_out,
 279	},
 280	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 281	.rt6i_protocol  = RTPROT_KERNEL,
 282	.rt6i_metric	= ~(u32) 0,
 283	.rt6i_ref	= ATOMIC_INIT(1),
 284};
 285
 286#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 287
 288static const struct rt6_info ip6_prohibit_entry_template = {
 289	.dst = {
 290		.__refcnt	= ATOMIC_INIT(1),
 291		.__use		= 1,
 292		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 293		.error		= -EACCES,
 294		.input		= ip6_pkt_prohibit,
 295		.output		= ip6_pkt_prohibit_out,
 296	},
 297	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 298	.rt6i_protocol  = RTPROT_KERNEL,
 299	.rt6i_metric	= ~(u32) 0,
 300	.rt6i_ref	= ATOMIC_INIT(1),
 301};
 302
 303static const struct rt6_info ip6_blk_hole_entry_template = {
 304	.dst = {
 305		.__refcnt	= ATOMIC_INIT(1),
 306		.__use		= 1,
 307		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 308		.error		= -EINVAL,
 309		.input		= dst_discard,
 310		.output		= dst_discard_out,
 311	},
 312	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 313	.rt6i_protocol  = RTPROT_KERNEL,
 314	.rt6i_metric	= ~(u32) 0,
 315	.rt6i_ref	= ATOMIC_INIT(1),
 316};
 317
 318#endif
 319
 320static void rt6_info_init(struct rt6_info *rt)
 321{
 322	struct dst_entry *dst = &rt->dst;
 323
 324	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 325	INIT_LIST_HEAD(&rt->rt6i_siblings);
 326	INIT_LIST_HEAD(&rt->rt6i_uncached);
 327}
 328
 329/* allocate dst with ip6_dst_ops */
 330static struct rt6_info *__ip6_dst_alloc(struct net *net,
 331					struct net_device *dev,
 332					int flags)
 333{
 334	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 335					0, DST_OBSOLETE_FORCE_CHK, flags);
 336
 337	if (rt)
 338		rt6_info_init(rt);
 339
 340	return rt;
 341}
 342
 343struct rt6_info *ip6_dst_alloc(struct net *net,
 344			       struct net_device *dev,
 345			       int flags)
 346{
 347	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
 348
 349	if (rt) {
 350		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
 351		if (rt->rt6i_pcpu) {
 352			int cpu;
 353
 354			for_each_possible_cpu(cpu) {
 355				struct rt6_info **p;
 356
 357				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
 358				/* no one shares rt */
 359				*p =  NULL;
 360			}
 361		} else {
 362			dst_destroy((struct dst_entry *)rt);
 363			return NULL;
 364		}
 365	}
 366
 367	return rt;
 368}
 369EXPORT_SYMBOL(ip6_dst_alloc);
 370
 371static void ip6_dst_destroy(struct dst_entry *dst)
 372{
 373	struct rt6_info *rt = (struct rt6_info *)dst;
 374	struct dst_entry *from = dst->from;
 375	struct inet6_dev *idev;
 376
 377	dst_destroy_metrics_generic(dst);
 378	free_percpu(rt->rt6i_pcpu);
 379	rt6_uncached_list_del(rt);
 380
 381	idev = rt->rt6i_idev;
 382	if (idev) {
 383		rt->rt6i_idev = NULL;
 384		in6_dev_put(idev);
 385	}
 386
 387	dst->from = NULL;
 388	dst_release(from);
 389}
 390
 391static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 392			   int how)
 393{
 394	struct rt6_info *rt = (struct rt6_info *)dst;
 395	struct inet6_dev *idev = rt->rt6i_idev;
 396	struct net_device *loopback_dev =
 397		dev_net(dev)->loopback_dev;
 398
 399	if (dev != loopback_dev) {
 400		if (idev && idev->dev == dev) {
 401			struct inet6_dev *loopback_idev =
 402				in6_dev_get(loopback_dev);
 403			if (loopback_idev) {
 404				rt->rt6i_idev = loopback_idev;
 405				in6_dev_put(idev);
 406			}
 407		}
 408	}
 409}
 410
 411static bool __rt6_check_expired(const struct rt6_info *rt)
 412{
 413	if (rt->rt6i_flags & RTF_EXPIRES)
 414		return time_after(jiffies, rt->dst.expires);
 415	else
 416		return false;
 417}
 418
 419static bool rt6_check_expired(const struct rt6_info *rt)
 420{
 421	if (rt->rt6i_flags & RTF_EXPIRES) {
 422		if (time_after(jiffies, rt->dst.expires))
 423			return true;
 424	} else if (rt->dst.from) {
 425		return rt6_check_expired((struct rt6_info *) rt->dst.from);
 426	}
 427	return false;
 428}
 429
 430/* Multipath route selection:
 431 *   Hash based function using packet header and flowlabel.
 432 * Adapted from fib_info_hashfn()
 433 */
 434static int rt6_info_hash_nhsfn(unsigned int candidate_count,
 435			       const struct flowi6 *fl6)
 436{
 437	return get_hash_from_flowi6(fl6) % candidate_count;
 438}
 439
 440static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 441					     struct flowi6 *fl6, int oif,
 442					     int strict)
 443{
 444	struct rt6_info *sibling, *next_sibling;
 445	int route_choosen;
 446
 447	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
 448	/* Don't change the route, if route_choosen == 0
 449	 * (siblings does not include ourself)
 450	 */
 451	if (route_choosen)
 452		list_for_each_entry_safe(sibling, next_sibling,
 453				&match->rt6i_siblings, rt6i_siblings) {
 454			route_choosen--;
 455			if (route_choosen == 0) {
 456				if (rt6_score_route(sibling, oif, strict) < 0)
 457					break;
 458				match = sibling;
 459				break;
 460			}
 461		}
 462	return match;
 463}
 464
 465/*
 466 *	Route lookup. Any table->tb6_lock is implied.
 467 */
 468
 469static inline struct rt6_info *rt6_device_match(struct net *net,
 470						    struct rt6_info *rt,
 471						    const struct in6_addr *saddr,
 472						    int oif,
 473						    int flags)
 474{
 475	struct rt6_info *local = NULL;
 476	struct rt6_info *sprt;
 477
 478	if (!oif && ipv6_addr_any(saddr))
 479		goto out;
 480
 481	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 482		struct net_device *dev = sprt->dst.dev;
 483
 484		if (oif) {
 485			if (dev->ifindex == oif)
 486				return sprt;
 487			if (dev->flags & IFF_LOOPBACK) {
 488				if (!sprt->rt6i_idev ||
 489				    sprt->rt6i_idev->dev->ifindex != oif) {
 490					if (flags & RT6_LOOKUP_F_IFACE)
 491						continue;
 492					if (local &&
 493					    local->rt6i_idev->dev->ifindex == oif)
 494						continue;
 495				}
 496				local = sprt;
 497			}
 498		} else {
 499			if (ipv6_chk_addr(net, saddr, dev,
 500					  flags & RT6_LOOKUP_F_IFACE))
 501				return sprt;
 502		}
 503	}
 504
 505	if (oif) {
 506		if (local)
 507			return local;
 508
 509		if (flags & RT6_LOOKUP_F_IFACE)
 510			return net->ipv6.ip6_null_entry;
 511	}
 512out:
 513	return rt;
 514}
 515
 516#ifdef CONFIG_IPV6_ROUTER_PREF
 517struct __rt6_probe_work {
 518	struct work_struct work;
 519	struct in6_addr target;
 520	struct net_device *dev;
 521};
 522
 523static void rt6_probe_deferred(struct work_struct *w)
 524{
 525	struct in6_addr mcaddr;
 526	struct __rt6_probe_work *work =
 527		container_of(w, struct __rt6_probe_work, work);
 528
 529	addrconf_addr_solict_mult(&work->target, &mcaddr);
 530	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
 531	dev_put(work->dev);
 532	kfree(work);
 533}
 534
 535static void rt6_probe(struct rt6_info *rt)
 536{
 537	struct __rt6_probe_work *work;
 538	struct neighbour *neigh;
 539	/*
 540	 * Okay, this does not seem to be appropriate
 541	 * for now, however, we need to check if it
 542	 * is really so; aka Router Reachability Probing.
 543	 *
 544	 * Router Reachability Probe MUST be rate-limited
 545	 * to no more than one per minute.
 546	 */
 547	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
 548		return;
 549	rcu_read_lock_bh();
 550	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 551	if (neigh) {
 552		if (neigh->nud_state & NUD_VALID)
 553			goto out;
 554
 555		work = NULL;
 556		write_lock(&neigh->lock);
 557		if (!(neigh->nud_state & NUD_VALID) &&
 558		    time_after(jiffies,
 559			       neigh->updated +
 560			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
 561			work = kmalloc(sizeof(*work), GFP_ATOMIC);
 562			if (work)
 563				__neigh_set_probe_once(neigh);
 564		}
 565		write_unlock(&neigh->lock);
 566	} else {
 567		work = kmalloc(sizeof(*work), GFP_ATOMIC);
 568	}
 569
 570	if (work) {
 571		INIT_WORK(&work->work, rt6_probe_deferred);
 572		work->target = rt->rt6i_gateway;
 573		dev_hold(rt->dst.dev);
 574		work->dev = rt->dst.dev;
 575		schedule_work(&work->work);
 576	}
 577
 578out:
 579	rcu_read_unlock_bh();
 580}
 581#else
 582static inline void rt6_probe(struct rt6_info *rt)
 583{
 584}
 585#endif
 586
 587/*
 588 * Default Router Selection (RFC 2461 6.3.6)
 589 */
 590static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 591{
 592	struct net_device *dev = rt->dst.dev;
 593	if (!oif || dev->ifindex == oif)
 594		return 2;
 595	if ((dev->flags & IFF_LOOPBACK) &&
 596	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 597		return 1;
 598	return 0;
 599}
 600
 601static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
 602{
 603	struct neighbour *neigh;
 604	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 605
 606	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 607	    !(rt->rt6i_flags & RTF_GATEWAY))
 608		return RT6_NUD_SUCCEED;
 609
 610	rcu_read_lock_bh();
 611	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 612	if (neigh) {
 613		read_lock(&neigh->lock);
 614		if (neigh->nud_state & NUD_VALID)
 615			ret = RT6_NUD_SUCCEED;
 616#ifdef CONFIG_IPV6_ROUTER_PREF
 617		else if (!(neigh->nud_state & NUD_FAILED))
 618			ret = RT6_NUD_SUCCEED;
 619		else
 620			ret = RT6_NUD_FAIL_PROBE;
 621#endif
 622		read_unlock(&neigh->lock);
 623	} else {
 624		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
 625		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
 626	}
 627	rcu_read_unlock_bh();
 628
 629	return ret;
 630}
 631
 632static int rt6_score_route(struct rt6_info *rt, int oif,
 633			   int strict)
 634{
 635	int m;
 636
 637	m = rt6_check_dev(rt, oif);
 638	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 639		return RT6_NUD_FAIL_HARD;
 640#ifdef CONFIG_IPV6_ROUTER_PREF
 641	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 642#endif
 643	if (strict & RT6_LOOKUP_F_REACHABLE) {
 644		int n = rt6_check_neigh(rt);
 645		if (n < 0)
 646			return n;
 647	}
 648	return m;
 649}
 650
 651static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 652				   int *mpri, struct rt6_info *match,
 653				   bool *do_rr)
 654{
 655	int m;
 656	bool match_do_rr = false;
 657	struct inet6_dev *idev = rt->rt6i_idev;
 658	struct net_device *dev = rt->dst.dev;
 659
 660	if (dev && !netif_carrier_ok(dev) &&
 661	    idev->cnf.ignore_routes_with_linkdown &&
 662	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
 663		goto out;
 664
 665	if (rt6_check_expired(rt))
 666		goto out;
 667
 668	m = rt6_score_route(rt, oif, strict);
 669	if (m == RT6_NUD_FAIL_DO_RR) {
 670		match_do_rr = true;
 671		m = 0; /* lowest valid score */
 672	} else if (m == RT6_NUD_FAIL_HARD) {
 673		goto out;
 674	}
 675
 676	if (strict & RT6_LOOKUP_F_REACHABLE)
 677		rt6_probe(rt);
 678
 679	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
 680	if (m > *mpri) {
 681		*do_rr = match_do_rr;
 682		*mpri = m;
 683		match = rt;
 684	}
 685out:
 686	return match;
 687}
 688
 689static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 690				     struct rt6_info *rr_head,
 691				     u32 metric, int oif, int strict,
 692				     bool *do_rr)
 693{
 694	struct rt6_info *rt, *match, *cont;
 695	int mpri = -1;
 696
 697	match = NULL;
 698	cont = NULL;
 699	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
 700		if (rt->rt6i_metric != metric) {
 701			cont = rt;
 702			break;
 703		}
 704
 705		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 706	}
 707
 708	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
 709		if (rt->rt6i_metric != metric) {
 710			cont = rt;
 711			break;
 712		}
 713
 714		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 715	}
 716
 717	if (match || !cont)
 718		return match;
 719
 720	for (rt = cont; rt; rt = rt->dst.rt6_next)
 721		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 722
 723	return match;
 724}
 725
 726static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 727{
 728	struct rt6_info *match, *rt0;
 729	struct net *net;
 730	bool do_rr = false;
 731
 732	rt0 = fn->rr_ptr;
 733	if (!rt0)
 734		fn->rr_ptr = rt0 = fn->leaf;
 735
 736	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
 737			     &do_rr);
 738
 739	if (do_rr) {
 740		struct rt6_info *next = rt0->dst.rt6_next;
 741
 742		/* no entries matched; do round-robin */
 743		if (!next || next->rt6i_metric != rt0->rt6i_metric)
 744			next = fn->leaf;
 745
 746		if (next != rt0)
 747			fn->rr_ptr = next;
 748	}
 749
 750	net = dev_net(rt0->dst.dev);
 751	return match ? match : net->ipv6.ip6_null_entry;
 752}
 753
 754static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
 755{
 756	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
 757}
 758
 759#ifdef CONFIG_IPV6_ROUTE_INFO
 760int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 761		  const struct in6_addr *gwaddr)
 762{
 763	struct net *net = dev_net(dev);
 764	struct route_info *rinfo = (struct route_info *) opt;
 765	struct in6_addr prefix_buf, *prefix;
 766	unsigned int pref;
 767	unsigned long lifetime;
 768	struct rt6_info *rt;
 769
 770	if (len < sizeof(struct route_info)) {
 771		return -EINVAL;
 772	}
 773
 774	/* Sanity check for prefix_len and length */
 775	if (rinfo->length > 3) {
 776		return -EINVAL;
 777	} else if (rinfo->prefix_len > 128) {
 778		return -EINVAL;
 779	} else if (rinfo->prefix_len > 64) {
 780		if (rinfo->length < 2) {
 781			return -EINVAL;
 782		}
 783	} else if (rinfo->prefix_len > 0) {
 784		if (rinfo->length < 1) {
 785			return -EINVAL;
 786		}
 787	}
 788
 789	pref = rinfo->route_pref;
 790	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 791		return -EINVAL;
 792
 793	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 794
 795	if (rinfo->length == 3)
 796		prefix = (struct in6_addr *)rinfo->prefix;
 797	else {
 798		/* this function is safe */
 799		ipv6_addr_prefix(&prefix_buf,
 800				 (struct in6_addr *)rinfo->prefix,
 801				 rinfo->prefix_len);
 802		prefix = &prefix_buf;
 803	}
 804
 805	if (rinfo->prefix_len == 0)
 806		rt = rt6_get_dflt_router(gwaddr, dev);
 807	else
 808		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
 809					gwaddr, dev);
 810
 811	if (rt && !lifetime) {
 812		ip6_del_rt(rt);
 813		rt = NULL;
 814	}
 815
 816	if (!rt && lifetime)
 817		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 818					dev, pref);
 819	else if (rt)
 820		rt->rt6i_flags = RTF_ROUTEINFO |
 821				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 822
 823	if (rt) {
 824		if (!addrconf_finite_timeout(lifetime))
 825			rt6_clean_expires(rt);
 826		else
 827			rt6_set_expires(rt, jiffies + HZ * lifetime);
 828
 829		ip6_rt_put(rt);
 830	}
 831	return 0;
 832}
 833#endif
 834
 835static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
 836					struct in6_addr *saddr)
 837{
 838	struct fib6_node *pn;
 839	while (1) {
 840		if (fn->fn_flags & RTN_TL_ROOT)
 841			return NULL;
 842		pn = fn->parent;
 843		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
 844			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
 845		else
 846			fn = pn;
 847		if (fn->fn_flags & RTN_RTINFO)
 848			return fn;
 849	}
 850}
 851
 852static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 853					     struct fib6_table *table,
 854					     struct flowi6 *fl6, int flags)
 855{
 856	struct fib6_node *fn;
 857	struct rt6_info *rt;
 858
 859	read_lock_bh(&table->tb6_lock);
 860	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 861restart:
 862	rt = fn->leaf;
 863	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 864	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
 865		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
 866	if (rt == net->ipv6.ip6_null_entry) {
 867		fn = fib6_backtrack(fn, &fl6->saddr);
 868		if (fn)
 869			goto restart;
 870	}
 871	dst_use(&rt->dst, jiffies);
 872	read_unlock_bh(&table->tb6_lock);
 873
 874	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
 875
 876	return rt;
 877
 878}
 879
 880struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 881				    int flags)
 882{
 883	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
 884}
 885EXPORT_SYMBOL_GPL(ip6_route_lookup);
 886
 887struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 888			    const struct in6_addr *saddr, int oif, int strict)
 889{
 890	struct flowi6 fl6 = {
 891		.flowi6_oif = oif,
 892		.daddr = *daddr,
 893	};
 894	struct dst_entry *dst;
 895	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 896
 897	if (saddr) {
 898		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 899		flags |= RT6_LOOKUP_F_HAS_SADDR;
 900	}
 901
 902	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 903	if (dst->error == 0)
 904		return (struct rt6_info *) dst;
 905
 906	dst_release(dst);
 907
 908	return NULL;
 909}
 910EXPORT_SYMBOL(rt6_lookup);
 911
 912/* ip6_ins_rt is called with FREE table->tb6_lock.
 913   It takes a new route entry; if the addition fails for any reason,
 914   the route is freed. In any case, if the caller does not hold it,
 915   it may be destroyed.
 916 */
 917
 918static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
 919			struct mx6_config *mxc)
 920{
 921	int err;
 922	struct fib6_table *table;
 923
 924	table = rt->rt6i_table;
 925	write_lock_bh(&table->tb6_lock);
 926	err = fib6_add(&table->tb6_root, rt, info, mxc);
 927	write_unlock_bh(&table->tb6_lock);
 928
 929	return err;
 930}
 931
 932int ip6_ins_rt(struct rt6_info *rt)
 933{
 934	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
 935	struct mx6_config mxc = { .mx = NULL, };
 936
 937	return __ip6_ins_rt(rt, &info, &mxc);
 938}
 939
 940static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 941					   const struct in6_addr *daddr,
 942					   const struct in6_addr *saddr)
 943{
 944	struct rt6_info *rt;
 945
 946	/*
 947	 *	Clone the route.
 948	 */
 949
 950	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
 951		ort = (struct rt6_info *)ort->dst.from;
 952
 953	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
 954
 955	if (!rt)
 956		return NULL;
 957
 958	ip6_rt_copy_init(rt, ort);
 959	rt->rt6i_flags |= RTF_CACHE;
 960	rt->rt6i_metric = 0;
 961	rt->dst.flags |= DST_HOST;
 962	rt->rt6i_dst.addr = *daddr;
 963	rt->rt6i_dst.plen = 128;
 964
 965	if (!rt6_is_gw_or_nonexthop(ort)) {
 966		if (ort->rt6i_dst.plen != 128 &&
 967		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 968			rt->rt6i_flags |= RTF_ANYCAST;
 969#ifdef CONFIG_IPV6_SUBTREES
 970		if (rt->rt6i_src.plen && saddr) {
 971			rt->rt6i_src.addr = *saddr;
 972			rt->rt6i_src.plen = 128;
 973		}
 974#endif
 975	}
 976
 977	return rt;
 978}
 979
 980static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
 981{
 982	struct rt6_info *pcpu_rt;
 983
 984	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
 985				  rt->dst.dev, rt->dst.flags);
 986
 987	if (!pcpu_rt)
 988		return NULL;
 989	ip6_rt_copy_init(pcpu_rt, rt);
 990	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
 991	pcpu_rt->rt6i_flags |= RTF_PCPU;
 992	return pcpu_rt;
 993}
 994
 995/* It should be called with read_lock_bh(&tb6_lock) acquired */
 996static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
 997{
 998	struct rt6_info *pcpu_rt, **p;
 999
1000	p = this_cpu_ptr(rt->rt6i_pcpu);
1001	pcpu_rt = *p;
1002
1003	if (pcpu_rt) {
1004		dst_hold(&pcpu_rt->dst);
1005		rt6_dst_from_metrics_check(pcpu_rt);
1006	}
1007	return pcpu_rt;
1008}
1009
1010static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1011{
1012	struct fib6_table *table = rt->rt6i_table;
1013	struct rt6_info *pcpu_rt, *prev, **p;
1014
1015	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1016	if (!pcpu_rt) {
1017		struct net *net = dev_net(rt->dst.dev);
1018
1019		dst_hold(&net->ipv6.ip6_null_entry->dst);
1020		return net->ipv6.ip6_null_entry;
1021	}
1022
1023	read_lock_bh(&table->tb6_lock);
1024	if (rt->rt6i_pcpu) {
1025		p = this_cpu_ptr(rt->rt6i_pcpu);
1026		prev = cmpxchg(p, NULL, pcpu_rt);
1027		if (prev) {
1028			/* If someone did it before us, return prev instead */
1029			dst_destroy(&pcpu_rt->dst);
1030			pcpu_rt = prev;
1031		}
1032	} else {
1033		/* rt has been removed from the fib6 tree
1034		 * before we have a chance to acquire the read_lock.
1035		 * In this case, don't brother to create a pcpu rt
1036		 * since rt is going away anyway.  The next
1037		 * dst_check() will trigger a re-lookup.
1038		 */
1039		dst_destroy(&pcpu_rt->dst);
1040		pcpu_rt = rt;
1041	}
1042	dst_hold(&pcpu_rt->dst);
1043	rt6_dst_from_metrics_check(pcpu_rt);
1044	read_unlock_bh(&table->tb6_lock);
1045	return pcpu_rt;
1046}
1047
1048struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1049			       int oif, struct flowi6 *fl6, int flags)
1050{
1051	struct fib6_node *fn, *saved_fn;
1052	struct rt6_info *rt;
1053	int strict = 0;
1054
1055	strict |= flags & RT6_LOOKUP_F_IFACE;
1056	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1057	if (net->ipv6.devconf_all->forwarding == 0)
1058		strict |= RT6_LOOKUP_F_REACHABLE;
1059
1060	read_lock_bh(&table->tb6_lock);
1061
1062	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1063	saved_fn = fn;
1064
1065	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1066		oif = 0;
1067
1068redo_rt6_select:
1069	rt = rt6_select(fn, oif, strict);
1070	if (rt->rt6i_nsiblings)
1071		rt = rt6_multipath_select(rt, fl6, oif, strict);
1072	if (rt == net->ipv6.ip6_null_entry) {
1073		fn = fib6_backtrack(fn, &fl6->saddr);
1074		if (fn)
1075			goto redo_rt6_select;
1076		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1077			/* also consider unreachable route */
1078			strict &= ~RT6_LOOKUP_F_REACHABLE;
1079			fn = saved_fn;
1080			goto redo_rt6_select;
1081		}
1082	}
1083
1084
1085	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1086		dst_use(&rt->dst, jiffies);
1087		read_unlock_bh(&table->tb6_lock);
1088
1089		rt6_dst_from_metrics_check(rt);
1090
1091		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1092		return rt;
1093	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1094			    !(rt->rt6i_flags & RTF_GATEWAY))) {
1095		/* Create a RTF_CACHE clone which will not be
1096		 * owned by the fib6 tree.  It is for the special case where
1097		 * the daddr in the skb during the neighbor look-up is different
1098		 * from the fl6->daddr used to look-up route here.
1099		 */
1100
1101		struct rt6_info *uncached_rt;
1102
1103		dst_use(&rt->dst, jiffies);
1104		read_unlock_bh(&table->tb6_lock);
1105
1106		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1107		dst_release(&rt->dst);
1108
1109		if (uncached_rt)
1110			rt6_uncached_list_add(uncached_rt);
1111		else
1112			uncached_rt = net->ipv6.ip6_null_entry;
1113
1114		dst_hold(&uncached_rt->dst);
1115
1116		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
1117		return uncached_rt;
1118
1119	} else {
1120		/* Get a percpu copy */
1121
1122		struct rt6_info *pcpu_rt;
1123
1124		rt->dst.lastuse = jiffies;
1125		rt->dst.__use++;
1126		pcpu_rt = rt6_get_pcpu_route(rt);
1127
1128		if (pcpu_rt) {
1129			read_unlock_bh(&table->tb6_lock);
1130		} else {
1131			/* We have to do the read_unlock first
1132			 * because rt6_make_pcpu_route() may trigger
1133			 * ip6_dst_gc() which will take the write_lock.
1134			 */
1135			dst_hold(&rt->dst);
1136			read_unlock_bh(&table->tb6_lock);
1137			pcpu_rt = rt6_make_pcpu_route(rt);
1138			dst_release(&rt->dst);
1139		}
1140
1141		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
1142		return pcpu_rt;
1143
1144	}
1145}
1146EXPORT_SYMBOL_GPL(ip6_pol_route);
1147
1148static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1149					    struct flowi6 *fl6, int flags)
1150{
1151	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1152}
1153
1154struct dst_entry *ip6_route_input_lookup(struct net *net,
1155					 struct net_device *dev,
1156					 struct flowi6 *fl6, int flags)
1157{
1158	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1159		flags |= RT6_LOOKUP_F_IFACE;
1160
1161	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1162}
1163EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1164
1165void ip6_route_input(struct sk_buff *skb)
1166{
1167	const struct ipv6hdr *iph = ipv6_hdr(skb);
1168	struct net *net = dev_net(skb->dev);
1169	int flags = RT6_LOOKUP_F_HAS_SADDR;
1170	struct ip_tunnel_info *tun_info;
1171	struct flowi6 fl6 = {
1172		.flowi6_iif = skb->dev->ifindex,
1173		.daddr = iph->daddr,
1174		.saddr = iph->saddr,
1175		.flowlabel = ip6_flowinfo(iph),
1176		.flowi6_mark = skb->mark,
1177		.flowi6_proto = iph->nexthdr,
1178	};
1179
1180	tun_info = skb_tunnel_info(skb);
1181	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1182		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1183	skb_dst_drop(skb);
1184	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1185}
1186
1187static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1188					     struct flowi6 *fl6, int flags)
1189{
1190	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1191}
1192
1193struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1194					 struct flowi6 *fl6, int flags)
1195{
 
1196	bool any_src;
1197
1198	if (rt6_need_strict(&fl6->daddr)) {
1199		struct dst_entry *dst;
1200
1201		dst = l3mdev_link_scope_lookup(net, fl6);
1202		if (dst)
1203			return dst;
1204	}
1205
1206	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1207
1208	any_src = ipv6_addr_any(&fl6->saddr);
1209	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1210	    (fl6->flowi6_oif && any_src))
1211		flags |= RT6_LOOKUP_F_IFACE;
1212
1213	if (!any_src)
1214		flags |= RT6_LOOKUP_F_HAS_SADDR;
1215	else if (sk)
1216		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1217
1218	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1219}
1220EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1221
1222struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1223{
1224	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1225	struct dst_entry *new = NULL;
1226
1227	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1228	if (rt) {
1229		rt6_info_init(rt);
1230
1231		new = &rt->dst;
1232		new->__use = 1;
1233		new->input = dst_discard;
1234		new->output = dst_discard_out;
1235
1236		dst_copy_metrics(new, &ort->dst);
1237		rt->rt6i_idev = ort->rt6i_idev;
1238		if (rt->rt6i_idev)
1239			in6_dev_hold(rt->rt6i_idev);
1240
1241		rt->rt6i_gateway = ort->rt6i_gateway;
1242		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1243		rt->rt6i_metric = 0;
1244
1245		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1246#ifdef CONFIG_IPV6_SUBTREES
1247		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1248#endif
1249
1250		dst_free(new);
1251	}
1252
1253	dst_release(dst_orig);
1254	return new ? new : ERR_PTR(-ENOMEM);
1255}
1256
1257/*
1258 *	Destination cache support functions
1259 */
1260
1261static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1262{
1263	if (rt->dst.from &&
1264	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1265		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1266}
1267
1268static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1269{
1270	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1271		return NULL;
1272
1273	if (rt6_check_expired(rt))
1274		return NULL;
1275
1276	return &rt->dst;
1277}
1278
1279static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1280{
1281	if (!__rt6_check_expired(rt) &&
1282	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1283	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1284		return &rt->dst;
1285	else
1286		return NULL;
1287}
1288
1289static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1290{
1291	struct rt6_info *rt;
1292
1293	rt = (struct rt6_info *) dst;
1294
1295	/* All IPV6 dsts are created with ->obsolete set to the value
1296	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1297	 * into this function always.
1298	 */
1299
1300	rt6_dst_from_metrics_check(rt);
1301
1302	if (rt->rt6i_flags & RTF_PCPU ||
1303	    (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1304		return rt6_dst_from_check(rt, cookie);
1305	else
1306		return rt6_check(rt, cookie);
1307}
1308
1309static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1310{
1311	struct rt6_info *rt = (struct rt6_info *) dst;
1312
1313	if (rt) {
1314		if (rt->rt6i_flags & RTF_CACHE) {
1315			if (rt6_check_expired(rt)) {
1316				ip6_del_rt(rt);
1317				dst = NULL;
1318			}
1319		} else {
1320			dst_release(dst);
1321			dst = NULL;
1322		}
1323	}
1324	return dst;
1325}
1326
1327static void ip6_link_failure(struct sk_buff *skb)
1328{
1329	struct rt6_info *rt;
1330
1331	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1332
1333	rt = (struct rt6_info *) skb_dst(skb);
1334	if (rt) {
1335		if (rt->rt6i_flags & RTF_CACHE) {
1336			dst_hold(&rt->dst);
1337			ip6_del_rt(rt);
1338		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1339			rt->rt6i_node->fn_sernum = -1;
1340		}
1341	}
1342}
1343
1344static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1345{
1346	struct net *net = dev_net(rt->dst.dev);
1347
1348	rt->rt6i_flags |= RTF_MODIFIED;
1349	rt->rt6i_pmtu = mtu;
1350	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1351}
1352
1353static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1354{
1355	return !(rt->rt6i_flags & RTF_CACHE) &&
1356		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1357}
1358
1359static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1360				 const struct ipv6hdr *iph, u32 mtu)
1361{
1362	struct rt6_info *rt6 = (struct rt6_info *)dst;
1363
1364	if (rt6->rt6i_flags & RTF_LOCAL)
1365		return;
1366
1367	if (dst_metric_locked(dst, RTAX_MTU))
1368		return;
1369
1370	dst_confirm(dst);
1371	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1372	if (mtu >= dst_mtu(dst))
1373		return;
1374
1375	if (!rt6_cache_allowed_for_pmtu(rt6)) {
1376		rt6_do_update_pmtu(rt6, mtu);
1377	} else {
1378		const struct in6_addr *daddr, *saddr;
1379		struct rt6_info *nrt6;
1380
1381		if (iph) {
1382			daddr = &iph->daddr;
1383			saddr = &iph->saddr;
1384		} else if (sk) {
1385			daddr = &sk->sk_v6_daddr;
1386			saddr = &inet6_sk(sk)->saddr;
1387		} else {
1388			return;
1389		}
1390		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1391		if (nrt6) {
1392			rt6_do_update_pmtu(nrt6, mtu);
1393
1394			/* ip6_ins_rt(nrt6) will bump the
1395			 * rt6->rt6i_node->fn_sernum
1396			 * which will fail the next rt6_check() and
1397			 * invalidate the sk->sk_dst_cache.
1398			 */
1399			ip6_ins_rt(nrt6);
1400		}
1401	}
1402}
1403
1404static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1405			       struct sk_buff *skb, u32 mtu)
1406{
1407	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1408}
1409
1410void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1411		     int oif, u32 mark, kuid_t uid)
1412{
1413	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1414	struct dst_entry *dst;
1415	struct flowi6 fl6;
1416
1417	memset(&fl6, 0, sizeof(fl6));
1418	fl6.flowi6_oif = oif;
1419	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1420	fl6.daddr = iph->daddr;
1421	fl6.saddr = iph->saddr;
1422	fl6.flowlabel = ip6_flowinfo(iph);
1423	fl6.flowi6_uid = uid;
1424
1425	dst = ip6_route_output(net, NULL, &fl6);
1426	if (!dst->error)
1427		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1428	dst_release(dst);
1429}
1430EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1431
1432void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1433{
1434	struct dst_entry *dst;
1435
1436	ip6_update_pmtu(skb, sock_net(sk), mtu,
1437			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
1438
1439	dst = __sk_dst_get(sk);
1440	if (!dst || !dst->obsolete ||
1441	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1442		return;
1443
1444	bh_lock_sock(sk);
1445	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1446		ip6_datagram_dst_update(sk, false);
1447	bh_unlock_sock(sk);
1448}
1449EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1450
1451/* Handle redirects */
/*
 * Flow key for redirect lookups: a flowi6 extended with the address of
 * the router the redirect came from.  ip6_route_redirect() passes &fl6 to
 * fib6_rule_lookup() and __ip6_route_redirect() casts that pointer back
 * to struct ip6rd_flowi, so fl6 must remain the first member.
 */
1452struct ip6rd_flowi {
1453	struct flowi6 fl6;
	/* compared against rt6i_gateway of candidate routes */
1454	struct in6_addr gateway;
1455};
1456
1457static struct rt6_info *__ip6_route_redirect(struct net *net,
1458					     struct fib6_table *table,
1459					     struct flowi6 *fl6,
1460					     int flags)
1461{
1462	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1463	struct rt6_info *rt;
1464	struct fib6_node *fn;
1465
1466	/* Get the "current" route for this destination and
1467	 * check if the redirect has come from appropriate router.
1468	 *
1469	 * RFC 4861 specifies that redirects should only be
1470	 * accepted if they come from the nexthop to the target.
1471	 * Due to the way the routes are chosen, this notion
1472	 * is a bit fuzzy and one might need to check all possible
1473	 * routes.
1474	 */
1475
1476	read_lock_bh(&table->tb6_lock);
1477	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1478restart:
1479	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1480		if (rt6_check_expired(rt))
1481			continue;
1482		if (rt->dst.error)
1483			break;
1484		if (!(rt->rt6i_flags & RTF_GATEWAY))
1485			continue;
1486		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1487			continue;
1488		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1489			continue;
1490		break;
1491	}
1492
1493	if (!rt)
1494		rt = net->ipv6.ip6_null_entry;
1495	else if (rt->dst.error) {
1496		rt = net->ipv6.ip6_null_entry;
1497		goto out;
1498	}
1499
1500	if (rt == net->ipv6.ip6_null_entry) {
1501		fn = fib6_backtrack(fn, &fl6->saddr);
1502		if (fn)
1503			goto restart;
1504	}
1505
1506out:
1507	dst_hold(&rt->dst);
1508
1509	read_unlock_bh(&table->tb6_lock);
1510
1511	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1512	return rt;
1513};
1514
1515static struct dst_entry *ip6_route_redirect(struct net *net,
1516					const struct flowi6 *fl6,
1517					const struct in6_addr *gateway)
1518{
1519	int flags = RT6_LOOKUP_F_HAS_SADDR;
1520	struct ip6rd_flowi rdfl;
1521
1522	rdfl.fl6 = *fl6;
1523	rdfl.gateway = *gateway;
1524
1525	return fib6_rule_lookup(net, &rdfl.fl6,
1526				flags, __ip6_route_redirect);
1527}
1528
1529void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1530		  kuid_t uid)
1531{
1532	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1533	struct dst_entry *dst;
1534	struct flowi6 fl6;
1535
1536	memset(&fl6, 0, sizeof(fl6));
1537	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1538	fl6.flowi6_oif = oif;
1539	fl6.flowi6_mark = mark;
1540	fl6.daddr = iph->daddr;
1541	fl6.saddr = iph->saddr;
1542	fl6.flowlabel = ip6_flowinfo(iph);
1543	fl6.flowi6_uid = uid;
1544
1545	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1546	rt6_do_redirect(dst, NULL, skb);
1547	dst_release(dst);
1548}
1549EXPORT_SYMBOL_GPL(ip6_redirect);
1550
1551void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1552			    u32 mark)
1553{
1554	const struct ipv6hdr *iph = ipv6_hdr(skb);
1555	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1556	struct dst_entry *dst;
1557	struct flowi6 fl6;
1558
1559	memset(&fl6, 0, sizeof(fl6));
1560	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1561	fl6.flowi6_oif = oif;
1562	fl6.flowi6_mark = mark;
1563	fl6.daddr = msg->dest;
1564	fl6.saddr = iph->daddr;
1565	fl6.flowi6_uid = sock_net_uid(net, NULL);
1566
1567	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1568	rt6_do_redirect(dst, NULL, skb);
1569	dst_release(dst);
1570}
1571
1572void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1573{
1574	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1575		     sk->sk_uid);
1576}
1577EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1578
1579static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1580{
1581	struct net_device *dev = dst->dev;
1582	unsigned int mtu = dst_mtu(dst);
1583	struct net *net = dev_net(dev);
1584
1585	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1586
1587	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1588		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1589
1590	/*
1591	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1592	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1593	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1594	 * rely only on pmtu discovery"
1595	 */
1596	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1597		mtu = IPV6_MAXPLEN;
1598	return mtu;
1599}
1600
1601static unsigned int ip6_mtu(const struct dst_entry *dst)
1602{
1603	const struct rt6_info *rt = (const struct rt6_info *)dst;
1604	unsigned int mtu = rt->rt6i_pmtu;
1605	struct inet6_dev *idev;
1606
1607	if (mtu)
1608		goto out;
1609
1610	mtu = dst_metric_raw(dst, RTAX_MTU);
1611	if (mtu)
1612		goto out;
1613
1614	mtu = IPV6_MIN_MTU;
1615
1616	rcu_read_lock();
1617	idev = __in6_dev_get(dst->dev);
1618	if (idev)
1619		mtu = idev->cnf.mtu6;
1620	rcu_read_unlock();
1621
1622out:
1623	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1624
1625	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
1626}
1627
/*
 * Singly linked list, chained through dst->next, of the dst entries
 * created by icmp6_dst_alloc().  icmp6_dst_alloc(), icmp6_dst_gc() and
 * icmp6_clean_all() take icmp6_dst_lock around every access to it.
 */
1628static struct dst_entry *icmp6_dst_gc_list;
1629static DEFINE_SPINLOCK(icmp6_dst_lock);
1630
1631struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1632				  struct flowi6 *fl6)
1633{
1634	struct dst_entry *dst;
1635	struct rt6_info *rt;
1636	struct inet6_dev *idev = in6_dev_get(dev);
1637	struct net *net = dev_net(dev);
1638
1639	if (unlikely(!idev))
1640		return ERR_PTR(-ENODEV);
1641
1642	rt = ip6_dst_alloc(net, dev, 0);
1643	if (unlikely(!rt)) {
1644		in6_dev_put(idev);
1645		dst = ERR_PTR(-ENOMEM);
1646		goto out;
1647	}
1648
1649	rt->dst.flags |= DST_HOST;
1650	rt->dst.output  = ip6_output;
1651	atomic_set(&rt->dst.__refcnt, 1);
1652	rt->rt6i_gateway  = fl6->daddr;
1653	rt->rt6i_dst.addr = fl6->daddr;
1654	rt->rt6i_dst.plen = 128;
1655	rt->rt6i_idev     = idev;
1656	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1657
1658	spin_lock_bh(&icmp6_dst_lock);
1659	rt->dst.next = icmp6_dst_gc_list;
1660	icmp6_dst_gc_list = &rt->dst;
1661	spin_unlock_bh(&icmp6_dst_lock);
1662
1663	fib6_force_start_gc(net);
1664
1665	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1666
1667out:
1668	return dst;
1669}
1670
1671int icmp6_dst_gc(void)
1672{
1673	struct dst_entry *dst, **pprev;
1674	int more = 0;
1675
1676	spin_lock_bh(&icmp6_dst_lock);
1677	pprev = &icmp6_dst_gc_list;
1678
1679	while ((dst = *pprev) != NULL) {
1680		if (!atomic_read(&dst->__refcnt)) {
1681			*pprev = dst->next;
1682			dst_free(dst);
1683		} else {
1684			pprev = &dst->next;
1685			++more;
1686		}
1687	}
1688
1689	spin_unlock_bh(&icmp6_dst_lock);
1690
1691	return more;
1692}
1693
1694static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1695			    void *arg)
1696{
1697	struct dst_entry *dst, **pprev;
1698
1699	spin_lock_bh(&icmp6_dst_lock);
1700	pprev = &icmp6_dst_gc_list;
1701	while ((dst = *pprev) != NULL) {
1702		struct rt6_info *rt = (struct rt6_info *) dst;
1703		if (func(rt, arg)) {
1704			*pprev = dst->next;
1705			dst_free(dst);
1706		} else {
1707			pprev = &dst->next;
1708		}
1709	}
1710	spin_unlock_bh(&icmp6_dst_lock);
1711}
1712
1713static int ip6_dst_gc(struct dst_ops *ops)
1714{
1715	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1716	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1717	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1718	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1719	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1720	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1721	int entries;
1722
1723	entries = dst_entries_get_fast(ops);
1724	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1725	    entries <= rt_max_size)
1726		goto out;
1727
1728	net->ipv6.ip6_rt_gc_expire++;
1729	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1730	entries = dst_entries_get_slow(ops);
1731	if (entries < ops->gc_thresh)
1732		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1733out:
1734	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1735	return entries > rt_max_size;
1736}
1737
1738static int ip6_convert_metrics(struct mx6_config *mxc,
1739			       const struct fib6_config *cfg)
1740{
1741	bool ecn_ca = false;
1742	struct nlattr *nla;
1743	int remaining;
1744	u32 *mp;
1745
1746	if (!cfg->fc_mx)
1747		return 0;
1748
1749	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1750	if (unlikely(!mp))
1751		return -ENOMEM;
1752
1753	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1754		int type = nla_type(nla);
1755		u32 val;
1756
1757		if (!type)
1758			continue;
1759		if (unlikely(type > RTAX_MAX))
1760			goto err;
1761
1762		if (type == RTAX_CC_ALGO) {
1763			char tmp[TCP_CA_NAME_MAX];
1764
1765			nla_strlcpy(tmp, nla, sizeof(tmp));
1766			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1767			if (val == TCP_CA_UNSPEC)
1768				goto err;
1769		} else {
1770			val = nla_get_u32(nla);
1771		}
1772		if (type == RTAX_HOPLIMIT && val > 255)
1773			val = 255;
1774		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1775			goto err;
1776
1777		mp[type - 1] = val;
1778		__set_bit(type - 1, mxc->mx_valid);
1779	}
1780
1781	if (ecn_ca) {
1782		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1783		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1784	}
1785
1786	mxc->mx = mp;
1787	return 0;
1788 err:
1789	kfree(mp);
1790	return -EINVAL;
1791}
1792
1793static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1794					    struct fib6_config *cfg,
1795					    const struct in6_addr *gw_addr)
1796{
1797	struct flowi6 fl6 = {
1798		.flowi6_oif = cfg->fc_ifindex,
1799		.daddr = *gw_addr,
1800		.saddr = cfg->fc_prefsrc,
1801	};
1802	struct fib6_table *table;
1803	struct rt6_info *rt;
1804	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
1805
1806	table = fib6_get_table(net, cfg->fc_table);
1807	if (!table)
1808		return NULL;
1809
1810	if (!ipv6_addr_any(&cfg->fc_prefsrc))
1811		flags |= RT6_LOOKUP_F_HAS_SADDR;
1812
1813	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1814
1815	/* if table lookup failed, fall back to full lookup */
1816	if (rt == net->ipv6.ip6_null_entry) {
1817		ip6_rt_put(rt);
1818		rt = NULL;
1819	}
1820
1821	return rt;
1822}
1823
1824static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1825{
1826	struct net *net = cfg->fc_nlinfo.nl_net;
1827	struct rt6_info *rt = NULL;
1828	struct net_device *dev = NULL;
1829	struct inet6_dev *idev = NULL;
1830	struct fib6_table *table;
1831	int addr_type;
1832	int err = -EINVAL;
1833
1834	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1835		goto out;
1836#ifndef CONFIG_IPV6_SUBTREES
1837	if (cfg->fc_src_len)
1838		goto out;
1839#endif
1840	if (cfg->fc_ifindex) {
1841		err = -ENODEV;
1842		dev = dev_get_by_index(net, cfg->fc_ifindex);
1843		if (!dev)
1844			goto out;
1845		idev = in6_dev_get(dev);
1846		if (!idev)
1847			goto out;
1848	}
1849
1850	if (cfg->fc_metric == 0)
1851		cfg->fc_metric = IP6_RT_PRIO_USER;
1852
1853	err = -ENOBUFS;
1854	if (cfg->fc_nlinfo.nlh &&
1855	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1856		table = fib6_get_table(net, cfg->fc_table);
1857		if (!table) {
1858			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1859			table = fib6_new_table(net, cfg->fc_table);
1860		}
1861	} else {
1862		table = fib6_new_table(net, cfg->fc_table);
1863	}
1864
1865	if (!table)
1866		goto out;
1867
1868	rt = ip6_dst_alloc(net, NULL,
1869			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1870
1871	if (!rt) {
1872		err = -ENOMEM;
1873		goto out;
1874	}
1875
1876	if (cfg->fc_flags & RTF_EXPIRES)
1877		rt6_set_expires(rt, jiffies +
1878				clock_t_to_jiffies(cfg->fc_expires));
1879	else
1880		rt6_clean_expires(rt);
1881
1882	if (cfg->fc_protocol == RTPROT_UNSPEC)
1883		cfg->fc_protocol = RTPROT_BOOT;
1884	rt->rt6i_protocol = cfg->fc_protocol;
1885
1886	addr_type = ipv6_addr_type(&cfg->fc_dst);
1887
1888	if (addr_type & IPV6_ADDR_MULTICAST)
1889		rt->dst.input = ip6_mc_input;
1890	else if (cfg->fc_flags & RTF_LOCAL)
1891		rt->dst.input = ip6_input;
1892	else
1893		rt->dst.input = ip6_forward;
1894
1895	rt->dst.output = ip6_output;
1896
1897	if (cfg->fc_encap) {
1898		struct lwtunnel_state *lwtstate;
1899
1900		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1901					   cfg->fc_encap, AF_INET6, cfg,
1902					   &lwtstate);
1903		if (err)
1904			goto out;
1905		rt->dst.lwtstate = lwtstate_get(lwtstate);
1906		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1907			rt->dst.lwtstate->orig_output = rt->dst.output;
1908			rt->dst.output = lwtunnel_output;
1909		}
1910		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1911			rt->dst.lwtstate->orig_input = rt->dst.input;
1912			rt->dst.input = lwtunnel_input;
1913		}
1914	}
1915
1916	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1917	rt->rt6i_dst.plen = cfg->fc_dst_len;
1918	if (rt->rt6i_dst.plen == 128)
1919		rt->dst.flags |= DST_HOST;
1920
1921#ifdef CONFIG_IPV6_SUBTREES
1922	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1923	rt->rt6i_src.plen = cfg->fc_src_len;
1924#endif
1925
1926	rt->rt6i_metric = cfg->fc_metric;
1927
1928	/* We cannot add true routes via loopback here,
1929	   they would result in kernel looping; promote them to reject routes
1930	 */
1931	if ((cfg->fc_flags & RTF_REJECT) ||
1932	    (dev && (dev->flags & IFF_LOOPBACK) &&
1933	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1934	     !(cfg->fc_flags & RTF_LOCAL))) {
1935		/* hold loopback dev/idev if we haven't done so. */
1936		if (dev != net->loopback_dev) {
1937			if (dev) {
1938				dev_put(dev);
1939				in6_dev_put(idev);
1940			}
1941			dev = net->loopback_dev;
1942			dev_hold(dev);
1943			idev = in6_dev_get(dev);
1944			if (!idev) {
1945				err = -ENODEV;
1946				goto out;
1947			}
1948		}
1949		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1950		switch (cfg->fc_type) {
1951		case RTN_BLACKHOLE:
1952			rt->dst.error = -EINVAL;
1953			rt->dst.output = dst_discard_out;
1954			rt->dst.input = dst_discard;
1955			break;
1956		case RTN_PROHIBIT:
1957			rt->dst.error = -EACCES;
1958			rt->dst.output = ip6_pkt_prohibit_out;
1959			rt->dst.input = ip6_pkt_prohibit;
1960			break;
1961		case RTN_THROW:
1962		case RTN_UNREACHABLE:
1963		default:
1964			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1965					: (cfg->fc_type == RTN_UNREACHABLE)
1966					? -EHOSTUNREACH : -ENETUNREACH;
1967			rt->dst.output = ip6_pkt_discard_out;
1968			rt->dst.input = ip6_pkt_discard;
1969			break;
1970		}
1971		goto install_route;
1972	}
1973
1974	if (cfg->fc_flags & RTF_GATEWAY) {
1975		const struct in6_addr *gw_addr;
1976		int gwa_type;
1977
1978		gw_addr = &cfg->fc_gateway;
1979		gwa_type = ipv6_addr_type(gw_addr);
1980
1981		/* if gw_addr is local we will fail to detect this in case
1982		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1983		 * will return already-added prefix route via interface that
1984		 * prefix route was assigned to, which might be non-loopback.
1985		 */
1986		err = -EINVAL;
1987		if (ipv6_chk_addr_and_flags(net, gw_addr,
1988					    gwa_type & IPV6_ADDR_LINKLOCAL ?
1989					    dev : NULL, 0, 0))
1990			goto out;
1991
1992		rt->rt6i_gateway = *gw_addr;
1993
1994		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1995			struct rt6_info *grt = NULL;
1996
1997			/* IPv6 strictly inhibits using not link-local
1998			   addresses as nexthop address.
1999			   Otherwise, router will not able to send redirects.
2000			   It is very good, but in some (rare!) circumstances
2001			   (SIT, PtP, NBMA NOARP links) it is handy to allow
2002			   some exceptions. --ANK
2003			   We allow IPv4-mapped nexthops to support RFC4798-type
2004			   addressing
2005			 */
2006			if (!(gwa_type & (IPV6_ADDR_UNICAST |
2007					  IPV6_ADDR_MAPPED)))
2008				goto out;
2009
2010			if (cfg->fc_table) {
2011				grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2012
2013				if (grt) {
2014					if (grt->rt6i_flags & RTF_GATEWAY ||
2015					    (dev && dev != grt->dst.dev)) {
2016						ip6_rt_put(grt);
2017						grt = NULL;
2018					}
2019				}
2020			}
2021
2022			if (!grt)
2023				grt = rt6_lookup(net, gw_addr, NULL,
2024						 cfg->fc_ifindex, 1);
2025
2026			err = -EHOSTUNREACH;
2027			if (!grt)
2028				goto out;
2029			if (dev) {
2030				if (dev != grt->dst.dev) {
2031					ip6_rt_put(grt);
2032					goto out;
2033				}
2034			} else {
2035				dev = grt->dst.dev;
2036				idev = grt->rt6i_idev;
2037				dev_hold(dev);
2038				in6_dev_hold(grt->rt6i_idev);
2039			}
2040			if (!(grt->rt6i_flags & RTF_GATEWAY))
2041				err = 0;
2042			ip6_rt_put(grt);
2043
2044			if (err)
2045				goto out;
2046		}
2047		err = -EINVAL;
2048		if (!dev || (dev->flags & IFF_LOOPBACK))
2049			goto out;
2050	}
2051
2052	err = -ENODEV;
2053	if (!dev)
2054		goto out;
2055
2056	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2057		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2058			err = -EINVAL;
2059			goto out;
2060		}
2061		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
2062		rt->rt6i_prefsrc.plen = 128;
2063	} else
2064		rt->rt6i_prefsrc.plen = 0;
2065
2066	rt->rt6i_flags = cfg->fc_flags;
2067
2068install_route:
2069	rt->dst.dev = dev;
2070	rt->rt6i_idev = idev;
2071	rt->rt6i_table = table;
2072
2073	cfg->fc_nlinfo.nl_net = dev_net(dev);
2074
2075	return rt;
2076out:
2077	if (dev)
2078		dev_put(dev);
2079	if (idev)
2080		in6_dev_put(idev);
2081	if (rt)
2082		dst_free(&rt->dst);
2083
2084	return ERR_PTR(err);
2085}
2086
2087int ip6_route_add(struct fib6_config *cfg)
2088{
2089	struct mx6_config mxc = { .mx = NULL, };
2090	struct rt6_info *rt;
2091	int err;
2092
2093	rt = ip6_route_info_create(cfg);
2094	if (IS_ERR(rt)) {
2095		err = PTR_ERR(rt);
2096		rt = NULL;
2097		goto out;
2098	}
2099
2100	err = ip6_convert_metrics(&mxc, cfg);
2101	if (err)
2102		goto out;
2103
2104	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2105
2106	kfree(mxc.mx);
2107
2108	return err;
2109out:
2110	if (rt)
2111		dst_free(&rt->dst);
2112
2113	return err;
2114}
2115
2116static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2117{
2118	int err;
2119	struct fib6_table *table;
2120	struct net *net = dev_net(rt->dst.dev);
2121
2122	if (rt == net->ipv6.ip6_null_entry ||
2123	    rt->dst.flags & DST_NOCACHE) {
2124		err = -ENOENT;
2125		goto out;
2126	}
2127
2128	table = rt->rt6i_table;
2129	write_lock_bh(&table->tb6_lock);
2130	err = fib6_del(rt, info);
2131	write_unlock_bh(&table->tb6_lock);
2132
2133out:
2134	ip6_rt_put(rt);
2135	return err;
2136}
2137
2138int ip6_del_rt(struct rt6_info *rt)
2139{
2140	struct nl_info info = {
2141		.nl_net = dev_net(rt->dst.dev),
2142	};
2143	return __ip6_del_rt(rt, &info);
2144}
2145
2146static int ip6_route_del(struct fib6_config *cfg)
2147{
2148	struct fib6_table *table;
2149	struct fib6_node *fn;
2150	struct rt6_info *rt;
2151	int err = -ESRCH;
2152
2153	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2154	if (!table)
2155		return err;
2156
2157	read_lock_bh(&table->tb6_lock);
2158
2159	fn = fib6_locate(&table->tb6_root,
2160			 &cfg->fc_dst, cfg->fc_dst_len,
2161			 &cfg->fc_src, cfg->fc_src_len);
2162
2163	if (fn) {
2164		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2165			if ((rt->rt6i_flags & RTF_CACHE) &&
2166			    !(cfg->fc_flags & RTF_CACHE))
2167				continue;
2168			if (cfg->fc_ifindex &&
2169			    (!rt->dst.dev ||
2170			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2171				continue;
2172			if (cfg->fc_flags & RTF_GATEWAY &&
2173			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2174				continue;
2175			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2176				continue;
2177			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2178				continue;
2179			dst_hold(&rt->dst);
2180			read_unlock_bh(&table->tb6_lock);
2181
2182			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2183		}
2184	}
2185	read_unlock_bh(&table->tb6_lock);
2186
2187	return err;
2188}
2189
2190static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2191{
2192	struct netevent_redirect netevent;
2193	struct rt6_info *rt, *nrt = NULL;
2194	struct ndisc_options ndopts;
2195	struct inet6_dev *in6_dev;
2196	struct neighbour *neigh;
2197	struct rd_msg *msg;
2198	int optlen, on_link;
2199	u8 *lladdr;
2200
2201	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2202	optlen -= sizeof(*msg);
2203
2204	if (optlen < 0) {
2205		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2206		return;
2207	}
2208
2209	msg = (struct rd_msg *)icmp6_hdr(skb);
2210
2211	if (ipv6_addr_is_multicast(&msg->dest)) {
2212		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2213		return;
2214	}
2215
2216	on_link = 0;
2217	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2218		on_link = 1;
2219	} else if (ipv6_addr_type(&msg->target) !=
2220		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2221		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2222		return;
2223	}
2224
2225	in6_dev = __in6_dev_get(skb->dev);
2226	if (!in6_dev)
2227		return;
2228	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2229		return;
2230
2231	/* RFC2461 8.1:
2232	 *	The IP source address of the Redirect MUST be the same as the current
2233	 *	first-hop router for the specified ICMP Destination Address.
2234	 */
2235
2236	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
2237		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2238		return;
2239	}
2240
2241	lladdr = NULL;
2242	if (ndopts.nd_opts_tgt_lladdr) {
2243		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2244					     skb->dev);
2245		if (!lladdr) {
2246			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2247			return;
2248		}
2249	}
2250
2251	rt = (struct rt6_info *) dst;
2252	if (rt->rt6i_flags & RTF_REJECT) {
2253		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2254		return;
2255	}
2256
2257	/* Redirect received -> path was valid.
2258	 * Look, redirects are sent only in response to data packets,
2259	 * so that this nexthop apparently is reachable. --ANK
2260	 */
2261	dst_confirm(&rt->dst);
2262
2263	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2264	if (!neigh)
2265		return;
2266
2267	/*
2268	 *	We have finally decided to accept it.
2269	 */
2270
2271	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
2272		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
2273		     NEIGH_UPDATE_F_OVERRIDE|
2274		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2275				     NEIGH_UPDATE_F_ISROUTER)),
2276		     NDISC_REDIRECT, &ndopts);
2277
2278	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2279	if (!nrt)
2280		goto out;
2281
2282	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2283	if (on_link)
2284		nrt->rt6i_flags &= ~RTF_GATEWAY;
2285
2286	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2287
2288	if (ip6_ins_rt(nrt))
2289		goto out;
2290
2291	netevent.old = &rt->dst;
2292	netevent.new = &nrt->dst;
2293	netevent.daddr = &msg->dest;
2294	netevent.neigh = neigh;
2295	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2296
2297	if (rt->rt6i_flags & RTF_CACHE) {
2298		rt = (struct rt6_info *) dst_clone(&rt->dst);
2299		ip6_del_rt(rt);
2300	}
2301
2302out:
2303	neigh_release(neigh);
2304}
2305
2306/*
2307 *	Misc support functions
2308 */
2309
2310static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2311{
2312	BUG_ON(from->dst.from);
2313
2314	rt->rt6i_flags &= ~RTF_EXPIRES;
2315	dst_hold(&from->dst);
2316	rt->dst.from = &from->dst;
2317	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2318}
2319
2320static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2321{
2322	rt->dst.input = ort->dst.input;
2323	rt->dst.output = ort->dst.output;
2324	rt->rt6i_dst = ort->rt6i_dst;
2325	rt->dst.error = ort->dst.error;
2326	rt->rt6i_idev = ort->rt6i_idev;
2327	if (rt->rt6i_idev)
2328		in6_dev_hold(rt->rt6i_idev);
2329	rt->dst.lastuse = jiffies;
2330	rt->rt6i_gateway = ort->rt6i_gateway;
2331	rt->rt6i_flags = ort->rt6i_flags;
2332	rt6_set_from(rt, ort);
2333	rt->rt6i_metric = ort->rt6i_metric;
2334#ifdef CONFIG_IPV6_SUBTREES
2335	rt->rt6i_src = ort->rt6i_src;
2336#endif
2337	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2338	rt->rt6i_table = ort->rt6i_table;
2339	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2340}
2341
2342#ifdef CONFIG_IPV6_ROUTE_INFO
2343static struct rt6_info *rt6_get_route_info(struct net *net,
2344					   const struct in6_addr *prefix, int prefixlen,
2345					   const struct in6_addr *gwaddr,
2346					   struct net_device *dev)
2347{
2348	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2349	int ifindex = dev->ifindex;
2350	struct fib6_node *fn;
2351	struct rt6_info *rt = NULL;
2352	struct fib6_table *table;
2353
2354	table = fib6_get_table(net, tb_id);
2355	if (!table)
2356		return NULL;
2357
2358	read_lock_bh(&table->tb6_lock);
2359	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2360	if (!fn)
2361		goto out;
2362
2363	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2364		if (rt->dst.dev->ifindex != ifindex)
2365			continue;
2366		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2367			continue;
2368		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2369			continue;
2370		dst_hold(&rt->dst);
2371		break;
2372	}
2373out:
2374	read_unlock_bh(&table->tb6_lock);
2375	return rt;
2376}
2377
2378static struct rt6_info *rt6_add_route_info(struct net *net,
2379					   const struct in6_addr *prefix, int prefixlen,
2380					   const struct in6_addr *gwaddr,
2381					   struct net_device *dev,
2382					   unsigned int pref)
2383{
2384	struct fib6_config cfg = {
2385		.fc_metric	= IP6_RT_PRIO_USER,
2386		.fc_ifindex	= dev->ifindex,
2387		.fc_dst_len	= prefixlen,
2388		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2389				  RTF_UP | RTF_PREF(pref),
2390		.fc_nlinfo.portid = 0,
2391		.fc_nlinfo.nlh = NULL,
2392		.fc_nlinfo.nl_net = net,
2393	};
2394
2395	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
2396	cfg.fc_dst = *prefix;
2397	cfg.fc_gateway = *gwaddr;
2398
2399	/* We should treat it as a default route if prefix length is 0. */
2400	if (!prefixlen)
2401		cfg.fc_flags |= RTF_DEFAULT;
2402
2403	ip6_route_add(&cfg);
2404
2405	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
2406}
2407#endif
2408
2409struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2410{
2411	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
2412	struct rt6_info *rt;
2413	struct fib6_table *table;
2414
2415	table = fib6_get_table(dev_net(dev), tb_id);
2416	if (!table)
2417		return NULL;
2418
2419	read_lock_bh(&table->tb6_lock);
2420	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2421		if (dev == rt->dst.dev &&
2422		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2423		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2424			break;
2425	}
2426	if (rt)
2427		dst_hold(&rt->dst);
2428	read_unlock_bh(&table->tb6_lock);
2429	return rt;
2430}
2431
2432struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2433				     struct net_device *dev,
2434				     unsigned int pref)
2435{
2436	struct fib6_config cfg = {
2437		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2438		.fc_metric	= IP6_RT_PRIO_USER,
2439		.fc_ifindex	= dev->ifindex,
2440		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2441				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2442		.fc_nlinfo.portid = 0,
2443		.fc_nlinfo.nlh = NULL,
2444		.fc_nlinfo.nl_net = dev_net(dev),
2445	};
2446
2447	cfg.fc_gateway = *gwaddr;
2448
2449	if (!ip6_route_add(&cfg)) {
2450		struct fib6_table *table;
2451
2452		table = fib6_get_table(dev_net(dev), cfg.fc_table);
2453		if (table)
2454			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2455	}
2456
2457	return rt6_get_dflt_router(gwaddr, dev);
2458}
2459
2460static void __rt6_purge_dflt_routers(struct fib6_table *table)
2461{
2462	struct rt6_info *rt;
 
 
 
 
 
 
2463
2464restart:
2465	read_lock_bh(&table->tb6_lock);
2466	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2467		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2468		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2469			dst_hold(&rt->dst);
2470			read_unlock_bh(&table->tb6_lock);
2471			ip6_del_rt(rt);
2472			goto restart;
2473		}
2474	}
2475	read_unlock_bh(&table->tb6_lock);
2476
2477	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
2478}
2479
2480void rt6_purge_dflt_routers(struct net *net)
2481{
2482	struct fib6_table *table;
2483	struct hlist_head *head;
2484	unsigned int h;
2485
2486	rcu_read_lock();
2487
2488	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2489		head = &net->ipv6.fib_table_hash[h];
2490		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2491			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
2492				__rt6_purge_dflt_routers(table);
2493		}
2494	}
2495
2496	rcu_read_unlock();
2497}
2498
2499static void rtmsg_to_fib6_config(struct net *net,
2500				 struct in6_rtmsg *rtmsg,
2501				 struct fib6_config *cfg)
2502{
2503	memset(cfg, 0, sizeof(*cfg));
2504
2505	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2506			 : RT6_TABLE_MAIN;
2507	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2508	cfg->fc_metric = rtmsg->rtmsg_metric;
2509	cfg->fc_expires = rtmsg->rtmsg_info;
2510	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2511	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2512	cfg->fc_flags = rtmsg->rtmsg_flags;
2513
2514	cfg->fc_nlinfo.nl_net = net;
2515
2516	cfg->fc_dst = rtmsg->rtmsg_dst;
2517	cfg->fc_src = rtmsg->rtmsg_src;
2518	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2519}
2520
2521int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2522{
2523	struct fib6_config cfg;
2524	struct in6_rtmsg rtmsg;
2525	int err;
2526
2527	switch (cmd) {
2528	case SIOCADDRT:		/* Add a route */
2529	case SIOCDELRT:		/* Delete a route */
2530		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2531			return -EPERM;
2532		err = copy_from_user(&rtmsg, arg,
2533				     sizeof(struct in6_rtmsg));
2534		if (err)
2535			return -EFAULT;
2536
2537		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2538
2539		rtnl_lock();
2540		switch (cmd) {
2541		case SIOCADDRT:
2542			err = ip6_route_add(&cfg);
2543			break;
2544		case SIOCDELRT:
2545			err = ip6_route_del(&cfg);
2546			break;
2547		default:
2548			err = -EINVAL;
2549		}
2550		rtnl_unlock();
2551
2552		return err;
2553	}
2554
2555	return -EINVAL;
2556}
2557
2558/*
2559 *	Drop the packet on the floor
2560 */
2561
2562static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2563{
2564	int type;
2565	struct dst_entry *dst = skb_dst(skb);
2566	switch (ipstats_mib_noroutes) {
2567	case IPSTATS_MIB_INNOROUTES:
2568		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2569		if (type == IPV6_ADDR_ANY) {
2570			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2571				      IPSTATS_MIB_INADDRERRORS);
2572			break;
2573		}
2574		/* FALLTHROUGH */
2575	case IPSTATS_MIB_OUTNOROUTES:
2576		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2577			      ipstats_mib_noroutes);
2578		break;
2579	}
2580	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2581	kfree_skb(skb);
2582	return 0;
2583}
2584
2585static int ip6_pkt_discard(struct sk_buff *skb)
2586{
2587	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2588}
2589
2590static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2591{
2592	skb->dev = skb_dst(skb)->dev;
2593	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2594}
2595
2596static int ip6_pkt_prohibit(struct sk_buff *skb)
2597{
2598	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2599}
2600
2601static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2602{
2603	skb->dev = skb_dst(skb)->dev;
2604	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2605}
2606
2607/*
2608 *	Allocate a dst for local (unicast / anycast) address.
2609 */
2610
2611struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2612				    const struct in6_addr *addr,
2613				    bool anycast)
2614{
2615	u32 tb_id;
2616	struct net *net = dev_net(idev->dev);
2617	struct net_device *dev = net->loopback_dev;
2618	struct rt6_info *rt;
2619
2620	/* use L3 Master device as loopback for host routes if device
2621	 * is enslaved and address is not link local or multicast
2622	 */
2623	if (!rt6_need_strict(addr))
2624		dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
2625
2626	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
2627	if (!rt)
2628		return ERR_PTR(-ENOMEM);
2629
2630	in6_dev_hold(idev);
2631
2632	rt->dst.flags |= DST_HOST;
2633	rt->dst.input = ip6_input;
2634	rt->dst.output = ip6_output;
2635	rt->rt6i_idev = idev;
2636
2637	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2638	if (anycast)
2639		rt->rt6i_flags |= RTF_ANYCAST;
2640	else
2641		rt->rt6i_flags |= RTF_LOCAL;
2642
2643	rt->rt6i_gateway  = *addr;
2644	rt->rt6i_dst.addr = *addr;
2645	rt->rt6i_dst.plen = 128;
2646	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2647	rt->rt6i_table = fib6_get_table(net, tb_id);
2648	rt->dst.flags |= DST_NOCACHE;
2649
2650	atomic_set(&rt->dst.__refcnt, 1);
2651
2652	return rt;
2653}
2654
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * remove deleted ip from prefsrc entries
 *
 * Argument bundle handed to fib6_remove_prefsrc() for every route visited.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared with rt6i_prefsrc */
};
2661
2662static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2663{
2664	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2665	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2666	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2667
2668	if (((void *)rt->dst.dev == dev || !dev) &&
2669	    rt != net->ipv6.ip6_null_entry &&
2670	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2671		/* remove prefsrc entry */
2672		rt->rt6i_prefsrc.plen = 0;
2673	}
2674	return 0;
2675}
2676
2677void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2678{
2679	struct net *net = dev_net(ifp->idev->dev);
2680	struct arg_dev_net_ip adni = {
2681		.dev = ifp->idev->dev,
2682		.net = net,
2683		.addr = &ifp->addr,
2684	};
2685	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2686}
2687
2688#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2689#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2690
2691/* Remove routers and update dst entries when gateway turn into host. */
2692static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2693{
2694	struct in6_addr *gateway = (struct in6_addr *)arg;
2695
2696	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2697	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2698	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2699		return -1;
2700	}
2701	return 0;
2702}
2703
2704void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2705{
2706	fib6_clean_all(net, fib6_clean_tohost, gateway);
2707}
2708
/* Argument bundle handed to fib6_ifdown() for every route visited. */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any */
	struct net *net;	/* namespace whose null entry is kept */
};
2713
2714static int fib6_ifdown(struct rt6_info *rt, void *arg)
2715{
2716	const struct arg_dev_net *adn = arg;
2717	const struct net_device *dev = adn->dev;
2718
2719	if ((rt->dst.dev == dev || !dev) &&
2720	    rt != adn->net->ipv6.ip6_null_entry)
2721		return -1;
2722
2723	return 0;
2724}
2725
2726void rt6_ifdown(struct net *net, struct net_device *dev)
2727{
2728	struct arg_dev_net adn = {
2729		.dev = dev,
2730		.net = net,
2731	};
2732
2733	fib6_clean_all(net, fib6_ifdown, &adn);
2734	icmp6_clean_all(fib6_ifdown, &adn);
2735	if (dev)
2736		rt6_uncached_list_flush_dev(net, dev);
2737}
2738
/* Argument bundle handed to rt6_mtu_change_route() for every route visited. */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2743
2744static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2745{
2746	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2747	struct inet6_dev *idev;
2748
2749	/* In IPv6 pmtu discovery is not optional,
2750	   so that RTAX_MTU lock cannot disable it.
2751	   We still use this lock to block changes
2752	   caused by addrconf/ndisc.
2753	*/
2754
2755	idev = __in6_dev_get(arg->dev);
2756	if (!idev)
2757		return 0;
2758
2759	/* For administrative MTU increase, there is no way to discover
2760	   IPv6 PMTU increase, so PMTU increase should be updated here.
2761	   Since RFC 1981 doesn't include administrative MTU increase
2762	   update PMTU increase is a MUST. (i.e. jumbo frame)
2763	 */
2764	/*
2765	   If new MTU is less than route PMTU, this new MTU will be the
2766	   lowest MTU in the path, update the route PMTU to reflect PMTU
2767	   decreases; if new MTU is greater than route PMTU, and the
2768	   old MTU is the lowest MTU in the path, update the route PMTU
2769	   to reflect the increase. In this case if the other nodes' MTU
2770	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2771	   PMTU discovery.
2772	 */
2773	if (rt->dst.dev == arg->dev &&
2774	    dst_metric_raw(&rt->dst, RTAX_MTU) &&
2775	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2776		if (rt->rt6i_flags & RTF_CACHE) {
2777			/* For RTF_CACHE with rt6i_pmtu == 0
2778			 * (i.e. a redirected route),
2779			 * the metrics of its rt->dst.from has already
2780			 * been updated.
2781			 */
2782			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2783				rt->rt6i_pmtu = arg->mtu;
2784		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2785			   (dst_mtu(&rt->dst) < arg->mtu &&
2786			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2787			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2788		}
2789	}
2790	return 0;
2791}
2792
2793void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2794{
2795	struct rt6_mtu_change_arg arg = {
2796		.dev = dev,
2797		.mtu = mtu,
2798	};
2799
2800	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2801}
2802
2803static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2804	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2805	[RTA_OIF]               = { .type = NLA_U32 },
2806	[RTA_IIF]		= { .type = NLA_U32 },
2807	[RTA_PRIORITY]          = { .type = NLA_U32 },
2808	[RTA_METRICS]           = { .type = NLA_NESTED },
2809	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2810	[RTA_PREF]              = { .type = NLA_U8 },
2811	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2812	[RTA_ENCAP]		= { .type = NLA_NESTED },
2813	[RTA_EXPIRES]		= { .type = NLA_U32 },
2814	[RTA_UID]		= { .type = NLA_U32 },
2815};
2816
2817static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2818			      struct fib6_config *cfg)
2819{
2820	struct rtmsg *rtm;
2821	struct nlattr *tb[RTA_MAX+1];
2822	unsigned int pref;
2823	int err;
2824
2825	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2826	if (err < 0)
2827		goto errout;
2828
2829	err = -EINVAL;
2830	rtm = nlmsg_data(nlh);
2831	memset(cfg, 0, sizeof(*cfg));
2832
2833	cfg->fc_table = rtm->rtm_table;
2834	cfg->fc_dst_len = rtm->rtm_dst_len;
2835	cfg->fc_src_len = rtm->rtm_src_len;
2836	cfg->fc_flags = RTF_UP;
2837	cfg->fc_protocol = rtm->rtm_protocol;
2838	cfg->fc_type = rtm->rtm_type;
2839
2840	if (rtm->rtm_type == RTN_UNREACHABLE ||
2841	    rtm->rtm_type == RTN_BLACKHOLE ||
2842	    rtm->rtm_type == RTN_PROHIBIT ||
2843	    rtm->rtm_type == RTN_THROW)
2844		cfg->fc_flags |= RTF_REJECT;
2845
2846	if (rtm->rtm_type == RTN_LOCAL)
2847		cfg->fc_flags |= RTF_LOCAL;
2848
2849	if (rtm->rtm_flags & RTM_F_CLONED)
2850		cfg->fc_flags |= RTF_CACHE;
2851
2852	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2853	cfg->fc_nlinfo.nlh = nlh;
2854	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2855
2856	if (tb[RTA_GATEWAY]) {
2857		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2858		cfg->fc_flags |= RTF_GATEWAY;
2859	}
2860
2861	if (tb[RTA_DST]) {
2862		int plen = (rtm->rtm_dst_len + 7) >> 3;
2863
2864		if (nla_len(tb[RTA_DST]) < plen)
2865			goto errout;
2866
2867		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2868	}
2869
2870	if (tb[RTA_SRC]) {
2871		int plen = (rtm->rtm_src_len + 7) >> 3;
2872
2873		if (nla_len(tb[RTA_SRC]) < plen)
2874			goto errout;
2875
2876		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2877	}
2878
2879	if (tb[RTA_PREFSRC])
2880		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2881
2882	if (tb[RTA_OIF])
2883		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2884
2885	if (tb[RTA_PRIORITY])
2886		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2887
2888	if (tb[RTA_METRICS]) {
2889		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2890		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2891	}
2892
2893	if (tb[RTA_TABLE])
2894		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2895
2896	if (tb[RTA_MULTIPATH]) {
2897		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2898		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2899
2900		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
2901						     cfg->fc_mp_len);
2902		if (err < 0)
2903			goto errout;
2904	}
2905
2906	if (tb[RTA_PREF]) {
2907		pref = nla_get_u8(tb[RTA_PREF]);
2908		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2909		    pref != ICMPV6_ROUTER_PREF_HIGH)
2910			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2911		cfg->fc_flags |= RTF_PREF(pref);
2912	}
2913
2914	if (tb[RTA_ENCAP])
2915		cfg->fc_encap = tb[RTA_ENCAP];
2916
2917	if (tb[RTA_ENCAP_TYPE]) {
2918		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2919
2920		err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
2921		if (err < 0)
2922			goto errout;
2923	}
2924
2925	if (tb[RTA_EXPIRES]) {
2926		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2927
2928		if (addrconf_finite_timeout(timeout)) {
2929			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2930			cfg->fc_flags |= RTF_EXPIRES;
2931		}
2932	}
2933
2934	err = 0;
2935errout:
2936	return err;
2937}
2938
2939struct rt6_nh {
2940	struct rt6_info *rt6_info;
2941	struct fib6_config r_cfg;
2942	struct mx6_config mxc;
2943	struct list_head next;
2944};
2945
2946static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2947{
2948	struct rt6_nh *nh;
2949
2950	list_for_each_entry(nh, rt6_nh_list, next) {
2951		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2952		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2953		        nh->r_cfg.fc_ifindex);
2954	}
2955}
2956
2957static int ip6_route_info_append(struct list_head *rt6_nh_list,
2958				 struct rt6_info *rt, struct fib6_config *r_cfg)
2959{
2960	struct rt6_nh *nh;
2961	struct rt6_info *rtnh;
2962	int err = -EEXIST;
2963
2964	list_for_each_entry(nh, rt6_nh_list, next) {
2965		/* check if rt6_info already exists */
2966		rtnh = nh->rt6_info;
2967
2968		if (rtnh->dst.dev == rt->dst.dev &&
2969		    rtnh->rt6i_idev == rt->rt6i_idev &&
2970		    ipv6_addr_equal(&rtnh->rt6i_gateway,
2971				    &rt->rt6i_gateway))
2972			return err;
2973	}
2974
2975	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2976	if (!nh)
2977		return -ENOMEM;
2978	nh->rt6_info = rt;
2979	err = ip6_convert_metrics(&nh->mxc, r_cfg);
2980	if (err) {
2981		kfree(nh);
2982		return err;
2983	}
2984	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2985	list_add_tail(&nh->next, rt6_nh_list);
2986
2987	return 0;
2988}
2989
2990static int ip6_route_multipath_add(struct fib6_config *cfg)
2991{
2992	struct fib6_config r_cfg;
2993	struct rtnexthop *rtnh;
2994	struct rt6_info *rt;
2995	struct rt6_nh *err_nh;
2996	struct rt6_nh *nh, *nh_safe;
2997	int remaining;
2998	int attrlen;
2999	int err = 1;
3000	int nhn = 0;
3001	int replace = (cfg->fc_nlinfo.nlh &&
3002		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3003	LIST_HEAD(rt6_nh_list);
3004
3005	remaining = cfg->fc_mp_len;
3006	rtnh = (struct rtnexthop *)cfg->fc_mp;
3007
3008	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
3009	 * rt6_info structs per nexthop
3010	 */
3011	while (rtnh_ok(rtnh, remaining)) {
3012		memcpy(&r_cfg, cfg, sizeof(*cfg));
3013		if (rtnh->rtnh_ifindex)
3014			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3015
3016		attrlen = rtnh_attrlen(rtnh);
3017		if (attrlen > 0) {
3018			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3019
3020			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3021			if (nla) {
3022				r_cfg.fc_gateway = nla_get_in6_addr(nla);
3023				r_cfg.fc_flags |= RTF_GATEWAY;
3024			}
3025			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3026			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3027			if (nla)
3028				r_cfg.fc_encap_type = nla_get_u16(nla);
3029		}
3030
3031		rt = ip6_route_info_create(&r_cfg);
3032		if (IS_ERR(rt)) {
3033			err = PTR_ERR(rt);
3034			rt = NULL;
3035			goto cleanup;
3036		}
3037
3038		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
3039		if (err) {
3040			dst_free(&rt->dst);
3041			goto cleanup;
3042		}
3043
3044		rtnh = rtnh_next(rtnh, &remaining);
3045	}
3046
3047	err_nh = NULL;
3048	list_for_each_entry(nh, &rt6_nh_list, next) {
3049		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
3050		/* nh->rt6_info is used or freed at this point, reset to NULL*/
3051		nh->rt6_info = NULL;
3052		if (err) {
3053			if (replace && nhn)
3054				ip6_print_replace_route_err(&rt6_nh_list);
3055			err_nh = nh;
3056			goto add_errout;
3057		}
3058
3059		/* Because each route is added like a single route we remove
3060		 * these flags after the first nexthop: if there is a collision,
3061		 * we have already failed to add the first nexthop:
3062		 * fib6_add_rt2node() has rejected it; when replacing, old
3063		 * nexthops have been replaced by first new, the rest should
3064		 * be added to it.
3065		 */
3066		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3067						     NLM_F_REPLACE);
3068		nhn++;
3069	}
3070
3071	goto cleanup;
3072
3073add_errout:
3074	/* Delete routes that were already added */
3075	list_for_each_entry(nh, &rt6_nh_list, next) {
3076		if (err_nh == nh)
3077			break;
3078		ip6_route_del(&nh->r_cfg);
3079	}
3080
3081cleanup:
3082	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3083		if (nh->rt6_info)
3084			dst_free(&nh->rt6_info->dst);
3085		kfree(nh->mxc.mx);
3086		list_del(&nh->next);
3087		kfree(nh);
3088	}
3089
3090	return err;
3091}
3092
3093static int ip6_route_multipath_del(struct fib6_config *cfg)
3094{
3095	struct fib6_config r_cfg;
3096	struct rtnexthop *rtnh;
3097	int remaining;
3098	int attrlen;
3099	int err = 1, last_err = 0;
3100
3101	remaining = cfg->fc_mp_len;
3102	rtnh = (struct rtnexthop *)cfg->fc_mp;
3103
3104	/* Parse a Multipath Entry */
3105	while (rtnh_ok(rtnh, remaining)) {
3106		memcpy(&r_cfg, cfg, sizeof(*cfg));
3107		if (rtnh->rtnh_ifindex)
3108			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3109
3110		attrlen = rtnh_attrlen(rtnh);
3111		if (attrlen > 0) {
3112			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3113
3114			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3115			if (nla) {
3116				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3117				r_cfg.fc_flags |= RTF_GATEWAY;
3118			}
3119		}
3120		err = ip6_route_del(&r_cfg);
3121		if (err)
3122			last_err = err;
3123
3124		rtnh = rtnh_next(rtnh, &remaining);
3125	}
3126
3127	return last_err;
3128}
3129
3130static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3131{
3132	struct fib6_config cfg;
3133	int err;
3134
3135	err = rtm_to_fib6_config(skb, nlh, &cfg);
3136	if (err < 0)
3137		return err;
3138
3139	if (cfg.fc_mp)
3140		return ip6_route_multipath_del(&cfg);
3141	else
3142		return ip6_route_del(&cfg);
3143}
3144
3145static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3146{
3147	struct fib6_config cfg;
3148	int err;
3149
3150	err = rtm_to_fib6_config(skb, nlh, &cfg);
3151	if (err < 0)
3152		return err;
3153
3154	if (cfg.fc_mp)
3155		return ip6_route_multipath_add(&cfg);
3156	else
3157		return ip6_route_add(&cfg);
3158}
3159
3160static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3161{
3162	return NLMSG_ALIGN(sizeof(struct rtmsg))
3163	       + nla_total_size(16) /* RTA_SRC */
3164	       + nla_total_size(16) /* RTA_DST */
3165	       + nla_total_size(16) /* RTA_GATEWAY */
3166	       + nla_total_size(16) /* RTA_PREFSRC */
3167	       + nla_total_size(4) /* RTA_TABLE */
3168	       + nla_total_size(4) /* RTA_IIF */
3169	       + nla_total_size(4) /* RTA_OIF */
3170	       + nla_total_size(4) /* RTA_PRIORITY */
3171	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3172	       + nla_total_size(sizeof(struct rta_cacheinfo))
3173	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3174	       + nla_total_size(1) /* RTA_PREF */
3175	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
3176}
3177
/*
 * Append one route message describing @rt to @skb.
 *
 * @dst/@src: when non-NULL they are reported as RTA_DST/RTA_SRC with a
 *	      prefix length of 128 instead of the route's own prefix.
 * @iif:      when non-zero it is reported as RTA_IIF (or, for multicast
 *	      destinations with CONFIG_IPV6_MROUTE, ip6mr_get_route() is
 *	      asked to fill the message instead).
 * @type, @portid, @seq, @flags: passed through to nlmsg_put().
 * @prefix:   when non-zero only routes flagged RTF_PREFIX_RT are emitted.
 * @nowait:   forwarded to ip6mr_get_route() and selects how its result
 *	      is interpreted.
 *
 * Returns 0 when the message was completed, 1 when @prefix filtering
 * skipped the route (nothing was written), or -EMSGSIZE when the header
 * or an attribute did not fit (the partial message is cancelled).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	/* Fixed rtmsg header */
	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* the table id goes both into the header field and into RTA_TABLE */
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* Route type: reject routes are classified by their dst error code */
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	/* NOTE(review): rt->dst.dev is NULL-checked above and below but is
	 * dereferenced unconditionally here - confirm it cannot be NULL.
	 */
	if (!netif_carrier_ok(rt->dst.dev)) {
		rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			rtm->rtm_flags |= RTNH_F_DEAD;
	}
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* Protocol: the stored value is overridden for dynamic and
	 * address-autoconfiguration routes.
	 */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* Destination: an explicit @dst is reported as a /128 */
	if (dst) {
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	/* Source: same scheme as the destination above */
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait,
						  portid);

			/* err > 0 falls through and the message is finished
			 * below.  For err <= 0: without @nowait, 0 returns
			 * early (the message is not ended here) and any
			 * error cancels it; with @nowait only -EMSGSIZE
			 * cancels it.
			 */
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			/* body of the "else" above when MROUTE is enabled,
			 * otherwise the plain statement of "if (iif)"
			 */
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		/* output lookup for a concrete destination: report the
		 * source address that would be selected
		 */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* preferred source configured on the route itself */
	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* work on a copy of the metrics so rt6i_pmtu can override the MTU
	 * slot without touching the route
	 */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* remaining lifetime in jiffies, or 0 for routes without RTF_EXPIRES */
	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;

	if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	/* drop the partially built message from the skb */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
3340
3341int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3342{
3343	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3344	int prefix;
3345
3346	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3347		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3348		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3349	} else
3350		prefix = 0;
3351
3352	return rt6_fill_node(arg->net,
3353		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3354		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3355		     prefix, 0, NLM_F_MULTI);
3356}
3357
3358static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3359{
3360	struct net *net = sock_net(in_skb->sk);
3361	struct nlattr *tb[RTA_MAX+1];
3362	struct rt6_info *rt;
3363	struct sk_buff *skb;
3364	struct rtmsg *rtm;
3365	struct flowi6 fl6;
3366	int err, iif = 0, oif = 0;
3367
3368	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3369	if (err < 0)
3370		goto errout;
3371
3372	err = -EINVAL;
3373	memset(&fl6, 0, sizeof(fl6));
3374	rtm = nlmsg_data(nlh);
3375	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
3376
3377	if (tb[RTA_SRC]) {
3378		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3379			goto errout;
3380
3381		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3382	}
3383
3384	if (tb[RTA_DST]) {
3385		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3386			goto errout;
3387
3388		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3389	}
3390
3391	if (tb[RTA_IIF])
3392		iif = nla_get_u32(tb[RTA_IIF]);
3393
3394	if (tb[RTA_OIF])
3395		oif = nla_get_u32(tb[RTA_OIF]);
3396
3397	if (tb[RTA_MARK])
3398		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3399
3400	if (tb[RTA_UID])
3401		fl6.flowi6_uid = make_kuid(current_user_ns(),
3402					   nla_get_u32(tb[RTA_UID]));
3403	else
3404		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3405
3406	if (iif) {
3407		struct net_device *dev;
3408		int flags = 0;
3409
3410		dev = __dev_get_by_index(net, iif);
3411		if (!dev) {
3412			err = -ENODEV;
3413			goto errout;
3414		}
3415
3416		fl6.flowi6_iif = iif;
3417
3418		if (!ipv6_addr_any(&fl6.saddr))
3419			flags |= RT6_LOOKUP_F_HAS_SADDR;
3420
3421		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3422							       flags);
3423	} else {
3424		fl6.flowi6_oif = oif;
 
 
 
 
 
3425
3426		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3427	}
3428
3429	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3430	if (!skb) {
3431		ip6_rt_put(rt);
3432		err = -ENOBUFS;
3433		goto errout;
3434	}
3435
3436	/* Reserve room for dummy headers, this skb can pass
3437	   through good chunk of routing engine.
3438	 */
3439	skb_reset_mac_header(skb);
3440	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3441
3442	skb_dst_set(skb, &rt->dst);
3443
3444	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3445			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3446			    nlh->nlmsg_seq, 0, 0, 0);
3447	if (err < 0) {
3448		kfree_skb(skb);
3449		goto errout;
3450	}
3451
3452	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3453errout:
3454	return err;
3455}
3456
3457void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3458		     unsigned int nlm_flags)
3459{
3460	struct sk_buff *skb;
3461	struct net *net = info->nl_net;
3462	u32 seq;
3463	int err;
3464
3465	err = -ENOBUFS;
3466	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3467
3468	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3469	if (!skb)
3470		goto errout;
3471
3472	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3473				event, info->portid, seq, 0, 0, nlm_flags);
3474	if (err < 0) {
3475		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3476		WARN_ON(err == -EMSGSIZE);
3477		kfree_skb(skb);
3478		goto errout;
3479	}
3480	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3481		    info->nlh, gfp_any());
3482	return;
3483errout:
3484	if (err < 0)
3485		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3486}
3487
3488static int ip6_route_dev_notify(struct notifier_block *this,
3489				unsigned long event, void *ptr)
3490{
3491	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3492	struct net *net = dev_net(dev);
3493
3494	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3495		net->ipv6.ip6_null_entry->dst.dev = dev;
3496		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3497#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3498		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3499		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3500		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3501		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3502#endif
3503	}
3504
3505	return NOTIFY_OK;
3506}
3507
3508/*
3509 *	/proc
3510 */
3511
3512#ifdef CONFIG_PROC_FS
3513
3514static const struct file_operations ipv6_route_proc_fops = {
3515	.owner		= THIS_MODULE,
3516	.open		= ipv6_route_open,
3517	.read		= seq_read,
3518	.llseek		= seq_lseek,
3519	.release	= seq_release_net,
3520};
3521
3522static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3523{
3524	struct net *net = (struct net *)seq->private;
3525	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3526		   net->ipv6.rt6_stats->fib_nodes,
3527		   net->ipv6.rt6_stats->fib_route_nodes,
3528		   net->ipv6.rt6_stats->fib_rt_alloc,
3529		   net->ipv6.rt6_stats->fib_rt_entries,
3530		   net->ipv6.rt6_stats->fib_rt_cache,
3531		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3532		   net->ipv6.rt6_stats->fib_discarded_routes);
3533
3534	return 0;
3535}
3536
3537static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3538{
3539	return single_open_net(inode, file, rt6_stats_seq_show);
3540}
3541
3542static const struct file_operations rt6_stats_seq_fops = {
3543	.owner	 = THIS_MODULE,
3544	.open	 = rt6_stats_seq_open,
3545	.read	 = seq_read,
3546	.llseek	 = seq_lseek,
3547	.release = single_release_net,
3548};
3549#endif	/* CONFIG_PROC_FS */
3550
3551#ifdef CONFIG_SYSCTL
3552
3553static
3554int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3555			      void __user *buffer, size_t *lenp, loff_t *ppos)
3556{
3557	struct net *net;
3558	int delay;
3559	if (!write)
3560		return -EINVAL;
3561
3562	net = (struct net *)ctl->extra1;
3563	delay = net->ipv6.sysctl.flush_delay;
3564	proc_dointvec(ctl, write, buffer, lenp, ppos);
3565	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3566	return 0;
3567}
3568
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * ipv6_route_sysctl_init() duplicates this array and rewrites .data of
 * each entry BY INDEX (table[0] .. table[9]) to point into the target
 * namespace, so the order of the entries below must stay in sync with
 * that function.
 */
struct ctl_table ipv6_route_table_template[] = {
	/* [0] write-only trigger, see ipv6_sysctl_rtcache_flush() */
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	/* [1] */
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	/* [2] */
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	/* [3] same variable as [9], exposed through proc_dointvec_jiffies */
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	/* [4] */
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	/* [5] */
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	/* [6] */
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	/* [7] */
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	/* [8] */
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	/* [9] same variable as [3], exposed through proc_dointvec_ms_jiffies */
	{
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	/* terminating empty entry */
	{ }
};
3642
3643struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3644{
3645	struct ctl_table *table;
3646
3647	table = kmemdup(ipv6_route_table_template,
3648			sizeof(ipv6_route_table_template),
3649			GFP_KERNEL);
3650
3651	if (table) {
3652		table[0].data = &net->ipv6.sysctl.flush_delay;
3653		table[0].extra1 = net;
3654		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3655		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3656		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3657		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3658		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3659		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3660		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3661		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3662		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3663
3664		/* Don't export sysctls to unprivileged users */
3665		if (net->user_ns != &init_user_ns)
3666			table[0].procname = NULL;
3667	}
3668
3669	return table;
3670}
3671#endif
3672
/*
 * Per-namespace initialisation of the IPv6 routing state.
 *
 * Copies the dst_ops template, allocates the namespace's private copies
 * of the special route entries (null, and with
 * CONFIG_IPV6_MULTIPLE_TABLES also prohibit and blackhole) and sets the
 * default sysctl values.
 *
 * Returns 0 on success or -ENOMEM; on failure everything allocated so
 * far is released through the unwind labels at the end.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* each special entry is a copy of its template whose dst.path
	 * points back at itself and whose ops are the namespace's own
	 */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* sysctl defaults; the HZ-scaled values are stored in jiffies */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

	/* Unwind labels: each one releases what was set up before the
	 * failing step and falls through to the next; ret is still -ENOMEM.
	 */
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
3744
3745static void __net_exit ip6_route_net_exit(struct net *net)
3746{
3747	kfree(net->ipv6.ip6_null_entry);
3748#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3749	kfree(net->ipv6.ip6_prohibit_entry);
3750	kfree(net->ipv6.ip6_blk_hole_entry);
3751#endif
3752	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3753}
3754
3755static int __net_init ip6_route_net_init_late(struct net *net)
3756{
3757#ifdef CONFIG_PROC_FS
3758	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3759	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3760#endif
3761	return 0;
3762}
3763
3764static void __net_exit ip6_route_net_exit_late(struct net *net)
3765{
3766#ifdef CONFIG_PROC_FS
3767	remove_proc_entry("ipv6_route", net->proc_net);
3768	remove_proc_entry("rt6_stats", net->proc_net);
3769#endif
3770}
3771
3772static struct pernet_operations ip6_route_net_ops = {
3773	.init = ip6_route_net_init,
3774	.exit = ip6_route_net_exit,
3775};
3776
3777static int __net_init ipv6_inetpeer_init(struct net *net)
3778{
3779	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3780
3781	if (!bp)
3782		return -ENOMEM;
3783	inet_peer_base_init(bp);
3784	net->ipv6.peers = bp;
3785	return 0;
3786}
3787
3788static void __net_exit ipv6_inetpeer_exit(struct net *net)
3789{
3790	struct inet_peer_base *bp = net->ipv6.peers;
3791
3792	net->ipv6.peers = NULL;
3793	inetpeer_invalidate_tree(bp);
3794	kfree(bp);
3795}
3796
3797static struct pernet_operations ipv6_inetpeer_ops = {
3798	.init	=	ipv6_inetpeer_init,
3799	.exit	=	ipv6_inetpeer_exit,
3800};
3801
3802static struct pernet_operations ip6_route_net_late_ops = {
3803	.init = ip6_route_net_init_late,
3804	.exit = ip6_route_net_exit_late,
3805};
3806
3807static struct notifier_block ip6_route_dev_notifier = {
3808	.notifier_call = ip6_route_dev_notify,
3809	.priority = 0,
3810};
3811
3812int __init ip6_route_init(void)
3813{
3814	int ret;
3815	int cpu;
3816
3817	ret = -ENOMEM;
3818	ip6_dst_ops_template.kmem_cachep =
3819		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3820				  SLAB_HWCACHE_ALIGN, NULL);
3821	if (!ip6_dst_ops_template.kmem_cachep)
3822		goto out;
3823
3824	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3825	if (ret)
3826		goto out_kmem_cache;
3827
3828	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3829	if (ret)
3830		goto out_dst_entries;
3831
3832	ret = register_pernet_subsys(&ip6_route_net_ops);
3833	if (ret)
3834		goto out_register_inetpeer;
3835
3836	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3837
3838	/* Registering of the loopback is done before this portion of code,
3839	 * the loopback reference in rt6_info will not be taken, do it
3840	 * manually for init_net */
3841	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3842	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3843  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3844	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3845	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3846	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3847	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3848  #endif
3849	ret = fib6_init();
3850	if (ret)
3851		goto out_register_subsys;
3852
3853	ret = xfrm6_init();
3854	if (ret)
3855		goto out_fib6_init;
3856
3857	ret = fib6_rules_init();
3858	if (ret)
3859		goto xfrm6_init;
3860
3861	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3862	if (ret)
3863		goto fib6_rules_init;
3864
3865	ret = -ENOBUFS;
3866	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3867	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3868	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3869		goto out_register_late_subsys;
3870
3871	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3872	if (ret)
3873		goto out_register_late_subsys;
3874
3875	for_each_possible_cpu(cpu) {
3876		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3877
3878		INIT_LIST_HEAD(&ul->head);
3879		spin_lock_init(&ul->lock);
3880	}
3881
3882out:
3883	return ret;
3884
3885out_register_late_subsys:
3886	unregister_pernet_subsys(&ip6_route_net_late_ops);
3887fib6_rules_init:
3888	fib6_rules_cleanup();
3889xfrm6_init:
3890	xfrm6_fini();
3891out_fib6_init:
3892	fib6_gc_cleanup();
3893out_register_subsys:
3894	unregister_pernet_subsys(&ip6_route_net_ops);
3895out_register_inetpeer:
3896	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3897out_dst_entries:
3898	dst_entries_destroy(&ip6_dst_blackhole_ops);
3899out_kmem_cache:
3900	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3901	goto out;
3902}
3903
/*
 * Tear down what ip6_route_init() set up: the netdevice notifier, the
 * late per-namespace ops, policy rules, xfrm6, FIB garbage collection,
 * the remaining per-namespace ops, the blackhole dst accounting and
 * finally the dst slab cache.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	/* NOTE(review): the two pernet subsystems are unregistered in the
	 * same order they were registered, not in reverse - confirm this is
	 * intentional.
	 */
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	/* the cache is destroyed last, after all users above are gone */
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}