Linux Audio

Check our new training course

Loading...
v4.6
   1/*
   2 *	Linux INET6 implementation
   3 *	FIB front-end.
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*	Changes:
  15 *
  16 *	YOSHIFUJI Hideaki @USAGI
  17 *		reworked default router selection.
  18 *		- respect outgoing interface
  19 *		- select from (probably) reachable routers (i.e.
  20 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *		- always select the same router if it is (probably)
  22 *		reachable.  otherwise, round-robin the list.
  23 *	Ville Nuorvala
  24 *		Fixed routing subtrees.
  25 */
  26
  27#define pr_fmt(fmt) "IPv6: " fmt
  28
  29#include <linux/capability.h>
  30#include <linux/errno.h>
  31#include <linux/export.h>
  32#include <linux/types.h>
  33#include <linux/times.h>
  34#include <linux/socket.h>
  35#include <linux/sockios.h>
  36#include <linux/net.h>
  37#include <linux/route.h>
  38#include <linux/netdevice.h>
  39#include <linux/in6.h>
  40#include <linux/mroute6.h>
  41#include <linux/init.h>
  42#include <linux/if_arp.h>
  43#include <linux/proc_fs.h>
  44#include <linux/seq_file.h>
  45#include <linux/nsproxy.h>
  46#include <linux/slab.h>
  47#include <net/net_namespace.h>
  48#include <net/snmp.h>
  49#include <net/ipv6.h>
  50#include <net/ip6_fib.h>
  51#include <net/ip6_route.h>
  52#include <net/ndisc.h>
  53#include <net/addrconf.h>
  54#include <net/tcp.h>
  55#include <linux/rtnetlink.h>
  56#include <net/dst.h>
  57#include <net/dst_metadata.h>
  58#include <net/xfrm.h>
  59#include <net/netevent.h>
  60#include <net/netlink.h>
  61#include <net/nexthop.h>
  62#include <net/lwtunnel.h>
  63#include <net/ip_tunnels.h>
  64#include <net/l3mdev.h>
  65#include <trace/events/fib6.h>
  66
  67#include <asm/uaccess.h>
  68
  69#ifdef CONFIG_SYSCTL
  70#include <linux/sysctl.h>
  71#endif
  72
  73enum rt6_nud_state {
  74	RT6_NUD_FAIL_HARD = -3,
  75	RT6_NUD_FAIL_PROBE = -2,
  76	RT6_NUD_FAIL_DO_RR = -1,
  77	RT6_NUD_SUCCEED = 1
  78};
  79
  80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
  81static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  82static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  83static unsigned int	 ip6_mtu(const struct dst_entry *dst);
  84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  85static void		ip6_dst_destroy(struct dst_entry *);
  86static void		ip6_dst_ifdown(struct dst_entry *,
  87				       struct net_device *dev, int how);
  88static int		 ip6_dst_gc(struct dst_ops *ops);
  89
  90static int		ip6_pkt_discard(struct sk_buff *skb);
  91static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  92static int		ip6_pkt_prohibit(struct sk_buff *skb);
  93static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  94static void		ip6_link_failure(struct sk_buff *skb);
  95static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
  96					   struct sk_buff *skb, u32 mtu);
  97static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
  98					struct sk_buff *skb);
  99static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
 101
 102#ifdef CONFIG_IPV6_ROUTE_INFO
 103static struct rt6_info *rt6_add_route_info(struct net *net,
 104					   const struct in6_addr *prefix, int prefixlen,
 105					   const struct in6_addr *gwaddr, int ifindex,
 106					   unsigned int pref);
 107static struct rt6_info *rt6_get_route_info(struct net *net,
 108					   const struct in6_addr *prefix, int prefixlen,
 109					   const struct in6_addr *gwaddr, int ifindex);
 110#endif
 111
 112struct uncached_list {
 113	spinlock_t		lock;
 114	struct list_head	head;
 115};
 116
 117static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
 118
 119static void rt6_uncached_list_add(struct rt6_info *rt)
 120{
 121	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
 122
 123	rt->dst.flags |= DST_NOCACHE;
 124	rt->rt6i_uncached_list = ul;
 125
 126	spin_lock_bh(&ul->lock);
 127	list_add_tail(&rt->rt6i_uncached, &ul->head);
 128	spin_unlock_bh(&ul->lock);
 129}
 130
 131static void rt6_uncached_list_del(struct rt6_info *rt)
 132{
 133	if (!list_empty(&rt->rt6i_uncached)) {
 134		struct uncached_list *ul = rt->rt6i_uncached_list;
 135
 136		spin_lock_bh(&ul->lock);
 137		list_del(&rt->rt6i_uncached);
 138		spin_unlock_bh(&ul->lock);
 139	}
 140}
 141
 142static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 143{
 144	struct net_device *loopback_dev = net->loopback_dev;
 145	int cpu;
 146
 147	if (dev == loopback_dev)
 148		return;
 
 149
 150	for_each_possible_cpu(cpu) {
 151		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 152		struct rt6_info *rt;
 153
 154		spin_lock_bh(&ul->lock);
 155		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
 156			struct inet6_dev *rt_idev = rt->rt6i_idev;
 157			struct net_device *rt_dev = rt->dst.dev;
 158
 159			if (rt_idev->dev == dev) {
 160				rt->rt6i_idev = in6_dev_get(loopback_dev);
 161				in6_dev_put(rt_idev);
 162			}
 163
 164			if (rt_dev == dev) {
 165				rt->dst.dev = loopback_dev;
 166				dev_hold(rt->dst.dev);
 167				dev_put(rt_dev);
 168			}
 169		}
 170		spin_unlock_bh(&ul->lock);
 171	}
 
 172}
 173
 174static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
 175{
 176	return dst_metrics_write_ptr(rt->dst.from);
 177}
 178
 179static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 180{
 181	struct rt6_info *rt = (struct rt6_info *)dst;
 182
 183	if (rt->rt6i_flags & RTF_PCPU)
 184		return rt6_pcpu_cow_metrics(rt);
 185	else if (rt->rt6i_flags & RTF_CACHE)
 186		return NULL;
 187	else
 188		return dst_cow_metrics_generic(dst, old);
 189}
 190
 191static inline const void *choose_neigh_daddr(struct rt6_info *rt,
 192					     struct sk_buff *skb,
 193					     const void *daddr)
 194{
 195	struct in6_addr *p = &rt->rt6i_gateway;
 196
 197	if (!ipv6_addr_any(p))
 198		return (const void *) p;
 199	else if (skb)
 200		return &ipv6_hdr(skb)->daddr;
 201	return daddr;
 202}
 203
 204static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
 205					  struct sk_buff *skb,
 206					  const void *daddr)
 207{
 208	struct rt6_info *rt = (struct rt6_info *) dst;
 209	struct neighbour *n;
 210
 211	daddr = choose_neigh_daddr(rt, skb, daddr);
 212	n = __ipv6_neigh_lookup(dst->dev, daddr);
 213	if (n)
 214		return n;
 215	return neigh_create(&nd_tbl, daddr, dst->dev);
 216}
 217
 
 
 
 
 
 
 
 
 
 
 
 
 
 218static struct dst_ops ip6_dst_ops_template = {
 219	.family			=	AF_INET6,
 
 220	.gc			=	ip6_dst_gc,
 221	.gc_thresh		=	1024,
 222	.check			=	ip6_dst_check,
 223	.default_advmss		=	ip6_default_advmss,
 224	.mtu			=	ip6_mtu,
 225	.cow_metrics		=	ipv6_cow_metrics,
 226	.destroy		=	ip6_dst_destroy,
 227	.ifdown			=	ip6_dst_ifdown,
 228	.negative_advice	=	ip6_negative_advice,
 229	.link_failure		=	ip6_link_failure,
 230	.update_pmtu		=	ip6_rt_update_pmtu,
 231	.redirect		=	rt6_do_redirect,
 232	.local_out		=	__ip6_local_out,
 233	.neigh_lookup		=	ip6_neigh_lookup,
 234};
 235
 236static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 237{
 238	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 239
 240	return mtu ? : dst->dev->mtu;
 241}
 242
 243static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
 244					 struct sk_buff *skb, u32 mtu)
 245{
 246}
 247
 248static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
 249				      struct sk_buff *skb)
 250{
 
 251}
 252
 253static struct dst_ops ip6_dst_blackhole_ops = {
 254	.family			=	AF_INET6,
 
 255	.destroy		=	ip6_dst_destroy,
 256	.check			=	ip6_dst_check,
 257	.mtu			=	ip6_blackhole_mtu,
 258	.default_advmss		=	ip6_default_advmss,
 259	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 260	.redirect		=	ip6_rt_blackhole_redirect,
 261	.cow_metrics		=	dst_cow_metrics_generic,
 262	.neigh_lookup		=	ip6_neigh_lookup,
 263};
 264
 265static const u32 ip6_template_metrics[RTAX_MAX] = {
 266	[RTAX_HOPLIMIT - 1] = 0,
 267};
 268
 269static const struct rt6_info ip6_null_entry_template = {
 270	.dst = {
 271		.__refcnt	= ATOMIC_INIT(1),
 272		.__use		= 1,
 273		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 274		.error		= -ENETUNREACH,
 275		.input		= ip6_pkt_discard,
 276		.output		= ip6_pkt_discard_out,
 277	},
 278	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 279	.rt6i_protocol  = RTPROT_KERNEL,
 280	.rt6i_metric	= ~(u32) 0,
 281	.rt6i_ref	= ATOMIC_INIT(1),
 282};
 283
 284#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 285
 286static const struct rt6_info ip6_prohibit_entry_template = {
 
 
 
 287	.dst = {
 288		.__refcnt	= ATOMIC_INIT(1),
 289		.__use		= 1,
 290		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 291		.error		= -EACCES,
 292		.input		= ip6_pkt_prohibit,
 293		.output		= ip6_pkt_prohibit_out,
 294	},
 295	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 296	.rt6i_protocol  = RTPROT_KERNEL,
 297	.rt6i_metric	= ~(u32) 0,
 298	.rt6i_ref	= ATOMIC_INIT(1),
 299};
 300
 301static const struct rt6_info ip6_blk_hole_entry_template = {
 302	.dst = {
 303		.__refcnt	= ATOMIC_INIT(1),
 304		.__use		= 1,
 305		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 306		.error		= -EINVAL,
 307		.input		= dst_discard,
 308		.output		= dst_discard_out,
 309	},
 310	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 311	.rt6i_protocol  = RTPROT_KERNEL,
 312	.rt6i_metric	= ~(u32) 0,
 313	.rt6i_ref	= ATOMIC_INIT(1),
 314};
 315
 316#endif
 317
 318static void rt6_info_init(struct rt6_info *rt)
 319{
 320	struct dst_entry *dst = &rt->dst;
 321
 322	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 323	INIT_LIST_HEAD(&rt->rt6i_siblings);
 324	INIT_LIST_HEAD(&rt->rt6i_uncached);
 325}
 326
 327/* allocate dst with ip6_dst_ops */
 328static struct rt6_info *__ip6_dst_alloc(struct net *net,
 329					struct net_device *dev,
 330					int flags)
 331{
 332	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 333					0, DST_OBSOLETE_FORCE_CHK, flags);
 334
 335	if (rt)
 336		rt6_info_init(rt);
 337
 338	return rt;
 339}
 340
 341struct rt6_info *ip6_dst_alloc(struct net *net,
 342			       struct net_device *dev,
 343			       int flags)
 344{
 345	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
 346
 347	if (rt) {
 348		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
 349		if (rt->rt6i_pcpu) {
 350			int cpu;
 351
 352			for_each_possible_cpu(cpu) {
 353				struct rt6_info **p;
 354
 355				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
 356				/* no one shares rt */
 357				*p =  NULL;
 358			}
 359		} else {
 360			dst_destroy((struct dst_entry *)rt);
 361			return NULL;
 362		}
 363	}
 364
 365	return rt;
 366}
 367EXPORT_SYMBOL(ip6_dst_alloc);
 368
 369static void ip6_dst_destroy(struct dst_entry *dst)
 370{
 371	struct rt6_info *rt = (struct rt6_info *)dst;
 372	struct dst_entry *from = dst->from;
 373	struct inet6_dev *idev;
 374
 375	dst_destroy_metrics_generic(dst);
 376	free_percpu(rt->rt6i_pcpu);
 377	rt6_uncached_list_del(rt);
 378
 379	idev = rt->rt6i_idev;
 380	if (idev) {
 381		rt->rt6i_idev = NULL;
 382		in6_dev_put(idev);
 383	}
 384
 385	dst->from = NULL;
 386	dst_release(from);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 387}
 388
 389static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 390			   int how)
 391{
 392	struct rt6_info *rt = (struct rt6_info *)dst;
 393	struct inet6_dev *idev = rt->rt6i_idev;
 394	struct net_device *loopback_dev =
 395		dev_net(dev)->loopback_dev;
 396
 397	if (dev != loopback_dev) {
 398		if (idev && idev->dev == dev) {
 399			struct inet6_dev *loopback_idev =
 400				in6_dev_get(loopback_dev);
 401			if (loopback_idev) {
 402				rt->rt6i_idev = loopback_idev;
 403				in6_dev_put(idev);
 404			}
 405		}
 406	}
 407}
 408
 409static bool __rt6_check_expired(const struct rt6_info *rt)
 410{
 411	if (rt->rt6i_flags & RTF_EXPIRES)
 412		return time_after(jiffies, rt->dst.expires);
 413	else
 414		return false;
 415}
 416
 417static bool rt6_check_expired(const struct rt6_info *rt)
 418{
 
 
 419	if (rt->rt6i_flags & RTF_EXPIRES) {
 420		if (time_after(jiffies, rt->dst.expires))
 421			return true;
 422	} else if (rt->dst.from) {
 423		return rt6_check_expired((struct rt6_info *) rt->dst.from);
 
 
 424	}
 425	return false;
 426}
 427
 428/* Multipath route selection:
 429 *   Hash based function using packet header and flowlabel.
 430 * Adapted from fib_info_hashfn()
 431 */
 432static int rt6_info_hash_nhsfn(unsigned int candidate_count,
 433			       const struct flowi6 *fl6)
 434{
 435	return get_hash_from_flowi6(fl6) % candidate_count;
 436}
 437
 438static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 439					     struct flowi6 *fl6, int oif,
 440					     int strict)
 441{
 442	struct rt6_info *sibling, *next_sibling;
 443	int route_choosen;
 444
 445	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
 446	/* Don't change the route, if route_choosen == 0
 447	 * (siblings does not include ourself)
 448	 */
 449	if (route_choosen)
 450		list_for_each_entry_safe(sibling, next_sibling,
 451				&match->rt6i_siblings, rt6i_siblings) {
 452			route_choosen--;
 453			if (route_choosen == 0) {
 454				if (rt6_score_route(sibling, oif, strict) < 0)
 455					break;
 456				match = sibling;
 457				break;
 458			}
 459		}
 460	return match;
 461}
 462
 463/*
 464 *	Route lookup. Any table->tb6_lock is implied.
 465 */
 466
 467static inline struct rt6_info *rt6_device_match(struct net *net,
 468						    struct rt6_info *rt,
 469						    const struct in6_addr *saddr,
 470						    int oif,
 471						    int flags)
 472{
 473	struct rt6_info *local = NULL;
 474	struct rt6_info *sprt;
 475
 476	if (!oif && ipv6_addr_any(saddr))
 477		goto out;
 478
 479	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 480		struct net_device *dev = sprt->dst.dev;
 481
 482		if (oif) {
 483			if (dev->ifindex == oif)
 484				return sprt;
 485			if (dev->flags & IFF_LOOPBACK) {
 486				if (!sprt->rt6i_idev ||
 487				    sprt->rt6i_idev->dev->ifindex != oif) {
 488					if (flags & RT6_LOOKUP_F_IFACE)
 489						continue;
 490					if (local &&
 491					    local->rt6i_idev->dev->ifindex == oif)
 492						continue;
 493				}
 494				local = sprt;
 495			}
 496		} else {
 497			if (ipv6_chk_addr(net, saddr, dev,
 498					  flags & RT6_LOOKUP_F_IFACE))
 499				return sprt;
 500		}
 501	}
 502
 503	if (oif) {
 504		if (local)
 505			return local;
 506
 507		if (flags & RT6_LOOKUP_F_IFACE)
 508			return net->ipv6.ip6_null_entry;
 509	}
 510out:
 511	return rt;
 512}
 513
 514#ifdef CONFIG_IPV6_ROUTER_PREF
 515struct __rt6_probe_work {
 516	struct work_struct work;
 517	struct in6_addr target;
 518	struct net_device *dev;
 519};
 520
 521static void rt6_probe_deferred(struct work_struct *w)
 522{
 523	struct in6_addr mcaddr;
 524	struct __rt6_probe_work *work =
 525		container_of(w, struct __rt6_probe_work, work);
 526
 527	addrconf_addr_solict_mult(&work->target, &mcaddr);
 528	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
 529	dev_put(work->dev);
 530	kfree(work);
 531}
 532
 533static void rt6_probe(struct rt6_info *rt)
 534{
 535	struct __rt6_probe_work *work;
 536	struct neighbour *neigh;
 537	/*
 538	 * Okay, this does not seem to be appropriate
 539	 * for now, however, we need to check if it
 540	 * is really so; aka Router Reachability Probing.
 541	 *
 542	 * Router Reachability Probe MUST be rate-limited
 543	 * to no more than one per minute.
 544	 */
 545	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
 546		return;
 547	rcu_read_lock_bh();
 548	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 549	if (neigh) {
 550		if (neigh->nud_state & NUD_VALID)
 551			goto out;
 552
 553		work = NULL;
 554		write_lock(&neigh->lock);
 555		if (!(neigh->nud_state & NUD_VALID) &&
 556		    time_after(jiffies,
 557			       neigh->updated +
 558			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
 559			work = kmalloc(sizeof(*work), GFP_ATOMIC);
 560			if (work)
 561				__neigh_set_probe_once(neigh);
 562		}
 563		write_unlock(&neigh->lock);
 564	} else {
 565		work = kmalloc(sizeof(*work), GFP_ATOMIC);
 566	}
 567
 568	if (work) {
 569		INIT_WORK(&work->work, rt6_probe_deferred);
 570		work->target = rt->rt6i_gateway;
 571		dev_hold(rt->dst.dev);
 572		work->dev = rt->dst.dev;
 573		schedule_work(&work->work);
 574	}
 575
 576out:
 577	rcu_read_unlock_bh();
 578}
 579#else
 580static inline void rt6_probe(struct rt6_info *rt)
 581{
 582}
 583#endif
 584
 585/*
 586 * Default Router Selection (RFC 2461 6.3.6)
 587 */
 588static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 589{
 590	struct net_device *dev = rt->dst.dev;
 591	if (!oif || dev->ifindex == oif)
 592		return 2;
 593	if ((dev->flags & IFF_LOOPBACK) &&
 594	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 595		return 1;
 596	return 0;
 597}
 598
 599static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
 600{
 601	struct neighbour *neigh;
 602	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 603
 
 
 604	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 605	    !(rt->rt6i_flags & RTF_GATEWAY))
 606		return RT6_NUD_SUCCEED;
 607
 608	rcu_read_lock_bh();
 609	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 610	if (neigh) {
 611		read_lock(&neigh->lock);
 612		if (neigh->nud_state & NUD_VALID)
 613			ret = RT6_NUD_SUCCEED;
 614#ifdef CONFIG_IPV6_ROUTER_PREF
 615		else if (!(neigh->nud_state & NUD_FAILED))
 616			ret = RT6_NUD_SUCCEED;
 617		else
 618			ret = RT6_NUD_FAIL_PROBE;
 619#endif
 620		read_unlock(&neigh->lock);
 621	} else {
 622		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
 623		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
 624	}
 625	rcu_read_unlock_bh();
 626
 627	return ret;
 628}
 629
 630static int rt6_score_route(struct rt6_info *rt, int oif,
 631			   int strict)
 632{
 633	int m;
 634
 635	m = rt6_check_dev(rt, oif);
 636	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 637		return RT6_NUD_FAIL_HARD;
 638#ifdef CONFIG_IPV6_ROUTER_PREF
 639	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 640#endif
 641	if (strict & RT6_LOOKUP_F_REACHABLE) {
 642		int n = rt6_check_neigh(rt);
 643		if (n < 0)
 644			return n;
 645	}
 646	return m;
 647}
 648
 649static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 650				   int *mpri, struct rt6_info *match,
 651				   bool *do_rr)
 652{
 653	int m;
 654	bool match_do_rr = false;
 655	struct inet6_dev *idev = rt->rt6i_idev;
 656	struct net_device *dev = rt->dst.dev;
 657
 658	if (dev && !netif_carrier_ok(dev) &&
 659	    idev->cnf.ignore_routes_with_linkdown)
 660		goto out;
 661
 662	if (rt6_check_expired(rt))
 663		goto out;
 664
 665	m = rt6_score_route(rt, oif, strict);
 666	if (m == RT6_NUD_FAIL_DO_RR) {
 667		match_do_rr = true;
 668		m = 0; /* lowest valid score */
 669	} else if (m == RT6_NUD_FAIL_HARD) {
 670		goto out;
 671	}
 672
 673	if (strict & RT6_LOOKUP_F_REACHABLE)
 674		rt6_probe(rt);
 675
 676	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
 677	if (m > *mpri) {
 678		*do_rr = match_do_rr;
 
 679		*mpri = m;
 680		match = rt;
 
 
 681	}
 
 682out:
 683	return match;
 684}
 685
 686static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 687				     struct rt6_info *rr_head,
 688				     u32 metric, int oif, int strict,
 689				     bool *do_rr)
 690{
 691	struct rt6_info *rt, *match, *cont;
 692	int mpri = -1;
 693
 694	match = NULL;
 695	cont = NULL;
 696	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
 697		if (rt->rt6i_metric != metric) {
 698			cont = rt;
 699			break;
 700		}
 701
 702		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 703	}
 704
 705	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
 706		if (rt->rt6i_metric != metric) {
 707			cont = rt;
 708			break;
 709		}
 710
 711		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 712	}
 713
 714	if (match || !cont)
 715		return match;
 716
 717	for (rt = cont; rt; rt = rt->dst.rt6_next)
 718		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 719
 720	return match;
 721}
 722
 723static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 724{
 725	struct rt6_info *match, *rt0;
 726	struct net *net;
 727	bool do_rr = false;
 728
 729	rt0 = fn->rr_ptr;
 730	if (!rt0)
 731		fn->rr_ptr = rt0 = fn->leaf;
 732
 733	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
 734			     &do_rr);
 735
 736	if (do_rr) {
 
 737		struct rt6_info *next = rt0->dst.rt6_next;
 738
 739		/* no entries matched; do round-robin */
 740		if (!next || next->rt6i_metric != rt0->rt6i_metric)
 741			next = fn->leaf;
 742
 743		if (next != rt0)
 744			fn->rr_ptr = next;
 745	}
 746
 747	net = dev_net(rt0->dst.dev);
 748	return match ? match : net->ipv6.ip6_null_entry;
 749}
 750
 751static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
 752{
 753	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
 754}
 755
 756#ifdef CONFIG_IPV6_ROUTE_INFO
 757int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 758		  const struct in6_addr *gwaddr)
 759{
 760	struct net *net = dev_net(dev);
 761	struct route_info *rinfo = (struct route_info *) opt;
 762	struct in6_addr prefix_buf, *prefix;
 763	unsigned int pref;
 764	unsigned long lifetime;
 765	struct rt6_info *rt;
 766
 767	if (len < sizeof(struct route_info)) {
 768		return -EINVAL;
 769	}
 770
 771	/* Sanity check for prefix_len and length */
 772	if (rinfo->length > 3) {
 773		return -EINVAL;
 774	} else if (rinfo->prefix_len > 128) {
 775		return -EINVAL;
 776	} else if (rinfo->prefix_len > 64) {
 777		if (rinfo->length < 2) {
 778			return -EINVAL;
 779		}
 780	} else if (rinfo->prefix_len > 0) {
 781		if (rinfo->length < 1) {
 782			return -EINVAL;
 783		}
 784	}
 785
 786	pref = rinfo->route_pref;
 787	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 788		return -EINVAL;
 789
 790	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 791
 792	if (rinfo->length == 3)
 793		prefix = (struct in6_addr *)rinfo->prefix;
 794	else {
 795		/* this function is safe */
 796		ipv6_addr_prefix(&prefix_buf,
 797				 (struct in6_addr *)rinfo->prefix,
 798				 rinfo->prefix_len);
 799		prefix = &prefix_buf;
 800	}
 801
 802	if (rinfo->prefix_len == 0)
 803		rt = rt6_get_dflt_router(gwaddr, dev);
 804	else
 805		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
 806					gwaddr, dev->ifindex);
 807
 808	if (rt && !lifetime) {
 809		ip6_del_rt(rt);
 810		rt = NULL;
 811	}
 812
 813	if (!rt && lifetime)
 814		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 815					pref);
 816	else if (rt)
 817		rt->rt6i_flags = RTF_ROUTEINFO |
 818				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 819
 820	if (rt) {
 821		if (!addrconf_finite_timeout(lifetime))
 822			rt6_clean_expires(rt);
 823		else
 824			rt6_set_expires(rt, jiffies + HZ * lifetime);
 825
 826		ip6_rt_put(rt);
 827	}
 828	return 0;
 829}
 830#endif
 831
 832static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
 833					struct in6_addr *saddr)
 834{
 835	struct fib6_node *pn;
 836	while (1) {
 837		if (fn->fn_flags & RTN_TL_ROOT)
 838			return NULL;
 839		pn = fn->parent;
 840		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
 841			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
 842		else
 843			fn = pn;
 844		if (fn->fn_flags & RTN_RTINFO)
 845			return fn;
 846	}
 847}
 
 848
 849static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 850					     struct fib6_table *table,
 851					     struct flowi6 *fl6, int flags)
 852{
 853	struct fib6_node *fn;
 854	struct rt6_info *rt;
 855
 856	read_lock_bh(&table->tb6_lock);
 857	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 858restart:
 859	rt = fn->leaf;
 860	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 861	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
 862		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
 863	if (rt == net->ipv6.ip6_null_entry) {
 864		fn = fib6_backtrack(fn, &fl6->saddr);
 865		if (fn)
 866			goto restart;
 867	}
 868	dst_use(&rt->dst, jiffies);
 869	read_unlock_bh(&table->tb6_lock);
 870
 871	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
 872
 873	return rt;
 874
 875}
 876
 877struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 878				    int flags)
 879{
 880	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
 881}
 882EXPORT_SYMBOL_GPL(ip6_route_lookup);
 883
 884struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 885			    const struct in6_addr *saddr, int oif, int strict)
 886{
 887	struct flowi6 fl6 = {
 888		.flowi6_oif = oif,
 889		.daddr = *daddr,
 890	};
 891	struct dst_entry *dst;
 892	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 893
 894	if (saddr) {
 895		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 896		flags |= RT6_LOOKUP_F_HAS_SADDR;
 897	}
 898
 899	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 900	if (dst->error == 0)
 901		return (struct rt6_info *) dst;
 902
 903	dst_release(dst);
 904
 905	return NULL;
 906}
 
 907EXPORT_SYMBOL(rt6_lookup);
 908
 909/* ip6_ins_rt is called with FREE table->tb6_lock.
 910   It takes new route entry, the addition fails by any reason the
 911   route is freed. In any case, if caller does not hold it, it may
 912   be destroyed.
 913 */
 914
 915static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
 916			struct mx6_config *mxc)
 917{
 918	int err;
 919	struct fib6_table *table;
 920
 921	table = rt->rt6i_table;
 922	write_lock_bh(&table->tb6_lock);
 923	err = fib6_add(&table->tb6_root, rt, info, mxc);
 924	write_unlock_bh(&table->tb6_lock);
 925
 926	return err;
 927}
 928
 929int ip6_ins_rt(struct rt6_info *rt)
 930{
 931	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
 932	struct mx6_config mxc = { .mx = NULL, };
 933
 934	return __ip6_ins_rt(rt, &info, &mxc);
 935}
 936
 937static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 938					   const struct in6_addr *daddr,
 939					   const struct in6_addr *saddr)
 940{
 941	struct rt6_info *rt;
 942
 943	/*
 944	 *	Clone the route.
 945	 */
 946
 947	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
 948		ort = (struct rt6_info *)ort->dst.from;
 949
 950	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
 
 951
 952	if (!rt)
 953		return NULL;
 
 
 
 
 954
 955	ip6_rt_copy_init(rt, ort);
 956	rt->rt6i_flags |= RTF_CACHE;
 957	rt->rt6i_metric = 0;
 958	rt->dst.flags |= DST_HOST;
 959	rt->rt6i_dst.addr = *daddr;
 960	rt->rt6i_dst.plen = 128;
 961
 962	if (!rt6_is_gw_or_nonexthop(ort)) {
 963		if (ort->rt6i_dst.plen != 128 &&
 964		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 965			rt->rt6i_flags |= RTF_ANYCAST;
 966#ifdef CONFIG_IPV6_SUBTREES
 967		if (rt->rt6i_src.plen && saddr) {
 968			rt->rt6i_src.addr = *saddr;
 969			rt->rt6i_src.plen = 128;
 970		}
 971#endif
 972	}
 973
 974	return rt;
 975}
 976
 977static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
 978{
 979	struct rt6_info *pcpu_rt;
 980
 981	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
 982				  rt->dst.dev, rt->dst.flags);
 983
 984	if (!pcpu_rt)
 985		return NULL;
 986	ip6_rt_copy_init(pcpu_rt, rt);
 987	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
 988	pcpu_rt->rt6i_flags |= RTF_PCPU;
 989	return pcpu_rt;
 990}
 991
 992/* It should be called with read_lock_bh(&tb6_lock) acquired */
 993static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
 994{
 995	struct rt6_info *pcpu_rt, **p;
 996
 997	p = this_cpu_ptr(rt->rt6i_pcpu);
 998	pcpu_rt = *p;
 999
1000	if (pcpu_rt) {
1001		dst_hold(&pcpu_rt->dst);
1002		rt6_dst_from_metrics_check(pcpu_rt);
 
1003	}
1004	return pcpu_rt;
 
1005}
1006
1007static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
 
1008{
1009	struct fib6_table *table = rt->rt6i_table;
1010	struct rt6_info *pcpu_rt, *prev, **p;
1011
1012	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1013	if (!pcpu_rt) {
1014		struct net *net = dev_net(rt->dst.dev);
1015
1016		dst_hold(&net->ipv6.ip6_null_entry->dst);
1017		return net->ipv6.ip6_null_entry;
1018	}
1019
1020	read_lock_bh(&table->tb6_lock);
1021	if (rt->rt6i_pcpu) {
1022		p = this_cpu_ptr(rt->rt6i_pcpu);
1023		prev = cmpxchg(p, NULL, pcpu_rt);
1024		if (prev) {
1025			/* If someone did it before us, return prev instead */
1026			dst_destroy(&pcpu_rt->dst);
1027			pcpu_rt = prev;
1028		}
1029	} else {
1030		/* rt has been removed from the fib6 tree
1031		 * before we have a chance to acquire the read_lock.
1032		 * In this case, don't brother to create a pcpu rt
1033		 * since rt is going away anyway.  The next
1034		 * dst_check() will trigger a re-lookup.
1035		 */
1036		dst_destroy(&pcpu_rt->dst);
1037		pcpu_rt = rt;
1038	}
1039	dst_hold(&pcpu_rt->dst);
1040	rt6_dst_from_metrics_check(pcpu_rt);
1041	read_unlock_bh(&table->tb6_lock);
1042	return pcpu_rt;
1043}
1044
1045static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1046				      struct flowi6 *fl6, int flags)
1047{
1048	struct fib6_node *fn, *saved_fn;
1049	struct rt6_info *rt;
1050	int strict = 0;
 
 
 
1051
1052	strict |= flags & RT6_LOOKUP_F_IFACE;
1053	if (net->ipv6.devconf_all->forwarding == 0)
1054		strict |= RT6_LOOKUP_F_REACHABLE;
1055
 
1056	read_lock_bh(&table->tb6_lock);
1057
 
1058	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1059	saved_fn = fn;
1060
1061	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1062		oif = 0;
1063
1064redo_rt6_select:
1065	rt = rt6_select(fn, oif, strict);
1066	if (rt->rt6i_nsiblings)
1067		rt = rt6_multipath_select(rt, fl6, oif, strict);
1068	if (rt == net->ipv6.ip6_null_entry) {
1069		fn = fib6_backtrack(fn, &fl6->saddr);
1070		if (fn)
1071			goto redo_rt6_select;
1072		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1073			/* also consider unreachable route */
1074			strict &= ~RT6_LOOKUP_F_REACHABLE;
1075			fn = saved_fn;
1076			goto redo_rt6_select;
1077		}
1078	}
1079
1080
1081	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1082		dst_use(&rt->dst, jiffies);
1083		read_unlock_bh(&table->tb6_lock);
1084
1085		rt6_dst_from_metrics_check(rt);
1086
1087		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1088		return rt;
1089	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1090			    !(rt->rt6i_flags & RTF_GATEWAY))) {
1091		/* Create a RTF_CACHE clone which will not be
1092		 * owned by the fib6 tree.  It is for the special case where
1093		 * the daddr in the skb during the neighbor look-up is different
1094		 * from the fl6->daddr used to look-up route here.
1095		 */
1096
1097		struct rt6_info *uncached_rt;
1098
1099		dst_use(&rt->dst, jiffies);
1100		read_unlock_bh(&table->tb6_lock);
1101
1102		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1103		dst_release(&rt->dst);
1104
1105		if (uncached_rt)
1106			rt6_uncached_list_add(uncached_rt);
1107		else
1108			uncached_rt = net->ipv6.ip6_null_entry;
1109
1110		dst_hold(&uncached_rt->dst);
 
 
 
1111
1112		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
1113		return uncached_rt;
1114
1115	} else {
1116		/* Get a percpu copy */
 
 
 
 
1117
1118		struct rt6_info *pcpu_rt;
 
1119
1120		rt->dst.lastuse = jiffies;
1121		rt->dst.__use++;
1122		pcpu_rt = rt6_get_pcpu_route(rt);
 
 
 
1123
1124		if (pcpu_rt) {
1125			read_unlock_bh(&table->tb6_lock);
1126		} else {
1127			/* We have to do the read_unlock first
1128			 * because rt6_make_pcpu_route() may trigger
1129			 * ip6_dst_gc() which will take the write_lock.
1130			 */
1131			dst_hold(&rt->dst);
1132			read_unlock_bh(&table->tb6_lock);
1133			pcpu_rt = rt6_make_pcpu_route(rt);
1134			dst_release(&rt->dst);
1135		}
1136
1137		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
1138		return pcpu_rt;
 
 
 
 
1139
 
 
 
 
1140	}
 
 
 
 
 
 
 
1141}
1142
1143static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1144					    struct flowi6 *fl6, int flags)
1145{
1146	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1147}
1148
1149static struct dst_entry *ip6_route_input_lookup(struct net *net,
1150						struct net_device *dev,
1151						struct flowi6 *fl6, int flags)
1152{
1153	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1154		flags |= RT6_LOOKUP_F_IFACE;
1155
1156	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1157}
1158
1159void ip6_route_input(struct sk_buff *skb)
1160{
1161	const struct ipv6hdr *iph = ipv6_hdr(skb);
1162	struct net *net = dev_net(skb->dev);
1163	int flags = RT6_LOOKUP_F_HAS_SADDR;
1164	struct ip_tunnel_info *tun_info;
1165	struct flowi6 fl6 = {
1166		.flowi6_iif = l3mdev_fib_oif(skb->dev),
1167		.daddr = iph->daddr,
1168		.saddr = iph->saddr,
1169		.flowlabel = ip6_flowinfo(iph),
1170		.flowi6_mark = skb->mark,
1171		.flowi6_proto = iph->nexthdr,
1172	};
1173
1174	tun_info = skb_tunnel_info(skb);
1175	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1176		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1177	skb_dst_drop(skb);
1178	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1179}
1180
1181static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1182					     struct flowi6 *fl6, int flags)
1183{
1184	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1185}
1186
1187struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1188					 struct flowi6 *fl6, int flags)
1189{
1190	struct dst_entry *dst;
1191	bool any_src;
1192
1193	dst = l3mdev_rt6_dst_by_oif(net, fl6);
1194	if (dst)
1195		return dst;
1196
1197	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1198
1199	any_src = ipv6_addr_any(&fl6->saddr);
1200	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1201	    (fl6->flowi6_oif && any_src))
1202		flags |= RT6_LOOKUP_F_IFACE;
1203
1204	if (!any_src)
1205		flags |= RT6_LOOKUP_F_HAS_SADDR;
1206	else if (sk)
1207		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1208
1209	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1210}
1211EXPORT_SYMBOL_GPL(ip6_route_output_flags);
 
1212
1213struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1214{
1215	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1216	struct dst_entry *new = NULL;
1217
1218	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1219	if (rt) {
1220		rt6_info_init(rt);
1221
1222		new = &rt->dst;
 
1223		new->__use = 1;
1224		new->input = dst_discard;
1225		new->output = dst_discard_out;
1226
1227		dst_copy_metrics(new, &ort->dst);
 
 
 
1228		rt->rt6i_idev = ort->rt6i_idev;
1229		if (rt->rt6i_idev)
1230			in6_dev_hold(rt->rt6i_idev);
1231
1232		rt->rt6i_gateway = ort->rt6i_gateway;
1233		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
 
1234		rt->rt6i_metric = 0;
1235
1236		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1237#ifdef CONFIG_IPV6_SUBTREES
1238		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1239#endif
1240
1241		dst_free(new);
1242	}
1243
1244	dst_release(dst_orig);
1245	return new ? new : ERR_PTR(-ENOMEM);
1246}
1247
1248/*
1249 *	Destination cache support functions
1250 */
1251
1252static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1253{
1254	if (rt->dst.from &&
1255	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1256		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1257}
1258
1259static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1260{
1261	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1262		return NULL;
1263
1264	if (rt6_check_expired(rt))
1265		return NULL;
1266
1267	return &rt->dst;
1268}
1269
1270static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1271{
1272	if (!__rt6_check_expired(rt) &&
1273	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1274	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1275		return &rt->dst;
1276	else
1277		return NULL;
1278}
1279
1280static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1281{
1282	struct rt6_info *rt;
1283
1284	rt = (struct rt6_info *) dst;
1285
1286	/* All IPV6 dsts are created with ->obsolete set to the value
1287	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1288	 * into this function always.
1289	 */
1290
1291	rt6_dst_from_metrics_check(rt);
1292
1293	if (rt->rt6i_flags & RTF_PCPU ||
1294	    (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1295		return rt6_dst_from_check(rt, cookie);
1296	else
1297		return rt6_check(rt, cookie);
1298}
1299
1300static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1301{
1302	struct rt6_info *rt = (struct rt6_info *) dst;
1303
1304	if (rt) {
1305		if (rt->rt6i_flags & RTF_CACHE) {
1306			if (rt6_check_expired(rt)) {
1307				ip6_del_rt(rt);
1308				dst = NULL;
1309			}
1310		} else {
1311			dst_release(dst);
1312			dst = NULL;
1313		}
1314	}
1315	return dst;
1316}
1317
1318static void ip6_link_failure(struct sk_buff *skb)
1319{
1320	struct rt6_info *rt;
1321
1322	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1323
1324	rt = (struct rt6_info *) skb_dst(skb);
1325	if (rt) {
1326		if (rt->rt6i_flags & RTF_CACHE) {
1327			dst_hold(&rt->dst);
1328			ip6_del_rt(rt);
1329		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1330			rt->rt6i_node->fn_sernum = -1;
1331		}
1332	}
1333}
1334
1335static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1336{
1337	struct net *net = dev_net(rt->dst.dev);
1338
1339	rt->rt6i_flags |= RTF_MODIFIED;
1340	rt->rt6i_pmtu = mtu;
1341	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1342}
1343
1344static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1345{
1346	return !(rt->rt6i_flags & RTF_CACHE) &&
1347		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1348}
1349
1350static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1351				 const struct ipv6hdr *iph, u32 mtu)
1352{
1353	struct rt6_info *rt6 = (struct rt6_info *)dst;
1354
1355	if (rt6->rt6i_flags & RTF_LOCAL)
1356		return;
1357
1358	dst_confirm(dst);
1359	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1360	if (mtu >= dst_mtu(dst))
1361		return;
1362
1363	if (!rt6_cache_allowed_for_pmtu(rt6)) {
1364		rt6_do_update_pmtu(rt6, mtu);
1365	} else {
1366		const struct in6_addr *daddr, *saddr;
1367		struct rt6_info *nrt6;
1368
1369		if (iph) {
1370			daddr = &iph->daddr;
1371			saddr = &iph->saddr;
1372		} else if (sk) {
1373			daddr = &sk->sk_v6_daddr;
1374			saddr = &inet6_sk(sk)->saddr;
1375		} else {
1376			return;
1377		}
1378		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1379		if (nrt6) {
1380			rt6_do_update_pmtu(nrt6, mtu);
1381
1382			/* ip6_ins_rt(nrt6) will bump the
1383			 * rt6->rt6i_node->fn_sernum
1384			 * which will fail the next rt6_check() and
1385			 * invalidate the sk->sk_dst_cache.
1386			 */
1387			ip6_ins_rt(nrt6);
1388		}
1389	}
1390}
1391
1392static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1393			       struct sk_buff *skb, u32 mtu)
1394{
1395	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1396}
1397
1398void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1399		     int oif, u32 mark)
1400{
1401	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1402	struct dst_entry *dst;
1403	struct flowi6 fl6;
1404
1405	memset(&fl6, 0, sizeof(fl6));
1406	fl6.flowi6_oif = oif;
1407	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1408	fl6.daddr = iph->daddr;
1409	fl6.saddr = iph->saddr;
1410	fl6.flowlabel = ip6_flowinfo(iph);
1411
1412	dst = ip6_route_output(net, NULL, &fl6);
1413	if (!dst->error)
1414		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1415	dst_release(dst);
1416}
1417EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1418
1419void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1420{
1421	struct dst_entry *dst;
1422
1423	ip6_update_pmtu(skb, sock_net(sk), mtu,
1424			sk->sk_bound_dev_if, sk->sk_mark);
1425
1426	dst = __sk_dst_get(sk);
1427	if (!dst || !dst->obsolete ||
1428	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1429		return;
1430
1431	bh_lock_sock(sk);
1432	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1433		ip6_datagram_dst_update(sk, false);
1434	bh_unlock_sock(sk);
1435}
1436EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1437
1438/* Handle redirects */
/*
 * Flow key used for redirect validation: a regular flowi6 extended with
 * the address of the router that sent the redirect.  The lookup callback
 * receives a pointer to the embedded flowi6 and casts it back to this
 * structure, so fl6 must remain the first member.
 */
1439struct ip6rd_flowi {
1440	struct flowi6 fl6;		/* must stay first, see above */
1441	struct in6_addr gateway;	/* compared against rt6i_gateway */
1442};
1443
1444static struct rt6_info *__ip6_route_redirect(struct net *net,
1445					     struct fib6_table *table,
1446					     struct flowi6 *fl6,
1447					     int flags)
1448{
1449	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1450	struct rt6_info *rt;
1451	struct fib6_node *fn;
1452
1453	/* Get the "current" route for this destination and
1454	 * check if the redirect has come from approriate router.
1455	 *
1456	 * RFC 4861 specifies that redirects should only be
1457	 * accepted if they come from the nexthop to the target.
1458	 * Due to the way the routes are chosen, this notion
1459	 * is a bit fuzzy and one might need to check all possible
1460	 * routes.
1461	 */
1462
1463	read_lock_bh(&table->tb6_lock);
1464	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1465restart:
1466	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1467		if (rt6_check_expired(rt))
1468			continue;
1469		if (rt->dst.error)
1470			break;
1471		if (!(rt->rt6i_flags & RTF_GATEWAY))
1472			continue;
1473		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1474			continue;
1475		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1476			continue;
1477		break;
1478	}
1479
1480	if (!rt)
1481		rt = net->ipv6.ip6_null_entry;
1482	else if (rt->dst.error) {
1483		rt = net->ipv6.ip6_null_entry;
1484		goto out;
1485	}
1486
1487	if (rt == net->ipv6.ip6_null_entry) {
1488		fn = fib6_backtrack(fn, &fl6->saddr);
1489		if (fn)
1490			goto restart;
1491	}
1492
1493out:
1494	dst_hold(&rt->dst);
1495
1496	read_unlock_bh(&table->tb6_lock);
1497
1498	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1499	return rt;
1500};
1501
1502static struct dst_entry *ip6_route_redirect(struct net *net,
1503					const struct flowi6 *fl6,
1504					const struct in6_addr *gateway)
1505{
1506	int flags = RT6_LOOKUP_F_HAS_SADDR;
1507	struct ip6rd_flowi rdfl;
1508
1509	rdfl.fl6 = *fl6;
1510	rdfl.gateway = *gateway;
1511
1512	return fib6_rule_lookup(net, &rdfl.fl6,
1513				flags, __ip6_route_redirect);
1514}
1515
1516void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1517{
1518	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1519	struct dst_entry *dst;
1520	struct flowi6 fl6;
1521
1522	memset(&fl6, 0, sizeof(fl6));
1523	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1524	fl6.flowi6_oif = oif;
1525	fl6.flowi6_mark = mark;
1526	fl6.daddr = iph->daddr;
1527	fl6.saddr = iph->saddr;
1528	fl6.flowlabel = ip6_flowinfo(iph);
1529
1530	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1531	rt6_do_redirect(dst, NULL, skb);
1532	dst_release(dst);
1533}
1534EXPORT_SYMBOL_GPL(ip6_redirect);
1535
1536void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1537			    u32 mark)
1538{
1539	const struct ipv6hdr *iph = ipv6_hdr(skb);
1540	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1541	struct dst_entry *dst;
1542	struct flowi6 fl6;
1543
1544	memset(&fl6, 0, sizeof(fl6));
1545	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1546	fl6.flowi6_oif = oif;
1547	fl6.flowi6_mark = mark;
1548	fl6.daddr = msg->dest;
1549	fl6.saddr = iph->daddr;
1550
1551	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1552	rt6_do_redirect(dst, NULL, skb);
1553	dst_release(dst);
1554}
1555
1556void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1557{
1558	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1559}
1560EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1561
1562static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1563{
1564	struct net_device *dev = dst->dev;
1565	unsigned int mtu = dst_mtu(dst);
1566	struct net *net = dev_net(dev);
1567
1568	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1569
1570	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1571		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1572
1573	/*
1574	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1575	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1576	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1577	 * rely only on pmtu discovery"
1578	 */
1579	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1580		mtu = IPV6_MAXPLEN;
1581	return mtu;
1582}
1583
1584static unsigned int ip6_mtu(const struct dst_entry *dst)
1585{
1586	const struct rt6_info *rt = (const struct rt6_info *)dst;
1587	unsigned int mtu = rt->rt6i_pmtu;
1588	struct inet6_dev *idev;
 
1589
1590	if (mtu)
1591		goto out;
1592
1593	mtu = dst_metric_raw(dst, RTAX_MTU);
1594	if (mtu)
1595		goto out;
1596
1597	mtu = IPV6_MIN_MTU;
1598
1599	rcu_read_lock();
1600	idev = __in6_dev_get(dst->dev);
1601	if (idev)
1602		mtu = idev->cnf.mtu6;
1603	rcu_read_unlock();
1604
1605out:
1606	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1607}
1608
1609static struct dst_entry *icmp6_dst_gc_list;
1610static DEFINE_SPINLOCK(icmp6_dst_lock);
1611
1612struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 
1613				  struct flowi6 *fl6)
1614{
1615	struct dst_entry *dst;
1616	struct rt6_info *rt;
1617	struct inet6_dev *idev = in6_dev_get(dev);
1618	struct net *net = dev_net(dev);
1619
1620	if (unlikely(!idev))
1621		return ERR_PTR(-ENODEV);
1622
1623	rt = ip6_dst_alloc(net, dev, 0);
1624	if (unlikely(!rt)) {
1625		in6_dev_put(idev);
1626		dst = ERR_PTR(-ENOMEM);
1627		goto out;
1628	}
1629
 
 
 
 
 
 
 
 
 
 
 
1630	rt->dst.flags |= DST_HOST;
1631	rt->dst.output  = ip6_output;
 
1632	atomic_set(&rt->dst.__refcnt, 1);
1633	rt->rt6i_gateway  = fl6->daddr;
1634	rt->rt6i_dst.addr = fl6->daddr;
1635	rt->rt6i_dst.plen = 128;
1636	rt->rt6i_idev     = idev;
1637	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1638
1639	spin_lock_bh(&icmp6_dst_lock);
1640	rt->dst.next = icmp6_dst_gc_list;
1641	icmp6_dst_gc_list = &rt->dst;
1642	spin_unlock_bh(&icmp6_dst_lock);
1643
1644	fib6_force_start_gc(net);
1645
1646	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1647
1648out:
1649	return dst;
1650}
1651
1652int icmp6_dst_gc(void)
1653{
1654	struct dst_entry *dst, **pprev;
1655	int more = 0;
1656
1657	spin_lock_bh(&icmp6_dst_lock);
1658	pprev = &icmp6_dst_gc_list;
1659
1660	while ((dst = *pprev) != NULL) {
1661		if (!atomic_read(&dst->__refcnt)) {
1662			*pprev = dst->next;
1663			dst_free(dst);
1664		} else {
1665			pprev = &dst->next;
1666			++more;
1667		}
1668	}
1669
1670	spin_unlock_bh(&icmp6_dst_lock);
1671
1672	return more;
1673}
1674
1675static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1676			    void *arg)
1677{
1678	struct dst_entry *dst, **pprev;
1679
1680	spin_lock_bh(&icmp6_dst_lock);
1681	pprev = &icmp6_dst_gc_list;
1682	while ((dst = *pprev) != NULL) {
1683		struct rt6_info *rt = (struct rt6_info *) dst;
1684		if (func(rt, arg)) {
1685			*pprev = dst->next;
1686			dst_free(dst);
1687		} else {
1688			pprev = &dst->next;
1689		}
1690	}
1691	spin_unlock_bh(&icmp6_dst_lock);
1692}
1693
1694static int ip6_dst_gc(struct dst_ops *ops)
1695{
 
1696	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1697	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1698	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1699	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1700	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1701	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1702	int entries;
1703
1704	entries = dst_entries_get_fast(ops);
1705	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1706	    entries <= rt_max_size)
1707		goto out;
1708
1709	net->ipv6.ip6_rt_gc_expire++;
1710	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
 
1711	entries = dst_entries_get_slow(ops);
1712	if (entries < ops->gc_thresh)
1713		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1714out:
1715	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1716	return entries > rt_max_size;
1717}
1718
1719static int ip6_convert_metrics(struct mx6_config *mxc,
1720			       const struct fib6_config *cfg)
1721{
1722	bool ecn_ca = false;
1723	struct nlattr *nla;
1724	int remaining;
1725	u32 *mp;
1726
1727	if (!cfg->fc_mx)
1728		return 0;
1729
1730	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1731	if (unlikely(!mp))
1732		return -ENOMEM;
1733
1734	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1735		int type = nla_type(nla);
1736		u32 val;
1737
1738		if (!type)
1739			continue;
1740		if (unlikely(type > RTAX_MAX))
1741			goto err;
1742
1743		if (type == RTAX_CC_ALGO) {
1744			char tmp[TCP_CA_NAME_MAX];
1745
1746			nla_strlcpy(tmp, nla, sizeof(tmp));
1747			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1748			if (val == TCP_CA_UNSPEC)
1749				goto err;
1750		} else {
1751			val = nla_get_u32(nla);
1752		}
1753		if (type == RTAX_HOPLIMIT && val > 255)
1754			val = 255;
1755		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1756			goto err;
1757
1758		mp[type - 1] = val;
1759		__set_bit(type - 1, mxc->mx_valid);
1760	}
1761
1762	if (ecn_ca) {
1763		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1764		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
 
 
 
 
 
 
 
 
 
 
 
1765	}
1766
1767	mxc->mx = mp;
1768	return 0;
1769 err:
1770	kfree(mp);
1771	return -EINVAL;
1772}
 
 
 
 
 
1773
1774static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1775{
 
1776	struct net *net = cfg->fc_nlinfo.nl_net;
1777	struct rt6_info *rt = NULL;
1778	struct net_device *dev = NULL;
1779	struct inet6_dev *idev = NULL;
1780	struct fib6_table *table;
1781	int addr_type;
1782	int err = -EINVAL;
1783
1784	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1785		goto out;
1786#ifndef CONFIG_IPV6_SUBTREES
1787	if (cfg->fc_src_len)
1788		goto out;
1789#endif
1790	if (cfg->fc_ifindex) {
1791		err = -ENODEV;
1792		dev = dev_get_by_index(net, cfg->fc_ifindex);
1793		if (!dev)
1794			goto out;
1795		idev = in6_dev_get(dev);
1796		if (!idev)
1797			goto out;
1798	}
1799
1800	if (cfg->fc_metric == 0)
1801		cfg->fc_metric = IP6_RT_PRIO_USER;
1802
1803	err = -ENOBUFS;
1804	if (cfg->fc_nlinfo.nlh &&
1805	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1806		table = fib6_get_table(net, cfg->fc_table);
1807		if (!table) {
1808			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1809			table = fib6_new_table(net, cfg->fc_table);
1810		}
1811	} else {
1812		table = fib6_new_table(net, cfg->fc_table);
1813	}
1814
1815	if (!table)
1816		goto out;
1817
1818	rt = ip6_dst_alloc(net, NULL,
1819			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1820
1821	if (!rt) {
1822		err = -ENOMEM;
1823		goto out;
1824	}
1825
 
 
1826	if (cfg->fc_flags & RTF_EXPIRES)
1827		rt6_set_expires(rt, jiffies +
1828				clock_t_to_jiffies(cfg->fc_expires));
1829	else
1830		rt6_clean_expires(rt);
1831
1832	if (cfg->fc_protocol == RTPROT_UNSPEC)
1833		cfg->fc_protocol = RTPROT_BOOT;
1834	rt->rt6i_protocol = cfg->fc_protocol;
1835
1836	addr_type = ipv6_addr_type(&cfg->fc_dst);
1837
1838	if (addr_type & IPV6_ADDR_MULTICAST)
1839		rt->dst.input = ip6_mc_input;
1840	else if (cfg->fc_flags & RTF_LOCAL)
1841		rt->dst.input = ip6_input;
1842	else
1843		rt->dst.input = ip6_forward;
1844
1845	rt->dst.output = ip6_output;
1846
1847	if (cfg->fc_encap) {
1848		struct lwtunnel_state *lwtstate;
1849
1850		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1851					   cfg->fc_encap, AF_INET6, cfg,
1852					   &lwtstate);
1853		if (err)
1854			goto out;
1855		rt->dst.lwtstate = lwtstate_get(lwtstate);
1856		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1857			rt->dst.lwtstate->orig_output = rt->dst.output;
1858			rt->dst.output = lwtunnel_output;
1859		}
1860		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1861			rt->dst.lwtstate->orig_input = rt->dst.input;
1862			rt->dst.input = lwtunnel_input;
1863		}
1864	}
1865
1866	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1867	rt->rt6i_dst.plen = cfg->fc_dst_len;
1868	if (rt->rt6i_dst.plen == 128)
1869		rt->dst.flags |= DST_HOST;
1870
 
 
 
 
 
 
 
 
1871#ifdef CONFIG_IPV6_SUBTREES
1872	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1873	rt->rt6i_src.plen = cfg->fc_src_len;
1874#endif
1875
1876	rt->rt6i_metric = cfg->fc_metric;
1877
1878	/* We cannot add true routes via loopback here,
1879	   they would result in kernel looping; promote them to reject routes
1880	 */
1881	if ((cfg->fc_flags & RTF_REJECT) ||
1882	    (dev && (dev->flags & IFF_LOOPBACK) &&
1883	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1884	     !(cfg->fc_flags & RTF_LOCAL))) {
1885		/* hold loopback dev/idev if we haven't done so. */
1886		if (dev != net->loopback_dev) {
1887			if (dev) {
1888				dev_put(dev);
1889				in6_dev_put(idev);
1890			}
1891			dev = net->loopback_dev;
1892			dev_hold(dev);
1893			idev = in6_dev_get(dev);
1894			if (!idev) {
1895				err = -ENODEV;
1896				goto out;
1897			}
1898		}
 
 
 
1899		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1900		switch (cfg->fc_type) {
1901		case RTN_BLACKHOLE:
1902			rt->dst.error = -EINVAL;
1903			rt->dst.output = dst_discard_out;
1904			rt->dst.input = dst_discard;
1905			break;
1906		case RTN_PROHIBIT:
1907			rt->dst.error = -EACCES;
1908			rt->dst.output = ip6_pkt_prohibit_out;
1909			rt->dst.input = ip6_pkt_prohibit;
1910			break;
1911		case RTN_THROW:
1912		case RTN_UNREACHABLE:
1913		default:
1914			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1915					: (cfg->fc_type == RTN_UNREACHABLE)
1916					? -EHOSTUNREACH : -ENETUNREACH;
1917			rt->dst.output = ip6_pkt_discard_out;
1918			rt->dst.input = ip6_pkt_discard;
1919			break;
1920		}
1921		goto install_route;
1922	}
1923
1924	if (cfg->fc_flags & RTF_GATEWAY) {
1925		const struct in6_addr *gw_addr;
1926		int gwa_type;
1927
1928		gw_addr = &cfg->fc_gateway;
1929		gwa_type = ipv6_addr_type(gw_addr);
1930
1931		/* if gw_addr is local we will fail to detect this in case
1932		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1933		 * will return already-added prefix route via interface that
1934		 * prefix route was assigned to, which might be non-loopback.
1935		 */
1936		err = -EINVAL;
1937		if (ipv6_chk_addr_and_flags(net, gw_addr,
1938					    gwa_type & IPV6_ADDR_LINKLOCAL ?
1939					    dev : NULL, 0, 0))
1940			goto out;
1941
1942		rt->rt6i_gateway = *gw_addr;
 
1943
1944		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1945			struct rt6_info *grt;
1946
1947			/* IPv6 strictly inhibits using not link-local
1948			   addresses as nexthop address.
1949			   Otherwise, router will not able to send redirects.
1950			   It is very good, but in some (rare!) circumstances
1951			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1952			   some exceptions. --ANK
1953			 */
 
1954			if (!(gwa_type & IPV6_ADDR_UNICAST))
1955				goto out;
1956
1957			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1958
1959			err = -EHOSTUNREACH;
1960			if (!grt)
1961				goto out;
1962			if (dev) {
1963				if (dev != grt->dst.dev) {
1964					ip6_rt_put(grt);
1965					goto out;
1966				}
1967			} else {
1968				dev = grt->dst.dev;
1969				idev = grt->rt6i_idev;
1970				dev_hold(dev);
1971				in6_dev_hold(grt->rt6i_idev);
1972			}
1973			if (!(grt->rt6i_flags & RTF_GATEWAY))
1974				err = 0;
1975			ip6_rt_put(grt);
1976
1977			if (err)
1978				goto out;
1979		}
1980		err = -EINVAL;
1981		if (!dev || (dev->flags & IFF_LOOPBACK))
1982			goto out;
1983	}
1984
1985	err = -ENODEV;
1986	if (!dev)
1987		goto out;
1988
1989	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1990		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1991			err = -EINVAL;
1992			goto out;
1993		}
1994		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1995		rt->rt6i_prefsrc.plen = 128;
1996	} else
1997		rt->rt6i_prefsrc.plen = 0;
1998
 
 
 
 
 
 
1999	rt->rt6i_flags = cfg->fc_flags;
2000
2001install_route:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2002	rt->dst.dev = dev;
2003	rt->rt6i_idev = idev;
2004	rt->rt6i_table = table;
2005
2006	cfg->fc_nlinfo.nl_net = dev_net(dev);
2007
2008	return rt;
 
2009out:
2010	if (dev)
2011		dev_put(dev);
2012	if (idev)
2013		in6_dev_put(idev);
2014	if (rt)
2015		dst_free(&rt->dst);
2016
2017	return ERR_PTR(err);
2018}
2019
2020int ip6_route_add(struct fib6_config *cfg)
2021{
2022	struct mx6_config mxc = { .mx = NULL, };
2023	struct rt6_info *rt;
2024	int err;
2025
2026	rt = ip6_route_info_create(cfg);
2027	if (IS_ERR(rt)) {
2028		err = PTR_ERR(rt);
2029		rt = NULL;
2030		goto out;
2031	}
2032
2033	err = ip6_convert_metrics(&mxc, cfg);
2034	if (err)
2035		goto out;
2036
2037	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2038
2039	kfree(mxc.mx);
2040
2041	return err;
2042out:
2043	if (rt)
2044		dst_free(&rt->dst);
2045
2046	return err;
2047}
2048
2049static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2050{
2051	int err;
2052	struct fib6_table *table;
2053	struct net *net = dev_net(rt->dst.dev);
2054
2055	if (rt == net->ipv6.ip6_null_entry ||
2056	    rt->dst.flags & DST_NOCACHE) {
2057		err = -ENOENT;
2058		goto out;
2059	}
2060
2061	table = rt->rt6i_table;
2062	write_lock_bh(&table->tb6_lock);
 
2063	err = fib6_del(rt, info);
 
 
2064	write_unlock_bh(&table->tb6_lock);
2065
2066out:
2067	ip6_rt_put(rt);
2068	return err;
2069}
2070
2071int ip6_del_rt(struct rt6_info *rt)
2072{
2073	struct nl_info info = {
2074		.nl_net = dev_net(rt->dst.dev),
2075	};
2076	return __ip6_del_rt(rt, &info);
2077}
2078
2079static int ip6_route_del(struct fib6_config *cfg)
2080{
2081	struct fib6_table *table;
2082	struct fib6_node *fn;
2083	struct rt6_info *rt;
2084	int err = -ESRCH;
2085
2086	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2087	if (!table)
2088		return err;
2089
2090	read_lock_bh(&table->tb6_lock);
2091
2092	fn = fib6_locate(&table->tb6_root,
2093			 &cfg->fc_dst, cfg->fc_dst_len,
2094			 &cfg->fc_src, cfg->fc_src_len);
2095
2096	if (fn) {
2097		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2098			if ((rt->rt6i_flags & RTF_CACHE) &&
2099			    !(cfg->fc_flags & RTF_CACHE))
2100				continue;
2101			if (cfg->fc_ifindex &&
2102			    (!rt->dst.dev ||
2103			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2104				continue;
2105			if (cfg->fc_flags & RTF_GATEWAY &&
2106			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2107				continue;
2108			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2109				continue;
2110			dst_hold(&rt->dst);
2111			read_unlock_bh(&table->tb6_lock);
2112
2113			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2114		}
2115	}
2116	read_unlock_bh(&table->tb6_lock);
2117
2118	return err;
2119}
2120
2121static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
 
 
 
 
 
 
 
 
 
 
 
2122{
2123	struct netevent_redirect netevent;
2124	struct rt6_info *rt, *nrt = NULL;
2125	struct ndisc_options ndopts;
2126	struct inet6_dev *in6_dev;
2127	struct neighbour *neigh;
2128	struct rd_msg *msg;
2129	int optlen, on_link;
2130	u8 *lladdr;
2131
2132	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2133	optlen -= sizeof(*msg);
 
 
 
 
 
 
 
 
2134
2135	if (optlen < 0) {
2136		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2137		return;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2138	}
2139
2140	msg = (struct rd_msg *)icmp6_hdr(skb);
 
 
 
 
2141
2142	if (ipv6_addr_is_multicast(&msg->dest)) {
2143		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2144		return;
2145	}
2146
2147	on_link = 0;
2148	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2149		on_link = 1;
2150	} else if (ipv6_addr_type(&msg->target) !=
2151		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2152		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2153		return;
2154	}
2155
2156	in6_dev = __in6_dev_get(skb->dev);
2157	if (!in6_dev)
2158		return;
2159	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2160		return;
 
 
 
 
 
 
 
 
 
2161
2162	/* RFC2461 8.1:
2163	 *	The IP source address of the Redirect MUST be the same as the current
2164	 *	first-hop router for the specified ICMP Destination Address.
2165	 */
2166
2167	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2168		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2169		return;
2170	}
2171
2172	lladdr = NULL;
2173	if (ndopts.nd_opts_tgt_lladdr) {
2174		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2175					     skb->dev);
2176		if (!lladdr) {
2177			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2178			return;
2179		}
2180	}
2181
2182	rt = (struct rt6_info *) dst;
2183	if (rt->rt6i_flags & RTF_REJECT) {
2184		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2185		return;
2186	}
 
 
2187
2188	/* Redirect received -> path was valid.
2189	 * Look, redirects are sent only in response to data packets,
2190	 * so that this nexthop apparently is reachable. --ANK
2191	 */
2192	dst_confirm(&rt->dst);
2193
2194	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2195	if (!neigh)
2196		return;
 
2197
2198	/*
2199	 *	We have finally decided to accept it.
2200	 */
2201
2202	neigh_update(neigh, lladdr, NUD_STALE,
2203		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
2204		     NEIGH_UPDATE_F_OVERRIDE|
2205		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2206				     NEIGH_UPDATE_F_ISROUTER))
2207		     );
2208
2209	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
 
 
 
 
 
 
 
 
 
 
 
2210	if (!nrt)
2211		goto out;
2212
2213	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2214	if (on_link)
2215		nrt->rt6i_flags &= ~RTF_GATEWAY;
2216
2217	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
 
2218
2219	if (ip6_ins_rt(nrt))
2220		goto out;
2221
2222	netevent.old = &rt->dst;
2223	netevent.new = &nrt->dst;
2224	netevent.daddr = &msg->dest;
2225	netevent.neigh = neigh;
2226	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2227
2228	if (rt->rt6i_flags & RTF_CACHE) {
2229		rt = (struct rt6_info *) dst_clone(&rt->dst);
2230		ip6_del_rt(rt);
 
2231	}
2232
2233out:
2234	neigh_release(neigh);
2235}
2236
2237/*
2238 *	Misc support functions
 
2239 */
2240
2241static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
 
2242{
2243	BUG_ON(from->dst.from);
 
 
 
 
 
 
 
 
 
 
2244
2245	rt->rt6i_flags &= ~RTF_EXPIRES;
2246	dst_hold(&from->dst);
2247	rt->dst.from = &from->dst;
2248	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2249}
2250
2251static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
 
2252{
2253	rt->dst.input = ort->dst.input;
2254	rt->dst.output = ort->dst.output;
2255	rt->rt6i_dst = ort->rt6i_dst;
2256	rt->dst.error = ort->dst.error;
2257	rt->rt6i_idev = ort->rt6i_idev;
2258	if (rt->rt6i_idev)
2259		in6_dev_hold(rt->rt6i_idev);
2260	rt->dst.lastuse = jiffies;
2261	rt->rt6i_gateway = ort->rt6i_gateway;
2262	rt->rt6i_flags = ort->rt6i_flags;
2263	rt6_set_from(rt, ort);
2264	rt->rt6i_metric = ort->rt6i_metric;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2265#ifdef CONFIG_IPV6_SUBTREES
2266	rt->rt6i_src = ort->rt6i_src;
2267#endif
2268	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2269	rt->rt6i_table = ort->rt6i_table;
2270	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
 
2271}
2272
2273#ifdef CONFIG_IPV6_ROUTE_INFO
2274static struct rt6_info *rt6_get_route_info(struct net *net,
2275					   const struct in6_addr *prefix, int prefixlen,
2276					   const struct in6_addr *gwaddr, int ifindex)
2277{
2278	struct fib6_node *fn;
2279	struct rt6_info *rt = NULL;
2280	struct fib6_table *table;
2281
2282	table = fib6_get_table(net, RT6_TABLE_INFO);
2283	if (!table)
2284		return NULL;
2285
2286	read_lock_bh(&table->tb6_lock);
2287	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2288	if (!fn)
2289		goto out;
2290
2291	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2292		if (rt->dst.dev->ifindex != ifindex)
2293			continue;
2294		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2295			continue;
2296		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2297			continue;
2298		dst_hold(&rt->dst);
2299		break;
2300	}
2301out:
2302	read_unlock_bh(&table->tb6_lock);
2303	return rt;
2304}
2305
2306static struct rt6_info *rt6_add_route_info(struct net *net,
2307					   const struct in6_addr *prefix, int prefixlen,
2308					   const struct in6_addr *gwaddr, int ifindex,
2309					   unsigned int pref)
2310{
2311	struct fib6_config cfg = {
 
2312		.fc_metric	= IP6_RT_PRIO_USER,
2313		.fc_ifindex	= ifindex,
2314		.fc_dst_len	= prefixlen,
2315		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2316				  RTF_UP | RTF_PREF(pref),
2317		.fc_nlinfo.portid = 0,
2318		.fc_nlinfo.nlh = NULL,
2319		.fc_nlinfo.nl_net = net,
2320	};
2321
2322	cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
2323	cfg.fc_dst = *prefix;
2324	cfg.fc_gateway = *gwaddr;
2325
2326	/* We should treat it as a default route if prefix length is 0. */
2327	if (!prefixlen)
2328		cfg.fc_flags |= RTF_DEFAULT;
2329
2330	ip6_route_add(&cfg);
2331
2332	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2333}
2334#endif
2335
2336struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2337{
2338	struct rt6_info *rt;
2339	struct fib6_table *table;
2340
2341	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2342	if (!table)
2343		return NULL;
2344
2345	read_lock_bh(&table->tb6_lock);
2346	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2347		if (dev == rt->dst.dev &&
2348		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2349		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2350			break;
2351	}
2352	if (rt)
2353		dst_hold(&rt->dst);
2354	read_unlock_bh(&table->tb6_lock);
2355	return rt;
2356}
2357
2358struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2359				     struct net_device *dev,
2360				     unsigned int pref)
2361{
2362	struct fib6_config cfg = {
2363		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2364		.fc_metric	= IP6_RT_PRIO_USER,
2365		.fc_ifindex	= dev->ifindex,
2366		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2367				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2368		.fc_nlinfo.portid = 0,
2369		.fc_nlinfo.nlh = NULL,
2370		.fc_nlinfo.nl_net = dev_net(dev),
2371	};
2372
2373	cfg.fc_gateway = *gwaddr;
2374
2375	ip6_route_add(&cfg);
2376
2377	return rt6_get_dflt_router(gwaddr, dev);
2378}
2379
2380void rt6_purge_dflt_routers(struct net *net)
2381{
2382	struct rt6_info *rt;
2383	struct fib6_table *table;
2384
2385	/* NOTE: Keep consistent with rt6_get_dflt_router */
2386	table = fib6_get_table(net, RT6_TABLE_DFLT);
2387	if (!table)
2388		return;
2389
2390restart:
2391	read_lock_bh(&table->tb6_lock);
2392	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2393		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2394		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2395			dst_hold(&rt->dst);
2396			read_unlock_bh(&table->tb6_lock);
2397			ip6_del_rt(rt);
2398			goto restart;
2399		}
2400	}
2401	read_unlock_bh(&table->tb6_lock);
2402}
2403
2404static void rtmsg_to_fib6_config(struct net *net,
2405				 struct in6_rtmsg *rtmsg,
2406				 struct fib6_config *cfg)
2407{
2408	memset(cfg, 0, sizeof(*cfg));
2409
2410	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2411			 : RT6_TABLE_MAIN;
2412	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2413	cfg->fc_metric = rtmsg->rtmsg_metric;
2414	cfg->fc_expires = rtmsg->rtmsg_info;
2415	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2416	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2417	cfg->fc_flags = rtmsg->rtmsg_flags;
2418
2419	cfg->fc_nlinfo.nl_net = net;
2420
2421	cfg->fc_dst = rtmsg->rtmsg_dst;
2422	cfg->fc_src = rtmsg->rtmsg_src;
2423	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2424}
2425
2426int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2427{
2428	struct fib6_config cfg;
2429	struct in6_rtmsg rtmsg;
2430	int err;
2431
2432	switch (cmd) {
2433	case SIOCADDRT:		/* Add a route */
2434	case SIOCDELRT:		/* Delete a route */
2435		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2436			return -EPERM;
2437		err = copy_from_user(&rtmsg, arg,
2438				     sizeof(struct in6_rtmsg));
2439		if (err)
2440			return -EFAULT;
2441
2442		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2443
2444		rtnl_lock();
2445		switch (cmd) {
2446		case SIOCADDRT:
2447			err = ip6_route_add(&cfg);
2448			break;
2449		case SIOCDELRT:
2450			err = ip6_route_del(&cfg);
2451			break;
2452		default:
2453			err = -EINVAL;
2454		}
2455		rtnl_unlock();
2456
2457		return err;
2458	}
2459
2460	return -EINVAL;
2461}
2462
2463/*
2464 *	Drop the packet on the floor
2465 */
2466
2467static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2468{
2469	int type;
2470	struct dst_entry *dst = skb_dst(skb);
2471	switch (ipstats_mib_noroutes) {
2472	case IPSTATS_MIB_INNOROUTES:
2473		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2474		if (type == IPV6_ADDR_ANY) {
2475			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2476				      IPSTATS_MIB_INADDRERRORS);
2477			break;
2478		}
2479		/* FALLTHROUGH */
2480	case IPSTATS_MIB_OUTNOROUTES:
2481		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2482			      ipstats_mib_noroutes);
2483		break;
2484	}
2485	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2486	kfree_skb(skb);
2487	return 0;
2488}
2489
2490static int ip6_pkt_discard(struct sk_buff *skb)
2491{
2492	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2493}
2494
2495static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2496{
2497	skb->dev = skb_dst(skb)->dev;
2498	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2499}
2500
 
 
2501static int ip6_pkt_prohibit(struct sk_buff *skb)
2502{
2503	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2504}
2505
2506static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2507{
2508	skb->dev = skb_dst(skb)->dev;
2509	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2510}
2511
 
 
2512/*
2513 *	Allocate a dst for local (unicast / anycast) address.
2514 */
2515
2516struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2517				    const struct in6_addr *addr,
2518				    bool anycast)
2519{
2520	u32 tb_id;
2521	struct net *net = dev_net(idev->dev);
2522	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2523					    DST_NOCOUNT);
2524	if (!rt)
 
 
 
2525		return ERR_PTR(-ENOMEM);
 
2526
2527	in6_dev_hold(idev);
2528
2529	rt->dst.flags |= DST_HOST;
2530	rt->dst.input = ip6_input;
2531	rt->dst.output = ip6_output;
2532	rt->rt6i_idev = idev;
 
2533
2534	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2535	if (anycast)
2536		rt->rt6i_flags |= RTF_ANYCAST;
2537	else
2538		rt->rt6i_flags |= RTF_LOCAL;
 
 
 
 
 
2539
2540	rt->rt6i_gateway  = *addr;
2541	rt->rt6i_dst.addr = *addr;
2542	rt->rt6i_dst.plen = 128;
2543	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2544	rt->rt6i_table = fib6_get_table(net, tb_id);
2545	rt->dst.flags |= DST_NOCACHE;
2546
2547	atomic_set(&rt->dst.__refcnt, 1);
2548
2549	return rt;
2550}
2551
2552int ip6_route_get_saddr(struct net *net,
2553			struct rt6_info *rt,
2554			const struct in6_addr *daddr,
2555			unsigned int prefs,
2556			struct in6_addr *saddr)
2557{
2558	struct inet6_dev *idev =
2559		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2560	int err = 0;
2561	if (rt && rt->rt6i_prefsrc.plen)
2562		*saddr = rt->rt6i_prefsrc.addr;
2563	else
2564		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2565					 daddr, prefs, saddr);
2566	return err;
2567}
2568
/*
 * Walker argument used to remove a deleted address from the preferred
 * source (prefsrc) of routes.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* restrict to this device; NULL = any */
	struct net *net;		/* namespace, used to skip its null entry */
	struct in6_addr *addr;		/* address being removed */
};
2575
2576static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2577{
2578	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2579	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2580	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2581
2582	if (((void *)rt->dst.dev == dev || !dev) &&
2583	    rt != net->ipv6.ip6_null_entry &&
2584	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2585		/* remove prefsrc entry */
2586		rt->rt6i_prefsrc.plen = 0;
2587	}
2588	return 0;
2589}
2590
2591void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2592{
2593	struct net *net = dev_net(ifp->idev->dev);
2594	struct arg_dev_net_ip adni = {
2595		.dev = ifp->idev->dev,
2596		.net = net,
2597		.addr = &ifp->addr,
2598	};
2599	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2600}
2601
2602#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2603#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2604
2605/* Remove routers and update dst entries when gateway turn into host. */
2606static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2607{
2608	struct in6_addr *gateway = (struct in6_addr *)arg;
2609
2610	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2611	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2612	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2613		return -1;
2614	}
2615	return 0;
2616}
2617
2618void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2619{
2620	fib6_clean_all(net, fib6_clean_tohost, gateway);
2621}
2622
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL = every device */
	struct net *net;		/* namespace, used to skip its null entry */
};
2627
2628static int fib6_ifdown(struct rt6_info *rt, void *arg)
2629{
2630	const struct arg_dev_net *adn = arg;
2631	const struct net_device *dev = adn->dev;
2632
2633	if ((rt->dst.dev == dev || !dev) &&
2634	    rt != adn->net->ipv6.ip6_null_entry)
2635		return -1;
2636
2637	return 0;
2638}
2639
2640void rt6_ifdown(struct net *net, struct net_device *dev)
2641{
2642	struct arg_dev_net adn = {
2643		.dev = dev,
2644		.net = net,
2645	};
2646
2647	fib6_clean_all(net, fib6_ifdown, &adn);
2648	icmp6_clean_all(fib6_ifdown, &adn);
2649	if (dev)
2650		rt6_uncached_list_flush_dev(net, dev);
2651}
2652
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2657
2658static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2659{
2660	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2661	struct inet6_dev *idev;
2662
2663	/* In IPv6 pmtu discovery is not optional,
2664	   so that RTAX_MTU lock cannot disable it.
2665	   We still use this lock to block changes
2666	   caused by addrconf/ndisc.
2667	*/
2668
2669	idev = __in6_dev_get(arg->dev);
2670	if (!idev)
2671		return 0;
2672
2673	/* For administrative MTU increase, there is no way to discover
2674	   IPv6 PMTU increase, so PMTU increase should be updated here.
2675	   Since RFC 1981 doesn't include administrative MTU increase
2676	   update PMTU increase is a MUST. (i.e. jumbo frame)
2677	 */
2678	/*
2679	   If new MTU is less than route PMTU, this new MTU will be the
2680	   lowest MTU in the path, update the route PMTU to reflect PMTU
2681	   decreases; if new MTU is greater than route PMTU, and the
2682	   old MTU is the lowest MTU in the path, update the route PMTU
2683	   to reflect the increase. In this case if the other nodes' MTU
2684	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2685	   PMTU discouvery.
2686	 */
2687	if (rt->dst.dev == arg->dev &&
2688	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2689		if (rt->rt6i_flags & RTF_CACHE) {
2690			/* For RTF_CACHE with rt6i_pmtu == 0
2691			 * (i.e. a redirected route),
2692			 * the metrics of its rt->dst.from has already
2693			 * been updated.
2694			 */
2695			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2696				rt->rt6i_pmtu = arg->mtu;
2697		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2698			   (dst_mtu(&rt->dst) < arg->mtu &&
2699			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2700			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2701		}
2702	}
2703	return 0;
2704}
2705
2706void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2707{
2708	struct rt6_mtu_change_arg arg = {
2709		.dev = dev,
2710		.mtu = mtu,
2711	};
2712
2713	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2714}
2715
2716static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2717	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2718	[RTA_OIF]               = { .type = NLA_U32 },
2719	[RTA_IIF]		= { .type = NLA_U32 },
2720	[RTA_PRIORITY]          = { .type = NLA_U32 },
2721	[RTA_METRICS]           = { .type = NLA_NESTED },
2722	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2723	[RTA_PREF]              = { .type = NLA_U8 },
2724	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2725	[RTA_ENCAP]		= { .type = NLA_NESTED },
2726	[RTA_EXPIRES]		= { .type = NLA_U32 },
2727};
2728
2729static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2730			      struct fib6_config *cfg)
2731{
2732	struct rtmsg *rtm;
2733	struct nlattr *tb[RTA_MAX+1];
2734	unsigned int pref;
2735	int err;
2736
2737	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2738	if (err < 0)
2739		goto errout;
2740
2741	err = -EINVAL;
2742	rtm = nlmsg_data(nlh);
2743	memset(cfg, 0, sizeof(*cfg));
2744
2745	cfg->fc_table = rtm->rtm_table;
2746	cfg->fc_dst_len = rtm->rtm_dst_len;
2747	cfg->fc_src_len = rtm->rtm_src_len;
2748	cfg->fc_flags = RTF_UP;
2749	cfg->fc_protocol = rtm->rtm_protocol;
2750	cfg->fc_type = rtm->rtm_type;
2751
2752	if (rtm->rtm_type == RTN_UNREACHABLE ||
2753	    rtm->rtm_type == RTN_BLACKHOLE ||
2754	    rtm->rtm_type == RTN_PROHIBIT ||
2755	    rtm->rtm_type == RTN_THROW)
2756		cfg->fc_flags |= RTF_REJECT;
2757
2758	if (rtm->rtm_type == RTN_LOCAL)
2759		cfg->fc_flags |= RTF_LOCAL;
2760
2761	if (rtm->rtm_flags & RTM_F_CLONED)
2762		cfg->fc_flags |= RTF_CACHE;
2763
2764	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2765	cfg->fc_nlinfo.nlh = nlh;
2766	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2767
2768	if (tb[RTA_GATEWAY]) {
2769		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2770		cfg->fc_flags |= RTF_GATEWAY;
2771	}
2772
2773	if (tb[RTA_DST]) {
2774		int plen = (rtm->rtm_dst_len + 7) >> 3;
2775
2776		if (nla_len(tb[RTA_DST]) < plen)
2777			goto errout;
2778
2779		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2780	}
2781
2782	if (tb[RTA_SRC]) {
2783		int plen = (rtm->rtm_src_len + 7) >> 3;
2784
2785		if (nla_len(tb[RTA_SRC]) < plen)
2786			goto errout;
2787
2788		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2789	}
2790
2791	if (tb[RTA_PREFSRC])
2792		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2793
2794	if (tb[RTA_OIF])
2795		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2796
2797	if (tb[RTA_PRIORITY])
2798		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2799
2800	if (tb[RTA_METRICS]) {
2801		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2802		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2803	}
2804
2805	if (tb[RTA_TABLE])
2806		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2807
2808	if (tb[RTA_MULTIPATH]) {
2809		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2810		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2811	}
2812
2813	if (tb[RTA_PREF]) {
2814		pref = nla_get_u8(tb[RTA_PREF]);
2815		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2816		    pref != ICMPV6_ROUTER_PREF_HIGH)
2817			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2818		cfg->fc_flags |= RTF_PREF(pref);
2819	}
2820
2821	if (tb[RTA_ENCAP])
2822		cfg->fc_encap = tb[RTA_ENCAP];
2823
2824	if (tb[RTA_ENCAP_TYPE])
2825		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2826
2827	if (tb[RTA_EXPIRES]) {
2828		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2829
2830		if (addrconf_finite_timeout(timeout)) {
2831			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2832			cfg->fc_flags |= RTF_EXPIRES;
2833		}
2834	}
2835
2836	err = 0;
2837errout:
2838	return err;
2839}
2840
2841struct rt6_nh {
2842	struct rt6_info *rt6_info;
2843	struct fib6_config r_cfg;
2844	struct mx6_config mxc;
2845	struct list_head next;
2846};
2847
2848static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2849{
2850	struct rt6_nh *nh;
2851
2852	list_for_each_entry(nh, rt6_nh_list, next) {
2853		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2854		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2855		        nh->r_cfg.fc_ifindex);
2856	}
2857}
2858
2859static int ip6_route_info_append(struct list_head *rt6_nh_list,
2860				 struct rt6_info *rt, struct fib6_config *r_cfg)
2861{
2862	struct rt6_nh *nh;
2863	struct rt6_info *rtnh;
2864	int err = -EEXIST;
2865
2866	list_for_each_entry(nh, rt6_nh_list, next) {
2867		/* check if rt6_info already exists */
2868		rtnh = nh->rt6_info;
2869
2870		if (rtnh->dst.dev == rt->dst.dev &&
2871		    rtnh->rt6i_idev == rt->rt6i_idev &&
2872		    ipv6_addr_equal(&rtnh->rt6i_gateway,
2873				    &rt->rt6i_gateway))
2874			return err;
2875	}
2876
2877	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2878	if (!nh)
2879		return -ENOMEM;
2880	nh->rt6_info = rt;
2881	err = ip6_convert_metrics(&nh->mxc, r_cfg);
2882	if (err) {
2883		kfree(nh);
2884		return err;
2885	}
2886	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2887	list_add_tail(&nh->next, rt6_nh_list);
2888
2889	return 0;
2890}
2891
2892static int ip6_route_multipath_add(struct fib6_config *cfg)
2893{
2894	struct fib6_config r_cfg;
2895	struct rtnexthop *rtnh;
2896	struct rt6_info *rt;
2897	struct rt6_nh *err_nh;
2898	struct rt6_nh *nh, *nh_safe;
2899	int remaining;
2900	int attrlen;
2901	int err = 1;
2902	int nhn = 0;
2903	int replace = (cfg->fc_nlinfo.nlh &&
2904		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2905	LIST_HEAD(rt6_nh_list);
2906
2907	remaining = cfg->fc_mp_len;
2908	rtnh = (struct rtnexthop *)cfg->fc_mp;
2909
2910	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
2911	 * rt6_info structs per nexthop
2912	 */
2913	while (rtnh_ok(rtnh, remaining)) {
2914		memcpy(&r_cfg, cfg, sizeof(*cfg));
2915		if (rtnh->rtnh_ifindex)
2916			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2917
2918		attrlen = rtnh_attrlen(rtnh);
2919		if (attrlen > 0) {
2920			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2921
2922			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2923			if (nla) {
2924				r_cfg.fc_gateway = nla_get_in6_addr(nla);
2925				r_cfg.fc_flags |= RTF_GATEWAY;
2926			}
2927			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2928			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2929			if (nla)
2930				r_cfg.fc_encap_type = nla_get_u16(nla);
2931		}
2932
2933		rt = ip6_route_info_create(&r_cfg);
2934		if (IS_ERR(rt)) {
2935			err = PTR_ERR(rt);
2936			rt = NULL;
2937			goto cleanup;
2938		}
2939
2940		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2941		if (err) {
2942			dst_free(&rt->dst);
2943			goto cleanup;
2944		}
2945
2946		rtnh = rtnh_next(rtnh, &remaining);
2947	}
2948
2949	err_nh = NULL;
2950	list_for_each_entry(nh, &rt6_nh_list, next) {
2951		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2952		/* nh->rt6_info is used or freed at this point, reset to NULL*/
2953		nh->rt6_info = NULL;
2954		if (err) {
2955			if (replace && nhn)
2956				ip6_print_replace_route_err(&rt6_nh_list);
2957			err_nh = nh;
2958			goto add_errout;
2959		}
2960
2961		/* Because each route is added like a single route we remove
2962		 * these flags after the first nexthop: if there is a collision,
2963		 * we have already failed to add the first nexthop:
2964		 * fib6_add_rt2node() has rejected it; when replacing, old
2965		 * nexthops have been replaced by first new, the rest should
2966		 * be added to it.
2967		 */
2968		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2969						     NLM_F_REPLACE);
2970		nhn++;
2971	}
2972
2973	goto cleanup;
2974
2975add_errout:
2976	/* Delete routes that were already added */
2977	list_for_each_entry(nh, &rt6_nh_list, next) {
2978		if (err_nh == nh)
2979			break;
2980		ip6_route_del(&nh->r_cfg);
2981	}
2982
2983cleanup:
2984	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2985		if (nh->rt6_info)
2986			dst_free(&nh->rt6_info->dst);
2987		kfree(nh->mxc.mx);
2988		list_del(&nh->next);
2989		kfree(nh);
2990	}
2991
2992	return err;
2993}
2994
2995static int ip6_route_multipath_del(struct fib6_config *cfg)
2996{
2997	struct fib6_config r_cfg;
2998	struct rtnexthop *rtnh;
2999	int remaining;
3000	int attrlen;
3001	int err = 1, last_err = 0;
3002
3003	remaining = cfg->fc_mp_len;
3004	rtnh = (struct rtnexthop *)cfg->fc_mp;
3005
3006	/* Parse a Multipath Entry */
3007	while (rtnh_ok(rtnh, remaining)) {
3008		memcpy(&r_cfg, cfg, sizeof(*cfg));
3009		if (rtnh->rtnh_ifindex)
3010			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3011
3012		attrlen = rtnh_attrlen(rtnh);
3013		if (attrlen > 0) {
3014			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3015
3016			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3017			if (nla) {
3018				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3019				r_cfg.fc_flags |= RTF_GATEWAY;
3020			}
3021		}
3022		err = ip6_route_del(&r_cfg);
3023		if (err)
3024			last_err = err;
3025
3026		rtnh = rtnh_next(rtnh, &remaining);
3027	}
3028
3029	return last_err;
3030}
3031
3032static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3033{
3034	struct fib6_config cfg;
3035	int err;
3036
3037	err = rtm_to_fib6_config(skb, nlh, &cfg);
3038	if (err < 0)
3039		return err;
3040
3041	if (cfg.fc_mp)
3042		return ip6_route_multipath_del(&cfg);
3043	else
3044		return ip6_route_del(&cfg);
3045}
3046
3047static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3048{
3049	struct fib6_config cfg;
3050	int err;
3051
3052	err = rtm_to_fib6_config(skb, nlh, &cfg);
3053	if (err < 0)
3054		return err;
3055
3056	if (cfg.fc_mp)
3057		return ip6_route_multipath_add(&cfg);
3058	else
3059		return ip6_route_add(&cfg);
3060}
3061
3062static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3063{
3064	return NLMSG_ALIGN(sizeof(struct rtmsg))
3065	       + nla_total_size(16) /* RTA_SRC */
3066	       + nla_total_size(16) /* RTA_DST */
3067	       + nla_total_size(16) /* RTA_GATEWAY */
3068	       + nla_total_size(16) /* RTA_PREFSRC */
3069	       + nla_total_size(4) /* RTA_TABLE */
3070	       + nla_total_size(4) /* RTA_IIF */
3071	       + nla_total_size(4) /* RTA_OIF */
3072	       + nla_total_size(4) /* RTA_PRIORITY */
3073	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3074	       + nla_total_size(sizeof(struct rta_cacheinfo))
3075	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3076	       + nla_total_size(1) /* RTA_PREF */
3077	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
3078}
3079
3080static int rt6_fill_node(struct net *net,
3081			 struct sk_buff *skb, struct rt6_info *rt,
3082			 struct in6_addr *dst, struct in6_addr *src,
3083			 int iif, int type, u32 portid, u32 seq,
3084			 int prefix, int nowait, unsigned int flags)
3085{
3086	u32 metrics[RTAX_MAX];
3087	struct rtmsg *rtm;
3088	struct nlmsghdr *nlh;
3089	long expires;
3090	u32 table;
 
 
3091
3092	if (prefix) {	/* user wants prefix routes only */
3093		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3094			/* success since this is not a prefix route */
3095			return 1;
3096		}
3097	}
3098
3099	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3100	if (!nlh)
3101		return -EMSGSIZE;
3102
3103	rtm = nlmsg_data(nlh);
3104	rtm->rtm_family = AF_INET6;
3105	rtm->rtm_dst_len = rt->rt6i_dst.plen;
3106	rtm->rtm_src_len = rt->rt6i_src.plen;
3107	rtm->rtm_tos = 0;
3108	if (rt->rt6i_table)
3109		table = rt->rt6i_table->tb6_id;
3110	else
3111		table = RT6_TABLE_UNSPEC;
3112	rtm->rtm_table = table;
3113	if (nla_put_u32(skb, RTA_TABLE, table))
3114		goto nla_put_failure;
3115	if (rt->rt6i_flags & RTF_REJECT) {
3116		switch (rt->dst.error) {
3117		case -EINVAL:
3118			rtm->rtm_type = RTN_BLACKHOLE;
3119			break;
3120		case -EACCES:
3121			rtm->rtm_type = RTN_PROHIBIT;
3122			break;
3123		case -EAGAIN:
3124			rtm->rtm_type = RTN_THROW;
3125			break;
3126		default:
3127			rtm->rtm_type = RTN_UNREACHABLE;
3128			break;
3129		}
3130	}
3131	else if (rt->rt6i_flags & RTF_LOCAL)
3132		rtm->rtm_type = RTN_LOCAL;
3133	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3134		rtm->rtm_type = RTN_LOCAL;
3135	else
3136		rtm->rtm_type = RTN_UNICAST;
3137	rtm->rtm_flags = 0;
3138	if (!netif_carrier_ok(rt->dst.dev)) {
3139		rtm->rtm_flags |= RTNH_F_LINKDOWN;
3140		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3141			rtm->rtm_flags |= RTNH_F_DEAD;
3142	}
3143	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3144	rtm->rtm_protocol = rt->rt6i_protocol;
3145	if (rt->rt6i_flags & RTF_DYNAMIC)
3146		rtm->rtm_protocol = RTPROT_REDIRECT;
3147	else if (rt->rt6i_flags & RTF_ADDRCONF) {
3148		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3149			rtm->rtm_protocol = RTPROT_RA;
3150		else
3151			rtm->rtm_protocol = RTPROT_KERNEL;
3152	}
3153
3154	if (rt->rt6i_flags & RTF_CACHE)
3155		rtm->rtm_flags |= RTM_F_CLONED;
3156
3157	if (dst) {
3158		if (nla_put_in6_addr(skb, RTA_DST, dst))
3159			goto nla_put_failure;
3160		rtm->rtm_dst_len = 128;
3161	} else if (rtm->rtm_dst_len)
3162		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3163			goto nla_put_failure;
3164#ifdef CONFIG_IPV6_SUBTREES
3165	if (src) {
3166		if (nla_put_in6_addr(skb, RTA_SRC, src))
3167			goto nla_put_failure;
3168		rtm->rtm_src_len = 128;
3169	} else if (rtm->rtm_src_len &&
3170		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3171		goto nla_put_failure;
3172#endif
3173	if (iif) {
3174#ifdef CONFIG_IPV6_MROUTE
3175		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3176			int err = ip6mr_get_route(net, skb, rtm, nowait);
3177			if (err <= 0) {
3178				if (!nowait) {
3179					if (err == 0)
3180						return 0;
3181					goto nla_put_failure;
3182				} else {
3183					if (err == -EMSGSIZE)
3184						goto nla_put_failure;
3185				}
3186			}
3187		} else
3188#endif
3189			if (nla_put_u32(skb, RTA_IIF, iif))
3190				goto nla_put_failure;
3191	} else if (dst) {
3192		struct in6_addr saddr_buf;
3193		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3194		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3195			goto nla_put_failure;
3196	}
3197
3198	if (rt->rt6i_prefsrc.plen) {
3199		struct in6_addr saddr_buf;
3200		saddr_buf = rt->rt6i_prefsrc.addr;
3201		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3202			goto nla_put_failure;
3203	}
3204
3205	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3206	if (rt->rt6i_pmtu)
3207		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3208	if (rtnetlink_put_metrics(skb, metrics) < 0)
3209		goto nla_put_failure;
3210
3211	if (rt->rt6i_flags & RTF_GATEWAY) {
3212		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
 
 
 
3213			goto nla_put_failure;
 
3214	}
 
3215
3216	if (rt->dst.dev &&
3217	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3218		goto nla_put_failure;
3219	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3220		goto nla_put_failure;
 
 
 
 
 
 
3221
3222	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3223
3224	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3225		goto nla_put_failure;
 
 
3226
3227	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
 
3228		goto nla_put_failure;
3229
3230	lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3231
3232	nlmsg_end(skb, nlh);
3233	return 0;
3234
3235nla_put_failure:
3236	nlmsg_cancel(skb, nlh);
3237	return -EMSGSIZE;
3238}
3239
3240int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3241{
3242	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3243	int prefix;
3244
3245	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3246		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3247		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3248	} else
3249		prefix = 0;
3250
3251	return rt6_fill_node(arg->net,
3252		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3253		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3254		     prefix, 0, NLM_F_MULTI);
3255}
3256
3257static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3258{
3259	struct net *net = sock_net(in_skb->sk);
3260	struct nlattr *tb[RTA_MAX+1];
3261	struct rt6_info *rt;
3262	struct sk_buff *skb;
3263	struct rtmsg *rtm;
3264	struct flowi6 fl6;
3265	int err, iif = 0, oif = 0;
3266
3267	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3268	if (err < 0)
3269		goto errout;
3270
3271	err = -EINVAL;
3272	memset(&fl6, 0, sizeof(fl6));
3273
3274	if (tb[RTA_SRC]) {
3275		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3276			goto errout;
3277
3278		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3279	}
3280
3281	if (tb[RTA_DST]) {
3282		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3283			goto errout;
3284
3285		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3286	}
3287
3288	if (tb[RTA_IIF])
3289		iif = nla_get_u32(tb[RTA_IIF]);
3290
3291	if (tb[RTA_OIF])
3292		oif = nla_get_u32(tb[RTA_OIF]);
3293
3294	if (tb[RTA_MARK])
3295		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3296
3297	if (iif) {
3298		struct net_device *dev;
3299		int flags = 0;
3300
3301		dev = __dev_get_by_index(net, iif);
3302		if (!dev) {
3303			err = -ENODEV;
3304			goto errout;
3305		}
3306
3307		fl6.flowi6_iif = iif;
3308
3309		if (!ipv6_addr_any(&fl6.saddr))
3310			flags |= RT6_LOOKUP_F_HAS_SADDR;
3311
3312		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3313							       flags);
3314	} else {
3315		fl6.flowi6_oif = oif;
3316
3317		if (netif_index_is_l3_master(net, oif)) {
3318			fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3319					   FLOWI_FLAG_SKIP_NH_OIF;
3320		}
3321
3322		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3323	}
3324
3325	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3326	if (!skb) {
3327		ip6_rt_put(rt);
3328		err = -ENOBUFS;
3329		goto errout;
3330	}
3331
3332	/* Reserve room for dummy headers, this skb can pass
3333	   through good chunk of routing engine.
3334	 */
3335	skb_reset_mac_header(skb);
3336	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3337
3338	skb_dst_set(skb, &rt->dst);
3339
3340	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3341			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3342			    nlh->nlmsg_seq, 0, 0, 0);
3343	if (err < 0) {
3344		kfree_skb(skb);
3345		goto errout;
3346	}
3347
3348	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3349errout:
3350	return err;
3351}
3352
3353void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3354		     unsigned int nlm_flags)
3355{
3356	struct sk_buff *skb;
3357	struct net *net = info->nl_net;
3358	u32 seq;
3359	int err;
3360
3361	err = -ENOBUFS;
3362	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3363
3364	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3365	if (!skb)
3366		goto errout;
3367
3368	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3369				event, info->portid, seq, 0, 0, nlm_flags);
3370	if (err < 0) {
3371		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3372		WARN_ON(err == -EMSGSIZE);
3373		kfree_skb(skb);
3374		goto errout;
3375	}
3376	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3377		    info->nlh, gfp_any());
3378	return;
3379errout:
3380	if (err < 0)
3381		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3382}
3383
3384static int ip6_route_dev_notify(struct notifier_block *this,
3385				unsigned long event, void *ptr)
3386{
3387	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3388	struct net *net = dev_net(dev);
3389
3390	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3391		net->ipv6.ip6_null_entry->dst.dev = dev;
3392		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3393#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3394		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3395		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3396		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3397		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3398#endif
3399	}
3400
3401	return NOTIFY_OK;
3402}
3403
3404/*
3405 *	/proc
3406 */
3407
3408#ifdef CONFIG_PROC_FS
3409
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3410static const struct file_operations ipv6_route_proc_fops = {
3411	.owner		= THIS_MODULE,
3412	.open		= ipv6_route_open,
3413	.read		= seq_read,
3414	.llseek		= seq_lseek,
3415	.release	= seq_release_net,
3416};
3417
3418static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3419{
3420	struct net *net = (struct net *)seq->private;
3421	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3422		   net->ipv6.rt6_stats->fib_nodes,
3423		   net->ipv6.rt6_stats->fib_route_nodes,
3424		   net->ipv6.rt6_stats->fib_rt_alloc,
3425		   net->ipv6.rt6_stats->fib_rt_entries,
3426		   net->ipv6.rt6_stats->fib_rt_cache,
3427		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3428		   net->ipv6.rt6_stats->fib_discarded_routes);
3429
3430	return 0;
3431}
3432
3433static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3434{
3435	return single_open_net(inode, file, rt6_stats_seq_show);
3436}
3437
3438static const struct file_operations rt6_stats_seq_fops = {
3439	.owner	 = THIS_MODULE,
3440	.open	 = rt6_stats_seq_open,
3441	.read	 = seq_read,
3442	.llseek	 = seq_lseek,
3443	.release = single_release_net,
3444};
3445#endif	/* CONFIG_PROC_FS */
3446
3447#ifdef CONFIG_SYSCTL
3448
3449static
3450int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3451			      void __user *buffer, size_t *lenp, loff_t *ppos)
3452{
3453	struct net *net;
3454	int delay;
3455	if (!write)
3456		return -EINVAL;
3457
3458	net = (struct net *)ctl->extra1;
3459	delay = net->ipv6.sysctl.flush_delay;
3460	proc_dointvec(ctl, write, buffer, lenp, ppos);
3461	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3462	return 0;
3463}
3464
3465struct ctl_table ipv6_route_table_template[] = {
3466	{
3467		.procname	=	"flush",
3468		.data		=	&init_net.ipv6.sysctl.flush_delay,
3469		.maxlen		=	sizeof(int),
3470		.mode		=	0200,
3471		.proc_handler	=	ipv6_sysctl_rtcache_flush
3472	},
3473	{
3474		.procname	=	"gc_thresh",
3475		.data		=	&ip6_dst_ops_template.gc_thresh,
3476		.maxlen		=	sizeof(int),
3477		.mode		=	0644,
3478		.proc_handler	=	proc_dointvec,
3479	},
3480	{
3481		.procname	=	"max_size",
3482		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
3483		.maxlen		=	sizeof(int),
3484		.mode		=	0644,
3485		.proc_handler	=	proc_dointvec,
3486	},
3487	{
3488		.procname	=	"gc_min_interval",
3489		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3490		.maxlen		=	sizeof(int),
3491		.mode		=	0644,
3492		.proc_handler	=	proc_dointvec_jiffies,
3493	},
3494	{
3495		.procname	=	"gc_timeout",
3496		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3497		.maxlen		=	sizeof(int),
3498		.mode		=	0644,
3499		.proc_handler	=	proc_dointvec_jiffies,
3500	},
3501	{
3502		.procname	=	"gc_interval",
3503		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
3504		.maxlen		=	sizeof(int),
3505		.mode		=	0644,
3506		.proc_handler	=	proc_dointvec_jiffies,
3507	},
3508	{
3509		.procname	=	"gc_elasticity",
3510		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3511		.maxlen		=	sizeof(int),
3512		.mode		=	0644,
3513		.proc_handler	=	proc_dointvec,
3514	},
3515	{
3516		.procname	=	"mtu_expires",
3517		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3518		.maxlen		=	sizeof(int),
3519		.mode		=	0644,
3520		.proc_handler	=	proc_dointvec_jiffies,
3521	},
3522	{
3523		.procname	=	"min_adv_mss",
3524		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
3525		.maxlen		=	sizeof(int),
3526		.mode		=	0644,
3527		.proc_handler	=	proc_dointvec,
3528	},
3529	{
3530		.procname	=	"gc_min_interval_ms",
3531		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3532		.maxlen		=	sizeof(int),
3533		.mode		=	0644,
3534		.proc_handler	=	proc_dointvec_ms_jiffies,
3535	},
3536	{ }
3537};
3538
3539struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3540{
3541	struct ctl_table *table;
3542
3543	table = kmemdup(ipv6_route_table_template,
3544			sizeof(ipv6_route_table_template),
3545			GFP_KERNEL);
3546
3547	if (table) {
3548		table[0].data = &net->ipv6.sysctl.flush_delay;
3549		table[0].extra1 = net;
3550		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3551		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3552		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3553		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3554		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3555		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3556		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3557		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3558		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3559
3560		/* Don't export sysctls to unprivileged users */
3561		if (net->user_ns != &init_user_ns)
3562			table[0].procname = NULL;
3563	}
3564
3565	return table;
3566}
3567#endif
3568
3569static int __net_init ip6_route_net_init(struct net *net)
3570{
3571	int ret = -ENOMEM;
3572
3573	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3574	       sizeof(net->ipv6.ip6_dst_ops));
3575
3576	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3577		goto out_ip6_dst_ops;
3578
3579	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3580					   sizeof(*net->ipv6.ip6_null_entry),
3581					   GFP_KERNEL);
3582	if (!net->ipv6.ip6_null_entry)
3583		goto out_ip6_dst_entries;
3584	net->ipv6.ip6_null_entry->dst.path =
3585		(struct dst_entry *)net->ipv6.ip6_null_entry;
3586	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3587	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3588			 ip6_template_metrics, true);
3589
3590#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3591	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3592					       sizeof(*net->ipv6.ip6_prohibit_entry),
3593					       GFP_KERNEL);
3594	if (!net->ipv6.ip6_prohibit_entry)
3595		goto out_ip6_null_entry;
3596	net->ipv6.ip6_prohibit_entry->dst.path =
3597		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3598	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3599	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3600			 ip6_template_metrics, true);
3601
3602	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3603					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3604					       GFP_KERNEL);
3605	if (!net->ipv6.ip6_blk_hole_entry)
3606		goto out_ip6_prohibit_entry;
3607	net->ipv6.ip6_blk_hole_entry->dst.path =
3608		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3609	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3610	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3611			 ip6_template_metrics, true);
3612#endif
3613
3614	net->ipv6.sysctl.flush_delay = 0;
3615	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3616	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3617	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3618	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3619	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3620	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3621	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3622
3623	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3624
3625	ret = 0;
3626out:
3627	return ret;
3628
3629#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3630out_ip6_prohibit_entry:
3631	kfree(net->ipv6.ip6_prohibit_entry);
3632out_ip6_null_entry:
3633	kfree(net->ipv6.ip6_null_entry);
3634#endif
3635out_ip6_dst_entries:
3636	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3637out_ip6_dst_ops:
3638	goto out;
3639}
3640
3641static void __net_exit ip6_route_net_exit(struct net *net)
3642{
3643	kfree(net->ipv6.ip6_null_entry);
3644#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3645	kfree(net->ipv6.ip6_prohibit_entry);
3646	kfree(net->ipv6.ip6_blk_hole_entry);
3647#endif
3648	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3649}
3650
3651static int __net_init ip6_route_net_init_late(struct net *net)
3652{
3653#ifdef CONFIG_PROC_FS
3654	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3655	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3656#endif
3657	return 0;
3658}
3659
3660static void __net_exit ip6_route_net_exit_late(struct net *net)
3661{
3662#ifdef CONFIG_PROC_FS
3663	remove_proc_entry("ipv6_route", net->proc_net);
3664	remove_proc_entry("rt6_stats", net->proc_net);
3665#endif
3666}
3667
3668static struct pernet_operations ip6_route_net_ops = {
3669	.init = ip6_route_net_init,
3670	.exit = ip6_route_net_exit,
3671};
3672
3673static int __net_init ipv6_inetpeer_init(struct net *net)
3674{
3675	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3676
3677	if (!bp)
3678		return -ENOMEM;
3679	inet_peer_base_init(bp);
3680	net->ipv6.peers = bp;
3681	return 0;
3682}
3683
3684static void __net_exit ipv6_inetpeer_exit(struct net *net)
3685{
3686	struct inet_peer_base *bp = net->ipv6.peers;
3687
3688	net->ipv6.peers = NULL;
3689	inetpeer_invalidate_tree(bp);
3690	kfree(bp);
3691}
3692
3693static struct pernet_operations ipv6_inetpeer_ops = {
3694	.init	=	ipv6_inetpeer_init,
3695	.exit	=	ipv6_inetpeer_exit,
3696};
3697
3698static struct pernet_operations ip6_route_net_late_ops = {
3699	.init = ip6_route_net_init_late,
3700	.exit = ip6_route_net_exit_late,
3701};
3702
3703static struct notifier_block ip6_route_dev_notifier = {
3704	.notifier_call = ip6_route_dev_notify,
3705	.priority = 0,
3706};
3707
3708int __init ip6_route_init(void)
3709{
3710	int ret;
3711	int cpu;
3712
3713	ret = -ENOMEM;
3714	ip6_dst_ops_template.kmem_cachep =
3715		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3716				  SLAB_HWCACHE_ALIGN, NULL);
3717	if (!ip6_dst_ops_template.kmem_cachep)
3718		goto out;
3719
3720	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3721	if (ret)
3722		goto out_kmem_cache;
3723
3724	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3725	if (ret)
3726		goto out_dst_entries;
3727
3728	ret = register_pernet_subsys(&ip6_route_net_ops);
3729	if (ret)
3730		goto out_register_inetpeer;
3731
3732	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3733
3734	/* Registering of the loopback is done before this portion of code,
3735	 * the loopback reference in rt6_info will not be taken, do it
3736	 * manually for init_net */
3737	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3738	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3739  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3740	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3741	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3742	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3743	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3744  #endif
3745	ret = fib6_init();
3746	if (ret)
3747		goto out_register_subsys;
3748
3749	ret = xfrm6_init();
3750	if (ret)
3751		goto out_fib6_init;
3752
3753	ret = fib6_rules_init();
3754	if (ret)
3755		goto xfrm6_init;
3756
3757	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3758	if (ret)
3759		goto fib6_rules_init;
3760
3761	ret = -ENOBUFS;
3762	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3763	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3764	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3765		goto out_register_late_subsys;
3766
3767	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3768	if (ret)
3769		goto out_register_late_subsys;
3770
3771	for_each_possible_cpu(cpu) {
3772		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3773
3774		INIT_LIST_HEAD(&ul->head);
3775		spin_lock_init(&ul->lock);
3776	}
3777
3778out:
3779	return ret;
3780
3781out_register_late_subsys:
3782	unregister_pernet_subsys(&ip6_route_net_late_ops);
3783fib6_rules_init:
3784	fib6_rules_cleanup();
3785xfrm6_init:
3786	xfrm6_fini();
3787out_fib6_init:
3788	fib6_gc_cleanup();
3789out_register_subsys:
3790	unregister_pernet_subsys(&ip6_route_net_ops);
3791out_register_inetpeer:
3792	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3793out_dst_entries:
3794	dst_entries_destroy(&ip6_dst_blackhole_ops);
3795out_kmem_cache:
3796	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3797	goto out;
3798}
3799
3800void ip6_route_cleanup(void)
3801{
3802	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3803	unregister_pernet_subsys(&ip6_route_net_late_ops);
3804	fib6_rules_cleanup();
3805	xfrm6_fini();
3806	fib6_gc_cleanup();
3807	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3808	unregister_pernet_subsys(&ip6_route_net_ops);
3809	dst_entries_destroy(&ip6_dst_blackhole_ops);
3810	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3811}
v3.5.6
   1/*
   2 *	Linux INET6 implementation
   3 *	FIB front-end.
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*	Changes:
  15 *
  16 *	YOSHIFUJI Hideaki @USAGI
  17 *		reworked default router selection.
  18 *		- respect outgoing interface
  19 *		- select from (probably) reachable routers (i.e.
  20 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *		- always select the same router if it is (probably)
  22 *		reachable.  otherwise, round-robin the list.
  23 *	Ville Nuorvala
  24 *		Fixed routing subtrees.
  25 */
  26
  27#define pr_fmt(fmt) "IPv6: " fmt
  28
  29#include <linux/capability.h>
  30#include <linux/errno.h>
  31#include <linux/export.h>
  32#include <linux/types.h>
  33#include <linux/times.h>
  34#include <linux/socket.h>
  35#include <linux/sockios.h>
  36#include <linux/net.h>
  37#include <linux/route.h>
  38#include <linux/netdevice.h>
  39#include <linux/in6.h>
  40#include <linux/mroute6.h>
  41#include <linux/init.h>
  42#include <linux/if_arp.h>
  43#include <linux/proc_fs.h>
  44#include <linux/seq_file.h>
  45#include <linux/nsproxy.h>
  46#include <linux/slab.h>
  47#include <net/net_namespace.h>
  48#include <net/snmp.h>
  49#include <net/ipv6.h>
  50#include <net/ip6_fib.h>
  51#include <net/ip6_route.h>
  52#include <net/ndisc.h>
  53#include <net/addrconf.h>
  54#include <net/tcp.h>
  55#include <linux/rtnetlink.h>
  56#include <net/dst.h>
 
  57#include <net/xfrm.h>
  58#include <net/netevent.h>
  59#include <net/netlink.h>
 
 
 
 
 
  60
  61#include <asm/uaccess.h>
  62
  63#ifdef CONFIG_SYSCTL
  64#include <linux/sysctl.h>
  65#endif
  66
  67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
  68				    const struct in6_addr *dest);
 
 
 
 
 
 
  69static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  70static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  71static unsigned int	 ip6_mtu(const struct dst_entry *dst);
  72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  73static void		ip6_dst_destroy(struct dst_entry *);
  74static void		ip6_dst_ifdown(struct dst_entry *,
  75				       struct net_device *dev, int how);
  76static int		 ip6_dst_gc(struct dst_ops *ops);
  77
  78static int		ip6_pkt_discard(struct sk_buff *skb);
  79static int		ip6_pkt_discard_out(struct sk_buff *skb);
 
 
  80static void		ip6_link_failure(struct sk_buff *skb);
  81static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
 
 
 
 
 
  82
  83#ifdef CONFIG_IPV6_ROUTE_INFO
  84static struct rt6_info *rt6_add_route_info(struct net *net,
  85					   const struct in6_addr *prefix, int prefixlen,
  86					   const struct in6_addr *gwaddr, int ifindex,
  87					   unsigned int pref);
  88static struct rt6_info *rt6_get_route_info(struct net *net,
  89					   const struct in6_addr *prefix, int prefixlen,
  90					   const struct in6_addr *gwaddr, int ifindex);
  91#endif
  92
  93static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 
 
 
 
 
 
 
  94{
  95	struct rt6_info *rt = (struct rt6_info *) dst;
  96	struct inet_peer *peer;
  97	u32 *p = NULL;
 
 
 
 
 
 
  98
  99	if (!(rt->dst.flags & DST_HOST))
 100		return NULL;
 
 
 101
 102	if (!rt->rt6i_peer)
 103		rt6_bind_peer(rt, 1);
 
 
 
 104
 105	peer = rt->rt6i_peer;
 106	if (peer) {
 107		u32 *old_p = __DST_METRICS_PTR(old);
 108		unsigned long prev, new;
 109
 110		p = peer->metrics;
 111		if (inet_metrics_new(peer))
 112			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
 113
 114		new = (unsigned long) p;
 115		prev = cmpxchg(&dst->_metrics, old, new);
 
 
 
 
 
 
 
 
 
 
 
 116
 117		if (prev != old) {
 118			p = __DST_METRICS_PTR(prev);
 119			if (prev & DST_METRICS_READ_ONLY)
 120				p = NULL;
 
 121		}
 
 122	}
 123	return p;
 124}
 125
 126static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 127{
 128	struct in6_addr *p = &rt->rt6i_gateway;
 129
 130	if (!ipv6_addr_any(p))
 131		return (const void *) p;
 
 
 132	return daddr;
 133}
 134
 135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
 
 
 136{
 137	struct rt6_info *rt = (struct rt6_info *) dst;
 138	struct neighbour *n;
 139
 140	daddr = choose_neigh_daddr(rt, daddr);
 141	n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
 142	if (n)
 143		return n;
 144	return neigh_create(&nd_tbl, daddr, dst->dev);
 145}
 146
 147static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
 148{
 149	struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
 150	if (!n) {
 151		n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
 152		if (IS_ERR(n))
 153			return PTR_ERR(n);
 154	}
 155	dst_set_neighbour(&rt->dst, n);
 156
 157	return 0;
 158}
 159
 160static struct dst_ops ip6_dst_ops_template = {
 161	.family			=	AF_INET6,
 162	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 163	.gc			=	ip6_dst_gc,
 164	.gc_thresh		=	1024,
 165	.check			=	ip6_dst_check,
 166	.default_advmss		=	ip6_default_advmss,
 167	.mtu			=	ip6_mtu,
 168	.cow_metrics		=	ipv6_cow_metrics,
 169	.destroy		=	ip6_dst_destroy,
 170	.ifdown			=	ip6_dst_ifdown,
 171	.negative_advice	=	ip6_negative_advice,
 172	.link_failure		=	ip6_link_failure,
 173	.update_pmtu		=	ip6_rt_update_pmtu,
 
 174	.local_out		=	__ip6_local_out,
 175	.neigh_lookup		=	ip6_neigh_lookup,
 176};
 177
 178static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 179{
 180	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 181
 182	return mtu ? : dst->dev->mtu;
 183}
 184
 185static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 
 186{
 187}
 188
 189static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
 190					 unsigned long old)
 191{
 192	return NULL;
 193}
 194
 195static struct dst_ops ip6_dst_blackhole_ops = {
 196	.family			=	AF_INET6,
 197	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 198	.destroy		=	ip6_dst_destroy,
 199	.check			=	ip6_dst_check,
 200	.mtu			=	ip6_blackhole_mtu,
 201	.default_advmss		=	ip6_default_advmss,
 202	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 203	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
 
 204	.neigh_lookup		=	ip6_neigh_lookup,
 205};
 206
 207static const u32 ip6_template_metrics[RTAX_MAX] = {
 208	[RTAX_HOPLIMIT - 1] = 255,
 209};
 210
 211static struct rt6_info ip6_null_entry_template = {
 212	.dst = {
 213		.__refcnt	= ATOMIC_INIT(1),
 214		.__use		= 1,
 215		.obsolete	= -1,
 216		.error		= -ENETUNREACH,
 217		.input		= ip6_pkt_discard,
 218		.output		= ip6_pkt_discard_out,
 219	},
 220	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 221	.rt6i_protocol  = RTPROT_KERNEL,
 222	.rt6i_metric	= ~(u32) 0,
 223	.rt6i_ref	= ATOMIC_INIT(1),
 224};
 225
 226#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 227
 228static int ip6_pkt_prohibit(struct sk_buff *skb);
 229static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 230
 231static struct rt6_info ip6_prohibit_entry_template = {
 232	.dst = {
 233		.__refcnt	= ATOMIC_INIT(1),
 234		.__use		= 1,
 235		.obsolete	= -1,
 236		.error		= -EACCES,
 237		.input		= ip6_pkt_prohibit,
 238		.output		= ip6_pkt_prohibit_out,
 239	},
 240	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 241	.rt6i_protocol  = RTPROT_KERNEL,
 242	.rt6i_metric	= ~(u32) 0,
 243	.rt6i_ref	= ATOMIC_INIT(1),
 244};
 245
 246static struct rt6_info ip6_blk_hole_entry_template = {
 247	.dst = {
 248		.__refcnt	= ATOMIC_INIT(1),
 249		.__use		= 1,
 250		.obsolete	= -1,
 251		.error		= -EINVAL,
 252		.input		= dst_discard,
 253		.output		= dst_discard,
 254	},
 255	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 256	.rt6i_protocol  = RTPROT_KERNEL,
 257	.rt6i_metric	= ~(u32) 0,
 258	.rt6i_ref	= ATOMIC_INIT(1),
 259};
 260
 261#endif
 262
 
 
 
 
 
 
 
 
 
 263/* allocate dst with ip6_dst_ops */
 264static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
 265					     struct net_device *dev,
 266					     int flags)
 267{
 268	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
 
 269
 270	if (rt)
 271		memset(&rt->rt6i_table, 0,
 272		       sizeof(*rt) - sizeof(struct dst_entry));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 273
 274	return rt;
 275}
 
 276
 277static void ip6_dst_destroy(struct dst_entry *dst)
 278{
 279	struct rt6_info *rt = (struct rt6_info *)dst;
 280	struct inet6_dev *idev = rt->rt6i_idev;
 281	struct inet_peer *peer = rt->rt6i_peer;
 282
 283	if (!(rt->dst.flags & DST_HOST))
 284		dst_destroy_metrics_generic(dst);
 
 285
 
 286	if (idev) {
 287		rt->rt6i_idev = NULL;
 288		in6_dev_put(idev);
 289	}
 290
 291	if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
 292		dst_release(dst->from);
 293
 294	if (peer) {
 295		rt->rt6i_peer = NULL;
 296		inet_putpeer(peer);
 297	}
 298}
 299
 300static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
 301
 302static u32 rt6_peer_genid(void)
 303{
 304	return atomic_read(&__rt6_peer_genid);
 305}
 306
 307void rt6_bind_peer(struct rt6_info *rt, int create)
 308{
 309	struct inet_peer *peer;
 310
 311	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
 312	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
 313		inet_putpeer(peer);
 314	else
 315		rt->rt6i_peer_genid = rt6_peer_genid();
 316}
 317
 318static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 319			   int how)
 320{
 321	struct rt6_info *rt = (struct rt6_info *)dst;
 322	struct inet6_dev *idev = rt->rt6i_idev;
 323	struct net_device *loopback_dev =
 324		dev_net(dev)->loopback_dev;
 325
 326	if (dev != loopback_dev && idev && idev->dev == dev) {
 327		struct inet6_dev *loopback_idev =
 328			in6_dev_get(loopback_dev);
 329		if (loopback_idev) {
 330			rt->rt6i_idev = loopback_idev;
 331			in6_dev_put(idev);
 
 
 332		}
 333	}
 334}
 335
 
 
 
 
 
 
 
 
 336static bool rt6_check_expired(const struct rt6_info *rt)
 337{
 338	struct rt6_info *ort = NULL;
 339
 340	if (rt->rt6i_flags & RTF_EXPIRES) {
 341		if (time_after(jiffies, rt->dst.expires))
 342			return true;
 343	} else if (rt->dst.from) {
 344		ort = (struct rt6_info *) rt->dst.from;
 345		return (ort->rt6i_flags & RTF_EXPIRES) &&
 346			time_after(jiffies, ort->dst.expires);
 347	}
 348	return false;
 349}
 350
 351static bool rt6_need_strict(const struct in6_addr *daddr)
 
 
 
 
 
 352{
 353	return ipv6_addr_type(daddr) &
 354		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 355}
 356
 357/*
 358 *	Route lookup. Any table->tb6_lock is implied.
 359 */
 360
 361static inline struct rt6_info *rt6_device_match(struct net *net,
 362						    struct rt6_info *rt,
 363						    const struct in6_addr *saddr,
 364						    int oif,
 365						    int flags)
 366{
 367	struct rt6_info *local = NULL;
 368	struct rt6_info *sprt;
 369
 370	if (!oif && ipv6_addr_any(saddr))
 371		goto out;
 372
 373	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 374		struct net_device *dev = sprt->dst.dev;
 375
 376		if (oif) {
 377			if (dev->ifindex == oif)
 378				return sprt;
 379			if (dev->flags & IFF_LOOPBACK) {
 380				if (!sprt->rt6i_idev ||
 381				    sprt->rt6i_idev->dev->ifindex != oif) {
 382					if (flags & RT6_LOOKUP_F_IFACE && oif)
 383						continue;
 384					if (local && (!oif ||
 385						      local->rt6i_idev->dev->ifindex == oif))
 386						continue;
 387				}
 388				local = sprt;
 389			}
 390		} else {
 391			if (ipv6_chk_addr(net, saddr, dev,
 392					  flags & RT6_LOOKUP_F_IFACE))
 393				return sprt;
 394		}
 395	}
 396
 397	if (oif) {
 398		if (local)
 399			return local;
 400
 401		if (flags & RT6_LOOKUP_F_IFACE)
 402			return net->ipv6.ip6_null_entry;
 403	}
 404out:
 405	return rt;
 406}
 407
 408#ifdef CONFIG_IPV6_ROUTER_PREF
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 409static void rt6_probe(struct rt6_info *rt)
 410{
 
 411	struct neighbour *neigh;
 412	/*
 413	 * Okay, this does not seem to be appropriate
 414	 * for now, however, we need to check if it
 415	 * is really so; aka Router Reachability Probing.
 416	 *
 417	 * Router Reachability Probe MUST be rate-limited
 418	 * to no more than one per minute.
 419	 */
 420	rcu_read_lock();
 421	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
 422	if (!neigh || (neigh->nud_state & NUD_VALID))
 423		goto out;
 424	read_lock_bh(&neigh->lock);
 425	if (!(neigh->nud_state & NUD_VALID) &&
 426	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
 427		struct in6_addr mcaddr;
 428		struct in6_addr *target;
 429
 430		neigh->updated = jiffies;
 431		read_unlock_bh(&neigh->lock);
 432
 433		target = (struct in6_addr *)&neigh->primary_key;
 434		addrconf_addr_solict_mult(target, &mcaddr);
 435		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
 
 
 
 436	} else {
 437		read_unlock_bh(&neigh->lock);
 438	}
 
 
 
 
 
 
 
 
 
 439out:
 440	rcu_read_unlock();
 441}
 442#else
 443static inline void rt6_probe(struct rt6_info *rt)
 444{
 445}
 446#endif
 447
 448/*
 449 * Default Router Selection (RFC 2461 6.3.6)
 450 */
 451static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 452{
 453	struct net_device *dev = rt->dst.dev;
 454	if (!oif || dev->ifindex == oif)
 455		return 2;
 456	if ((dev->flags & IFF_LOOPBACK) &&
 457	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 458		return 1;
 459	return 0;
 460}
 461
 462static inline int rt6_check_neigh(struct rt6_info *rt)
 463{
 464	struct neighbour *neigh;
 465	int m;
 466
 467	rcu_read_lock();
 468	neigh = dst_get_neighbour_noref(&rt->dst);
 469	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 470	    !(rt->rt6i_flags & RTF_GATEWAY))
 471		m = 1;
 472	else if (neigh) {
 473		read_lock_bh(&neigh->lock);
 
 
 
 474		if (neigh->nud_state & NUD_VALID)
 475			m = 2;
 476#ifdef CONFIG_IPV6_ROUTER_PREF
 477		else if (neigh->nud_state & NUD_FAILED)
 478			m = 0;
 
 
 479#endif
 480		else
 481			m = 1;
 482		read_unlock_bh(&neigh->lock);
 483	} else
 484		m = 0;
 485	rcu_read_unlock();
 486	return m;
 
 487}
 488
 489static int rt6_score_route(struct rt6_info *rt, int oif,
 490			   int strict)
 491{
 492	int m, n;
 493
 494	m = rt6_check_dev(rt, oif);
 495	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 496		return -1;
 497#ifdef CONFIG_IPV6_ROUTER_PREF
 498	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 499#endif
 500	n = rt6_check_neigh(rt);
 501	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
 502		return -1;
 
 
 503	return m;
 504}
 505
 506static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 507				   int *mpri, struct rt6_info *match)
 
 508{
 509	int m;
 
 
 
 
 
 
 
 510
 511	if (rt6_check_expired(rt))
 512		goto out;
 513
 514	m = rt6_score_route(rt, oif, strict);
 515	if (m < 0)
 
 
 
 516		goto out;
 
 517
 
 
 
 
 518	if (m > *mpri) {
 519		if (strict & RT6_LOOKUP_F_REACHABLE)
 520			rt6_probe(match);
 521		*mpri = m;
 522		match = rt;
 523	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
 524		rt6_probe(rt);
 525	}
 526
 527out:
 528	return match;
 529}
 530
 531static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 532				     struct rt6_info *rr_head,
 533				     u32 metric, int oif, int strict)
 
 534{
 535	struct rt6_info *rt, *match;
 536	int mpri = -1;
 537
 538	match = NULL;
 539	for (rt = rr_head; rt && rt->rt6i_metric == metric;
 540	     rt = rt->dst.rt6_next)
 541		match = find_match(rt, oif, strict, &mpri, match);
 542	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 543	     rt = rt->dst.rt6_next)
 544		match = find_match(rt, oif, strict, &mpri, match);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 545
 546	return match;
 547}
 548
 549static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 550{
 551	struct rt6_info *match, *rt0;
 552	struct net *net;
 
 553
 554	rt0 = fn->rr_ptr;
 555	if (!rt0)
 556		fn->rr_ptr = rt0 = fn->leaf;
 557
 558	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
 
 559
 560	if (!match &&
 561	    (strict & RT6_LOOKUP_F_REACHABLE)) {
 562		struct rt6_info *next = rt0->dst.rt6_next;
 563
 564		/* no entries matched; do round-robin */
 565		if (!next || next->rt6i_metric != rt0->rt6i_metric)
 566			next = fn->leaf;
 567
 568		if (next != rt0)
 569			fn->rr_ptr = next;
 570	}
 571
 572	net = dev_net(rt0->dst.dev);
 573	return match ? match : net->ipv6.ip6_null_entry;
 574}
 575
 
 
 
 
 
 576#ifdef CONFIG_IPV6_ROUTE_INFO
 577int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 578		  const struct in6_addr *gwaddr)
 579{
 580	struct net *net = dev_net(dev);
 581	struct route_info *rinfo = (struct route_info *) opt;
 582	struct in6_addr prefix_buf, *prefix;
 583	unsigned int pref;
 584	unsigned long lifetime;
 585	struct rt6_info *rt;
 586
 587	if (len < sizeof(struct route_info)) {
 588		return -EINVAL;
 589	}
 590
 591	/* Sanity check for prefix_len and length */
 592	if (rinfo->length > 3) {
 593		return -EINVAL;
 594	} else if (rinfo->prefix_len > 128) {
 595		return -EINVAL;
 596	} else if (rinfo->prefix_len > 64) {
 597		if (rinfo->length < 2) {
 598			return -EINVAL;
 599		}
 600	} else if (rinfo->prefix_len > 0) {
 601		if (rinfo->length < 1) {
 602			return -EINVAL;
 603		}
 604	}
 605
 606	pref = rinfo->route_pref;
 607	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 608		return -EINVAL;
 609
 610	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 611
 612	if (rinfo->length == 3)
 613		prefix = (struct in6_addr *)rinfo->prefix;
 614	else {
 615		/* this function is safe */
 616		ipv6_addr_prefix(&prefix_buf,
 617				 (struct in6_addr *)rinfo->prefix,
 618				 rinfo->prefix_len);
 619		prefix = &prefix_buf;
 620	}
 621
 622	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 623				dev->ifindex);
 
 
 
 624
 625	if (rt && !lifetime) {
 626		ip6_del_rt(rt);
 627		rt = NULL;
 628	}
 629
 630	if (!rt && lifetime)
 631		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 632					pref);
 633	else if (rt)
 634		rt->rt6i_flags = RTF_ROUTEINFO |
 635				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 636
 637	if (rt) {
 638		if (!addrconf_finite_timeout(lifetime))
 639			rt6_clean_expires(rt);
 640		else
 641			rt6_set_expires(rt, jiffies + HZ * lifetime);
 642
 643		dst_release(&rt->dst);
 644	}
 645	return 0;
 646}
 647#endif
 648
/*
 * BACKTRACK(__net, saddr) - retry a lookup higher up the tree.
 *
 * Only acts when the expanding function's local "rt" equals
 * __net->ipv6.ip6_null_entry.  It then repeatedly moves the function's
 * local "fn" to its parent, or, when the parent has a subtree
 * (FIB6_SUBTREE) that is not the current node, to the result of looking
 * up saddr in that subtree.
 *
 * The macro is not self-contained: it reads and writes the locals "rt"
 * and "fn" of the function it is expanded in and jumps to that
 * function's labels:
 *   - "out"     when the current node has RTN_TL_ROOT set;
 *   - "restart" as soon as the new node has RTN_RTINFO set.
 */
 649#define BACKTRACK(__net, saddr)			\
 650do { \
 651	if (rt == __net->ipv6.ip6_null_entry) {	\
 652		struct fib6_node *pn; \
 653		while (1) { \
 654			if (fn->fn_flags & RTN_TL_ROOT) \
 655				goto out; \
 656			pn = fn->parent; \
 657			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
 658				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
 659			else \
 660				fn = pn; \
 661			if (fn->fn_flags & RTN_RTINFO) \
 662				goto restart; \
 663		} \
 664	} \
 665} while (0)
 666
 667static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 668					     struct fib6_table *table,
 669					     struct flowi6 *fl6, int flags)
 670{
 671	struct fib6_node *fn;
 672	struct rt6_info *rt;
 673
 674	read_lock_bh(&table->tb6_lock);
 675	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 676restart:
 677	rt = fn->leaf;
 678	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 679	BACKTRACK(net, &fl6->saddr);
 680out:
 
 
 
 
 
 681	dst_use(&rt->dst, jiffies);
 682	read_unlock_bh(&table->tb6_lock);
 
 
 
 683	return rt;
 684
 685}
 686
 687struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 688				    int flags)
 689{
 690	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
 691}
 692EXPORT_SYMBOL_GPL(ip6_route_lookup);
 693
 694struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 695			    const struct in6_addr *saddr, int oif, int strict)
 696{
 697	struct flowi6 fl6 = {
 698		.flowi6_oif = oif,
 699		.daddr = *daddr,
 700	};
 701	struct dst_entry *dst;
 702	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 703
 704	if (saddr) {
 705		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 706		flags |= RT6_LOOKUP_F_HAS_SADDR;
 707	}
 708
 709	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 710	if (dst->error == 0)
 711		return (struct rt6_info *) dst;
 712
 713	dst_release(dst);
 714
 715	return NULL;
 716}
 717
 718EXPORT_SYMBOL(rt6_lookup);
 719
 720/* ip6_ins_rt is called with FREE table->tb6_lock.
 721   It takes new route entry, the addition fails by any reason the
 722   route is freed. In any case, if caller does not hold it, it may
 723   be destroyed.
 724 */
 725
 726static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 
 727{
 728	int err;
 729	struct fib6_table *table;
 730
 731	table = rt->rt6i_table;
 732	write_lock_bh(&table->tb6_lock);
 733	err = fib6_add(&table->tb6_root, rt, info);
 734	write_unlock_bh(&table->tb6_lock);
 735
 736	return err;
 737}
 738
 739int ip6_ins_rt(struct rt6_info *rt)
 740{
 741	struct nl_info info = {
 742		.nl_net = dev_net(rt->dst.dev),
 743	};
 744	return __ip6_ins_rt(rt, &info);
 745}
 746
 747static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
 748				      const struct in6_addr *daddr,
 749				      const struct in6_addr *saddr)
 750{
 751	struct rt6_info *rt;
 752
 753	/*
 754	 *	Clone the route.
 755	 */
 756
 757	rt = ip6_rt_copy(ort, daddr);
 
 758
 759	if (rt) {
 760		int attempts = !in_softirq();
 761
 762		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
 763			if (ort->rt6i_dst.plen != 128 &&
 764			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 765				rt->rt6i_flags |= RTF_ANYCAST;
 766			rt->rt6i_gateway = *daddr;
 767		}
 768
 769		rt->rt6i_flags |= RTF_CACHE;
 
 
 
 
 
 770
 
 
 
 
 771#ifdef CONFIG_IPV6_SUBTREES
 772		if (rt->rt6i_src.plen && saddr) {
 773			rt->rt6i_src.addr = *saddr;
 774			rt->rt6i_src.plen = 128;
 775		}
 776#endif
 
 777
 778	retry:
 779		if (rt6_bind_neighbour(rt, rt->dst.dev)) {
 780			struct net *net = dev_net(rt->dst.dev);
 781			int saved_rt_min_interval =
 782				net->ipv6.sysctl.ip6_rt_gc_min_interval;
 783			int saved_rt_elasticity =
 784				net->ipv6.sysctl.ip6_rt_gc_elasticity;
 785
 786			if (attempts-- > 0) {
 787				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
 788				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
 789
 790				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
 791
 792				net->ipv6.sysctl.ip6_rt_gc_elasticity =
 793					saved_rt_elasticity;
 794				net->ipv6.sysctl.ip6_rt_gc_min_interval =
 795					saved_rt_min_interval;
 796				goto retry;
 797			}
 
 
 
 
 
 798
 799			net_warn_ratelimited("Neighbour table overflow\n");
 800			dst_free(&rt->dst);
 801			return NULL;
 802		}
 803	}
 804
 805	return rt;
 806}
 807
 808static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 809					const struct in6_addr *daddr)
 810{
 811	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
 
 
 
 
 
 812
 813	if (rt) {
 814		rt->rt6i_flags |= RTF_CACHE;
 815		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 816	}
 817	return rt;
 
 
 
 818}
 819
/*
 * Per-table route lookup shared by the input and output paths.
 *
 * @net:   network namespace
 * @table: FIB table to search
 * @oif:   interface index handed to rt6_select()
 * @fl6:   flow being routed (daddr/saddr are used as lookup keys)
 * @flags: RT6_LOOKUP_F_* flags; only RT6_LOOKUP_F_IFACE is picked out here
 *
 * Unless forwarding is enabled in devconf_all, the first pass asks
 * rt6_select() for RT6_LOOKUP_F_REACHABLE routes only; a second pass
 * without that flag follows whenever the first one leaves through "out".
 *
 * When the selected route is neither the null entry nor already an
 * RTF_CACHE entry, a per-destination copy is created with
 * rt6_alloc_cow() or rt6_alloc_clone() and inserted with ip6_ins_rt().
 * If the insertion fails the whole lookup is repeated, at most
 * "attempts" (3) times in total.
 *
 * Returns a route on which dst_hold() has been called and whose
 * lastuse/__use fields have been updated; this may be ip6_null_entry.
 *
 * Note: the names "rt", "fn" and the labels "restart"/"out" are used by
 * BACKTRACK().
 */
 820static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
 821				      struct flowi6 *fl6, int flags)
 822{
 823	struct fib6_node *fn;
 824	struct rt6_info *rt, *nrt;
 825	int strict = 0;
 826	int attempts = 3;
 827	int err;
	/* reachability is only insisted upon when not forwarding */
 828	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
 829
 830	strict |= flags & RT6_LOOKUP_F_IFACE;
 831
 832relookup:
 833	read_lock_bh(&table->tb6_lock);
 834
 835restart_2:
 836	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 837
 838restart:
 839	rt = rt6_select(fn, oif, strict | reachable);
 840
 841	BACKTRACK(net, &fl6->saddr);
	/* nothing to copy for the null entry or an existing cache entry */
 842	if (rt == net->ipv6.ip6_null_entry ||
 843	    rt->rt6i_flags & RTF_CACHE)
 844		goto out;
 845
	/* pin rt so it stays valid while the table lock is dropped */
 846	dst_hold(&rt->dst);
 847	read_unlock_bh(&table->tb6_lock);
 848
	/*
	 * No neighbour bound and a next hop is expected: copy and bind one
	 * (rt6_alloc_cow).  Otherwise non-host routes are cloned, and host
	 * routes are returned as they are.
	 */
 849	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
 850		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
 851	else if (!(rt->dst.flags & DST_HOST))
 852		nrt = rt6_alloc_clone(rt, &fl6->daddr);
 853	else
 854		goto out2;
 855
	/* swap our reference from the original route to the copy (or null entry) */
 856	dst_release(&rt->dst);
 857	rt = nrt ? : net->ipv6.ip6_null_entry;
 858
 859	dst_hold(&rt->dst);
 860	if (nrt) {
 861		err = ip6_ins_rt(nrt);
 862		if (!err)
 863			goto out2;
 864	}
 865
	/* out of retries: return what we have, even though it was not inserted */
 866	if (--attempts <= 0)
 867		goto out2;
 868
 869	/*
 870	 * Race condition! In the gap, when table->tb6_lock was
 871	 * released someone could insert this route.  Relookup.
 872	 */
 873	dst_release(&rt->dst);
 874	goto relookup;
 875
 876out:
	/* first pass was restricted to reachable routes: redo it unrestricted */
 877	if (reachable) {
 878		reachable = 0;
 879		goto restart_2;
 880	}
 881	dst_hold(&rt->dst);
 882	read_unlock_bh(&table->tb6_lock);
 883out2:
 884	rt->dst.lastuse = jiffies;
 885	rt->dst.__use++;
 886
 887	return rt;
 888}
 889
 890static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
 891					    struct flowi6 *fl6, int flags)
 892{
 893	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 894}
 895
 896static struct dst_entry *ip6_route_input_lookup(struct net *net,
 897						struct net_device *dev,
 898						struct flowi6 *fl6, int flags)
 899{
 900	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
 901		flags |= RT6_LOOKUP_F_IFACE;
 902
 903	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
 904}
 905
 906void ip6_route_input(struct sk_buff *skb)
 907{
 908	const struct ipv6hdr *iph = ipv6_hdr(skb);
 909	struct net *net = dev_net(skb->dev);
 910	int flags = RT6_LOOKUP_F_HAS_SADDR;
 
 911	struct flowi6 fl6 = {
 912		.flowi6_iif = skb->dev->ifindex,
 913		.daddr = iph->daddr,
 914		.saddr = iph->saddr,
 915		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
 916		.flowi6_mark = skb->mark,
 917		.flowi6_proto = iph->nexthdr,
 918	};
 919
 
 
 
 
 920	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
 921}
 922
 923static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
 924					     struct flowi6 *fl6, int flags)
 925{
 926	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 927}
 928
 929struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 930				    struct flowi6 *fl6)
 931{
 932	int flags = 0;
 
 933
 934	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
 
 
 
 
 
 
 
 
 935		flags |= RT6_LOOKUP_F_IFACE;
 936
 937	if (!ipv6_addr_any(&fl6->saddr))
 938		flags |= RT6_LOOKUP_F_HAS_SADDR;
 939	else if (sk)
 940		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 941
 942	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 943}
 944
 945EXPORT_SYMBOL(ip6_route_output);
 946
 947struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 948{
 949	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
 950	struct dst_entry *new = NULL;
 951
 952	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
 953	if (rt) {
 954		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
 955
 956		new = &rt->dst;
 957
 958		new->__use = 1;
 959		new->input = dst_discard;
 960		new->output = dst_discard;
 961
 962		if (dst_metrics_read_only(&ort->dst))
 963			new->_metrics = ort->dst._metrics;
 964		else
 965			dst_copy_metrics(new, &ort->dst);
 966		rt->rt6i_idev = ort->rt6i_idev;
 967		if (rt->rt6i_idev)
 968			in6_dev_hold(rt->rt6i_idev);
 969
 970		rt->rt6i_gateway = ort->rt6i_gateway;
 971		rt->rt6i_flags = ort->rt6i_flags;
 972		rt6_clean_expires(rt);
 973		rt->rt6i_metric = 0;
 974
 975		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
 976#ifdef CONFIG_IPV6_SUBTREES
 977		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
 978#endif
 979
 980		dst_free(new);
 981	}
 982
 983	dst_release(dst_orig);
 984	return new ? new : ERR_PTR(-ENOMEM);
 985}
 986
 987/*
 988 *	Destination cache support functions
 989 */
 990
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 991static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 992{
 993	struct rt6_info *rt;
 994
 995	rt = (struct rt6_info *) dst;
 996
 997	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
 998		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
 999			if (!rt->rt6i_peer)
1000				rt6_bind_peer(rt, 0);
1001			rt->rt6i_peer_genid = rt6_peer_genid();
1002		}
1003		return dst;
1004	}
1005	return NULL;
 
 
 
1006}
1007
1008static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1009{
1010	struct rt6_info *rt = (struct rt6_info *) dst;
1011
1012	if (rt) {
1013		if (rt->rt6i_flags & RTF_CACHE) {
1014			if (rt6_check_expired(rt)) {
1015				ip6_del_rt(rt);
1016				dst = NULL;
1017			}
1018		} else {
1019			dst_release(dst);
1020			dst = NULL;
1021		}
1022	}
1023	return dst;
1024}
1025
1026static void ip6_link_failure(struct sk_buff *skb)
1027{
1028	struct rt6_info *rt;
1029
1030	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1031
1032	rt = (struct rt6_info *) skb_dst(skb);
1033	if (rt) {
1034		if (rt->rt6i_flags & RTF_CACHE)
1035			rt6_update_expires(rt, 0);
1036		else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
 
1037			rt->rt6i_node->fn_sernum = -1;
 
1038	}
1039}
1040
1041static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1042{
1043	struct rt6_info *rt6 = (struct rt6_info*)dst;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1044
1045	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1046		rt6->rt6i_flags |= RTF_MODIFIED;
1047		if (mtu < IPV6_MIN_MTU) {
1048			u32 features = dst_metric(dst, RTAX_FEATURES);
1049			mtu = IPV6_MIN_MTU;
1050			features |= RTAX_FEATURE_ALLFRAG;
1051			dst_metric_set(dst, RTAX_FEATURES, features);
 
 
 
 
 
 
 
1052		}
1053		dst_metric_set(dst, RTAX_MTU, mtu);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1054	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1055}
1056
 
 
 
 
 
 
1057static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1058{
1059	struct net_device *dev = dst->dev;
1060	unsigned int mtu = dst_mtu(dst);
1061	struct net *net = dev_net(dev);
1062
1063	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1064
1065	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1066		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1067
1068	/*
1069	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1070	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1071	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1072	 * rely only on pmtu discovery"
1073	 */
1074	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1075		mtu = IPV6_MAXPLEN;
1076	return mtu;
1077}
1078
1079static unsigned int ip6_mtu(const struct dst_entry *dst)
1080{
 
 
1081	struct inet6_dev *idev;
1082	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1083
1084	if (mtu)
1085		return mtu;
 
 
 
 
1086
1087	mtu = IPV6_MIN_MTU;
1088
1089	rcu_read_lock();
1090	idev = __in6_dev_get(dst->dev);
1091	if (idev)
1092		mtu = idev->cnf.mtu6;
1093	rcu_read_unlock();
1094
1095	return mtu;
 
1096}
1097
/*
 * Head of a singly linked list (chained through dst->next) of the dst
 * entries created by icmp6_dst_alloc(); walked by icmp6_dst_gc() and
 * icmp6_clean_all().
 */
1098static struct dst_entry *icmp6_dst_gc_list;
/* Taken (with bottom halves disabled) around every access to icmp6_dst_gc_list. */
1099static DEFINE_SPINLOCK(icmp6_dst_lock);
1100
1101struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1102				  struct neighbour *neigh,
1103				  struct flowi6 *fl6)
1104{
1105	struct dst_entry *dst;
1106	struct rt6_info *rt;
1107	struct inet6_dev *idev = in6_dev_get(dev);
1108	struct net *net = dev_net(dev);
1109
1110	if (unlikely(!idev))
1111		return ERR_PTR(-ENODEV);
1112
1113	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1114	if (unlikely(!rt)) {
1115		in6_dev_put(idev);
1116		dst = ERR_PTR(-ENOMEM);
1117		goto out;
1118	}
1119
1120	if (neigh)
1121		neigh_hold(neigh);
1122	else {
1123		neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
1124		if (IS_ERR(neigh)) {
1125			in6_dev_put(idev);
1126			dst_free(&rt->dst);
1127			return ERR_CAST(neigh);
1128		}
1129	}
1130
1131	rt->dst.flags |= DST_HOST;
1132	rt->dst.output  = ip6_output;
1133	dst_set_neighbour(&rt->dst, neigh);
1134	atomic_set(&rt->dst.__refcnt, 1);
 
1135	rt->rt6i_dst.addr = fl6->daddr;
1136	rt->rt6i_dst.plen = 128;
1137	rt->rt6i_idev     = idev;
1138	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1139
1140	spin_lock_bh(&icmp6_dst_lock);
1141	rt->dst.next = icmp6_dst_gc_list;
1142	icmp6_dst_gc_list = &rt->dst;
1143	spin_unlock_bh(&icmp6_dst_lock);
1144
1145	fib6_force_start_gc(net);
1146
1147	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1148
1149out:
1150	return dst;
1151}
1152
1153int icmp6_dst_gc(void)
1154{
1155	struct dst_entry *dst, **pprev;
1156	int more = 0;
1157
1158	spin_lock_bh(&icmp6_dst_lock);
1159	pprev = &icmp6_dst_gc_list;
1160
1161	while ((dst = *pprev) != NULL) {
1162		if (!atomic_read(&dst->__refcnt)) {
1163			*pprev = dst->next;
1164			dst_free(dst);
1165		} else {
1166			pprev = &dst->next;
1167			++more;
1168		}
1169	}
1170
1171	spin_unlock_bh(&icmp6_dst_lock);
1172
1173	return more;
1174}
1175
1176static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1177			    void *arg)
1178{
1179	struct dst_entry *dst, **pprev;
1180
1181	spin_lock_bh(&icmp6_dst_lock);
1182	pprev = &icmp6_dst_gc_list;
1183	while ((dst = *pprev) != NULL) {
1184		struct rt6_info *rt = (struct rt6_info *) dst;
1185		if (func(rt, arg)) {
1186			*pprev = dst->next;
1187			dst_free(dst);
1188		} else {
1189			pprev = &dst->next;
1190		}
1191	}
1192	spin_unlock_bh(&icmp6_dst_lock);
1193}
1194
1195static int ip6_dst_gc(struct dst_ops *ops)
1196{
1197	unsigned long now = jiffies;
1198	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1199	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1200	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1201	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1202	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1203	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1204	int entries;
1205
1206	entries = dst_entries_get_fast(ops);
1207	if (time_after(rt_last_gc + rt_min_interval, now) &&
1208	    entries <= rt_max_size)
1209		goto out;
1210
1211	net->ipv6.ip6_rt_gc_expire++;
1212	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1213	net->ipv6.ip6_rt_last_gc = now;
1214	entries = dst_entries_get_slow(ops);
1215	if (entries < ops->gc_thresh)
1216		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1217out:
1218	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1219	return entries > rt_max_size;
1220}
1221
1222/* Clean host part of a prefix. Not necessary in radix tree,
1223   but results in cleaner routing tables.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1224
1225   Remove it only when all the things will work!
1226 */
 
 
 
 
 
 
 
 
 
 
 
 
 
1227
1228int ip6_dst_hoplimit(struct dst_entry *dst)
1229{
1230	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1231	if (hoplimit == 0) {
1232		struct net_device *dev = dst->dev;
1233		struct inet6_dev *idev;
1234
1235		rcu_read_lock();
1236		idev = __in6_dev_get(dev);
1237		if (idev)
1238			hoplimit = idev->cnf.hop_limit;
1239		else
1240			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1241		rcu_read_unlock();
1242	}
1243	return hoplimit;
 
 
 
 
 
1244}
1245EXPORT_SYMBOL(ip6_dst_hoplimit);
1246
1247/*
1248 *
1249 */
1250
/*
 * Build a route from the configuration in @cfg and insert it into the
 * FIB table selected by cfg->fc_table.
 *
 * Returns the result of __ip6_ins_rt() once the route is fully set up,
 * or a negative errno if setup fails (-EINVAL, -ENODEV, -ENOBUFS,
 * -ENOMEM, -EHOSTUNREACH, or the error from rt6_bind_neighbour()).
 * Every failure after the initial length checks leaves through "out",
 * which drops the device and inet6_dev references taken here and frees
 * the partially built route.
 *
 * Side effects on @cfg: fc_metric and fc_protocol are replaced by
 * defaults when they are 0 / RTPROT_UNSPEC, and fc_nlinfo.nl_net is
 * overwritten with the namespace of the chosen device.
 */
1251int ip6_route_add(struct fib6_config *cfg)
1252{
1253	int err;
1254	struct net *net = cfg->fc_nlinfo.nl_net;
1255	struct rt6_info *rt = NULL;
1256	struct net_device *dev = NULL;
1257	struct inet6_dev *idev = NULL;
1258	struct fib6_table *table;
1259	int addr_type;
1260
1261	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1262		return -EINVAL;
1263#ifndef CONFIG_IPV6_SUBTREES
	/* source prefixes are only accepted with subtree support */
1264	if (cfg->fc_src_len)
1265		return -EINVAL;
1266#endif
	/* an explicit interface index pins both the device and its inet6_dev */
1267	if (cfg->fc_ifindex) {
1268		err = -ENODEV;
1269		dev = dev_get_by_index(net, cfg->fc_ifindex);
1270		if (!dev)
1271			goto out;
1272		idev = in6_dev_get(dev);
1273		if (!idev)
1274			goto out;
1275	}
1276
1277	if (cfg->fc_metric == 0)
1278		cfg->fc_metric = IP6_RT_PRIO_USER;
1279
1280	err = -ENOBUFS;
	/*
	 * A netlink request without NLM_F_CREATE first tries an existing
	 * table; if there is none, a warning is printed and the table is
	 * created anyway.
	 */
1281	if (cfg->fc_nlinfo.nlh &&
1282	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1283		table = fib6_get_table(net, cfg->fc_table);
1284		if (!table) {
1285			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1286			table = fib6_new_table(net, cfg->fc_table);
1287		}
1288	} else {
1289		table = fib6_new_table(net, cfg->fc_table);
1290	}
1291
1292	if (!table)
1293		goto out;
1294
1295	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1296
1297	if (!rt) {
1298		err = -ENOMEM;
1299		goto out;
1300	}
1301
1302	rt->dst.obsolete = -1;
1303
1304	if (cfg->fc_flags & RTF_EXPIRES)
1305		rt6_set_expires(rt, jiffies +
1306				clock_t_to_jiffies(cfg->fc_expires));
1307	else
1308		rt6_clean_expires(rt);
1309
1310	if (cfg->fc_protocol == RTPROT_UNSPEC)
1311		cfg->fc_protocol = RTPROT_BOOT;
1312	rt->rt6i_protocol = cfg->fc_protocol;
1313
1314	addr_type = ipv6_addr_type(&cfg->fc_dst);
1315
	/* input handler depends on the destination type / RTF_LOCAL */
1316	if (addr_type & IPV6_ADDR_MULTICAST)
1317		rt->dst.input = ip6_mc_input;
1318	else if (cfg->fc_flags & RTF_LOCAL)
1319		rt->dst.input = ip6_input;
1320	else
1321		rt->dst.input = ip6_forward;
1322
1323	rt->dst.output = ip6_output;
1324
1325	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1326	rt->rt6i_dst.plen = cfg->fc_dst_len;
1327	if (rt->rt6i_dst.plen == 128)
1328	       rt->dst.flags |= DST_HOST;
1329
	/* non-host routes with metrics supplied get a private, zeroed metrics array */
1330	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1331		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1332		if (!metrics) {
1333			err = -ENOMEM;
1334			goto out;
1335		}
1336		dst_init_metrics(&rt->dst, metrics, 0);
1337	}
1338#ifdef CONFIG_IPV6_SUBTREES
1339	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1340	rt->rt6i_src.plen = cfg->fc_src_len;
1341#endif
1342
1343	rt->rt6i_metric = cfg->fc_metric;
1344
1345	/* We cannot add true routes via loopback here,
1346	   they would result in kernel looping; promote them to reject routes
1347	 */
1348	if ((cfg->fc_flags & RTF_REJECT) ||
1349	    (dev && (dev->flags & IFF_LOOPBACK) &&
1350	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1351	     !(cfg->fc_flags & RTF_LOCAL))) {
1352		/* hold loopback dev/idev if we haven't done so. */
1353		if (dev != net->loopback_dev) {
1354			if (dev) {
1355				dev_put(dev);
1356				in6_dev_put(idev);
1357			}
1358			dev = net->loopback_dev;
1359			dev_hold(dev);
1360			idev = in6_dev_get(dev);
1361			if (!idev) {
1362				err = -ENODEV;
1363				goto out;
1364			}
1365		}
1366		rt->dst.output = ip6_pkt_discard_out;
1367		rt->dst.input = ip6_pkt_discard;
1368		rt->dst.error = -ENETUNREACH;
1369		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* reject routes skip gateway, prefsrc and neighbour setup */
1370		goto install_route;
1371	}
1372
1373	if (cfg->fc_flags & RTF_GATEWAY) {
1374		const struct in6_addr *gw_addr;
1375		int gwa_type;
1376
1377		gw_addr = &cfg->fc_gateway;
1378		rt->rt6i_gateway = *gw_addr;
1379		gwa_type = ipv6_addr_type(gw_addr);
1380
1381		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1382			struct rt6_info *grt;
1383
1384			/* IPv6 strictly inhibits using not link-local
1385			   addresses as nexthop address.
1386			   Otherwise, router will not able to send redirects.
1387			   It is very good, but in some (rare!) circumstances
1388			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1389			   some exceptions. --ANK
1390			 */
1391			err = -EINVAL;
1392			if (!(gwa_type & IPV6_ADDR_UNICAST))
1393				goto out;
1394
			/* the gateway itself must be reachable through an existing route */
1395			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1396
1397			err = -EHOSTUNREACH;
1398			if (!grt)
1399				goto out;
1400			if (dev) {
1401				if (dev != grt->dst.dev) {
1402					dst_release(&grt->dst);
1403					goto out;
1404				}
1405			} else {
				/* no device given: adopt the one the gateway route uses */
1406				dev = grt->dst.dev;
1407				idev = grt->rt6i_idev;
1408				dev_hold(dev);
1409				in6_dev_hold(grt->rt6i_idev);
1410			}
			/* err stays -EHOSTUNREACH if the gateway route is itself gatewayed */
1411			if (!(grt->rt6i_flags & RTF_GATEWAY))
1412				err = 0;
1413			dst_release(&grt->dst);
1414
1415			if (err)
1416				goto out;
1417		}
1418		err = -EINVAL;
1419		if (!dev || (dev->flags & IFF_LOOPBACK))
1420			goto out;
1421	}
1422
1423	err = -ENODEV;
1424	if (!dev)
1425		goto out;
1426
	/* a preferred source address must pass ipv6_chk_addr() for dev */
1427	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1428		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1429			err = -EINVAL;
1430			goto out;
1431		}
1432		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1433		rt->rt6i_prefsrc.plen = 128;
1434	} else
1435		rt->rt6i_prefsrc.plen = 0;
1436
1437	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1438		err = rt6_bind_neighbour(rt, dev);
1439		if (err)
1440			goto out;
1441	}
1442
1443	rt->rt6i_flags = cfg->fc_flags;
1444
1445install_route:
	/* copy the supplied metrics; attribute type 0 is skipped, types above RTAX_MAX are rejected */
1446	if (cfg->fc_mx) {
1447		struct nlattr *nla;
1448		int remaining;
1449
1450		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1451			int type = nla_type(nla);
1452
1453			if (type) {
1454				if (type > RTAX_MAX) {
1455					err = -EINVAL;
1456					goto out;
1457				}
1458
1459				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1460			}
1461		}
1462	}
1463
	/* the dev/idev references taken above are handed over to the route */
1464	rt->dst.dev = dev;
1465	rt->rt6i_idev = idev;
1466	rt->rt6i_table = table;
1467
1468	cfg->fc_nlinfo.nl_net = dev_net(dev);
1469
1470	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1471
1472out:
1473	if (dev)
1474		dev_put(dev);
1475	if (idev)
1476		in6_dev_put(idev);
1477	if (rt)
1478		dst_free(&rt->dst);
1479	return err;
1480}
1481
1482static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1483{
1484	int err;
1485	struct fib6_table *table;
1486	struct net *net = dev_net(rt->dst.dev);
1487
1488	if (rt == net->ipv6.ip6_null_entry)
1489		return -ENOENT;
 
 
 
1490
1491	table = rt->rt6i_table;
1492	write_lock_bh(&table->tb6_lock);
1493
1494	err = fib6_del(rt, info);
1495	dst_release(&rt->dst);
1496
1497	write_unlock_bh(&table->tb6_lock);
1498
 
 
1499	return err;
1500}
1501
1502int ip6_del_rt(struct rt6_info *rt)
1503{
1504	struct nl_info info = {
1505		.nl_net = dev_net(rt->dst.dev),
1506	};
1507	return __ip6_del_rt(rt, &info);
1508}
1509
1510static int ip6_route_del(struct fib6_config *cfg)
1511{
1512	struct fib6_table *table;
1513	struct fib6_node *fn;
1514	struct rt6_info *rt;
1515	int err = -ESRCH;
1516
1517	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1518	if (!table)
1519		return err;
1520
1521	read_lock_bh(&table->tb6_lock);
1522
1523	fn = fib6_locate(&table->tb6_root,
1524			 &cfg->fc_dst, cfg->fc_dst_len,
1525			 &cfg->fc_src, cfg->fc_src_len);
1526
1527	if (fn) {
1528		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
 
 
 
1529			if (cfg->fc_ifindex &&
1530			    (!rt->dst.dev ||
1531			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1532				continue;
1533			if (cfg->fc_flags & RTF_GATEWAY &&
1534			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1535				continue;
1536			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1537				continue;
1538			dst_hold(&rt->dst);
1539			read_unlock_bh(&table->tb6_lock);
1540
1541			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1542		}
1543	}
1544	read_unlock_bh(&table->tb6_lock);
1545
1546	return err;
1547}
1548
1549/*
1550 *	Handle redirects
1551 */
/*
 * Flow key used for redirect lookups: a flowi6 extended with the
 * address of the gateway the redirect is checked against.
 */
1552struct ip6rd_flowi {
	/* must stay the first member: __ip6_route_redirect() casts the flowi6 pointer back to this struct */
1553	struct flowi6 fl6;
	/* compared against rt6i_gateway of the candidate routes */
1554	struct in6_addr gateway;
1555};
1556
1557static struct rt6_info *__ip6_route_redirect(struct net *net,
1558					     struct fib6_table *table,
1559					     struct flowi6 *fl6,
1560					     int flags)
1561{
1562	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1563	struct rt6_info *rt;
1564	struct fib6_node *fn;
 
 
 
 
 
1565
1566	/*
1567	 * Get the "current" route for this destination and
1568	 * check if the redirect has come from approriate router.
1569	 *
1570	 * RFC 2461 specifies that redirects should only be
1571	 * accepted if they come from the nexthop to the target.
1572	 * Due to the way the routes are chosen, this notion
1573	 * is a bit fuzzy and one might need to check all possible
1574	 * routes.
1575	 */
1576
1577	read_lock_bh(&table->tb6_lock);
1578	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1579restart:
1580	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1581		/*
1582		 * Current route is on-link; redirect is always invalid.
1583		 *
1584		 * Seems, previous statement is not true. It could
1585		 * be node, which looks for us as on-link (f.e. proxy ndisc)
1586		 * But then router serving it might decide, that we should
1587		 * know truth 8)8) --ANK (980726).
1588		 */
1589		if (rt6_check_expired(rt))
1590			continue;
1591		if (!(rt->rt6i_flags & RTF_GATEWAY))
1592			continue;
1593		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1594			continue;
1595		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1596			continue;
1597		break;
1598	}
1599
1600	if (!rt)
1601		rt = net->ipv6.ip6_null_entry;
1602	BACKTRACK(net, &fl6->saddr);
1603out:
1604	dst_hold(&rt->dst);
1605
1606	read_unlock_bh(&table->tb6_lock);
 
 
 
1607
1608	return rt;
1609};
 
 
 
 
 
 
1610
1611static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1612					   const struct in6_addr *src,
1613					   const struct in6_addr *gateway,
1614					   struct net_device *dev)
1615{
1616	int flags = RT6_LOOKUP_F_HAS_SADDR;
1617	struct net *net = dev_net(dev);
1618	struct ip6rd_flowi rdfl = {
1619		.fl6 = {
1620			.flowi6_oif = dev->ifindex,
1621			.daddr = *dest,
1622			.saddr = *src,
1623		},
1624	};
1625
1626	rdfl.gateway = *gateway;
 
 
 
1627
1628	if (rt6_need_strict(dest))
1629		flags |= RT6_LOOKUP_F_IFACE;
 
 
1630
1631	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1632						   flags, __ip6_route_redirect);
1633}
 
 
 
 
 
 
1634
1635void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1636		  const struct in6_addr *saddr,
1637		  struct neighbour *neigh, u8 *lladdr, int on_link)
1638{
1639	struct rt6_info *rt, *nrt = NULL;
1640	struct netevent_redirect netevent;
1641	struct net *net = dev_net(neigh->dev);
1642
1643	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
 
 
 
 
1644
1645	if (rt == net->ipv6.ip6_null_entry) {
1646		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1647		goto out;
1648	}
1649
1650	/*
1651	 *	We have finally decided to accept it.
1652	 */
1653
1654	neigh_update(neigh, lladdr, NUD_STALE,
1655		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1656		     NEIGH_UPDATE_F_OVERRIDE|
1657		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1658				     NEIGH_UPDATE_F_ISROUTER))
1659		     );
1660
1661	/*
1662	 * Redirect received -> path was valid.
1663	 * Look, redirects are sent only in response to data packets,
1664	 * so that this nexthop apparently is reachable. --ANK
1665	 */
1666	dst_confirm(&rt->dst);
1667
1668	/* Duplicate redirect: silently ignore. */
1669	if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1670		goto out;
1671
1672	nrt = ip6_rt_copy(rt, dest);
1673	if (!nrt)
1674		goto out;
1675
1676	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1677	if (on_link)
1678		nrt->rt6i_flags &= ~RTF_GATEWAY;
1679
1680	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1681	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1682
1683	if (ip6_ins_rt(nrt))
1684		goto out;
1685
1686	netevent.old = &rt->dst;
1687	netevent.new = &nrt->dst;
 
 
1688	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1689
1690	if (rt->rt6i_flags & RTF_CACHE) {
 
1691		ip6_del_rt(rt);
1692		return;
1693	}
1694
1695out:
1696	dst_release(&rt->dst);
1697}
1698
1699/*
1700 *	Handle ICMP "packet too big" messages
1701 *	i.e. Path MTU discovery
1702 */
1703
1704static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1705			     struct net *net, u32 pmtu, int ifindex)
1706{
1707	struct rt6_info *rt, *nrt;
1708	int allfrag = 0;
1709again:
1710	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1711	if (!rt)
1712		return;
1713
1714	if (rt6_check_expired(rt)) {
1715		ip6_del_rt(rt);
1716		goto again;
1717	}
1718
1719	if (pmtu >= dst_mtu(&rt->dst))
1720		goto out;
1721
1722	if (pmtu < IPV6_MIN_MTU) {
1723		/*
1724		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1725		 * MTU (1280) and a fragment header should always be included
1726		 * after a node receiving Too Big message reporting PMTU is
1727		 * less than the IPv6 Minimum Link MTU.
1728		 */
1729		pmtu = IPV6_MIN_MTU;
1730		allfrag = 1;
1731	}
1732
1733	/* New mtu received -> path was valid.
1734	   They are sent only in response to data packets,
1735	   so that this nexthop apparently is reachable. --ANK
1736	 */
1737	dst_confirm(&rt->dst);
1738
1739	/* Host route. If it is static, it would be better
1740	   not to override it, but add new one, so that
1741	   when cache entry will expire old pmtu
1742	   would return automatically.
1743	 */
1744	if (rt->rt6i_flags & RTF_CACHE) {
1745		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1746		if (allfrag) {
1747			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1748			features |= RTAX_FEATURE_ALLFRAG;
1749			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1750		}
1751		rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1752		rt->rt6i_flags |= RTF_MODIFIED;
1753		goto out;
1754	}
1755
1756	/* Network route.
1757	   Two cases are possible:
1758	   1. It is connected route. Action: COW
1759	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1760	 */
1761	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1762		nrt = rt6_alloc_cow(rt, daddr, saddr);
1763	else
1764		nrt = rt6_alloc_clone(rt, daddr);
1765
1766	if (nrt) {
1767		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1768		if (allfrag) {
1769			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1770			features |= RTAX_FEATURE_ALLFRAG;
1771			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1772		}
1773
1774		/* According to RFC 1981, detecting PMTU increase shouldn't be
1775		 * happened within 5 mins, the recommended timer is 10 mins.
1776		 * Here this route expiration time is set to ip6_rt_mtu_expires
1777		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1778		 * and detecting PMTU increase will be automatically happened.
1779		 */
1780		rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1781		nrt->rt6i_flags |= RTF_DYNAMIC;
1782		ip6_ins_rt(nrt);
1783	}
1784out:
1785	dst_release(&rt->dst);
1786}
1787
1788void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1789			struct net_device *dev, u32 pmtu)
1790{
1791	struct net *net = dev_net(dev);
1792
1793	/*
1794	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1795	 * is sending along the path" that caused the Packet Too Big message.
1796	 * Since it's not possible in the general case to determine which
1797	 * interface was used to send the original packet, we update the MTU
1798	 * on the interface that will be used to send future packets. We also
1799	 * update the MTU on the interface that received the Packet Too Big in
1800	 * case the original packet was forced out that interface with
1801	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1802	 * correct behaviour, which would be to update the MTU on all
1803	 * interfaces.
1804	 */
1805	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1806	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1807}
1808
1809/*
1810 *	Misc support functions
1811 */
1812
1813static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1814				    const struct in6_addr *dest)
1815{
1816	struct net *net = dev_net(ort->dst.dev);
1817	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1818					    ort->dst.dev, 0);
1819
1820	if (rt) {
1821		rt->dst.input = ort->dst.input;
1822		rt->dst.output = ort->dst.output;
1823		rt->dst.flags |= DST_HOST;
1824
1825		rt->rt6i_dst.addr = *dest;
1826		rt->rt6i_dst.plen = 128;
1827		dst_copy_metrics(&rt->dst, &ort->dst);
1828		rt->dst.error = ort->dst.error;
1829		rt->rt6i_idev = ort->rt6i_idev;
1830		if (rt->rt6i_idev)
1831			in6_dev_hold(rt->rt6i_idev);
1832		rt->dst.lastuse = jiffies;
1833
1834		rt->rt6i_gateway = ort->rt6i_gateway;
1835		rt->rt6i_flags = ort->rt6i_flags;
1836		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1837		    (RTF_DEFAULT | RTF_ADDRCONF))
1838			rt6_set_from(rt, ort);
1839		else
1840			rt6_clean_expires(rt);
1841		rt->rt6i_metric = 0;
1842
1843#ifdef CONFIG_IPV6_SUBTREES
1844		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1845#endif
1846		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1847		rt->rt6i_table = ort->rt6i_table;
1848	}
1849	return rt;
1850}
1851
1852#ifdef CONFIG_IPV6_ROUTE_INFO
1853static struct rt6_info *rt6_get_route_info(struct net *net,
1854					   const struct in6_addr *prefix, int prefixlen,
1855					   const struct in6_addr *gwaddr, int ifindex)
1856{
1857	struct fib6_node *fn;
1858	struct rt6_info *rt = NULL;
1859	struct fib6_table *table;
1860
1861	table = fib6_get_table(net, RT6_TABLE_INFO);
1862	if (!table)
1863		return NULL;
1864
1865	write_lock_bh(&table->tb6_lock);
1866	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1867	if (!fn)
1868		goto out;
1869
1870	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1871		if (rt->dst.dev->ifindex != ifindex)
1872			continue;
1873		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1874			continue;
1875		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1876			continue;
1877		dst_hold(&rt->dst);
1878		break;
1879	}
1880out:
1881	write_unlock_bh(&table->tb6_lock);
1882	return rt;
1883}
1884
1885static struct rt6_info *rt6_add_route_info(struct net *net,
1886					   const struct in6_addr *prefix, int prefixlen,
1887					   const struct in6_addr *gwaddr, int ifindex,
1888					   unsigned int pref)
1889{
1890	struct fib6_config cfg = {
1891		.fc_table	= RT6_TABLE_INFO,
1892		.fc_metric	= IP6_RT_PRIO_USER,
1893		.fc_ifindex	= ifindex,
1894		.fc_dst_len	= prefixlen,
1895		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1896				  RTF_UP | RTF_PREF(pref),
1897		.fc_nlinfo.pid = 0,
1898		.fc_nlinfo.nlh = NULL,
1899		.fc_nlinfo.nl_net = net,
1900	};
1901
 
1902	cfg.fc_dst = *prefix;
1903	cfg.fc_gateway = *gwaddr;
1904
1905	/* We should treat it as a default route if prefix length is 0. */
1906	if (!prefixlen)
1907		cfg.fc_flags |= RTF_DEFAULT;
1908
1909	ip6_route_add(&cfg);
1910
1911	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1912}
1913#endif
1914
1915struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1916{
1917	struct rt6_info *rt;
1918	struct fib6_table *table;
1919
1920	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1921	if (!table)
1922		return NULL;
1923
1924	write_lock_bh(&table->tb6_lock);
1925	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1926		if (dev == rt->dst.dev &&
1927		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1928		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1929			break;
1930	}
1931	if (rt)
1932		dst_hold(&rt->dst);
1933	write_unlock_bh(&table->tb6_lock);
1934	return rt;
1935}
1936
1937struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1938				     struct net_device *dev,
1939				     unsigned int pref)
1940{
1941	struct fib6_config cfg = {
1942		.fc_table	= RT6_TABLE_DFLT,
1943		.fc_metric	= IP6_RT_PRIO_USER,
1944		.fc_ifindex	= dev->ifindex,
1945		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1946				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1947		.fc_nlinfo.pid = 0,
1948		.fc_nlinfo.nlh = NULL,
1949		.fc_nlinfo.nl_net = dev_net(dev),
1950	};
1951
1952	cfg.fc_gateway = *gwaddr;
1953
1954	ip6_route_add(&cfg);
1955
1956	return rt6_get_dflt_router(gwaddr, dev);
1957}
1958
1959void rt6_purge_dflt_routers(struct net *net)
1960{
1961	struct rt6_info *rt;
1962	struct fib6_table *table;
1963
1964	/* NOTE: Keep consistent with rt6_get_dflt_router */
1965	table = fib6_get_table(net, RT6_TABLE_DFLT);
1966	if (!table)
1967		return;
1968
1969restart:
1970	read_lock_bh(&table->tb6_lock);
1971	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1972		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
 
1973			dst_hold(&rt->dst);
1974			read_unlock_bh(&table->tb6_lock);
1975			ip6_del_rt(rt);
1976			goto restart;
1977		}
1978	}
1979	read_unlock_bh(&table->tb6_lock);
1980}
1981
1982static void rtmsg_to_fib6_config(struct net *net,
1983				 struct in6_rtmsg *rtmsg,
1984				 struct fib6_config *cfg)
1985{
1986	memset(cfg, 0, sizeof(*cfg));
1987
1988	cfg->fc_table = RT6_TABLE_MAIN;
 
1989	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1990	cfg->fc_metric = rtmsg->rtmsg_metric;
1991	cfg->fc_expires = rtmsg->rtmsg_info;
1992	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1993	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1994	cfg->fc_flags = rtmsg->rtmsg_flags;
1995
1996	cfg->fc_nlinfo.nl_net = net;
1997
1998	cfg->fc_dst = rtmsg->rtmsg_dst;
1999	cfg->fc_src = rtmsg->rtmsg_src;
2000	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2001}
2002
2003int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2004{
2005	struct fib6_config cfg;
2006	struct in6_rtmsg rtmsg;
2007	int err;
2008
2009	switch(cmd) {
2010	case SIOCADDRT:		/* Add a route */
2011	case SIOCDELRT:		/* Delete a route */
2012		if (!capable(CAP_NET_ADMIN))
2013			return -EPERM;
2014		err = copy_from_user(&rtmsg, arg,
2015				     sizeof(struct in6_rtmsg));
2016		if (err)
2017			return -EFAULT;
2018
2019		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2020
2021		rtnl_lock();
2022		switch (cmd) {
2023		case SIOCADDRT:
2024			err = ip6_route_add(&cfg);
2025			break;
2026		case SIOCDELRT:
2027			err = ip6_route_del(&cfg);
2028			break;
2029		default:
2030			err = -EINVAL;
2031		}
2032		rtnl_unlock();
2033
2034		return err;
2035	}
2036
2037	return -EINVAL;
2038}
2039
2040/*
2041 *	Drop the packet on the floor
2042 */
2043
2044static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2045{
2046	int type;
2047	struct dst_entry *dst = skb_dst(skb);
2048	switch (ipstats_mib_noroutes) {
2049	case IPSTATS_MIB_INNOROUTES:
2050		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2051		if (type == IPV6_ADDR_ANY) {
2052			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2053				      IPSTATS_MIB_INADDRERRORS);
2054			break;
2055		}
2056		/* FALLTHROUGH */
2057	case IPSTATS_MIB_OUTNOROUTES:
2058		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2059			      ipstats_mib_noroutes);
2060		break;
2061	}
2062	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2063	kfree_skb(skb);
2064	return 0;
2065}
2066
2067static int ip6_pkt_discard(struct sk_buff *skb)
2068{
2069	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2070}
2071
2072static int ip6_pkt_discard_out(struct sk_buff *skb)
2073{
2074	skb->dev = skb_dst(skb)->dev;
2075	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2076}
2077
2078#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2079
2080static int ip6_pkt_prohibit(struct sk_buff *skb)
2081{
2082	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2083}
2084
2085static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2086{
2087	skb->dev = skb_dst(skb)->dev;
2088	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2089}
2090
2091#endif
2092
2093/*
2094 *	Allocate a dst for local (unicast / anycast) address.
2095 */
2096
2097struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2098				    const struct in6_addr *addr,
2099				    bool anycast)
2100{
 
2101	struct net *net = dev_net(idev->dev);
2102	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2103					    net->loopback_dev, 0);
2104	int err;
2105
2106	if (!rt) {
2107		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2108		return ERR_PTR(-ENOMEM);
2109	}
2110
2111	in6_dev_hold(idev);
2112
2113	rt->dst.flags |= DST_HOST;
2114	rt->dst.input = ip6_input;
2115	rt->dst.output = ip6_output;
2116	rt->rt6i_idev = idev;
2117	rt->dst.obsolete = -1;
2118
2119	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2120	if (anycast)
2121		rt->rt6i_flags |= RTF_ANYCAST;
2122	else
2123		rt->rt6i_flags |= RTF_LOCAL;
2124	err = rt6_bind_neighbour(rt, rt->dst.dev);
2125	if (err) {
2126		dst_free(&rt->dst);
2127		return ERR_PTR(err);
2128	}
2129
 
2130	rt->rt6i_dst.addr = *addr;
2131	rt->rt6i_dst.plen = 128;
2132	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
 
 
2133
2134	atomic_set(&rt->dst.__refcnt, 1);
2135
2136	return rt;
2137}
2138
2139int ip6_route_get_saddr(struct net *net,
2140			struct rt6_info *rt,
2141			const struct in6_addr *daddr,
2142			unsigned int prefs,
2143			struct in6_addr *saddr)
2144{
2145	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
 
2146	int err = 0;
2147	if (rt->rt6i_prefsrc.plen)
2148		*saddr = rt->rt6i_prefsrc.addr;
2149	else
2150		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2151					 daddr, prefs, saddr);
2152	return err;
2153}
2154
/*
 * Remove a deleted address from prefsrc entries: argument bundle handed to
 * fib6_remove_prefsrc() through fib6_clean_all().
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* only routes on this device; NULL = any */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* the address being removed */
};
2161
2162static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2163{
2164	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2165	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2166	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2167
2168	if (((void *)rt->dst.dev == dev || !dev) &&
2169	    rt != net->ipv6.ip6_null_entry &&
2170	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2171		/* remove prefsrc entry */
2172		rt->rt6i_prefsrc.plen = 0;
2173	}
2174	return 0;
2175}
2176
2177void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2178{
2179	struct net *net = dev_net(ifp->idev->dev);
2180	struct arg_dev_net_ip adni = {
2181		.dev = ifp->idev->dev,
2182		.net = net,
2183		.addr = &ifp->addr,
2184	};
2185	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2186}
2187
/* Argument bundle handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any */
	struct net *net;	/* namespace whose null entry is skipped */
};
2192
2193static int fib6_ifdown(struct rt6_info *rt, void *arg)
2194{
2195	const struct arg_dev_net *adn = arg;
2196	const struct net_device *dev = adn->dev;
2197
2198	if ((rt->dst.dev == dev || !dev) &&
2199	    rt != adn->net->ipv6.ip6_null_entry)
2200		return -1;
2201
2202	return 0;
2203}
2204
2205void rt6_ifdown(struct net *net, struct net_device *dev)
2206{
2207	struct arg_dev_net adn = {
2208		.dev = dev,
2209		.net = net,
2210	};
2211
2212	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2213	icmp6_clean_all(fib6_ifdown, &adn);
 
 
2214}
2215
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2220
2221static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2222{
2223	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2224	struct inet6_dev *idev;
2225
2226	/* In IPv6 pmtu discovery is not optional,
2227	   so that RTAX_MTU lock cannot disable it.
2228	   We still use this lock to block changes
2229	   caused by addrconf/ndisc.
2230	*/
2231
2232	idev = __in6_dev_get(arg->dev);
2233	if (!idev)
2234		return 0;
2235
2236	/* For administrative MTU increase, there is no way to discover
2237	   IPv6 PMTU increase, so PMTU increase should be updated here.
2238	   Since RFC 1981 doesn't include administrative MTU increase
2239	   update PMTU increase is a MUST. (i.e. jumbo frame)
2240	 */
2241	/*
2242	   If new MTU is less than route PMTU, this new MTU will be the
2243	   lowest MTU in the path, update the route PMTU to reflect PMTU
2244	   decreases; if new MTU is greater than route PMTU, and the
2245	   old MTU is the lowest MTU in the path, update the route PMTU
2246	   to reflect the increase. In this case if the other nodes' MTU
2247	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2248	   PMTU discouvery.
2249	 */
2250	if (rt->dst.dev == arg->dev &&
2251	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2252	    (dst_mtu(&rt->dst) >= arg->mtu ||
2253	     (dst_mtu(&rt->dst) < arg->mtu &&
2254	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2255		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
 
 
 
 
 
 
 
 
 
2256	}
2257	return 0;
2258}
2259
2260void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2261{
2262	struct rt6_mtu_change_arg arg = {
2263		.dev = dev,
2264		.mtu = mtu,
2265	};
2266
2267	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2268}
2269
2270static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2271	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2272	[RTA_OIF]               = { .type = NLA_U32 },
2273	[RTA_IIF]		= { .type = NLA_U32 },
2274	[RTA_PRIORITY]          = { .type = NLA_U32 },
2275	[RTA_METRICS]           = { .type = NLA_NESTED },
 
 
 
 
 
2276};
2277
2278static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2279			      struct fib6_config *cfg)
2280{
2281	struct rtmsg *rtm;
2282	struct nlattr *tb[RTA_MAX+1];
 
2283	int err;
2284
2285	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2286	if (err < 0)
2287		goto errout;
2288
2289	err = -EINVAL;
2290	rtm = nlmsg_data(nlh);
2291	memset(cfg, 0, sizeof(*cfg));
2292
2293	cfg->fc_table = rtm->rtm_table;
2294	cfg->fc_dst_len = rtm->rtm_dst_len;
2295	cfg->fc_src_len = rtm->rtm_src_len;
2296	cfg->fc_flags = RTF_UP;
2297	cfg->fc_protocol = rtm->rtm_protocol;
 
2298
2299	if (rtm->rtm_type == RTN_UNREACHABLE)
 
 
 
2300		cfg->fc_flags |= RTF_REJECT;
2301
2302	if (rtm->rtm_type == RTN_LOCAL)
2303		cfg->fc_flags |= RTF_LOCAL;
2304
2305	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
 
 
 
2306	cfg->fc_nlinfo.nlh = nlh;
2307	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2308
2309	if (tb[RTA_GATEWAY]) {
2310		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2311		cfg->fc_flags |= RTF_GATEWAY;
2312	}
2313
2314	if (tb[RTA_DST]) {
2315		int plen = (rtm->rtm_dst_len + 7) >> 3;
2316
2317		if (nla_len(tb[RTA_DST]) < plen)
2318			goto errout;
2319
2320		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2321	}
2322
2323	if (tb[RTA_SRC]) {
2324		int plen = (rtm->rtm_src_len + 7) >> 3;
2325
2326		if (nla_len(tb[RTA_SRC]) < plen)
2327			goto errout;
2328
2329		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2330	}
2331
2332	if (tb[RTA_PREFSRC])
2333		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2334
2335	if (tb[RTA_OIF])
2336		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2337
2338	if (tb[RTA_PRIORITY])
2339		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2340
2341	if (tb[RTA_METRICS]) {
2342		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2343		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2344	}
2345
2346	if (tb[RTA_TABLE])
2347		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2348
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2349	err = 0;
2350errout:
2351	return err;
2352}
2353
2354static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2355{
2356	struct fib6_config cfg;
2357	int err;
2358
2359	err = rtm_to_fib6_config(skb, nlh, &cfg);
2360	if (err < 0)
2361		return err;
2362
2363	return ip6_route_del(&cfg);
 
 
 
2364}
2365
2366static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2367{
2368	struct fib6_config cfg;
2369	int err;
2370
2371	err = rtm_to_fib6_config(skb, nlh, &cfg);
2372	if (err < 0)
2373		return err;
2374
2375	return ip6_route_add(&cfg);
 
 
 
2376}
2377
2378static inline size_t rt6_nlmsg_size(void)
2379{
2380	return NLMSG_ALIGN(sizeof(struct rtmsg))
2381	       + nla_total_size(16) /* RTA_SRC */
2382	       + nla_total_size(16) /* RTA_DST */
2383	       + nla_total_size(16) /* RTA_GATEWAY */
2384	       + nla_total_size(16) /* RTA_PREFSRC */
2385	       + nla_total_size(4) /* RTA_TABLE */
2386	       + nla_total_size(4) /* RTA_IIF */
2387	       + nla_total_size(4) /* RTA_OIF */
2388	       + nla_total_size(4) /* RTA_PRIORITY */
2389	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2390	       + nla_total_size(sizeof(struct rta_cacheinfo));
 
 
 
2391}
2392
/*
 * Append one route message of netlink type @type describing @rt to @skb.
 *
 * @dst/@src: when non-NULL they are reported as RTA_DST/RTA_SRC with a
 *            prefix length of 128 instead of the route's own keys
 *            (RTA_SRC only with CONFIG_IPV6_SUBTREES).
 * @iif:      non-zero input interface; reported as RTA_IIF, or handed to
 *            ip6mr_get_route() for multicast destinations when
 *            CONFIG_IPV6_MROUTE is set.
 * @prefix:   non-zero means only RTF_PREFIX_RT routes are wanted.
 * @nowait:   passed through to ip6mr_get_route().
 * @pid, @seq, @flags: netlink header fields for nlmsg_put().
 *
 * Returns 1 when @prefix filtering skips the route, 0 when the multicast
 * path returns 0 with !@nowait, -EMSGSIZE when the message or an attribute
 * does not fit (the partial message is cancelled), and otherwise the
 * result of nlmsg_end().
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	const struct inet_peer *peer;
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;
	struct neighbour *n;
	u32 ts, tsage;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	/* Fixed rtmsg header. */
	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* The table id goes into both the header and an RTA_TABLE attribute. */
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* Route type: reject beats local; loopback devices count as local. */
	if (rt->rt6i_flags & RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* The stored protocol is overridden based on the route flags. */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags & RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* An explicit @dst is reported as a host address. */
	if (dst) {
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					/* With nowait only -EMSGSIZE is fatal. */
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			/* NOTE: this is the else-branch above when MROUTE is enabled. */
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		/* No input interface: report the source address that would be used. */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	/* A configured preferred source is always reported. */
	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	/* The gateway is taken from the bound neighbour entry, if there is one. */
	rcu_read_lock();
	n = dst_get_neighbour_noref(&rt->dst);
	if (n) {
		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
			rcu_read_unlock();
			goto nla_put_failure;
		}
	}
	rcu_read_unlock();

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;
	/* Remaining lifetime in jiffies: 0 without RTF_EXPIRES, capped at INT_MAX. */
	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->dst.expires - jiffies < INT_MAX)
		expires = rt->dst.expires - jiffies;
	else
		expires = INT_MAX;

	/* TCP timestamp info from the inet_peer, when one is attached. */
	peer = rt->rt6i_peer;
	ts = tsage = 0;
	if (peer && peer->tcp_ts_stamp) {
		ts = peer->tcp_ts;
		tsage = get_seconds() - peer->tcp_ts_stamp;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
			       expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	/* Undo the partially built message. */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2541
2542int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2543{
2544	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2545	int prefix;
2546
2547	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2548		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2549		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2550	} else
2551		prefix = 0;
2552
2553	return rt6_fill_node(arg->net,
2554		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2555		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2556		     prefix, 0, NLM_F_MULTI);
2557}
2558
2559static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2560{
2561	struct net *net = sock_net(in_skb->sk);
2562	struct nlattr *tb[RTA_MAX+1];
2563	struct rt6_info *rt;
2564	struct sk_buff *skb;
2565	struct rtmsg *rtm;
2566	struct flowi6 fl6;
2567	int err, iif = 0, oif = 0;
2568
2569	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2570	if (err < 0)
2571		goto errout;
2572
2573	err = -EINVAL;
2574	memset(&fl6, 0, sizeof(fl6));
2575
2576	if (tb[RTA_SRC]) {
2577		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2578			goto errout;
2579
2580		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2581	}
2582
2583	if (tb[RTA_DST]) {
2584		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2585			goto errout;
2586
2587		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2588	}
2589
2590	if (tb[RTA_IIF])
2591		iif = nla_get_u32(tb[RTA_IIF]);
2592
2593	if (tb[RTA_OIF])
2594		oif = nla_get_u32(tb[RTA_OIF]);
2595
 
 
 
2596	if (iif) {
2597		struct net_device *dev;
2598		int flags = 0;
2599
2600		dev = __dev_get_by_index(net, iif);
2601		if (!dev) {
2602			err = -ENODEV;
2603			goto errout;
2604		}
2605
2606		fl6.flowi6_iif = iif;
2607
2608		if (!ipv6_addr_any(&fl6.saddr))
2609			flags |= RT6_LOOKUP_F_HAS_SADDR;
2610
2611		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2612							       flags);
2613	} else {
2614		fl6.flowi6_oif = oif;
2615
 
 
 
 
 
2616		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2617	}
2618
2619	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2620	if (!skb) {
2621		dst_release(&rt->dst);
2622		err = -ENOBUFS;
2623		goto errout;
2624	}
2625
2626	/* Reserve room for dummy headers, this skb can pass
2627	   through good chunk of routing engine.
2628	 */
2629	skb_reset_mac_header(skb);
2630	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2631
2632	skb_dst_set(skb, &rt->dst);
2633
2634	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2635			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2636			    nlh->nlmsg_seq, 0, 0, 0);
2637	if (err < 0) {
2638		kfree_skb(skb);
2639		goto errout;
2640	}
2641
2642	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2643errout:
2644	return err;
2645}
2646
2647void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
 
2648{
2649	struct sk_buff *skb;
2650	struct net *net = info->nl_net;
2651	u32 seq;
2652	int err;
2653
2654	err = -ENOBUFS;
2655	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2656
2657	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2658	if (!skb)
2659		goto errout;
2660
2661	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2662				event, info->pid, seq, 0, 0, 0);
2663	if (err < 0) {
2664		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2665		WARN_ON(err == -EMSGSIZE);
2666		kfree_skb(skb);
2667		goto errout;
2668	}
2669	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2670		    info->nlh, gfp_any());
2671	return;
2672errout:
2673	if (err < 0)
2674		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2675}
2676
2677static int ip6_route_dev_notify(struct notifier_block *this,
2678				unsigned long event, void *data)
2679{
2680	struct net_device *dev = (struct net_device *)data;
2681	struct net *net = dev_net(dev);
2682
2683	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2684		net->ipv6.ip6_null_entry->dst.dev = dev;
2685		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2686#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2687		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2688		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2689		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2690		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2691#endif
2692	}
2693
2694	return NOTIFY_OK;
2695}
2696
2697/*
2698 *	/proc
2699 */
2700
2701#ifdef CONFIG_PROC_FS
2702
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file references this
 * structure; it may be a leftover -- confirm before relying on it.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2711
2712static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2713{
2714	struct seq_file *m = p_arg;
2715	struct neighbour *n;
2716
2717	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2718
2719#ifdef CONFIG_IPV6_SUBTREES
2720	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2721#else
2722	seq_puts(m, "00000000000000000000000000000000 00 ");
2723#endif
2724	rcu_read_lock();
2725	n = dst_get_neighbour_noref(&rt->dst);
2726	if (n) {
2727		seq_printf(m, "%pi6", n->primary_key);
2728	} else {
2729		seq_puts(m, "00000000000000000000000000000000");
2730	}
2731	rcu_read_unlock();
2732	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2733		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2734		   rt->dst.__use, rt->rt6i_flags,
2735		   rt->dst.dev ? rt->dst.dev->name : "");
2736	return 0;
2737}
2738
2739static int ipv6_route_show(struct seq_file *m, void *v)
2740{
2741	struct net *net = (struct net *)m->private;
2742	fib6_clean_all_ro(net, rt6_info_route, 0, m);
2743	return 0;
2744}
2745
2746static int ipv6_route_open(struct inode *inode, struct file *file)
2747{
2748	return single_open_net(inode, file, ipv6_route_show);
2749}
2750
2751static const struct file_operations ipv6_route_proc_fops = {
2752	.owner		= THIS_MODULE,
2753	.open		= ipv6_route_open,
2754	.read		= seq_read,
2755	.llseek		= seq_lseek,
2756	.release	= single_release_net,
2757};
2758
2759static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2760{
2761	struct net *net = (struct net *)seq->private;
2762	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2763		   net->ipv6.rt6_stats->fib_nodes,
2764		   net->ipv6.rt6_stats->fib_route_nodes,
2765		   net->ipv6.rt6_stats->fib_rt_alloc,
2766		   net->ipv6.rt6_stats->fib_rt_entries,
2767		   net->ipv6.rt6_stats->fib_rt_cache,
2768		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2769		   net->ipv6.rt6_stats->fib_discarded_routes);
2770
2771	return 0;
2772}
2773
2774static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2775{
2776	return single_open_net(inode, file, rt6_stats_seq_show);
2777}
2778
2779static const struct file_operations rt6_stats_seq_fops = {
2780	.owner	 = THIS_MODULE,
2781	.open	 = rt6_stats_seq_open,
2782	.read	 = seq_read,
2783	.llseek	 = seq_lseek,
2784	.release = single_release_net,
2785};
2786#endif	/* CONFIG_PROC_FS */
2787
2788#ifdef CONFIG_SYSCTL
2789
2790static
2791int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2792			      void __user *buffer, size_t *lenp, loff_t *ppos)
2793{
2794	struct net *net;
2795	int delay;
2796	if (!write)
2797		return -EINVAL;
2798
2799	net = (struct net *)ctl->extra1;
2800	delay = net->ipv6.sysctl.flush_delay;
2801	proc_dointvec(ctl, write, buffer, lenp, ppos);
2802	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2803	return 0;
2804}
2805
2806ctl_table ipv6_route_table_template[] = {
2807	{
2808		.procname	=	"flush",
2809		.data		=	&init_net.ipv6.sysctl.flush_delay,
2810		.maxlen		=	sizeof(int),
2811		.mode		=	0200,
2812		.proc_handler	=	ipv6_sysctl_rtcache_flush
2813	},
2814	{
2815		.procname	=	"gc_thresh",
2816		.data		=	&ip6_dst_ops_template.gc_thresh,
2817		.maxlen		=	sizeof(int),
2818		.mode		=	0644,
2819		.proc_handler	=	proc_dointvec,
2820	},
2821	{
2822		.procname	=	"max_size",
2823		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2824		.maxlen		=	sizeof(int),
2825		.mode		=	0644,
2826		.proc_handler	=	proc_dointvec,
2827	},
2828	{
2829		.procname	=	"gc_min_interval",
2830		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2831		.maxlen		=	sizeof(int),
2832		.mode		=	0644,
2833		.proc_handler	=	proc_dointvec_jiffies,
2834	},
2835	{
2836		.procname	=	"gc_timeout",
2837		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2838		.maxlen		=	sizeof(int),
2839		.mode		=	0644,
2840		.proc_handler	=	proc_dointvec_jiffies,
2841	},
2842	{
2843		.procname	=	"gc_interval",
2844		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2845		.maxlen		=	sizeof(int),
2846		.mode		=	0644,
2847		.proc_handler	=	proc_dointvec_jiffies,
2848	},
2849	{
2850		.procname	=	"gc_elasticity",
2851		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2852		.maxlen		=	sizeof(int),
2853		.mode		=	0644,
2854		.proc_handler	=	proc_dointvec,
2855	},
2856	{
2857		.procname	=	"mtu_expires",
2858		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2859		.maxlen		=	sizeof(int),
2860		.mode		=	0644,
2861		.proc_handler	=	proc_dointvec_jiffies,
2862	},
2863	{
2864		.procname	=	"min_adv_mss",
2865		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2866		.maxlen		=	sizeof(int),
2867		.mode		=	0644,
2868		.proc_handler	=	proc_dointvec,
2869	},
2870	{
2871		.procname	=	"gc_min_interval_ms",
2872		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2873		.maxlen		=	sizeof(int),
2874		.mode		=	0644,
2875		.proc_handler	=	proc_dointvec_ms_jiffies,
2876	},
2877	{ }
2878};
2879
2880struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2881{
2882	struct ctl_table *table;
2883
2884	table = kmemdup(ipv6_route_table_template,
2885			sizeof(ipv6_route_table_template),
2886			GFP_KERNEL);
2887
2888	if (table) {
2889		table[0].data = &net->ipv6.sysctl.flush_delay;
2890		table[0].extra1 = net;
2891		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2892		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2893		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2894		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2895		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2896		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2897		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2898		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2899		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
 
 
 
 
2900	}
2901
2902	return table;
2903}
2904#endif
2905
2906static int __net_init ip6_route_net_init(struct net *net)
2907{
2908	int ret = -ENOMEM;
2909
2910	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2911	       sizeof(net->ipv6.ip6_dst_ops));
2912
2913	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2914		goto out_ip6_dst_ops;
2915
2916	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2917					   sizeof(*net->ipv6.ip6_null_entry),
2918					   GFP_KERNEL);
2919	if (!net->ipv6.ip6_null_entry)
2920		goto out_ip6_dst_entries;
2921	net->ipv6.ip6_null_entry->dst.path =
2922		(struct dst_entry *)net->ipv6.ip6_null_entry;
2923	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2924	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2925			 ip6_template_metrics, true);
2926
2927#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2928	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2929					       sizeof(*net->ipv6.ip6_prohibit_entry),
2930					       GFP_KERNEL);
2931	if (!net->ipv6.ip6_prohibit_entry)
2932		goto out_ip6_null_entry;
2933	net->ipv6.ip6_prohibit_entry->dst.path =
2934		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2935	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2936	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2937			 ip6_template_metrics, true);
2938
2939	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2940					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2941					       GFP_KERNEL);
2942	if (!net->ipv6.ip6_blk_hole_entry)
2943		goto out_ip6_prohibit_entry;
2944	net->ipv6.ip6_blk_hole_entry->dst.path =
2945		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2946	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2947	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2948			 ip6_template_metrics, true);
2949#endif
2950
2951	net->ipv6.sysctl.flush_delay = 0;
2952	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2953	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2954	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2955	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2956	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2957	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2958	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2959
2960	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2961
2962	ret = 0;
2963out:
2964	return ret;
2965
2966#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2967out_ip6_prohibit_entry:
2968	kfree(net->ipv6.ip6_prohibit_entry);
2969out_ip6_null_entry:
2970	kfree(net->ipv6.ip6_null_entry);
2971#endif
2972out_ip6_dst_entries:
2973	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2974out_ip6_dst_ops:
2975	goto out;
2976}
2977
2978static void __net_exit ip6_route_net_exit(struct net *net)
2979{
2980	kfree(net->ipv6.ip6_null_entry);
2981#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2982	kfree(net->ipv6.ip6_prohibit_entry);
2983	kfree(net->ipv6.ip6_blk_hole_entry);
2984#endif
2985	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2986}
2987
2988static int __net_init ip6_route_net_init_late(struct net *net)
2989{
2990#ifdef CONFIG_PROC_FS
2991	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2992	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2993#endif
2994	return 0;
2995}
2996
2997static void __net_exit ip6_route_net_exit_late(struct net *net)
2998{
2999#ifdef CONFIG_PROC_FS
3000	proc_net_remove(net, "ipv6_route");
3001	proc_net_remove(net, "rt6_stats");
3002#endif
3003}
3004
3005static struct pernet_operations ip6_route_net_ops = {
3006	.init = ip6_route_net_init,
3007	.exit = ip6_route_net_exit,
3008};
3009
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3010static struct pernet_operations ip6_route_net_late_ops = {
3011	.init = ip6_route_net_init_late,
3012	.exit = ip6_route_net_exit_late,
3013};
3014
3015static struct notifier_block ip6_route_dev_notifier = {
3016	.notifier_call = ip6_route_dev_notify,
3017	.priority = 0,
3018};
3019
3020int __init ip6_route_init(void)
3021{
3022	int ret;
 
3023
3024	ret = -ENOMEM;
3025	ip6_dst_ops_template.kmem_cachep =
3026		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3027				  SLAB_HWCACHE_ALIGN, NULL);
3028	if (!ip6_dst_ops_template.kmem_cachep)
3029		goto out;
3030
3031	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3032	if (ret)
3033		goto out_kmem_cache;
3034
 
 
 
 
3035	ret = register_pernet_subsys(&ip6_route_net_ops);
3036	if (ret)
3037		goto out_dst_entries;
3038
3039	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3040
3041	/* Registering of the loopback is done before this portion of code,
3042	 * the loopback reference in rt6_info will not be taken, do it
3043	 * manually for init_net */
3044	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3045	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3046  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3047	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3048	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3049	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3050	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3051  #endif
3052	ret = fib6_init();
3053	if (ret)
3054		goto out_register_subsys;
3055
3056	ret = xfrm6_init();
3057	if (ret)
3058		goto out_fib6_init;
3059
3060	ret = fib6_rules_init();
3061	if (ret)
3062		goto xfrm6_init;
3063
3064	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3065	if (ret)
3066		goto fib6_rules_init;
3067
3068	ret = -ENOBUFS;
3069	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3070	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3071	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3072		goto out_register_late_subsys;
3073
3074	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3075	if (ret)
3076		goto out_register_late_subsys;
3077
 
 
 
 
 
 
 
3078out:
3079	return ret;
3080
3081out_register_late_subsys:
3082	unregister_pernet_subsys(&ip6_route_net_late_ops);
3083fib6_rules_init:
3084	fib6_rules_cleanup();
3085xfrm6_init:
3086	xfrm6_fini();
3087out_fib6_init:
3088	fib6_gc_cleanup();
3089out_register_subsys:
3090	unregister_pernet_subsys(&ip6_route_net_ops);
 
 
3091out_dst_entries:
3092	dst_entries_destroy(&ip6_dst_blackhole_ops);
3093out_kmem_cache:
3094	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3095	goto out;
3096}
3097
3098void ip6_route_cleanup(void)
3099{
3100	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3101	unregister_pernet_subsys(&ip6_route_net_late_ops);
3102	fib6_rules_cleanup();
3103	xfrm6_fini();
3104	fib6_gc_cleanup();
 
3105	unregister_pernet_subsys(&ip6_route_net_ops);
3106	dst_entries_destroy(&ip6_dst_blackhole_ops);
3107	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3108}