   1/*
   2 *	Linux INET6 implementation
   3 *	FIB front-end.
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*	Changes:
  15 *
  16 *	YOSHIFUJI Hideaki @USAGI
  17 *		reworked default router selection.
  18 *		- respect outgoing interface
  19 *		- select from (probably) reachable routers (i.e.
  20 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *		- always select the same router if it is (probably)
  22 *		reachable.  otherwise, round-robin the list.
  23 *	Ville Nuorvala
  24 *		Fixed routing subtrees.
  25 */
  26
  27#include <linux/capability.h>
  28#include <linux/errno.h>
  29#include <linux/types.h>
  30#include <linux/times.h>
  31#include <linux/socket.h>
  32#include <linux/sockios.h>
  33#include <linux/net.h>
  34#include <linux/route.h>
  35#include <linux/netdevice.h>
  36#include <linux/in6.h>
  37#include <linux/mroute6.h>
  38#include <linux/init.h>
  39#include <linux/if_arp.h>
  40#include <linux/proc_fs.h>
  41#include <linux/seq_file.h>
  42#include <linux/nsproxy.h>
  43#include <linux/slab.h>
  44#include <net/net_namespace.h>
  45#include <net/snmp.h>
  46#include <net/ipv6.h>
  47#include <net/ip6_fib.h>
  48#include <net/ip6_route.h>
  49#include <net/ndisc.h>
  50#include <net/addrconf.h>
  51#include <net/tcp.h>
  52#include <linux/rtnetlink.h>
  53#include <net/dst.h>
  54#include <net/xfrm.h>
  55#include <net/netevent.h>
  56#include <net/netlink.h>
  57
  58#include <asm/uaccess.h>
  59
  60#ifdef CONFIG_SYSCTL
  61#include <linux/sysctl.h>
  62#endif
  63
  64/* Set to 3 to get tracing. */
  65#define RT6_DEBUG 2
  66
  67#if RT6_DEBUG >= 3
  68#define RDBG(x) printk x
  69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
  70#else
  71#define RDBG(x)
  72#define RT6_TRACE(x...) do { ; } while (0)
  73#endif
  74
  75static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
  76				    const struct in6_addr *dest);
  77static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  78static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  79static unsigned int	 ip6_default_mtu(const struct dst_entry *dst);
  80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  81static void		ip6_dst_destroy(struct dst_entry *);
  82static void		ip6_dst_ifdown(struct dst_entry *,
  83				       struct net_device *dev, int how);
  84static int		 ip6_dst_gc(struct dst_ops *ops);
  85
  86static int		ip6_pkt_discard(struct sk_buff *skb);
  87static int		ip6_pkt_discard_out(struct sk_buff *skb);
  88static void		ip6_link_failure(struct sk_buff *skb);
  89static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
  90
  91#ifdef CONFIG_IPV6_ROUTE_INFO
  92static struct rt6_info *rt6_add_route_info(struct net *net,
  93					   const struct in6_addr *prefix, int prefixlen,
  94					   const struct in6_addr *gwaddr, int ifindex,
  95					   unsigned pref);
  96static struct rt6_info *rt6_get_route_info(struct net *net,
  97					   const struct in6_addr *prefix, int prefixlen,
  98					   const struct in6_addr *gwaddr, int ifindex);
  99#endif
 100
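/*
 * Copy-on-write for route metrics: host routes start out sharing a
 * read-only metrics array.  On the first write the metrics are moved
 * into the inet_peer entry for the destination (seeded from the old
 * array when the peer's metrics are fresh) and installed with
 * cmpxchg(), so concurrent writers converge on a single writable copy.
 */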
 101static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 102{
 103	struct rt6_info *rt = (struct rt6_info *) dst;
 104	struct inet_peer *peer;
 105	u32 *p = NULL;
 106
 107	if (!(rt->dst.flags & DST_HOST))
 108		return NULL;
 109
 110	if (!rt->rt6i_peer)
 111		rt6_bind_peer(rt, 1);
 112
 113	peer = rt->rt6i_peer;
 114	if (peer) {
 115		u32 *old_p = __DST_METRICS_PTR(old);
 116		unsigned long prev, new;
 117
 118		p = peer->metrics;
 119		if (inet_metrics_new(peer))
 120			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
 121
 122		new = (unsigned long) p;
 123		prev = cmpxchg(&dst->_metrics, old, new);
 124
 125		if (prev != old) {
 126			p = __DST_METRICS_PTR(prev);
 127			if (prev & DST_METRICS_READ_ONLY)
 128				p = NULL;
 129		}
 130	}
 131	return p;
 132}
 133
 134static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
 135{
 136	return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
 137}
 138
 139static struct dst_ops ip6_dst_ops_template = {
 140	.family			=	AF_INET6,
 141	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 142	.gc			=	ip6_dst_gc,
 143	.gc_thresh		=	1024,
 144	.check			=	ip6_dst_check,
 145	.default_advmss		=	ip6_default_advmss,
 146	.default_mtu		=	ip6_default_mtu,
 147	.cow_metrics		=	ipv6_cow_metrics,
 148	.destroy		=	ip6_dst_destroy,
 149	.ifdown			=	ip6_dst_ifdown,
 150	.negative_advice	=	ip6_negative_advice,
 151	.link_failure		=	ip6_link_failure,
 152	.update_pmtu		=	ip6_rt_update_pmtu,
 153	.local_out		=	__ip6_local_out,
 154	.neigh_lookup		=	ip6_neigh_lookup,
 155};
 156
 157static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
 158{
 159	return 0;
 160}
 161
 162static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 163{
 164}
 165
 166static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
 167					 unsigned long old)
 168{
 169	return NULL;
 170}
 171
 172static struct dst_ops ip6_dst_blackhole_ops = {
 173	.family			=	AF_INET6,
 174	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 175	.destroy		=	ip6_dst_destroy,
 176	.check			=	ip6_dst_check,
 177	.default_mtu		=	ip6_blackhole_default_mtu,
 178	.default_advmss		=	ip6_default_advmss,
 179	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 180	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
 181	.neigh_lookup		=	ip6_neigh_lookup,
 182};
 183
 184static const u32 ip6_template_metrics[RTAX_MAX] = {
 185	[RTAX_HOPLIMIT - 1] = 255,
 186};
 187
 188static struct rt6_info ip6_null_entry_template = {
 189	.dst = {
 190		.__refcnt	= ATOMIC_INIT(1),
 191		.__use		= 1,
 192		.obsolete	= -1,
 193		.error		= -ENETUNREACH,
 194		.input		= ip6_pkt_discard,
 195		.output		= ip6_pkt_discard_out,
 196	},
 197	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 198	.rt6i_protocol  = RTPROT_KERNEL,
 199	.rt6i_metric	= ~(u32) 0,
 200	.rt6i_ref	= ATOMIC_INIT(1),
 201};
 202
 203#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 204
 205static int ip6_pkt_prohibit(struct sk_buff *skb);
 206static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 207
 208static struct rt6_info ip6_prohibit_entry_template = {
 209	.dst = {
 210		.__refcnt	= ATOMIC_INIT(1),
 211		.__use		= 1,
 212		.obsolete	= -1,
 213		.error		= -EACCES,
 214		.input		= ip6_pkt_prohibit,
 215		.output		= ip6_pkt_prohibit_out,
 216	},
 217	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 218	.rt6i_protocol  = RTPROT_KERNEL,
 219	.rt6i_metric	= ~(u32) 0,
 220	.rt6i_ref	= ATOMIC_INIT(1),
 221};
 222
 223static struct rt6_info ip6_blk_hole_entry_template = {
 224	.dst = {
 225		.__refcnt	= ATOMIC_INIT(1),
 226		.__use		= 1,
 227		.obsolete	= -1,
 228		.error		= -EINVAL,
 229		.input		= dst_discard,
 230		.output		= dst_discard,
 231	},
 232	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 233	.rt6i_protocol  = RTPROT_KERNEL,
 234	.rt6i_metric	= ~(u32) 0,
 235	.rt6i_ref	= ATOMIC_INIT(1),
 236};
 237
 238#endif
 239
 240/* allocate dst with ip6_dst_ops */
 241static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
 242					     struct net_device *dev,
 243					     int flags)
 244{
 245	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
 246
 247	if (rt != NULL)
 248		memset(&rt->rt6i_table, 0,
 249			sizeof(*rt) - sizeof(struct dst_entry));
 250
 251	return rt;
 252}
 253
 254static void ip6_dst_destroy(struct dst_entry *dst)
 255{
 256	struct rt6_info *rt = (struct rt6_info *)dst;
 257	struct inet6_dev *idev = rt->rt6i_idev;
 258	struct inet_peer *peer = rt->rt6i_peer;
 259
 260	if (!(rt->dst.flags & DST_HOST))
 261		dst_destroy_metrics_generic(dst);
 262
 263	if (idev != NULL) {
 264		rt->rt6i_idev = NULL;
 265		in6_dev_put(idev);
 266	}
 267	if (peer) {
 268		rt->rt6i_peer = NULL;
 269		inet_putpeer(peer);
 270	}
 271}
 272
 273static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
 274
 275static u32 rt6_peer_genid(void)
 276{
 277	return atomic_read(&__rt6_peer_genid);
 278}
 279
 280void rt6_bind_peer(struct rt6_info *rt, int create)
 281{
 282	struct inet_peer *peer;
 283
 284	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
 285	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
 286		inet_putpeer(peer);
 287	else
 288		rt->rt6i_peer_genid = rt6_peer_genid();
 289}
 290
 291static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 292			   int how)
 293{
 294	struct rt6_info *rt = (struct rt6_info *)dst;
 295	struct inet6_dev *idev = rt->rt6i_idev;
 296	struct net_device *loopback_dev =
 297		dev_net(dev)->loopback_dev;
 298
 299	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
 300		struct inet6_dev *loopback_idev =
 301			in6_dev_get(loopback_dev);
 302		if (loopback_idev != NULL) {
 303			rt->rt6i_idev = loopback_idev;
 304			in6_dev_put(idev);
 305		}
 306	}
 307}
 308
 309static __inline__ int rt6_check_expired(const struct rt6_info *rt)
 310{
 311	return (rt->rt6i_flags & RTF_EXPIRES) &&
 312		time_after(jiffies, rt->rt6i_expires);
 313}
 314
 315static inline int rt6_need_strict(const struct in6_addr *daddr)
 316{
 317	return ipv6_addr_type(daddr) &
 318		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 319}
 320
 321/*
 322 *	Route lookup. Any table->tb6_lock is implied.
 323 */
 324
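/*
 * rt6_device_match(): among routes sharing a prefix, prefer the one
 * whose device matches the requested oif, falling back to a loopback
 * route bound to that interface; with no oif, pick a route whose
 * device owns the given source address.
 */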
 325static inline struct rt6_info *rt6_device_match(struct net *net,
 326						    struct rt6_info *rt,
 327						    const struct in6_addr *saddr,
 328						    int oif,
 329						    int flags)
 330{
 331	struct rt6_info *local = NULL;
 332	struct rt6_info *sprt;
 333
 334	if (!oif && ipv6_addr_any(saddr))
 335		goto out;
 336
 337	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 338		struct net_device *dev = sprt->rt6i_dev;
 339
 340		if (oif) {
 341			if (dev->ifindex == oif)
 342				return sprt;
 343			if (dev->flags & IFF_LOOPBACK) {
 344				if (sprt->rt6i_idev == NULL ||
 345				    sprt->rt6i_idev->dev->ifindex != oif) {
 346					if (flags & RT6_LOOKUP_F_IFACE && oif)
 347						continue;
 348					if (local && (!oif ||
 349						      local->rt6i_idev->dev->ifindex == oif))
 350						continue;
 351				}
 352				local = sprt;
 353			}
 354		} else {
 355			if (ipv6_chk_addr(net, saddr, dev,
 356					  flags & RT6_LOOKUP_F_IFACE))
 357				return sprt;
 358		}
 359	}
 360
 361	if (oif) {
 362		if (local)
 363			return local;
 364
 365		if (flags & RT6_LOOKUP_F_IFACE)
 366			return net->ipv6.ip6_null_entry;
 367	}
 368out:
 369	return rt;
 370}
 371
 372#ifdef CONFIG_IPV6_ROUTER_PREF
 373static void rt6_probe(struct rt6_info *rt)
 374{
 375	struct neighbour *neigh;
 376	/*
 377	 * Okay, this does not seem to be appropriate
 378	 * for now, however, we need to check if it
 379	 * is really so; aka Router Reachability Probing.
 380	 *
 381	 * Router Reachability Probe MUST be rate-limited
 382	 * to no more than one per minute.
 383	 */
 384	rcu_read_lock();
 385	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
 386	if (!neigh || (neigh->nud_state & NUD_VALID))
 387		goto out;
 388	read_lock_bh(&neigh->lock);
 389	if (!(neigh->nud_state & NUD_VALID) &&
 390	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
 391		struct in6_addr mcaddr;
 392		struct in6_addr *target;
 393
 394		neigh->updated = jiffies;
 395		read_unlock_bh(&neigh->lock);
 396
 397		target = (struct in6_addr *)&neigh->primary_key;
 398		addrconf_addr_solict_mult(target, &mcaddr);
 399		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
 400	} else {
 401		read_unlock_bh(&neigh->lock);
 402	}
 403out:
 404	rcu_read_unlock();
 405}
 406#else
 407static inline void rt6_probe(struct rt6_info *rt)
 408{
 409}
 410#endif
 411
 412/*
 413 * Default Router Selection (RFC 2461 6.3.6)
 414 */
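/*
 * rt6_score_route() builds the score from the interface match computed
 * by rt6_check_dev() (2 when the device matches the requested oif or no
 * oif is given, 1 for a loopback route bound to that oif, 0 otherwise)
 * plus the RA router-preference bits when CONFIG_IPV6_ROUTER_PREF is
 * enabled; rt6_check_neigh() acts as a reachability filter under
 * RT6_LOOKUP_F_REACHABLE.  rt6_select() keeps returning the best-scoring
 * router and advances fn->rr_ptr (round robin) only when no reachable
 * router matched.
 */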
 415static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 416{
 417	struct net_device *dev = rt->rt6i_dev;
 418	if (!oif || dev->ifindex == oif)
 419		return 2;
 420	if ((dev->flags & IFF_LOOPBACK) &&
 421	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 422		return 1;
 423	return 0;
 424}
 425
 426static inline int rt6_check_neigh(struct rt6_info *rt)
 427{
 428	struct neighbour *neigh;
 429	int m;
 430
 431	rcu_read_lock();
 432	neigh = dst_get_neighbour(&rt->dst);
 433	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 434	    !(rt->rt6i_flags & RTF_GATEWAY))
 435		m = 1;
 436	else if (neigh) {
 437		read_lock_bh(&neigh->lock);
 438		if (neigh->nud_state & NUD_VALID)
 439			m = 2;
 440#ifdef CONFIG_IPV6_ROUTER_PREF
 441		else if (neigh->nud_state & NUD_FAILED)
 442			m = 0;
 443#endif
 444		else
 445			m = 1;
 446		read_unlock_bh(&neigh->lock);
 447	} else
 448		m = 0;
 449	rcu_read_unlock();
 450	return m;
 451}
 452
 453static int rt6_score_route(struct rt6_info *rt, int oif,
 454			   int strict)
 455{
 456	int m, n;
 457
 458	m = rt6_check_dev(rt, oif);
 459	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 460		return -1;
 461#ifdef CONFIG_IPV6_ROUTER_PREF
 462	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 463#endif
 464	n = rt6_check_neigh(rt);
 465	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
 466		return -1;
 467	return m;
 468}
 469
 470static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 471				   int *mpri, struct rt6_info *match)
 472{
 473	int m;
 474
 475	if (rt6_check_expired(rt))
 476		goto out;
 477
 478	m = rt6_score_route(rt, oif, strict);
 479	if (m < 0)
 480		goto out;
 481
 482	if (m > *mpri) {
 483		if (strict & RT6_LOOKUP_F_REACHABLE)
 484			rt6_probe(match);
 485		*mpri = m;
 486		match = rt;
 487	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
 488		rt6_probe(rt);
 489	}
 490
 491out:
 492	return match;
 493}
 494
 495static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 496				     struct rt6_info *rr_head,
 497				     u32 metric, int oif, int strict)
 498{
 499	struct rt6_info *rt, *match;
 500	int mpri = -1;
 501
 502	match = NULL;
 503	for (rt = rr_head; rt && rt->rt6i_metric == metric;
 504	     rt = rt->dst.rt6_next)
 505		match = find_match(rt, oif, strict, &mpri, match);
 506	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 507	     rt = rt->dst.rt6_next)
 508		match = find_match(rt, oif, strict, &mpri, match);
 509
 510	return match;
 511}
 512
 513static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 514{
 515	struct rt6_info *match, *rt0;
 516	struct net *net;
 517
 518	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
 519		  __func__, fn->leaf, oif);
 520
 521	rt0 = fn->rr_ptr;
 522	if (!rt0)
 523		fn->rr_ptr = rt0 = fn->leaf;
 524
 525	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
 526
 527	if (!match &&
 528	    (strict & RT6_LOOKUP_F_REACHABLE)) {
 529		struct rt6_info *next = rt0->dst.rt6_next;
 530
 531		/* no entries matched; do round-robin */
 532		if (!next || next->rt6i_metric != rt0->rt6i_metric)
 533			next = fn->leaf;
 534
 535		if (next != rt0)
 536			fn->rr_ptr = next;
 537	}
 538
 539	RT6_TRACE("%s() => %p\n",
 540		  __func__, match);
 541
 542	net = dev_net(rt0->rt6i_dev);
 543	return match ? match : net->ipv6.ip6_null_entry;
 544}
 545
 546#ifdef CONFIG_IPV6_ROUTE_INFO
 547int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 548		  const struct in6_addr *gwaddr)
 549{
 550	struct net *net = dev_net(dev);
 551	struct route_info *rinfo = (struct route_info *) opt;
 552	struct in6_addr prefix_buf, *prefix;
 553	unsigned int pref;
 554	unsigned long lifetime;
 555	struct rt6_info *rt;
 556
 557	if (len < sizeof(struct route_info)) {
 558		return -EINVAL;
 559	}
 560
 561	/* Sanity check for prefix_len and length */
 562	if (rinfo->length > 3) {
 563		return -EINVAL;
 564	} else if (rinfo->prefix_len > 128) {
 565		return -EINVAL;
 566	} else if (rinfo->prefix_len > 64) {
 567		if (rinfo->length < 2) {
 568			return -EINVAL;
 569		}
 570	} else if (rinfo->prefix_len > 0) {
 571		if (rinfo->length < 1) {
 572			return -EINVAL;
 573		}
 574	}
 575
 576	pref = rinfo->route_pref;
 577	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 578		return -EINVAL;
 579
 580	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 581
 582	if (rinfo->length == 3)
 583		prefix = (struct in6_addr *)rinfo->prefix;
 584	else {
 585		/* this function is safe */
 586		ipv6_addr_prefix(&prefix_buf,
 587				 (struct in6_addr *)rinfo->prefix,
 588				 rinfo->prefix_len);
 589		prefix = &prefix_buf;
 590	}
 591
 592	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 593				dev->ifindex);
 594
 595	if (rt && !lifetime) {
 596		ip6_del_rt(rt);
 597		rt = NULL;
 598	}
 599
 600	if (!rt && lifetime)
 601		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 602					pref);
 603	else if (rt)
 604		rt->rt6i_flags = RTF_ROUTEINFO |
 605				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 606
 607	if (rt) {
 608		if (!addrconf_finite_timeout(lifetime)) {
 609			rt->rt6i_flags &= ~RTF_EXPIRES;
 610		} else {
 611			rt->rt6i_expires = jiffies + HZ * lifetime;
 612			rt->rt6i_flags |= RTF_EXPIRES;
 613		}
 614		dst_release(&rt->dst);
 615	}
 616	return 0;
 617}
 618#endif
 619
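/*
 * BACKTRACK: if the node chosen by the lookup only yielded the null
 * entry, walk back up the fib6 tree, descending into the parent's
 * source-routing subtree when one exists, and retry from the nearest
 * ancestor that carries route information (RTN_RTINFO); give up at the
 * tree root (RTN_TL_ROOT).
 */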
 620#define BACKTRACK(__net, saddr)			\
 621do { \
 622	if (rt == __net->ipv6.ip6_null_entry) {	\
 623		struct fib6_node *pn; \
 624		while (1) { \
 625			if (fn->fn_flags & RTN_TL_ROOT) \
 626				goto out; \
 627			pn = fn->parent; \
 628			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
 629				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
 630			else \
 631				fn = pn; \
 632			if (fn->fn_flags & RTN_RTINFO) \
 633				goto restart; \
 634		} \
 635	} \
 636} while(0)
 637
 638static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 639					     struct fib6_table *table,
 640					     struct flowi6 *fl6, int flags)
 641{
 642	struct fib6_node *fn;
 643	struct rt6_info *rt;
 644
 645	read_lock_bh(&table->tb6_lock);
 646	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 647restart:
 648	rt = fn->leaf;
 649	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 650	BACKTRACK(net, &fl6->saddr);
 651out:
 652	dst_use(&rt->dst, jiffies);
 653	read_unlock_bh(&table->tb6_lock);
 654	return rt;
 655
 656}
 657
 658struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 659			    const struct in6_addr *saddr, int oif, int strict)
 660{
 661	struct flowi6 fl6 = {
 662		.flowi6_oif = oif,
 663		.daddr = *daddr,
 664	};
 665	struct dst_entry *dst;
 666	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 667
 668	if (saddr) {
 669		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 670		flags |= RT6_LOOKUP_F_HAS_SADDR;
 671	}
 672
 673	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 674	if (dst->error == 0)
 675		return (struct rt6_info *) dst;
 676
 677	dst_release(dst);
 678
 679	return NULL;
 680}
 681
 682EXPORT_SYMBOL(rt6_lookup);
 683
 684/* ip6_ins_rt is called with FREE table->tb6_lock.
 685   It takes the new route entry; if the addition fails for any
 686   reason the route is freed. In any case, if the caller does not
 687   hold a reference, it may be destroyed.
 688 */
 689
 690static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 691{
 692	int err;
 693	struct fib6_table *table;
 694
 695	table = rt->rt6i_table;
 696	write_lock_bh(&table->tb6_lock);
 697	err = fib6_add(&table->tb6_root, rt, info);
 698	write_unlock_bh(&table->tb6_lock);
 699
 700	return err;
 701}
 702
 703int ip6_ins_rt(struct rt6_info *rt)
 704{
 705	struct nl_info info = {
 706		.nl_net = dev_net(rt->rt6i_dev),
 707	};
 708	return __ip6_ins_rt(rt, &info);
 709}
 710
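/*
 * rt6_alloc_cow(): copy a matching network route into a per-destination
 * host route (RTF_CACHE) and bind a neighbour entry for its nexthop.
 * If the neighbour table is full and we are not in softirq context,
 * one round of dst garbage collection is forced with temporarily
 * relaxed gc sysctls before giving up.
 */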
 711static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
 712				      const struct in6_addr *daddr,
 713				      const struct in6_addr *saddr)
 714{
 715	struct rt6_info *rt;
 716
 717	/*
 718	 *	Clone the route.
 719	 */
 720
 721	rt = ip6_rt_copy(ort, daddr);
 722
 723	if (rt) {
 724		struct neighbour *neigh;
 725		int attempts = !in_softirq();
 726
 727		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
 728			if (rt->rt6i_dst.plen != 128 &&
 729			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 730				rt->rt6i_flags |= RTF_ANYCAST;
 731			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
 732		}
 733
 734		rt->rt6i_flags |= RTF_CACHE;
 735
 736#ifdef CONFIG_IPV6_SUBTREES
 737		if (rt->rt6i_src.plen && saddr) {
 738			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
 739			rt->rt6i_src.plen = 128;
 740		}
 741#endif
 742
 743	retry:
 744		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
 745		if (IS_ERR(neigh)) {
 746			struct net *net = dev_net(rt->rt6i_dev);
 747			int saved_rt_min_interval =
 748				net->ipv6.sysctl.ip6_rt_gc_min_interval;
 749			int saved_rt_elasticity =
 750				net->ipv6.sysctl.ip6_rt_gc_elasticity;
 751
 752			if (attempts-- > 0) {
 753				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
 754				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
 755
 756				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
 757
 758				net->ipv6.sysctl.ip6_rt_gc_elasticity =
 759					saved_rt_elasticity;
 760				net->ipv6.sysctl.ip6_rt_gc_min_interval =
 761					saved_rt_min_interval;
 762				goto retry;
 763			}
 764
 765			if (net_ratelimit())
 766				printk(KERN_WARNING
 767				       "ipv6: Neighbour table overflow.\n");
 768			dst_free(&rt->dst);
 769			return NULL;
 770		}
 771		dst_set_neighbour(&rt->dst, neigh);
 772
 773	}
 774
 775	return rt;
 776}
 777
 778static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 779					const struct in6_addr *daddr)
 780{
 781	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
 782
 783	if (rt) {
 784		rt->rt6i_flags |= RTF_CACHE;
 785		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
 786	}
 787	return rt;
 788}
 789
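/*
 * ip6_pol_route(): common resolver for input and output lookups.  Pick
 * the best route with rt6_select(), first requiring a (probably)
 * reachable router when forwarding is disabled and retrying without
 * that requirement if nothing matches.  Unless the result is already a
 * cached host route, it is COWed or cloned into an RTF_CACHE entry and
 * inserted into the table; the insertion can race with other CPUs, so
 * the sequence is retried a few times.
 */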
 790static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
 791				      struct flowi6 *fl6, int flags)
 792{
 793	struct fib6_node *fn;
 794	struct rt6_info *rt, *nrt;
 795	int strict = 0;
 796	int attempts = 3;
 797	int err;
 798	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
 799
 800	strict |= flags & RT6_LOOKUP_F_IFACE;
 801
 802relookup:
 803	read_lock_bh(&table->tb6_lock);
 804
 805restart_2:
 806	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 807
 808restart:
 809	rt = rt6_select(fn, oif, strict | reachable);
 810
 811	BACKTRACK(net, &fl6->saddr);
 812	if (rt == net->ipv6.ip6_null_entry ||
 813	    rt->rt6i_flags & RTF_CACHE)
 814		goto out;
 815
 816	dst_hold(&rt->dst);
 817	read_unlock_bh(&table->tb6_lock);
 818
 819	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
 820		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
 821	else if (!(rt->dst.flags & DST_HOST))
 822		nrt = rt6_alloc_clone(rt, &fl6->daddr);
 823	else
 824		goto out2;
 825
 826	dst_release(&rt->dst);
 827	rt = nrt ? : net->ipv6.ip6_null_entry;
 828
 829	dst_hold(&rt->dst);
 830	if (nrt) {
 831		err = ip6_ins_rt(nrt);
 832		if (!err)
 833			goto out2;
 834	}
 835
 836	if (--attempts <= 0)
 837		goto out2;
 838
 839	/*
 840	 * Race condition! In the gap while table->tb6_lock was
 841	 * released, someone could have inserted this route.  Relookup.
 842	 */
 843	dst_release(&rt->dst);
 844	goto relookup;
 845
 846out:
 847	if (reachable) {
 848		reachable = 0;
 849		goto restart_2;
 850	}
 851	dst_hold(&rt->dst);
 852	read_unlock_bh(&table->tb6_lock);
 853out2:
 854	rt->dst.lastuse = jiffies;
 855	rt->dst.__use++;
 856
 857	return rt;
 858}
 859
 860static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
 861					    struct flowi6 *fl6, int flags)
 862{
 863	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 864}
 865
 866void ip6_route_input(struct sk_buff *skb)
 867{
 868	const struct ipv6hdr *iph = ipv6_hdr(skb);
 869	struct net *net = dev_net(skb->dev);
 870	int flags = RT6_LOOKUP_F_HAS_SADDR;
 871	struct flowi6 fl6 = {
 872		.flowi6_iif = skb->dev->ifindex,
 873		.daddr = iph->daddr,
 874		.saddr = iph->saddr,
 875		.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
 876		.flowi6_mark = skb->mark,
 877		.flowi6_proto = iph->nexthdr,
 878	};
 879
 880	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
 881		flags |= RT6_LOOKUP_F_IFACE;
 882
 883	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
 884}
 885
 886static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
 887					     struct flowi6 *fl6, int flags)
 888{
 889	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 890}
 891
 892struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 893				    struct flowi6 *fl6)
 894{
 895	int flags = 0;
 896
 897	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
 898		flags |= RT6_LOOKUP_F_IFACE;
 899
 900	if (!ipv6_addr_any(&fl6->saddr))
 901		flags |= RT6_LOOKUP_F_HAS_SADDR;
 902	else if (sk)
 903		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 904
 905	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 906}
 907
 908EXPORT_SYMBOL(ip6_route_output);
 909
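/*
 * ip6_blackhole_route(): make a copy of @dst_orig backed by
 * ip6_dst_blackhole_ops, whose input/output handlers simply discard
 * packets and whose PMTU and metrics updates are no-ops; the reference
 * on the original dst is dropped before returning.
 */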
 910struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 911{
 912	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
 913	struct dst_entry *new = NULL;
 914
 915	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
 916	if (rt) {
 917		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
 918
 919		new = &rt->dst;
 920
 921		new->__use = 1;
 922		new->input = dst_discard;
 923		new->output = dst_discard;
 924
 925		if (dst_metrics_read_only(&ort->dst))
 926			new->_metrics = ort->dst._metrics;
 927		else
 928			dst_copy_metrics(new, &ort->dst);
 929		rt->rt6i_idev = ort->rt6i_idev;
 930		if (rt->rt6i_idev)
 931			in6_dev_hold(rt->rt6i_idev);
 932		rt->rt6i_expires = 0;
 933
 934		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
 935		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
 936		rt->rt6i_metric = 0;
 937
 938		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
 939#ifdef CONFIG_IPV6_SUBTREES
 940		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
 941#endif
 942
 943		dst_free(new);
 944	}
 945
 946	dst_release(dst_orig);
 947	return new ? new : ERR_PTR(-ENOMEM);
 948}
 949
 950/*
 951 *	Destination cache support functions
 952 */
 953
 954static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 955{
 956	struct rt6_info *rt;
 957
 958	rt = (struct rt6_info *) dst;
 959
 960	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
 961		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
 962			if (!rt->rt6i_peer)
 963				rt6_bind_peer(rt, 0);
 964			rt->rt6i_peer_genid = rt6_peer_genid();
 965		}
 966		return dst;
 967	}
 968	return NULL;
 969}
 970
 971static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 972{
 973	struct rt6_info *rt = (struct rt6_info *) dst;
 974
 975	if (rt) {
 976		if (rt->rt6i_flags & RTF_CACHE) {
 977			if (rt6_check_expired(rt)) {
 978				ip6_del_rt(rt);
 979				dst = NULL;
 980			}
 981		} else {
 982			dst_release(dst);
 983			dst = NULL;
 984		}
 985	}
 986	return dst;
 987}
 988
 989static void ip6_link_failure(struct sk_buff *skb)
 990{
 991	struct rt6_info *rt;
 992
 993	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
 994
 995	rt = (struct rt6_info *) skb_dst(skb);
 996	if (rt) {
 997		if (rt->rt6i_flags&RTF_CACHE) {
 998			dst_set_expires(&rt->dst, 0);
 999			rt->rt6i_flags |= RTF_EXPIRES;
1000		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1001			rt->rt6i_node->fn_sernum = -1;
1002	}
1003}
1004
1005static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1006{
1007	struct rt6_info *rt6 = (struct rt6_info*)dst;
1008
1009	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1010		rt6->rt6i_flags |= RTF_MODIFIED;
1011		if (mtu < IPV6_MIN_MTU) {
1012			u32 features = dst_metric(dst, RTAX_FEATURES);
1013			mtu = IPV6_MIN_MTU;
1014			features |= RTAX_FEATURE_ALLFRAG;
1015			dst_metric_set(dst, RTAX_FEATURES, features);
1016		}
1017		dst_metric_set(dst, RTAX_MTU, mtu);
1018	}
1019}
1020
1021static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1022{
1023	struct net_device *dev = dst->dev;
1024	unsigned int mtu = dst_mtu(dst);
1025	struct net *net = dev_net(dev);
1026
1027	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1028
1029	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1030		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1031
1032	/*
1033	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1034	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1035	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1036	 * rely only on pmtu discovery"
1037	 */
1038	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1039		mtu = IPV6_MAXPLEN;
1040	return mtu;
1041}
1042
1043static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1044{
1045	unsigned int mtu = IPV6_MIN_MTU;
1046	struct inet6_dev *idev;
1047
1048	rcu_read_lock();
1049	idev = __in6_dev_get(dst->dev);
1050	if (idev)
1051		mtu = idev->cnf.mtu6;
1052	rcu_read_unlock();
1053
1054	return mtu;
1055}
1056
1057static struct dst_entry *icmp6_dst_gc_list;
1058static DEFINE_SPINLOCK(icmp6_dst_lock);
1059
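/*
 * Routes handed out by icmp6_dst_alloc() are never inserted into the
 * FIB; they are chained on icmp6_dst_gc_list instead, and
 * icmp6_dst_gc() walks that list to free entries once their refcount
 * drops to zero.
 */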
1060struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1061				  struct neighbour *neigh,
1062				  const struct in6_addr *addr)
1063{
1064	struct rt6_info *rt;
1065	struct inet6_dev *idev = in6_dev_get(dev);
1066	struct net *net = dev_net(dev);
1067
1068	if (unlikely(idev == NULL))
1069		return NULL;
1070
1071	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1072	if (unlikely(rt == NULL)) {
1073		in6_dev_put(idev);
1074		goto out;
1075	}
1076
1077	if (neigh)
1078		neigh_hold(neigh);
1079	else {
1080		neigh = ndisc_get_neigh(dev, addr);
1081		if (IS_ERR(neigh))
1082			neigh = NULL;
1083	}
1084
1085	rt->dst.flags |= DST_HOST;
1086	rt->dst.output  = ip6_output;
1087	dst_set_neighbour(&rt->dst, neigh);
1088	atomic_set(&rt->dst.__refcnt, 1);
1089	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1090
1091	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1092	rt->rt6i_dst.plen = 128;
1093	rt->rt6i_idev     = idev;
1094
1095	spin_lock_bh(&icmp6_dst_lock);
1096	rt->dst.next = icmp6_dst_gc_list;
1097	icmp6_dst_gc_list = &rt->dst;
1098	spin_unlock_bh(&icmp6_dst_lock);
1099
1100	fib6_force_start_gc(net);
1101
1102out:
1103	return &rt->dst;
1104}
1105
1106int icmp6_dst_gc(void)
1107{
1108	struct dst_entry *dst, **pprev;
1109	int more = 0;
1110
1111	spin_lock_bh(&icmp6_dst_lock);
1112	pprev = &icmp6_dst_gc_list;
1113
1114	while ((dst = *pprev) != NULL) {
1115		if (!atomic_read(&dst->__refcnt)) {
1116			*pprev = dst->next;
1117			dst_free(dst);
1118		} else {
1119			pprev = &dst->next;
1120			++more;
1121		}
1122	}
1123
1124	spin_unlock_bh(&icmp6_dst_lock);
1125
1126	return more;
1127}
1128
1129static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1130			    void *arg)
1131{
1132	struct dst_entry *dst, **pprev;
1133
1134	spin_lock_bh(&icmp6_dst_lock);
1135	pprev = &icmp6_dst_gc_list;
1136	while ((dst = *pprev) != NULL) {
1137		struct rt6_info *rt = (struct rt6_info *) dst;
1138		if (func(rt, arg)) {
1139			*pprev = dst->next;
1140			dst_free(dst);
1141		} else {
1142			pprev = &dst->next;
1143		}
1144	}
1145	spin_unlock_bh(&icmp6_dst_lock);
1146}
1147
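/*
 * ip6_dst_gc(): skip the run entirely if the last GC was recent and the
 * entry count is still below ip6_rt_max_size; otherwise age entries via
 * fib6_run_gc() with an expiry that is bumped on every forced run,
 * reset to ip6_rt_gc_timeout/2 once the table drops below gc_thresh,
 * and decayed by expire >> gc_elasticity on every call.
 */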
1148static int ip6_dst_gc(struct dst_ops *ops)
1149{
1150	unsigned long now = jiffies;
1151	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1152	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1153	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1154	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1155	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1156	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1157	int entries;
1158
1159	entries = dst_entries_get_fast(ops);
1160	if (time_after(rt_last_gc + rt_min_interval, now) &&
1161	    entries <= rt_max_size)
1162		goto out;
1163
1164	net->ipv6.ip6_rt_gc_expire++;
1165	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1166	net->ipv6.ip6_rt_last_gc = now;
1167	entries = dst_entries_get_slow(ops);
1168	if (entries < ops->gc_thresh)
1169		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1170out:
1171	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1172	return entries > rt_max_size;
1173}
1174
1175/* Clean the host part of a prefix. Not necessary in a radix tree,
1176   but it results in cleaner routing tables.
1177
1178   Remove it only when everything works!
1179 */
1180
1181int ip6_dst_hoplimit(struct dst_entry *dst)
1182{
1183	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1184	if (hoplimit == 0) {
1185		struct net_device *dev = dst->dev;
1186		struct inet6_dev *idev;
1187
1188		rcu_read_lock();
1189		idev = __in6_dev_get(dev);
1190		if (idev)
1191			hoplimit = idev->cnf.hop_limit;
1192		else
1193			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1194		rcu_read_unlock();
1195	}
1196	return hoplimit;
1197}
1198EXPORT_SYMBOL(ip6_dst_hoplimit);
1199
1200/*
1201 *
1202 */
1203
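/*
 * ip6_route_add(): build an rt6_info from a fib6_config (filled in from
 * an ioctl or netlink request), validating the prefix lengths, resolving
 * the output device, promoting loopback routes to reject routes,
 * checking that a non-link-local gateway is a unicast address reachable
 * through a non-gateway route, applying any supplied metrics, and
 * finally inserting the result with __ip6_ins_rt().
 */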
1204int ip6_route_add(struct fib6_config *cfg)
1205{
1206	int err;
1207	struct net *net = cfg->fc_nlinfo.nl_net;
1208	struct rt6_info *rt = NULL;
1209	struct net_device *dev = NULL;
1210	struct inet6_dev *idev = NULL;
1211	struct fib6_table *table;
1212	int addr_type;
1213
1214	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1215		return -EINVAL;
1216#ifndef CONFIG_IPV6_SUBTREES
1217	if (cfg->fc_src_len)
1218		return -EINVAL;
1219#endif
1220	if (cfg->fc_ifindex) {
1221		err = -ENODEV;
1222		dev = dev_get_by_index(net, cfg->fc_ifindex);
1223		if (!dev)
1224			goto out;
1225		idev = in6_dev_get(dev);
1226		if (!idev)
1227			goto out;
1228	}
1229
1230	if (cfg->fc_metric == 0)
1231		cfg->fc_metric = IP6_RT_PRIO_USER;
1232
1233	table = fib6_new_table(net, cfg->fc_table);
1234	if (table == NULL) {
1235		err = -ENOBUFS;
1236		goto out;
1237	}
1238
1239	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1240
1241	if (rt == NULL) {
1242		err = -ENOMEM;
1243		goto out;
1244	}
1245
1246	rt->dst.obsolete = -1;
1247	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1248				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1249				0;
1250
1251	if (cfg->fc_protocol == RTPROT_UNSPEC)
1252		cfg->fc_protocol = RTPROT_BOOT;
1253	rt->rt6i_protocol = cfg->fc_protocol;
1254
1255	addr_type = ipv6_addr_type(&cfg->fc_dst);
1256
1257	if (addr_type & IPV6_ADDR_MULTICAST)
1258		rt->dst.input = ip6_mc_input;
1259	else if (cfg->fc_flags & RTF_LOCAL)
1260		rt->dst.input = ip6_input;
1261	else
1262		rt->dst.input = ip6_forward;
1263
1264	rt->dst.output = ip6_output;
1265
1266	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1267	rt->rt6i_dst.plen = cfg->fc_dst_len;
1268	if (rt->rt6i_dst.plen == 128)
1269	       rt->dst.flags |= DST_HOST;
1270
1271	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1272		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1273		if (!metrics) {
1274			err = -ENOMEM;
1275			goto out;
1276		}
1277		dst_init_metrics(&rt->dst, metrics, 0);
1278	}
1279#ifdef CONFIG_IPV6_SUBTREES
1280	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1281	rt->rt6i_src.plen = cfg->fc_src_len;
1282#endif
1283
1284	rt->rt6i_metric = cfg->fc_metric;
1285
1286	/* We cannot add true routes via loopback here,
1287	   they would result in kernel looping; promote them to reject routes
1288	 */
1289	if ((cfg->fc_flags & RTF_REJECT) ||
1290	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1291					      && !(cfg->fc_flags&RTF_LOCAL))) {
1292		/* hold loopback dev/idev if we haven't done so. */
1293		if (dev != net->loopback_dev) {
1294			if (dev) {
1295				dev_put(dev);
1296				in6_dev_put(idev);
1297			}
1298			dev = net->loopback_dev;
1299			dev_hold(dev);
1300			idev = in6_dev_get(dev);
1301			if (!idev) {
1302				err = -ENODEV;
1303				goto out;
1304			}
1305		}
1306		rt->dst.output = ip6_pkt_discard_out;
1307		rt->dst.input = ip6_pkt_discard;
1308		rt->dst.error = -ENETUNREACH;
1309		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1310		goto install_route;
1311	}
1312
1313	if (cfg->fc_flags & RTF_GATEWAY) {
1314		const struct in6_addr *gw_addr;
1315		int gwa_type;
1316
1317		gw_addr = &cfg->fc_gateway;
1318		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1319		gwa_type = ipv6_addr_type(gw_addr);
1320
1321		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1322			struct rt6_info *grt;
1323
1324			/* IPv6 strictly forbids using non-link-local
1325			   addresses as a nexthop address.
1326			   Otherwise, the router will not be able to send redirects.
1327			   That is a good thing, but in some (rare!) circumstances
1328			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1329			   some exceptions. --ANK
1330			 */
1331			err = -EINVAL;
1332			if (!(gwa_type&IPV6_ADDR_UNICAST))
1333				goto out;
1334
1335			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1336
1337			err = -EHOSTUNREACH;
1338			if (grt == NULL)
1339				goto out;
1340			if (dev) {
1341				if (dev != grt->rt6i_dev) {
1342					dst_release(&grt->dst);
1343					goto out;
1344				}
1345			} else {
1346				dev = grt->rt6i_dev;
1347				idev = grt->rt6i_idev;
1348				dev_hold(dev);
1349				in6_dev_hold(grt->rt6i_idev);
1350			}
1351			if (!(grt->rt6i_flags&RTF_GATEWAY))
1352				err = 0;
1353			dst_release(&grt->dst);
1354
1355			if (err)
1356				goto out;
1357		}
1358		err = -EINVAL;
1359		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1360			goto out;
1361	}
1362
1363	err = -ENODEV;
1364	if (dev == NULL)
1365		goto out;
1366
1367	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1368		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1369			err = -EINVAL;
1370			goto out;
1371		}
1372		ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1373		rt->rt6i_prefsrc.plen = 128;
1374	} else
1375		rt->rt6i_prefsrc.plen = 0;
1376
1377	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1378		struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1379		if (IS_ERR(n)) {
1380			err = PTR_ERR(n);
1381			goto out;
1382		}
1383		dst_set_neighbour(&rt->dst, n);
1384	}
1385
1386	rt->rt6i_flags = cfg->fc_flags;
1387
1388install_route:
1389	if (cfg->fc_mx) {
1390		struct nlattr *nla;
1391		int remaining;
1392
1393		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1394			int type = nla_type(nla);
1395
1396			if (type) {
1397				if (type > RTAX_MAX) {
1398					err = -EINVAL;
1399					goto out;
1400				}
1401
1402				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1403			}
1404		}
1405	}
1406
1407	rt->dst.dev = dev;
1408	rt->rt6i_idev = idev;
1409	rt->rt6i_table = table;
1410
1411	cfg->fc_nlinfo.nl_net = dev_net(dev);
1412
1413	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1414
1415out:
1416	if (dev)
1417		dev_put(dev);
1418	if (idev)
1419		in6_dev_put(idev);
1420	if (rt)
1421		dst_free(&rt->dst);
1422	return err;
1423}
1424
1425static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1426{
1427	int err;
1428	struct fib6_table *table;
1429	struct net *net = dev_net(rt->rt6i_dev);
1430
1431	if (rt == net->ipv6.ip6_null_entry)
1432		return -ENOENT;
1433
1434	table = rt->rt6i_table;
1435	write_lock_bh(&table->tb6_lock);
1436
1437	err = fib6_del(rt, info);
1438	dst_release(&rt->dst);
1439
1440	write_unlock_bh(&table->tb6_lock);
1441
1442	return err;
1443}
1444
1445int ip6_del_rt(struct rt6_info *rt)
1446{
1447	struct nl_info info = {
1448		.nl_net = dev_net(rt->rt6i_dev),
1449	};
1450	return __ip6_del_rt(rt, &info);
1451}
1452
1453static int ip6_route_del(struct fib6_config *cfg)
1454{
1455	struct fib6_table *table;
1456	struct fib6_node *fn;
1457	struct rt6_info *rt;
1458	int err = -ESRCH;
1459
1460	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1461	if (table == NULL)
1462		return err;
1463
1464	read_lock_bh(&table->tb6_lock);
1465
1466	fn = fib6_locate(&table->tb6_root,
1467			 &cfg->fc_dst, cfg->fc_dst_len,
1468			 &cfg->fc_src, cfg->fc_src_len);
1469
1470	if (fn) {
1471		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1472			if (cfg->fc_ifindex &&
1473			    (rt->rt6i_dev == NULL ||
1474			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1475				continue;
1476			if (cfg->fc_flags & RTF_GATEWAY &&
1477			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1478				continue;
1479			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1480				continue;
1481			dst_hold(&rt->dst);
1482			read_unlock_bh(&table->tb6_lock);
1483
1484			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1485		}
1486	}
1487	read_unlock_bh(&table->tb6_lock);
1488
1489	return err;
1490}
1491
1492/*
1493 *	Handle redirects
1494 */
1495struct ip6rd_flowi {
1496	struct flowi6 fl6;
1497	struct in6_addr gateway;
1498};
1499
1500static struct rt6_info *__ip6_route_redirect(struct net *net,
1501					     struct fib6_table *table,
1502					     struct flowi6 *fl6,
1503					     int flags)
1504{
1505	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1506	struct rt6_info *rt;
1507	struct fib6_node *fn;
1508
1509	/*
1510	 * Get the "current" route for this destination and
1511	 * check whether the redirect has come from the appropriate router.
1512	 *
1513	 * RFC 2461 specifies that redirects should only be
1514	 * accepted if they come from the nexthop to the target.
1515	 * Due to the way the routes are chosen, this notion
1516	 * is a bit fuzzy and one might need to check all possible
1517	 * routes.
1518	 */
1519
1520	read_lock_bh(&table->tb6_lock);
1521	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1522restart:
1523	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1524		/*
1525		 * Current route is on-link; redirect is always invalid.
1526		 *
1527		 * It seems the previous statement is not true. It could
1528		 * be a node which sees us as on-link (e.g. proxy ndisc),
1529		 * but then the router serving it might decide that we should
1530		 * know the truth 8)8) --ANK (980726).
1531		 */
1532		if (rt6_check_expired(rt))
1533			continue;
1534		if (!(rt->rt6i_flags & RTF_GATEWAY))
1535			continue;
1536		if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1537			continue;
1538		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1539			continue;
1540		break;
1541	}
1542
1543	if (!rt)
1544		rt = net->ipv6.ip6_null_entry;
1545	BACKTRACK(net, &fl6->saddr);
1546out:
1547	dst_hold(&rt->dst);
1548
1549	read_unlock_bh(&table->tb6_lock);
1550
1551	return rt;
1552};
1553
1554static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1555					   const struct in6_addr *src,
1556					   const struct in6_addr *gateway,
1557					   struct net_device *dev)
1558{
1559	int flags = RT6_LOOKUP_F_HAS_SADDR;
1560	struct net *net = dev_net(dev);
1561	struct ip6rd_flowi rdfl = {
1562		.fl6 = {
1563			.flowi6_oif = dev->ifindex,
1564			.daddr = *dest,
1565			.saddr = *src,
1566		},
1567	};
1568
1569	ipv6_addr_copy(&rdfl.gateway, gateway);
1570
1571	if (rt6_need_strict(dest))
1572		flags |= RT6_LOOKUP_F_IFACE;
1573
1574	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1575						   flags, __ip6_route_redirect);
1576}
1577
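/*
 * rt6_redirect(): act on an ICMPv6 redirect.  The current route to the
 * destination is looked up via the redirecting router; if it checks
 * out, the neighbour cache is updated with the advertised link-layer
 * address and a host route (RTF_DYNAMIC|RTF_CACHE) towards the new
 * nexthop is installed, replacing any previously cached entry.
 */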
1578void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1579		  const struct in6_addr *saddr,
1580		  struct neighbour *neigh, u8 *lladdr, int on_link)
1581{
1582	struct rt6_info *rt, *nrt = NULL;
1583	struct netevent_redirect netevent;
1584	struct net *net = dev_net(neigh->dev);
1585
1586	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1587
1588	if (rt == net->ipv6.ip6_null_entry) {
1589		if (net_ratelimit())
1590			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1591			       "for redirect target\n");
1592		goto out;
1593	}
1594
1595	/*
1596	 *	We have finally decided to accept it.
1597	 */
1598
1599	neigh_update(neigh, lladdr, NUD_STALE,
1600		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1601		     NEIGH_UPDATE_F_OVERRIDE|
1602		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1603				     NEIGH_UPDATE_F_ISROUTER))
1604		     );
1605
1606	/*
1607	 * Redirect received -> path was valid.
1608	 * Look, redirects are sent only in response to data packets,
1609	 * so this nexthop is apparently reachable. --ANK
1610	 */
1611	dst_confirm(&rt->dst);
1612
1613	/* Duplicate redirect: silently ignore. */
1614	if (neigh == dst_get_neighbour_raw(&rt->dst))
1615		goto out;
1616
1617	nrt = ip6_rt_copy(rt, dest);
1618	if (nrt == NULL)
1619		goto out;
1620
1621	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1622	if (on_link)
1623		nrt->rt6i_flags &= ~RTF_GATEWAY;
1624
1625	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1626	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1627
1628	if (ip6_ins_rt(nrt))
1629		goto out;
1630
1631	netevent.old = &rt->dst;
1632	netevent.new = &nrt->dst;
1633	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1634
1635	if (rt->rt6i_flags&RTF_CACHE) {
1636		ip6_del_rt(rt);
1637		return;
1638	}
1639
1640out:
1641	dst_release(&rt->dst);
1642}
1643
1644/*
1645 *	Handle ICMP "packet too big" messages
1646 *	i.e. Path MTU discovery
1647 */
1648
1649static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1650			     struct net *net, u32 pmtu, int ifindex)
1651{
1652	struct rt6_info *rt, *nrt;
1653	int allfrag = 0;
1654again:
1655	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1656	if (rt == NULL)
1657		return;
1658
1659	if (rt6_check_expired(rt)) {
1660		ip6_del_rt(rt);
1661		goto again;
1662	}
1663
1664	if (pmtu >= dst_mtu(&rt->dst))
1665		goto out;
1666
1667	if (pmtu < IPV6_MIN_MTU) {
1668		/*
1669		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1670		 * MTU (1280) and a fragment header should always be included
1671		 * when a node receives a Too Big message reporting a PMTU
1672		 * less than the IPv6 Minimum Link MTU.
1673		 */
1674		pmtu = IPV6_MIN_MTU;
1675		allfrag = 1;
1676	}
1677
1678	/* New mtu received -> path was valid.
1679	   Too Big messages are sent only in response to data packets,
1680	   so this nexthop is apparently reachable. --ANK
1681	 */
1682	dst_confirm(&rt->dst);
1683
1684	/* Host route. If it is static, it would be better
1685	   not to override it but to add a new one, so that
1686	   when the cache entry expires the old pmtu
1687	   is restored automatically.
1688	 */
1689	if (rt->rt6i_flags & RTF_CACHE) {
1690		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1691		if (allfrag) {
1692			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1693			features |= RTAX_FEATURE_ALLFRAG;
1694			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1695		}
1696		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1697		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1698		goto out;
1699	}
1700
1701	/* Network route.
1702	   Two cases are possible:
1703	   1. It is a connected route. Action: COW it.
1704	   2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1705	 */
1706	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1707		nrt = rt6_alloc_cow(rt, daddr, saddr);
1708	else
1709		nrt = rt6_alloc_clone(rt, daddr);
1710
1711	if (nrt) {
1712		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1713		if (allfrag) {
1714			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1715			features |= RTAX_FEATURE_ALLFRAG;
1716			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1717		}
1718
1719		/* According to RFC 1981, a PMTU increase should not be
1720		 * detected within 5 minutes; the recommended timer is 10 minutes.
1721		 * Here the route expiration time is set to ip6_rt_mtu_expires,
1722		 * which is 10 minutes. After 10 minutes the decreased pmtu expires
1723		 * and detection of a PMTU increase happens automatically.
1724		 */
1725		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1726		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1727
1728		ip6_ins_rt(nrt);
1729	}
1730out:
1731	dst_release(&rt->dst);
1732}
1733
1734void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1735			struct net_device *dev, u32 pmtu)
1736{
1737	struct net *net = dev_net(dev);
1738
1739	/*
1740	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1741	 * is sending along the path" that caused the Packet Too Big message.
1742	 * Since it's not possible in the general case to determine which
1743	 * interface was used to send the original packet, we update the MTU
1744	 * on the interface that will be used to send future packets. We also
1745	 * update the MTU on the interface that received the Packet Too Big in
1746	 * case the original packet was forced out that interface with
1747	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1748	 * correct behaviour, which would be to update the MTU on all
1749	 * interfaces.
1750	 */
1751	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1752	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1753}
1754
1755/*
1756 *	Misc support functions
1757 */
1758
1759static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1760				    const struct in6_addr *dest)
1761{
1762	struct net *net = dev_net(ort->rt6i_dev);
1763	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1764					    ort->dst.dev, 0);
1765
1766	if (rt) {
1767		rt->dst.input = ort->dst.input;
1768		rt->dst.output = ort->dst.output;
1769		rt->dst.flags |= DST_HOST;
1770
1771		ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1772		rt->rt6i_dst.plen = 128;
1773		dst_copy_metrics(&rt->dst, &ort->dst);
1774		rt->dst.error = ort->dst.error;
1775		rt->rt6i_idev = ort->rt6i_idev;
1776		if (rt->rt6i_idev)
1777			in6_dev_hold(rt->rt6i_idev);
1778		rt->dst.lastuse = jiffies;
1779		rt->rt6i_expires = 0;
1780
1781		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1782		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1783		rt->rt6i_metric = 0;
1784
1785#ifdef CONFIG_IPV6_SUBTREES
1786		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1787#endif
1788		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1789		rt->rt6i_table = ort->rt6i_table;
1790	}
1791	return rt;
1792}
1793
1794#ifdef CONFIG_IPV6_ROUTE_INFO
1795static struct rt6_info *rt6_get_route_info(struct net *net,
1796					   const struct in6_addr *prefix, int prefixlen,
1797					   const struct in6_addr *gwaddr, int ifindex)
1798{
1799	struct fib6_node *fn;
1800	struct rt6_info *rt = NULL;
1801	struct fib6_table *table;
1802
1803	table = fib6_get_table(net, RT6_TABLE_INFO);
1804	if (table == NULL)
1805		return NULL;
1806
1807	write_lock_bh(&table->tb6_lock);
1808	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1809	if (!fn)
1810		goto out;
1811
1812	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1813		if (rt->rt6i_dev->ifindex != ifindex)
1814			continue;
1815		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1816			continue;
1817		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1818			continue;
1819		dst_hold(&rt->dst);
1820		break;
1821	}
1822out:
1823	write_unlock_bh(&table->tb6_lock);
1824	return rt;
1825}
1826
1827static struct rt6_info *rt6_add_route_info(struct net *net,
1828					   const struct in6_addr *prefix, int prefixlen,
1829					   const struct in6_addr *gwaddr, int ifindex,
1830					   unsigned pref)
1831{
1832	struct fib6_config cfg = {
1833		.fc_table	= RT6_TABLE_INFO,
1834		.fc_metric	= IP6_RT_PRIO_USER,
1835		.fc_ifindex	= ifindex,
1836		.fc_dst_len	= prefixlen,
1837		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1838				  RTF_UP | RTF_PREF(pref),
1839		.fc_nlinfo.pid = 0,
1840		.fc_nlinfo.nlh = NULL,
1841		.fc_nlinfo.nl_net = net,
1842	};
1843
1844	ipv6_addr_copy(&cfg.fc_dst, prefix);
1845	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1846
1847	/* We should treat it as a default route if prefix length is 0. */
1848	if (!prefixlen)
1849		cfg.fc_flags |= RTF_DEFAULT;
1850
1851	ip6_route_add(&cfg);
1852
1853	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1854}
1855#endif
1856
1857struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1858{
1859	struct rt6_info *rt;
1860	struct fib6_table *table;
1861
1862	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1863	if (table == NULL)
1864		return NULL;
1865
1866	write_lock_bh(&table->tb6_lock);
1867	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1868		if (dev == rt->rt6i_dev &&
1869		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1870		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1871			break;
1872	}
1873	if (rt)
1874		dst_hold(&rt->dst);
1875	write_unlock_bh(&table->tb6_lock);
1876	return rt;
1877}
1878
1879struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1880				     struct net_device *dev,
1881				     unsigned int pref)
1882{
1883	struct fib6_config cfg = {
1884		.fc_table	= RT6_TABLE_DFLT,
1885		.fc_metric	= IP6_RT_PRIO_USER,
1886		.fc_ifindex	= dev->ifindex,
1887		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1888				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1889		.fc_nlinfo.pid = 0,
1890		.fc_nlinfo.nlh = NULL,
1891		.fc_nlinfo.nl_net = dev_net(dev),
1892	};
1893
1894	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1895
1896	ip6_route_add(&cfg);
1897
1898	return rt6_get_dflt_router(gwaddr, dev);
1899}
1900
1901void rt6_purge_dflt_routers(struct net *net)
1902{
1903	struct rt6_info *rt;
1904	struct fib6_table *table;
1905
1906	/* NOTE: Keep consistent with rt6_get_dflt_router */
1907	table = fib6_get_table(net, RT6_TABLE_DFLT);
1908	if (table == NULL)
1909		return;
1910
1911restart:
1912	read_lock_bh(&table->tb6_lock);
1913	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1914		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1915			dst_hold(&rt->dst);
1916			read_unlock_bh(&table->tb6_lock);
1917			ip6_del_rt(rt);
1918			goto restart;
1919		}
1920	}
1921	read_unlock_bh(&table->tb6_lock);
1922}
1923
1924static void rtmsg_to_fib6_config(struct net *net,
1925				 struct in6_rtmsg *rtmsg,
1926				 struct fib6_config *cfg)
1927{
1928	memset(cfg, 0, sizeof(*cfg));
1929
1930	cfg->fc_table = RT6_TABLE_MAIN;
1931	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1932	cfg->fc_metric = rtmsg->rtmsg_metric;
1933	cfg->fc_expires = rtmsg->rtmsg_info;
1934	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1935	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1936	cfg->fc_flags = rtmsg->rtmsg_flags;
1937
1938	cfg->fc_nlinfo.nl_net = net;
1939
1940	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1941	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1942	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1943}
1944
1945int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1946{
1947	struct fib6_config cfg;
1948	struct in6_rtmsg rtmsg;
1949	int err;
1950
1951	switch(cmd) {
1952	case SIOCADDRT:		/* Add a route */
1953	case SIOCDELRT:		/* Delete a route */
1954		if (!capable(CAP_NET_ADMIN))
1955			return -EPERM;
1956		err = copy_from_user(&rtmsg, arg,
1957				     sizeof(struct in6_rtmsg));
1958		if (err)
1959			return -EFAULT;
1960
1961		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1962
1963		rtnl_lock();
1964		switch (cmd) {
1965		case SIOCADDRT:
1966			err = ip6_route_add(&cfg);
1967			break;
1968		case SIOCDELRT:
1969			err = ip6_route_del(&cfg);
1970			break;
1971		default:
1972			err = -EINVAL;
1973		}
1974		rtnl_unlock();
1975
1976		return err;
1977	}
1978
1979	return -EINVAL;
1980}
1981
1982/*
1983 *	Drop the packet on the floor
1984 */
1985
1986static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1987{
1988	int type;
1989	struct dst_entry *dst = skb_dst(skb);
1990	switch (ipstats_mib_noroutes) {
1991	case IPSTATS_MIB_INNOROUTES:
1992		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1993		if (type == IPV6_ADDR_ANY) {
1994			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1995				      IPSTATS_MIB_INADDRERRORS);
1996			break;
1997		}
1998		/* FALLTHROUGH */
1999	case IPSTATS_MIB_OUTNOROUTES:
2000		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2001			      ipstats_mib_noroutes);
2002		break;
2003	}
2004	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2005	kfree_skb(skb);
2006	return 0;
2007}
2008
2009static int ip6_pkt_discard(struct sk_buff *skb)
2010{
2011	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2012}
2013
2014static int ip6_pkt_discard_out(struct sk_buff *skb)
2015{
2016	skb->dev = skb_dst(skb)->dev;
2017	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2018}
2019
2020#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2021
2022static int ip6_pkt_prohibit(struct sk_buff *skb)
2023{
2024	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2025}
2026
2027static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2028{
2029	skb->dev = skb_dst(skb)->dev;
2030	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2031}
2032
2033#endif
2034
2035/*
2036 *	Allocate a dst for local (unicast / anycast) address.
2037 */
2038
2039struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2040				    const struct in6_addr *addr,
2041				    int anycast)
2042{
2043	struct net *net = dev_net(idev->dev);
2044	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2045					    net->loopback_dev, 0);
2046	struct neighbour *neigh;
2047
2048	if (rt == NULL) {
2049		if (net_ratelimit())
2050			pr_warning("IPv6:  Maximum number of routes reached,"
2051				   " consider increasing route/max_size.\n");
2052		return ERR_PTR(-ENOMEM);
2053	}
2054
2055	in6_dev_hold(idev);
2056
2057	rt->dst.flags |= DST_HOST;
2058	rt->dst.input = ip6_input;
2059	rt->dst.output = ip6_output;
2060	rt->rt6i_idev = idev;
2061	rt->dst.obsolete = -1;
2062
2063	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2064	if (anycast)
2065		rt->rt6i_flags |= RTF_ANYCAST;
2066	else
2067		rt->rt6i_flags |= RTF_LOCAL;
2068	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2069	if (IS_ERR(neigh)) {
2070		dst_free(&rt->dst);
2071
2072		return ERR_CAST(neigh);
2073	}
2074	dst_set_neighbour(&rt->dst, neigh);
2075
2076	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2077	rt->rt6i_dst.plen = 128;
2078	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2079
2080	atomic_set(&rt->dst.__refcnt, 1);
2081
2082	return rt;
2083}
2084
2085int ip6_route_get_saddr(struct net *net,
2086			struct rt6_info *rt,
2087			const struct in6_addr *daddr,
2088			unsigned int prefs,
2089			struct in6_addr *saddr)
2090{
2091	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2092	int err = 0;
2093	if (rt->rt6i_prefsrc.plen)
2094		ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2095	else
2096		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2097					 daddr, prefs, saddr);
2098	return err;
2099}
2100
2101/* remove the deleted IP address from prefsrc entries */
2102struct arg_dev_net_ip {
2103	struct net_device *dev;
2104	struct net *net;
2105	struct in6_addr *addr;
2106};
2107
2108static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2109{
2110	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2111	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2112	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2113
2114	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2115	    rt != net->ipv6.ip6_null_entry &&
2116	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2117		/* remove prefsrc entry */
2118		rt->rt6i_prefsrc.plen = 0;
2119	}
2120	return 0;
2121}
2122
2123void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2124{
2125	struct net *net = dev_net(ifp->idev->dev);
2126	struct arg_dev_net_ip adni = {
2127		.dev = ifp->idev->dev,
2128		.net = net,
2129		.addr = &ifp->addr,
2130	};
2131	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2132}
2133
2134struct arg_dev_net {
2135	struct net_device *dev;
2136	struct net *net;
2137};
2138
2139static int fib6_ifdown(struct rt6_info *rt, void *arg)
2140{
2141	const struct arg_dev_net *adn = arg;
2142	const struct net_device *dev = adn->dev;
2143
2144	if ((rt->rt6i_dev == dev || dev == NULL) &&
2145	    rt != adn->net->ipv6.ip6_null_entry) {
2146		RT6_TRACE("deleted by ifdown %p\n", rt);
2147		return -1;
2148	}
2149	return 0;
2150}
2151
2152void rt6_ifdown(struct net *net, struct net_device *dev)
2153{
2154	struct arg_dev_net adn = {
2155		.dev = dev,
2156		.net = net,
2157	};
2158
2159	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2160	icmp6_clean_all(fib6_ifdown, &adn);
2161}
2162
2163struct rt6_mtu_change_arg
2164{
2165	struct net_device *dev;
2166	unsigned mtu;
2167};
2168
2169static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2170{
2171	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2172	struct inet6_dev *idev;
2173
2174	/* In IPv6, PMTU discovery is not optional,
2175	   so the RTAX_MTU lock cannot disable it.
2176	   We still use this lock to block changes
2177	   caused by addrconf/ndisc.
2178	*/
2179
2180	idev = __in6_dev_get(arg->dev);
2181	if (idev == NULL)
2182		return 0;
2183
2184	/* For an administrative MTU increase, there is no way to discover
2185	   an IPv6 PMTU increase, so the PMTU increase should be updated here.
2186	   Since RFC 1981 doesn't cover administrative MTU increases,
2187	   updating the PMTU on increase is a MUST (e.g. jumbo frames).
2188	 */
2189	/*
2190	   If the new MTU is less than the route PMTU, the new MTU will be the
2191	   lowest MTU in the path; update the route PMTU to reflect the
2192	   decrease.  If the new MTU is greater than the route PMTU, and the
2193	   old MTU was the lowest MTU in the path, update the route PMTU
2194	   to reflect the increase.  In that case, if another node's MTU
2195	   is now the lowest in the path, a Packet Too Big message will
2196	   trigger PMTU discovery again.
2197	 */
2198	if (rt->rt6i_dev == arg->dev &&
2199	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2200	    (dst_mtu(&rt->dst) >= arg->mtu ||
2201	     (dst_mtu(&rt->dst) < arg->mtu &&
2202	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2203		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2204	}
2205	return 0;
2206}
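/*
 * Minimal standalone sketch (not part of this file) of the unlocked-route
 * test above, with the interesting cases as assertions.  It assumes
 * idev->cnf.mtu6 still holds the pre-change device MTU while this walk
 * runs; the numbers are illustrative only.
 */
#include <assert.h>
#include <stdbool.h>

static bool pmtu_needs_update(unsigned int route_pmtu,
			      unsigned int old_dev_mtu,
			      unsigned int new_dev_mtu)
{
	return route_pmtu >= new_dev_mtu ||		/* MTU decrease: clamp down */
	       (route_pmtu < new_dev_mtu &&
		route_pmtu == old_dev_mtu);		/* MTU increase: only if the device was the bottleneck */
}

static void pmtu_examples(void)
{
	/* Device MTU lowered 1500 -> 1280: a cached PMTU of 1500 is clamped. */
	assert(pmtu_needs_update(1500, 1500, 1280));
	/* Device MTU raised 1500 -> 9000: a route already clamped to 1280 by a
	 * remote link keeps its PMTU; only PMTU == old device MTU is raised. */
	assert(!pmtu_needs_update(1280, 1500, 9000));
	assert(pmtu_needs_update(1500, 1500, 9000));
}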
2207
2208void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2209{
2210	struct rt6_mtu_change_arg arg = {
2211		.dev = dev,
2212		.mtu = mtu,
2213	};
2214
2215	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2216}
2217
2218static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2219	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2220	[RTA_OIF]               = { .type = NLA_U32 },
2221	[RTA_IIF]		= { .type = NLA_U32 },
2222	[RTA_PRIORITY]          = { .type = NLA_U32 },
2223	[RTA_METRICS]           = { .type = NLA_NESTED },
 
2224};
2225
2226static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2227			      struct fib6_config *cfg)
2228{
2229	struct rtmsg *rtm;
2230	struct nlattr *tb[RTA_MAX+1];
2231	int err;
2232
2233	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2234	if (err < 0)
2235		goto errout;
2236
2237	err = -EINVAL;
2238	rtm = nlmsg_data(nlh);
2239	memset(cfg, 0, sizeof(*cfg));
2240
2241	cfg->fc_table = rtm->rtm_table;
2242	cfg->fc_dst_len = rtm->rtm_dst_len;
2243	cfg->fc_src_len = rtm->rtm_src_len;
2244	cfg->fc_flags = RTF_UP;
2245	cfg->fc_protocol = rtm->rtm_protocol;
 
2246
2247	if (rtm->rtm_type == RTN_UNREACHABLE)
2248		cfg->fc_flags |= RTF_REJECT;
2249
2250	if (rtm->rtm_type == RTN_LOCAL)
2251		cfg->fc_flags |= RTF_LOCAL;
2252
2253	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2254	cfg->fc_nlinfo.nlh = nlh;
2255	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2256
2257	if (tb[RTA_GATEWAY]) {
2258		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2259		cfg->fc_flags |= RTF_GATEWAY;
2260	}
2261
2262	if (tb[RTA_DST]) {
2263		int plen = (rtm->rtm_dst_len + 7) >> 3;
2264
2265		if (nla_len(tb[RTA_DST]) < plen)
2266			goto errout;
2267
2268		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2269	}
2270
2271	if (tb[RTA_SRC]) {
2272		int plen = (rtm->rtm_src_len + 7) >> 3;
2273
2274		if (nla_len(tb[RTA_SRC]) < plen)
2275			goto errout;
2276
2277		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2278	}
2279
2280	if (tb[RTA_PREFSRC])
2281		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2282
2283	if (tb[RTA_OIF])
2284		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2285
2286	if (tb[RTA_PRIORITY])
2287		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2288
2289	if (tb[RTA_METRICS]) {
2290		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2291		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2292	}
2293
2294	if (tb[RTA_TABLE])
2295		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2296
2297	err = 0;
2298errout:
2299	return err;
2300}
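/*
 * Illustrative userspace sketch (not part of this file): the shape of an
 * RTM_NEWROUTE request that rtm_to_fib6_config() above parses.  Only
 * RTA_DST is appended here; RTA_OIF, RTA_GATEWAY and RTA_PRIORITY follow
 * the same pattern.  Prefix, flags and the missing ACK handling are
 * arbitrary simplifications.
 */
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static int add_v6_route_rtnl(void)
{
	struct {
		struct nlmsghdr nh;
		struct rtmsg	rtm;
		char		attrs[64];
	} req;
	struct sockaddr_nl sa;
	struct rtattr *rta;
	struct in6_addr dst;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0)
		return -1;

	memset(&req, 0, sizeof(req));
	req.nh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct rtmsg));
	req.nh.nlmsg_type  = RTM_NEWROUTE;
	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;

	req.rtm.rtm_family   = AF_INET6;
	req.rtm.rtm_dst_len  = 64;		/* becomes cfg->fc_dst_len */
	req.rtm.rtm_table    = RT_TABLE_MAIN;	/* becomes cfg->fc_table  */
	req.rtm.rtm_protocol = RTPROT_STATIC;
	req.rtm.rtm_scope    = RT_SCOPE_UNIVERSE;
	req.rtm.rtm_type     = RTN_UNICAST;

	/* RTA_DST: 16-byte prefix, copied out via nla_memcpy() above */
	inet_pton(AF_INET6, "2001:db8::", &dst);
	rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nh.nlmsg_len));
	rta->rta_type = RTA_DST;
	rta->rta_len  = RTA_LENGTH(sizeof(dst));
	memcpy(RTA_DATA(rta), &dst, sizeof(dst));
	req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + RTA_ALIGN(rta->rta_len);

	memset(&sa, 0, sizeof(sa));
	sa.nl_family = AF_NETLINK;
	if (sendto(fd, &req, req.nh.nlmsg_len, 0,
		   (struct sockaddr *)&sa, sizeof(sa)) < 0) {
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}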
2301
2302static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2303{
2304	struct fib6_config cfg;
2305	int err;
2306
2307	err = rtm_to_fib6_config(skb, nlh, &cfg);
2308	if (err < 0)
2309		return err;
2310
2311	return ip6_route_del(&cfg);
2312}
2313
2314static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2315{
2316	struct fib6_config cfg;
2317	int err;
2318
2319	err = rtm_to_fib6_config(skb, nlh, &cfg);
2320	if (err < 0)
2321		return err;
2322
2323	return ip6_route_add(&cfg);
2324}
2325
2326static inline size_t rt6_nlmsg_size(void)
2327{
2328	return NLMSG_ALIGN(sizeof(struct rtmsg))
2329	       + nla_total_size(16) /* RTA_SRC */
2330	       + nla_total_size(16) /* RTA_DST */
2331	       + nla_total_size(16) /* RTA_GATEWAY */
2332	       + nla_total_size(16) /* RTA_PREFSRC */
2333	       + nla_total_size(4) /* RTA_TABLE */
2334	       + nla_total_size(4) /* RTA_IIF */
2335	       + nla_total_size(4) /* RTA_OIF */
2336	       + nla_total_size(4) /* RTA_PRIORITY */
2337	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2338	       + nla_total_size(sizeof(struct rta_cacheinfo));
2339}
2340
2341static int rt6_fill_node(struct net *net,
2342			 struct sk_buff *skb, struct rt6_info *rt,
2343			 struct in6_addr *dst, struct in6_addr *src,
2344			 int iif, int type, u32 pid, u32 seq,
2345			 int prefix, int nowait, unsigned int flags)
2346{
2347	struct rtmsg *rtm;
2348	struct nlmsghdr *nlh;
2349	long expires;
2350	u32 table;
2351	struct neighbour *n;
2352
2353	if (prefix) {	/* user wants prefix routes only */
2354		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2355			/* success since this is not a prefix route */
2356			return 1;
2357		}
2358	}
2359
2360	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2361	if (nlh == NULL)
2362		return -EMSGSIZE;
2363
2364	rtm = nlmsg_data(nlh);
2365	rtm->rtm_family = AF_INET6;
2366	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2367	rtm->rtm_src_len = rt->rt6i_src.plen;
2368	rtm->rtm_tos = 0;
2369	if (rt->rt6i_table)
2370		table = rt->rt6i_table->tb6_id;
2371	else
2372		table = RT6_TABLE_UNSPEC;
2373	rtm->rtm_table = table;
2374	NLA_PUT_U32(skb, RTA_TABLE, table);
2375	if (rt->rt6i_flags&RTF_REJECT)
2376		rtm->rtm_type = RTN_UNREACHABLE;
2377	else if (rt->rt6i_flags&RTF_LOCAL)
2378		rtm->rtm_type = RTN_LOCAL;
2379	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2380		rtm->rtm_type = RTN_LOCAL;
2381	else
2382		rtm->rtm_type = RTN_UNICAST;
2383	rtm->rtm_flags = 0;
2384	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2385	rtm->rtm_protocol = rt->rt6i_protocol;
2386	if (rt->rt6i_flags&RTF_DYNAMIC)
2387		rtm->rtm_protocol = RTPROT_REDIRECT;
2388	else if (rt->rt6i_flags & RTF_ADDRCONF)
2389		rtm->rtm_protocol = RTPROT_KERNEL;
2390	else if (rt->rt6i_flags&RTF_DEFAULT)
2391		rtm->rtm_protocol = RTPROT_RA;
 
 
2392
2393	if (rt->rt6i_flags&RTF_CACHE)
2394		rtm->rtm_flags |= RTM_F_CLONED;
2395
2396	if (dst) {
2397		NLA_PUT(skb, RTA_DST, 16, dst);
 
2398		rtm->rtm_dst_len = 128;
2399	} else if (rtm->rtm_dst_len)
2400		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
 
2401#ifdef CONFIG_IPV6_SUBTREES
2402	if (src) {
2403		NLA_PUT(skb, RTA_SRC, 16, src);
 
2404		rtm->rtm_src_len = 128;
2405	} else if (rtm->rtm_src_len)
2406		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
 
2407#endif
2408	if (iif) {
2409#ifdef CONFIG_IPV6_MROUTE
2410		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2411			int err = ip6mr_get_route(net, skb, rtm, nowait);
2412			if (err <= 0) {
2413				if (!nowait) {
2414					if (err == 0)
2415						return 0;
2416					goto nla_put_failure;
2417				} else {
2418					if (err == -EMSGSIZE)
2419						goto nla_put_failure;
2420				}
2421			}
2422		} else
2423#endif
2424			NLA_PUT_U32(skb, RTA_IIF, iif);
 
2425	} else if (dst) {
2426		struct in6_addr saddr_buf;
2427		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2428			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 
2429	}
2430
2431	if (rt->rt6i_prefsrc.plen) {
2432		struct in6_addr saddr_buf;
2433		ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2434		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 
2435	}
2436
2437	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2438		goto nla_put_failure;
2439
2440	rcu_read_lock();
2441	n = dst_get_neighbour(&rt->dst);
2442	if (n)
2443		NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2444	rcu_read_unlock();
2445
2446	if (rt->dst.dev)
2447		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2448
2449	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2450
2451	if (!(rt->rt6i_flags & RTF_EXPIRES))
2452		expires = 0;
2453	else if (rt->rt6i_expires - jiffies < INT_MAX)
2454		expires = rt->rt6i_expires - jiffies;
2455	else
2456		expires = INT_MAX;
2457
2458	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2459			       expires, rt->dst.error) < 0)
2460		goto nla_put_failure;
2461
2462	return nlmsg_end(skb, nlh);
2463
2464nla_put_failure:
2465	nlmsg_cancel(skb, nlh);
2466	return -EMSGSIZE;
2467}
2468
2469int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2470{
2471	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2472	int prefix;
2473
2474	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2475		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2476		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2477	} else
2478		prefix = 0;
2479
2480	return rt6_fill_node(arg->net,
2481		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2482		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2483		     prefix, 0, NLM_F_MULTI);
2484}
2485
2486static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2487{
2488	struct net *net = sock_net(in_skb->sk);
2489	struct nlattr *tb[RTA_MAX+1];
2490	struct rt6_info *rt;
2491	struct sk_buff *skb;
2492	struct rtmsg *rtm;
2493	struct flowi6 fl6;
2494	int err, iif = 0;
2495
2496	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2497	if (err < 0)
2498		goto errout;
2499
2500	err = -EINVAL;
2501	memset(&fl6, 0, sizeof(fl6));
2502
2503	if (tb[RTA_SRC]) {
2504		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2505			goto errout;
2506
2507		ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2508	}
2509
2510	if (tb[RTA_DST]) {
2511		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2512			goto errout;
2513
2514		ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2515	}
2516
2517	if (tb[RTA_IIF])
2518		iif = nla_get_u32(tb[RTA_IIF]);
2519
2520	if (tb[RTA_OIF])
2521		fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2522
2523	if (iif) {
2524		struct net_device *dev;
 
 
2525		dev = __dev_get_by_index(net, iif);
2526		if (!dev) {
2527			err = -ENODEV;
2528			goto errout;
2529		}
2530	}
2531
2532	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2533	if (skb == NULL) {
 
2534		err = -ENOBUFS;
2535		goto errout;
2536	}
2537
2538	/* Reserve room for dummy headers; this skb can pass
2539	   through a good chunk of the routing engine.
2540	 */
2541	skb_reset_mac_header(skb);
2542	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2543
2544	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2545	skb_dst_set(skb, &rt->dst);
2546
2547	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2548			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2549			    nlh->nlmsg_seq, 0, 0, 0);
2550	if (err < 0) {
2551		kfree_skb(skb);
2552		goto errout;
2553	}
2554
2555	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2556errout:
2557	return err;
2558}
2559
2560void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2561{
2562	struct sk_buff *skb;
2563	struct net *net = info->nl_net;
2564	u32 seq;
2565	int err;
2566
2567	err = -ENOBUFS;
2568	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2569
2570	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2571	if (skb == NULL)
2572		goto errout;
2573
2574	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2575				event, info->pid, seq, 0, 0, 0);
2576	if (err < 0) {
2577		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2578		WARN_ON(err == -EMSGSIZE);
2579		kfree_skb(skb);
2580		goto errout;
2581	}
2582	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2583		    info->nlh, gfp_any());
2584	return;
2585errout:
2586	if (err < 0)
2587		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2588}
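/*
 * Illustrative userspace sketch (not part of this file): listening for the
 * RTNLGRP_IPV6_ROUTE notifications that inet6_rt_notify() above sends.
 * The buffer size is arbitrary and attribute parsing is omitted.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static void watch_v6_routes(void)
{
	char buf[8192];
	struct sockaddr_nl sa;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0)
		return;

	memset(&sa, 0, sizeof(sa));
	sa.nl_family = AF_NETLINK;
	sa.nl_groups = RTMGRP_IPV6_ROUTE;	/* legacy bitmask form of RTNLGRP_IPV6_ROUTE */
	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
		close(fd);
		return;
	}

	for (;;) {
		ssize_t len = recv(fd, buf, sizeof(buf), 0);
		struct nlmsghdr *nh = (struct nlmsghdr *)buf;

		for (; len > 0 && NLMSG_OK(nh, len); nh = NLMSG_NEXT(nh, len))
			if (nh->nlmsg_type == RTM_NEWROUTE ||
			    nh->nlmsg_type == RTM_DELROUTE)
				printf("IPv6 route %s\n",
				       nh->nlmsg_type == RTM_NEWROUTE ? "added" : "deleted");
	}
}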
2589
2590static int ip6_route_dev_notify(struct notifier_block *this,
2591				unsigned long event, void *data)
2592{
2593	struct net_device *dev = (struct net_device *)data;
2594	struct net *net = dev_net(dev);
2595
2596	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2597		net->ipv6.ip6_null_entry->dst.dev = dev;
2598		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2599#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2600		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2601		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2602		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2603		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2604#endif
2605	}
2606
2607	return NOTIFY_OK;
2608}
2609
2610/*
2611 *	/proc
2612 */
2613
2614#ifdef CONFIG_PROC_FS
2615
2616struct rt6_proc_arg
2617{
2618	char *buffer;
2619	int offset;
2620	int length;
2621	int skip;
2622	int len;
2623};
2624
2625static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2626{
2627	struct seq_file *m = p_arg;
2628	struct neighbour *n;
2629
2630	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2631
2632#ifdef CONFIG_IPV6_SUBTREES
2633	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2634#else
2635	seq_puts(m, "00000000000000000000000000000000 00 ");
2636#endif
2637	rcu_read_lock();
2638	n = dst_get_neighbour(&rt->dst);
2639	if (n) {
2640		seq_printf(m, "%pi6", n->primary_key);
2641	} else {
2642		seq_puts(m, "00000000000000000000000000000000");
2643	}
2644	rcu_read_unlock();
2645	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2646		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2647		   rt->dst.__use, rt->rt6i_flags,
2648		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2649	return 0;
2650}
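/*
 * Illustrative userspace sketch (not part of this file): parsing the
 * fixed-width records that rt6_info_route() above emits through
 * /proc/net/ipv6_route.  Field order mirrors the seq_printf() calls.
 */
#include <stdio.h>

static void dump_v6_routes(void)
{
	char dst[33], src[33], gw[33], dev[17];
	unsigned int dst_plen, src_plen, metric, refcnt, use, flags;
	FILE *f = fopen("/proc/net/ipv6_route", "r");

	if (!f)
		return;
	/* dst/plen, src/plen, next-hop, metric, refcnt, use, flags, device */
	while (fscanf(f, "%32s %x %32s %x %32s %x %x %x %x %16s",
		      dst, &dst_plen, src, &src_plen, gw,
		      &metric, &refcnt, &use, &flags, dev) == 10)
		printf("%s/%u dev %s metric %u flags %#x\n",
		       dst, dst_plen, dev, metric, flags);
	fclose(f);
}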
2651
2652static int ipv6_route_show(struct seq_file *m, void *v)
2653{
2654	struct net *net = (struct net *)m->private;
2655	fib6_clean_all(net, rt6_info_route, 0, m);
2656	return 0;
2657}
2658
2659static int ipv6_route_open(struct inode *inode, struct file *file)
2660{
2661	return single_open_net(inode, file, ipv6_route_show);
2662}
2663
2664static const struct file_operations ipv6_route_proc_fops = {
2665	.owner		= THIS_MODULE,
2666	.open		= ipv6_route_open,
2667	.read		= seq_read,
2668	.llseek		= seq_lseek,
2669	.release	= single_release_net,
2670};
2671
2672static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2673{
2674	struct net *net = (struct net *)seq->private;
2675	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2676		   net->ipv6.rt6_stats->fib_nodes,
2677		   net->ipv6.rt6_stats->fib_route_nodes,
2678		   net->ipv6.rt6_stats->fib_rt_alloc,
2679		   net->ipv6.rt6_stats->fib_rt_entries,
2680		   net->ipv6.rt6_stats->fib_rt_cache,
2681		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2682		   net->ipv6.rt6_stats->fib_discarded_routes);
2683
2684	return 0;
2685}
2686
2687static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2688{
2689	return single_open_net(inode, file, rt6_stats_seq_show);
2690}
2691
2692static const struct file_operations rt6_stats_seq_fops = {
2693	.owner	 = THIS_MODULE,
2694	.open	 = rt6_stats_seq_open,
2695	.read	 = seq_read,
2696	.llseek	 = seq_lseek,
2697	.release = single_release_net,
2698};
2699#endif	/* CONFIG_PROC_FS */
2700
2701#ifdef CONFIG_SYSCTL
2702
2703static
2704int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2705			      void __user *buffer, size_t *lenp, loff_t *ppos)
2706{
2707	struct net *net;
2708	int delay;
2709	if (!write)
2710		return -EINVAL;
2711
2712	net = (struct net *)ctl->extra1;
2713	delay = net->ipv6.sysctl.flush_delay;
2714	proc_dointvec(ctl, write, buffer, lenp, ppos);
2715	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2716	return 0;
2717}
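/*
 * Illustrative userspace sketch (not part of this file): exercising the
 * write-only "flush" entry above, assuming the table is registered under
 * net.ipv6.route as usual.  Any integer written lands in flush_delay and
 * kicks fib6_run_gc(); reads return -EINVAL.
 */
#include <fcntl.h>
#include <unistd.h>

static void flush_v6_route_cache(void)
{
	int fd = open("/proc/sys/net/ipv6/route/flush", O_WRONLY);	/* mode 0200, root only */

	if (fd >= 0) {
		write(fd, "1\n", 2);	/* value is parsed by proc_dointvec() above */
		close(fd);
	}
}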
2718
2719ctl_table ipv6_route_table_template[] = {
2720	{
2721		.procname	=	"flush",
2722		.data		=	&init_net.ipv6.sysctl.flush_delay,
2723		.maxlen		=	sizeof(int),
2724		.mode		=	0200,
2725		.proc_handler	=	ipv6_sysctl_rtcache_flush
2726	},
2727	{
2728		.procname	=	"gc_thresh",
2729		.data		=	&ip6_dst_ops_template.gc_thresh,
2730		.maxlen		=	sizeof(int),
2731		.mode		=	0644,
2732		.proc_handler	=	proc_dointvec,
2733	},
2734	{
2735		.procname	=	"max_size",
2736		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2737		.maxlen		=	sizeof(int),
2738		.mode		=	0644,
2739		.proc_handler	=	proc_dointvec,
2740	},
2741	{
2742		.procname	=	"gc_min_interval",
2743		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2744		.maxlen		=	sizeof(int),
2745		.mode		=	0644,
2746		.proc_handler	=	proc_dointvec_jiffies,
2747	},
2748	{
2749		.procname	=	"gc_timeout",
2750		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2751		.maxlen		=	sizeof(int),
2752		.mode		=	0644,
2753		.proc_handler	=	proc_dointvec_jiffies,
2754	},
2755	{
2756		.procname	=	"gc_interval",
2757		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2758		.maxlen		=	sizeof(int),
2759		.mode		=	0644,
2760		.proc_handler	=	proc_dointvec_jiffies,
2761	},
2762	{
2763		.procname	=	"gc_elasticity",
2764		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2765		.maxlen		=	sizeof(int),
2766		.mode		=	0644,
2767		.proc_handler	=	proc_dointvec,
2768	},
2769	{
2770		.procname	=	"mtu_expires",
2771		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2772		.maxlen		=	sizeof(int),
2773		.mode		=	0644,
2774		.proc_handler	=	proc_dointvec_jiffies,
2775	},
2776	{
2777		.procname	=	"min_adv_mss",
2778		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2779		.maxlen		=	sizeof(int),
2780		.mode		=	0644,
2781		.proc_handler	=	proc_dointvec,
2782	},
2783	{
2784		.procname	=	"gc_min_interval_ms",
2785		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2786		.maxlen		=	sizeof(int),
2787		.mode		=	0644,
2788		.proc_handler	=	proc_dointvec_ms_jiffies,
2789	},
2790	{ }
2791};
2792
2793struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2794{
2795	struct ctl_table *table;
2796
2797	table = kmemdup(ipv6_route_table_template,
2798			sizeof(ipv6_route_table_template),
2799			GFP_KERNEL);
2800
2801	if (table) {
2802		table[0].data = &net->ipv6.sysctl.flush_delay;
2803		table[0].extra1 = net;
2804		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2805		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2806		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2807		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2808		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2809		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2810		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2811		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2812		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2813	}
2814
2815	return table;
2816}
2817#endif
2818
2819static int __net_init ip6_route_net_init(struct net *net)
2820{
2821	int ret = -ENOMEM;
2822
2823	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2824	       sizeof(net->ipv6.ip6_dst_ops));
2825
2826	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2827		goto out_ip6_dst_ops;
2828
2829	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2830					   sizeof(*net->ipv6.ip6_null_entry),
2831					   GFP_KERNEL);
2832	if (!net->ipv6.ip6_null_entry)
2833		goto out_ip6_dst_entries;
2834	net->ipv6.ip6_null_entry->dst.path =
2835		(struct dst_entry *)net->ipv6.ip6_null_entry;
2836	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2837	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2838			 ip6_template_metrics, true);
2839
2840#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2841	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2842					       sizeof(*net->ipv6.ip6_prohibit_entry),
2843					       GFP_KERNEL);
2844	if (!net->ipv6.ip6_prohibit_entry)
2845		goto out_ip6_null_entry;
2846	net->ipv6.ip6_prohibit_entry->dst.path =
2847		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2848	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2849	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2850			 ip6_template_metrics, true);
2851
2852	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2853					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2854					       GFP_KERNEL);
2855	if (!net->ipv6.ip6_blk_hole_entry)
2856		goto out_ip6_prohibit_entry;
2857	net->ipv6.ip6_blk_hole_entry->dst.path =
2858		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2859	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2860	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2861			 ip6_template_metrics, true);
2862#endif
2863
2864	net->ipv6.sysctl.flush_delay = 0;
2865	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2866	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2867	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2868	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2869	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2870	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2871	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2872
2873#ifdef CONFIG_PROC_FS
2874	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2875	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2876#endif
2877	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2878
2879	ret = 0;
2880out:
2881	return ret;
2882
2883#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2884out_ip6_prohibit_entry:
2885	kfree(net->ipv6.ip6_prohibit_entry);
2886out_ip6_null_entry:
2887	kfree(net->ipv6.ip6_null_entry);
2888#endif
2889out_ip6_dst_entries:
2890	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2891out_ip6_dst_ops:
2892	goto out;
2893}
2894
2895static void __net_exit ip6_route_net_exit(struct net *net)
2896{
2897#ifdef CONFIG_PROC_FS
2898	proc_net_remove(net, "ipv6_route");
2899	proc_net_remove(net, "rt6_stats");
2900#endif
2901	kfree(net->ipv6.ip6_null_entry);
2902#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2903	kfree(net->ipv6.ip6_prohibit_entry);
2904	kfree(net->ipv6.ip6_blk_hole_entry);
2905#endif
2906	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2907}
2908
2909static struct pernet_operations ip6_route_net_ops = {
2910	.init = ip6_route_net_init,
2911	.exit = ip6_route_net_exit,
2912};
2913
2914static struct notifier_block ip6_route_dev_notifier = {
2915	.notifier_call = ip6_route_dev_notify,
2916	.priority = 0,
2917};
2918
2919int __init ip6_route_init(void)
2920{
2921	int ret;
2922
2923	ret = -ENOMEM;
2924	ip6_dst_ops_template.kmem_cachep =
2925		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2926				  SLAB_HWCACHE_ALIGN, NULL);
2927	if (!ip6_dst_ops_template.kmem_cachep)
2928		goto out;
2929
2930	ret = dst_entries_init(&ip6_dst_blackhole_ops);
2931	if (ret)
2932		goto out_kmem_cache;
2933
2934	ret = register_pernet_subsys(&ip6_route_net_ops);
2935	if (ret)
2936		goto out_dst_entries;
2937
2938	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2939
2940	/* The loopback device is registered before this portion of code runs,
2941	 * so the loopback reference in rt6_info will not be taken automatically;
2942	 * do it manually for init_net */
2943	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2944	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2945  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2946	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2947	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2948	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2949	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2950  #endif
2951	ret = fib6_init();
2952	if (ret)
2953		goto out_register_subsys;
2954
2955	ret = xfrm6_init();
2956	if (ret)
2957		goto out_fib6_init;
2958
2959	ret = fib6_rules_init();
2960	if (ret)
2961		goto xfrm6_init;
2962
2963	ret = -ENOBUFS;
2964	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2965	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2966	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
2967		goto fib6_rules_init;
2968
2969	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2970	if (ret)
2971		goto fib6_rules_init;
2972
2973out:
2974	return ret;
2975
2976fib6_rules_init:
2977	fib6_rules_cleanup();
2978xfrm6_init:
2979	xfrm6_fini();
2980out_fib6_init:
2981	fib6_gc_cleanup();
2982out_register_subsys:
2983	unregister_pernet_subsys(&ip6_route_net_ops);
2984out_dst_entries:
2985	dst_entries_destroy(&ip6_dst_blackhole_ops);
2986out_kmem_cache:
2987	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2988	goto out;
2989}
2990
2991void ip6_route_cleanup(void)
2992{
2993	unregister_netdevice_notifier(&ip6_route_dev_notifier);
 
2994	fib6_rules_cleanup();
2995	xfrm6_fini();
2996	fib6_gc_cleanup();
 
2997	unregister_pernet_subsys(&ip6_route_net_ops);
2998	dst_entries_destroy(&ip6_dst_blackhole_ops);
2999	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3000}
v3.15
   1/*
   2 *	Linux INET6 implementation
   3 *	FIB front-end.
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*	Changes:
  15 *
  16 *	YOSHIFUJI Hideaki @USAGI
  17 *		reworked default router selection.
  18 *		- respect outgoing interface
  19 *		- select from (probably) reachable routers (i.e.
  20 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *		- always select the same router if it is (probably)
  22 *		reachable.  otherwise, round-robin the list.
  23 *	Ville Nuorvala
  24 *		Fixed routing subtrees.
  25 */
  26
  27#define pr_fmt(fmt) "IPv6: " fmt
  28
  29#include <linux/capability.h>
  30#include <linux/errno.h>
  31#include <linux/export.h>
  32#include <linux/types.h>
  33#include <linux/times.h>
  34#include <linux/socket.h>
  35#include <linux/sockios.h>
  36#include <linux/net.h>
  37#include <linux/route.h>
  38#include <linux/netdevice.h>
  39#include <linux/in6.h>
  40#include <linux/mroute6.h>
  41#include <linux/init.h>
  42#include <linux/if_arp.h>
  43#include <linux/proc_fs.h>
  44#include <linux/seq_file.h>
  45#include <linux/nsproxy.h>
  46#include <linux/slab.h>
  47#include <net/net_namespace.h>
  48#include <net/snmp.h>
  49#include <net/ipv6.h>
  50#include <net/ip6_fib.h>
  51#include <net/ip6_route.h>
  52#include <net/ndisc.h>
  53#include <net/addrconf.h>
  54#include <net/tcp.h>
  55#include <linux/rtnetlink.h>
  56#include <net/dst.h>
  57#include <net/xfrm.h>
  58#include <net/netevent.h>
  59#include <net/netlink.h>
  60#include <net/nexthop.h>
  61
  62#include <asm/uaccess.h>
  63
  64#ifdef CONFIG_SYSCTL
  65#include <linux/sysctl.h>
  66#endif
  67
  68enum rt6_nud_state {
  69	RT6_NUD_FAIL_HARD = -3,
  70	RT6_NUD_FAIL_PROBE = -2,
  71	RT6_NUD_FAIL_DO_RR = -1,
  72	RT6_NUD_SUCCEED = 1
  73};
  74
  75static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
  76				    const struct in6_addr *dest);
  77static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  78static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  79static unsigned int	 ip6_mtu(const struct dst_entry *dst);
  80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  81static void		ip6_dst_destroy(struct dst_entry *);
  82static void		ip6_dst_ifdown(struct dst_entry *,
  83				       struct net_device *dev, int how);
  84static int		 ip6_dst_gc(struct dst_ops *ops);
  85
  86static int		ip6_pkt_discard(struct sk_buff *skb);
  87static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
  88static int		ip6_pkt_prohibit(struct sk_buff *skb);
  89static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
  90static void		ip6_link_failure(struct sk_buff *skb);
  91static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
  92					   struct sk_buff *skb, u32 mtu);
  93static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
  94					struct sk_buff *skb);
  95static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
  96
  97#ifdef CONFIG_IPV6_ROUTE_INFO
  98static struct rt6_info *rt6_add_route_info(struct net *net,
  99					   const struct in6_addr *prefix, int prefixlen,
 100					   const struct in6_addr *gwaddr, int ifindex,
 101					   unsigned int pref);
 102static struct rt6_info *rt6_get_route_info(struct net *net,
 103					   const struct in6_addr *prefix, int prefixlen,
 104					   const struct in6_addr *gwaddr, int ifindex);
 105#endif
 106
 107static void rt6_bind_peer(struct rt6_info *rt, int create)
 108{
 109	struct inet_peer_base *base;
 110	struct inet_peer *peer;
 111
 112	base = inetpeer_base_ptr(rt->_rt6i_peer);
 113	if (!base)
 114		return;
 115
 116	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
 117	if (peer) {
 118		if (!rt6_set_peer(rt, peer))
 119			inet_putpeer(peer);
 120	}
 121}
 122
 123static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
 124{
 125	if (rt6_has_peer(rt))
 126		return rt6_peer_ptr(rt);
 127
 128	rt6_bind_peer(rt, create);
 129	return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
 130}
 131
 132static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
 133{
 134	return __rt6_get_peer(rt, 1);
 135}
 136
 137static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 138{
 139	struct rt6_info *rt = (struct rt6_info *) dst;
 140	struct inet_peer *peer;
 141	u32 *p = NULL;
 142
 143	if (!(rt->dst.flags & DST_HOST))
 144		return NULL;
 145
 146	peer = rt6_get_peer_create(rt);
 147	if (peer) {
 148		u32 *old_p = __DST_METRICS_PTR(old);
 149		unsigned long prev, new;
 150
 151		p = peer->metrics;
 152		if (inet_metrics_new(peer) ||
 153		    (old & DST_METRICS_FORCE_OVERWRITE))
 154			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
 155
 156		new = (unsigned long) p;
 157		prev = cmpxchg(&dst->_metrics, old, new);
 158
 159		if (prev != old) {
 160			p = __DST_METRICS_PTR(prev);
 161			if (prev & DST_METRICS_READ_ONLY)
 162				p = NULL;
 163		}
 164	}
 165	return p;
 166}
 167
 168static inline const void *choose_neigh_daddr(struct rt6_info *rt,
 169					     struct sk_buff *skb,
 170					     const void *daddr)
 171{
 172	struct in6_addr *p = &rt->rt6i_gateway;
 173
 174	if (!ipv6_addr_any(p))
 175		return (const void *) p;
 176	else if (skb)
 177		return &ipv6_hdr(skb)->daddr;
 178	return daddr;
 179}
 180
 181static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
 182					  struct sk_buff *skb,
 183					  const void *daddr)
 184{
 185	struct rt6_info *rt = (struct rt6_info *) dst;
 186	struct neighbour *n;
 187
 188	daddr = choose_neigh_daddr(rt, skb, daddr);
 189	n = __ipv6_neigh_lookup(dst->dev, daddr);
 190	if (n)
 191		return n;
 192	return neigh_create(&nd_tbl, daddr, dst->dev);
 193}
 194
 195static struct dst_ops ip6_dst_ops_template = {
 196	.family			=	AF_INET6,
 197	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 198	.gc			=	ip6_dst_gc,
 199	.gc_thresh		=	1024,
 200	.check			=	ip6_dst_check,
 201	.default_advmss		=	ip6_default_advmss,
 202	.mtu			=	ip6_mtu,
 203	.cow_metrics		=	ipv6_cow_metrics,
 204	.destroy		=	ip6_dst_destroy,
 205	.ifdown			=	ip6_dst_ifdown,
 206	.negative_advice	=	ip6_negative_advice,
 207	.link_failure		=	ip6_link_failure,
 208	.update_pmtu		=	ip6_rt_update_pmtu,
 209	.redirect		=	rt6_do_redirect,
 210	.local_out		=	__ip6_local_out,
 211	.neigh_lookup		=	ip6_neigh_lookup,
 212};
 213
 214static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 215{
 216	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 217
 218	return mtu ? : dst->dev->mtu;
 219}
 220
 221static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
 222					 struct sk_buff *skb, u32 mtu)
 223{
 224}
 225
 226static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
 227				      struct sk_buff *skb)
 228{
 229}
 230
 231static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
 232					 unsigned long old)
 233{
 234	return NULL;
 235}
 236
 237static struct dst_ops ip6_dst_blackhole_ops = {
 238	.family			=	AF_INET6,
 239	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 240	.destroy		=	ip6_dst_destroy,
 241	.check			=	ip6_dst_check,
 242	.mtu			=	ip6_blackhole_mtu,
 243	.default_advmss		=	ip6_default_advmss,
 244	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 245	.redirect		=	ip6_rt_blackhole_redirect,
 246	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
 247	.neigh_lookup		=	ip6_neigh_lookup,
 248};
 249
 250static const u32 ip6_template_metrics[RTAX_MAX] = {
 251	[RTAX_HOPLIMIT - 1] = 0,
 252};
 253
 254static const struct rt6_info ip6_null_entry_template = {
 255	.dst = {
 256		.__refcnt	= ATOMIC_INIT(1),
 257		.__use		= 1,
 258		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 259		.error		= -ENETUNREACH,
 260		.input		= ip6_pkt_discard,
 261		.output		= ip6_pkt_discard_out,
 262	},
 263	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 264	.rt6i_protocol  = RTPROT_KERNEL,
 265	.rt6i_metric	= ~(u32) 0,
 266	.rt6i_ref	= ATOMIC_INIT(1),
 267};
 268
 269#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 270
 271static const struct rt6_info ip6_prohibit_entry_template = {
 272	.dst = {
 273		.__refcnt	= ATOMIC_INIT(1),
 274		.__use		= 1,
 275		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 276		.error		= -EACCES,
 277		.input		= ip6_pkt_prohibit,
 278		.output		= ip6_pkt_prohibit_out,
 279	},
 280	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 281	.rt6i_protocol  = RTPROT_KERNEL,
 282	.rt6i_metric	= ~(u32) 0,
 283	.rt6i_ref	= ATOMIC_INIT(1),
 284};
 285
 286static const struct rt6_info ip6_blk_hole_entry_template = {
 287	.dst = {
 288		.__refcnt	= ATOMIC_INIT(1),
 289		.__use		= 1,
 290		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 291		.error		= -EINVAL,
 292		.input		= dst_discard,
 293		.output		= dst_discard_sk,
 294	},
 295	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 296	.rt6i_protocol  = RTPROT_KERNEL,
 297	.rt6i_metric	= ~(u32) 0,
 298	.rt6i_ref	= ATOMIC_INIT(1),
 299};
 300
 301#endif
 302
 303/* allocate dst with ip6_dst_ops */
 304static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 305					     struct net_device *dev,
 306					     int flags,
 307					     struct fib6_table *table)
 308{
 309	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 310					0, DST_OBSOLETE_FORCE_CHK, flags);
 311
 312	if (rt) {
 313		struct dst_entry *dst = &rt->dst;
 
 314
 315		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 316		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
 317		rt->rt6i_genid = rt_genid_ipv6(net);
 318		INIT_LIST_HEAD(&rt->rt6i_siblings);
 319	}
 320	return rt;
 321}
 322
 323static void ip6_dst_destroy(struct dst_entry *dst)
 324{
 325	struct rt6_info *rt = (struct rt6_info *)dst;
 326	struct inet6_dev *idev = rt->rt6i_idev;
 327	struct dst_entry *from = dst->from;
 328
 329	if (!(rt->dst.flags & DST_HOST))
 330		dst_destroy_metrics_generic(dst);
 331
 332	if (idev) {
 333		rt->rt6i_idev = NULL;
 334		in6_dev_put(idev);
 335	}
 336
 337	dst->from = NULL;
 338	dst_release(from);
 339
 340	if (rt6_has_peer(rt)) {
 341		struct inet_peer *peer = rt6_peer_ptr(rt);
 342		inet_putpeer(peer);
 343	}
 
 344}
 345
 346static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 347			   int how)
 348{
 349	struct rt6_info *rt = (struct rt6_info *)dst;
 350	struct inet6_dev *idev = rt->rt6i_idev;
 351	struct net_device *loopback_dev =
 352		dev_net(dev)->loopback_dev;
 353
 354	if (dev != loopback_dev) {
 355		if (idev && idev->dev == dev) {
 356			struct inet6_dev *loopback_idev =
 357				in6_dev_get(loopback_dev);
 358			if (loopback_idev) {
 359				rt->rt6i_idev = loopback_idev;
 360				in6_dev_put(idev);
 361			}
 362		}
 363	}
 364}
 365
 366static bool rt6_check_expired(const struct rt6_info *rt)
 367{
 368	if (rt->rt6i_flags & RTF_EXPIRES) {
 369		if (time_after(jiffies, rt->dst.expires))
 370			return true;
 371	} else if (rt->dst.from) {
 372		return rt6_check_expired((struct rt6_info *) rt->dst.from);
 373	}
 374	return false;
 375}
 376
 377/* Multipath route selection:
 378 *   Hash-based function using the packet header and flow label.
 379 * Adapted from fib_info_hashfn()
 380 */
 381static int rt6_info_hash_nhsfn(unsigned int candidate_count,
 382			       const struct flowi6 *fl6)
 383{
 384	unsigned int val = fl6->flowi6_proto;
 385
 386	val ^= ipv6_addr_hash(&fl6->daddr);
 387	val ^= ipv6_addr_hash(&fl6->saddr);
 388
 389	/* Works only if this is not encapsulated */
 390	switch (fl6->flowi6_proto) {
 391	case IPPROTO_UDP:
 392	case IPPROTO_TCP:
 393	case IPPROTO_SCTP:
 394		val ^= (__force u16)fl6->fl6_sport;
 395		val ^= (__force u16)fl6->fl6_dport;
 396		break;
 397
 398	case IPPROTO_ICMPV6:
 399		val ^= (__force u16)fl6->fl6_icmp_type;
 400		val ^= (__force u16)fl6->fl6_icmp_code;
 401		break;
 402	}
 403	/* RFC 6438 recommends using the flow label */
 404	val ^= (__force u32)fl6->flowlabel;
 405
 406	/* Perhaps we need to tune this function? */
 407	val = val ^ (val >> 7) ^ (val >> 12);
 408	return val % candidate_count;
 409}
 410
 411static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 412					     struct flowi6 *fl6, int oif,
 413					     int strict)
 414{
 415	struct rt6_info *sibling, *next_sibling;
 416	int route_choosen;
 417
 418	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
 419	/* Don't change the route if route_choosen == 0
 420	 * (the siblings list does not include ourselves)
 421	 */
 422	if (route_choosen)
 423		list_for_each_entry_safe(sibling, next_sibling,
 424				&match->rt6i_siblings, rt6i_siblings) {
 425			route_choosen--;
 426			if (route_choosen == 0) {
 427				if (rt6_score_route(sibling, oif, strict) < 0)
 428					break;
 429				match = sibling;
 430				break;
 431			}
 432		}
 433	return match;
 434}
 435
 436/*
 437 *	Route lookup. Any table->tb6_lock is implied.
 438 */
 439
 440static inline struct rt6_info *rt6_device_match(struct net *net,
 441						    struct rt6_info *rt,
 442						    const struct in6_addr *saddr,
 443						    int oif,
 444						    int flags)
 445{
 446	struct rt6_info *local = NULL;
 447	struct rt6_info *sprt;
 448
 449	if (!oif && ipv6_addr_any(saddr))
 450		goto out;
 451
 452	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 453		struct net_device *dev = sprt->dst.dev;
 454
 455		if (oif) {
 456			if (dev->ifindex == oif)
 457				return sprt;
 458			if (dev->flags & IFF_LOOPBACK) {
 459				if (!sprt->rt6i_idev ||
 460				    sprt->rt6i_idev->dev->ifindex != oif) {
 461					if (flags & RT6_LOOKUP_F_IFACE && oif)
 462						continue;
 463					if (local && (!oif ||
 464						      local->rt6i_idev->dev->ifindex == oif))
 465						continue;
 466				}
 467				local = sprt;
 468			}
 469		} else {
 470			if (ipv6_chk_addr(net, saddr, dev,
 471					  flags & RT6_LOOKUP_F_IFACE))
 472				return sprt;
 473		}
 474	}
 475
 476	if (oif) {
 477		if (local)
 478			return local;
 479
 480		if (flags & RT6_LOOKUP_F_IFACE)
 481			return net->ipv6.ip6_null_entry;
 482	}
 483out:
 484	return rt;
 485}
 486
 487#ifdef CONFIG_IPV6_ROUTER_PREF
 488struct __rt6_probe_work {
 489	struct work_struct work;
 490	struct in6_addr target;
 491	struct net_device *dev;
 492};
 493
 494static void rt6_probe_deferred(struct work_struct *w)
 495{
 496	struct in6_addr mcaddr;
 497	struct __rt6_probe_work *work =
 498		container_of(w, struct __rt6_probe_work, work);
 499
 500	addrconf_addr_solict_mult(&work->target, &mcaddr);
 501	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
 502	dev_put(work->dev);
 503	kfree(w);
 504}
 505
 506static void rt6_probe(struct rt6_info *rt)
 507{
 508	struct neighbour *neigh;
 509	/*
 510	 * Okay, this does not seem to be appropriate
 511	 * for now; however, we need to check whether it
 512	 * really is, aka Router Reachability Probing.
 513	 *
 514	 * A Router Reachability Probe MUST be rate-limited
 515	 * to no more than one per minute.
 516	 */
 517	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
 518		return;
 519	rcu_read_lock_bh();
 520	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 521	if (neigh) {
 522		write_lock(&neigh->lock);
 523		if (neigh->nud_state & NUD_VALID)
 524			goto out;
 525	}
 526
 527	if (!neigh ||
 528	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
 529		struct __rt6_probe_work *work;
 
 530
 531		work = kmalloc(sizeof(*work), GFP_ATOMIC);
 
 532
 533		if (neigh && work)
 534			__neigh_set_probe_once(neigh);
 535
 536		if (neigh)
 537			write_unlock(&neigh->lock);
 538
 539		if (work) {
 540			INIT_WORK(&work->work, rt6_probe_deferred);
 541			work->target = rt->rt6i_gateway;
 542			dev_hold(rt->dst.dev);
 543			work->dev = rt->dst.dev;
 544			schedule_work(&work->work);
 545		}
 546	} else {
 
 
 547out:
 548		write_unlock(&neigh->lock);
 549	}
 550	rcu_read_unlock_bh();
 551}
 552#else
 553static inline void rt6_probe(struct rt6_info *rt)
 554{
 555}
 556#endif
 557
 558/*
 559 * Default Router Selection (RFC 2461 6.3.6)
 560 */
 561static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 562{
 563	struct net_device *dev = rt->dst.dev;
 564	if (!oif || dev->ifindex == oif)
 565		return 2;
 566	if ((dev->flags & IFF_LOOPBACK) &&
 567	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 568		return 1;
 569	return 0;
 570}
 571
 572static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
 573{
 574	struct neighbour *neigh;
 575	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 576
 
 
 577	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 578	    !(rt->rt6i_flags & RTF_GATEWAY))
 579		return RT6_NUD_SUCCEED;
 580
 581	rcu_read_lock_bh();
 582	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 583	if (neigh) {
 584		read_lock(&neigh->lock);
 585		if (neigh->nud_state & NUD_VALID)
 586			ret = RT6_NUD_SUCCEED;
 587#ifdef CONFIG_IPV6_ROUTER_PREF
 588		else if (!(neigh->nud_state & NUD_FAILED))
 589			ret = RT6_NUD_SUCCEED;
 
 590		else
 591			ret = RT6_NUD_FAIL_PROBE;
 592#endif
 593		read_unlock(&neigh->lock);
 594	} else {
 595		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
 596		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
 597	}
 598	rcu_read_unlock_bh();
 599
 600	return ret;
 601}
 602
 603static int rt6_score_route(struct rt6_info *rt, int oif,
 604			   int strict)
 605{
 606	int m;
 607
 608	m = rt6_check_dev(rt, oif);
 609	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 610		return RT6_NUD_FAIL_HARD;
 611#ifdef CONFIG_IPV6_ROUTER_PREF
 612	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 613#endif
 614	if (strict & RT6_LOOKUP_F_REACHABLE) {
 615		int n = rt6_check_neigh(rt);
 616		if (n < 0)
 617			return n;
 618	}
 619	return m;
 620}
 621
 622static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 623				   int *mpri, struct rt6_info *match,
 624				   bool *do_rr)
 625{
 626	int m;
 627	bool match_do_rr = false;
 628
 629	if (rt6_check_expired(rt))
 630		goto out;
 631
 632	m = rt6_score_route(rt, oif, strict);
 633	if (m == RT6_NUD_FAIL_DO_RR) {
 634		match_do_rr = true;
 635		m = 0; /* lowest valid score */
 636	} else if (m == RT6_NUD_FAIL_HARD) {
 637		goto out;
 638	}
 639
 640	if (strict & RT6_LOOKUP_F_REACHABLE)
 641		rt6_probe(rt);
 642
 643	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
 644	if (m > *mpri) {
 645		*do_rr = match_do_rr;
 
 646		*mpri = m;
 647		match = rt;
 
 
 648	}
 
 649out:
 650	return match;
 651}
 652
 653static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 654				     struct rt6_info *rr_head,
 655				     u32 metric, int oif, int strict,
 656				     bool *do_rr)
 657{
 658	struct rt6_info *rt, *match;
 659	int mpri = -1;
 660
 661	match = NULL;
 662	for (rt = rr_head; rt && rt->rt6i_metric == metric;
 663	     rt = rt->dst.rt6_next)
 664		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 665	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 666	     rt = rt->dst.rt6_next)
 667		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 668
 669	return match;
 670}
 671
 672static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 673{
 674	struct rt6_info *match, *rt0;
 675	struct net *net;
 676	bool do_rr = false;
 
 
 677
 678	rt0 = fn->rr_ptr;
 679	if (!rt0)
 680		fn->rr_ptr = rt0 = fn->leaf;
 681
 682	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
 683			     &do_rr);
 684
 685	if (do_rr) {
 
 686		struct rt6_info *next = rt0->dst.rt6_next;
 687
 688		/* no entries matched; do round-robin */
 689		if (!next || next->rt6i_metric != rt0->rt6i_metric)
 690			next = fn->leaf;
 691
 692		if (next != rt0)
 693			fn->rr_ptr = next;
 694	}
 695
 696	net = dev_net(rt0->dst.dev);
 697	return match ? match : net->ipv6.ip6_null_entry;
 698}
 699
 700#ifdef CONFIG_IPV6_ROUTE_INFO
 701int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 702		  const struct in6_addr *gwaddr)
 703{
 704	struct net *net = dev_net(dev);
 705	struct route_info *rinfo = (struct route_info *) opt;
 706	struct in6_addr prefix_buf, *prefix;
 707	unsigned int pref;
 708	unsigned long lifetime;
 709	struct rt6_info *rt;
 710
 711	if (len < sizeof(struct route_info)) {
 712		return -EINVAL;
 713	}
 714
 715	/* Sanity check for prefix_len and length */
 716	if (rinfo->length > 3) {
 717		return -EINVAL;
 718	} else if (rinfo->prefix_len > 128) {
 719		return -EINVAL;
 720	} else if (rinfo->prefix_len > 64) {
 721		if (rinfo->length < 2) {
 722			return -EINVAL;
 723		}
 724	} else if (rinfo->prefix_len > 0) {
 725		if (rinfo->length < 1) {
 726			return -EINVAL;
 727		}
 728	}
 729
 730	pref = rinfo->route_pref;
 731	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 732		return -EINVAL;
 733
 734	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 735
 736	if (rinfo->length == 3)
 737		prefix = (struct in6_addr *)rinfo->prefix;
 738	else {
 739		/* this function is safe */
 740		ipv6_addr_prefix(&prefix_buf,
 741				 (struct in6_addr *)rinfo->prefix,
 742				 rinfo->prefix_len);
 743		prefix = &prefix_buf;
 744	}
 745
 746	if (rinfo->prefix_len == 0)
 747		rt = rt6_get_dflt_router(gwaddr, dev);
 748	else
 749		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
 750					gwaddr, dev->ifindex);
 751
 752	if (rt && !lifetime) {
 753		ip6_del_rt(rt);
 754		rt = NULL;
 755	}
 756
 757	if (!rt && lifetime)
 758		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 759					pref);
 760	else if (rt)
 761		rt->rt6i_flags = RTF_ROUTEINFO |
 762				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 763
 764	if (rt) {
 765		if (!addrconf_finite_timeout(lifetime))
 766			rt6_clean_expires(rt);
 767		else
 768			rt6_set_expires(rt, jiffies + HZ * lifetime);
 769
 770		ip6_rt_put(rt);
 
 771	}
 772	return 0;
 773}
 774#endif
 775
 776#define BACKTRACK(__net, saddr)			\
 777do { \
 778	if (rt == __net->ipv6.ip6_null_entry) {	\
 779		struct fib6_node *pn; \
 780		while (1) { \
 781			if (fn->fn_flags & RTN_TL_ROOT) \
 782				goto out; \
 783			pn = fn->parent; \
 784			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
 785				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
 786			else \
 787				fn = pn; \
 788			if (fn->fn_flags & RTN_RTINFO) \
 789				goto restart; \
 790		} \
 791	} \
 792} while (0)
 793
 794static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 795					     struct fib6_table *table,
 796					     struct flowi6 *fl6, int flags)
 797{
 798	struct fib6_node *fn;
 799	struct rt6_info *rt;
 800
 801	read_lock_bh(&table->tb6_lock);
 802	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 803restart:
 804	rt = fn->leaf;
 805	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 806	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
 807		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
 808	BACKTRACK(net, &fl6->saddr);
 809out:
 810	dst_use(&rt->dst, jiffies);
 811	read_unlock_bh(&table->tb6_lock);
 812	return rt;
 813
 814}
 815
 816struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 817				    int flags)
 818{
 819	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
 820}
 821EXPORT_SYMBOL_GPL(ip6_route_lookup);
 822
 823struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 824			    const struct in6_addr *saddr, int oif, int strict)
 825{
 826	struct flowi6 fl6 = {
 827		.flowi6_oif = oif,
 828		.daddr = *daddr,
 829	};
 830	struct dst_entry *dst;
 831	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 832
 833	if (saddr) {
 834		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 835		flags |= RT6_LOOKUP_F_HAS_SADDR;
 836	}
 837
 838	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 839	if (dst->error == 0)
 840		return (struct rt6_info *) dst;
 841
 842	dst_release(dst);
 843
 844	return NULL;
 845}
 846
 847EXPORT_SYMBOL(rt6_lookup);
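/*
 * Illustrative in-kernel sketch (not part of this file): how another
 * kernel component might use the exported rt6_lookup() above.  The
 * function name is made up; only the resolved output device is examined.
 */
#include <net/ip6_route.h>

static int example_route_oif(struct net *net, const struct in6_addr *daddr)
{
	struct rt6_info *rt;
	int oif;

	rt = rt6_lookup(net, daddr, NULL, 0, 0);	/* no saddr, non-strict oif */
	if (!rt)
		return -ENOENT;		/* lookup resolved to an error route */

	oif = rt->dst.dev ? rt->dst.dev->ifindex : 0;
	dst_release(&rt->dst);		/* drop the reference taken by the lookup */
	return oif;
}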
 848
 849/* ip6_ins_rt is called with table->tb6_lock NOT held.
 850   It takes a new route entry; if the addition fails for any reason, the
 851   route is freed.  In any case, if the caller does not hold a reference,
 852   it may be destroyed.
 853 */
 854
 855static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
 856			struct nlattr *mx, int mx_len)
 857{
 858	int err;
 859	struct fib6_table *table;
 860
 861	table = rt->rt6i_table;
 862	write_lock_bh(&table->tb6_lock);
 863	err = fib6_add(&table->tb6_root, rt, info, mx, mx_len);
 864	write_unlock_bh(&table->tb6_lock);
 865
 866	return err;
 867}
 868
 869int ip6_ins_rt(struct rt6_info *rt)
 870{
 871	struct nl_info info = {
 872		.nl_net = dev_net(rt->dst.dev),
 873	};
 874	return __ip6_ins_rt(rt, &info, NULL, 0);
 875}
 876
 877static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
 878				      const struct in6_addr *daddr,
 879				      const struct in6_addr *saddr)
 880{
 881	struct rt6_info *rt;
 882
 883	/*
 884	 *	Clone the route.
 885	 */
 886
 887	rt = ip6_rt_copy(ort, daddr);
 888
 889	if (rt) {
 890		if (ort->rt6i_dst.plen != 128 &&
 891		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 892			rt->rt6i_flags |= RTF_ANYCAST;
 893
 894		rt->rt6i_flags |= RTF_CACHE;
 895
 896#ifdef CONFIG_IPV6_SUBTREES
 897		if (rt->rt6i_src.plen && saddr) {
 898			rt->rt6i_src.addr = *saddr;
 899			rt->rt6i_src.plen = 128;
 900		}
 901#endif
 902	}
 903
 904	return rt;
 905}
 906
 907static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 908					const struct in6_addr *daddr)
 909{
 910	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
 911
 912	if (rt)
 913		rt->rt6i_flags |= RTF_CACHE;
 
 
 914	return rt;
 915}
 916
 917static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
 918				      struct flowi6 *fl6, int flags)
 919{
 920	struct fib6_node *fn;
 921	struct rt6_info *rt, *nrt;
 922	int strict = 0;
 923	int attempts = 3;
 924	int err;
 925	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
 926
 927	strict |= flags & RT6_LOOKUP_F_IFACE;
 928
 929relookup:
 930	read_lock_bh(&table->tb6_lock);
 931
 932restart_2:
 933	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 934
 935restart:
 936	rt = rt6_select(fn, oif, strict | reachable);
 937	if (rt->rt6i_nsiblings)
 938		rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
 939	BACKTRACK(net, &fl6->saddr);
 940	if (rt == net->ipv6.ip6_null_entry ||
 941	    rt->rt6i_flags & RTF_CACHE)
 942		goto out;
 943
 944	dst_hold(&rt->dst);
 945	read_unlock_bh(&table->tb6_lock);
 946
 947	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
 948		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
 949	else if (!(rt->dst.flags & DST_HOST))
 950		nrt = rt6_alloc_clone(rt, &fl6->daddr);
 951	else
 952		goto out2;
 953
 954	ip6_rt_put(rt);
 955	rt = nrt ? : net->ipv6.ip6_null_entry;
 956
 957	dst_hold(&rt->dst);
 958	if (nrt) {
 959		err = ip6_ins_rt(nrt);
 960		if (!err)
 961			goto out2;
 962	}
 963
 964	if (--attempts <= 0)
 965		goto out2;
 966
 967	/*
 968	 * Race condition! In the gap while table->tb6_lock was
 969	 * released, someone else could have inserted this route.  Relookup.
 970	 */
 971	ip6_rt_put(rt);
 972	goto relookup;
 973
 974out:
 975	if (reachable) {
 976		reachable = 0;
 977		goto restart_2;
 978	}
 979	dst_hold(&rt->dst);
 980	read_unlock_bh(&table->tb6_lock);
 981out2:
 982	rt->dst.lastuse = jiffies;
 983	rt->dst.__use++;
 984
 985	return rt;
 986}
 987
 988static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
 989					    struct flowi6 *fl6, int flags)
 990{
 991	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 992}
 993
 994static struct dst_entry *ip6_route_input_lookup(struct net *net,
 995						struct net_device *dev,
 996						struct flowi6 *fl6, int flags)
 997{
 998	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
 999		flags |= RT6_LOOKUP_F_IFACE;
1000
1001	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1002}
1003
1004void ip6_route_input(struct sk_buff *skb)
1005{
1006	const struct ipv6hdr *iph = ipv6_hdr(skb);
1007	struct net *net = dev_net(skb->dev);
1008	int flags = RT6_LOOKUP_F_HAS_SADDR;
1009	struct flowi6 fl6 = {
1010		.flowi6_iif = skb->dev->ifindex,
1011		.daddr = iph->daddr,
1012		.saddr = iph->saddr,
1013		.flowlabel = ip6_flowinfo(iph),
1014		.flowi6_mark = skb->mark,
1015		.flowi6_proto = iph->nexthdr,
1016	};
1017
1018	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1019}
1020
1021static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1022					     struct flowi6 *fl6, int flags)
1023{
1024	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1025}
1026
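/*
 *	ip6_route_output() - route lookup for locally generated traffic.
 *	Lookup flags are derived from the socket and the flow: a bound
 *	device or a destination for which rt6_need_strict() is true forces
 *	RT6_LOOKUP_F_IFACE, and a non-any source address enables
 *	RT6_LOOKUP_F_HAS_SADDR.  The result is returned with a reference
 *	held (the null entry with dst->error set on failure); callers must
 *	check dst->error and release it, roughly:
 *
 *		dst = ip6_route_output(net, sk, &fl6);
 *		if (dst->error) {
 *			dst_release(dst);
 *			... fail ...
 *		}
 */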
1027struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1028				    struct flowi6 *fl6)
1029{
1030	int flags = 0;
1031
1032	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1033
1034	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1035		flags |= RT6_LOOKUP_F_IFACE;
1036
1037	if (!ipv6_addr_any(&fl6->saddr))
1038		flags |= RT6_LOOKUP_F_HAS_SADDR;
1039	else if (sk)
1040		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1041
1042	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1043}
1044
1045EXPORT_SYMBOL(ip6_route_output);
1046
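/*
 *	ip6_blackhole_route() - build a standalone copy of @dst_orig whose
 *	input/output handlers silently discard packets while preserving the
 *	original metrics, addresses and flags.  The reference on @dst_orig
 *	is always dropped; the caller gets the new dst or ERR_PTR(-ENOMEM).
 */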
1047struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1048{
1049	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1050	struct dst_entry *new = NULL;
1051
1052	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1053	if (rt) {
1054		new = &rt->dst;
1055
1056		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1057		rt6_init_peer(rt, net->ipv6.peers);
1058
1059		new->__use = 1;
1060		new->input = dst_discard;
1061		new->output = dst_discard_sk;
1062
1063		if (dst_metrics_read_only(&ort->dst))
1064			new->_metrics = ort->dst._metrics;
1065		else
1066			dst_copy_metrics(new, &ort->dst);
1067		rt->rt6i_idev = ort->rt6i_idev;
1068		if (rt->rt6i_idev)
1069			in6_dev_hold(rt->rt6i_idev);
1070
1071		rt->rt6i_gateway = ort->rt6i_gateway;
1072		rt->rt6i_flags = ort->rt6i_flags;
1073		rt->rt6i_metric = 0;
1074
1075		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1076#ifdef CONFIG_IPV6_SUBTREES
1077		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1078#endif
1079
1080		dst_free(new);
1081	}
1082
1083	dst_release(dst_orig);
1084	return new ? new : ERR_PTR(-ENOMEM);
1085}
1086
1087/*
1088 *	Destination cache support functions
1089 */
1090
1091static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1092{
1093	struct rt6_info *rt;
1094
1095	rt = (struct rt6_info *) dst;
1096
1097	/* All IPv6 dsts are created with ->obsolete set to
1098	 * DST_OBSOLETE_FORCE_CHK, which forces validation calls down
1099	 * into this function on every dst_check().
1100	 */
1101	if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
1102		return NULL;
1103
1104	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1105		return NULL;
1106
1107	if (rt6_check_expired(rt))
1108		return NULL;
1109
1110	return dst;
1111}
1112
1113static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1114{
1115	struct rt6_info *rt = (struct rt6_info *) dst;
1116
1117	if (rt) {
1118		if (rt->rt6i_flags & RTF_CACHE) {
1119			if (rt6_check_expired(rt)) {
1120				ip6_del_rt(rt);
1121				dst = NULL;
1122			}
1123		} else {
1124			dst_release(dst);
1125			dst = NULL;
1126		}
1127	}
1128	return dst;
1129}
1130
1131static void ip6_link_failure(struct sk_buff *skb)
1132{
1133	struct rt6_info *rt;
1134
1135	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1136
1137	rt = (struct rt6_info *) skb_dst(skb);
1138	if (rt) {
1139		if (rt->rt6i_flags & RTF_CACHE) {
1140			dst_hold(&rt->dst);
1141			if (ip6_del_rt(rt))
1142				dst_free(&rt->dst);
1143		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1144			rt->rt6i_node->fn_sernum = -1;
1145		}
1146	}
1147}
1148
1149static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1150			       struct sk_buff *skb, u32 mtu)
1151{
1152	struct rt6_info *rt6 = (struct rt6_info*)dst;
1153
1154	dst_confirm(dst);
1155	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1156		struct net *net = dev_net(dst->dev);
1157
1158		rt6->rt6i_flags |= RTF_MODIFIED;
1159		if (mtu < IPV6_MIN_MTU) {
1160			u32 features = dst_metric(dst, RTAX_FEATURES);
1161			mtu = IPV6_MIN_MTU;
1162			features |= RTAX_FEATURE_ALLFRAG;
1163			dst_metric_set(dst, RTAX_FEATURES, features);
1164		}
1165		dst_metric_set(dst, RTAX_MTU, mtu);
1166		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1167	}
1168}
1169
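/*
 *	ip6_update_pmtu() - update the cached path MTU for the flow described
 *	by the IPv6 header at skb->data (typically the header embedded in an
 *	ICMPv6 "packet too big" error).  Note that @mtu is passed in network
 *	byte order and converted with ntohl() before use.
 */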
1170void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1171		     int oif, u32 mark)
1172{
1173	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1174	struct dst_entry *dst;
1175	struct flowi6 fl6;
1176
1177	memset(&fl6, 0, sizeof(fl6));
1178	fl6.flowi6_oif = oif;
1179	fl6.flowi6_mark = mark;
1180	fl6.daddr = iph->daddr;
1181	fl6.saddr = iph->saddr;
1182	fl6.flowlabel = ip6_flowinfo(iph);
1183
1184	dst = ip6_route_output(net, NULL, &fl6);
1185	if (!dst->error)
1186		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1187	dst_release(dst);
1188}
1189EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1190
1191void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1192{
1193	ip6_update_pmtu(skb, sock_net(sk), mtu,
1194			sk->sk_bound_dev_if, sk->sk_mark);
1195}
1196EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1197
1198/* Handle redirects */
1199struct ip6rd_flowi {
1200	struct flowi6 fl6;
1201	struct in6_addr gateway;
1202};
1203
1204static struct rt6_info *__ip6_route_redirect(struct net *net,
1205					     struct fib6_table *table,
1206					     struct flowi6 *fl6,
1207					     int flags)
1208{
1209	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1210	struct rt6_info *rt;
1211	struct fib6_node *fn;
1212
1213	/* Get the "current" route for this destination and
1214	 * check if the redirect has come from the appropriate router.
1215	 *
1216	 * RFC 4861 specifies that redirects should only be
1217	 * accepted if they come from the nexthop to the target.
1218	 * Due to the way the routes are chosen, this notion
1219	 * is a bit fuzzy and one might need to check all possible
1220	 * routes.
1221	 */
1222
1223	read_lock_bh(&table->tb6_lock);
1224	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1225restart:
1226	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1227		if (rt6_check_expired(rt))
1228			continue;
1229		if (rt->dst.error)
1230			break;
1231		if (!(rt->rt6i_flags & RTF_GATEWAY))
1232			continue;
1233		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1234			continue;
1235		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1236			continue;
1237		break;
1238	}
1239
1240	if (!rt)
1241		rt = net->ipv6.ip6_null_entry;
1242	else if (rt->dst.error) {
1243		rt = net->ipv6.ip6_null_entry;
1244		goto out;
1245	}
1246	BACKTRACK(net, &fl6->saddr);
1247out:
1248	dst_hold(&rt->dst);
1249
1250	read_unlock_bh(&table->tb6_lock);
1251
1252	return rt;
1253}
1254
1255static struct dst_entry *ip6_route_redirect(struct net *net,
1256					const struct flowi6 *fl6,
1257					const struct in6_addr *gateway)
1258{
1259	int flags = RT6_LOOKUP_F_HAS_SADDR;
1260	struct ip6rd_flowi rdfl;
1261
1262	rdfl.fl6 = *fl6;
1263	rdfl.gateway = *gateway;
1264
1265	return fib6_rule_lookup(net, &rdfl.fl6,
1266				flags, __ip6_route_redirect);
1267}
1268
1269void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1270{
1271	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1272	struct dst_entry *dst;
1273	struct flowi6 fl6;
1274
1275	memset(&fl6, 0, sizeof(fl6));
1276	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1277	fl6.flowi6_oif = oif;
1278	fl6.flowi6_mark = mark;
1279	fl6.daddr = iph->daddr;
1280	fl6.saddr = iph->saddr;
1281	fl6.flowlabel = ip6_flowinfo(iph);
1282
1283	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1284	rt6_do_redirect(dst, NULL, skb);
1285	dst_release(dst);
1286}
1287EXPORT_SYMBOL_GPL(ip6_redirect);
1288
1289void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1290			    u32 mark)
1291{
1292	const struct ipv6hdr *iph = ipv6_hdr(skb);
1293	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1294	struct dst_entry *dst;
1295	struct flowi6 fl6;
1296
1297	memset(&fl6, 0, sizeof(fl6));
1298	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1299	fl6.flowi6_oif = oif;
1300	fl6.flowi6_mark = mark;
1301	fl6.daddr = msg->dest;
1302	fl6.saddr = iph->daddr;
1303
1304	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1305	rt6_do_redirect(dst, NULL, skb);
1306	dst_release(dst);
1307}
1308
1309void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1310{
1311	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1312}
1313EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1314
1315static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1316{
1317	struct net_device *dev = dst->dev;
1318	unsigned int mtu = dst_mtu(dst);
1319	struct net *net = dev_net(dev);
1320
1321	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1322
1323	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1324		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1325
1326	/*
1327	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1328	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1329	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1330	 * rely only on pmtu discovery"
1331	 */
1332	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1333		mtu = IPV6_MAXPLEN;
1334	return mtu;
1335}
1336
1337static unsigned int ip6_mtu(const struct dst_entry *dst)
1338{
1339	struct inet6_dev *idev;
1340	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1341
1342	if (mtu)
1343		goto out;
1344
1345	mtu = IPV6_MIN_MTU;
1346
1347	rcu_read_lock();
1348	idev = __in6_dev_get(dst->dev);
1349	if (idev)
1350		mtu = idev->cnf.mtu6;
1351	rcu_read_unlock();
1352
1353out:
1354	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1355}
1356
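/*
 *	ICMPv6 dst entries are not inserted into the FIB; they are kept on
 *	the private list below and reaped by icmp6_dst_gc() once their
 *	refcount drops to zero (icmp6_clean_all() handles interface-down
 *	cleanup).
 */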
1357static struct dst_entry *icmp6_dst_gc_list;
1358static DEFINE_SPINLOCK(icmp6_dst_lock);
1359
1360struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1361				  struct flowi6 *fl6)
1362{
1363	struct dst_entry *dst;
1364	struct rt6_info *rt;
1365	struct inet6_dev *idev = in6_dev_get(dev);
1366	struct net *net = dev_net(dev);
1367
1368	if (unlikely(!idev))
1369		return ERR_PTR(-ENODEV);
1370
1371	rt = ip6_dst_alloc(net, dev, 0, NULL);
1372	if (unlikely(!rt)) {
1373		in6_dev_put(idev);
1374		dst = ERR_PTR(-ENOMEM);
1375		goto out;
1376	}
1377
1378	rt->dst.flags |= DST_HOST;
1379	rt->dst.output  = ip6_output;
1380	atomic_set(&rt->dst.__refcnt, 1);
1381	rt->rt6i_gateway  = fl6->daddr;
1382	rt->rt6i_dst.addr = fl6->daddr;
1383	rt->rt6i_dst.plen = 128;
1384	rt->rt6i_idev     = idev;
1385	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1386
1387	spin_lock_bh(&icmp6_dst_lock);
1388	rt->dst.next = icmp6_dst_gc_list;
1389	icmp6_dst_gc_list = &rt->dst;
1390	spin_unlock_bh(&icmp6_dst_lock);
1391
1392	fib6_force_start_gc(net);
1393
1394	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1395
1396out:
1397	return dst;
1398}
1399
1400int icmp6_dst_gc(void)
1401{
1402	struct dst_entry *dst, **pprev;
1403	int more = 0;
1404
1405	spin_lock_bh(&icmp6_dst_lock);
1406	pprev = &icmp6_dst_gc_list;
1407
1408	while ((dst = *pprev) != NULL) {
1409		if (!atomic_read(&dst->__refcnt)) {
1410			*pprev = dst->next;
1411			dst_free(dst);
1412		} else {
1413			pprev = &dst->next;
1414			++more;
1415		}
1416	}
1417
1418	spin_unlock_bh(&icmp6_dst_lock);
1419
1420	return more;
1421}
1422
1423static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1424			    void *arg)
1425{
1426	struct dst_entry *dst, **pprev;
1427
1428	spin_lock_bh(&icmp6_dst_lock);
1429	pprev = &icmp6_dst_gc_list;
1430	while ((dst = *pprev) != NULL) {
1431		struct rt6_info *rt = (struct rt6_info *) dst;
1432		if (func(rt, arg)) {
1433			*pprev = dst->next;
1434			dst_free(dst);
1435		} else {
1436			pprev = &dst->next;
1437		}
1438	}
1439	spin_unlock_bh(&icmp6_dst_lock);
1440}
1441
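/*
 *	ip6_dst_gc() - dst_ops garbage collector.  Roughly: skip the run if
 *	the minimum interval has not elapsed and the table is not over
 *	ip6_rt_max_size; otherwise run fib6_run_gc() with a timeout that
 *	grows on every pass (ip6_rt_gc_expire) and decays according to
 *	ip6_rt_gc_elasticity.  Returns non-zero while the table is still
 *	over the size limit.
 */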
1442static int ip6_dst_gc(struct dst_ops *ops)
1443{
1444	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1445	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1446	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1447	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1448	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1449	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1450	int entries;
1451
1452	entries = dst_entries_get_fast(ops);
1453	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1454	    entries <= rt_max_size)
1455		goto out;
1456
1457	net->ipv6.ip6_rt_gc_expire++;
1458	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
1459	entries = dst_entries_get_slow(ops);
1460	if (entries < ops->gc_thresh)
1461		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1462out:
1463	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1464	return entries > rt_max_size;
1465}
1466
1467/*
1468 *	Add a route described by @cfg to the routing table.
1469 */
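/*
 *	A minimal usage sketch (values are illustrative only): callers in
 *	this file such as rt6_add_route_info() and rt6_add_dflt_router()
 *	fill a struct fib6_config along these lines before calling
 *	ip6_route_add():
 *
 *		struct fib6_config cfg = {
 *			.fc_table	= RT6_TABLE_MAIN,
 *			.fc_metric	= IP6_RT_PRIO_USER,
 *			.fc_ifindex	= dev->ifindex,
 *			.fc_dst_len	= 64,
 *			.fc_flags	= RTF_GATEWAY | RTF_UP,
 *			.fc_nlinfo.nl_net = net,
 *		};
 *		cfg.fc_dst = prefix;
 *		cfg.fc_gateway = gwaddr;
 *		err = ip6_route_add(&cfg);
 */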
1470
1471int ip6_route_add(struct fib6_config *cfg)
1472{
1473	int err;
1474	struct net *net = cfg->fc_nlinfo.nl_net;
1475	struct rt6_info *rt = NULL;
1476	struct net_device *dev = NULL;
1477	struct inet6_dev *idev = NULL;
1478	struct fib6_table *table;
1479	int addr_type;
1480
1481	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1482		return -EINVAL;
1483#ifndef CONFIG_IPV6_SUBTREES
1484	if (cfg->fc_src_len)
1485		return -EINVAL;
1486#endif
1487	if (cfg->fc_ifindex) {
1488		err = -ENODEV;
1489		dev = dev_get_by_index(net, cfg->fc_ifindex);
1490		if (!dev)
1491			goto out;
1492		idev = in6_dev_get(dev);
1493		if (!idev)
1494			goto out;
1495	}
1496
1497	if (cfg->fc_metric == 0)
1498		cfg->fc_metric = IP6_RT_PRIO_USER;
1499
1500	err = -ENOBUFS;
1501	if (cfg->fc_nlinfo.nlh &&
1502	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1503		table = fib6_get_table(net, cfg->fc_table);
1504		if (!table) {
1505			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1506			table = fib6_new_table(net, cfg->fc_table);
1507		}
1508	} else {
1509		table = fib6_new_table(net, cfg->fc_table);
1510	}
1511
1512	if (!table)
1513		goto out;
1514
1515	rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1516
1517	if (!rt) {
1518		err = -ENOMEM;
1519		goto out;
1520	}
1521
1522	if (cfg->fc_flags & RTF_EXPIRES)
1523		rt6_set_expires(rt, jiffies +
1524				clock_t_to_jiffies(cfg->fc_expires));
1525	else
1526		rt6_clean_expires(rt);
1527
1528	if (cfg->fc_protocol == RTPROT_UNSPEC)
1529		cfg->fc_protocol = RTPROT_BOOT;
1530	rt->rt6i_protocol = cfg->fc_protocol;
1531
1532	addr_type = ipv6_addr_type(&cfg->fc_dst);
1533
1534	if (addr_type & IPV6_ADDR_MULTICAST)
1535		rt->dst.input = ip6_mc_input;
1536	else if (cfg->fc_flags & RTF_LOCAL)
1537		rt->dst.input = ip6_input;
1538	else
1539		rt->dst.input = ip6_forward;
1540
1541	rt->dst.output = ip6_output;
1542
1543	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1544	rt->rt6i_dst.plen = cfg->fc_dst_len;
1545	if (rt->rt6i_dst.plen == 128) {
1546		rt->dst.flags |= DST_HOST;
1547		dst_metrics_set_force_overwrite(&rt->dst);
1548	}
1549
1550#ifdef CONFIG_IPV6_SUBTREES
1551	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1552	rt->rt6i_src.plen = cfg->fc_src_len;
1553#endif
1554
1555	rt->rt6i_metric = cfg->fc_metric;
1556
1557	/* We cannot add true routes via loopback here;
1558	   they would result in kernel looping.  Promote them to reject routes.
1559	 */
1560	if ((cfg->fc_flags & RTF_REJECT) ||
1561	    (dev && (dev->flags & IFF_LOOPBACK) &&
1562	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1563	     !(cfg->fc_flags & RTF_LOCAL))) {
1564		/* hold loopback dev/idev if we haven't done so. */
1565		if (dev != net->loopback_dev) {
1566			if (dev) {
1567				dev_put(dev);
1568				in6_dev_put(idev);
1569			}
1570			dev = net->loopback_dev;
1571			dev_hold(dev);
1572			idev = in6_dev_get(dev);
1573			if (!idev) {
1574				err = -ENODEV;
1575				goto out;
1576			}
1577		}
1578		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1579		switch (cfg->fc_type) {
1580		case RTN_BLACKHOLE:
1581			rt->dst.error = -EINVAL;
1582			rt->dst.output = dst_discard_sk;
1583			rt->dst.input = dst_discard;
1584			break;
1585		case RTN_PROHIBIT:
1586			rt->dst.error = -EACCES;
1587			rt->dst.output = ip6_pkt_prohibit_out;
1588			rt->dst.input = ip6_pkt_prohibit;
1589			break;
1590		case RTN_THROW:
1591		default:
1592			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1593					: -ENETUNREACH;
1594			rt->dst.output = ip6_pkt_discard_out;
1595			rt->dst.input = ip6_pkt_discard;
1596			break;
1597		}
1598		goto install_route;
1599	}
1600
1601	if (cfg->fc_flags & RTF_GATEWAY) {
1602		const struct in6_addr *gw_addr;
1603		int gwa_type;
1604
1605		gw_addr = &cfg->fc_gateway;
1606		rt->rt6i_gateway = *gw_addr;
1607		gwa_type = ipv6_addr_type(gw_addr);
1608
1609		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1610			struct rt6_info *grt;
1611
1612			/* IPv6 strictly forbids using non-link-local
1613			   addresses as the nexthop address; otherwise the
1614			   router would not be able to send redirects.
1615			   That is a good thing, but in some (rare!)
1616			   circumstances (SIT, PtP, NBMA NOARP links) it is
1617			   handy to allow some exceptions. --ANK
1618			 */
1619			err = -EINVAL;
1620			if (!(gwa_type & IPV6_ADDR_UNICAST))
1621				goto out;
1622
1623			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1624
1625			err = -EHOSTUNREACH;
1626			if (!grt)
1627				goto out;
1628			if (dev) {
1629				if (dev != grt->dst.dev) {
1630					ip6_rt_put(grt);
1631					goto out;
1632				}
1633			} else {
1634				dev = grt->dst.dev;
1635				idev = grt->rt6i_idev;
1636				dev_hold(dev);
1637				in6_dev_hold(grt->rt6i_idev);
1638			}
1639			if (!(grt->rt6i_flags & RTF_GATEWAY))
1640				err = 0;
1641			ip6_rt_put(grt);
1642
1643			if (err)
1644				goto out;
1645		}
1646		err = -EINVAL;
1647		if (!dev || (dev->flags & IFF_LOOPBACK))
1648			goto out;
1649	}
1650
1651	err = -ENODEV;
1652	if (!dev)
1653		goto out;
1654
1655	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1656		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1657			err = -EINVAL;
1658			goto out;
1659		}
1660		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1661		rt->rt6i_prefsrc.plen = 128;
1662	} else
1663		rt->rt6i_prefsrc.plen = 0;
1664
1665	rt->rt6i_flags = cfg->fc_flags;
1666
1667install_route:
1668	rt->dst.dev = dev;
1669	rt->rt6i_idev = idev;
1670	rt->rt6i_table = table;
1671
1672	cfg->fc_nlinfo.nl_net = dev_net(dev);
1673
1674	return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len);
1675
1676out:
1677	if (dev)
1678		dev_put(dev);
1679	if (idev)
1680		in6_dev_put(idev);
1681	if (rt)
1682		dst_free(&rt->dst);
1683	return err;
1684}
1685
1686static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1687{
1688	int err;
1689	struct fib6_table *table;
1690	struct net *net = dev_net(rt->dst.dev);
1691
1692	if (rt == net->ipv6.ip6_null_entry) {
1693		err = -ENOENT;
1694		goto out;
1695	}
1696
1697	table = rt->rt6i_table;
1698	write_lock_bh(&table->tb6_lock);
1699	err = fib6_del(rt, info);
1700	write_unlock_bh(&table->tb6_lock);
1701
1702out:
1703	ip6_rt_put(rt);
1704	return err;
1705}
1706
1707int ip6_del_rt(struct rt6_info *rt)
1708{
1709	struct nl_info info = {
1710		.nl_net = dev_net(rt->dst.dev),
1711	};
1712	return __ip6_del_rt(rt, &info);
1713}
1714
1715static int ip6_route_del(struct fib6_config *cfg)
1716{
1717	struct fib6_table *table;
1718	struct fib6_node *fn;
1719	struct rt6_info *rt;
1720	int err = -ESRCH;
1721
1722	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1723	if (!table)
1724		return err;
1725
1726	read_lock_bh(&table->tb6_lock);
1727
1728	fn = fib6_locate(&table->tb6_root,
1729			 &cfg->fc_dst, cfg->fc_dst_len,
1730			 &cfg->fc_src, cfg->fc_src_len);
1731
1732	if (fn) {
1733		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1734			if (cfg->fc_ifindex &&
1735			    (!rt->dst.dev ||
1736			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1737				continue;
1738			if (cfg->fc_flags & RTF_GATEWAY &&
1739			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1740				continue;
1741			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1742				continue;
1743			dst_hold(&rt->dst);
1744			read_unlock_bh(&table->tb6_lock);
1745
1746			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1747		}
1748	}
1749	read_unlock_bh(&table->tb6_lock);
1750
1751	return err;
1752}
1753
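/*
 *	rt6_do_redirect() - handle an ICMPv6 Redirect.  In outline: sanity
 *	check the rd_msg and its ND options, make sure redirects are
 *	accepted on this interface, update the neighbour entry for the new
 *	first hop, clone the current route into an RTF_CACHE entry pointing
 *	at the new gateway and, if the old route was itself a cache entry,
 *	delete it.
 */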
1754static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1755{
1756	struct net *net = dev_net(skb->dev);
1757	struct netevent_redirect netevent;
1758	struct rt6_info *rt, *nrt = NULL;
1759	struct ndisc_options ndopts;
1760	struct inet6_dev *in6_dev;
1761	struct neighbour *neigh;
1762	struct rd_msg *msg;
1763	int optlen, on_link;
1764	u8 *lladdr;
1765
1766	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1767	optlen -= sizeof(*msg);
1768
1769	if (optlen < 0) {
1770		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1771		return;
1772	}
1773
1774	msg = (struct rd_msg *)icmp6_hdr(skb);
1775
1776	if (ipv6_addr_is_multicast(&msg->dest)) {
1777		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1778		return;
1779	}
1780
1781	on_link = 0;
1782	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1783		on_link = 1;
1784	} else if (ipv6_addr_type(&msg->target) !=
1785		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1786		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1787		return;
1788	}
1789
1790	in6_dev = __in6_dev_get(skb->dev);
1791	if (!in6_dev)
1792		return;
1793	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1794		return;
1795
1796	/* RFC2461 8.1:
1797	 *	The IP source address of the Redirect MUST be the same as the current
1798	 *	first-hop router for the specified ICMP Destination Address.
1799	 */
1800
1801	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1802		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1803		return;
1804	}
1805
1806	lladdr = NULL;
1807	if (ndopts.nd_opts_tgt_lladdr) {
1808		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1809					     skb->dev);
1810		if (!lladdr) {
1811			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1812			return;
1813		}
1814	}
1815
1816	rt = (struct rt6_info *) dst;
1817	if (rt == net->ipv6.ip6_null_entry) {
1818		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1819		return;
1820	}
1821
1822	/* Redirect received -> path was valid.
1823	 * Look, redirects are sent only in response to data packets,
1824	 * so this nexthop is apparently reachable. --ANK
1825	 */
1826	dst_confirm(&rt->dst);
1827
1828	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1829	if (!neigh)
1830		return;
1831
1832	/*
1833	 *	We have finally decided to accept it.
1834	 */
1835
1836	neigh_update(neigh, lladdr, NUD_STALE,
1837		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1838		     NEIGH_UPDATE_F_OVERRIDE|
1839		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1840				     NEIGH_UPDATE_F_ISROUTER))
1841		     );
1842
1843	nrt = ip6_rt_copy(rt, &msg->dest);
1844	if (!nrt)
1845		goto out;
1846
1847	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1848	if (on_link)
1849		nrt->rt6i_flags &= ~RTF_GATEWAY;
1850
1851	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1852
1853	if (ip6_ins_rt(nrt))
1854		goto out;
1855
1856	netevent.old = &rt->dst;
1857	netevent.new = &nrt->dst;
1858	netevent.daddr = &msg->dest;
1859	netevent.neigh = neigh;
1860	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1861
1862	if (rt->rt6i_flags & RTF_CACHE) {
1863		rt = (struct rt6_info *) dst_clone(&rt->dst);
1864		ip6_del_rt(rt);
1865	}
1866
1867out:
1868	neigh_release(neigh);
1869}
1870
1871/*
1872 *	Misc support functions
1873 */
1874
1875static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1876				    const struct in6_addr *dest)
1877{
1878	struct net *net = dev_net(ort->dst.dev);
1879	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1880					    ort->rt6i_table);
1881
1882	if (rt) {
1883		rt->dst.input = ort->dst.input;
1884		rt->dst.output = ort->dst.output;
1885		rt->dst.flags |= DST_HOST;
1886
1887		rt->rt6i_dst.addr = *dest;
1888		rt->rt6i_dst.plen = 128;
1889		dst_copy_metrics(&rt->dst, &ort->dst);
1890		rt->dst.error = ort->dst.error;
1891		rt->rt6i_idev = ort->rt6i_idev;
1892		if (rt->rt6i_idev)
1893			in6_dev_hold(rt->rt6i_idev);
1894		rt->dst.lastuse = jiffies;
1895
1896		if (ort->rt6i_flags & RTF_GATEWAY)
1897			rt->rt6i_gateway = ort->rt6i_gateway;
1898		else
1899			rt->rt6i_gateway = *dest;
1900		rt->rt6i_flags = ort->rt6i_flags;
1901		rt6_set_from(rt, ort);
1902		rt->rt6i_metric = 0;
1903
1904#ifdef CONFIG_IPV6_SUBTREES
1905		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1906#endif
1907		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1908		rt->rt6i_table = ort->rt6i_table;
1909	}
1910	return rt;
1911}
1912
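/*
 *	Route Information option support (RFC 4191): rt6_get_route_info()
 *	finds, and rt6_add_route_info() installs, the RTF_ROUTEINFO routes
 *	learned from Router Advertisements; they live in RT6_TABLE_INFO.
 */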
1913#ifdef CONFIG_IPV6_ROUTE_INFO
1914static struct rt6_info *rt6_get_route_info(struct net *net,
1915					   const struct in6_addr *prefix, int prefixlen,
1916					   const struct in6_addr *gwaddr, int ifindex)
1917{
1918	struct fib6_node *fn;
1919	struct rt6_info *rt = NULL;
1920	struct fib6_table *table;
1921
1922	table = fib6_get_table(net, RT6_TABLE_INFO);
1923	if (!table)
1924		return NULL;
1925
1926	read_lock_bh(&table->tb6_lock);
1927	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1928	if (!fn)
1929		goto out;
1930
1931	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1932		if (rt->dst.dev->ifindex != ifindex)
1933			continue;
1934		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1935			continue;
1936		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1937			continue;
1938		dst_hold(&rt->dst);
1939		break;
1940	}
1941out:
1942	read_unlock_bh(&table->tb6_lock);
1943	return rt;
1944}
1945
1946static struct rt6_info *rt6_add_route_info(struct net *net,
1947					   const struct in6_addr *prefix, int prefixlen,
1948					   const struct in6_addr *gwaddr, int ifindex,
1949					   unsigned int pref)
1950{
1951	struct fib6_config cfg = {
1952		.fc_table	= RT6_TABLE_INFO,
1953		.fc_metric	= IP6_RT_PRIO_USER,
1954		.fc_ifindex	= ifindex,
1955		.fc_dst_len	= prefixlen,
1956		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1957				  RTF_UP | RTF_PREF(pref),
1958		.fc_nlinfo.portid = 0,
1959		.fc_nlinfo.nlh = NULL,
1960		.fc_nlinfo.nl_net = net,
1961	};
1962
1963	cfg.fc_dst = *prefix;
1964	cfg.fc_gateway = *gwaddr;
1965
1966	/* We should treat it as a default route if prefix length is 0. */
1967	if (!prefixlen)
1968		cfg.fc_flags |= RTF_DEFAULT;
1969
1970	ip6_route_add(&cfg);
1971
1972	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1973}
1974#endif
1975
1976struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1977{
1978	struct rt6_info *rt;
1979	struct fib6_table *table;
1980
1981	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1982	if (!table)
1983		return NULL;
1984
1985	read_lock_bh(&table->tb6_lock);
1986	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1987		if (dev == rt->dst.dev &&
1988		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1989		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1990			break;
1991	}
1992	if (rt)
1993		dst_hold(&rt->dst);
1994	read_unlock_bh(&table->tb6_lock);
1995	return rt;
1996}
1997
1998struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1999				     struct net_device *dev,
2000				     unsigned int pref)
2001{
2002	struct fib6_config cfg = {
2003		.fc_table	= RT6_TABLE_DFLT,
2004		.fc_metric	= IP6_RT_PRIO_USER,
2005		.fc_ifindex	= dev->ifindex,
2006		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2007				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2008		.fc_nlinfo.portid = 0,
2009		.fc_nlinfo.nlh = NULL,
2010		.fc_nlinfo.nl_net = dev_net(dev),
2011	};
2012
2013	cfg.fc_gateway = *gwaddr;
2014
2015	ip6_route_add(&cfg);
2016
2017	return rt6_get_dflt_router(gwaddr, dev);
2018}
2019
2020void rt6_purge_dflt_routers(struct net *net)
2021{
2022	struct rt6_info *rt;
2023	struct fib6_table *table;
2024
2025	/* NOTE: Keep consistent with rt6_get_dflt_router */
2026	table = fib6_get_table(net, RT6_TABLE_DFLT);
2027	if (!table)
2028		return;
2029
2030restart:
2031	read_lock_bh(&table->tb6_lock);
2032	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2033		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2034		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2035			dst_hold(&rt->dst);
2036			read_unlock_bh(&table->tb6_lock);
2037			ip6_del_rt(rt);
2038			goto restart;
2039		}
2040	}
2041	read_unlock_bh(&table->tb6_lock);
2042}
2043
2044static void rtmsg_to_fib6_config(struct net *net,
2045				 struct in6_rtmsg *rtmsg,
2046				 struct fib6_config *cfg)
2047{
2048	memset(cfg, 0, sizeof(*cfg));
2049
2050	cfg->fc_table = RT6_TABLE_MAIN;
2051	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2052	cfg->fc_metric = rtmsg->rtmsg_metric;
2053	cfg->fc_expires = rtmsg->rtmsg_info;
2054	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2055	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2056	cfg->fc_flags = rtmsg->rtmsg_flags;
2057
2058	cfg->fc_nlinfo.nl_net = net;
2059
2060	cfg->fc_dst = rtmsg->rtmsg_dst;
2061	cfg->fc_src = rtmsg->rtmsg_src;
2062	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2063}
2064
2065int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2066{
2067	struct fib6_config cfg;
2068	struct in6_rtmsg rtmsg;
2069	int err;
2070
2071	switch (cmd) {
2072	case SIOCADDRT:		/* Add a route */
2073	case SIOCDELRT:		/* Delete a route */
2074		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2075			return -EPERM;
2076		err = copy_from_user(&rtmsg, arg,
2077				     sizeof(struct in6_rtmsg));
2078		if (err)
2079			return -EFAULT;
2080
2081		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2082
2083		rtnl_lock();
2084		switch (cmd) {
2085		case SIOCADDRT:
2086			err = ip6_route_add(&cfg);
2087			break;
2088		case SIOCDELRT:
2089			err = ip6_route_del(&cfg);
2090			break;
2091		default:
2092			err = -EINVAL;
2093		}
2094		rtnl_unlock();
2095
2096		return err;
2097	}
2098
2099	return -EINVAL;
2100}
2101
2102/*
2103 *	Drop the packet on the floor
2104 */
2105
2106static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2107{
2108	int type;
2109	struct dst_entry *dst = skb_dst(skb);
2110	switch (ipstats_mib_noroutes) {
2111	case IPSTATS_MIB_INNOROUTES:
2112		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2113		if (type == IPV6_ADDR_ANY) {
2114			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2115				      IPSTATS_MIB_INADDRERRORS);
2116			break;
2117		}
2118		/* FALLTHROUGH */
2119	case IPSTATS_MIB_OUTNOROUTES:
2120		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2121			      ipstats_mib_noroutes);
2122		break;
2123	}
2124	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2125	kfree_skb(skb);
2126	return 0;
2127}
2128
2129static int ip6_pkt_discard(struct sk_buff *skb)
2130{
2131	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2132}
2133
2134static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2135{
2136	skb->dev = skb_dst(skb)->dev;
2137	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2138}
2139
2140static int ip6_pkt_prohibit(struct sk_buff *skb)
2141{
2142	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2143}
2144
2145static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2146{
2147	skb->dev = skb_dst(skb)->dev;
2148	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2149}
2150
2151/*
2152 *	Allocate a dst for local (unicast / anycast) address.
2153 */
2154
2155struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2156				    const struct in6_addr *addr,
2157				    bool anycast)
2158{
2159	struct net *net = dev_net(idev->dev);
2160	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2161					    DST_NOCOUNT, NULL);
2162	if (!rt)
2163		return ERR_PTR(-ENOMEM);
2164
2165	in6_dev_hold(idev);
2166
2167	rt->dst.flags |= DST_HOST;
2168	rt->dst.input = ip6_input;
2169	rt->dst.output = ip6_output;
2170	rt->rt6i_idev = idev;
2171
2172	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2173	if (anycast)
2174		rt->rt6i_flags |= RTF_ANYCAST;
2175	else
2176		rt->rt6i_flags |= RTF_LOCAL;
2177
2178	rt->rt6i_gateway  = *addr;
2179	rt->rt6i_dst.addr = *addr;
2180	rt->rt6i_dst.plen = 128;
2181	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2182
2183	atomic_set(&rt->dst.__refcnt, 1);
2184
2185	return rt;
2186}
2187
2188int ip6_route_get_saddr(struct net *net,
2189			struct rt6_info *rt,
2190			const struct in6_addr *daddr,
2191			unsigned int prefs,
2192			struct in6_addr *saddr)
2193{
2194	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2195	int err = 0;
2196	if (rt->rt6i_prefsrc.plen)
2197		*saddr = rt->rt6i_prefsrc.addr;
2198	else
2199		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2200					 daddr, prefs, saddr);
2201	return err;
2202}
2203
2204/* remove deleted ip from prefsrc entries */
2205struct arg_dev_net_ip {
2206	struct net_device *dev;
2207	struct net *net;
2208	struct in6_addr *addr;
2209};
2210
2211static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2212{
2213	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2214	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2215	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2216
2217	if (((void *)rt->dst.dev == dev || !dev) &&
2218	    rt != net->ipv6.ip6_null_entry &&
2219	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2220		/* remove prefsrc entry */
2221		rt->rt6i_prefsrc.plen = 0;
2222	}
2223	return 0;
2224}
2225
2226void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2227{
2228	struct net *net = dev_net(ifp->idev->dev);
2229	struct arg_dev_net_ip adni = {
2230		.dev = ifp->idev->dev,
2231		.net = net,
2232		.addr = &ifp->addr,
2233	};
2234	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2235}
2236
2237#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2238#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2239
2240/* Remove routers and update dst entries when a gateway turns into a host. */
2241static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2242{
2243	struct in6_addr *gateway = (struct in6_addr *)arg;
2244
2245	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2246	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2247	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2248		return -1;
2249	}
2250	return 0;
2251}
2252
2253void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2254{
2255	fib6_clean_all(net, fib6_clean_tohost, gateway);
2256}
2257
2258struct arg_dev_net {
2259	struct net_device *dev;
2260	struct net *net;
2261};
2262
2263static int fib6_ifdown(struct rt6_info *rt, void *arg)
2264{
2265	const struct arg_dev_net *adn = arg;
2266	const struct net_device *dev = adn->dev;
2267
2268	if ((rt->dst.dev == dev || !dev) &&
2269	    rt != adn->net->ipv6.ip6_null_entry)
2270		return -1;
2271
2272	return 0;
2273}
2274
2275void rt6_ifdown(struct net *net, struct net_device *dev)
2276{
2277	struct arg_dev_net adn = {
2278		.dev = dev,
2279		.net = net,
2280	};
2281
2282	fib6_clean_all(net, fib6_ifdown, &adn);
2283	icmp6_clean_all(fib6_ifdown, &adn);
2284}
2285
2286struct rt6_mtu_change_arg {
2287	struct net_device *dev;
2288	unsigned int mtu;
2289};
2290
2291static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2292{
2293	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2294	struct inet6_dev *idev;
2295
2296	/* In IPv6, PMTU discovery is not optional,
2297	   so the RTAX_MTU lock cannot disable it.
2298	   We still use this lock to block changes
2299	   caused by addrconf/ndisc.
2300	*/
2301
2302	idev = __in6_dev_get(arg->dev);
2303	if (!idev)
2304		return 0;
2305
2306	/* For an administrative MTU increase there is no way to discover
2307	   an IPv6 PMTU increase, so the PMTU increase must be applied here.
2308	   Since RFC 1981 doesn't cover administrative MTU increases,
2309	   updating the PMTU on increase is a MUST (e.g. jumbo frames).
2310	 */
2311	/*
2312	   If the new MTU is less than the route PMTU, the new MTU will be
2313	   the lowest MTU in the path; update the route PMTU to reflect the
2314	   decrease.  If the new MTU is greater than the route PMTU, and the
2315	   old MTU was the lowest MTU in the path, update the route PMTU to
2316	   reflect the increase.  In that case, if another node's MTU is now
2317	   the lowest in the path, a TOO BIG message will trigger PMTU
2318	   discovery again.
2319	 */
2320	if (rt->dst.dev == arg->dev &&
2321	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2322	    (dst_mtu(&rt->dst) >= arg->mtu ||
2323	     (dst_mtu(&rt->dst) < arg->mtu &&
2324	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2325		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2326	}
2327	return 0;
2328}
2329
2330void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2331{
2332	struct rt6_mtu_change_arg arg = {
2333		.dev = dev,
2334		.mtu = mtu,
2335	};
2336
2337	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2338}
2339
2340static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2341	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2342	[RTA_OIF]               = { .type = NLA_U32 },
2343	[RTA_IIF]		= { .type = NLA_U32 },
2344	[RTA_PRIORITY]          = { .type = NLA_U32 },
2345	[RTA_METRICS]           = { .type = NLA_NESTED },
2346	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2347};
2348
2349static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2350			      struct fib6_config *cfg)
2351{
2352	struct rtmsg *rtm;
2353	struct nlattr *tb[RTA_MAX+1];
2354	int err;
2355
2356	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2357	if (err < 0)
2358		goto errout;
2359
2360	err = -EINVAL;
2361	rtm = nlmsg_data(nlh);
2362	memset(cfg, 0, sizeof(*cfg));
2363
2364	cfg->fc_table = rtm->rtm_table;
2365	cfg->fc_dst_len = rtm->rtm_dst_len;
2366	cfg->fc_src_len = rtm->rtm_src_len;
2367	cfg->fc_flags = RTF_UP;
2368	cfg->fc_protocol = rtm->rtm_protocol;
2369	cfg->fc_type = rtm->rtm_type;
2370
2371	if (rtm->rtm_type == RTN_UNREACHABLE ||
2372	    rtm->rtm_type == RTN_BLACKHOLE ||
2373	    rtm->rtm_type == RTN_PROHIBIT ||
2374	    rtm->rtm_type == RTN_THROW)
2375		cfg->fc_flags |= RTF_REJECT;
2376
2377	if (rtm->rtm_type == RTN_LOCAL)
2378		cfg->fc_flags |= RTF_LOCAL;
2379
2380	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2381	cfg->fc_nlinfo.nlh = nlh;
2382	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2383
2384	if (tb[RTA_GATEWAY]) {
2385		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2386		cfg->fc_flags |= RTF_GATEWAY;
2387	}
2388
2389	if (tb[RTA_DST]) {
2390		int plen = (rtm->rtm_dst_len + 7) >> 3;
2391
2392		if (nla_len(tb[RTA_DST]) < plen)
2393			goto errout;
2394
2395		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2396	}
2397
2398	if (tb[RTA_SRC]) {
2399		int plen = (rtm->rtm_src_len + 7) >> 3;
2400
2401		if (nla_len(tb[RTA_SRC]) < plen)
2402			goto errout;
2403
2404		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2405	}
2406
2407	if (tb[RTA_PREFSRC])
2408		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2409
2410	if (tb[RTA_OIF])
2411		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2412
2413	if (tb[RTA_PRIORITY])
2414		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2415
2416	if (tb[RTA_METRICS]) {
2417		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2418		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2419	}
2420
2421	if (tb[RTA_TABLE])
2422		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2423
2424	if (tb[RTA_MULTIPATH]) {
2425		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2426		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2427	}
2428
2429	err = 0;
2430errout:
2431	return err;
2432}
2433
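/*
 *	ip6_route_multipath() - add or delete every nexthop of an
 *	RTA_MULTIPATH request as an individual route.  When an add fails
 *	part-way through, the loop restarts in delete mode to roll back the
 *	nexthops that were already installed; deletions always continue
 *	through the whole list and the last error is reported.
 */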
2434static int ip6_route_multipath(struct fib6_config *cfg, int add)
2435{
2436	struct fib6_config r_cfg;
2437	struct rtnexthop *rtnh;
2438	int remaining;
2439	int attrlen;
2440	int err = 0, last_err = 0;
2441
2442beginning:
2443	rtnh = (struct rtnexthop *)cfg->fc_mp;
2444	remaining = cfg->fc_mp_len;
2445
2446	/* Parse a Multipath Entry */
2447	while (rtnh_ok(rtnh, remaining)) {
2448		memcpy(&r_cfg, cfg, sizeof(*cfg));
2449		if (rtnh->rtnh_ifindex)
2450			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2451
2452		attrlen = rtnh_attrlen(rtnh);
2453		if (attrlen > 0) {
2454			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2455
2456			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2457			if (nla) {
2458				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2459				r_cfg.fc_flags |= RTF_GATEWAY;
2460			}
2461		}
2462		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2463		if (err) {
2464			last_err = err;
2465			/* If we are trying to remove a route, do not stop the
2466			 * loop when ip6_route_del() fails (because the nexthop
2467			 * is already gone); we should try to remove all nexthops.
2468			 */
2469			if (add) {
2470				/* If add fails, we should try to delete all
2471				 * next hops that have been already added.
2472				 */
2473				add = 0;
2474				goto beginning;
2475			}
2476		}
2477		/* Because each route is added like a single route we remove
2478		 * this flag after the first nexthop (if there is a collision,
2479		 * we have already failed to add the first nexthop:
2480		 * fib6_add_rt2node() has rejected it).
2481		 */
2482		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2483		rtnh = rtnh_next(rtnh, &remaining);
2484	}
2485
2486	return last_err;
2487}
2488
2489static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2490{
2491	struct fib6_config cfg;
2492	int err;
2493
2494	err = rtm_to_fib6_config(skb, nlh, &cfg);
2495	if (err < 0)
2496		return err;
2497
2498	if (cfg.fc_mp)
2499		return ip6_route_multipath(&cfg, 0);
2500	else
2501		return ip6_route_del(&cfg);
2502}
2503
2504static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2505{
2506	struct fib6_config cfg;
2507	int err;
2508
2509	err = rtm_to_fib6_config(skb, nlh, &cfg);
2510	if (err < 0)
2511		return err;
2512
2513	if (cfg.fc_mp)
2514		return ip6_route_multipath(&cfg, 1);
2515	else
2516		return ip6_route_add(&cfg);
2517}
2518
2519static inline size_t rt6_nlmsg_size(void)
2520{
2521	return NLMSG_ALIGN(sizeof(struct rtmsg))
2522	       + nla_total_size(16) /* RTA_SRC */
2523	       + nla_total_size(16) /* RTA_DST */
2524	       + nla_total_size(16) /* RTA_GATEWAY */
2525	       + nla_total_size(16) /* RTA_PREFSRC */
2526	       + nla_total_size(4) /* RTA_TABLE */
2527	       + nla_total_size(4) /* RTA_IIF */
2528	       + nla_total_size(4) /* RTA_OIF */
2529	       + nla_total_size(4) /* RTA_PRIORITY */
2530	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2531	       + nla_total_size(sizeof(struct rta_cacheinfo));
2532}
2533
2534static int rt6_fill_node(struct net *net,
2535			 struct sk_buff *skb, struct rt6_info *rt,
2536			 struct in6_addr *dst, struct in6_addr *src,
2537			 int iif, int type, u32 portid, u32 seq,
2538			 int prefix, int nowait, unsigned int flags)
2539{
2540	struct rtmsg *rtm;
2541	struct nlmsghdr *nlh;
2542	long expires;
2543	u32 table;
2544
2545	if (prefix) {	/* user wants prefix routes only */
2546		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2547			/* success since this is not a prefix route */
2548			return 1;
2549		}
2550	}
2551
2552	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2553	if (!nlh)
2554		return -EMSGSIZE;
2555
2556	rtm = nlmsg_data(nlh);
2557	rtm->rtm_family = AF_INET6;
2558	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2559	rtm->rtm_src_len = rt->rt6i_src.plen;
2560	rtm->rtm_tos = 0;
2561	if (rt->rt6i_table)
2562		table = rt->rt6i_table->tb6_id;
2563	else
2564		table = RT6_TABLE_UNSPEC;
2565	rtm->rtm_table = table;
2566	if (nla_put_u32(skb, RTA_TABLE, table))
2567		goto nla_put_failure;
2568	if (rt->rt6i_flags & RTF_REJECT) {
2569		switch (rt->dst.error) {
2570		case -EINVAL:
2571			rtm->rtm_type = RTN_BLACKHOLE;
2572			break;
2573		case -EACCES:
2574			rtm->rtm_type = RTN_PROHIBIT;
2575			break;
2576		case -EAGAIN:
2577			rtm->rtm_type = RTN_THROW;
2578			break;
2579		default:
2580			rtm->rtm_type = RTN_UNREACHABLE;
2581			break;
2582		}
2583	}
2584	else if (rt->rt6i_flags & RTF_LOCAL)
2585		rtm->rtm_type = RTN_LOCAL;
2586	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2587		rtm->rtm_type = RTN_LOCAL;
2588	else
2589		rtm->rtm_type = RTN_UNICAST;
2590	rtm->rtm_flags = 0;
2591	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2592	rtm->rtm_protocol = rt->rt6i_protocol;
2593	if (rt->rt6i_flags & RTF_DYNAMIC)
2594		rtm->rtm_protocol = RTPROT_REDIRECT;
2595	else if (rt->rt6i_flags & RTF_ADDRCONF) {
2596		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2597			rtm->rtm_protocol = RTPROT_RA;
2598		else
2599			rtm->rtm_protocol = RTPROT_KERNEL;
2600	}
2601
2602	if (rt->rt6i_flags & RTF_CACHE)
2603		rtm->rtm_flags |= RTM_F_CLONED;
2604
2605	if (dst) {
2606		if (nla_put(skb, RTA_DST, 16, dst))
2607			goto nla_put_failure;
2608		rtm->rtm_dst_len = 128;
2609	} else if (rtm->rtm_dst_len)
2610		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2611			goto nla_put_failure;
2612#ifdef CONFIG_IPV6_SUBTREES
2613	if (src) {
2614		if (nla_put(skb, RTA_SRC, 16, src))
2615			goto nla_put_failure;
2616		rtm->rtm_src_len = 128;
2617	} else if (rtm->rtm_src_len &&
2618		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2619		goto nla_put_failure;
2620#endif
2621	if (iif) {
2622#ifdef CONFIG_IPV6_MROUTE
2623		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2624			int err = ip6mr_get_route(net, skb, rtm, nowait);
2625			if (err <= 0) {
2626				if (!nowait) {
2627					if (err == 0)
2628						return 0;
2629					goto nla_put_failure;
2630				} else {
2631					if (err == -EMSGSIZE)
2632						goto nla_put_failure;
2633				}
2634			}
2635		} else
2636#endif
2637			if (nla_put_u32(skb, RTA_IIF, iif))
2638				goto nla_put_failure;
2639	} else if (dst) {
2640		struct in6_addr saddr_buf;
2641		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2642		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2643			goto nla_put_failure;
2644	}
2645
2646	if (rt->rt6i_prefsrc.plen) {
2647		struct in6_addr saddr_buf;
2648		saddr_buf = rt->rt6i_prefsrc.addr;
2649		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2650			goto nla_put_failure;
2651	}
2652
2653	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2654		goto nla_put_failure;
2655
2656	if (rt->rt6i_flags & RTF_GATEWAY) {
2657		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2658			goto nla_put_failure;
2659	}
2660
2661	if (rt->dst.dev &&
2662	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2663		goto nla_put_failure;
2664	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2665		goto nla_put_failure;
2666
2667	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2668
2669	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2670		goto nla_put_failure;
2671
2672	return nlmsg_end(skb, nlh);
2673
2674nla_put_failure:
2675	nlmsg_cancel(skb, nlh);
2676	return -EMSGSIZE;
2677}
2678
2679int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2680{
2681	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2682	int prefix;
2683
2684	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2685		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2686		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2687	} else
2688		prefix = 0;
2689
2690	return rt6_fill_node(arg->net,
2691		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2692		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2693		     prefix, 0, NLM_F_MULTI);
2694}
2695
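/*
 *	inet6_rtm_getroute() - RTM_GETROUTE handler.  Parse the request
 *	attributes into a flow, resolve it with ip6_route_input_lookup()
 *	(when RTA_IIF is given) or ip6_route_output(), and unicast a single
 *	RTM_NEWROUTE message describing the result back to the requester.
 */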
2696static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2697{
2698	struct net *net = sock_net(in_skb->sk);
2699	struct nlattr *tb[RTA_MAX+1];
2700	struct rt6_info *rt;
2701	struct sk_buff *skb;
2702	struct rtmsg *rtm;
2703	struct flowi6 fl6;
2704	int err, iif = 0, oif = 0;
2705
2706	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2707	if (err < 0)
2708		goto errout;
2709
2710	err = -EINVAL;
2711	memset(&fl6, 0, sizeof(fl6));
2712
2713	if (tb[RTA_SRC]) {
2714		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2715			goto errout;
2716
2717		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2718	}
2719
2720	if (tb[RTA_DST]) {
2721		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2722			goto errout;
2723
2724		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2725	}
2726
2727	if (tb[RTA_IIF])
2728		iif = nla_get_u32(tb[RTA_IIF]);
2729
2730	if (tb[RTA_OIF])
2731		oif = nla_get_u32(tb[RTA_OIF]);
2732
2733	if (tb[RTA_MARK])
2734		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2735
2736	if (iif) {
2737		struct net_device *dev;
2738		int flags = 0;
2739
2740		dev = __dev_get_by_index(net, iif);
2741		if (!dev) {
2742			err = -ENODEV;
2743			goto errout;
2744		}
2745
2746		fl6.flowi6_iif = iif;
2747
2748		if (!ipv6_addr_any(&fl6.saddr))
2749			flags |= RT6_LOOKUP_F_HAS_SADDR;
2750
2751		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2752							       flags);
2753	} else {
2754		fl6.flowi6_oif = oif;
2755
2756		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2757	}
2758
2759	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2760	if (!skb) {
2761		ip6_rt_put(rt);
2762		err = -ENOBUFS;
2763		goto errout;
2764	}
2765
2766	/* Reserve room for dummy headers; this skb can pass
2767	   through a good chunk of the routing engine.
2768	 */
2769	skb_reset_mac_header(skb);
2770	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2771
2772	skb_dst_set(skb, &rt->dst);
2773
2774	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2775			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2776			    nlh->nlmsg_seq, 0, 0, 0);
2777	if (err < 0) {
2778		kfree_skb(skb);
2779		goto errout;
2780	}
2781
2782	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2783errout:
2784	return err;
2785}
2786
2787void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2788{
2789	struct sk_buff *skb;
2790	struct net *net = info->nl_net;
2791	u32 seq;
2792	int err;
2793
2794	err = -ENOBUFS;
2795	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2796
2797	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2798	if (!skb)
2799		goto errout;
2800
2801	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2802				event, info->portid, seq, 0, 0, 0);
2803	if (err < 0) {
2804		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2805		WARN_ON(err == -EMSGSIZE);
2806		kfree_skb(skb);
2807		goto errout;
2808	}
2809	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2810		    info->nlh, gfp_any());
2811	return;
2812errout:
2813	if (err < 0)
2814		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2815}
2816
2817static int ip6_route_dev_notify(struct notifier_block *this,
2818				unsigned long event, void *ptr)
2819{
2820	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2821	struct net *net = dev_net(dev);
2822
2823	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2824		net->ipv6.ip6_null_entry->dst.dev = dev;
2825		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2826#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2827		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2828		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2829		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2830		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2831#endif
2832	}
2833
2834	return NOTIFY_OK;
2835}
2836
2837/*
2838 *	/proc
2839 */
2840
2841#ifdef CONFIG_PROC_FS
2842
2843static const struct file_operations ipv6_route_proc_fops = {
2844	.owner		= THIS_MODULE,
2845	.open		= ipv6_route_open,
2846	.read		= seq_read,
2847	.llseek		= seq_lseek,
2848	.release	= seq_release_net,
2849};
2850
2851static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2852{
2853	struct net *net = (struct net *)seq->private;
2854	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2855		   net->ipv6.rt6_stats->fib_nodes,
2856		   net->ipv6.rt6_stats->fib_route_nodes,
2857		   net->ipv6.rt6_stats->fib_rt_alloc,
2858		   net->ipv6.rt6_stats->fib_rt_entries,
2859		   net->ipv6.rt6_stats->fib_rt_cache,
2860		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2861		   net->ipv6.rt6_stats->fib_discarded_routes);
2862
2863	return 0;
2864}
2865
2866static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2867{
2868	return single_open_net(inode, file, rt6_stats_seq_show);
2869}
2870
2871static const struct file_operations rt6_stats_seq_fops = {
2872	.owner	 = THIS_MODULE,
2873	.open	 = rt6_stats_seq_open,
2874	.read	 = seq_read,
2875	.llseek	 = seq_lseek,
2876	.release = single_release_net,
2877};
2878#endif	/* CONFIG_PROC_FS */
2879
2880#ifdef CONFIG_SYSCTL
2881
2882static
2883int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2884			      void __user *buffer, size_t *lenp, loff_t *ppos)
2885{
2886	struct net *net;
2887	int delay;
2888	if (!write)
2889		return -EINVAL;
2890
2891	net = (struct net *)ctl->extra1;
2892	delay = net->ipv6.sysctl.flush_delay;
2893	proc_dointvec(ctl, write, buffer, lenp, ppos);
2894	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2895	return 0;
2896}
2897
2898struct ctl_table ipv6_route_table_template[] = {
2899	{
2900		.procname	=	"flush",
2901		.data		=	&init_net.ipv6.sysctl.flush_delay,
2902		.maxlen		=	sizeof(int),
2903		.mode		=	0200,
2904		.proc_handler	=	ipv6_sysctl_rtcache_flush
2905	},
2906	{
2907		.procname	=	"gc_thresh",
2908		.data		=	&ip6_dst_ops_template.gc_thresh,
2909		.maxlen		=	sizeof(int),
2910		.mode		=	0644,
2911		.proc_handler	=	proc_dointvec,
2912	},
2913	{
2914		.procname	=	"max_size",
2915		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2916		.maxlen		=	sizeof(int),
2917		.mode		=	0644,
2918		.proc_handler	=	proc_dointvec,
2919	},
2920	{
2921		.procname	=	"gc_min_interval",
2922		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2923		.maxlen		=	sizeof(int),
2924		.mode		=	0644,
2925		.proc_handler	=	proc_dointvec_jiffies,
2926	},
2927	{
2928		.procname	=	"gc_timeout",
2929		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2930		.maxlen		=	sizeof(int),
2931		.mode		=	0644,
2932		.proc_handler	=	proc_dointvec_jiffies,
2933	},
2934	{
2935		.procname	=	"gc_interval",
2936		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2937		.maxlen		=	sizeof(int),
2938		.mode		=	0644,
2939		.proc_handler	=	proc_dointvec_jiffies,
2940	},
2941	{
2942		.procname	=	"gc_elasticity",
2943		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2944		.maxlen		=	sizeof(int),
2945		.mode		=	0644,
2946		.proc_handler	=	proc_dointvec,
2947	},
2948	{
2949		.procname	=	"mtu_expires",
2950		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2951		.maxlen		=	sizeof(int),
2952		.mode		=	0644,
2953		.proc_handler	=	proc_dointvec_jiffies,
2954	},
2955	{
2956		.procname	=	"min_adv_mss",
2957		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2958		.maxlen		=	sizeof(int),
2959		.mode		=	0644,
2960		.proc_handler	=	proc_dointvec,
2961	},
2962	{
2963		.procname	=	"gc_min_interval_ms",
2964		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2965		.maxlen		=	sizeof(int),
2966		.mode		=	0644,
2967		.proc_handler	=	proc_dointvec_ms_jiffies,
2968	},
2969	{ }
2970};
2971
2972struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2973{
2974	struct ctl_table *table;
2975
2976	table = kmemdup(ipv6_route_table_template,
2977			sizeof(ipv6_route_table_template),
2978			GFP_KERNEL);
2979
2980	if (table) {
2981		table[0].data = &net->ipv6.sysctl.flush_delay;
2982		table[0].extra1 = net;
2983		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2984		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2985		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2986		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2987		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2988		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2989		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2990		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2991		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2992
2993		/* Don't export sysctls to unprivileged user namespaces:
		 * clearing the first procname makes the whole table
		 * register as empty.
		 */
2994		if (net->user_ns != &init_user_ns)
2995			table[0].procname = NULL;
2996	}
2997
2998	return table;
2999}
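/* The template's .data pointers reference init_net; this duplicates the
 * table for each namespace and rebinds every entry to that namespace's own
 * fields, so the index-based assignments above must match the template's
 * entry order exactly.
 */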
3000#endif
3001
3002static int __net_init ip6_route_net_init(struct net *net)
3003{
3004	int ret = -ENOMEM;
3005
3006	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3007	       sizeof(net->ipv6.ip6_dst_ops));
3008
3009	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3010		goto out_ip6_dst_ops;
3011
3012	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3013					   sizeof(*net->ipv6.ip6_null_entry),
3014					   GFP_KERNEL);
3015	if (!net->ipv6.ip6_null_entry)
3016		goto out_ip6_dst_entries;
3017	net->ipv6.ip6_null_entry->dst.path =
3018		(struct dst_entry *)net->ipv6.ip6_null_entry;
3019	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3020	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3021			 ip6_template_metrics, true);
3022
3023#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3024	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3025					       sizeof(*net->ipv6.ip6_prohibit_entry),
3026					       GFP_KERNEL);
3027	if (!net->ipv6.ip6_prohibit_entry)
3028		goto out_ip6_null_entry;
3029	net->ipv6.ip6_prohibit_entry->dst.path =
3030		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3031	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3032	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3033			 ip6_template_metrics, true);
3034
3035	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3036					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3037					       GFP_KERNEL);
3038	if (!net->ipv6.ip6_blk_hole_entry)
3039		goto out_ip6_prohibit_entry;
3040	net->ipv6.ip6_blk_hole_entry->dst.path =
3041		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3042	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3043	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3044			 ip6_template_metrics, true);
3045#endif
3046
3047	net->ipv6.sysctl.flush_delay = 0;
3048	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3049	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3050	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3051	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3052	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3053	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3054	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3055
3056	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3057
3058	ret = 0;
3059out:
3060	return ret;
3061
3062#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3063out_ip6_prohibit_entry:
3064	kfree(net->ipv6.ip6_prohibit_entry);
3065out_ip6_null_entry:
3066	kfree(net->ipv6.ip6_null_entry);
3067#endif
3068out_ip6_dst_entries:
3069	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3070out_ip6_dst_ops:
3071	goto out;
3072}
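/* Per-namespace bring-up: duplicate the dst_ops and the special route
 * templates, point each copy's dst.path/dst.ops back at the per-netns
 * objects, and seed the default GC/MTU sysctl values.  Any failure unwinds
 * only the steps already completed via the labels above.
 */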
3073
3074static void __net_exit ip6_route_net_exit(struct net *net)
3075{
3076	kfree(net->ipv6.ip6_null_entry);
3077#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3078	kfree(net->ipv6.ip6_prohibit_entry);
3079	kfree(net->ipv6.ip6_blk_hole_entry);
3080#endif
3081	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3082}
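/* Mirror of ip6_route_net_init(): free the duplicated special routes and
 * drop this namespace's dst entry accounting.
 */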
3083
3084static int __net_init ip6_route_net_init_late(struct net *net)
3085{
3086#ifdef CONFIG_PROC_FS
3087	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3088	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3089#endif
3090	return 0;
3091}
3092
3093static void __net_exit ip6_route_net_exit_late(struct net *net)
3094{
3095#ifdef CONFIG_PROC_FS
3096	remove_proc_entry("ipv6_route", net->proc_net);
3097	remove_proc_entry("rt6_stats", net->proc_net);
3098#endif
3099}
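/* The /proc files live in a separate "late" pernet_operations so they only
 * appear once the per-namespace routing state they dump exists, and are
 * removed before that state is torn down.
 */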
3100
3101static struct pernet_operations ip6_route_net_ops = {
3102	.init = ip6_route_net_init,
3103	.exit = ip6_route_net_exit,
3104};
3105
3106static int __net_init ipv6_inetpeer_init(struct net *net)
3107{
3108	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3109
3110	if (!bp)
3111		return -ENOMEM;
3112	inet_peer_base_init(bp);
3113	net->ipv6.peers = bp;
3114	return 0;
3115}
3116
3117static void __net_exit ipv6_inetpeer_exit(struct net *net)
3118{
3119	struct inet_peer_base *bp = net->ipv6.peers;
3120
3121	net->ipv6.peers = NULL;
3122	inetpeer_invalidate_tree(bp);
3123	kfree(bp);
3124}
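/* Each namespace keeps its own inet_peer_base for IPv6 peers; on exit the
 * tree is invalidated so remaining peer entries are released before the
 * base itself is freed.
 */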
3125
3126static struct pernet_operations ipv6_inetpeer_ops = {
3127	.init	=	ipv6_inetpeer_init,
3128	.exit	=	ipv6_inetpeer_exit,
3129};
3130
3131static struct pernet_operations ip6_route_net_late_ops = {
3132	.init = ip6_route_net_init_late,
3133	.exit = ip6_route_net_exit_late,
3134};
3135
3136static struct notifier_block ip6_route_dev_notifier = {
3137	.notifier_call = ip6_route_dev_notify,
3138	.priority = 0,
3139};
3140
3141int __init ip6_route_init(void)
3142{
3143	int ret;
3144
3145	ret = -ENOMEM;
3146	ip6_dst_ops_template.kmem_cachep =
3147		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3148				  SLAB_HWCACHE_ALIGN, NULL);
3149	if (!ip6_dst_ops_template.kmem_cachep)
3150		goto out;
3151
3152	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3153	if (ret)
3154		goto out_kmem_cache;
3155
3156	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3157	if (ret)
3158		goto out_dst_entries;
3159
3160	ret = register_pernet_subsys(&ip6_route_net_ops);
3161	if (ret)
3162		goto out_register_inetpeer;
3163
3164	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3165
3166	/* The loopback device was registered before this code runs (and before
3167	 * our notifier is installed), so init_net's special rt6_info entries
3168	 * never got their loopback references; set them up manually here. */
3169	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3170	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3171#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3172	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3173	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3174	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3175	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3176#endif
3177	ret = fib6_init();
3178	if (ret)
3179		goto out_register_subsys;
3180
3181	ret = xfrm6_init();
3182	if (ret)
3183		goto out_fib6_init;
3184
3185	ret = fib6_rules_init();
3186	if (ret)
3187		goto xfrm6_init;
3188
3189	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3190	if (ret)
3191		goto fib6_rules_init;
3192
3193	ret = -ENOBUFS;
3194	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3195	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3196	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3197		goto out_register_late_subsys;
3198
3199	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3200	if (ret)
3201		goto out_register_late_subsys;
3202
3203out:
3204	return ret;
3205
3206out_register_late_subsys:
3207	unregister_pernet_subsys(&ip6_route_net_late_ops);
3208fib6_rules_init:
3209	fib6_rules_cleanup();
3210xfrm6_init:
3211	xfrm6_fini();
3212out_fib6_init:
3213	fib6_gc_cleanup();
3214out_register_subsys:
3215	unregister_pernet_subsys(&ip6_route_net_ops);
3216out_register_inetpeer:
3217	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3218out_dst_entries:
3219	dst_entries_destroy(&ip6_dst_blackhole_ops);
3220out_kmem_cache:
3221	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3222	goto out;
3223}
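/* Initialisation order: dst kmem cache and blackhole dst accounting first,
 * then the pernet subsystems for inetpeer and the core route state, then
 * fib6, xfrm6 and policy rules, the late /proc pernet hooks, the rtnetlink
 * route handlers, and finally the netdevice notifier.  Each failure label
 * unwinds exactly the steps completed so far.
 */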
3224
3225void ip6_route_cleanup(void)
3226{
3227	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3228	unregister_pernet_subsys(&ip6_route_net_late_ops);
3229	fib6_rules_cleanup();
3230	xfrm6_fini();
3231	fib6_gc_cleanup();
3232	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3233	unregister_pernet_subsys(&ip6_route_net_ops);
3234	dst_entries_destroy(&ip6_dst_blackhole_ops);
3235	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3236}
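/* Unload path: detach the netdevice notifier and late /proc hooks first,
 * then tear down policy rules, xfrm6 and fib6 GC state, unregister the
 * pernet subsystems, and finally drop the blackhole dst accounting and the
 * rt6_info kmem cache.
 */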