Linux Audio

Check our new training course

Loading...
v3.1
   1/*
   2 *	Linux INET6 implementation
   3 *	FIB front-end.
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*	Changes:
  15 *
  16 *	YOSHIFUJI Hideaki @USAGI
  17 *		reworked default router selection.
  18 *		- respect outgoing interface
  19 *		- select from (probably) reachable routers (i.e.
  20 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *		- always select the same router if it is (probably)
  22 *		reachable.  otherwise, round-robin the list.
  23 *	Ville Nuorvala
  24 *		Fixed routing subtrees.
  25 */
  26
 
 
  27#include <linux/capability.h>
  28#include <linux/errno.h>
 
  29#include <linux/types.h>
  30#include <linux/times.h>
  31#include <linux/socket.h>
  32#include <linux/sockios.h>
  33#include <linux/net.h>
  34#include <linux/route.h>
  35#include <linux/netdevice.h>
  36#include <linux/in6.h>
  37#include <linux/mroute6.h>
  38#include <linux/init.h>
  39#include <linux/if_arp.h>
  40#include <linux/proc_fs.h>
  41#include <linux/seq_file.h>
  42#include <linux/nsproxy.h>
  43#include <linux/slab.h>
  44#include <net/net_namespace.h>
  45#include <net/snmp.h>
  46#include <net/ipv6.h>
  47#include <net/ip6_fib.h>
  48#include <net/ip6_route.h>
  49#include <net/ndisc.h>
  50#include <net/addrconf.h>
  51#include <net/tcp.h>
  52#include <linux/rtnetlink.h>
  53#include <net/dst.h>
  54#include <net/xfrm.h>
  55#include <net/netevent.h>
  56#include <net/netlink.h>
  57
  58#include <asm/uaccess.h>
  59
  60#ifdef CONFIG_SYSCTL
  61#include <linux/sysctl.h>
  62#endif
  63
/*
 * Compile-time debug switches for this file.
 * With RT6_DEBUG >= 3, RDBG(x) expands to "printk x" (so x must be a
 * parenthesised argument list) and RT6_TRACE(...) prints at KERN_DEBUG.
 * Otherwise RDBG expands to nothing and RT6_TRACE to an empty statement.
 */
  64/* Set to 3 to get tracing. */
  65#define RT6_DEBUG 2
  66
  67#if RT6_DEBUG >= 3
  68#define RDBG(x) printk x
  69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
  70#else
  71#define RDBG(x)
  72#define RT6_TRACE(x...) do { ; } while (0)
  73#endif
  74
  75static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
  76				    const struct in6_addr *dest);
  77static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  78static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  79static unsigned int	 ip6_default_mtu(const struct dst_entry *dst);
  80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  81static void		ip6_dst_destroy(struct dst_entry *);
  82static void		ip6_dst_ifdown(struct dst_entry *,
  83				       struct net_device *dev, int how);
  84static int		 ip6_dst_gc(struct dst_ops *ops);
  85
  86static int		ip6_pkt_discard(struct sk_buff *skb);
  87static int		ip6_pkt_discard_out(struct sk_buff *skb);
  88static void		ip6_link_failure(struct sk_buff *skb);
  89static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
  90
  91#ifdef CONFIG_IPV6_ROUTE_INFO
  92static struct rt6_info *rt6_add_route_info(struct net *net,
  93					   const struct in6_addr *prefix, int prefixlen,
  94					   const struct in6_addr *gwaddr, int ifindex,
  95					   unsigned pref);
  96static struct rt6_info *rt6_get_route_info(struct net *net,
  97					   const struct in6_addr *prefix, int prefixlen,
  98					   const struct in6_addr *gwaddr, int ifindex);
  99#endif
 100
/*
 * cow_metrics hook for ip6_dst_ops_template: try to switch a dst over to
 * the metrics array stored in its inet_peer.
 *
 * @dst: dst entry (cast to its enclosing rt6_info)
 * @old: the current raw value of dst->_metrics, as seen by the caller
 *
 * Returns a pointer to the metrics array now attached to the dst, or NULL
 * when the route is not a host route (DST_HOST clear), no peer could be
 * bound, or a concurrent update installed a read-only metrics pointer.
 */
 101static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 102{
 103	struct rt6_info *rt = (struct rt6_info *) dst;
 104	struct inet_peer *peer;
 105	u32 *p = NULL;
 106
 107	if (!(rt->dst.flags & DST_HOST))
 108		return NULL;
 109
	/* Bind (creating if necessary) a peer when none is attached yet. */
 110	if (!rt->rt6i_peer)
 111		rt6_bind_peer(rt, 1);
 112
 113	peer = rt->rt6i_peer;
 114	if (peer) {
 115		u32 *old_p = __DST_METRICS_PTR(old);
 116		unsigned long prev, new;
 117
		/* Seed the peer's array from the old metrics if inet_metrics_new() says so. */
 118		p = peer->metrics;
 119		if (inet_metrics_new(peer))
 120			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
 121
		/* Install the peer array only if _metrics still holds 'old'. */
 122		new = (unsigned long) p;
 123		prev = cmpxchg(&dst->_metrics, old, new);
 124
		/*
		 * The exchange did not happen: someone else changed _metrics
		 * first. Use what they installed, unless it is flagged
		 * read-only, in which case report failure.
		 */
 125		if (prev != old) {
 126			p = __DST_METRICS_PTR(prev);
 127			if (prev & DST_METRICS_READ_ONLY)
 128				p = NULL;
 129		}
 130	}
 131	return p;
 132}
 133
 
 
 
 
 
 
 
 
 
 134static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
 135{
 136	return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 137}
 138
 139static struct dst_ops ip6_dst_ops_template = {
 140	.family			=	AF_INET6,
 141	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 142	.gc			=	ip6_dst_gc,
 143	.gc_thresh		=	1024,
 144	.check			=	ip6_dst_check,
 145	.default_advmss		=	ip6_default_advmss,
 146	.default_mtu		=	ip6_default_mtu,
 147	.cow_metrics		=	ipv6_cow_metrics,
 148	.destroy		=	ip6_dst_destroy,
 149	.ifdown			=	ip6_dst_ifdown,
 150	.negative_advice	=	ip6_negative_advice,
 151	.link_failure		=	ip6_link_failure,
 152	.update_pmtu		=	ip6_rt_update_pmtu,
 153	.local_out		=	__ip6_local_out,
 154	.neigh_lookup		=	ip6_neigh_lookup,
 155};
 156
 157static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
 158{
 159	return 0;
 
 
 160}
 161
 162static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 163{
 164}
 165
 166static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
 167					 unsigned long old)
 168{
 169	return NULL;
 170}
 171
 172static struct dst_ops ip6_dst_blackhole_ops = {
 173	.family			=	AF_INET6,
 174	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 175	.destroy		=	ip6_dst_destroy,
 176	.check			=	ip6_dst_check,
 177	.default_mtu		=	ip6_blackhole_default_mtu,
 178	.default_advmss		=	ip6_default_advmss,
 179	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 180	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
 181	.neigh_lookup		=	ip6_neigh_lookup,
 182};
 183
 184static const u32 ip6_template_metrics[RTAX_MAX] = {
 185	[RTAX_HOPLIMIT - 1] = 255,
 186};
 187
 188static struct rt6_info ip6_null_entry_template = {
 189	.dst = {
 190		.__refcnt	= ATOMIC_INIT(1),
 191		.__use		= 1,
 192		.obsolete	= -1,
 193		.error		= -ENETUNREACH,
 194		.input		= ip6_pkt_discard,
 195		.output		= ip6_pkt_discard_out,
 196	},
 197	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 198	.rt6i_protocol  = RTPROT_KERNEL,
 199	.rt6i_metric	= ~(u32) 0,
 200	.rt6i_ref	= ATOMIC_INIT(1),
 201};
 202
 203#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 204
 205static int ip6_pkt_prohibit(struct sk_buff *skb);
 206static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 207
 208static struct rt6_info ip6_prohibit_entry_template = {
 209	.dst = {
 210		.__refcnt	= ATOMIC_INIT(1),
 211		.__use		= 1,
 212		.obsolete	= -1,
 213		.error		= -EACCES,
 214		.input		= ip6_pkt_prohibit,
 215		.output		= ip6_pkt_prohibit_out,
 216	},
 217	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 218	.rt6i_protocol  = RTPROT_KERNEL,
 219	.rt6i_metric	= ~(u32) 0,
 220	.rt6i_ref	= ATOMIC_INIT(1),
 221};
 222
 223static struct rt6_info ip6_blk_hole_entry_template = {
 224	.dst = {
 225		.__refcnt	= ATOMIC_INIT(1),
 226		.__use		= 1,
 227		.obsolete	= -1,
 228		.error		= -EINVAL,
 229		.input		= dst_discard,
 230		.output		= dst_discard,
 231	},
 232	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 233	.rt6i_protocol  = RTPROT_KERNEL,
 234	.rt6i_metric	= ~(u32) 0,
 235	.rt6i_ref	= ATOMIC_INIT(1),
 236};
 237
 238#endif
 239
 240/* allocate dst with ip6_dst_ops */
 241static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
 242					     struct net_device *dev,
 243					     int flags)
 244{
 245	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
 246
 247	if (rt != NULL)
 248		memset(&rt->rt6i_table, 0,
 249			sizeof(*rt) - sizeof(struct dst_entry));
 250
 251	return rt;
 252}
 253
 254static void ip6_dst_destroy(struct dst_entry *dst)
 255{
 256	struct rt6_info *rt = (struct rt6_info *)dst;
 257	struct inet6_dev *idev = rt->rt6i_idev;
 258	struct inet_peer *peer = rt->rt6i_peer;
 259
 260	if (!(rt->dst.flags & DST_HOST))
 261		dst_destroy_metrics_generic(dst);
 262
 263	if (idev != NULL) {
 264		rt->rt6i_idev = NULL;
 265		in6_dev_put(idev);
 266	}
 
 
 
 
 267	if (peer) {
 268		rt->rt6i_peer = NULL;
 269		inet_putpeer(peer);
 270	}
 271}
 272
 273static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
 274
 275static u32 rt6_peer_genid(void)
 276{
 277	return atomic_read(&__rt6_peer_genid);
 278}
 279
 280void rt6_bind_peer(struct rt6_info *rt, int create)
 281{
 282	struct inet_peer *peer;
 283
 284	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
 285	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
 286		inet_putpeer(peer);
 287	else
 288		rt->rt6i_peer_genid = rt6_peer_genid();
 289}
 290
 291static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 292			   int how)
 293{
 294	struct rt6_info *rt = (struct rt6_info *)dst;
 295	struct inet6_dev *idev = rt->rt6i_idev;
 296	struct net_device *loopback_dev =
 297		dev_net(dev)->loopback_dev;
 298
 299	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
 300		struct inet6_dev *loopback_idev =
 301			in6_dev_get(loopback_dev);
 302		if (loopback_idev != NULL) {
 303			rt->rt6i_idev = loopback_idev;
 304			in6_dev_put(idev);
 305		}
 306	}
 307}
 308
 309static __inline__ int rt6_check_expired(const struct rt6_info *rt)
 310{
 311	return (rt->rt6i_flags & RTF_EXPIRES) &&
 312		time_after(jiffies, rt->rt6i_expires);
 
 
 
 
 
 
 
 
 
 313}
 314
 315static inline int rt6_need_strict(const struct in6_addr *daddr)
 316{
 317	return ipv6_addr_type(daddr) &
 318		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 319}
 320
 321/*
 322 *	Route lookup. Any table->tb6_lock is implied.
 323 */
 324
/*
 * Pick, from the list of routes starting at @rt (linked via dst.rt6_next),
 * the one that matches the requested outgoing interface or source address.
 *
 * @net:   namespace, used for ipv6_chk_addr() and the null entry
 * @rt:    head of the candidate list
 * @saddr: source address, consulted only when @oif is 0
 * @oif:   outgoing interface index, 0 for "any"
 * @flags: RT6_LOOKUP_F_* flags; only RT6_LOOKUP_F_IFACE is examined here
 *
 * Returns the matching route; @rt itself when nothing constrains the
 * choice or nothing matched non-strictly; or the namespace's null entry
 * when @oif is set, RT6_LOOKUP_F_IFACE is requested and no route matched.
 */
 325static inline struct rt6_info *rt6_device_match(struct net *net,
 326						    struct rt6_info *rt,
 327						    const struct in6_addr *saddr,
 328						    int oif,
 329						    int flags)
 330{
 331	struct rt6_info *local = NULL;
 332	struct rt6_info *sprt;
 333
	/* No interface and no source address given: take the head as is. */
 334	if (!oif && ipv6_addr_any(saddr))
 335		goto out;
 336
 337	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 338		struct net_device *dev = sprt->rt6i_dev;
 339
 340		if (oif) {
			/* Exact device match wins immediately. */
 341			if (dev->ifindex == oif)
 342				return sprt;
			/*
			 * Routes on a loopback device are remembered in
			 * 'local' as a fallback, subject to the checks below
			 * on the route's inet6_dev.
			 */
 343			if (dev->flags & IFF_LOOPBACK) {
 344				if (sprt->rt6i_idev == NULL ||
 345				    sprt->rt6i_idev->dev->ifindex != oif) {
					/* oif is known non-zero here, so the "&& oif" / "!oif" tests are redundant. */
 346					if (flags & RT6_LOOKUP_F_IFACE && oif)
 347						continue;
 348					if (local && (!oif ||
 349						      local->rt6i_idev->dev->ifindex == oif))
 350						continue;
 351				}
 352				local = sprt;
 353			}
 354		} else {
			/* No oif: match on the source address being on this device. */
 355			if (ipv6_chk_addr(net, saddr, dev,
 356					  flags & RT6_LOOKUP_F_IFACE))
 357				return sprt;
 358		}
 359	}
 360
 361	if (oif) {
 362		if (local)
 363			return local;
 364
		/* Strict interface lookup with no match: report the null entry. */
 365		if (flags & RT6_LOOKUP_F_IFACE)
 366			return net->ipv6.ip6_null_entry;
 367	}
 368out:
 369	return rt;
 370}
 371
/*
 * rt6_probe(): with CONFIG_IPV6_ROUTER_PREF, send a neighbour solicitation
 * towards the route's neighbour when that neighbour is not in a NUD_VALID
 * state and the per-device rtr_probe_interval has elapsed since
 * neigh->updated. Accepts rt == NULL (does nothing). Without
 * CONFIG_IPV6_ROUTER_PREF the function is an empty inline stub.
 */
 372#ifdef CONFIG_IPV6_ROUTER_PREF
 373static void rt6_probe(struct rt6_info *rt)
 374{
 375	struct neighbour *neigh;
 376	/*
 377	 * Okay, this does not seem to be appropriate
 378	 * for now, however, we need to check if it
 379	 * is really so; aka Router Reachability Probing.
 380	 *
 381	 * Router Reachability Probe MUST be rate-limited
 382	 * to no more than one per minute.
 383	 */
 384	rcu_read_lock();
 385	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	/* Unlocked fast path: nothing to probe when there is no neighbour or it is valid. */
 386	if (!neigh || (neigh->nud_state & NUD_VALID))
 387		goto out;
 388	read_lock_bh(&neigh->lock);
	/* Re-check the state under the lock and apply the rate limit. */
 389	if (!(neigh->nud_state & NUD_VALID) &&
 390	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
 391		struct in6_addr mcaddr;
 392		struct in6_addr *target;
 393
		/* NOTE(review): neigh->updated is written while only the read lock is held - confirm this is intended. */
 394		neigh->updated = jiffies;
 395		read_unlock_bh(&neigh->lock);
 396
		/* Solicit the neighbour's address via its solicited-node multicast address. */
 397		target = (struct in6_addr *)&neigh->primary_key;
 398		addrconf_addr_solict_mult(target, &mcaddr);
 399		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
 400	} else {
 401		read_unlock_bh(&neigh->lock);
 402	}
 403out:
 404	rcu_read_unlock();
 405}
 406#else
 407static inline void rt6_probe(struct rt6_info *rt)
 408{
 409}
 410#endif
 411
 412/*
 413 * Default Router Selection (RFC 2461 6.3.6)
 414 */
 415static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 416{
 417	struct net_device *dev = rt->rt6i_dev;
 418	if (!oif || dev->ifindex == oif)
 419		return 2;
 420	if ((dev->flags & IFF_LOOPBACK) &&
 421	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 422		return 1;
 423	return 0;
 424}
 425
 426static inline int rt6_check_neigh(struct rt6_info *rt)
 427{
 428	struct neighbour *neigh;
 429	int m;
 430
 431	rcu_read_lock();
 432	neigh = dst_get_neighbour(&rt->dst);
 433	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 434	    !(rt->rt6i_flags & RTF_GATEWAY))
 435		m = 1;
 436	else if (neigh) {
 437		read_lock_bh(&neigh->lock);
 438		if (neigh->nud_state & NUD_VALID)
 439			m = 2;
 440#ifdef CONFIG_IPV6_ROUTER_PREF
 441		else if (neigh->nud_state & NUD_FAILED)
 442			m = 0;
 443#endif
 444		else
 445			m = 1;
 446		read_unlock_bh(&neigh->lock);
 447	} else
 448		m = 0;
 449	rcu_read_unlock();
 450	return m;
 451}
 452
 453static int rt6_score_route(struct rt6_info *rt, int oif,
 454			   int strict)
 455{
 456	int m, n;
 457
 458	m = rt6_check_dev(rt, oif);
 459	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 460		return -1;
 461#ifdef CONFIG_IPV6_ROUTER_PREF
 462	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 463#endif
 464	n = rt6_check_neigh(rt);
 465	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
 466		return -1;
 467	return m;
 468}
 469
 470static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 471				   int *mpri, struct rt6_info *match)
 472{
 473	int m;
 474
 475	if (rt6_check_expired(rt))
 476		goto out;
 477
 478	m = rt6_score_route(rt, oif, strict);
 479	if (m < 0)
 480		goto out;
 481
 482	if (m > *mpri) {
 483		if (strict & RT6_LOOKUP_F_REACHABLE)
 484			rt6_probe(match);
 485		*mpri = m;
 486		match = rt;
 487	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
 488		rt6_probe(rt);
 489	}
 490
 491out:
 492	return match;
 493}
 494
 495static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 496				     struct rt6_info *rr_head,
 497				     u32 metric, int oif, int strict)
 498{
 499	struct rt6_info *rt, *match;
 500	int mpri = -1;
 501
 502	match = NULL;
 503	for (rt = rr_head; rt && rt->rt6i_metric == metric;
 504	     rt = rt->dst.rt6_next)
 505		match = find_match(rt, oif, strict, &mpri, match);
 506	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 507	     rt = rt->dst.rt6_next)
 508		match = find_match(rt, oif, strict, &mpri, match);
 509
 510	return match;
 511}
 512
 513static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 514{
 515	struct rt6_info *match, *rt0;
 516	struct net *net;
 517
 518	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
 519		  __func__, fn->leaf, oif);
 520
 521	rt0 = fn->rr_ptr;
 522	if (!rt0)
 523		fn->rr_ptr = rt0 = fn->leaf;
 524
 525	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
 526
 527	if (!match &&
 528	    (strict & RT6_LOOKUP_F_REACHABLE)) {
 529		struct rt6_info *next = rt0->dst.rt6_next;
 530
 531		/* no entries matched; do round-robin */
 532		if (!next || next->rt6i_metric != rt0->rt6i_metric)
 533			next = fn->leaf;
 534
 535		if (next != rt0)
 536			fn->rr_ptr = next;
 537	}
 538
 539	RT6_TRACE("%s() => %p\n",
 540		  __func__, match);
 541
 542	net = dev_net(rt0->rt6i_dev);
 543	return match ? match : net->ipv6.ip6_null_entry;
 544}
 545
/*
 * rt6_route_rcv(): process one route information option (only built with
 * CONFIG_IPV6_ROUTE_INFO).
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    length of the option buffer in bytes
 * @gwaddr: gateway address to associate with the route
 *
 * Adds, updates or deletes the matching route depending on the advertised
 * lifetime. Returns 0 on success and -EINVAL when the option is malformed.
 */
 546#ifdef CONFIG_IPV6_ROUTE_INFO
 547int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 548		  const struct in6_addr *gwaddr)
 549{
 550	struct net *net = dev_net(dev);
 551	struct route_info *rinfo = (struct route_info *) opt;
 552	struct in6_addr prefix_buf, *prefix;
 553	unsigned int pref;
 554	unsigned long lifetime;
 555	struct rt6_info *rt;
 556
 557	if (len < sizeof(struct route_info)) {
 558		return -EINVAL;
 559	}
 560
	/*
	 * rinfo->length must be at most 3, and large enough for the prefix:
	 * at least 2 for prefix_len > 64, at least 1 for prefix_len > 0.
	 */
 561	/* Sanity check for prefix_len and length */
 562	if (rinfo->length > 3) {
 563		return -EINVAL;
 564	} else if (rinfo->prefix_len > 128) {
 565		return -EINVAL;
 566	} else if (rinfo->prefix_len > 64) {
 567		if (rinfo->length < 2) {
 568			return -EINVAL;
 569		}
 570	} else if (rinfo->prefix_len > 0) {
 571		if (rinfo->length < 1) {
 572			return -EINVAL;
 573		}
 574	}
 575
 576	pref = rinfo->route_pref;
 577	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 578		return -EINVAL;
 579
 580	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 581
	/*
	 * With length == 3 the prefix in the option is used in place;
	 * otherwise a copy limited to prefix_len bits is built in prefix_buf.
	 */
 582	if (rinfo->length == 3)
 583		prefix = (struct in6_addr *)rinfo->prefix;
 584	else {
 585		/* this function is safe */
 586		ipv6_addr_prefix(&prefix_buf,
 587				 (struct in6_addr *)rinfo->prefix,
 588				 rinfo->prefix_len);
 589		prefix = &prefix_buf;
 590	}
 591
 592	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 593				dev->ifindex);
 594
	/* A zero lifetime removes an existing route. */
 595	if (rt && !lifetime) {
 596		ip6_del_rt(rt);
 597		rt = NULL;
 598	}
 599
	/* Create the route if missing, otherwise refresh its preference bits. */
 600	if (!rt && lifetime)
 601		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 602					pref);
 603	else if (rt)
 604		rt->rt6i_flags = RTF_ROUTEINFO |
 605				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 606
 607	if (rt) {
		/* A non-finite lifetime clears RTF_EXPIRES; a finite one sets the expiry. */
 608		if (!addrconf_finite_timeout(lifetime)) {
 609			rt->rt6i_flags &= ~RTF_EXPIRES;
 610		} else {
 611			rt->rt6i_expires = jiffies + HZ * lifetime;
 612			rt->rt6i_flags |= RTF_EXPIRES;
 613		}
		/* Drop the reference held on rt. */
 614		dst_release(&rt->dst);
 615	}
 616	return 0;
 617}
 618#endif
 619
 620#define BACKTRACK(__net, saddr)			\
 621do { \
 622	if (rt == __net->ipv6.ip6_null_entry) {	\
 623		struct fib6_node *pn; \
 624		while (1) { \
 625			if (fn->fn_flags & RTN_TL_ROOT) \
 626				goto out; \
 627			pn = fn->parent; \
 628			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
 629				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
 630			else \
 631				fn = pn; \
 632			if (fn->fn_flags & RTN_RTINFO) \
 633				goto restart; \
 634		} \
 635	} \
 636} while(0)
 637
/*
 * Lookup callback handed to fib6_rule_lookup() by rt6_lookup().
 *
 * Holds table->tb6_lock for reading while it finds the node for the
 * flow's destination/source, picks a route from the node's leaf list
 * with rt6_device_match(), and backtracks towards the root if that
 * yielded the null entry. The result is passed to dst_use() before the
 * lock is dropped and it is returned.
 */
 638static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 639					     struct fib6_table *table,
 640					     struct flowi6 *fl6, int flags)
 641{
 642	struct fib6_node *fn;
 643	struct rt6_info *rt;
 644
 645	read_lock_bh(&table->tb6_lock);
 646	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	/* 'restart' and 'out' are the jump targets used by BACKTRACK(). */
 647restart:
 648	rt = fn->leaf;
 649	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 650	BACKTRACK(net, &fl6->saddr);
 651out:
 652	dst_use(&rt->dst, jiffies);
 653	read_unlock_bh(&table->tb6_lock);
 654	return rt;
 655
 656}
 657
 
 
 
 
 
 
 
 658struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 659			    const struct in6_addr *saddr, int oif, int strict)
 660{
 661	struct flowi6 fl6 = {
 662		.flowi6_oif = oif,
 663		.daddr = *daddr,
 664	};
 665	struct dst_entry *dst;
 666	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 667
 668	if (saddr) {
 669		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 670		flags |= RT6_LOOKUP_F_HAS_SADDR;
 671	}
 672
 673	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 674	if (dst->error == 0)
 675		return (struct rt6_info *) dst;
 676
 677	dst_release(dst);
 678
 679	return NULL;
 680}
 681
 682EXPORT_SYMBOL(rt6_lookup);
 683
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
 689
 690static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 691{
 692	int err;
 693	struct fib6_table *table;
 694
 695	table = rt->rt6i_table;
 696	write_lock_bh(&table->tb6_lock);
 697	err = fib6_add(&table->tb6_root, rt, info);
 698	write_unlock_bh(&table->tb6_lock);
 699
 700	return err;
 701}
 702
 703int ip6_ins_rt(struct rt6_info *rt)
 704{
 705	struct nl_info info = {
 706		.nl_net = dev_net(rt->rt6i_dev),
 707	};
 708	return __ip6_ins_rt(rt, &info);
 709}
 710
/*
 * Create a cache copy (RTF_CACHE) of @ort for destination @daddr and bind
 * a neighbour entry to it.
 *
 * @ort:   route to copy
 * @daddr: destination the copy is for
 * @saddr: source address, used only with CONFIG_IPV6_SUBTREES
 *
 * Returns the new route, or NULL when ip6_rt_copy() fails or no
 * neighbour entry could be obtained.
 */
 711static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
 712				      const struct in6_addr *daddr,
 713				      const struct in6_addr *saddr)
 714{
 715	struct rt6_info *rt;
 716
 717	/*
 718	 *	Clone the route.
 719	 */
 720
 721	rt = ip6_rt_copy(ort, daddr);
 722
 723	if (rt) {
 724		struct neighbour *neigh;
		/* One GC-and-retry is allowed, but only outside softirq context. */
 725		int attempts = !in_softirq();
 726
		/*
		 * Non-gateway route: the destination itself becomes the
		 * "gateway". If the copy is not a /128 and daddr equals the
		 * original route's prefix address, mark it RTF_ANYCAST.
		 */
 727		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
 728			if (rt->rt6i_dst.plen != 128 &&
 729			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 730				rt->rt6i_flags |= RTF_ANYCAST;
 731			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
 732		}
 733
 734		rt->rt6i_flags |= RTF_CACHE;
 735
 736#ifdef CONFIG_IPV6_SUBTREES
		/* Source-specific route: pin the copy to the exact source address. */
 737		if (rt->rt6i_src.plen && saddr) {
 738			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
 739			rt->rt6i_src.plen = 128;
 740		}
 741#endif
 742
 743	retry:
 744		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
 745		if (IS_ERR(neigh)) {
 746			struct net *net = dev_net(rt->rt6i_dev);
 747			int saved_rt_min_interval =
 748				net->ipv6.sysctl.ip6_rt_gc_min_interval;
 749			int saved_rt_elasticity =
 750				net->ipv6.sysctl.ip6_rt_gc_elasticity;
 751
			/*
			 * Temporarily override the GC sysctls (elasticity 1,
			 * min interval 0), run ip6_dst_gc(), restore them and
			 * retry the neighbour lookup.
			 */
 752			if (attempts-- > 0) {
 753				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
 754				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
 755
 756				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
 757
 758				net->ipv6.sysctl.ip6_rt_gc_elasticity =
 759					saved_rt_elasticity;
 760				net->ipv6.sysctl.ip6_rt_gc_min_interval =
 761					saved_rt_min_interval;
 762				goto retry;
 763			}
 764
			/* Out of retries: warn (rate-limited), free the copy and fail. */
 765			if (net_ratelimit())
 766				printk(KERN_WARNING
 767				       "ipv6: Neighbour table overflow.\n");
 768			dst_free(&rt->dst);
 769			return NULL;
 770		}
 771		dst_set_neighbour(&rt->dst, neigh);
 772
 773	}
 774
 775	return rt;
 776}
 777
 778static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 779					const struct in6_addr *daddr)
 780{
 781	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
 782
 783	if (rt) {
 784		rt->rt6i_flags |= RTF_CACHE;
 785		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
 786	}
 787	return rt;
 788}
 789
/*
 * Common route resolution for the input and output paths.
 *
 * @net:   network namespace
 * @table: FIB table to search
 * @oif:   interface index to match (callers pass the flow's iif or oif)
 * @fl6:   flow being routed
 * @flags: RT6_LOOKUP_F_* flags; RT6_LOOKUP_F_IFACE is honoured here
 *
 * Selects a route with rt6_select(); unless the result is the null entry
 * or already a cache entry, a per-destination copy is created and
 * inserted into the table. Returns a route with a reference held and
 * its lastuse/__use counters updated; this may be the null entry.
 */
 790static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
 791				      struct flowi6 *fl6, int flags)
 792{
 793	struct fib6_node *fn;
 794	struct rt6_info *rt, *nrt;
 795	int strict = 0;
 796	int attempts = 3;
 797	int err;
	/* Reachability is only required on the first pass, and only when forwarding is off. */
 798	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
 799
 800	strict |= flags & RT6_LOOKUP_F_IFACE;
 801
 802relookup:
 803	read_lock_bh(&table->tb6_lock);
 804
 805restart_2:
 806	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 807
 808restart:
 809	rt = rt6_select(fn, oif, strict | reachable);
 810
	/* BACKTRACK may jump to 'restart' or 'out'. */
 811	BACKTRACK(net, &fl6->saddr);
 812	if (rt == net->ipv6.ip6_null_entry ||
 813	    rt->rt6i_flags & RTF_CACHE)
 814		goto out;
 815
	/* Keep rt alive across the unlocked section below. */
 816	dst_hold(&rt->dst);
 817	read_unlock_bh(&table->tb6_lock);
 818
	/*
	 * No neighbour bound and a next hop is needed: copy with a fresh
	 * neighbour. Otherwise non-host routes get a plain clone, and host
	 * routes are returned as they are.
	 */
 819	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
 820		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
 821	else if (!(rt->dst.flags & DST_HOST))
 822		nrt = rt6_alloc_clone(rt, &fl6->daddr);
 823	else
 824		goto out2;
 825
	/* Switch to the copy, or to the null entry if the copy failed. */
 826	dst_release(&rt->dst);
 827	rt = nrt ? : net->ipv6.ip6_null_entry;
 828
 829	dst_hold(&rt->dst);
 830	if (nrt) {
 831		err = ip6_ins_rt(nrt);
 832		if (!err)
 833			goto out2;
 834	}
 835
	/* Insertion failed or no copy: give up after the third attempt. */
 836	if (--attempts <= 0)
 837		goto out2;
 838
 839	/*
 840	 * Race condition! In the gap, when table->tb6_lock was
 841	 * released someone could insert this route.  Relookup.
 842	 */
 843	dst_release(&rt->dst);
 844	goto relookup;
 845
 846out:
	/* Retry once without the reachability requirement before settling on rt. */
 847	if (reachable) {
 848		reachable = 0;
 849		goto restart_2;
 850	}
 851	dst_hold(&rt->dst);
 852	read_unlock_bh(&table->tb6_lock);
 853out2:
 854	rt->dst.lastuse = jiffies;
 855	rt->dst.__use++;
 856
 857	return rt;
 858}
 859
 860static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
 861					    struct flowi6 *fl6, int flags)
 862{
 863	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 864}
 865
 
 
 
 
 
 
 
 
 
 
 866void ip6_route_input(struct sk_buff *skb)
 867{
 868	const struct ipv6hdr *iph = ipv6_hdr(skb);
 869	struct net *net = dev_net(skb->dev);
 870	int flags = RT6_LOOKUP_F_HAS_SADDR;
 871	struct flowi6 fl6 = {
 872		.flowi6_iif = skb->dev->ifindex,
 873		.daddr = iph->daddr,
 874		.saddr = iph->saddr,
 875		.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
 876		.flowi6_mark = skb->mark,
 877		.flowi6_proto = iph->nexthdr,
 878	};
 879
 880	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
 881		flags |= RT6_LOOKUP_F_IFACE;
 882
 883	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
 884}
 885
 886static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
 887					     struct flowi6 *fl6, int flags)
 888{
 889	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 890}
 891
 892struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 893				    struct flowi6 *fl6)
 894{
 895	int flags = 0;
 896
 897	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
 898		flags |= RT6_LOOKUP_F_IFACE;
 899
 900	if (!ipv6_addr_any(&fl6->saddr))
 901		flags |= RT6_LOOKUP_F_HAS_SADDR;
 902	else if (sk)
 903		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 904
 905	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 906}
 907
 908EXPORT_SYMBOL(ip6_route_output);
 909
 910struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 911{
 912	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
 913	struct dst_entry *new = NULL;
 914
 915	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
 916	if (rt) {
 917		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
 918
 919		new = &rt->dst;
 920
 921		new->__use = 1;
 922		new->input = dst_discard;
 923		new->output = dst_discard;
 924
 925		if (dst_metrics_read_only(&ort->dst))
 926			new->_metrics = ort->dst._metrics;
 927		else
 928			dst_copy_metrics(new, &ort->dst);
 929		rt->rt6i_idev = ort->rt6i_idev;
 930		if (rt->rt6i_idev)
 931			in6_dev_hold(rt->rt6i_idev);
 932		rt->rt6i_expires = 0;
 933
 934		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
 935		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
 
 936		rt->rt6i_metric = 0;
 937
 938		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
 939#ifdef CONFIG_IPV6_SUBTREES
 940		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
 941#endif
 942
 943		dst_free(new);
 944	}
 945
 946	dst_release(dst_orig);
 947	return new ? new : ERR_PTR(-ENOMEM);
 948}
 949
 950/*
 951 *	Destination cache support functions
 952 */
 953
 954static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 955{
 956	struct rt6_info *rt;
 957
 958	rt = (struct rt6_info *) dst;
 959
 960	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
 961		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
 962			if (!rt->rt6i_peer)
 963				rt6_bind_peer(rt, 0);
 964			rt->rt6i_peer_genid = rt6_peer_genid();
 965		}
 966		return dst;
 967	}
 968	return NULL;
 969}
 970
 971static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 972{
 973	struct rt6_info *rt = (struct rt6_info *) dst;
 974
 975	if (rt) {
 976		if (rt->rt6i_flags & RTF_CACHE) {
 977			if (rt6_check_expired(rt)) {
 978				ip6_del_rt(rt);
 979				dst = NULL;
 980			}
 981		} else {
 982			dst_release(dst);
 983			dst = NULL;
 984		}
 985	}
 986	return dst;
 987}
 988
 989static void ip6_link_failure(struct sk_buff *skb)
 990{
 991	struct rt6_info *rt;
 992
 993	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
 994
 995	rt = (struct rt6_info *) skb_dst(skb);
 996	if (rt) {
 997		if (rt->rt6i_flags&RTF_CACHE) {
 998			dst_set_expires(&rt->dst, 0);
 999			rt->rt6i_flags |= RTF_EXPIRES;
1000		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1001			rt->rt6i_node->fn_sernum = -1;
1002	}
1003}
1004
1005static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1006{
1007	struct rt6_info *rt6 = (struct rt6_info*)dst;
1008
1009	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1010		rt6->rt6i_flags |= RTF_MODIFIED;
1011		if (mtu < IPV6_MIN_MTU) {
1012			u32 features = dst_metric(dst, RTAX_FEATURES);
1013			mtu = IPV6_MIN_MTU;
1014			features |= RTAX_FEATURE_ALLFRAG;
1015			dst_metric_set(dst, RTAX_FEATURES, features);
1016		}
1017		dst_metric_set(dst, RTAX_MTU, mtu);
1018	}
1019}
1020
1021static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1022{
1023	struct net_device *dev = dst->dev;
1024	unsigned int mtu = dst_mtu(dst);
1025	struct net *net = dev_net(dev);
1026
1027	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1028
1029	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1030		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1031
1032	/*
1033	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1034	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1035	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1036	 * rely only on pmtu discovery"
1037	 */
1038	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1039		mtu = IPV6_MAXPLEN;
1040	return mtu;
1041}
1042
1043static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1044{
1045	unsigned int mtu = IPV6_MIN_MTU;
1046	struct inet6_dev *idev;
 
 
 
 
 
 
1047
1048	rcu_read_lock();
1049	idev = __in6_dev_get(dst->dev);
1050	if (idev)
1051		mtu = idev->cnf.mtu6;
1052	rcu_read_unlock();
1053
1054	return mtu;
1055}
1056
1057static struct dst_entry *icmp6_dst_gc_list;
1058static DEFINE_SPINLOCK(icmp6_dst_lock);
1059
1060struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1061				  struct neighbour *neigh,
1062				  const struct in6_addr *addr)
1063{
 
1064	struct rt6_info *rt;
1065	struct inet6_dev *idev = in6_dev_get(dev);
1066	struct net *net = dev_net(dev);
1067
1068	if (unlikely(idev == NULL))
1069		return NULL;
1070
1071	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1072	if (unlikely(rt == NULL)) {
1073		in6_dev_put(idev);
 
1074		goto out;
1075	}
1076
1077	if (neigh)
1078		neigh_hold(neigh);
1079	else {
1080		neigh = ndisc_get_neigh(dev, addr);
1081		if (IS_ERR(neigh))
1082			neigh = NULL;
 
 
 
1083	}
1084
1085	rt->dst.flags |= DST_HOST;
1086	rt->dst.output  = ip6_output;
1087	dst_set_neighbour(&rt->dst, neigh);
1088	atomic_set(&rt->dst.__refcnt, 1);
1089	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1090
1091	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1092	rt->rt6i_dst.plen = 128;
1093	rt->rt6i_idev     = idev;
 
1094
1095	spin_lock_bh(&icmp6_dst_lock);
1096	rt->dst.next = icmp6_dst_gc_list;
1097	icmp6_dst_gc_list = &rt->dst;
1098	spin_unlock_bh(&icmp6_dst_lock);
1099
1100	fib6_force_start_gc(net);
1101
 
 
1102out:
1103	return &rt->dst;
1104}
1105
1106int icmp6_dst_gc(void)
1107{
1108	struct dst_entry *dst, **pprev;
1109	int more = 0;
1110
1111	spin_lock_bh(&icmp6_dst_lock);
1112	pprev = &icmp6_dst_gc_list;
1113
1114	while ((dst = *pprev) != NULL) {
1115		if (!atomic_read(&dst->__refcnt)) {
1116			*pprev = dst->next;
1117			dst_free(dst);
1118		} else {
1119			pprev = &dst->next;
1120			++more;
1121		}
1122	}
1123
1124	spin_unlock_bh(&icmp6_dst_lock);
1125
1126	return more;
1127}
1128
1129static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1130			    void *arg)
1131{
1132	struct dst_entry *dst, **pprev;
1133
1134	spin_lock_bh(&icmp6_dst_lock);
1135	pprev = &icmp6_dst_gc_list;
1136	while ((dst = *pprev) != NULL) {
1137		struct rt6_info *rt = (struct rt6_info *) dst;
1138		if (func(rt, arg)) {
1139			*pprev = dst->next;
1140			dst_free(dst);
1141		} else {
1142			pprev = &dst->next;
1143		}
1144	}
1145	spin_unlock_bh(&icmp6_dst_lock);
1146}
1147
/*
 * gc hook of the per-namespace IPv6 dst_ops.
 *
 * @ops must be the ip6_dst_ops embedded in a struct net (the namespace is
 * recovered with container_of()).
 *
 * Returns non-zero when the number of dst entries still exceeds the
 * ip6_rt_max_size sysctl.
 */
 1148static int ip6_dst_gc(struct dst_ops *ops)
 1149{
 1150	unsigned long now = jiffies;
 1151	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
 1152	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
 1153	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
 1154	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
 1155	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
 1156	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
 1157	int entries;
 1158
	/*
	 * Skip the GC run when the minimum interval since the last run has
	 * not elapsed and the (fast) entry count is within the size limit.
	 */
 1159	entries = dst_entries_get_fast(ops);
 1160	if (time_after(rt_last_gc + rt_min_interval, now) &&
 1161	    entries <= rt_max_size)
 1162		goto out;
 1163
	/* Run the FIB garbage collector with the bumped gc_expire value. */
 1164	net->ipv6.ip6_rt_gc_expire++;
 1165	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
 1166	net->ipv6.ip6_rt_last_gc = now;
	/* Recount with the slow counter; below gc_thresh, reset gc_expire to half the timeout. */
 1167	entries = dst_entries_get_slow(ops);
 1168	if (entries < ops->gc_thresh)
 1169		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
 1170out:
	/* Decay gc_expire on every call; the elasticity sysctl is the shift amount. */
 1171	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
 1172	return entries > rt_max_size;
 1173}
1174
1175/* Clean host part of a prefix. Not necessary in radix tree,
1176   but results in cleaner routing tables.
1177
1178   Remove it only when all the things will work!
1179 */
1180
1181int ip6_dst_hoplimit(struct dst_entry *dst)
1182{
1183	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1184	if (hoplimit == 0) {
1185		struct net_device *dev = dst->dev;
1186		struct inet6_dev *idev;
1187
1188		rcu_read_lock();
1189		idev = __in6_dev_get(dev);
1190		if (idev)
1191			hoplimit = idev->cnf.hop_limit;
1192		else
1193			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1194		rcu_read_unlock();
1195	}
1196	return hoplimit;
1197}
1198EXPORT_SYMBOL(ip6_dst_hoplimit);
1199
1200/*
1201 *
1202 */
1203
1204int ip6_route_add(struct fib6_config *cfg)
1205{
1206	int err;
1207	struct net *net = cfg->fc_nlinfo.nl_net;
1208	struct rt6_info *rt = NULL;
1209	struct net_device *dev = NULL;
1210	struct inet6_dev *idev = NULL;
1211	struct fib6_table *table;
1212	int addr_type;
1213
1214	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1215		return -EINVAL;
1216#ifndef CONFIG_IPV6_SUBTREES
1217	if (cfg->fc_src_len)
1218		return -EINVAL;
1219#endif
1220	if (cfg->fc_ifindex) {
1221		err = -ENODEV;
1222		dev = dev_get_by_index(net, cfg->fc_ifindex);
1223		if (!dev)
1224			goto out;
1225		idev = in6_dev_get(dev);
1226		if (!idev)
1227			goto out;
1228	}
1229
1230	if (cfg->fc_metric == 0)
1231		cfg->fc_metric = IP6_RT_PRIO_USER;
1232
1233	table = fib6_new_table(net, cfg->fc_table);
1234	if (table == NULL) {
1235		err = -ENOBUFS;
1236		goto out;
 
 
 
 
 
 
1237	}
1238
 
 
 
1239	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1240
1241	if (rt == NULL) {
1242		err = -ENOMEM;
1243		goto out;
1244	}
1245
1246	rt->dst.obsolete = -1;
1247	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1248				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1249				0;
 
 
 
1250
1251	if (cfg->fc_protocol == RTPROT_UNSPEC)
1252		cfg->fc_protocol = RTPROT_BOOT;
1253	rt->rt6i_protocol = cfg->fc_protocol;
1254
1255	addr_type = ipv6_addr_type(&cfg->fc_dst);
1256
1257	if (addr_type & IPV6_ADDR_MULTICAST)
1258		rt->dst.input = ip6_mc_input;
1259	else if (cfg->fc_flags & RTF_LOCAL)
1260		rt->dst.input = ip6_input;
1261	else
1262		rt->dst.input = ip6_forward;
1263
1264	rt->dst.output = ip6_output;
1265
1266	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1267	rt->rt6i_dst.plen = cfg->fc_dst_len;
1268	if (rt->rt6i_dst.plen == 128)
1269	       rt->dst.flags |= DST_HOST;
1270
1271	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1272		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1273		if (!metrics) {
1274			err = -ENOMEM;
1275			goto out;
1276		}
1277		dst_init_metrics(&rt->dst, metrics, 0);
1278	}
1279#ifdef CONFIG_IPV6_SUBTREES
1280	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1281	rt->rt6i_src.plen = cfg->fc_src_len;
1282#endif
1283
1284	rt->rt6i_metric = cfg->fc_metric;
1285
1286	/* We cannot add true routes via loopback here,
1287	   they would result in kernel looping; promote them to reject routes
1288	 */
1289	if ((cfg->fc_flags & RTF_REJECT) ||
1290	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1291					      && !(cfg->fc_flags&RTF_LOCAL))) {
 
1292		/* hold loopback dev/idev if we haven't done so. */
1293		if (dev != net->loopback_dev) {
1294			if (dev) {
1295				dev_put(dev);
1296				in6_dev_put(idev);
1297			}
1298			dev = net->loopback_dev;
1299			dev_hold(dev);
1300			idev = in6_dev_get(dev);
1301			if (!idev) {
1302				err = -ENODEV;
1303				goto out;
1304			}
1305		}
1306		rt->dst.output = ip6_pkt_discard_out;
1307		rt->dst.input = ip6_pkt_discard;
1308		rt->dst.error = -ENETUNREACH;
1309		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1310		goto install_route;
1311	}
1312
1313	if (cfg->fc_flags & RTF_GATEWAY) {
1314		const struct in6_addr *gw_addr;
1315		int gwa_type;
1316
1317		gw_addr = &cfg->fc_gateway;
1318		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1319		gwa_type = ipv6_addr_type(gw_addr);
1320
1321		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1322			struct rt6_info *grt;
1323
1324			/* IPv6 strictly inhibits using not link-local
1325			   addresses as nexthop address.
1326			   Otherwise, router will not able to send redirects.
1327			   It is very good, but in some (rare!) circumstances
1328			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1329			   some exceptions. --ANK
1330			 */
1331			err = -EINVAL;
1332			if (!(gwa_type&IPV6_ADDR_UNICAST))
1333				goto out;
1334
1335			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1336
1337			err = -EHOSTUNREACH;
1338			if (grt == NULL)
1339				goto out;
1340			if (dev) {
1341				if (dev != grt->rt6i_dev) {
1342					dst_release(&grt->dst);
1343					goto out;
1344				}
1345			} else {
1346				dev = grt->rt6i_dev;
1347				idev = grt->rt6i_idev;
1348				dev_hold(dev);
1349				in6_dev_hold(grt->rt6i_idev);
1350			}
1351			if (!(grt->rt6i_flags&RTF_GATEWAY))
1352				err = 0;
1353			dst_release(&grt->dst);
1354
1355			if (err)
1356				goto out;
1357		}
1358		err = -EINVAL;
1359		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1360			goto out;
1361	}
1362
1363	err = -ENODEV;
1364	if (dev == NULL)
1365		goto out;
1366
1367	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1368		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1369			err = -EINVAL;
1370			goto out;
1371		}
1372		ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1373		rt->rt6i_prefsrc.plen = 128;
1374	} else
1375		rt->rt6i_prefsrc.plen = 0;
1376
1377	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1378		struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1379		if (IS_ERR(n)) {
1380			err = PTR_ERR(n);
1381			goto out;
1382		}
1383		dst_set_neighbour(&rt->dst, n);
1384	}
1385
1386	rt->rt6i_flags = cfg->fc_flags;
1387
1388install_route:
1389	if (cfg->fc_mx) {
1390		struct nlattr *nla;
1391		int remaining;
1392
1393		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1394			int type = nla_type(nla);
1395
1396			if (type) {
1397				if (type > RTAX_MAX) {
1398					err = -EINVAL;
1399					goto out;
1400				}
1401
1402				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1403			}
1404		}
1405	}
1406
1407	rt->dst.dev = dev;
1408	rt->rt6i_idev = idev;
1409	rt->rt6i_table = table;
1410
1411	cfg->fc_nlinfo.nl_net = dev_net(dev);
1412
1413	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1414
1415out:
1416	if (dev)
1417		dev_put(dev);
1418	if (idev)
1419		in6_dev_put(idev);
1420	if (rt)
1421		dst_free(&rt->dst);
1422	return err;
1423}
1424
1425static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1426{
1427	int err;
1428	struct fib6_table *table;
1429	struct net *net = dev_net(rt->rt6i_dev);
1430
1431	if (rt == net->ipv6.ip6_null_entry)
1432		return -ENOENT;
1433
1434	table = rt->rt6i_table;
1435	write_lock_bh(&table->tb6_lock);
1436
1437	err = fib6_del(rt, info);
1438	dst_release(&rt->dst);
1439
1440	write_unlock_bh(&table->tb6_lock);
1441
1442	return err;
1443}
1444
1445int ip6_del_rt(struct rt6_info *rt)
1446{
1447	struct nl_info info = {
1448		.nl_net = dev_net(rt->rt6i_dev),
1449	};
1450	return __ip6_del_rt(rt, &info);
1451}
1452
1453static int ip6_route_del(struct fib6_config *cfg)
1454{
1455	struct fib6_table *table;
1456	struct fib6_node *fn;
1457	struct rt6_info *rt;
1458	int err = -ESRCH;
1459
1460	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1461	if (table == NULL)
1462		return err;
1463
1464	read_lock_bh(&table->tb6_lock);
1465
1466	fn = fib6_locate(&table->tb6_root,
1467			 &cfg->fc_dst, cfg->fc_dst_len,
1468			 &cfg->fc_src, cfg->fc_src_len);
1469
1470	if (fn) {
1471		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1472			if (cfg->fc_ifindex &&
1473			    (rt->rt6i_dev == NULL ||
1474			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1475				continue;
1476			if (cfg->fc_flags & RTF_GATEWAY &&
1477			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1478				continue;
1479			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1480				continue;
1481			dst_hold(&rt->dst);
1482			read_unlock_bh(&table->tb6_lock);
1483
1484			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1485		}
1486	}
1487	read_unlock_bh(&table->tb6_lock);
1488
1489	return err;
1490}
1491
1492/*
1493 *	Handle redirects
1494 */
1495struct ip6rd_flowi {
1496	struct flowi6 fl6;
1497	struct in6_addr gateway;
1498};
1499
1500static struct rt6_info *__ip6_route_redirect(struct net *net,
1501					     struct fib6_table *table,
1502					     struct flowi6 *fl6,
1503					     int flags)
1504{
1505	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1506	struct rt6_info *rt;
1507	struct fib6_node *fn;
1508
1509	/*
1510	 * Get the "current" route for this destination and
1511	 * check if the redirect has come from approriate router.
1512	 *
1513	 * RFC 2461 specifies that redirects should only be
1514	 * accepted if they come from the nexthop to the target.
1515	 * Due to the way the routes are chosen, this notion
1516	 * is a bit fuzzy and one might need to check all possible
1517	 * routes.
1518	 */
1519
1520	read_lock_bh(&table->tb6_lock);
1521	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1522restart:
1523	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1524		/*
1525		 * Current route is on-link; redirect is always invalid.
1526		 *
1527		 * Seems, previous statement is not true. It could
1528		 * be node, which looks for us as on-link (f.e. proxy ndisc)
1529		 * But then router serving it might decide, that we should
1530		 * know truth 8)8) --ANK (980726).
1531		 */
1532		if (rt6_check_expired(rt))
1533			continue;
1534		if (!(rt->rt6i_flags & RTF_GATEWAY))
1535			continue;
1536		if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1537			continue;
1538		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1539			continue;
1540		break;
1541	}
1542
1543	if (!rt)
1544		rt = net->ipv6.ip6_null_entry;
1545	BACKTRACK(net, &fl6->saddr);
1546out:
1547	dst_hold(&rt->dst);
1548
1549	read_unlock_bh(&table->tb6_lock);
1550
1551	return rt;
1552};
1553
1554static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1555					   const struct in6_addr *src,
1556					   const struct in6_addr *gateway,
1557					   struct net_device *dev)
1558{
1559	int flags = RT6_LOOKUP_F_HAS_SADDR;
1560	struct net *net = dev_net(dev);
1561	struct ip6rd_flowi rdfl = {
1562		.fl6 = {
1563			.flowi6_oif = dev->ifindex,
1564			.daddr = *dest,
1565			.saddr = *src,
1566		},
1567	};
1568
1569	ipv6_addr_copy(&rdfl.gateway, gateway);
1570
1571	if (rt6_need_strict(dest))
1572		flags |= RT6_LOOKUP_F_IFACE;
1573
1574	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1575						   flags, __ip6_route_redirect);
1576}
1577
1578void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1579		  const struct in6_addr *saddr,
1580		  struct neighbour *neigh, u8 *lladdr, int on_link)
1581{
1582	struct rt6_info *rt, *nrt = NULL;
1583	struct netevent_redirect netevent;
1584	struct net *net = dev_net(neigh->dev);
1585
1586	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1587
1588	if (rt == net->ipv6.ip6_null_entry) {
1589		if (net_ratelimit())
1590			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1591			       "for redirect target\n");
1592		goto out;
1593	}
1594
1595	/*
1596	 *	We have finally decided to accept it.
1597	 */
1598
1599	neigh_update(neigh, lladdr, NUD_STALE,
1600		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1601		     NEIGH_UPDATE_F_OVERRIDE|
1602		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1603				     NEIGH_UPDATE_F_ISROUTER))
1604		     );
1605
1606	/*
1607	 * Redirect received -> path was valid.
1608	 * Look, redirects are sent only in response to data packets,
1609	 * so that this nexthop apparently is reachable. --ANK
1610	 */
1611	dst_confirm(&rt->dst);
1612
1613	/* Duplicate redirect: silently ignore. */
1614	if (neigh == dst_get_neighbour_raw(&rt->dst))
1615		goto out;
1616
1617	nrt = ip6_rt_copy(rt, dest);
1618	if (nrt == NULL)
1619		goto out;
1620
1621	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1622	if (on_link)
1623		nrt->rt6i_flags &= ~RTF_GATEWAY;
1624
1625	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1626	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1627
1628	if (ip6_ins_rt(nrt))
1629		goto out;
1630
1631	netevent.old = &rt->dst;
1632	netevent.new = &nrt->dst;
1633	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1634
1635	if (rt->rt6i_flags&RTF_CACHE) {
1636		ip6_del_rt(rt);
1637		return;
1638	}
1639
1640out:
1641	dst_release(&rt->dst);
1642}
1643
1644/*
1645 *	Handle ICMP "packet too big" messages
1646 *	i.e. Path MTU discovery
1647 */
1648
1649static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1650			     struct net *net, u32 pmtu, int ifindex)
1651{
1652	struct rt6_info *rt, *nrt;
1653	int allfrag = 0;
1654again:
1655	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1656	if (rt == NULL)
1657		return;
1658
1659	if (rt6_check_expired(rt)) {
1660		ip6_del_rt(rt);
1661		goto again;
1662	}
1663
1664	if (pmtu >= dst_mtu(&rt->dst))
1665		goto out;
1666
1667	if (pmtu < IPV6_MIN_MTU) {
1668		/*
1669		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1670		 * MTU (1280) and a fragment header should always be included
1671		 * after a node receiving Too Big message reporting PMTU is
1672		 * less than the IPv6 Minimum Link MTU.
1673		 */
1674		pmtu = IPV6_MIN_MTU;
1675		allfrag = 1;
1676	}
1677
1678	/* New mtu received -> path was valid.
1679	   They are sent only in response to data packets,
1680	   so that this nexthop apparently is reachable. --ANK
1681	 */
1682	dst_confirm(&rt->dst);
1683
1684	/* Host route. If it is static, it would be better
1685	   not to override it, but add new one, so that
1686	   when cache entry will expire old pmtu
1687	   would return automatically.
1688	 */
1689	if (rt->rt6i_flags & RTF_CACHE) {
1690		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1691		if (allfrag) {
1692			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1693			features |= RTAX_FEATURE_ALLFRAG;
1694			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1695		}
1696		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1697		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1698		goto out;
1699	}
1700
1701	/* Network route.
1702	   Two cases are possible:
1703	   1. It is connected route. Action: COW
1704	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1705	 */
1706	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1707		nrt = rt6_alloc_cow(rt, daddr, saddr);
1708	else
1709		nrt = rt6_alloc_clone(rt, daddr);
1710
1711	if (nrt) {
1712		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1713		if (allfrag) {
1714			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1715			features |= RTAX_FEATURE_ALLFRAG;
1716			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1717		}
1718
1719		/* According to RFC 1981, detecting PMTU increase shouldn't be
1720		 * happened within 5 mins, the recommended timer is 10 mins.
1721		 * Here this route expiration time is set to ip6_rt_mtu_expires
1722		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1723		 * and detecting PMTU increase will be automatically happened.
1724		 */
1725		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1726		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1727
1728		ip6_ins_rt(nrt);
1729	}
1730out:
1731	dst_release(&rt->dst);
1732}
1733
1734void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1735			struct net_device *dev, u32 pmtu)
1736{
1737	struct net *net = dev_net(dev);
1738
1739	/*
1740	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1741	 * is sending along the path" that caused the Packet Too Big message.
1742	 * Since it's not possible in the general case to determine which
1743	 * interface was used to send the original packet, we update the MTU
1744	 * on the interface that will be used to send future packets. We also
1745	 * update the MTU on the interface that received the Packet Too Big in
1746	 * case the original packet was forced out that interface with
1747	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1748	 * correct behaviour, which would be to update the MTU on all
1749	 * interfaces.
1750	 */
1751	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1752	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1753}
1754
1755/*
1756 *	Misc support functions
1757 */
1758
1759static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1760				    const struct in6_addr *dest)
1761{
1762	struct net *net = dev_net(ort->rt6i_dev);
1763	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1764					    ort->dst.dev, 0);
1765
1766	if (rt) {
1767		rt->dst.input = ort->dst.input;
1768		rt->dst.output = ort->dst.output;
1769		rt->dst.flags |= DST_HOST;
1770
1771		ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1772		rt->rt6i_dst.plen = 128;
1773		dst_copy_metrics(&rt->dst, &ort->dst);
1774		rt->dst.error = ort->dst.error;
1775		rt->rt6i_idev = ort->rt6i_idev;
1776		if (rt->rt6i_idev)
1777			in6_dev_hold(rt->rt6i_idev);
1778		rt->dst.lastuse = jiffies;
1779		rt->rt6i_expires = 0;
1780
1781		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1782		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
 
 
 
 
 
1783		rt->rt6i_metric = 0;
1784
1785#ifdef CONFIG_IPV6_SUBTREES
1786		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1787#endif
1788		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1789		rt->rt6i_table = ort->rt6i_table;
1790	}
1791	return rt;
1792}
1793
1794#ifdef CONFIG_IPV6_ROUTE_INFO
1795static struct rt6_info *rt6_get_route_info(struct net *net,
1796					   const struct in6_addr *prefix, int prefixlen,
1797					   const struct in6_addr *gwaddr, int ifindex)
1798{
1799	struct fib6_node *fn;
1800	struct rt6_info *rt = NULL;
1801	struct fib6_table *table;
1802
1803	table = fib6_get_table(net, RT6_TABLE_INFO);
1804	if (table == NULL)
1805		return NULL;
1806
1807	write_lock_bh(&table->tb6_lock);
1808	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1809	if (!fn)
1810		goto out;
1811
1812	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1813		if (rt->rt6i_dev->ifindex != ifindex)
1814			continue;
1815		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1816			continue;
1817		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1818			continue;
1819		dst_hold(&rt->dst);
1820		break;
1821	}
1822out:
1823	write_unlock_bh(&table->tb6_lock);
1824	return rt;
1825}
1826
1827static struct rt6_info *rt6_add_route_info(struct net *net,
1828					   const struct in6_addr *prefix, int prefixlen,
1829					   const struct in6_addr *gwaddr, int ifindex,
1830					   unsigned pref)
1831{
1832	struct fib6_config cfg = {
1833		.fc_table	= RT6_TABLE_INFO,
1834		.fc_metric	= IP6_RT_PRIO_USER,
1835		.fc_ifindex	= ifindex,
1836		.fc_dst_len	= prefixlen,
1837		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1838				  RTF_UP | RTF_PREF(pref),
1839		.fc_nlinfo.pid = 0,
1840		.fc_nlinfo.nlh = NULL,
1841		.fc_nlinfo.nl_net = net,
1842	};
1843
1844	ipv6_addr_copy(&cfg.fc_dst, prefix);
1845	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1846
1847	/* We should treat it as a default route if prefix length is 0. */
1848	if (!prefixlen)
1849		cfg.fc_flags |= RTF_DEFAULT;
1850
1851	ip6_route_add(&cfg);
1852
1853	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1854}
1855#endif
1856
1857struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1858{
1859	struct rt6_info *rt;
1860	struct fib6_table *table;
1861
1862	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1863	if (table == NULL)
1864		return NULL;
1865
1866	write_lock_bh(&table->tb6_lock);
1867	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1868		if (dev == rt->rt6i_dev &&
1869		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1870		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1871			break;
1872	}
1873	if (rt)
1874		dst_hold(&rt->dst);
1875	write_unlock_bh(&table->tb6_lock);
1876	return rt;
1877}
1878
1879struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1880				     struct net_device *dev,
1881				     unsigned int pref)
1882{
1883	struct fib6_config cfg = {
1884		.fc_table	= RT6_TABLE_DFLT,
1885		.fc_metric	= IP6_RT_PRIO_USER,
1886		.fc_ifindex	= dev->ifindex,
1887		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1888				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1889		.fc_nlinfo.pid = 0,
1890		.fc_nlinfo.nlh = NULL,
1891		.fc_nlinfo.nl_net = dev_net(dev),
1892	};
1893
1894	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1895
1896	ip6_route_add(&cfg);
1897
1898	return rt6_get_dflt_router(gwaddr, dev);
1899}
1900
1901void rt6_purge_dflt_routers(struct net *net)
1902{
1903	struct rt6_info *rt;
1904	struct fib6_table *table;
1905
1906	/* NOTE: Keep consistent with rt6_get_dflt_router */
1907	table = fib6_get_table(net, RT6_TABLE_DFLT);
1908	if (table == NULL)
1909		return;
1910
1911restart:
1912	read_lock_bh(&table->tb6_lock);
1913	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1914		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1915			dst_hold(&rt->dst);
1916			read_unlock_bh(&table->tb6_lock);
1917			ip6_del_rt(rt);
1918			goto restart;
1919		}
1920	}
1921	read_unlock_bh(&table->tb6_lock);
1922}
1923
1924static void rtmsg_to_fib6_config(struct net *net,
1925				 struct in6_rtmsg *rtmsg,
1926				 struct fib6_config *cfg)
1927{
1928	memset(cfg, 0, sizeof(*cfg));
1929
1930	cfg->fc_table = RT6_TABLE_MAIN;
1931	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1932	cfg->fc_metric = rtmsg->rtmsg_metric;
1933	cfg->fc_expires = rtmsg->rtmsg_info;
1934	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1935	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1936	cfg->fc_flags = rtmsg->rtmsg_flags;
1937
1938	cfg->fc_nlinfo.nl_net = net;
1939
1940	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1941	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1942	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1943}
1944
1945int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1946{
1947	struct fib6_config cfg;
1948	struct in6_rtmsg rtmsg;
1949	int err;
1950
1951	switch(cmd) {
1952	case SIOCADDRT:		/* Add a route */
1953	case SIOCDELRT:		/* Delete a route */
1954		if (!capable(CAP_NET_ADMIN))
1955			return -EPERM;
1956		err = copy_from_user(&rtmsg, arg,
1957				     sizeof(struct in6_rtmsg));
1958		if (err)
1959			return -EFAULT;
1960
1961		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1962
1963		rtnl_lock();
1964		switch (cmd) {
1965		case SIOCADDRT:
1966			err = ip6_route_add(&cfg);
1967			break;
1968		case SIOCDELRT:
1969			err = ip6_route_del(&cfg);
1970			break;
1971		default:
1972			err = -EINVAL;
1973		}
1974		rtnl_unlock();
1975
1976		return err;
1977	}
1978
1979	return -EINVAL;
1980}
1981
1982/*
1983 *	Drop the packet on the floor
1984 */
1985
1986static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1987{
1988	int type;
1989	struct dst_entry *dst = skb_dst(skb);
1990	switch (ipstats_mib_noroutes) {
1991	case IPSTATS_MIB_INNOROUTES:
1992		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1993		if (type == IPV6_ADDR_ANY) {
1994			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1995				      IPSTATS_MIB_INADDRERRORS);
1996			break;
1997		}
1998		/* FALLTHROUGH */
1999	case IPSTATS_MIB_OUTNOROUTES:
2000		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2001			      ipstats_mib_noroutes);
2002		break;
2003	}
2004	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2005	kfree_skb(skb);
2006	return 0;
2007}
2008
2009static int ip6_pkt_discard(struct sk_buff *skb)
2010{
2011	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2012}
2013
2014static int ip6_pkt_discard_out(struct sk_buff *skb)
2015{
2016	skb->dev = skb_dst(skb)->dev;
2017	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2018}
2019
2020#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2021
2022static int ip6_pkt_prohibit(struct sk_buff *skb)
2023{
2024	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2025}
2026
2027static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2028{
2029	skb->dev = skb_dst(skb)->dev;
2030	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2031}
2032
2033#endif
2034
2035/*
2036 *	Allocate a dst for local (unicast / anycast) address.
2037 */
2038
2039struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2040				    const struct in6_addr *addr,
2041				    int anycast)
2042{
2043	struct net *net = dev_net(idev->dev);
2044	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2045					    net->loopback_dev, 0);
2046	struct neighbour *neigh;
2047
2048	if (rt == NULL) {
2049		if (net_ratelimit())
2050			pr_warning("IPv6:  Maximum number of routes reached,"
2051				   " consider increasing route/max_size.\n");
2052		return ERR_PTR(-ENOMEM);
2053	}
2054
2055	in6_dev_hold(idev);
2056
2057	rt->dst.flags |= DST_HOST;
2058	rt->dst.input = ip6_input;
2059	rt->dst.output = ip6_output;
2060	rt->rt6i_idev = idev;
2061	rt->dst.obsolete = -1;
2062
2063	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2064	if (anycast)
2065		rt->rt6i_flags |= RTF_ANYCAST;
2066	else
2067		rt->rt6i_flags |= RTF_LOCAL;
2068	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2069	if (IS_ERR(neigh)) {
2070		dst_free(&rt->dst);
2071
2072		return ERR_CAST(neigh);
2073	}
2074	dst_set_neighbour(&rt->dst, neigh);
2075
2076	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2077	rt->rt6i_dst.plen = 128;
2078	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2079
2080	atomic_set(&rt->dst.__refcnt, 1);
2081
2082	return rt;
2083}
2084
2085int ip6_route_get_saddr(struct net *net,
2086			struct rt6_info *rt,
2087			const struct in6_addr *daddr,
2088			unsigned int prefs,
2089			struct in6_addr *saddr)
2090{
2091	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2092	int err = 0;
2093	if (rt->rt6i_prefsrc.plen)
2094		ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2095	else
2096		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2097					 daddr, prefs, saddr);
2098	return err;
2099}
2100
2101/* remove deleted ip from prefsrc entries */
/* Cookie passed to fib6_remove_prefsrc() by rt6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace (for its null entry) */
	struct in6_addr *addr;		/* address being removed */
};
2107
2108static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2109{
2110	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2111	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2112	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2113
2114	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2115	    rt != net->ipv6.ip6_null_entry &&
2116	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2117		/* remove prefsrc entry */
2118		rt->rt6i_prefsrc.plen = 0;
2119	}
2120	return 0;
2121}
2122
2123void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2124{
2125	struct net *net = dev_net(ifp->idev->dev);
2126	struct arg_dev_net_ip adni = {
2127		.dev = ifp->idev->dev,
2128		.net = net,
2129		.addr = &ifp->addr,
2130	};
2131	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2132}
2133
/* Cookie passed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace (for its null entry) */
};
2138
2139static int fib6_ifdown(struct rt6_info *rt, void *arg)
2140{
2141	const struct arg_dev_net *adn = arg;
2142	const struct net_device *dev = adn->dev;
2143
2144	if ((rt->rt6i_dev == dev || dev == NULL) &&
2145	    rt != adn->net->ipv6.ip6_null_entry) {
2146		RT6_TRACE("deleted by ifdown %p\n", rt);
2147		return -1;
2148	}
2149	return 0;
2150}
2151
2152void rt6_ifdown(struct net *net, struct net_device *dev)
2153{
2154	struct arg_dev_net adn = {
2155		.dev = dev,
2156		.net = net,
2157	};
2158
2159	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2160	icmp6_clean_all(fib6_ifdown, &adn);
2161}
2162
/* Cookie passed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* its new MTU */
};
2168
2169static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2170{
2171	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2172	struct inet6_dev *idev;
2173
2174	/* In IPv6 pmtu discovery is not optional,
2175	   so that RTAX_MTU lock cannot disable it.
2176	   We still use this lock to block changes
2177	   caused by addrconf/ndisc.
2178	*/
2179
2180	idev = __in6_dev_get(arg->dev);
2181	if (idev == NULL)
2182		return 0;
2183
2184	/* For administrative MTU increase, there is no way to discover
2185	   IPv6 PMTU increase, so PMTU increase should be updated here.
2186	   Since RFC 1981 doesn't include administrative MTU increase
2187	   update PMTU increase is a MUST. (i.e. jumbo frame)
2188	 */
2189	/*
2190	   If new MTU is less than route PMTU, this new MTU will be the
2191	   lowest MTU in the path, update the route PMTU to reflect PMTU
2192	   decreases; if new MTU is greater than route PMTU, and the
2193	   old MTU is the lowest MTU in the path, update the route PMTU
2194	   to reflect the increase. In this case if the other nodes' MTU
2195	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2196	   PMTU discouvery.
2197	 */
2198	if (rt->rt6i_dev == arg->dev &&
2199	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2200	    (dst_mtu(&rt->dst) >= arg->mtu ||
2201	     (dst_mtu(&rt->dst) < arg->mtu &&
2202	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2203		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2204	}
2205	return 0;
2206}
2207
2208void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2209{
2210	struct rt6_mtu_change_arg arg = {
2211		.dev = dev,
2212		.mtu = mtu,
2213	};
2214
2215	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2216}
2217
2218static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2219	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2220	[RTA_OIF]               = { .type = NLA_U32 },
2221	[RTA_IIF]		= { .type = NLA_U32 },
2222	[RTA_PRIORITY]          = { .type = NLA_U32 },
2223	[RTA_METRICS]           = { .type = NLA_NESTED },
2224};
2225
2226static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2227			      struct fib6_config *cfg)
2228{
2229	struct rtmsg *rtm;
2230	struct nlattr *tb[RTA_MAX+1];
2231	int err;
2232
2233	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2234	if (err < 0)
2235		goto errout;
2236
2237	err = -EINVAL;
2238	rtm = nlmsg_data(nlh);
2239	memset(cfg, 0, sizeof(*cfg));
2240
2241	cfg->fc_table = rtm->rtm_table;
2242	cfg->fc_dst_len = rtm->rtm_dst_len;
2243	cfg->fc_src_len = rtm->rtm_src_len;
2244	cfg->fc_flags = RTF_UP;
2245	cfg->fc_protocol = rtm->rtm_protocol;
2246
2247	if (rtm->rtm_type == RTN_UNREACHABLE)
2248		cfg->fc_flags |= RTF_REJECT;
2249
2250	if (rtm->rtm_type == RTN_LOCAL)
2251		cfg->fc_flags |= RTF_LOCAL;
2252
2253	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2254	cfg->fc_nlinfo.nlh = nlh;
2255	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2256
2257	if (tb[RTA_GATEWAY]) {
2258		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2259		cfg->fc_flags |= RTF_GATEWAY;
2260	}
2261
2262	if (tb[RTA_DST]) {
2263		int plen = (rtm->rtm_dst_len + 7) >> 3;
2264
2265		if (nla_len(tb[RTA_DST]) < plen)
2266			goto errout;
2267
2268		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2269	}
2270
2271	if (tb[RTA_SRC]) {
2272		int plen = (rtm->rtm_src_len + 7) >> 3;
2273
2274		if (nla_len(tb[RTA_SRC]) < plen)
2275			goto errout;
2276
2277		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2278	}
2279
2280	if (tb[RTA_PREFSRC])
2281		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2282
2283	if (tb[RTA_OIF])
2284		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2285
2286	if (tb[RTA_PRIORITY])
2287		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2288
2289	if (tb[RTA_METRICS]) {
2290		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2291		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2292	}
2293
2294	if (tb[RTA_TABLE])
2295		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2296
2297	err = 0;
2298errout:
2299	return err;
2300}
2301
2302static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2303{
2304	struct fib6_config cfg;
2305	int err;
2306
2307	err = rtm_to_fib6_config(skb, nlh, &cfg);
2308	if (err < 0)
2309		return err;
2310
2311	return ip6_route_del(&cfg);
2312}
2313
2314static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2315{
2316	struct fib6_config cfg;
2317	int err;
2318
2319	err = rtm_to_fib6_config(skb, nlh, &cfg);
2320	if (err < 0)
2321		return err;
2322
2323	return ip6_route_add(&cfg);
2324}
2325
2326static inline size_t rt6_nlmsg_size(void)
2327{
2328	return NLMSG_ALIGN(sizeof(struct rtmsg))
2329	       + nla_total_size(16) /* RTA_SRC */
2330	       + nla_total_size(16) /* RTA_DST */
2331	       + nla_total_size(16) /* RTA_GATEWAY */
2332	       + nla_total_size(16) /* RTA_PREFSRC */
2333	       + nla_total_size(4) /* RTA_TABLE */
2334	       + nla_total_size(4) /* RTA_IIF */
2335	       + nla_total_size(4) /* RTA_OIF */
2336	       + nla_total_size(4) /* RTA_PRIORITY */
2337	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2338	       + nla_total_size(sizeof(struct rta_cacheinfo));
2339}
2340
2341static int rt6_fill_node(struct net *net,
2342			 struct sk_buff *skb, struct rt6_info *rt,
2343			 struct in6_addr *dst, struct in6_addr *src,
2344			 int iif, int type, u32 pid, u32 seq,
2345			 int prefix, int nowait, unsigned int flags)
2346{
 
2347	struct rtmsg *rtm;
2348	struct nlmsghdr *nlh;
2349	long expires;
2350	u32 table;
2351	struct neighbour *n;
 
2352
2353	if (prefix) {	/* user wants prefix routes only */
2354		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2355			/* success since this is not a prefix route */
2356			return 1;
2357		}
2358	}
2359
2360	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2361	if (nlh == NULL)
2362		return -EMSGSIZE;
2363
2364	rtm = nlmsg_data(nlh);
2365	rtm->rtm_family = AF_INET6;
2366	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2367	rtm->rtm_src_len = rt->rt6i_src.plen;
2368	rtm->rtm_tos = 0;
2369	if (rt->rt6i_table)
2370		table = rt->rt6i_table->tb6_id;
2371	else
2372		table = RT6_TABLE_UNSPEC;
2373	rtm->rtm_table = table;
2374	NLA_PUT_U32(skb, RTA_TABLE, table);
2375	if (rt->rt6i_flags&RTF_REJECT)
 
2376		rtm->rtm_type = RTN_UNREACHABLE;
2377	else if (rt->rt6i_flags&RTF_LOCAL)
2378		rtm->rtm_type = RTN_LOCAL;
2379	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2380		rtm->rtm_type = RTN_LOCAL;
2381	else
2382		rtm->rtm_type = RTN_UNICAST;
2383	rtm->rtm_flags = 0;
2384	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2385	rtm->rtm_protocol = rt->rt6i_protocol;
2386	if (rt->rt6i_flags&RTF_DYNAMIC)
2387		rtm->rtm_protocol = RTPROT_REDIRECT;
2388	else if (rt->rt6i_flags & RTF_ADDRCONF)
2389		rtm->rtm_protocol = RTPROT_KERNEL;
2390	else if (rt->rt6i_flags&RTF_DEFAULT)
2391		rtm->rtm_protocol = RTPROT_RA;
2392
2393	if (rt->rt6i_flags&RTF_CACHE)
2394		rtm->rtm_flags |= RTM_F_CLONED;
2395
2396	if (dst) {
2397		NLA_PUT(skb, RTA_DST, 16, dst);
 
2398		rtm->rtm_dst_len = 128;
2399	} else if (rtm->rtm_dst_len)
2400		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
 
2401#ifdef CONFIG_IPV6_SUBTREES
2402	if (src) {
2403		NLA_PUT(skb, RTA_SRC, 16, src);
 
2404		rtm->rtm_src_len = 128;
2405	} else if (rtm->rtm_src_len)
2406		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
 
2407#endif
2408	if (iif) {
2409#ifdef CONFIG_IPV6_MROUTE
2410		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2411			int err = ip6mr_get_route(net, skb, rtm, nowait);
2412			if (err <= 0) {
2413				if (!nowait) {
2414					if (err == 0)
2415						return 0;
2416					goto nla_put_failure;
2417				} else {
2418					if (err == -EMSGSIZE)
2419						goto nla_put_failure;
2420				}
2421			}
2422		} else
2423#endif
2424			NLA_PUT_U32(skb, RTA_IIF, iif);
 
2425	} else if (dst) {
2426		struct in6_addr saddr_buf;
2427		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2428			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 
2429	}
2430
2431	if (rt->rt6i_prefsrc.plen) {
2432		struct in6_addr saddr_buf;
2433		ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2434		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 
2435	}
2436
2437	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2438		goto nla_put_failure;
2439
2440	rcu_read_lock();
2441	n = dst_get_neighbour(&rt->dst);
2442	if (n)
2443		NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
 
 
 
 
2444	rcu_read_unlock();
2445
2446	if (rt->dst.dev)
2447		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2448
2449	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2450
2451	if (!(rt->rt6i_flags & RTF_EXPIRES))
2452		expires = 0;
2453	else if (rt->rt6i_expires - jiffies < INT_MAX)
2454		expires = rt->rt6i_expires - jiffies;
2455	else
2456		expires = INT_MAX;
2457
2458	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
 
 
 
 
 
 
 
2459			       expires, rt->dst.error) < 0)
2460		goto nla_put_failure;
2461
2462	return nlmsg_end(skb, nlh);
2463
2464nla_put_failure:
2465	nlmsg_cancel(skb, nlh);
2466	return -EMSGSIZE;
2467}
2468
2469int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2470{
2471	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2472	int prefix;
2473
2474	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2475		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2476		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2477	} else
2478		prefix = 0;
2479
2480	return rt6_fill_node(arg->net,
2481		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2482		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2483		     prefix, 0, NLM_F_MULTI);
2484}
2485
2486static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2487{
2488	struct net *net = sock_net(in_skb->sk);
2489	struct nlattr *tb[RTA_MAX+1];
2490	struct rt6_info *rt;
2491	struct sk_buff *skb;
2492	struct rtmsg *rtm;
2493	struct flowi6 fl6;
2494	int err, iif = 0;
2495
2496	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2497	if (err < 0)
2498		goto errout;
2499
2500	err = -EINVAL;
2501	memset(&fl6, 0, sizeof(fl6));
2502
2503	if (tb[RTA_SRC]) {
2504		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2505			goto errout;
2506
2507		ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2508	}
2509
2510	if (tb[RTA_DST]) {
2511		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2512			goto errout;
2513
2514		ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2515	}
2516
2517	if (tb[RTA_IIF])
2518		iif = nla_get_u32(tb[RTA_IIF]);
2519
2520	if (tb[RTA_OIF])
2521		fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2522
2523	if (iif) {
2524		struct net_device *dev;
 
 
2525		dev = __dev_get_by_index(net, iif);
2526		if (!dev) {
2527			err = -ENODEV;
2528			goto errout;
2529		}
 
 
 
 
 
 
 
 
 
 
 
 
2530	}
2531
2532	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2533	if (skb == NULL) {
 
2534		err = -ENOBUFS;
2535		goto errout;
2536	}
2537
2538	/* Reserve room for dummy headers, this skb can pass
2539	   through good chunk of routing engine.
2540	 */
2541	skb_reset_mac_header(skb);
2542	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2543
2544	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2545	skb_dst_set(skb, &rt->dst);
2546
2547	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2548			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2549			    nlh->nlmsg_seq, 0, 0, 0);
2550	if (err < 0) {
2551		kfree_skb(skb);
2552		goto errout;
2553	}
2554
2555	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2556errout:
2557	return err;
2558}
2559
2560void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2561{
2562	struct sk_buff *skb;
2563	struct net *net = info->nl_net;
2564	u32 seq;
2565	int err;
2566
2567	err = -ENOBUFS;
2568	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2569
2570	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2571	if (skb == NULL)
2572		goto errout;
2573
2574	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2575				event, info->pid, seq, 0, 0, 0);
2576	if (err < 0) {
2577		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2578		WARN_ON(err == -EMSGSIZE);
2579		kfree_skb(skb);
2580		goto errout;
2581	}
2582	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2583		    info->nlh, gfp_any());
2584	return;
2585errout:
2586	if (err < 0)
2587		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2588}
2589
2590static int ip6_route_dev_notify(struct notifier_block *this,
2591				unsigned long event, void *data)
2592{
2593	struct net_device *dev = (struct net_device *)data;
2594	struct net *net = dev_net(dev);
2595
2596	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2597		net->ipv6.ip6_null_entry->dst.dev = dev;
2598		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2599#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2600		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2601		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2602		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2603		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2604#endif
2605	}
2606
2607	return NOTIFY_OK;
2608}
2609
2610/*
2611 *	/proc
2612 */
2613
2614#ifdef CONFIG_PROC_FS
2615
/*
 * Buffer bookkeeping record.
 * NOTE(review): no function visible in this part of the file uses it;
 * check for remaining users before relying on any field semantics.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2624
2625static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2626{
2627	struct seq_file *m = p_arg;
2628	struct neighbour *n;
2629
2630	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2631
2632#ifdef CONFIG_IPV6_SUBTREES
2633	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2634#else
2635	seq_puts(m, "00000000000000000000000000000000 00 ");
2636#endif
2637	rcu_read_lock();
2638	n = dst_get_neighbour(&rt->dst);
2639	if (n) {
2640		seq_printf(m, "%pi6", n->primary_key);
2641	} else {
2642		seq_puts(m, "00000000000000000000000000000000");
2643	}
2644	rcu_read_unlock();
2645	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2646		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2647		   rt->dst.__use, rt->rt6i_flags,
2648		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2649	return 0;
2650}
2651
2652static int ipv6_route_show(struct seq_file *m, void *v)
2653{
2654	struct net *net = (struct net *)m->private;
2655	fib6_clean_all(net, rt6_info_route, 0, m);
2656	return 0;
2657}
2658
2659static int ipv6_route_open(struct inode *inode, struct file *file)
2660{
2661	return single_open_net(inode, file, ipv6_route_show);
2662}
2663
2664static const struct file_operations ipv6_route_proc_fops = {
2665	.owner		= THIS_MODULE,
2666	.open		= ipv6_route_open,
2667	.read		= seq_read,
2668	.llseek		= seq_lseek,
2669	.release	= single_release_net,
2670};
2671
2672static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2673{
2674	struct net *net = (struct net *)seq->private;
2675	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2676		   net->ipv6.rt6_stats->fib_nodes,
2677		   net->ipv6.rt6_stats->fib_route_nodes,
2678		   net->ipv6.rt6_stats->fib_rt_alloc,
2679		   net->ipv6.rt6_stats->fib_rt_entries,
2680		   net->ipv6.rt6_stats->fib_rt_cache,
2681		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2682		   net->ipv6.rt6_stats->fib_discarded_routes);
2683
2684	return 0;
2685}
2686
2687static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2688{
2689	return single_open_net(inode, file, rt6_stats_seq_show);
2690}
2691
2692static const struct file_operations rt6_stats_seq_fops = {
2693	.owner	 = THIS_MODULE,
2694	.open	 = rt6_stats_seq_open,
2695	.read	 = seq_read,
2696	.llseek	 = seq_lseek,
2697	.release = single_release_net,
2698};
2699#endif	/* CONFIG_PROC_FS */
2700
2701#ifdef CONFIG_SYSCTL
2702
2703static
2704int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2705			      void __user *buffer, size_t *lenp, loff_t *ppos)
2706{
2707	struct net *net;
2708	int delay;
2709	if (!write)
2710		return -EINVAL;
2711
2712	net = (struct net *)ctl->extra1;
2713	delay = net->ipv6.sysctl.flush_delay;
2714	proc_dointvec(ctl, write, buffer, lenp, ppos);
2715	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2716	return 0;
2717}
2718
2719ctl_table ipv6_route_table_template[] = {
2720	{
2721		.procname	=	"flush",
2722		.data		=	&init_net.ipv6.sysctl.flush_delay,
2723		.maxlen		=	sizeof(int),
2724		.mode		=	0200,
2725		.proc_handler	=	ipv6_sysctl_rtcache_flush
2726	},
2727	{
2728		.procname	=	"gc_thresh",
2729		.data		=	&ip6_dst_ops_template.gc_thresh,
2730		.maxlen		=	sizeof(int),
2731		.mode		=	0644,
2732		.proc_handler	=	proc_dointvec,
2733	},
2734	{
2735		.procname	=	"max_size",
2736		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2737		.maxlen		=	sizeof(int),
2738		.mode		=	0644,
2739		.proc_handler	=	proc_dointvec,
2740	},
2741	{
2742		.procname	=	"gc_min_interval",
2743		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2744		.maxlen		=	sizeof(int),
2745		.mode		=	0644,
2746		.proc_handler	=	proc_dointvec_jiffies,
2747	},
2748	{
2749		.procname	=	"gc_timeout",
2750		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2751		.maxlen		=	sizeof(int),
2752		.mode		=	0644,
2753		.proc_handler	=	proc_dointvec_jiffies,
2754	},
2755	{
2756		.procname	=	"gc_interval",
2757		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2758		.maxlen		=	sizeof(int),
2759		.mode		=	0644,
2760		.proc_handler	=	proc_dointvec_jiffies,
2761	},
2762	{
2763		.procname	=	"gc_elasticity",
2764		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2765		.maxlen		=	sizeof(int),
2766		.mode		=	0644,
2767		.proc_handler	=	proc_dointvec,
2768	},
2769	{
2770		.procname	=	"mtu_expires",
2771		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2772		.maxlen		=	sizeof(int),
2773		.mode		=	0644,
2774		.proc_handler	=	proc_dointvec_jiffies,
2775	},
2776	{
2777		.procname	=	"min_adv_mss",
2778		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2779		.maxlen		=	sizeof(int),
2780		.mode		=	0644,
2781		.proc_handler	=	proc_dointvec,
2782	},
2783	{
2784		.procname	=	"gc_min_interval_ms",
2785		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2786		.maxlen		=	sizeof(int),
2787		.mode		=	0644,
2788		.proc_handler	=	proc_dointvec_ms_jiffies,
2789	},
2790	{ }
2791};
2792
2793struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2794{
2795	struct ctl_table *table;
2796
2797	table = kmemdup(ipv6_route_table_template,
2798			sizeof(ipv6_route_table_template),
2799			GFP_KERNEL);
2800
2801	if (table) {
2802		table[0].data = &net->ipv6.sysctl.flush_delay;
2803		table[0].extra1 = net;
2804		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2805		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2806		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2807		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2808		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2809		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2810		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2811		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2812		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2813	}
2814
2815	return table;
2816}
2817#endif
2818
2819static int __net_init ip6_route_net_init(struct net *net)
2820{
2821	int ret = -ENOMEM;
2822
2823	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2824	       sizeof(net->ipv6.ip6_dst_ops));
2825
2826	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2827		goto out_ip6_dst_ops;
2828
2829	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2830					   sizeof(*net->ipv6.ip6_null_entry),
2831					   GFP_KERNEL);
2832	if (!net->ipv6.ip6_null_entry)
2833		goto out_ip6_dst_entries;
2834	net->ipv6.ip6_null_entry->dst.path =
2835		(struct dst_entry *)net->ipv6.ip6_null_entry;
2836	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2837	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2838			 ip6_template_metrics, true);
2839
2840#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2841	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2842					       sizeof(*net->ipv6.ip6_prohibit_entry),
2843					       GFP_KERNEL);
2844	if (!net->ipv6.ip6_prohibit_entry)
2845		goto out_ip6_null_entry;
2846	net->ipv6.ip6_prohibit_entry->dst.path =
2847		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2848	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2849	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2850			 ip6_template_metrics, true);
2851
2852	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2853					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2854					       GFP_KERNEL);
2855	if (!net->ipv6.ip6_blk_hole_entry)
2856		goto out_ip6_prohibit_entry;
2857	net->ipv6.ip6_blk_hole_entry->dst.path =
2858		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2859	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2860	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2861			 ip6_template_metrics, true);
2862#endif
2863
2864	net->ipv6.sysctl.flush_delay = 0;
2865	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2866	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2867	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2868	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2869	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2870	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2871	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2872
2873#ifdef CONFIG_PROC_FS
2874	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2875	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2876#endif
2877	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2878
2879	ret = 0;
2880out:
2881	return ret;
2882
2883#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2884out_ip6_prohibit_entry:
2885	kfree(net->ipv6.ip6_prohibit_entry);
2886out_ip6_null_entry:
2887	kfree(net->ipv6.ip6_null_entry);
2888#endif
2889out_ip6_dst_entries:
2890	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2891out_ip6_dst_ops:
2892	goto out;
2893}
2894
2895static void __net_exit ip6_route_net_exit(struct net *net)
2896{
2897#ifdef CONFIG_PROC_FS
2898	proc_net_remove(net, "ipv6_route");
2899	proc_net_remove(net, "rt6_stats");
2900#endif
2901	kfree(net->ipv6.ip6_null_entry);
2902#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2903	kfree(net->ipv6.ip6_prohibit_entry);
2904	kfree(net->ipv6.ip6_blk_hole_entry);
2905#endif
2906	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2907}
2908
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2909static struct pernet_operations ip6_route_net_ops = {
2910	.init = ip6_route_net_init,
2911	.exit = ip6_route_net_exit,
2912};
2913
 
 
 
 
 
2914static struct notifier_block ip6_route_dev_notifier = {
2915	.notifier_call = ip6_route_dev_notify,
2916	.priority = 0,
2917};
2918
2919int __init ip6_route_init(void)
2920{
2921	int ret;
2922
2923	ret = -ENOMEM;
2924	ip6_dst_ops_template.kmem_cachep =
2925		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2926				  SLAB_HWCACHE_ALIGN, NULL);
2927	if (!ip6_dst_ops_template.kmem_cachep)
2928		goto out;
2929
2930	ret = dst_entries_init(&ip6_dst_blackhole_ops);
2931	if (ret)
2932		goto out_kmem_cache;
2933
2934	ret = register_pernet_subsys(&ip6_route_net_ops);
2935	if (ret)
2936		goto out_dst_entries;
2937
2938	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2939
2940	/* Registering of the loopback is done before this portion of code,
2941	 * the loopback reference in rt6_info will not be taken, do it
2942	 * manually for init_net */
2943	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2944	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2945  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2946	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2947	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2948	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2949	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2950  #endif
2951	ret = fib6_init();
2952	if (ret)
2953		goto out_register_subsys;
2954
2955	ret = xfrm6_init();
2956	if (ret)
2957		goto out_fib6_init;
2958
2959	ret = fib6_rules_init();
2960	if (ret)
2961		goto xfrm6_init;
2962
 
 
 
 
2963	ret = -ENOBUFS;
2964	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2965	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2966	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
2967		goto fib6_rules_init;
2968
2969	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2970	if (ret)
2971		goto fib6_rules_init;
2972
2973out:
2974	return ret;
2975
 
 
2976fib6_rules_init:
2977	fib6_rules_cleanup();
2978xfrm6_init:
2979	xfrm6_fini();
2980out_fib6_init:
2981	fib6_gc_cleanup();
2982out_register_subsys:
2983	unregister_pernet_subsys(&ip6_route_net_ops);
2984out_dst_entries:
2985	dst_entries_destroy(&ip6_dst_blackhole_ops);
2986out_kmem_cache:
2987	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2988	goto out;
2989}
2990
2991void ip6_route_cleanup(void)
2992{
2993	unregister_netdevice_notifier(&ip6_route_dev_notifier);
 
2994	fib6_rules_cleanup();
2995	xfrm6_fini();
2996	fib6_gc_cleanup();
2997	unregister_pernet_subsys(&ip6_route_net_ops);
2998	dst_entries_destroy(&ip6_dst_blackhole_ops);
2999	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3000}
v3.5.6
   1/*
   2 *	Linux INET6 implementation
   3 *	FIB front-end.
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*	Changes:
  15 *
  16 *	YOSHIFUJI Hideaki @USAGI
  17 *		reworked default router selection.
  18 *		- respect outgoing interface
  19 *		- select from (probably) reachable routers (i.e.
  20 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *		- always select the same router if it is (probably)
  22 *		reachable.  otherwise, round-robin the list.
  23 *	Ville Nuorvala
  24 *		Fixed routing subtrees.
  25 */
  26
  27#define pr_fmt(fmt) "IPv6: " fmt
  28
  29#include <linux/capability.h>
  30#include <linux/errno.h>
  31#include <linux/export.h>
  32#include <linux/types.h>
  33#include <linux/times.h>
  34#include <linux/socket.h>
  35#include <linux/sockios.h>
  36#include <linux/net.h>
  37#include <linux/route.h>
  38#include <linux/netdevice.h>
  39#include <linux/in6.h>
  40#include <linux/mroute6.h>
  41#include <linux/init.h>
  42#include <linux/if_arp.h>
  43#include <linux/proc_fs.h>
  44#include <linux/seq_file.h>
  45#include <linux/nsproxy.h>
  46#include <linux/slab.h>
  47#include <net/net_namespace.h>
  48#include <net/snmp.h>
  49#include <net/ipv6.h>
  50#include <net/ip6_fib.h>
  51#include <net/ip6_route.h>
  52#include <net/ndisc.h>
  53#include <net/addrconf.h>
  54#include <net/tcp.h>
  55#include <linux/rtnetlink.h>
  56#include <net/dst.h>
  57#include <net/xfrm.h>
  58#include <net/netevent.h>
  59#include <net/netlink.h>
  60
  61#include <asm/uaccess.h>
  62
  63#ifdef CONFIG_SYSCTL
  64#include <linux/sysctl.h>
  65#endif
  66
  67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
 
 
 
 
 
 
 
 
 
 
 
  68				    const struct in6_addr *dest);
  69static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  70static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  71static unsigned int	 ip6_mtu(const struct dst_entry *dst);
  72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  73static void		ip6_dst_destroy(struct dst_entry *);
  74static void		ip6_dst_ifdown(struct dst_entry *,
  75				       struct net_device *dev, int how);
  76static int		 ip6_dst_gc(struct dst_ops *ops);
  77
  78static int		ip6_pkt_discard(struct sk_buff *skb);
  79static int		ip6_pkt_discard_out(struct sk_buff *skb);
  80static void		ip6_link_failure(struct sk_buff *skb);
  81static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
  82
  83#ifdef CONFIG_IPV6_ROUTE_INFO
  84static struct rt6_info *rt6_add_route_info(struct net *net,
  85					   const struct in6_addr *prefix, int prefixlen,
  86					   const struct in6_addr *gwaddr, int ifindex,
  87					   unsigned int pref);
  88static struct rt6_info *rt6_get_route_info(struct net *net,
  89					   const struct in6_addr *prefix, int prefixlen,
  90					   const struct in6_addr *gwaddr, int ifindex);
  91#endif
  92
  93static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
  94{
  95	struct rt6_info *rt = (struct rt6_info *) dst;
  96	struct inet_peer *peer;
  97	u32 *p = NULL;
  98
  99	if (!(rt->dst.flags & DST_HOST))
 100		return NULL;
 101
 102	if (!rt->rt6i_peer)
 103		rt6_bind_peer(rt, 1);
 104
 105	peer = rt->rt6i_peer;
 106	if (peer) {
 107		u32 *old_p = __DST_METRICS_PTR(old);
 108		unsigned long prev, new;
 109
 110		p = peer->metrics;
 111		if (inet_metrics_new(peer))
 112			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
 113
 114		new = (unsigned long) p;
 115		prev = cmpxchg(&dst->_metrics, old, new);
 116
 117		if (prev != old) {
 118			p = __DST_METRICS_PTR(prev);
 119			if (prev & DST_METRICS_READ_ONLY)
 120				p = NULL;
 121		}
 122	}
 123	return p;
 124}
 125
 126static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
 127{
 128	struct in6_addr *p = &rt->rt6i_gateway;
 129
 130	if (!ipv6_addr_any(p))
 131		return (const void *) p;
 132	return daddr;
 133}
 134
 135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
 136{
 137	struct rt6_info *rt = (struct rt6_info *) dst;
 138	struct neighbour *n;
 139
 140	daddr = choose_neigh_daddr(rt, daddr);
 141	n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
 142	if (n)
 143		return n;
 144	return neigh_create(&nd_tbl, daddr, dst->dev);
 145}
 146
 147static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
 148{
 149	struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
 150	if (!n) {
 151		n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
 152		if (IS_ERR(n))
 153			return PTR_ERR(n);
 154	}
 155	dst_set_neighbour(&rt->dst, n);
 156
 157	return 0;
 158}
 159
 160static struct dst_ops ip6_dst_ops_template = {
 161	.family			=	AF_INET6,
 162	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 163	.gc			=	ip6_dst_gc,
 164	.gc_thresh		=	1024,
 165	.check			=	ip6_dst_check,
 166	.default_advmss		=	ip6_default_advmss,
 167	.mtu			=	ip6_mtu,
 168	.cow_metrics		=	ipv6_cow_metrics,
 169	.destroy		=	ip6_dst_destroy,
 170	.ifdown			=	ip6_dst_ifdown,
 171	.negative_advice	=	ip6_negative_advice,
 172	.link_failure		=	ip6_link_failure,
 173	.update_pmtu		=	ip6_rt_update_pmtu,
 174	.local_out		=	__ip6_local_out,
 175	.neigh_lookup		=	ip6_neigh_lookup,
 176};
 177
 178static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 179{
 180	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 181
 182	return mtu ? : dst->dev->mtu;
 183}
 184
 185static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 186{
 187}
 188
 189static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
 190					 unsigned long old)
 191{
 192	return NULL;
 193}
 194
 195static struct dst_ops ip6_dst_blackhole_ops = {
 196	.family			=	AF_INET6,
 197	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 198	.destroy		=	ip6_dst_destroy,
 199	.check			=	ip6_dst_check,
 200	.mtu			=	ip6_blackhole_mtu,
 201	.default_advmss		=	ip6_default_advmss,
 202	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 203	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
 204	.neigh_lookup		=	ip6_neigh_lookup,
 205};
 206
 207static const u32 ip6_template_metrics[RTAX_MAX] = {
 208	[RTAX_HOPLIMIT - 1] = 255,
 209};
 210
 211static struct rt6_info ip6_null_entry_template = {
 212	.dst = {
 213		.__refcnt	= ATOMIC_INIT(1),
 214		.__use		= 1,
 215		.obsolete	= -1,
 216		.error		= -ENETUNREACH,
 217		.input		= ip6_pkt_discard,
 218		.output		= ip6_pkt_discard_out,
 219	},
 220	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 221	.rt6i_protocol  = RTPROT_KERNEL,
 222	.rt6i_metric	= ~(u32) 0,
 223	.rt6i_ref	= ATOMIC_INIT(1),
 224};
 225
 226#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 227
 228static int ip6_pkt_prohibit(struct sk_buff *skb);
 229static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 230
 231static struct rt6_info ip6_prohibit_entry_template = {
 232	.dst = {
 233		.__refcnt	= ATOMIC_INIT(1),
 234		.__use		= 1,
 235		.obsolete	= -1,
 236		.error		= -EACCES,
 237		.input		= ip6_pkt_prohibit,
 238		.output		= ip6_pkt_prohibit_out,
 239	},
 240	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 241	.rt6i_protocol  = RTPROT_KERNEL,
 242	.rt6i_metric	= ~(u32) 0,
 243	.rt6i_ref	= ATOMIC_INIT(1),
 244};
 245
 246static struct rt6_info ip6_blk_hole_entry_template = {
 247	.dst = {
 248		.__refcnt	= ATOMIC_INIT(1),
 249		.__use		= 1,
 250		.obsolete	= -1,
 251		.error		= -EINVAL,
 252		.input		= dst_discard,
 253		.output		= dst_discard,
 254	},
 255	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 256	.rt6i_protocol  = RTPROT_KERNEL,
 257	.rt6i_metric	= ~(u32) 0,
 258	.rt6i_ref	= ATOMIC_INIT(1),
 259};
 260
 261#endif
 262
 263/* allocate dst with ip6_dst_ops */
 264static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
 265					     struct net_device *dev,
 266					     int flags)
 267{
 268	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
 269
 270	if (rt)
 271		memset(&rt->rt6i_table, 0,
 272		       sizeof(*rt) - sizeof(struct dst_entry));
 273
 274	return rt;
 275}
 276
 277static void ip6_dst_destroy(struct dst_entry *dst)
 278{
 279	struct rt6_info *rt = (struct rt6_info *)dst;
 280	struct inet6_dev *idev = rt->rt6i_idev;
 281	struct inet_peer *peer = rt->rt6i_peer;
 282
 283	if (!(rt->dst.flags & DST_HOST))
 284		dst_destroy_metrics_generic(dst);
 285
 286	if (idev) {
 287		rt->rt6i_idev = NULL;
 288		in6_dev_put(idev);
 289	}
 290
 291	if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
 292		dst_release(dst->from);
 293
 294	if (peer) {
 295		rt->rt6i_peer = NULL;
 296		inet_putpeer(peer);
 297	}
 298}
 299
 300static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
 301
 302static u32 rt6_peer_genid(void)
 303{
 304	return atomic_read(&__rt6_peer_genid);
 305}
 306
 307void rt6_bind_peer(struct rt6_info *rt, int create)
 308{
 309	struct inet_peer *peer;
 310
 311	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
 312	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
 313		inet_putpeer(peer);
 314	else
 315		rt->rt6i_peer_genid = rt6_peer_genid();
 316}
 317
 318static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 319			   int how)
 320{
 321	struct rt6_info *rt = (struct rt6_info *)dst;
 322	struct inet6_dev *idev = rt->rt6i_idev;
 323	struct net_device *loopback_dev =
 324		dev_net(dev)->loopback_dev;
 325
 326	if (dev != loopback_dev && idev && idev->dev == dev) {
 327		struct inet6_dev *loopback_idev =
 328			in6_dev_get(loopback_dev);
 329		if (loopback_idev) {
 330			rt->rt6i_idev = loopback_idev;
 331			in6_dev_put(idev);
 332		}
 333	}
 334}
 335
 336static bool rt6_check_expired(const struct rt6_info *rt)
 337{
 338	struct rt6_info *ort = NULL;
 339
 340	if (rt->rt6i_flags & RTF_EXPIRES) {
 341		if (time_after(jiffies, rt->dst.expires))
 342			return true;
 343	} else if (rt->dst.from) {
 344		ort = (struct rt6_info *) rt->dst.from;
 345		return (ort->rt6i_flags & RTF_EXPIRES) &&
 346			time_after(jiffies, ort->dst.expires);
 347	}
 348	return false;
 349}
 350
 351static bool rt6_need_strict(const struct in6_addr *daddr)
 352{
 353	return ipv6_addr_type(daddr) &
 354		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 355}
 356
 357/*
 358 *	Route lookup. Any table->tb6_lock is implied.
 359 */
 360
 361static inline struct rt6_info *rt6_device_match(struct net *net,
 362						    struct rt6_info *rt,
 363						    const struct in6_addr *saddr,
 364						    int oif,
 365						    int flags)
 366{
 367	struct rt6_info *local = NULL;
 368	struct rt6_info *sprt;
 369
 370	if (!oif && ipv6_addr_any(saddr))
 371		goto out;
 372
 373	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 374		struct net_device *dev = sprt->dst.dev;
 375
 376		if (oif) {
 377			if (dev->ifindex == oif)
 378				return sprt;
 379			if (dev->flags & IFF_LOOPBACK) {
 380				if (!sprt->rt6i_idev ||
 381				    sprt->rt6i_idev->dev->ifindex != oif) {
 382					if (flags & RT6_LOOKUP_F_IFACE && oif)
 383						continue;
 384					if (local && (!oif ||
 385						      local->rt6i_idev->dev->ifindex == oif))
 386						continue;
 387				}
 388				local = sprt;
 389			}
 390		} else {
 391			if (ipv6_chk_addr(net, saddr, dev,
 392					  flags & RT6_LOOKUP_F_IFACE))
 393				return sprt;
 394		}
 395	}
 396
 397	if (oif) {
 398		if (local)
 399			return local;
 400
 401		if (flags & RT6_LOOKUP_F_IFACE)
 402			return net->ipv6.ip6_null_entry;
 403	}
 404out:
 405	return rt;
 406}
 407
 408#ifdef CONFIG_IPV6_ROUTER_PREF
 409static void rt6_probe(struct rt6_info *rt)
 410{
 411	struct neighbour *neigh;
 412	/*
 413	 * Okay, this does not seem to be appropriate
 414	 * for now, however, we need to check if it
 415	 * is really so; aka Router Reachability Probing.
 416	 *
 417	 * Router Reachability Probe MUST be rate-limited
 418	 * to no more than one per minute.
 419	 */
 420	rcu_read_lock();
 421	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
 422	if (!neigh || (neigh->nud_state & NUD_VALID))
 423		goto out;
 424	read_lock_bh(&neigh->lock);
 425	if (!(neigh->nud_state & NUD_VALID) &&
 426	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
 427		struct in6_addr mcaddr;
 428		struct in6_addr *target;
 429
 430		neigh->updated = jiffies;
 431		read_unlock_bh(&neigh->lock);
 432
 433		target = (struct in6_addr *)&neigh->primary_key;
 434		addrconf_addr_solict_mult(target, &mcaddr);
 435		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
 436	} else {
 437		read_unlock_bh(&neigh->lock);
 438	}
 439out:
 440	rcu_read_unlock();
 441}
 442#else
 443static inline void rt6_probe(struct rt6_info *rt)
 444{
 445}
 446#endif
 447
 448/*
 449 * Default Router Selection (RFC 2461 6.3.6)
 450 */
 451static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 452{
 453	struct net_device *dev = rt->dst.dev;
 454	if (!oif || dev->ifindex == oif)
 455		return 2;
 456	if ((dev->flags & IFF_LOOPBACK) &&
 457	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 458		return 1;
 459	return 0;
 460}
 461
 462static inline int rt6_check_neigh(struct rt6_info *rt)
 463{
 464	struct neighbour *neigh;
 465	int m;
 466
 467	rcu_read_lock();
 468	neigh = dst_get_neighbour_noref(&rt->dst);
 469	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 470	    !(rt->rt6i_flags & RTF_GATEWAY))
 471		m = 1;
 472	else if (neigh) {
 473		read_lock_bh(&neigh->lock);
 474		if (neigh->nud_state & NUD_VALID)
 475			m = 2;
 476#ifdef CONFIG_IPV6_ROUTER_PREF
 477		else if (neigh->nud_state & NUD_FAILED)
 478			m = 0;
 479#endif
 480		else
 481			m = 1;
 482		read_unlock_bh(&neigh->lock);
 483	} else
 484		m = 0;
 485	rcu_read_unlock();
 486	return m;
 487}
 488
 489static int rt6_score_route(struct rt6_info *rt, int oif,
 490			   int strict)
 491{
 492	int m, n;
 493
 494	m = rt6_check_dev(rt, oif);
 495	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 496		return -1;
 497#ifdef CONFIG_IPV6_ROUTER_PREF
 498	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 499#endif
 500	n = rt6_check_neigh(rt);
 501	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
 502		return -1;
 503	return m;
 504}
 505
 506static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 507				   int *mpri, struct rt6_info *match)
 508{
 509	int m;
 510
 511	if (rt6_check_expired(rt))
 512		goto out;
 513
 514	m = rt6_score_route(rt, oif, strict);
 515	if (m < 0)
 516		goto out;
 517
 518	if (m > *mpri) {
 519		if (strict & RT6_LOOKUP_F_REACHABLE)
 520			rt6_probe(match);
 521		*mpri = m;
 522		match = rt;
 523	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
 524		rt6_probe(rt);
 525	}
 526
 527out:
 528	return match;
 529}
 530
 531static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 532				     struct rt6_info *rr_head,
 533				     u32 metric, int oif, int strict)
 534{
 535	struct rt6_info *rt, *match;
 536	int mpri = -1;
 537
 538	match = NULL;
 539	for (rt = rr_head; rt && rt->rt6i_metric == metric;
 540	     rt = rt->dst.rt6_next)
 541		match = find_match(rt, oif, strict, &mpri, match);
 542	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 543	     rt = rt->dst.rt6_next)
 544		match = find_match(rt, oif, strict, &mpri, match);
 545
 546	return match;
 547}
 548
 549static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 550{
 551	struct rt6_info *match, *rt0;
 552	struct net *net;
 553
 
 
 
 554	rt0 = fn->rr_ptr;
 555	if (!rt0)
 556		fn->rr_ptr = rt0 = fn->leaf;
 557
 558	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
 559
 560	if (!match &&
 561	    (strict & RT6_LOOKUP_F_REACHABLE)) {
 562		struct rt6_info *next = rt0->dst.rt6_next;
 563
 564		/* no entries matched; do round-robin */
 565		if (!next || next->rt6i_metric != rt0->rt6i_metric)
 566			next = fn->leaf;
 567
 568		if (next != rt0)
 569			fn->rr_ptr = next;
 570	}
 571
 572	net = dev_net(rt0->dst.dev);
 
 
 
 573	return match ? match : net->ipv6.ip6_null_entry;
 574}
 575
 576#ifdef CONFIG_IPV6_ROUTE_INFO
 577int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 578		  const struct in6_addr *gwaddr)
 579{
 580	struct net *net = dev_net(dev);
 581	struct route_info *rinfo = (struct route_info *) opt;
 582	struct in6_addr prefix_buf, *prefix;
 583	unsigned int pref;
 584	unsigned long lifetime;
 585	struct rt6_info *rt;
 586
 587	if (len < sizeof(struct route_info)) {
 588		return -EINVAL;
 589	}
 590
 591	/* Sanity check for prefix_len and length */
 592	if (rinfo->length > 3) {
 593		return -EINVAL;
 594	} else if (rinfo->prefix_len > 128) {
 595		return -EINVAL;
 596	} else if (rinfo->prefix_len > 64) {
 597		if (rinfo->length < 2) {
 598			return -EINVAL;
 599		}
 600	} else if (rinfo->prefix_len > 0) {
 601		if (rinfo->length < 1) {
 602			return -EINVAL;
 603		}
 604	}
 605
 606	pref = rinfo->route_pref;
 607	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 608		return -EINVAL;
 609
 610	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 611
 612	if (rinfo->length == 3)
 613		prefix = (struct in6_addr *)rinfo->prefix;
 614	else {
 615		/* this function is safe */
 616		ipv6_addr_prefix(&prefix_buf,
 617				 (struct in6_addr *)rinfo->prefix,
 618				 rinfo->prefix_len);
 619		prefix = &prefix_buf;
 620	}
 621
 622	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 623				dev->ifindex);
 624
 625	if (rt && !lifetime) {
 626		ip6_del_rt(rt);
 627		rt = NULL;
 628	}
 629
 630	if (!rt && lifetime)
 631		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 632					pref);
 633	else if (rt)
 634		rt->rt6i_flags = RTF_ROUTEINFO |
 635				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 636
 637	if (rt) {
 638		if (!addrconf_finite_timeout(lifetime))
 639			rt6_clean_expires(rt);
 640		else
 641			rt6_set_expires(rt, jiffies + HZ * lifetime);
 642
 
 643		dst_release(&rt->dst);
 644	}
 645	return 0;
 646}
 647#endif
 648
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed selection.
 *
 * This macro expands in place and relies on the caller providing:
 *   - a local `rt` (struct rt6_info *) holding the current result,
 *   - a local `fn` (struct fib6_node *) holding the current node,
 *   - the labels `restart` and `out`.
 *
 * It only acts when `rt` is the namespace's null entry.  It then walks
 * from `fn` towards the root: if the parent has a subtree other than
 * `fn`, `saddr` is looked up in that subtree, otherwise the parent itself
 * becomes the current node.  The walk jumps to `out` (leaving `rt` as the
 * null entry) once a node flagged RTN_TL_ROOT is reached, and to
 * `restart` as soon as the current node carries RTN_RTINFO.
 */
  649#define BACKTRACK(__net, saddr)			\
  650do { \
  651	if (rt == __net->ipv6.ip6_null_entry) {	\
  652		struct fib6_node *pn; \
  653		while (1) { \
  654			if (fn->fn_flags & RTN_TL_ROOT) \
  655				goto out; \
  656			pn = fn->parent; \
  657			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
  658				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
  659			else \
  660				fn = pn; \
  661			if (fn->fn_flags & RTN_RTINFO) \
  662				goto restart; \
  663		} \
  664	} \
  665} while (0)
 666
 667static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 668					     struct fib6_table *table,
 669					     struct flowi6 *fl6, int flags)
 670{
 671	struct fib6_node *fn;
 672	struct rt6_info *rt;
 673
 674	read_lock_bh(&table->tb6_lock);
 675	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 676restart:
 677	rt = fn->leaf;
 678	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 679	BACKTRACK(net, &fl6->saddr);
 680out:
 681	dst_use(&rt->dst, jiffies);
 682	read_unlock_bh(&table->tb6_lock);
 683	return rt;
 684
 685}
 686
 687struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 688				    int flags)
 689{
 690	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
 691}
 692EXPORT_SYMBOL_GPL(ip6_route_lookup);
 693
 694struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 695			    const struct in6_addr *saddr, int oif, int strict)
 696{
 697	struct flowi6 fl6 = {
 698		.flowi6_oif = oif,
 699		.daddr = *daddr,
 700	};
 701	struct dst_entry *dst;
 702	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 703
 704	if (saddr) {
 705		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 706		flags |= RT6_LOOKUP_F_HAS_SADDR;
 707	}
 708
 709	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 710	if (dst->error == 0)
 711		return (struct rt6_info *) dst;
 712
 713	dst_release(dst);
 714
 715	return NULL;
 716}
 717
 718EXPORT_SYMBOL(rt6_lookup);
 719
 720/* ip6_ins_rt is called with FREE table->tb6_lock.
 721   It takes new route entry, the addition fails by any reason the
 722   route is freed. In any case, if caller does not hold it, it may
 723   be destroyed.
 724 */
 725
 726static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 727{
 728	int err;
 729	struct fib6_table *table;
 730
 731	table = rt->rt6i_table;
 732	write_lock_bh(&table->tb6_lock);
 733	err = fib6_add(&table->tb6_root, rt, info);
 734	write_unlock_bh(&table->tb6_lock);
 735
 736	return err;
 737}
 738
 739int ip6_ins_rt(struct rt6_info *rt)
 740{
 741	struct nl_info info = {
 742		.nl_net = dev_net(rt->dst.dev),
 743	};
 744	return __ip6_ins_rt(rt, &info);
 745}
 746
 747static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
 748				      const struct in6_addr *daddr,
 749				      const struct in6_addr *saddr)
 750{
 751	struct rt6_info *rt;
 752
 753	/*
 754	 *	Clone the route.
 755	 */
 756
 757	rt = ip6_rt_copy(ort, daddr);
 758
 759	if (rt) {
 
 760		int attempts = !in_softirq();
 761
 762		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
 763			if (ort->rt6i_dst.plen != 128 &&
 764			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 765				rt->rt6i_flags |= RTF_ANYCAST;
 766			rt->rt6i_gateway = *daddr;
 767		}
 768
 769		rt->rt6i_flags |= RTF_CACHE;
 770
 771#ifdef CONFIG_IPV6_SUBTREES
 772		if (rt->rt6i_src.plen && saddr) {
 773			rt->rt6i_src.addr = *saddr;
 774			rt->rt6i_src.plen = 128;
 775		}
 776#endif
 777
 778	retry:
 779		if (rt6_bind_neighbour(rt, rt->dst.dev)) {
 780			struct net *net = dev_net(rt->dst.dev);
 
 781			int saved_rt_min_interval =
 782				net->ipv6.sysctl.ip6_rt_gc_min_interval;
 783			int saved_rt_elasticity =
 784				net->ipv6.sysctl.ip6_rt_gc_elasticity;
 785
 786			if (attempts-- > 0) {
 787				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
 788				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
 789
 790				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
 791
 792				net->ipv6.sysctl.ip6_rt_gc_elasticity =
 793					saved_rt_elasticity;
 794				net->ipv6.sysctl.ip6_rt_gc_min_interval =
 795					saved_rt_min_interval;
 796				goto retry;
 797			}
 798
 799			net_warn_ratelimited("Neighbour table overflow\n");
 
 
 800			dst_free(&rt->dst);
 801			return NULL;
 802		}
 
 
 803	}
 804
 805	return rt;
 806}
 807
 808static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 809					const struct in6_addr *daddr)
 810{
 811	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
 812
 813	if (rt) {
 814		rt->rt6i_flags |= RTF_CACHE;
 815		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
 816	}
 817	return rt;
 818}
 819
/*
 *	Per-table route resolution shared by the input and output paths.
 *
 *	@oif is the interface index handed to rt6_select(); callers in this
 *	file pass the flow's iif or oif.  RT6_LOOKUP_F_IFACE from @flags is
 *	forwarded as the strictness.  Unless forwarding is enabled in
 *	devconf_all, the first selection pass also demands
 *	RT6_LOOKUP_F_REACHABLE; if that pass ends at the null entry or at
 *	an RTF_CACHE route, the lookup is redone once without it.
 *
 *	When the selected route is neither the null entry nor a cache entry,
 *	a per-destination copy is created (rt6_alloc_cow() when the route
 *	has no neighbour and is not RTF_NONEXTHOP, rt6_alloc_clone() for
 *	other non-host routes) and inserted.  A failed insertion triggers a
 *	fresh lookup, up to three attempts in total.
 *
 *	The returned route carries a reference taken here and has its
 *	lastuse/__use accounting updated.
 */
  820static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
  821				      struct flowi6 *fl6, int flags)
  822{
  823	struct fib6_node *fn;
  824	struct rt6_info *rt, *nrt;
  825	int strict = 0;
  826	int attempts = 3;
  827	int err;
	/* only insist on a reachable next hop when not forwarding */
  828	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
  829
  830	strict |= flags & RT6_LOOKUP_F_IFACE;
  831
  832relookup:
  833	read_lock_bh(&table->tb6_lock);
  834
  835restart_2:
  836	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
  837
  838restart:
  839	rt = rt6_select(fn, oif, strict | reachable);
  840
	/* BACKTRACK() may jump to "restart" or "out" */
  841	BACKTRACK(net, &fl6->saddr);
  842	if (rt == net->ipv6.ip6_null_entry ||
  843	    rt->rt6i_flags & RTF_CACHE)
  844		goto out;
  845
	/* pin the route before the table lock is dropped */
  846	dst_hold(&rt->dst);
  847	read_unlock_bh(&table->tb6_lock);
  848
  849	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
  850		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
  851	else if (!(rt->dst.flags & DST_HOST))
  852		nrt = rt6_alloc_clone(rt, &fl6->daddr);
  853	else
  854		goto out2;
  855
	/* swap the reference from the original route to the copy (or null entry) */
  856	dst_release(&rt->dst);
  857	rt = nrt ? : net->ipv6.ip6_null_entry;
  858
  859	dst_hold(&rt->dst);
  860	if (nrt) {
  861		err = ip6_ins_rt(nrt);
  862		if (!err)
  863			goto out2;
  864	}
  865
  866	if (--attempts <= 0)
  867		goto out2;
  868
  869	/*
  870	 * Race condition! In the gap, when table->tb6_lock was
  871	 * released someone could insert this route.  Relookup.
  872	 */
  873	dst_release(&rt->dst);
  874	goto relookup;
  875
  876out:
	/* first pass required reachability: retry once without it */
  877	if (reachable) {
  878		reachable = 0;
  879		goto restart_2;
  880	}
  881	dst_hold(&rt->dst);
  882	read_unlock_bh(&table->tb6_lock);
  883out2:
  884	rt->dst.lastuse = jiffies;
  885	rt->dst.__use++;
  886
  887	return rt;
  888}
 889
 890static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
 891					    struct flowi6 *fl6, int flags)
 892{
 893	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 894}
 895
 896static struct dst_entry *ip6_route_input_lookup(struct net *net,
 897						struct net_device *dev,
 898						struct flowi6 *fl6, int flags)
 899{
 900	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
 901		flags |= RT6_LOOKUP_F_IFACE;
 902
 903	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
 904}
 905
 906void ip6_route_input(struct sk_buff *skb)
 907{
 908	const struct ipv6hdr *iph = ipv6_hdr(skb);
 909	struct net *net = dev_net(skb->dev);
 910	int flags = RT6_LOOKUP_F_HAS_SADDR;
 911	struct flowi6 fl6 = {
 912		.flowi6_iif = skb->dev->ifindex,
 913		.daddr = iph->daddr,
 914		.saddr = iph->saddr,
 915		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
 916		.flowi6_mark = skb->mark,
 917		.flowi6_proto = iph->nexthdr,
 918	};
 919
 920	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
 
 
 
 921}
 922
 923static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
 924					     struct flowi6 *fl6, int flags)
 925{
 926	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 927}
 928
 929struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 930				    struct flowi6 *fl6)
 931{
 932	int flags = 0;
 933
 934	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
 935		flags |= RT6_LOOKUP_F_IFACE;
 936
 937	if (!ipv6_addr_any(&fl6->saddr))
 938		flags |= RT6_LOOKUP_F_HAS_SADDR;
 939	else if (sk)
 940		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 941
 942	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 943}
 944
 945EXPORT_SYMBOL(ip6_route_output);
 946
 947struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 948{
 949	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
 950	struct dst_entry *new = NULL;
 951
 952	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
 953	if (rt) {
 954		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
 955
 956		new = &rt->dst;
 957
 958		new->__use = 1;
 959		new->input = dst_discard;
 960		new->output = dst_discard;
 961
 962		if (dst_metrics_read_only(&ort->dst))
 963			new->_metrics = ort->dst._metrics;
 964		else
 965			dst_copy_metrics(new, &ort->dst);
 966		rt->rt6i_idev = ort->rt6i_idev;
 967		if (rt->rt6i_idev)
 968			in6_dev_hold(rt->rt6i_idev);
 
 969
 970		rt->rt6i_gateway = ort->rt6i_gateway;
 971		rt->rt6i_flags = ort->rt6i_flags;
 972		rt6_clean_expires(rt);
 973		rt->rt6i_metric = 0;
 974
 975		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
 976#ifdef CONFIG_IPV6_SUBTREES
 977		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
 978#endif
 979
 980		dst_free(new);
 981	}
 982
 983	dst_release(dst_orig);
 984	return new ? new : ERR_PTR(-ENOMEM);
 985}
 986
 987/*
 988 *	Destination cache support functions
 989 */
 990
 991static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 992{
 993	struct rt6_info *rt;
 994
 995	rt = (struct rt6_info *) dst;
 996
 997	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
 998		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
 999			if (!rt->rt6i_peer)
1000				rt6_bind_peer(rt, 0);
1001			rt->rt6i_peer_genid = rt6_peer_genid();
1002		}
1003		return dst;
1004	}
1005	return NULL;
1006}
1007
1008static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1009{
1010	struct rt6_info *rt = (struct rt6_info *) dst;
1011
1012	if (rt) {
1013		if (rt->rt6i_flags & RTF_CACHE) {
1014			if (rt6_check_expired(rt)) {
1015				ip6_del_rt(rt);
1016				dst = NULL;
1017			}
1018		} else {
1019			dst_release(dst);
1020			dst = NULL;
1021		}
1022	}
1023	return dst;
1024}
1025
1026static void ip6_link_failure(struct sk_buff *skb)
1027{
1028	struct rt6_info *rt;
1029
1030	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1031
1032	rt = (struct rt6_info *) skb_dst(skb);
1033	if (rt) {
1034		if (rt->rt6i_flags & RTF_CACHE)
1035			rt6_update_expires(rt, 0);
1036		else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
 
1037			rt->rt6i_node->fn_sernum = -1;
1038	}
1039}
1040
1041static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1042{
1043	struct rt6_info *rt6 = (struct rt6_info*)dst;
1044
1045	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1046		rt6->rt6i_flags |= RTF_MODIFIED;
1047		if (mtu < IPV6_MIN_MTU) {
1048			u32 features = dst_metric(dst, RTAX_FEATURES);
1049			mtu = IPV6_MIN_MTU;
1050			features |= RTAX_FEATURE_ALLFRAG;
1051			dst_metric_set(dst, RTAX_FEATURES, features);
1052		}
1053		dst_metric_set(dst, RTAX_MTU, mtu);
1054	}
1055}
1056
1057static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1058{
1059	struct net_device *dev = dst->dev;
1060	unsigned int mtu = dst_mtu(dst);
1061	struct net *net = dev_net(dev);
1062
1063	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1064
1065	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1066		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1067
1068	/*
1069	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1070	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1071	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1072	 * rely only on pmtu discovery"
1073	 */
1074	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1075		mtu = IPV6_MAXPLEN;
1076	return mtu;
1077}
1078
1079static unsigned int ip6_mtu(const struct dst_entry *dst)
1080{
 
1081	struct inet6_dev *idev;
1082	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1083
1084	if (mtu)
1085		return mtu;
1086
1087	mtu = IPV6_MIN_MTU;
1088
1089	rcu_read_lock();
1090	idev = __in6_dev_get(dst->dev);
1091	if (idev)
1092		mtu = idev->cnf.mtu6;
1093	rcu_read_unlock();
1094
1095	return mtu;
1096}
1097
1098static struct dst_entry *icmp6_dst_gc_list;
1099static DEFINE_SPINLOCK(icmp6_dst_lock);
1100
1101struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1102				  struct neighbour *neigh,
1103				  struct flowi6 *fl6)
1104{
1105	struct dst_entry *dst;
1106	struct rt6_info *rt;
1107	struct inet6_dev *idev = in6_dev_get(dev);
1108	struct net *net = dev_net(dev);
1109
1110	if (unlikely(!idev))
1111		return ERR_PTR(-ENODEV);
1112
1113	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1114	if (unlikely(!rt)) {
1115		in6_dev_put(idev);
1116		dst = ERR_PTR(-ENOMEM);
1117		goto out;
1118	}
1119
1120	if (neigh)
1121		neigh_hold(neigh);
1122	else {
1123		neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
1124		if (IS_ERR(neigh)) {
1125			in6_dev_put(idev);
1126			dst_free(&rt->dst);
1127			return ERR_CAST(neigh);
1128		}
1129	}
1130
1131	rt->dst.flags |= DST_HOST;
1132	rt->dst.output  = ip6_output;
1133	dst_set_neighbour(&rt->dst, neigh);
1134	atomic_set(&rt->dst.__refcnt, 1);
1135	rt->rt6i_dst.addr = fl6->daddr;
 
 
1136	rt->rt6i_dst.plen = 128;
1137	rt->rt6i_idev     = idev;
1138	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1139
1140	spin_lock_bh(&icmp6_dst_lock);
1141	rt->dst.next = icmp6_dst_gc_list;
1142	icmp6_dst_gc_list = &rt->dst;
1143	spin_unlock_bh(&icmp6_dst_lock);
1144
1145	fib6_force_start_gc(net);
1146
1147	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1148
1149out:
1150	return dst;
1151}
1152
1153int icmp6_dst_gc(void)
1154{
1155	struct dst_entry *dst, **pprev;
1156	int more = 0;
1157
1158	spin_lock_bh(&icmp6_dst_lock);
1159	pprev = &icmp6_dst_gc_list;
1160
1161	while ((dst = *pprev) != NULL) {
1162		if (!atomic_read(&dst->__refcnt)) {
1163			*pprev = dst->next;
1164			dst_free(dst);
1165		} else {
1166			pprev = &dst->next;
1167			++more;
1168		}
1169	}
1170
1171	spin_unlock_bh(&icmp6_dst_lock);
1172
1173	return more;
1174}
1175
1176static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1177			    void *arg)
1178{
1179	struct dst_entry *dst, **pprev;
1180
1181	spin_lock_bh(&icmp6_dst_lock);
1182	pprev = &icmp6_dst_gc_list;
1183	while ((dst = *pprev) != NULL) {
1184		struct rt6_info *rt = (struct rt6_info *) dst;
1185		if (func(rt, arg)) {
1186			*pprev = dst->next;
1187			dst_free(dst);
1188		} else {
1189			pprev = &dst->next;
1190		}
1191	}
1192	spin_unlock_bh(&icmp6_dst_lock);
1193}
1194
1195static int ip6_dst_gc(struct dst_ops *ops)
1196{
1197	unsigned long now = jiffies;
1198	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1199	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1200	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1201	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1202	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1203	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1204	int entries;
1205
1206	entries = dst_entries_get_fast(ops);
1207	if (time_after(rt_last_gc + rt_min_interval, now) &&
1208	    entries <= rt_max_size)
1209		goto out;
1210
1211	net->ipv6.ip6_rt_gc_expire++;
1212	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1213	net->ipv6.ip6_rt_last_gc = now;
1214	entries = dst_entries_get_slow(ops);
1215	if (entries < ops->gc_thresh)
1216		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1217out:
1218	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1219	return entries > rt_max_size;
1220}
1221
1222/* Clean host part of a prefix. Not necessary in radix tree,
1223   but results in cleaner routing tables.
1224
1225   Remove it only when all the things will work!
1226 */
1227
1228int ip6_dst_hoplimit(struct dst_entry *dst)
1229{
1230	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1231	if (hoplimit == 0) {
1232		struct net_device *dev = dst->dev;
1233		struct inet6_dev *idev;
1234
1235		rcu_read_lock();
1236		idev = __in6_dev_get(dev);
1237		if (idev)
1238			hoplimit = idev->cnf.hop_limit;
1239		else
1240			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1241		rcu_read_unlock();
1242	}
1243	return hoplimit;
1244}
1245EXPORT_SYMBOL(ip6_dst_hoplimit);
1246
1247/*
1248 *
1249 */
1250
/*
 *	Create a route from the configuration in @cfg and insert it into
 *	the FIB.
 *
 *	Steps, in order:
 *	  - reject prefix lengths above 128 (and any source prefix when
 *	    CONFIG_IPV6_SUBTREES is off) with -EINVAL;
 *	  - if fc_ifindex is set, take references on the device and its
 *	    inet6_dev (-ENODEV if either is missing);
 *	  - default a zero metric to IP6_RT_PRIO_USER;
 *	  - find or create the table (-ENOBUFS if that fails);
 *	  - allocate the route (-ENOMEM), set expiry, protocol, input and
 *	    output handlers, destination/source keys and metric;
 *	  - turn RTF_REJECT routes, and non-local routes via a loopback
 *	    device, into reject routes bound to the loopback device;
 *	  - validate a gateway and derive the device from it if needed;
 *	  - validate the preferred source address, bind a neighbour;
 *	  - copy metrics from the netlink attributes;
 *	  - hand the route to __ip6_ins_rt() and return its result.
 *
 *	On every error exit through "out" the device and inet6_dev
 *	references held at that point are dropped and the route, if
 *	already allocated, is freed.
 */
 1251int ip6_route_add(struct fib6_config *cfg)
 1252{
 1253	int err;
 1254	struct net *net = cfg->fc_nlinfo.nl_net;
 1255	struct rt6_info *rt = NULL;
 1256	struct net_device *dev = NULL;
 1257	struct inet6_dev *idev = NULL;
 1258	struct fib6_table *table;
 1259	int addr_type;
 1260
 1261	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
 1262		return -EINVAL;
 1263#ifndef CONFIG_IPV6_SUBTREES
 1264	if (cfg->fc_src_len)
 1265		return -EINVAL;
 1266#endif
 1267	if (cfg->fc_ifindex) {
 1268		err = -ENODEV;
 1269		dev = dev_get_by_index(net, cfg->fc_ifindex);
 1270		if (!dev)
 1271			goto out;
 1272		idev = in6_dev_get(dev);
 1273		if (!idev)
 1274			goto out;
 1275	}
 1276
 1277	if (cfg->fc_metric == 0)
 1278		cfg->fc_metric = IP6_RT_PRIO_USER;
 1279
 1280	err = -ENOBUFS;
	/*
	 * A netlink request without NLM_F_CREATE first tries an existing
	 * table; if there is none a warning is logged and the table is
	 * created anyway.
	 */
 1281	if (cfg->fc_nlinfo.nlh &&
 1282	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
 1283		table = fib6_get_table(net, cfg->fc_table);
 1284		if (!table) {
 1285			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
 1286			table = fib6_new_table(net, cfg->fc_table);
 1287		}
 1288	} else {
 1289		table = fib6_new_table(net, cfg->fc_table);
 1290	}
 1291
 1292	if (!table)
 1293		goto out;
 1294
 1295	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
 1296
 1297	if (!rt) {
 1298		err = -ENOMEM;
 1299		goto out;
 1300	}
 1301
 1302	rt->dst.obsolete = -1;
 1303
 1304	if (cfg->fc_flags & RTF_EXPIRES)
 1305		rt6_set_expires(rt, jiffies +
 1306				clock_t_to_jiffies(cfg->fc_expires));
 1307	else
 1308		rt6_clean_expires(rt);
 1309
 1310	if (cfg->fc_protocol == RTPROT_UNSPEC)
 1311		cfg->fc_protocol = RTPROT_BOOT;
 1312	rt->rt6i_protocol = cfg->fc_protocol;
 1313
 1314	addr_type = ipv6_addr_type(&cfg->fc_dst);
 1315
	/* choose the input handler from the destination type / RTF_LOCAL */
 1316	if (addr_type & IPV6_ADDR_MULTICAST)
 1317		rt->dst.input = ip6_mc_input;
 1318	else if (cfg->fc_flags & RTF_LOCAL)
 1319		rt->dst.input = ip6_input;
 1320	else
 1321		rt->dst.input = ip6_forward;
 1322
 1323	rt->dst.output = ip6_output;
 1324
 1325	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
 1326	rt->rt6i_dst.plen = cfg->fc_dst_len;
 1327	if (rt->rt6i_dst.plen == 128)
 1328	       rt->dst.flags |= DST_HOST;
 1329
	/* non-host routes with metrics get their own zeroed metrics array */
 1330	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
 1331		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
 1332		if (!metrics) {
 1333			err = -ENOMEM;
 1334			goto out;
 1335		}
 1336		dst_init_metrics(&rt->dst, metrics, 0);
 1337	}
 1338#ifdef CONFIG_IPV6_SUBTREES
 1339	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
 1340	rt->rt6i_src.plen = cfg->fc_src_len;
 1341#endif
 1342
 1343	rt->rt6i_metric = cfg->fc_metric;
 1344
 1345	/* We cannot add true routes via loopback here,
 1346	   they would result in kernel looping; promote them to reject routes
 1347	 */
 1348	if ((cfg->fc_flags & RTF_REJECT) ||
 1349	    (dev && (dev->flags & IFF_LOOPBACK) &&
 1350	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
 1351	     !(cfg->fc_flags & RTF_LOCAL))) {
 1352		/* hold loopback dev/idev if we haven't done so. */
 1353		if (dev != net->loopback_dev) {
 1354			if (dev) {
 1355				dev_put(dev);
 1356				in6_dev_put(idev);
 1357			}
 1358			dev = net->loopback_dev;
 1359			dev_hold(dev);
 1360			idev = in6_dev_get(dev);
 1361			if (!idev) {
 1362				err = -ENODEV;
 1363				goto out;
 1364			}
 1365		}
 1366		rt->dst.output = ip6_pkt_discard_out;
 1367		rt->dst.input = ip6_pkt_discard;
 1368		rt->dst.error = -ENETUNREACH;
 1369		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
 1370		goto install_route;
 1371	}
 1372
 1373	if (cfg->fc_flags & RTF_GATEWAY) {
 1374		const struct in6_addr *gw_addr;
 1375		int gwa_type;
 1376
 1377		gw_addr = &cfg->fc_gateway;
 1378		rt->rt6i_gateway = *gw_addr;
 1379		gwa_type = ipv6_addr_type(gw_addr);
 1380
 1381		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
 1382			struct rt6_info *grt;
 1383
 1384			/* IPv6 strictly inhibits using not link-local
 1385			   addresses as nexthop address.
 1386			   Otherwise, router will not able to send redirects.
 1387			   It is very good, but in some (rare!) circumstances
 1388			   (SIT, PtP, NBMA NOARP links) it is handy to allow
 1389			   some exceptions. --ANK
 1390			 */
 1391			err = -EINVAL;
 1392			if (!(gwa_type & IPV6_ADDR_UNICAST))
 1393				goto out;
 1394
 1395			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
 1396
 1397			err = -EHOSTUNREACH;
 1398			if (!grt)
 1399				goto out;
 1400			if (dev) {
 1401				if (dev != grt->dst.dev) {
 1402					dst_release(&grt->dst);
 1403					goto out;
 1404				}
 1405			} else {
				/* no device given: adopt the one the gateway route uses */
 1406				dev = grt->dst.dev;
 1407				idev = grt->rt6i_idev;
 1408				dev_hold(dev);
 1409				in6_dev_hold(grt->rt6i_idev);
 1410			}
			/* the route to the gateway must not itself go via a gateway */
 1411			if (!(grt->rt6i_flags & RTF_GATEWAY))
 1412				err = 0;
 1413			dst_release(&grt->dst);
 1414
 1415			if (err)
 1416				goto out;
 1417		}
 1418		err = -EINVAL;
 1419		if (!dev || (dev->flags & IFF_LOOPBACK))
 1420			goto out;
 1421	}
 1422
 1423	err = -ENODEV;
 1424	if (!dev)
 1425		goto out;
 1426
 1427	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
 1428		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
 1429			err = -EINVAL;
 1430			goto out;
 1431		}
 1432		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
 1433		rt->rt6i_prefsrc.plen = 128;
 1434	} else
 1435		rt->rt6i_prefsrc.plen = 0;
 1436
 1437	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
 1438		err = rt6_bind_neighbour(rt, dev);
 1439		if (err)

 1440			goto out;


 1441	}
 1442
 1443	rt->rt6i_flags = cfg->fc_flags;
 1444
 1445install_route:
 1446	if (cfg->fc_mx) {
 1447		struct nlattr *nla;
 1448		int remaining;
 1449
		/* copy each metric attribute; type 0 is skipped, type > RTAX_MAX is an error */
 1450		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
 1451			int type = nla_type(nla);
 1452
 1453			if (type) {
 1454				if (type > RTAX_MAX) {
 1455					err = -EINVAL;
 1456					goto out;
 1457				}
 1458
 1459				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
 1460			}
 1461		}
 1462	}
 1463
	/* the dev/idev references held above are stored in the route */
 1464	rt->dst.dev = dev;
 1465	rt->rt6i_idev = idev;
 1466	rt->rt6i_table = table;
 1467
 1468	cfg->fc_nlinfo.nl_net = dev_net(dev);
 1469
 1470	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
 1471
 1472out:
 1473	if (dev)
 1474		dev_put(dev);
 1475	if (idev)
 1476		in6_dev_put(idev);
 1477	if (rt)
 1478		dst_free(&rt->dst);
 1479	return err;
 1480}
1481
1482static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1483{
1484	int err;
1485	struct fib6_table *table;
1486	struct net *net = dev_net(rt->dst.dev);
1487
1488	if (rt == net->ipv6.ip6_null_entry)
1489		return -ENOENT;
1490
1491	table = rt->rt6i_table;
1492	write_lock_bh(&table->tb6_lock);
1493
1494	err = fib6_del(rt, info);
1495	dst_release(&rt->dst);
1496
1497	write_unlock_bh(&table->tb6_lock);
1498
1499	return err;
1500}
1501
1502int ip6_del_rt(struct rt6_info *rt)
1503{
1504	struct nl_info info = {
1505		.nl_net = dev_net(rt->dst.dev),
1506	};
1507	return __ip6_del_rt(rt, &info);
1508}
1509
1510static int ip6_route_del(struct fib6_config *cfg)
1511{
1512	struct fib6_table *table;
1513	struct fib6_node *fn;
1514	struct rt6_info *rt;
1515	int err = -ESRCH;
1516
1517	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1518	if (!table)
1519		return err;
1520
1521	read_lock_bh(&table->tb6_lock);
1522
1523	fn = fib6_locate(&table->tb6_root,
1524			 &cfg->fc_dst, cfg->fc_dst_len,
1525			 &cfg->fc_src, cfg->fc_src_len);
1526
1527	if (fn) {
1528		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1529			if (cfg->fc_ifindex &&
1530			    (!rt->dst.dev ||
1531			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1532				continue;
1533			if (cfg->fc_flags & RTF_GATEWAY &&
1534			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1535				continue;
1536			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1537				continue;
1538			dst_hold(&rt->dst);
1539			read_unlock_bh(&table->tb6_lock);
1540
1541			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1542		}
1543	}
1544	read_unlock_bh(&table->tb6_lock);
1545
1546	return err;
1547}
1548
1549/*
1550 *	Handle redirects
1551 */
1552struct ip6rd_flowi {
1553	struct flowi6 fl6;
1554	struct in6_addr gateway;
1555};
1556
1557static struct rt6_info *__ip6_route_redirect(struct net *net,
1558					     struct fib6_table *table,
1559					     struct flowi6 *fl6,
1560					     int flags)
1561{
1562	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1563	struct rt6_info *rt;
1564	struct fib6_node *fn;
1565
1566	/*
1567	 * Get the "current" route for this destination and
1568	 * check if the redirect has come from approriate router.
1569	 *
1570	 * RFC 2461 specifies that redirects should only be
1571	 * accepted if they come from the nexthop to the target.
1572	 * Due to the way the routes are chosen, this notion
1573	 * is a bit fuzzy and one might need to check all possible
1574	 * routes.
1575	 */
1576
1577	read_lock_bh(&table->tb6_lock);
1578	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1579restart:
1580	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1581		/*
1582		 * Current route is on-link; redirect is always invalid.
1583		 *
1584		 * Seems, previous statement is not true. It could
1585		 * be node, which looks for us as on-link (f.e. proxy ndisc)
1586		 * But then router serving it might decide, that we should
1587		 * know truth 8)8) --ANK (980726).
1588		 */
1589		if (rt6_check_expired(rt))
1590			continue;
1591		if (!(rt->rt6i_flags & RTF_GATEWAY))
1592			continue;
1593		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1594			continue;
1595		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1596			continue;
1597		break;
1598	}
1599
1600	if (!rt)
1601		rt = net->ipv6.ip6_null_entry;
1602	BACKTRACK(net, &fl6->saddr);
1603out:
1604	dst_hold(&rt->dst);
1605
1606	read_unlock_bh(&table->tb6_lock);
1607
1608	return rt;
1609};
1610
1611static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1612					   const struct in6_addr *src,
1613					   const struct in6_addr *gateway,
1614					   struct net_device *dev)
1615{
1616	int flags = RT6_LOOKUP_F_HAS_SADDR;
1617	struct net *net = dev_net(dev);
1618	struct ip6rd_flowi rdfl = {
1619		.fl6 = {
1620			.flowi6_oif = dev->ifindex,
1621			.daddr = *dest,
1622			.saddr = *src,
1623		},
1624	};
1625
1626	rdfl.gateway = *gateway;
1627
1628	if (rt6_need_strict(dest))
1629		flags |= RT6_LOOKUP_F_IFACE;
1630
1631	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1632						   flags, __ip6_route_redirect);
1633}
1634
1635void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1636		  const struct in6_addr *saddr,
1637		  struct neighbour *neigh, u8 *lladdr, int on_link)
1638{
1639	struct rt6_info *rt, *nrt = NULL;
1640	struct netevent_redirect netevent;
1641	struct net *net = dev_net(neigh->dev);
1642
1643	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1644
1645	if (rt == net->ipv6.ip6_null_entry) {
1646		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
 
 
1647		goto out;
1648	}
1649
1650	/*
1651	 *	We have finally decided to accept it.
1652	 */
1653
1654	neigh_update(neigh, lladdr, NUD_STALE,
1655		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1656		     NEIGH_UPDATE_F_OVERRIDE|
1657		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1658				     NEIGH_UPDATE_F_ISROUTER))
1659		     );
1660
1661	/*
1662	 * Redirect received -> path was valid.
1663	 * Look, redirects are sent only in response to data packets,
1664	 * so that this nexthop apparently is reachable. --ANK
1665	 */
1666	dst_confirm(&rt->dst);
1667
1668	/* Duplicate redirect: silently ignore. */
1669	if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1670		goto out;
1671
1672	nrt = ip6_rt_copy(rt, dest);
1673	if (!nrt)
1674		goto out;
1675
1676	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1677	if (on_link)
1678		nrt->rt6i_flags &= ~RTF_GATEWAY;
1679
1680	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1681	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1682
1683	if (ip6_ins_rt(nrt))
1684		goto out;
1685
1686	netevent.old = &rt->dst;
1687	netevent.new = &nrt->dst;
1688	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1689
1690	if (rt->rt6i_flags & RTF_CACHE) {
1691		ip6_del_rt(rt);
1692		return;
1693	}
1694
1695out:
1696	dst_release(&rt->dst);
1697}
1698
1699/*
1700 *	Handle ICMP "packet too big" messages
1701 *	i.e. Path MTU discovery
1702 */
1703
1704static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1705			     struct net *net, u32 pmtu, int ifindex)
1706{
1707	struct rt6_info *rt, *nrt;
1708	int allfrag = 0;
1709again:
1710	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1711	if (!rt)
1712		return;
1713
1714	if (rt6_check_expired(rt)) {
1715		ip6_del_rt(rt);
1716		goto again;
1717	}
1718
1719	if (pmtu >= dst_mtu(&rt->dst))
1720		goto out;
1721
1722	if (pmtu < IPV6_MIN_MTU) {
1723		/*
1724		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1725		 * MTU (1280) and a fragment header should always be included
1726		 * after a node receiving Too Big message reporting PMTU is
1727		 * less than the IPv6 Minimum Link MTU.
1728		 */
1729		pmtu = IPV6_MIN_MTU;
1730		allfrag = 1;
1731	}
1732
1733	/* New mtu received -> path was valid.
1734	   They are sent only in response to data packets,
1735	   so that this nexthop apparently is reachable. --ANK
1736	 */
1737	dst_confirm(&rt->dst);
1738
1739	/* Host route. If it is static, it would be better
1740	   not to override it, but add new one, so that
1741	   when cache entry will expire old pmtu
1742	   would return automatically.
1743	 */
1744	if (rt->rt6i_flags & RTF_CACHE) {
1745		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1746		if (allfrag) {
1747			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1748			features |= RTAX_FEATURE_ALLFRAG;
1749			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1750		}
1751		rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1752		rt->rt6i_flags |= RTF_MODIFIED;
1753		goto out;
1754	}
1755
1756	/* Network route.
1757	   Two cases are possible:
1758	   1. It is connected route. Action: COW
1759	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1760	 */
1761	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1762		nrt = rt6_alloc_cow(rt, daddr, saddr);
1763	else
1764		nrt = rt6_alloc_clone(rt, daddr);
1765
1766	if (nrt) {
1767		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1768		if (allfrag) {
1769			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1770			features |= RTAX_FEATURE_ALLFRAG;
1771			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1772		}
1773
1774		/* According to RFC 1981, detecting PMTU increase shouldn't be
1775		 * happened within 5 mins, the recommended timer is 10 mins.
1776		 * Here this route expiration time is set to ip6_rt_mtu_expires
1777		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1778		 * and detecting PMTU increase will be automatically happened.
1779		 */
1780		rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1781		nrt->rt6i_flags |= RTF_DYNAMIC;
 
1782		ip6_ins_rt(nrt);
1783	}
1784out:
1785	dst_release(&rt->dst);
1786}
1787
1788void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1789			struct net_device *dev, u32 pmtu)
1790{
1791	struct net *net = dev_net(dev);
1792
1793	/*
1794	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1795	 * is sending along the path" that caused the Packet Too Big message.
1796	 * Since it's not possible in the general case to determine which
1797	 * interface was used to send the original packet, we update the MTU
1798	 * on the interface that will be used to send future packets. We also
1799	 * update the MTU on the interface that received the Packet Too Big in
1800	 * case the original packet was forced out that interface with
1801	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1802	 * correct behaviour, which would be to update the MTU on all
1803	 * interfaces.
1804	 */
1805	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1806	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1807}
1808
1809/*
1810 *	Misc support functions
1811 */
1812
1813static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1814				    const struct in6_addr *dest)
1815{
1816	struct net *net = dev_net(ort->dst.dev);
1817	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1818					    ort->dst.dev, 0);
1819
1820	if (rt) {
1821		rt->dst.input = ort->dst.input;
1822		rt->dst.output = ort->dst.output;
1823		rt->dst.flags |= DST_HOST;
1824
1825		rt->rt6i_dst.addr = *dest;
1826		rt->rt6i_dst.plen = 128;
1827		dst_copy_metrics(&rt->dst, &ort->dst);
1828		rt->dst.error = ort->dst.error;
1829		rt->rt6i_idev = ort->rt6i_idev;
1830		if (rt->rt6i_idev)
1831			in6_dev_hold(rt->rt6i_idev);
1832		rt->dst.lastuse = jiffies;
 
1833
1834		rt->rt6i_gateway = ort->rt6i_gateway;
1835		rt->rt6i_flags = ort->rt6i_flags;
1836		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1837		    (RTF_DEFAULT | RTF_ADDRCONF))
1838			rt6_set_from(rt, ort);
1839		else
1840			rt6_clean_expires(rt);
1841		rt->rt6i_metric = 0;
1842
1843#ifdef CONFIG_IPV6_SUBTREES
1844		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1845#endif
1846		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1847		rt->rt6i_table = ort->rt6i_table;
1848	}
1849	return rt;
1850}
1851
1852#ifdef CONFIG_IPV6_ROUTE_INFO
1853static struct rt6_info *rt6_get_route_info(struct net *net,
1854					   const struct in6_addr *prefix, int prefixlen,
1855					   const struct in6_addr *gwaddr, int ifindex)
1856{
1857	struct fib6_node *fn;
1858	struct rt6_info *rt = NULL;
1859	struct fib6_table *table;
1860
1861	table = fib6_get_table(net, RT6_TABLE_INFO);
1862	if (!table)
1863		return NULL;
1864
1865	write_lock_bh(&table->tb6_lock);
1866	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1867	if (!fn)
1868		goto out;
1869
1870	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1871		if (rt->dst.dev->ifindex != ifindex)
1872			continue;
1873		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1874			continue;
1875		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1876			continue;
1877		dst_hold(&rt->dst);
1878		break;
1879	}
1880out:
1881	write_unlock_bh(&table->tb6_lock);
1882	return rt;
1883}
1884
1885static struct rt6_info *rt6_add_route_info(struct net *net,
1886					   const struct in6_addr *prefix, int prefixlen,
1887					   const struct in6_addr *gwaddr, int ifindex,
1888					   unsigned int pref)
1889{
1890	struct fib6_config cfg = {
1891		.fc_table	= RT6_TABLE_INFO,
1892		.fc_metric	= IP6_RT_PRIO_USER,
1893		.fc_ifindex	= ifindex,
1894		.fc_dst_len	= prefixlen,
1895		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1896				  RTF_UP | RTF_PREF(pref),
1897		.fc_nlinfo.pid = 0,
1898		.fc_nlinfo.nlh = NULL,
1899		.fc_nlinfo.nl_net = net,
1900	};
1901
1902	cfg.fc_dst = *prefix;
1903	cfg.fc_gateway = *gwaddr;
1904
1905	/* We should treat it as a default route if prefix length is 0. */
1906	if (!prefixlen)
1907		cfg.fc_flags |= RTF_DEFAULT;
1908
1909	ip6_route_add(&cfg);
1910
1911	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1912}
1913#endif
1914
1915struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1916{
1917	struct rt6_info *rt;
1918	struct fib6_table *table;
1919
1920	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1921	if (!table)
1922		return NULL;
1923
1924	write_lock_bh(&table->tb6_lock);
1925	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1926		if (dev == rt->dst.dev &&
1927		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1928		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1929			break;
1930	}
1931	if (rt)
1932		dst_hold(&rt->dst);
1933	write_unlock_bh(&table->tb6_lock);
1934	return rt;
1935}
1936
1937struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1938				     struct net_device *dev,
1939				     unsigned int pref)
1940{
1941	struct fib6_config cfg = {
1942		.fc_table	= RT6_TABLE_DFLT,
1943		.fc_metric	= IP6_RT_PRIO_USER,
1944		.fc_ifindex	= dev->ifindex,
1945		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1946				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1947		.fc_nlinfo.pid = 0,
1948		.fc_nlinfo.nlh = NULL,
1949		.fc_nlinfo.nl_net = dev_net(dev),
1950	};
1951
1952	cfg.fc_gateway = *gwaddr;
1953
1954	ip6_route_add(&cfg);
1955
1956	return rt6_get_dflt_router(gwaddr, dev);
1957}
1958
1959void rt6_purge_dflt_routers(struct net *net)
1960{
1961	struct rt6_info *rt;
1962	struct fib6_table *table;
1963
1964	/* NOTE: Keep consistent with rt6_get_dflt_router */
1965	table = fib6_get_table(net, RT6_TABLE_DFLT);
1966	if (!table)
1967		return;
1968
1969restart:
1970	read_lock_bh(&table->tb6_lock);
1971	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1972		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1973			dst_hold(&rt->dst);
1974			read_unlock_bh(&table->tb6_lock);
1975			ip6_del_rt(rt);
1976			goto restart;
1977		}
1978	}
1979	read_unlock_bh(&table->tb6_lock);
1980}
1981
1982static void rtmsg_to_fib6_config(struct net *net,
1983				 struct in6_rtmsg *rtmsg,
1984				 struct fib6_config *cfg)
1985{
1986	memset(cfg, 0, sizeof(*cfg));
1987
1988	cfg->fc_table = RT6_TABLE_MAIN;
1989	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1990	cfg->fc_metric = rtmsg->rtmsg_metric;
1991	cfg->fc_expires = rtmsg->rtmsg_info;
1992	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1993	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1994	cfg->fc_flags = rtmsg->rtmsg_flags;
1995
1996	cfg->fc_nlinfo.nl_net = net;
1997
1998	cfg->fc_dst = rtmsg->rtmsg_dst;
1999	cfg->fc_src = rtmsg->rtmsg_src;
2000	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2001}
2002
2003int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2004{
2005	struct fib6_config cfg;
2006	struct in6_rtmsg rtmsg;
2007	int err;
2008
2009	switch(cmd) {
2010	case SIOCADDRT:		/* Add a route */
2011	case SIOCDELRT:		/* Delete a route */
2012		if (!capable(CAP_NET_ADMIN))
2013			return -EPERM;
2014		err = copy_from_user(&rtmsg, arg,
2015				     sizeof(struct in6_rtmsg));
2016		if (err)
2017			return -EFAULT;
2018
2019		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2020
2021		rtnl_lock();
2022		switch (cmd) {
2023		case SIOCADDRT:
2024			err = ip6_route_add(&cfg);
2025			break;
2026		case SIOCDELRT:
2027			err = ip6_route_del(&cfg);
2028			break;
2029		default:
2030			err = -EINVAL;
2031		}
2032		rtnl_unlock();
2033
2034		return err;
2035	}
2036
2037	return -EINVAL;
2038}
2039
2040/*
2041 *	Drop the packet on the floor
2042 */
2043
2044static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2045{
2046	int type;
2047	struct dst_entry *dst = skb_dst(skb);
2048	switch (ipstats_mib_noroutes) {
2049	case IPSTATS_MIB_INNOROUTES:
2050		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2051		if (type == IPV6_ADDR_ANY) {
2052			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2053				      IPSTATS_MIB_INADDRERRORS);
2054			break;
2055		}
2056		/* FALLTHROUGH */
2057	case IPSTATS_MIB_OUTNOROUTES:
2058		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2059			      ipstats_mib_noroutes);
2060		break;
2061	}
2062	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2063	kfree_skb(skb);
2064	return 0;
2065}
2066
2067static int ip6_pkt_discard(struct sk_buff *skb)
2068{
2069	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2070}
2071
2072static int ip6_pkt_discard_out(struct sk_buff *skb)
2073{
2074	skb->dev = skb_dst(skb)->dev;
2075	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2076}
2077
2078#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2079
2080static int ip6_pkt_prohibit(struct sk_buff *skb)
2081{
2082	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2083}
2084
2085static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2086{
2087	skb->dev = skb_dst(skb)->dev;
2088	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2089}
2090
2091#endif
2092
2093/*
2094 *	Allocate a dst for local (unicast / anycast) address.
2095 */
2096
2097struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2098				    const struct in6_addr *addr,
2099				    bool anycast)
2100{
2101	struct net *net = dev_net(idev->dev);
2102	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2103					    net->loopback_dev, 0);
2104	int err;
2105
2106	if (!rt) {
2107		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
 
 
2108		return ERR_PTR(-ENOMEM);
2109	}
2110
2111	in6_dev_hold(idev);
2112
2113	rt->dst.flags |= DST_HOST;
2114	rt->dst.input = ip6_input;
2115	rt->dst.output = ip6_output;
2116	rt->rt6i_idev = idev;
2117	rt->dst.obsolete = -1;
2118
2119	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2120	if (anycast)
2121		rt->rt6i_flags |= RTF_ANYCAST;
2122	else
2123		rt->rt6i_flags |= RTF_LOCAL;
2124	err = rt6_bind_neighbour(rt, rt->dst.dev);
2125	if (err) {
2126		dst_free(&rt->dst);
2127		return ERR_PTR(err);
 
2128	}
 
2129
2130	rt->rt6i_dst.addr = *addr;
2131	rt->rt6i_dst.plen = 128;
2132	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2133
2134	atomic_set(&rt->dst.__refcnt, 1);
2135
2136	return rt;
2137}
2138
2139int ip6_route_get_saddr(struct net *net,
2140			struct rt6_info *rt,
2141			const struct in6_addr *daddr,
2142			unsigned int prefs,
2143			struct in6_addr *saddr)
2144{
2145	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2146	int err = 0;
2147	if (rt->rt6i_prefsrc.plen)
2148		*saddr = rt->rt6i_prefsrc.addr;
2149	else
2150		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2151					 daddr, prefs, saddr);
2152	return err;
2153}
2154
2155/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* restrict to this device; NULL matches any */
	struct net *net;		/* namespace whose ip6_null_entry is skipped */
	struct in6_addr *addr;		/* preferred-source address being removed */
};
2161
2162static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2163{
2164	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2165	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2166	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2167
2168	if (((void *)rt->dst.dev == dev || !dev) &&
2169	    rt != net->ipv6.ip6_null_entry &&
2170	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2171		/* remove prefsrc entry */
2172		rt->rt6i_prefsrc.plen = 0;
2173	}
2174	return 0;
2175}
2176
2177void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2178{
2179	struct net *net = dev_net(ifp->idev->dev);
2180	struct arg_dev_net_ip adni = {
2181		.dev = ifp->idev->dev,
2182		.net = net,
2183		.addr = &ifp->addr,
2184	};
2185	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2186}
2187
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace whose ip6_null_entry is kept */
};
2192
2193static int fib6_ifdown(struct rt6_info *rt, void *arg)
2194{
2195	const struct arg_dev_net *adn = arg;
2196	const struct net_device *dev = adn->dev;
2197
2198	if ((rt->dst.dev == dev || !dev) &&
2199	    rt != adn->net->ipv6.ip6_null_entry)
 
2200		return -1;
2201
2202	return 0;
2203}
2204
2205void rt6_ifdown(struct net *net, struct net_device *dev)
2206{
2207	struct arg_dev_net adn = {
2208		.dev = dev,
2209		.net = net,
2210	};
2211
2212	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2213	icmp6_clean_all(fib6_ifdown, &adn);
2214}
2215
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2220
2221static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2222{
2223	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2224	struct inet6_dev *idev;
2225
2226	/* In IPv6 pmtu discovery is not optional,
2227	   so that RTAX_MTU lock cannot disable it.
2228	   We still use this lock to block changes
2229	   caused by addrconf/ndisc.
2230	*/
2231
2232	idev = __in6_dev_get(arg->dev);
2233	if (!idev)
2234		return 0;
2235
2236	/* For administrative MTU increase, there is no way to discover
2237	   IPv6 PMTU increase, so PMTU increase should be updated here.
2238	   Since RFC 1981 doesn't include administrative MTU increase
2239	   update PMTU increase is a MUST. (i.e. jumbo frame)
2240	 */
2241	/*
2242	   If new MTU is less than route PMTU, this new MTU will be the
2243	   lowest MTU in the path, update the route PMTU to reflect PMTU
2244	   decreases; if new MTU is greater than route PMTU, and the
2245	   old MTU is the lowest MTU in the path, update the route PMTU
2246	   to reflect the increase. In this case if the other nodes' MTU
2247	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2248	   PMTU discouvery.
2249	 */
2250	if (rt->dst.dev == arg->dev &&
2251	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2252	    (dst_mtu(&rt->dst) >= arg->mtu ||
2253	     (dst_mtu(&rt->dst) < arg->mtu &&
2254	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2255		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2256	}
2257	return 0;
2258}
2259
2260void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2261{
2262	struct rt6_mtu_change_arg arg = {
2263		.dev = dev,
2264		.mtu = mtu,
2265	};
2266
2267	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2268}
2269
2270static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2271	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2272	[RTA_OIF]               = { .type = NLA_U32 },
2273	[RTA_IIF]		= { .type = NLA_U32 },
2274	[RTA_PRIORITY]          = { .type = NLA_U32 },
2275	[RTA_METRICS]           = { .type = NLA_NESTED },
2276};
2277
2278static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2279			      struct fib6_config *cfg)
2280{
2281	struct rtmsg *rtm;
2282	struct nlattr *tb[RTA_MAX+1];
2283	int err;
2284
2285	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2286	if (err < 0)
2287		goto errout;
2288
2289	err = -EINVAL;
2290	rtm = nlmsg_data(nlh);
2291	memset(cfg, 0, sizeof(*cfg));
2292
2293	cfg->fc_table = rtm->rtm_table;
2294	cfg->fc_dst_len = rtm->rtm_dst_len;
2295	cfg->fc_src_len = rtm->rtm_src_len;
2296	cfg->fc_flags = RTF_UP;
2297	cfg->fc_protocol = rtm->rtm_protocol;
2298
2299	if (rtm->rtm_type == RTN_UNREACHABLE)
2300		cfg->fc_flags |= RTF_REJECT;
2301
2302	if (rtm->rtm_type == RTN_LOCAL)
2303		cfg->fc_flags |= RTF_LOCAL;
2304
2305	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2306	cfg->fc_nlinfo.nlh = nlh;
2307	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2308
2309	if (tb[RTA_GATEWAY]) {
2310		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2311		cfg->fc_flags |= RTF_GATEWAY;
2312	}
2313
2314	if (tb[RTA_DST]) {
2315		int plen = (rtm->rtm_dst_len + 7) >> 3;
2316
2317		if (nla_len(tb[RTA_DST]) < plen)
2318			goto errout;
2319
2320		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2321	}
2322
2323	if (tb[RTA_SRC]) {
2324		int plen = (rtm->rtm_src_len + 7) >> 3;
2325
2326		if (nla_len(tb[RTA_SRC]) < plen)
2327			goto errout;
2328
2329		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2330	}
2331
2332	if (tb[RTA_PREFSRC])
2333		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2334
2335	if (tb[RTA_OIF])
2336		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2337
2338	if (tb[RTA_PRIORITY])
2339		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2340
2341	if (tb[RTA_METRICS]) {
2342		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2343		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2344	}
2345
2346	if (tb[RTA_TABLE])
2347		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2348
2349	err = 0;
2350errout:
2351	return err;
2352}
2353
2354static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2355{
2356	struct fib6_config cfg;
2357	int err;
2358
2359	err = rtm_to_fib6_config(skb, nlh, &cfg);
2360	if (err < 0)
2361		return err;
2362
2363	return ip6_route_del(&cfg);
2364}
2365
2366static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2367{
2368	struct fib6_config cfg;
2369	int err;
2370
2371	err = rtm_to_fib6_config(skb, nlh, &cfg);
2372	if (err < 0)
2373		return err;
2374
2375	return ip6_route_add(&cfg);
2376}
2377
2378static inline size_t rt6_nlmsg_size(void)
2379{
2380	return NLMSG_ALIGN(sizeof(struct rtmsg))
2381	       + nla_total_size(16) /* RTA_SRC */
2382	       + nla_total_size(16) /* RTA_DST */
2383	       + nla_total_size(16) /* RTA_GATEWAY */
2384	       + nla_total_size(16) /* RTA_PREFSRC */
2385	       + nla_total_size(4) /* RTA_TABLE */
2386	       + nla_total_size(4) /* RTA_IIF */
2387	       + nla_total_size(4) /* RTA_OIF */
2388	       + nla_total_size(4) /* RTA_PRIORITY */
2389	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2390	       + nla_total_size(sizeof(struct rta_cacheinfo));
2391}
2392
2393static int rt6_fill_node(struct net *net,
2394			 struct sk_buff *skb, struct rt6_info *rt,
2395			 struct in6_addr *dst, struct in6_addr *src,
2396			 int iif, int type, u32 pid, u32 seq,
2397			 int prefix, int nowait, unsigned int flags)
2398{
2399	const struct inet_peer *peer;
2400	struct rtmsg *rtm;
2401	struct nlmsghdr *nlh;
2402	long expires;
2403	u32 table;
2404	struct neighbour *n;
2405	u32 ts, tsage;
2406
2407	if (prefix) {	/* user wants prefix routes only */
2408		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2409			/* success since this is not a prefix route */
2410			return 1;
2411		}
2412	}
2413
2414	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2415	if (!nlh)
2416		return -EMSGSIZE;
2417
2418	rtm = nlmsg_data(nlh);
2419	rtm->rtm_family = AF_INET6;
2420	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2421	rtm->rtm_src_len = rt->rt6i_src.plen;
2422	rtm->rtm_tos = 0;
2423	if (rt->rt6i_table)
2424		table = rt->rt6i_table->tb6_id;
2425	else
2426		table = RT6_TABLE_UNSPEC;
2427	rtm->rtm_table = table;
2428	if (nla_put_u32(skb, RTA_TABLE, table))
2429		goto nla_put_failure;
2430	if (rt->rt6i_flags & RTF_REJECT)
2431		rtm->rtm_type = RTN_UNREACHABLE;
2432	else if (rt->rt6i_flags & RTF_LOCAL)
2433		rtm->rtm_type = RTN_LOCAL;
2434	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2435		rtm->rtm_type = RTN_LOCAL;
2436	else
2437		rtm->rtm_type = RTN_UNICAST;
2438	rtm->rtm_flags = 0;
2439	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2440	rtm->rtm_protocol = rt->rt6i_protocol;
2441	if (rt->rt6i_flags & RTF_DYNAMIC)
2442		rtm->rtm_protocol = RTPROT_REDIRECT;
2443	else if (rt->rt6i_flags & RTF_ADDRCONF)
2444		rtm->rtm_protocol = RTPROT_KERNEL;
2445	else if (rt->rt6i_flags & RTF_DEFAULT)
2446		rtm->rtm_protocol = RTPROT_RA;
2447
2448	if (rt->rt6i_flags & RTF_CACHE)
2449		rtm->rtm_flags |= RTM_F_CLONED;
2450
2451	if (dst) {
2452		if (nla_put(skb, RTA_DST, 16, dst))
2453			goto nla_put_failure;
2454		rtm->rtm_dst_len = 128;
2455	} else if (rtm->rtm_dst_len)
2456		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2457			goto nla_put_failure;
2458#ifdef CONFIG_IPV6_SUBTREES
2459	if (src) {
2460		if (nla_put(skb, RTA_SRC, 16, src))
2461			goto nla_put_failure;
2462		rtm->rtm_src_len = 128;
2463	} else if (rtm->rtm_src_len &&
2464		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2465		goto nla_put_failure;
2466#endif
2467	if (iif) {
2468#ifdef CONFIG_IPV6_MROUTE
2469		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2470			int err = ip6mr_get_route(net, skb, rtm, nowait);
2471			if (err <= 0) {
2472				if (!nowait) {
2473					if (err == 0)
2474						return 0;
2475					goto nla_put_failure;
2476				} else {
2477					if (err == -EMSGSIZE)
2478						goto nla_put_failure;
2479				}
2480			}
2481		} else
2482#endif
2483			if (nla_put_u32(skb, RTA_IIF, iif))
2484				goto nla_put_failure;
2485	} else if (dst) {
2486		struct in6_addr saddr_buf;
2487		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2488		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2489			goto nla_put_failure;
2490	}
2491
2492	if (rt->rt6i_prefsrc.plen) {
2493		struct in6_addr saddr_buf;
2494		saddr_buf = rt->rt6i_prefsrc.addr;
2495		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2496			goto nla_put_failure;
2497	}
2498
2499	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2500		goto nla_put_failure;
2501
2502	rcu_read_lock();
2503	n = dst_get_neighbour_noref(&rt->dst);
2504	if (n) {
2505		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2506			rcu_read_unlock();
2507			goto nla_put_failure;
2508		}
2509	}
2510	rcu_read_unlock();
2511
2512	if (rt->dst.dev &&
2513	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2514		goto nla_put_failure;
2515	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2516		goto nla_put_failure;
2517	if (!(rt->rt6i_flags & RTF_EXPIRES))
2518		expires = 0;
2519	else if (rt->dst.expires - jiffies < INT_MAX)
2520		expires = rt->dst.expires - jiffies;
2521	else
2522		expires = INT_MAX;
2523
2524	peer = rt->rt6i_peer;
2525	ts = tsage = 0;
2526	if (peer && peer->tcp_ts_stamp) {
2527		ts = peer->tcp_ts;
2528		tsage = get_seconds() - peer->tcp_ts_stamp;
2529	}
2530
2531	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
2532			       expires, rt->dst.error) < 0)
2533		goto nla_put_failure;
2534
2535	return nlmsg_end(skb, nlh);
2536
2537nla_put_failure:
2538	nlmsg_cancel(skb, nlh);
2539	return -EMSGSIZE;
2540}
2541
2542int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2543{
2544	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2545	int prefix;
2546
2547	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2548		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2549		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2550	} else
2551		prefix = 0;
2552
2553	return rt6_fill_node(arg->net,
2554		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2555		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2556		     prefix, 0, NLM_F_MULTI);
2557}
2558
2559static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2560{
2561	struct net *net = sock_net(in_skb->sk);
2562	struct nlattr *tb[RTA_MAX+1];
2563	struct rt6_info *rt;
2564	struct sk_buff *skb;
2565	struct rtmsg *rtm;
2566	struct flowi6 fl6;
2567	int err, iif = 0, oif = 0;
2568
2569	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2570	if (err < 0)
2571		goto errout;
2572
2573	err = -EINVAL;
2574	memset(&fl6, 0, sizeof(fl6));
2575
2576	if (tb[RTA_SRC]) {
2577		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2578			goto errout;
2579
2580		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2581	}
2582
2583	if (tb[RTA_DST]) {
2584		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2585			goto errout;
2586
2587		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2588	}
2589
2590	if (tb[RTA_IIF])
2591		iif = nla_get_u32(tb[RTA_IIF]);
2592
2593	if (tb[RTA_OIF])
2594		oif = nla_get_u32(tb[RTA_OIF]);
2595
2596	if (iif) {
2597		struct net_device *dev;
2598		int flags = 0;
2599
2600		dev = __dev_get_by_index(net, iif);
2601		if (!dev) {
2602			err = -ENODEV;
2603			goto errout;
2604		}
2605
2606		fl6.flowi6_iif = iif;
2607
2608		if (!ipv6_addr_any(&fl6.saddr))
2609			flags |= RT6_LOOKUP_F_HAS_SADDR;
2610
2611		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2612							       flags);
2613	} else {
2614		fl6.flowi6_oif = oif;
2615
2616		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2617	}
2618
2619	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2620	if (!skb) {
2621		dst_release(&rt->dst);
2622		err = -ENOBUFS;
2623		goto errout;
2624	}
2625
2626	/* Reserve room for dummy headers, this skb can pass
2627	   through good chunk of routing engine.
2628	 */
2629	skb_reset_mac_header(skb);
2630	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2631
 
2632	skb_dst_set(skb, &rt->dst);
2633
2634	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2635			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2636			    nlh->nlmsg_seq, 0, 0, 0);
2637	if (err < 0) {
2638		kfree_skb(skb);
2639		goto errout;
2640	}
2641
2642	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2643errout:
2644	return err;
2645}
2646
2647void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2648{
2649	struct sk_buff *skb;
2650	struct net *net = info->nl_net;
2651	u32 seq;
2652	int err;
2653
2654	err = -ENOBUFS;
2655	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2656
2657	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2658	if (!skb)
2659		goto errout;
2660
2661	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2662				event, info->pid, seq, 0, 0, 0);
2663	if (err < 0) {
2664		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2665		WARN_ON(err == -EMSGSIZE);
2666		kfree_skb(skb);
2667		goto errout;
2668	}
2669	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2670		    info->nlh, gfp_any());
2671	return;
2672errout:
2673	if (err < 0)
2674		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2675}
2676
2677static int ip6_route_dev_notify(struct notifier_block *this,
2678				unsigned long event, void *data)
2679{
2680	struct net_device *dev = (struct net_device *)data;
2681	struct net *net = dev_net(dev);
2682
2683	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2684		net->ipv6.ip6_null_entry->dst.dev = dev;
2685		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2686#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2687		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2688		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2689		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2690		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2691#endif
2692	}
2693
2694	return NOTIFY_OK;
2695}
2696
2697/*
2698 *	/proc
2699 */
2700
2701#ifdef CONFIG_PROC_FS
2702
/*
 * Buffer/offset bookkeeping for a /proc dump.
 * NOTE(review): no user of this struct is visible in this part of the
 * file -- confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2711
2712static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2713{
2714	struct seq_file *m = p_arg;
2715	struct neighbour *n;
2716
2717	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2718
2719#ifdef CONFIG_IPV6_SUBTREES
2720	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2721#else
2722	seq_puts(m, "00000000000000000000000000000000 00 ");
2723#endif
2724	rcu_read_lock();
2725	n = dst_get_neighbour_noref(&rt->dst);
2726	if (n) {
2727		seq_printf(m, "%pi6", n->primary_key);
2728	} else {
2729		seq_puts(m, "00000000000000000000000000000000");
2730	}
2731	rcu_read_unlock();
2732	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2733		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2734		   rt->dst.__use, rt->rt6i_flags,
2735		   rt->dst.dev ? rt->dst.dev->name : "");
2736	return 0;
2737}
2738
2739static int ipv6_route_show(struct seq_file *m, void *v)
2740{
2741	struct net *net = (struct net *)m->private;
2742	fib6_clean_all_ro(net, rt6_info_route, 0, m);
2743	return 0;
2744}
2745
2746static int ipv6_route_open(struct inode *inode, struct file *file)
2747{
2748	return single_open_net(inode, file, ipv6_route_show);
2749}
2750
2751static const struct file_operations ipv6_route_proc_fops = {
2752	.owner		= THIS_MODULE,
2753	.open		= ipv6_route_open,
2754	.read		= seq_read,
2755	.llseek		= seq_lseek,
2756	.release	= single_release_net,
2757};
2758
2759static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2760{
2761	struct net *net = (struct net *)seq->private;
2762	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2763		   net->ipv6.rt6_stats->fib_nodes,
2764		   net->ipv6.rt6_stats->fib_route_nodes,
2765		   net->ipv6.rt6_stats->fib_rt_alloc,
2766		   net->ipv6.rt6_stats->fib_rt_entries,
2767		   net->ipv6.rt6_stats->fib_rt_cache,
2768		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2769		   net->ipv6.rt6_stats->fib_discarded_routes);
2770
2771	return 0;
2772}
2773
2774static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2775{
2776	return single_open_net(inode, file, rt6_stats_seq_show);
2777}
2778
2779static const struct file_operations rt6_stats_seq_fops = {
2780	.owner	 = THIS_MODULE,
2781	.open	 = rt6_stats_seq_open,
2782	.read	 = seq_read,
2783	.llseek	 = seq_lseek,
2784	.release = single_release_net,
2785};
2786#endif	/* CONFIG_PROC_FS */
2787
2788#ifdef CONFIG_SYSCTL
2789
2790static
2791int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2792			      void __user *buffer, size_t *lenp, loff_t *ppos)
2793{
2794	struct net *net;
2795	int delay;
2796	if (!write)
2797		return -EINVAL;
2798
2799	net = (struct net *)ctl->extra1;
2800	delay = net->ipv6.sysctl.flush_delay;
2801	proc_dointvec(ctl, write, buffer, lenp, ppos);
2802	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2803	return 0;
2804}
2805
2806ctl_table ipv6_route_table_template[] = {
2807	{
2808		.procname	=	"flush",
2809		.data		=	&init_net.ipv6.sysctl.flush_delay,
2810		.maxlen		=	sizeof(int),
2811		.mode		=	0200,
2812		.proc_handler	=	ipv6_sysctl_rtcache_flush
2813	},
2814	{
2815		.procname	=	"gc_thresh",
2816		.data		=	&ip6_dst_ops_template.gc_thresh,
2817		.maxlen		=	sizeof(int),
2818		.mode		=	0644,
2819		.proc_handler	=	proc_dointvec,
2820	},
2821	{
2822		.procname	=	"max_size",
2823		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2824		.maxlen		=	sizeof(int),
2825		.mode		=	0644,
2826		.proc_handler	=	proc_dointvec,
2827	},
2828	{
2829		.procname	=	"gc_min_interval",
2830		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2831		.maxlen		=	sizeof(int),
2832		.mode		=	0644,
2833		.proc_handler	=	proc_dointvec_jiffies,
2834	},
2835	{
2836		.procname	=	"gc_timeout",
2837		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2838		.maxlen		=	sizeof(int),
2839		.mode		=	0644,
2840		.proc_handler	=	proc_dointvec_jiffies,
2841	},
2842	{
2843		.procname	=	"gc_interval",
2844		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2845		.maxlen		=	sizeof(int),
2846		.mode		=	0644,
2847		.proc_handler	=	proc_dointvec_jiffies,
2848	},
2849	{
2850		.procname	=	"gc_elasticity",
2851		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2852		.maxlen		=	sizeof(int),
2853		.mode		=	0644,
2854		.proc_handler	=	proc_dointvec,
2855	},
2856	{
2857		.procname	=	"mtu_expires",
2858		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2859		.maxlen		=	sizeof(int),
2860		.mode		=	0644,
2861		.proc_handler	=	proc_dointvec_jiffies,
2862	},
2863	{
2864		.procname	=	"min_adv_mss",
2865		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2866		.maxlen		=	sizeof(int),
2867		.mode		=	0644,
2868		.proc_handler	=	proc_dointvec,
2869	},
2870	{
2871		.procname	=	"gc_min_interval_ms",
2872		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2873		.maxlen		=	sizeof(int),
2874		.mode		=	0644,
2875		.proc_handler	=	proc_dointvec_ms_jiffies,
2876	},
2877	{ }
2878};
2879
2880struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2881{
2882	struct ctl_table *table;
2883
2884	table = kmemdup(ipv6_route_table_template,
2885			sizeof(ipv6_route_table_template),
2886			GFP_KERNEL);
2887
2888	if (table) {
2889		table[0].data = &net->ipv6.sysctl.flush_delay;
2890		table[0].extra1 = net;
2891		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2892		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2893		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2894		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2895		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2896		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2897		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2898		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2899		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2900	}
2901
2902	return table;
2903}
2904#endif
2905
2906static int __net_init ip6_route_net_init(struct net *net)
2907{
2908	int ret = -ENOMEM;
2909
2910	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2911	       sizeof(net->ipv6.ip6_dst_ops));
2912
2913	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2914		goto out_ip6_dst_ops;
2915
2916	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2917					   sizeof(*net->ipv6.ip6_null_entry),
2918					   GFP_KERNEL);
2919	if (!net->ipv6.ip6_null_entry)
2920		goto out_ip6_dst_entries;
2921	net->ipv6.ip6_null_entry->dst.path =
2922		(struct dst_entry *)net->ipv6.ip6_null_entry;
2923	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2924	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2925			 ip6_template_metrics, true);
2926
2927#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2928	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2929					       sizeof(*net->ipv6.ip6_prohibit_entry),
2930					       GFP_KERNEL);
2931	if (!net->ipv6.ip6_prohibit_entry)
2932		goto out_ip6_null_entry;
2933	net->ipv6.ip6_prohibit_entry->dst.path =
2934		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2935	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2936	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2937			 ip6_template_metrics, true);
2938
2939	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2940					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2941					       GFP_KERNEL);
2942	if (!net->ipv6.ip6_blk_hole_entry)
2943		goto out_ip6_prohibit_entry;
2944	net->ipv6.ip6_blk_hole_entry->dst.path =
2945		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2946	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2947	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2948			 ip6_template_metrics, true);
2949#endif
2950
2951	net->ipv6.sysctl.flush_delay = 0;
2952	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2953	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2954	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2955	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2956	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2957	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2958	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2959
 
 
 
 
2960	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2961
2962	ret = 0;
2963out:
2964	return ret;
2965
2966#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2967out_ip6_prohibit_entry:
2968	kfree(net->ipv6.ip6_prohibit_entry);
2969out_ip6_null_entry:
2970	kfree(net->ipv6.ip6_null_entry);
2971#endif
2972out_ip6_dst_entries:
2973	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2974out_ip6_dst_ops:
2975	goto out;
2976}
2977
2978static void __net_exit ip6_route_net_exit(struct net *net)
2979{
 
 
 
 
2980	kfree(net->ipv6.ip6_null_entry);
2981#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2982	kfree(net->ipv6.ip6_prohibit_entry);
2983	kfree(net->ipv6.ip6_blk_hole_entry);
2984#endif
2985	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2986}
2987
2988static int __net_init ip6_route_net_init_late(struct net *net)
2989{
2990#ifdef CONFIG_PROC_FS
2991	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2992	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2993#endif
2994	return 0;
2995}
2996
2997static void __net_exit ip6_route_net_exit_late(struct net *net)
2998{
2999#ifdef CONFIG_PROC_FS
3000	proc_net_remove(net, "ipv6_route");
3001	proc_net_remove(net, "rt6_stats");
3002#endif
3003}
3004
3005static struct pernet_operations ip6_route_net_ops = {
3006	.init = ip6_route_net_init,
3007	.exit = ip6_route_net_exit,
3008};
3009
3010static struct pernet_operations ip6_route_net_late_ops = {
3011	.init = ip6_route_net_init_late,
3012	.exit = ip6_route_net_exit_late,
3013};
3014
3015static struct notifier_block ip6_route_dev_notifier = {
3016	.notifier_call = ip6_route_dev_notify,
3017	.priority = 0,
3018};
3019
3020int __init ip6_route_init(void)
3021{
3022	int ret;
3023
3024	ret = -ENOMEM;
3025	ip6_dst_ops_template.kmem_cachep =
3026		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3027				  SLAB_HWCACHE_ALIGN, NULL);
3028	if (!ip6_dst_ops_template.kmem_cachep)
3029		goto out;
3030
3031	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3032	if (ret)
3033		goto out_kmem_cache;
3034
3035	ret = register_pernet_subsys(&ip6_route_net_ops);
3036	if (ret)
3037		goto out_dst_entries;
3038
3039	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3040
3041	/* Registering of the loopback is done before this portion of code,
3042	 * the loopback reference in rt6_info will not be taken, do it
3043	 * manually for init_net */
3044	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3045	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3046  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3047	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3048	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3049	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3050	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3051  #endif
3052	ret = fib6_init();
3053	if (ret)
3054		goto out_register_subsys;
3055
3056	ret = xfrm6_init();
3057	if (ret)
3058		goto out_fib6_init;
3059
3060	ret = fib6_rules_init();
3061	if (ret)
3062		goto xfrm6_init;
3063
3064	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3065	if (ret)
3066		goto fib6_rules_init;
3067
3068	ret = -ENOBUFS;
3069	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3070	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3071	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3072		goto out_register_late_subsys;
3073
3074	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3075	if (ret)
3076		goto out_register_late_subsys;
3077
3078out:
3079	return ret;
3080
3081out_register_late_subsys:
3082	unregister_pernet_subsys(&ip6_route_net_late_ops);
3083fib6_rules_init:
3084	fib6_rules_cleanup();
3085xfrm6_init:
3086	xfrm6_fini();
3087out_fib6_init:
3088	fib6_gc_cleanup();
3089out_register_subsys:
3090	unregister_pernet_subsys(&ip6_route_net_ops);
3091out_dst_entries:
3092	dst_entries_destroy(&ip6_dst_blackhole_ops);
3093out_kmem_cache:
3094	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3095	goto out;
3096}
3097
3098void ip6_route_cleanup(void)
3099{
3100	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3101	unregister_pernet_subsys(&ip6_route_net_late_ops);
3102	fib6_rules_cleanup();
3103	xfrm6_fini();
3104	fib6_gc_cleanup();
3105	unregister_pernet_subsys(&ip6_route_net_ops);
3106	dst_entries_destroy(&ip6_dst_blackhole_ops);
3107	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3108}