Linux Audio

Check our new training course

Loading...
v3.1
 
   1/*
   2 *	Linux INET6 implementation
   3 *	FIB front-end.
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*	Changes:
  15 *
  16 *	YOSHIFUJI Hideaki @USAGI
  17 *		reworked default router selection.
  18 *		- respect outgoing interface
  19 *		- select from (probably) reachable routers (i.e.
  20 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *		- always select the same router if it is (probably)
  22 *		reachable.  otherwise, round-robin the list.
  23 *	Ville Nuorvala
  24 *		Fixed routing subtrees.
  25 */
  26
 
 
  27#include <linux/capability.h>
  28#include <linux/errno.h>
 
  29#include <linux/types.h>
  30#include <linux/times.h>
  31#include <linux/socket.h>
  32#include <linux/sockios.h>
  33#include <linux/net.h>
  34#include <linux/route.h>
  35#include <linux/netdevice.h>
  36#include <linux/in6.h>
  37#include <linux/mroute6.h>
  38#include <linux/init.h>
  39#include <linux/if_arp.h>
  40#include <linux/proc_fs.h>
  41#include <linux/seq_file.h>
  42#include <linux/nsproxy.h>
  43#include <linux/slab.h>
 
  44#include <net/net_namespace.h>
  45#include <net/snmp.h>
  46#include <net/ipv6.h>
  47#include <net/ip6_fib.h>
  48#include <net/ip6_route.h>
  49#include <net/ndisc.h>
  50#include <net/addrconf.h>
  51#include <net/tcp.h>
  52#include <linux/rtnetlink.h>
  53#include <net/dst.h>
 
  54#include <net/xfrm.h>
  55#include <net/netevent.h>
  56#include <net/netlink.h>
  57
  58#include <asm/uaccess.h>
 
 
 
 
 
  59
  60#ifdef CONFIG_SYSCTL
  61#include <linux/sysctl.h>
  62#endif
  63
  64/* Set to 3 to get tracing. */
  65#define RT6_DEBUG 2
  66
  67#if RT6_DEBUG >= 3
  68#define RDBG(x) printk x
  69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
  70#else
  71#define RDBG(x)
  72#define RT6_TRACE(x...) do { ; } while (0)
  73#endif
 
 
 
 
  74
  75static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
  76				    const struct in6_addr *dest);
  77static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  78static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  79static unsigned int	 ip6_default_mtu(const struct dst_entry *dst);
  80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  81static void		ip6_dst_destroy(struct dst_entry *);
  82static void		ip6_dst_ifdown(struct dst_entry *,
  83				       struct net_device *dev, int how);
  84static int		 ip6_dst_gc(struct dst_ops *ops);
  85
  86static int		ip6_pkt_discard(struct sk_buff *skb);
  87static int		ip6_pkt_discard_out(struct sk_buff *skb);
 
 
  88static void		ip6_link_failure(struct sk_buff *skb);
  89static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  90
  91#ifdef CONFIG_IPV6_ROUTE_INFO
  92static struct rt6_info *rt6_add_route_info(struct net *net,
  93					   const struct in6_addr *prefix, int prefixlen,
  94					   const struct in6_addr *gwaddr, int ifindex,
  95					   unsigned pref);
  96static struct rt6_info *rt6_get_route_info(struct net *net,
 
  97					   const struct in6_addr *prefix, int prefixlen,
  98					   const struct in6_addr *gwaddr, int ifindex);
 
  99#endif
 100
 101static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 
 
 
 
 
 
 
 102{
 103	struct rt6_info *rt = (struct rt6_info *) dst;
 104	struct inet_peer *peer;
 105	u32 *p = NULL;
 106
 107	if (!(rt->dst.flags & DST_HOST))
 108		return NULL;
 109
 110	if (!rt->rt6i_peer)
 111		rt6_bind_peer(rt, 1);
 
 
 112
 113	peer = rt->rt6i_peer;
 114	if (peer) {
 115		u32 *old_p = __DST_METRICS_PTR(old);
 116		unsigned long prev, new;
 
 117
 118		p = peer->metrics;
 119		if (inet_metrics_new(peer))
 120			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
 
 
 
 121
 122		new = (unsigned long) p;
 123		prev = cmpxchg(&dst->_metrics, old, new);
 
 
 124
 125		if (prev != old) {
 126			p = __DST_METRICS_PTR(prev);
 127			if (prev & DST_METRICS_READ_ONLY)
 128				p = NULL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 129		}
 
 130	}
 131	return p;
 132}
 133
 134static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 135{
 136	return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
 
 
 
 
 
 
 
 
 
 
 137}
 138
 139static struct dst_ops ip6_dst_ops_template = {
 140	.family			=	AF_INET6,
 141	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 142	.gc			=	ip6_dst_gc,
 143	.gc_thresh		=	1024,
 144	.check			=	ip6_dst_check,
 145	.default_advmss		=	ip6_default_advmss,
 146	.default_mtu		=	ip6_default_mtu,
 147	.cow_metrics		=	ipv6_cow_metrics,
 148	.destroy		=	ip6_dst_destroy,
 149	.ifdown			=	ip6_dst_ifdown,
 150	.negative_advice	=	ip6_negative_advice,
 151	.link_failure		=	ip6_link_failure,
 152	.update_pmtu		=	ip6_rt_update_pmtu,
 
 153	.local_out		=	__ip6_local_out,
 154	.neigh_lookup		=	ip6_neigh_lookup,
 
 155};
 156
 157static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
 158{
 159	return 0;
 
 
 160}
 161
 162static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 
 
 163{
 164}
 165
 166static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
 167					 unsigned long old)
 168{
 169	return NULL;
 170}
 171
 172static struct dst_ops ip6_dst_blackhole_ops = {
 173	.family			=	AF_INET6,
 174	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 175	.destroy		=	ip6_dst_destroy,
 176	.check			=	ip6_dst_check,
 177	.default_mtu		=	ip6_blackhole_default_mtu,
 178	.default_advmss		=	ip6_default_advmss,
 179	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 180	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
 181	.neigh_lookup		=	ip6_neigh_lookup,
 
 182};
 183
 184static const u32 ip6_template_metrics[RTAX_MAX] = {
 185	[RTAX_HOPLIMIT - 1] = 255,
 
 
 
 
 
 
 
 
 
 186};
 187
 188static struct rt6_info ip6_null_entry_template = {
 189	.dst = {
 190		.__refcnt	= ATOMIC_INIT(1),
 191		.__use		= 1,
 192		.obsolete	= -1,
 193		.error		= -ENETUNREACH,
 194		.input		= ip6_pkt_discard,
 195		.output		= ip6_pkt_discard_out,
 196	},
 197	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 198	.rt6i_protocol  = RTPROT_KERNEL,
 199	.rt6i_metric	= ~(u32) 0,
 200	.rt6i_ref	= ATOMIC_INIT(1),
 201};
 202
 203#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 204
 205static int ip6_pkt_prohibit(struct sk_buff *skb);
 206static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 207
 208static struct rt6_info ip6_prohibit_entry_template = {
 209	.dst = {
 210		.__refcnt	= ATOMIC_INIT(1),
 211		.__use		= 1,
 212		.obsolete	= -1,
 213		.error		= -EACCES,
 214		.input		= ip6_pkt_prohibit,
 215		.output		= ip6_pkt_prohibit_out,
 216	},
 217	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 218	.rt6i_protocol  = RTPROT_KERNEL,
 219	.rt6i_metric	= ~(u32) 0,
 220	.rt6i_ref	= ATOMIC_INIT(1),
 221};
 222
 223static struct rt6_info ip6_blk_hole_entry_template = {
 224	.dst = {
 225		.__refcnt	= ATOMIC_INIT(1),
 226		.__use		= 1,
 227		.obsolete	= -1,
 228		.error		= -EINVAL,
 229		.input		= dst_discard,
 230		.output		= dst_discard,
 231	},
 232	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 233	.rt6i_protocol  = RTPROT_KERNEL,
 234	.rt6i_metric	= ~(u32) 0,
 235	.rt6i_ref	= ATOMIC_INIT(1),
 236};
 237
 238#endif
 239
 
 
 
 
 
 
 
 
 240/* allocate dst with ip6_dst_ops */
 241static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
 242					     struct net_device *dev,
 243					     int flags)
 244{
 245	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
 
 246
 247	if (rt != NULL)
 248		memset(&rt->rt6i_table, 0,
 249			sizeof(*rt) - sizeof(struct dst_entry));
 
 250
 251	return rt;
 252}
 
 253
 254static void ip6_dst_destroy(struct dst_entry *dst)
 255{
 256	struct rt6_info *rt = (struct rt6_info *)dst;
 257	struct inet6_dev *idev = rt->rt6i_idev;
 258	struct inet_peer *peer = rt->rt6i_peer;
 259
 260	if (!(rt->dst.flags & DST_HOST))
 261		dst_destroy_metrics_generic(dst);
 262
 263	if (idev != NULL) {
 
 264		rt->rt6i_idev = NULL;
 265		in6_dev_put(idev);
 266	}
 267	if (peer) {
 268		rt->rt6i_peer = NULL;
 269		inet_putpeer(peer);
 270	}
 271}
 272
 273static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
 274
 275static u32 rt6_peer_genid(void)
 276{
 277	return atomic_read(&__rt6_peer_genid);
 278}
 279
 280void rt6_bind_peer(struct rt6_info *rt, int create)
 281{
 282	struct inet_peer *peer;
 283
 284	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
 285	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
 286		inet_putpeer(peer);
 287	else
 288		rt->rt6i_peer_genid = rt6_peer_genid();
 289}
 290
 291static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 292			   int how)
 293{
 294	struct rt6_info *rt = (struct rt6_info *)dst;
 295	struct inet6_dev *idev = rt->rt6i_idev;
 296	struct net_device *loopback_dev =
 297		dev_net(dev)->loopback_dev;
 298
 299	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
 300		struct inet6_dev *loopback_idev =
 301			in6_dev_get(loopback_dev);
 302		if (loopback_idev != NULL) {
 303			rt->rt6i_idev = loopback_idev;
 304			in6_dev_put(idev);
 305		}
 306	}
 307}
 308
 309static __inline__ int rt6_check_expired(const struct rt6_info *rt)
 
 
 
 
 
 
 
 
 310{
 311	return (rt->rt6i_flags & RTF_EXPIRES) &&
 312		time_after(jiffies, rt->rt6i_expires);
 
 
 
 
 
 
 
 
 
 
 313}
 314
 315static inline int rt6_need_strict(const struct in6_addr *daddr)
 
 
 316{
 317	return ipv6_addr_type(daddr) &
 318		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 319}
 320
 321/*
 322 *	Route lookup. Any table->tb6_lock is implied.
 323 */
 324
 325static inline struct rt6_info *rt6_device_match(struct net *net,
 326						    struct rt6_info *rt,
 327						    const struct in6_addr *saddr,
 328						    int oif,
 329						    int flags)
 330{
 331	struct rt6_info *local = NULL;
 332	struct rt6_info *sprt;
 333
 334	if (!oif && ipv6_addr_any(saddr))
 335		goto out;
 336
 337	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 338		struct net_device *dev = sprt->rt6i_dev;
 339
 340		if (oif) {
 341			if (dev->ifindex == oif)
 342				return sprt;
 343			if (dev->flags & IFF_LOOPBACK) {
 344				if (sprt->rt6i_idev == NULL ||
 345				    sprt->rt6i_idev->dev->ifindex != oif) {
 346					if (flags & RT6_LOOKUP_F_IFACE && oif)
 347						continue;
 348					if (local && (!oif ||
 349						      local->rt6i_idev->dev->ifindex == oif))
 350						continue;
 351				}
 352				local = sprt;
 353			}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 354		} else {
 355			if (ipv6_chk_addr(net, saddr, dev,
 356					  flags & RT6_LOOKUP_F_IFACE))
 357				return sprt;
 358		}
 
 
 359	}
 360
 361	if (oif) {
 362		if (local)
 363			return local;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 364
 365		if (flags & RT6_LOOKUP_F_IFACE)
 366			return net->ipv6.ip6_null_entry;
 
 367	}
 368out:
 369	return rt;
 
 
 
 
 
 
 
 
 370}
 371
 372#ifdef CONFIG_IPV6_ROUTER_PREF
 373static void rt6_probe(struct rt6_info *rt)
 
 
 
 
 
 
 374{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 375	struct neighbour *neigh;
 
 
 
 376	/*
 377	 * Okay, this does not seem to be appropriate
 378	 * for now, however, we need to check if it
 379	 * is really so; aka Router Reachability Probing.
 380	 *
 381	 * Router Reachability Probe MUST be rate-limited
 382	 * to no more than one per minute.
 383	 */
 384	rcu_read_lock();
 385	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
 386	if (!neigh || (neigh->nud_state & NUD_VALID))
 387		goto out;
 388	read_lock_bh(&neigh->lock);
 389	if (!(neigh->nud_state & NUD_VALID) &&
 390	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
 391		struct in6_addr mcaddr;
 392		struct in6_addr *target;
 393
 394		neigh->updated = jiffies;
 395		read_unlock_bh(&neigh->lock);
 396
 397		target = (struct in6_addr *)&neigh->primary_key;
 398		addrconf_addr_solict_mult(target, &mcaddr);
 399		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 400	} else {
 401		read_unlock_bh(&neigh->lock);
 
 
 
 
 402	}
 
 403out:
 404	rcu_read_unlock();
 405}
 406#else
 407static inline void rt6_probe(struct rt6_info *rt)
 408{
 409}
 410#endif
 411
 412/*
 413 * Default Router Selection (RFC 2461 6.3.6)
 414 */
 415static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 416{
 417	struct net_device *dev = rt->rt6i_dev;
 418	if (!oif || dev->ifindex == oif)
 419		return 2;
 420	if ((dev->flags & IFF_LOOPBACK) &&
 421	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 422		return 1;
 423	return 0;
 424}
 425
 426static inline int rt6_check_neigh(struct rt6_info *rt)
 427{
 
 428	struct neighbour *neigh;
 429	int m;
 430
 431	rcu_read_lock();
 432	neigh = dst_get_neighbour(&rt->dst);
 433	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 434	    !(rt->rt6i_flags & RTF_GATEWAY))
 435		m = 1;
 436	else if (neigh) {
 437		read_lock_bh(&neigh->lock);
 438		if (neigh->nud_state & NUD_VALID)
 439			m = 2;
 440#ifdef CONFIG_IPV6_ROUTER_PREF
 441		else if (neigh->nud_state & NUD_FAILED)
 442			m = 0;
 443#endif
 444		else
 445			m = 1;
 446		read_unlock_bh(&neigh->lock);
 447	} else
 448		m = 0;
 449	rcu_read_unlock();
 450	return m;
 
 
 
 
 451}
 452
 453static int rt6_score_route(struct rt6_info *rt, int oif,
 454			   int strict)
 455{
 456	int m, n;
 
 
 
 457
 458	m = rt6_check_dev(rt, oif);
 459	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 460		return -1;
 461#ifdef CONFIG_IPV6_ROUTER_PREF
 462	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 463#endif
 464	n = rt6_check_neigh(rt);
 465	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
 466		return -1;
 
 
 
 467	return m;
 468}
 469
 470static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 471				   int *mpri, struct rt6_info *match)
 472{
 
 
 473	int m;
 474
 475	if (rt6_check_expired(rt))
 
 
 
 
 
 476		goto out;
 477
 478	m = rt6_score_route(rt, oif, strict);
 479	if (m < 0)
 
 
 
 480		goto out;
 
 481
 
 
 
 
 482	if (m > *mpri) {
 483		if (strict & RT6_LOOKUP_F_REACHABLE)
 484			rt6_probe(match);
 485		*mpri = m;
 486		match = rt;
 487	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
 488		rt6_probe(rt);
 489	}
 490
 491out:
 492	return match;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 493}
 494
 495static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 496				     struct rt6_info *rr_head,
 497				     u32 metric, int oif, int strict)
 498{
 499	struct rt6_info *rt, *match;
 
 500	int mpri = -1;
 501
 502	match = NULL;
 503	for (rt = rr_head; rt && rt->rt6i_metric == metric;
 504	     rt = rt->dst.rt6_next)
 505		match = find_match(rt, oif, strict, &mpri, match);
 506	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 507	     rt = rt->dst.rt6_next)
 508		match = find_match(rt, oif, strict, &mpri, match);
 509
 510	return match;
 
 
 
 
 
 
 
 511}
 512
 513static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 
 514{
 515	struct rt6_info *match, *rt0;
 516	struct net *net;
 
 
 517
 518	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
 519		  __func__, fn->leaf, oif);
 
 
 
 520
 521	rt0 = fn->rr_ptr;
 522	if (!rt0)
 523		fn->rr_ptr = rt0 = fn->leaf;
 524
 525	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
 
 
 
 
 
 
 
 
 
 
 
 526
 527	if (!match &&
 528	    (strict & RT6_LOOKUP_F_REACHABLE)) {
 529		struct rt6_info *next = rt0->dst.rt6_next;
 530
 531		/* no entries matched; do round-robin */
 532		if (!next || next->rt6i_metric != rt0->rt6i_metric)
 533			next = fn->leaf;
 534
 535		if (next != rt0)
 536			fn->rr_ptr = next;
 
 
 
 
 
 537	}
 538
 539	RT6_TRACE("%s() => %p\n",
 540		  __func__, match);
 
 
 
 
 
 
 541
 542	net = dev_net(rt0->rt6i_dev);
 543	return match ? match : net->ipv6.ip6_null_entry;
 
 
 544}
 545
 546#ifdef CONFIG_IPV6_ROUTE_INFO
 547int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 548		  const struct in6_addr *gwaddr)
 549{
 550	struct net *net = dev_net(dev);
 551	struct route_info *rinfo = (struct route_info *) opt;
 552	struct in6_addr prefix_buf, *prefix;
 553	unsigned int pref;
 554	unsigned long lifetime;
 555	struct rt6_info *rt;
 556
 557	if (len < sizeof(struct route_info)) {
 558		return -EINVAL;
 559	}
 560
 561	/* Sanity check for prefix_len and length */
 562	if (rinfo->length > 3) {
 563		return -EINVAL;
 564	} else if (rinfo->prefix_len > 128) {
 565		return -EINVAL;
 566	} else if (rinfo->prefix_len > 64) {
 567		if (rinfo->length < 2) {
 568			return -EINVAL;
 569		}
 570	} else if (rinfo->prefix_len > 0) {
 571		if (rinfo->length < 1) {
 572			return -EINVAL;
 573		}
 574	}
 575
 576	pref = rinfo->route_pref;
 577	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 578		return -EINVAL;
 579
 580	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 581
 582	if (rinfo->length == 3)
 583		prefix = (struct in6_addr *)rinfo->prefix;
 584	else {
 585		/* this function is safe */
 586		ipv6_addr_prefix(&prefix_buf,
 587				 (struct in6_addr *)rinfo->prefix,
 588				 rinfo->prefix_len);
 589		prefix = &prefix_buf;
 590	}
 591
 592	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 593				dev->ifindex);
 
 
 
 594
 595	if (rt && !lifetime) {
 596		ip6_del_rt(rt);
 597		rt = NULL;
 598	}
 599
 600	if (!rt && lifetime)
 601		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 602					pref);
 603	else if (rt)
 604		rt->rt6i_flags = RTF_ROUTEINFO |
 605				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 606
 607	if (rt) {
 608		if (!addrconf_finite_timeout(lifetime)) {
 609			rt->rt6i_flags &= ~RTF_EXPIRES;
 610		} else {
 611			rt->rt6i_expires = jiffies + HZ * lifetime;
 612			rt->rt6i_flags |= RTF_EXPIRES;
 613		}
 614		dst_release(&rt->dst);
 615	}
 616	return 0;
 617}
 618#endif
 619
 620#define BACKTRACK(__net, saddr)			\
 621do { \
 622	if (rt == __net->ipv6.ip6_null_entry) {	\
 623		struct fib6_node *pn; \
 624		while (1) { \
 625			if (fn->fn_flags & RTN_TL_ROOT) \
 626				goto out; \
 627			pn = fn->parent; \
 628			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
 629				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
 630			else \
 631				fn = pn; \
 632			if (fn->fn_flags & RTN_RTINFO) \
 633				goto restart; \
 634		} \
 635	} \
 636} while(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 637
 638static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 639					     struct fib6_table *table,
 640					     struct flowi6 *fl6, int flags)
 
 
 641{
 
 642	struct fib6_node *fn;
 643	struct rt6_info *rt;
 644
 645	read_lock_bh(&table->tb6_lock);
 646	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 
 
 
 647restart:
 648	rt = fn->leaf;
 649	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 650	BACKTRACK(net, &fl6->saddr);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 651out:
 652	dst_use(&rt->dst, jiffies);
 653	read_unlock_bh(&table->tb6_lock);
 
 
 654	return rt;
 
 655
 
 
 
 
 656}
 
 657
 658struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 659			    const struct in6_addr *saddr, int oif, int strict)
 
 660{
 661	struct flowi6 fl6 = {
 662		.flowi6_oif = oif,
 663		.daddr = *daddr,
 664	};
 665	struct dst_entry *dst;
 666	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 667
 668	if (saddr) {
 669		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 670		flags |= RT6_LOOKUP_F_HAS_SADDR;
 671	}
 672
 673	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 674	if (dst->error == 0)
 675		return (struct rt6_info *) dst;
 676
 677	dst_release(dst);
 678
 679	return NULL;
 680}
 681
 682EXPORT_SYMBOL(rt6_lookup);
 683
/* ip6_ins_rt must be called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold the
   route, it may be destroyed.
 */
 689
 690static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 
 691{
 692	int err;
 693	struct fib6_table *table;
 694
 695	table = rt->rt6i_table;
 696	write_lock_bh(&table->tb6_lock);
 697	err = fib6_add(&table->tb6_root, rt, info);
 698	write_unlock_bh(&table->tb6_lock);
 699
 700	return err;
 701}
 702
 703int ip6_ins_rt(struct rt6_info *rt)
 704{
 705	struct nl_info info = {
 706		.nl_net = dev_net(rt->rt6i_dev),
 707	};
 708	return __ip6_ins_rt(rt, &info);
 709}
 710
 711static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
 712				      const struct in6_addr *daddr,
 713				      const struct in6_addr *saddr)
 714{
 
 
 715	struct rt6_info *rt;
 716
 717	/*
 718	 *	Clone the route.
 719	 */
 720
 721	rt = ip6_rt_copy(ort, daddr);
 722
 723	if (rt) {
 724		struct neighbour *neigh;
 725		int attempts = !in_softirq();
 726
 727		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
 728			if (rt->rt6i_dst.plen != 128 &&
 729			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 730				rt->rt6i_flags |= RTF_ANYCAST;
 731			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
 732		}
 733
 734		rt->rt6i_flags |= RTF_CACHE;
 
 
 
 735
 
 
 
 
 736#ifdef CONFIG_IPV6_SUBTREES
 737		if (rt->rt6i_src.plen && saddr) {
 738			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
 739			rt->rt6i_src.plen = 128;
 740		}
 741#endif
 
 742
 743	retry:
 744		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
 745		if (IS_ERR(neigh)) {
 746			struct net *net = dev_net(rt->rt6i_dev);
 747			int saved_rt_min_interval =
 748				net->ipv6.sysctl.ip6_rt_gc_min_interval;
 749			int saved_rt_elasticity =
 750				net->ipv6.sysctl.ip6_rt_gc_elasticity;
 751
 752			if (attempts-- > 0) {
 753				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
 754				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
 755
 756				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
 757
 758				net->ipv6.sysctl.ip6_rt_gc_elasticity =
 759					saved_rt_elasticity;
 760				net->ipv6.sysctl.ip6_rt_gc_min_interval =
 761					saved_rt_min_interval;
 762				goto retry;
 763			}
 764
 765			if (net_ratelimit())
 766				printk(KERN_WARNING
 767				       "ipv6: Neighbour table overflow.\n");
 768			dst_free(&rt->dst);
 769			return NULL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 770		}
 771		dst_set_neighbour(&rt->dst, neigh);
 772
 
 773	}
 774
 775	return rt;
 776}
 777
 778static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 779					const struct in6_addr *daddr)
 780{
 781	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
 782
 783	if (rt) {
 784		rt->rt6i_flags |= RTF_CACHE;
 785		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
 
 
 
 
 
 
 
 
 
 
 786	}
 787	return rt;
 
 788}
 789
 790static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
 791				      struct flowi6 *fl6, int flags)
 
 
 
 
 
 
 
 792{
 793	struct fib6_node *fn;
 794	struct rt6_info *rt, *nrt;
 795	int strict = 0;
 796	int attempts = 3;
 797	int err;
 798	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
 799
 800	strict |= flags & RT6_LOOKUP_F_IFACE;
 
 801
 802relookup:
 803	read_lock_bh(&table->tb6_lock);
 804
 805restart_2:
 806	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 
 
 
 
 
 
 
 
 
 
 
 807
 808restart:
 809	rt = rt6_select(fn, oif, strict | reachable);
 
 
 
 
 810
 811	BACKTRACK(net, &fl6->saddr);
 812	if (rt == net->ipv6.ip6_null_entry ||
 813	    rt->rt6i_flags & RTF_CACHE)
 814		goto out;
 
 
 
 
 
 
 
 
 
 
 
 815
 816	dst_hold(&rt->dst);
 817	read_unlock_bh(&table->tb6_lock);
 818
 819	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
 820		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
 821	else if (!(rt->dst.flags & DST_HOST))
 822		nrt = rt6_alloc_clone(rt, &fl6->daddr);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 823	else
 824		goto out2;
 825
 826	dst_release(&rt->dst);
 827	rt = nrt ? : net->ipv6.ip6_null_entry;
 
 828
 829	dst_hold(&rt->dst);
 830	if (nrt) {
 831		err = ip6_ins_rt(nrt);
 832		if (!err)
 833			goto out2;
 834	}
 835
 836	if (--attempts <= 0)
 837		goto out2;
 838
 839	/*
 840	 * Race condition! In the gap, when table->tb6_lock was
 841	 * released someone could insert this route.  Relookup.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 842	 */
 843	dst_release(&rt->dst);
 844	goto relookup;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 845
 846out:
 847	if (reachable) {
 848		reachable = 0;
 849		goto restart_2;
 
 
 
 
 
 850	}
 851	dst_hold(&rt->dst);
 852	read_unlock_bh(&table->tb6_lock);
 853out2:
 854	rt->dst.lastuse = jiffies;
 855	rt->dst.__use++;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 856
 857	return rt;
 858}
 
 859
 860static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
 861					    struct flowi6 *fl6, int flags)
 
 
 
 862{
 863	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 864}
 865
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 866void ip6_route_input(struct sk_buff *skb)
 867{
 868	const struct ipv6hdr *iph = ipv6_hdr(skb);
 869	struct net *net = dev_net(skb->dev);
 870	int flags = RT6_LOOKUP_F_HAS_SADDR;
 
 871	struct flowi6 fl6 = {
 872		.flowi6_iif = skb->dev->ifindex,
 873		.daddr = iph->daddr,
 874		.saddr = iph->saddr,
 875		.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
 876		.flowi6_mark = skb->mark,
 877		.flowi6_proto = iph->nexthdr,
 878	};
 
 879
 880	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
 881		flags |= RT6_LOOKUP_F_IFACE;
 
 882
 883	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
 
 
 
 
 
 
 
 884}
 885
 886static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
 887					     struct flowi6 *fl6, int flags)
 
 
 
 888{
 889	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 890}
 891
 892struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 893				    struct flowi6 *fl6)
 
 894{
 895	int flags = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 896
 897	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
 
 
 
 898		flags |= RT6_LOOKUP_F_IFACE;
 899
 900	if (!ipv6_addr_any(&fl6->saddr))
 901		flags |= RT6_LOOKUP_F_HAS_SADDR;
 902	else if (sk)
 903		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 904
 905	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 906}
 
 907
 908EXPORT_SYMBOL(ip6_route_output);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 909
 910struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 911{
 912	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
 
 913	struct dst_entry *new = NULL;
 914
 915	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
 
 916	if (rt) {
 917		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
 
 918
 919		new = &rt->dst;
 920
 921		new->__use = 1;
 922		new->input = dst_discard;
 923		new->output = dst_discard;
 924
 925		if (dst_metrics_read_only(&ort->dst))
 926			new->_metrics = ort->dst._metrics;
 927		else
 928			dst_copy_metrics(new, &ort->dst);
 929		rt->rt6i_idev = ort->rt6i_idev;
 930		if (rt->rt6i_idev)
 931			in6_dev_hold(rt->rt6i_idev);
 932		rt->rt6i_expires = 0;
 933
 934		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
 935		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
 936		rt->rt6i_metric = 0;
 937
 938		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
 939#ifdef CONFIG_IPV6_SUBTREES
 940		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
 941#endif
 942
 943		dst_free(new);
 944	}
 945
 946	dst_release(dst_orig);
 947	return new ? new : ERR_PTR(-ENOMEM);
 948}
 949
 950/*
 951 *	Destination cache support functions
 952 */
 953
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 954static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 955{
 
 
 956	struct rt6_info *rt;
 957
 958	rt = (struct rt6_info *) dst;
 959
 960	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
 961		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
 962			if (!rt->rt6i_peer)
 963				rt6_bind_peer(rt, 0);
 964			rt->rt6i_peer_genid = rt6_peer_genid();
 965		}
 966		return dst;
 967	}
 968	return NULL;
 
 
 
 
 
 
 
 
 
 
 
 
 969}
 970
 971static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 972{
 973	struct rt6_info *rt = (struct rt6_info *) dst;
 974
 975	if (rt) {
 976		if (rt->rt6i_flags & RTF_CACHE) {
 
 977			if (rt6_check_expired(rt)) {
 978				ip6_del_rt(rt);
 979				dst = NULL;
 980			}
 
 981		} else {
 982			dst_release(dst);
 983			dst = NULL;
 984		}
 985	}
 986	return dst;
 987}
 988
 989static void ip6_link_failure(struct sk_buff *skb)
 990{
 991	struct rt6_info *rt;
 992
 993	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
 994
 995	rt = (struct rt6_info *) skb_dst(skb);
 996	if (rt) {
 997		if (rt->rt6i_flags&RTF_CACHE) {
 998			dst_set_expires(&rt->dst, 0);
 999			rt->rt6i_flags |= RTF_EXPIRES;
1000		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1001			rt->rt6i_node->fn_sernum = -1;
 
 
 
 
 
 
 
 
 
 
1002	}
1003}
1004
1005static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1006{
1007	struct rt6_info *rt6 = (struct rt6_info*)dst;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1008
1009	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1010		rt6->rt6i_flags |= RTF_MODIFIED;
1011		if (mtu < IPV6_MIN_MTU) {
1012			u32 features = dst_metric(dst, RTAX_FEATURES);
1013			mtu = IPV6_MIN_MTU;
1014			features |= RTAX_FEATURE_ALLFRAG;
1015			dst_metric_set(dst, RTAX_FEATURES, features);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1016		}
1017		dst_metric_set(dst, RTAX_MTU, mtu);
1018	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1019}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1020
1021static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1022{
1023	struct net_device *dev = dst->dev;
1024	unsigned int mtu = dst_mtu(dst);
1025	struct net *net = dev_net(dev);
1026
1027	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1028
1029	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1030		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1031
1032	/*
1033	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1034	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1035	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1036	 * rely only on pmtu discovery"
1037	 */
1038	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1039		mtu = IPV6_MAXPLEN;
1040	return mtu;
1041}
1042
1043static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1044{
1045	unsigned int mtu = IPV6_MIN_MTU;
1046	struct inet6_dev *idev;
 
 
 
 
 
 
 
1047
1048	rcu_read_lock();
1049	idev = __in6_dev_get(dst->dev);
1050	if (idev)
1051		mtu = idev->cnf.mtu6;
1052	rcu_read_unlock();
1053
1054	return mtu;
 
 
 
1055}
1056
/*
 * Singly linked list (chained through dst->next) of the dst entries
 * created by icmp6_dst_alloc().  Entries are unlinked and freed by
 * icmp6_dst_gc() and icmp6_clean_all().
 */
static struct dst_entry *icmp6_dst_gc_list;
/* Protects icmp6_dst_gc_list; always taken with the _bh variants. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1059
1060struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1061				  struct neighbour *neigh,
1062				  const struct in6_addr *addr)
1063{
 
1064	struct rt6_info *rt;
1065	struct inet6_dev *idev = in6_dev_get(dev);
1066	struct net *net = dev_net(dev);
1067
1068	if (unlikely(idev == NULL))
1069		return NULL;
1070
1071	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1072	if (unlikely(rt == NULL)) {
1073		in6_dev_put(idev);
 
1074		goto out;
1075	}
1076
1077	if (neigh)
1078		neigh_hold(neigh);
1079	else {
1080		neigh = ndisc_get_neigh(dev, addr);
1081		if (IS_ERR(neigh))
1082			neigh = NULL;
1083	}
1084
1085	rt->dst.flags |= DST_HOST;
1086	rt->dst.output  = ip6_output;
1087	dst_set_neighbour(&rt->dst, neigh);
1088	atomic_set(&rt->dst.__refcnt, 1);
1089	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1090
1091	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1092	rt->rt6i_dst.plen = 128;
1093	rt->rt6i_idev     = idev;
 
1094
1095	spin_lock_bh(&icmp6_dst_lock);
1096	rt->dst.next = icmp6_dst_gc_list;
1097	icmp6_dst_gc_list = &rt->dst;
1098	spin_unlock_bh(&icmp6_dst_lock);
 
1099
1100	fib6_force_start_gc(net);
1101
1102out:
1103	return &rt->dst;
1104}
1105
1106int icmp6_dst_gc(void)
1107{
1108	struct dst_entry *dst, **pprev;
1109	int more = 0;
1110
1111	spin_lock_bh(&icmp6_dst_lock);
1112	pprev = &icmp6_dst_gc_list;
1113
1114	while ((dst = *pprev) != NULL) {
1115		if (!atomic_read(&dst->__refcnt)) {
1116			*pprev = dst->next;
1117			dst_free(dst);
1118		} else {
1119			pprev = &dst->next;
1120			++more;
1121		}
1122	}
1123
1124	spin_unlock_bh(&icmp6_dst_lock);
1125
1126	return more;
1127}
1128
1129static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1130			    void *arg)
1131{
1132	struct dst_entry *dst, **pprev;
1133
1134	spin_lock_bh(&icmp6_dst_lock);
1135	pprev = &icmp6_dst_gc_list;
1136	while ((dst = *pprev) != NULL) {
1137		struct rt6_info *rt = (struct rt6_info *) dst;
1138		if (func(rt, arg)) {
1139			*pprev = dst->next;
1140			dst_free(dst);
1141		} else {
1142			pprev = &dst->next;
1143		}
1144	}
1145	spin_unlock_bh(&icmp6_dst_lock);
1146}
1147
1148static int ip6_dst_gc(struct dst_ops *ops)
1149{
1150	unsigned long now = jiffies;
1151	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1152	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1153	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1154	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1155	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1156	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1157	int entries;
1158
1159	entries = dst_entries_get_fast(ops);
1160	if (time_after(rt_last_gc + rt_min_interval, now) &&
 
 
 
1161	    entries <= rt_max_size)
1162		goto out;
1163
1164	net->ipv6.ip6_rt_gc_expire++;
1165	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1166	net->ipv6.ip6_rt_last_gc = now;
1167	entries = dst_entries_get_slow(ops);
1168	if (entries < ops->gc_thresh)
1169		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1170out:
1171	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1172	return entries > rt_max_size;
1173}
1174
/*
 * Clean the host part of a prefix.  This is not necessary in a radix
 * tree, but it results in cleaner routing tables.
 *
 * Remove it only once everything is known to work!
 */
 
1180
1181int ip6_dst_hoplimit(struct dst_entry *dst)
1182{
1183	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1184	if (hoplimit == 0) {
1185		struct net_device *dev = dst->dev;
1186		struct inet6_dev *idev;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1187
1188		rcu_read_lock();
1189		idev = __in6_dev_get(dev);
1190		if (idev)
1191			hoplimit = idev->cnf.hop_limit;
1192		else
1193			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
 
1194		rcu_read_unlock();
 
 
 
1195	}
1196	return hoplimit;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1197}
1198EXPORT_SYMBOL(ip6_dst_hoplimit);
1199
1200/*
1201 *
1202 */
 
 
 
 
1203
1204int ip6_route_add(struct fib6_config *cfg)
 
 
 
 
 
1205{
1206	int err;
1207	struct net *net = cfg->fc_nlinfo.nl_net;
1208	struct rt6_info *rt = NULL;
1209	struct net_device *dev = NULL;
1210	struct inet6_dev *idev = NULL;
1211	struct fib6_table *table;
1212	int addr_type;
 
1213
1214	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1215		return -EINVAL;
1216#ifndef CONFIG_IPV6_SUBTREES
1217	if (cfg->fc_src_len)
1218		return -EINVAL;
1219#endif
 
 
 
 
 
 
 
1220	if (cfg->fc_ifindex) {
1221		err = -ENODEV;
1222		dev = dev_get_by_index(net, cfg->fc_ifindex);
1223		if (!dev)
1224			goto out;
1225		idev = in6_dev_get(dev);
1226		if (!idev)
1227			goto out;
1228	}
1229
1230	if (cfg->fc_metric == 0)
1231		cfg->fc_metric = IP6_RT_PRIO_USER;
1232
1233	table = fib6_new_table(net, cfg->fc_table);
1234	if (table == NULL) {
1235		err = -ENOBUFS;
1236		goto out;
1237	}
1238
1239	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1240
1241	if (rt == NULL) {
1242		err = -ENOMEM;
1243		goto out;
1244	}
1245
1246	rt->dst.obsolete = -1;
1247	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1248				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1249				0;
1250
1251	if (cfg->fc_protocol == RTPROT_UNSPEC)
1252		cfg->fc_protocol = RTPROT_BOOT;
1253	rt->rt6i_protocol = cfg->fc_protocol;
1254
1255	addr_type = ipv6_addr_type(&cfg->fc_dst);
1256
1257	if (addr_type & IPV6_ADDR_MULTICAST)
1258		rt->dst.input = ip6_mc_input;
1259	else if (cfg->fc_flags & RTF_LOCAL)
1260		rt->dst.input = ip6_input;
1261	else
1262		rt->dst.input = ip6_forward;
1263
1264	rt->dst.output = ip6_output;
1265
1266	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1267	rt->rt6i_dst.plen = cfg->fc_dst_len;
1268	if (rt->rt6i_dst.plen == 128)
1269	       rt->dst.flags |= DST_HOST;
1270
1271	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1272		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1273		if (!metrics) {
1274			err = -ENOMEM;
1275			goto out;
1276		}
1277		dst_init_metrics(&rt->dst, metrics, 0);
 
1278	}
1279#ifdef CONFIG_IPV6_SUBTREES
1280	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1281	rt->rt6i_src.plen = cfg->fc_src_len;
1282#endif
1283
1284	rt->rt6i_metric = cfg->fc_metric;
1285
1286	/* We cannot add true routes via loopback here,
1287	   they would result in kernel looping; promote them to reject routes
1288	 */
1289	if ((cfg->fc_flags & RTF_REJECT) ||
1290	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1291					      && !(cfg->fc_flags&RTF_LOCAL))) {
1292		/* hold loopback dev/idev if we haven't done so. */
1293		if (dev != net->loopback_dev) {
1294			if (dev) {
1295				dev_put(dev);
1296				in6_dev_put(idev);
1297			}
1298			dev = net->loopback_dev;
1299			dev_hold(dev);
1300			idev = in6_dev_get(dev);
1301			if (!idev) {
1302				err = -ENODEV;
1303				goto out;
1304			}
1305		}
1306		rt->dst.output = ip6_pkt_discard_out;
1307		rt->dst.input = ip6_pkt_discard;
1308		rt->dst.error = -ENETUNREACH;
1309		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1310		goto install_route;
1311	}
1312
1313	if (cfg->fc_flags & RTF_GATEWAY) {
1314		const struct in6_addr *gw_addr;
1315		int gwa_type;
 
1316
1317		gw_addr = &cfg->fc_gateway;
1318		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1319		gwa_type = ipv6_addr_type(gw_addr);
1320
1321		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1322			struct rt6_info *grt;
1323
1324			/* IPv6 strictly inhibits using not link-local
1325			   addresses as nexthop address.
1326			   Otherwise, router will not able to send redirects.
1327			   It is very good, but in some (rare!) circumstances
1328			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1329			   some exceptions. --ANK
1330			 */
1331			err = -EINVAL;
1332			if (!(gwa_type&IPV6_ADDR_UNICAST))
1333				goto out;
1334
1335			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
 
 
1336
1337			err = -EHOSTUNREACH;
1338			if (grt == NULL)
1339				goto out;
1340			if (dev) {
1341				if (dev != grt->rt6i_dev) {
1342					dst_release(&grt->dst);
1343					goto out;
1344				}
1345			} else {
1346				dev = grt->rt6i_dev;
1347				idev = grt->rt6i_idev;
1348				dev_hold(dev);
1349				in6_dev_hold(grt->rt6i_idev);
1350			}
1351			if (!(grt->rt6i_flags&RTF_GATEWAY))
1352				err = 0;
1353			dst_release(&grt->dst);
1354
1355			if (err)
1356				goto out;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1357		}
1358		err = -EINVAL;
1359		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1360			goto out;
1361	}
1362
1363	err = -ENODEV;
1364	if (dev == NULL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1365		goto out;
 
1366
1367	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1368		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1369			err = -EINVAL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1370			goto out;
1371		}
1372		ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1373		rt->rt6i_prefsrc.plen = 128;
1374	} else
1375		rt->rt6i_prefsrc.plen = 0;
1376
1377	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1378		struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1379		if (IS_ERR(n)) {
1380			err = PTR_ERR(n);
1381			goto out;
 
 
 
 
 
 
 
 
 
1382		}
1383		dst_set_neighbour(&rt->dst, n);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1384	}
1385
1386	rt->rt6i_flags = cfg->fc_flags;
 
1387
1388install_route:
1389	if (cfg->fc_mx) {
1390		struct nlattr *nla;
1391		int remaining;
 
1392
1393		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1394			int type = nla_type(nla);
 
1395
1396			if (type) {
1397				if (type > RTAX_MAX) {
1398					err = -EINVAL;
1399					goto out;
1400				}
1401
1402				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1403			}
 
 
 
 
 
 
 
 
 
1404		}
1405	}
 
 
 
 
 
 
 
 
 
 
 
1406
1407	rt->dst.dev = dev;
1408	rt->rt6i_idev = idev;
1409	rt->rt6i_table = table;
 
 
 
 
 
1410
1411	cfg->fc_nlinfo.nl_net = dev_net(dev);
 
1412
1413	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
 
 
 
 
 
 
 
 
1414
 
1415out:
1416	if (dev)
1417		dev_put(dev);
1418	if (idev)
1419		in6_dev_put(idev);
1420	if (rt)
1421		dst_free(&rt->dst);
1422	return err;
1423}
1424
1425static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 
1426{
 
1427	int err;
1428	struct fib6_table *table;
1429	struct net *net = dev_net(rt->rt6i_dev);
1430
1431	if (rt == net->ipv6.ip6_null_entry)
1432		return -ENOENT;
 
1433
1434	table = rt->rt6i_table;
1435	write_lock_bh(&table->tb6_lock);
1436
1437	err = fib6_del(rt, info);
1438	dst_release(&rt->dst);
1439
1440	write_unlock_bh(&table->tb6_lock);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1441
 
 
1442	return err;
1443}
1444
1445int ip6_del_rt(struct rt6_info *rt)
1446{
1447	struct nl_info info = {
1448		.nl_net = dev_net(rt->rt6i_dev),
 
1449	};
 
1450	return __ip6_del_rt(rt, &info);
1451}
1452
1453static int ip6_route_del(struct fib6_config *cfg)
1454{
 
 
 
1455	struct fib6_table *table;
1456	struct fib6_node *fn;
1457	struct rt6_info *rt;
1458	int err = -ESRCH;
1459
1460	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1461	if (table == NULL)
1462		return err;
1463
1464	read_lock_bh(&table->tb6_lock);
1465
1466	fn = fib6_locate(&table->tb6_root,
1467			 &cfg->fc_dst, cfg->fc_dst_len,
1468			 &cfg->fc_src, cfg->fc_src_len);
1469
1470	if (fn) {
1471		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1472			if (cfg->fc_ifindex &&
1473			    (rt->rt6i_dev == NULL ||
1474			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1475				continue;
1476			if (cfg->fc_flags & RTF_GATEWAY &&
1477			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1478				continue;
1479			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1480				continue;
1481			dst_hold(&rt->dst);
1482			read_unlock_bh(&table->tb6_lock);
 
 
 
 
 
 
 
 
 
1483
1484			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1485		}
1486	}
1487	read_unlock_bh(&table->tb6_lock);
1488
 
 
 
 
 
 
 
 
 
 
1489	return err;
1490}
1491
1492/*
1493 *	Handle redirects
1494 */
1495struct ip6rd_flowi {
1496	struct flowi6 fl6;
1497	struct in6_addr gateway;
1498};
1499
1500static struct rt6_info *__ip6_route_redirect(struct net *net,
1501					     struct fib6_table *table,
1502					     struct flowi6 *fl6,
1503					     int flags)
1504{
1505	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1506	struct rt6_info *rt;
1507	struct fib6_node *fn;
1508
1509	/*
1510	 * Get the "current" route for this destination and
1511	 * check if the redirect has come from approriate router.
1512	 *
1513	 * RFC 2461 specifies that redirects should only be
1514	 * accepted if they come from the nexthop to the target.
1515	 * Due to the way the routes are chosen, this notion
1516	 * is a bit fuzzy and one might need to check all possible
1517	 * routes.
1518	 */
1519
1520	read_lock_bh(&table->tb6_lock);
1521	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1522restart:
1523	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1524		/*
1525		 * Current route is on-link; redirect is always invalid.
1526		 *
1527		 * Seems, previous statement is not true. It could
1528		 * be node, which looks for us as on-link (f.e. proxy ndisc)
1529		 * But then router serving it might decide, that we should
1530		 * know truth 8)8) --ANK (980726).
1531		 */
1532		if (rt6_check_expired(rt))
1533			continue;
1534		if (!(rt->rt6i_flags & RTF_GATEWAY))
1535			continue;
1536		if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1537			continue;
1538		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1539			continue;
1540		break;
1541	}
1542
1543	if (!rt)
1544		rt = net->ipv6.ip6_null_entry;
1545	BACKTRACK(net, &fl6->saddr);
1546out:
1547	dst_hold(&rt->dst);
 
1548
1549	read_unlock_bh(&table->tb6_lock);
 
 
 
 
 
 
 
1550
1551	return rt;
 
 
 
 
 
 
 
 
 
1552};
1553
1554static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1555					   const struct in6_addr *src,
1556					   const struct in6_addr *gateway,
1557					   struct net_device *dev)
1558{
1559	int flags = RT6_LOOKUP_F_HAS_SADDR;
1560	struct net *net = dev_net(dev);
1561	struct ip6rd_flowi rdfl = {
1562		.fl6 = {
1563			.flowi6_oif = dev->ifindex,
1564			.daddr = *dest,
1565			.saddr = *src,
1566		},
1567	};
1568
1569	ipv6_addr_copy(&rdfl.gateway, gateway);
 
 
1570
1571	if (rt6_need_strict(dest))
1572		flags |= RT6_LOOKUP_F_IFACE;
 
 
 
 
1573
1574	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1575						   flags, __ip6_route_redirect);
1576}
1577
1578void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1579		  const struct in6_addr *saddr,
1580		  struct neighbour *neigh, u8 *lladdr, int on_link)
1581{
1582	struct rt6_info *rt, *nrt = NULL;
1583	struct netevent_redirect netevent;
1584	struct net *net = dev_net(neigh->dev);
1585
1586	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1587
1588	if (rt == net->ipv6.ip6_null_entry) {
1589		if (net_ratelimit())
1590			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1591			       "for redirect target\n");
1592		goto out;
1593	}
1594
1595	/*
1596	 *	We have finally decided to accept it.
1597	 */
1598
1599	neigh_update(neigh, lladdr, NUD_STALE,
1600		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1601		     NEIGH_UPDATE_F_OVERRIDE|
1602		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1603				     NEIGH_UPDATE_F_ISROUTER))
1604		     );
1605
1606	/*
1607	 * Redirect received -> path was valid.
1608	 * Look, redirects are sent only in response to data packets,
1609	 * so that this nexthop apparently is reachable. --ANK
1610	 */
1611	dst_confirm(&rt->dst);
1612
1613	/* Duplicate redirect: silently ignore. */
1614	if (neigh == dst_get_neighbour_raw(&rt->dst))
1615		goto out;
1616
1617	nrt = ip6_rt_copy(rt, dest);
1618	if (nrt == NULL)
1619		goto out;
1620
1621	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1622	if (on_link)
1623		nrt->rt6i_flags &= ~RTF_GATEWAY;
 
 
 
 
 
 
 
 
 
 
 
1624
1625	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1626	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
 
 
 
1627
1628	if (ip6_ins_rt(nrt))
1629		goto out;
 
 
1630
1631	netevent.old = &rt->dst;
1632	netevent.new = &nrt->dst;
1633	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
 
1634
1635	if (rt->rt6i_flags&RTF_CACHE) {
1636		ip6_del_rt(rt);
1637		return;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1638	}
 
1639
1640out:
1641	dst_release(&rt->dst);
1642}
1643
1644/*
1645 *	Handle ICMP "packet too big" messages
1646 *	i.e. Path MTU discovery
1647 */
1648
1649static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1650			     struct net *net, u32 pmtu, int ifindex)
1651{
1652	struct rt6_info *rt, *nrt;
1653	int allfrag = 0;
1654again:
1655	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1656	if (rt == NULL)
1657		return;
 
 
 
1658
1659	if (rt6_check_expired(rt)) {
1660		ip6_del_rt(rt);
1661		goto again;
 
 
 
1662	}
1663
1664	if (pmtu >= dst_mtu(&rt->dst))
1665		goto out;
1666
1667	if (pmtu < IPV6_MIN_MTU) {
1668		/*
1669		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1670		 * MTU (1280) and a fragment header should always be included
1671		 * after a node receiving Too Big message reporting PMTU is
1672		 * less than the IPv6 Minimum Link MTU.
1673		 */
1674		pmtu = IPV6_MIN_MTU;
1675		allfrag = 1;
1676	}
1677
1678	/* New mtu received -> path was valid.
1679	   They are sent only in response to data packets,
1680	   so that this nexthop apparently is reachable. --ANK
1681	 */
1682	dst_confirm(&rt->dst);
1683
1684	/* Host route. If it is static, it would be better
1685	   not to override it, but add new one, so that
1686	   when cache entry will expire old pmtu
1687	   would return automatically.
1688	 */
1689	if (rt->rt6i_flags & RTF_CACHE) {
1690		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1691		if (allfrag) {
1692			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1693			features |= RTAX_FEATURE_ALLFRAG;
1694			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1695		}
1696		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1697		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1698		goto out;
1699	}
1700
1701	/* Network route.
1702	   Two cases are possible:
1703	   1. It is connected route. Action: COW
1704	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
 
 
 
 
 
1705	 */
1706	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1707		nrt = rt6_alloc_cow(rt, daddr, saddr);
1708	else
1709		nrt = rt6_alloc_clone(rt, daddr);
1710
1711	if (nrt) {
1712		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1713		if (allfrag) {
1714			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1715			features |= RTAX_FEATURE_ALLFRAG;
1716			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1717		}
1718
1719		/* According to RFC 1981, detecting PMTU increase shouldn't be
1720		 * happened within 5 mins, the recommended timer is 10 mins.
1721		 * Here this route expiration time is set to ip6_rt_mtu_expires
1722		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1723		 * and detecting PMTU increase will be automatically happened.
1724		 */
1725		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1726		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
 
1727
1728		ip6_ins_rt(nrt);
 
 
 
1729	}
1730out:
1731	dst_release(&rt->dst);
1732}
1733
1734void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1735			struct net_device *dev, u32 pmtu)
1736{
1737	struct net *net = dev_net(dev);
 
 
 
 
 
1738
1739	/*
1740	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1741	 * is sending along the path" that caused the Packet Too Big message.
1742	 * Since it's not possible in the general case to determine which
1743	 * interface was used to send the original packet, we update the MTU
1744	 * on the interface that will be used to send future packets. We also
1745	 * update the MTU on the interface that received the Packet Too Big in
1746	 * case the original packet was forced out that interface with
1747	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1748	 * correct behaviour, which would be to update the MTU on all
1749	 * interfaces.
1750	 */
1751	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1752	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1753}
1754
1755/*
1756 *	Misc support functions
1757 */
 
 
 
1758
1759static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1760				    const struct in6_addr *dest)
1761{
1762	struct net *net = dev_net(ort->rt6i_dev);
1763	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1764					    ort->dst.dev, 0);
1765
1766	if (rt) {
1767		rt->dst.input = ort->dst.input;
1768		rt->dst.output = ort->dst.output;
1769		rt->dst.flags |= DST_HOST;
1770
1771		ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1772		rt->rt6i_dst.plen = 128;
1773		dst_copy_metrics(&rt->dst, &ort->dst);
1774		rt->dst.error = ort->dst.error;
1775		rt->rt6i_idev = ort->rt6i_idev;
1776		if (rt->rt6i_idev)
1777			in6_dev_hold(rt->rt6i_idev);
1778		rt->dst.lastuse = jiffies;
1779		rt->rt6i_expires = 0;
1780
1781		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1782		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1783		rt->rt6i_metric = 0;
1784
1785#ifdef CONFIG_IPV6_SUBTREES
1786		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1787#endif
1788		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1789		rt->rt6i_table = ort->rt6i_table;
 
 
 
 
 
 
1790	}
1791	return rt;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1792}
1793
1794#ifdef CONFIG_IPV6_ROUTE_INFO
1795static struct rt6_info *rt6_get_route_info(struct net *net,
1796					   const struct in6_addr *prefix, int prefixlen,
1797					   const struct in6_addr *gwaddr, int ifindex)
 
1798{
 
 
1799	struct fib6_node *fn;
1800	struct rt6_info *rt = NULL;
1801	struct fib6_table *table;
1802
1803	table = fib6_get_table(net, RT6_TABLE_INFO);
1804	if (table == NULL)
1805		return NULL;
1806
1807	write_lock_bh(&table->tb6_lock);
1808	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1809	if (!fn)
1810		goto out;
1811
1812	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1813		if (rt->rt6i_dev->ifindex != ifindex)
 
1814			continue;
1815		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1816			continue;
1817		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
 
 
 
 
 
1818			continue;
1819		dst_hold(&rt->dst);
1820		break;
1821	}
1822out:
1823	write_unlock_bh(&table->tb6_lock);
1824	return rt;
1825}
1826
1827static struct rt6_info *rt6_add_route_info(struct net *net,
1828					   const struct in6_addr *prefix, int prefixlen,
1829					   const struct in6_addr *gwaddr, int ifindex,
1830					   unsigned pref)
 
1831{
1832	struct fib6_config cfg = {
1833		.fc_table	= RT6_TABLE_INFO,
1834		.fc_metric	= IP6_RT_PRIO_USER,
1835		.fc_ifindex	= ifindex,
1836		.fc_dst_len	= prefixlen,
1837		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1838				  RTF_UP | RTF_PREF(pref),
1839		.fc_nlinfo.pid = 0,
 
 
1840		.fc_nlinfo.nlh = NULL,
1841		.fc_nlinfo.nl_net = net,
1842	};
1843
1844	ipv6_addr_copy(&cfg.fc_dst, prefix);
1845	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
 
1846
1847	/* We should treat it as a default route if prefix length is 0. */
1848	if (!prefixlen)
1849		cfg.fc_flags |= RTF_DEFAULT;
1850
1851	ip6_route_add(&cfg);
1852
1853	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1854}
1855#endif
1856
1857struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
 
 
1858{
1859	struct rt6_info *rt;
 
1860	struct fib6_table *table;
1861
1862	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1863	if (table == NULL)
1864		return NULL;
1865
1866	write_lock_bh(&table->tb6_lock);
1867	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1868		if (dev == rt->rt6i_dev &&
1869		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1870		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
 
 
 
 
 
 
 
1871			break;
1872	}
1873	if (rt)
1874		dst_hold(&rt->dst);
1875	write_unlock_bh(&table->tb6_lock);
1876	return rt;
1877}
1878
1879struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
 
1880				     struct net_device *dev,
1881				     unsigned int pref)
1882{
1883	struct fib6_config cfg = {
1884		.fc_table	= RT6_TABLE_DFLT,
1885		.fc_metric	= IP6_RT_PRIO_USER,
1886		.fc_ifindex	= dev->ifindex,
1887		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1888				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1889		.fc_nlinfo.pid = 0,
 
 
1890		.fc_nlinfo.nlh = NULL,
1891		.fc_nlinfo.nl_net = dev_net(dev),
1892	};
1893
1894	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1895
1896	ip6_route_add(&cfg);
 
1897
1898	return rt6_get_dflt_router(gwaddr, dev);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1899}
1900
1901void rt6_purge_dflt_routers(struct net *net)
1902{
1903	struct rt6_info *rt;
1904	struct fib6_table *table;
 
 
1905
1906	/* NOTE: Keep consistent with rt6_get_dflt_router */
1907	table = fib6_get_table(net, RT6_TABLE_DFLT);
1908	if (table == NULL)
1909		return;
1910
1911restart:
1912	read_lock_bh(&table->tb6_lock);
1913	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1914		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1915			dst_hold(&rt->dst);
1916			read_unlock_bh(&table->tb6_lock);
1917			ip6_del_rt(rt);
1918			goto restart;
1919		}
1920	}
1921	read_unlock_bh(&table->tb6_lock);
 
1922}
1923
1924static void rtmsg_to_fib6_config(struct net *net,
1925				 struct in6_rtmsg *rtmsg,
1926				 struct fib6_config *cfg)
1927{
1928	memset(cfg, 0, sizeof(*cfg));
 
 
 
 
 
 
 
 
 
1929
1930	cfg->fc_table = RT6_TABLE_MAIN;
1931	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1932	cfg->fc_metric = rtmsg->rtmsg_metric;
1933	cfg->fc_expires = rtmsg->rtmsg_info;
1934	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1935	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1936	cfg->fc_flags = rtmsg->rtmsg_flags;
1937
1938	cfg->fc_nlinfo.nl_net = net;
1939
1940	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1941	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1942	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
 
1943}
1944
1945int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1946{
1947	struct fib6_config cfg;
1948	struct in6_rtmsg rtmsg;
1949	int err;
1950
1951	switch(cmd) {
1952	case SIOCADDRT:		/* Add a route */
1953	case SIOCDELRT:		/* Delete a route */
1954		if (!capable(CAP_NET_ADMIN))
1955			return -EPERM;
1956		err = copy_from_user(&rtmsg, arg,
1957				     sizeof(struct in6_rtmsg));
1958		if (err)
1959			return -EFAULT;
1960
1961		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1962
1963		rtnl_lock();
1964		switch (cmd) {
1965		case SIOCADDRT:
1966			err = ip6_route_add(&cfg);
1967			break;
1968		case SIOCDELRT:
1969			err = ip6_route_del(&cfg);
1970			break;
1971		default:
1972			err = -EINVAL;
1973		}
1974		rtnl_unlock();
1975
1976		return err;
 
 
 
 
 
 
 
1977	}
1978
1979	return -EINVAL;
1980}
1981
1982/*
1983 *	Drop the packet on the floor
1984 */
1985
1986static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1987{
1988	int type;
1989	struct dst_entry *dst = skb_dst(skb);
 
 
 
 
 
 
 
 
 
 
1990	switch (ipstats_mib_noroutes) {
1991	case IPSTATS_MIB_INNOROUTES:
1992		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1993		if (type == IPV6_ADDR_ANY) {
1994			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1995				      IPSTATS_MIB_INADDRERRORS);
1996			break;
1997		}
1998		/* FALLTHROUGH */
1999	case IPSTATS_MIB_OUTNOROUTES:
2000		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2001			      ipstats_mib_noroutes);
2002		break;
2003	}
 
 
 
 
 
2004	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2005	kfree_skb(skb);
2006	return 0;
2007}
2008
2009static int ip6_pkt_discard(struct sk_buff *skb)
2010{
2011	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2012}
2013
2014static int ip6_pkt_discard_out(struct sk_buff *skb)
2015{
2016	skb->dev = skb_dst(skb)->dev;
2017	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2018}
2019
2020#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2021
2022static int ip6_pkt_prohibit(struct sk_buff *skb)
2023{
2024	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2025}
2026
2027static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2028{
2029	skb->dev = skb_dst(skb)->dev;
2030	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2031}
2032
2033#endif
2034
2035/*
2036 *	Allocate a dst for local (unicast / anycast) address.
2037 */
2038
2039struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2040				    const struct in6_addr *addr,
2041				    int anycast)
2042{
2043	struct net *net = dev_net(idev->dev);
2044	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2045					    net->loopback_dev, 0);
2046	struct neighbour *neigh;
2047
2048	if (rt == NULL) {
2049		if (net_ratelimit())
2050			pr_warning("IPv6:  Maximum number of routes reached,"
2051				   " consider increasing route/max_size.\n");
2052		return ERR_PTR(-ENOMEM);
2053	}
2054
2055	in6_dev_hold(idev);
2056
2057	rt->dst.flags |= DST_HOST;
2058	rt->dst.input = ip6_input;
2059	rt->dst.output = ip6_output;
2060	rt->rt6i_idev = idev;
2061	rt->dst.obsolete = -1;
2062
2063	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2064	if (anycast)
2065		rt->rt6i_flags |= RTF_ANYCAST;
2066	else
2067		rt->rt6i_flags |= RTF_LOCAL;
2068	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2069	if (IS_ERR(neigh)) {
2070		dst_free(&rt->dst);
2071
2072		return ERR_CAST(neigh);
 
 
 
 
 
2073	}
2074	dst_set_neighbour(&rt->dst, neigh);
2075
2076	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2077	rt->rt6i_dst.plen = 128;
2078	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2079
2080	atomic_set(&rt->dst.__refcnt, 1);
2081
2082	return rt;
2083}
2084
2085int ip6_route_get_saddr(struct net *net,
2086			struct rt6_info *rt,
2087			const struct in6_addr *daddr,
2088			unsigned int prefs,
2089			struct in6_addr *saddr)
2090{
2091	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2092	int err = 0;
2093	if (rt->rt6i_prefsrc.plen)
2094		ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2095	else
2096		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2097					 daddr, prefs, saddr);
2098	return err;
2099}
2100
2101/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace owning the null entry */
	struct in6_addr *addr;		/* address compared with rt6i_prefsrc */
};
2107
2108static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2109{
2110	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2111	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2112	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2113
2114	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2115	    rt != net->ipv6.ip6_null_entry &&
2116	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
 
 
2117		/* remove prefsrc entry */
2118		rt->rt6i_prefsrc.plen = 0;
 
2119	}
2120	return 0;
2121}
2122
2123void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2124{
2125	struct net *net = dev_net(ifp->idev->dev);
2126	struct arg_dev_net_ip adni = {
2127		.dev = ifp->idev->dev,
2128		.net = net,
2129		.addr = &ifp->addr,
2130	};
2131	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2132}
2133
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace owning the null entry */
};
2138
2139static int fib6_ifdown(struct rt6_info *rt, void *arg)
 
2140{
2141	const struct arg_dev_net *adn = arg;
2142	const struct net_device *dev = adn->dev;
 
 
 
 
2143
2144	if ((rt->rt6i_dev == dev || dev == NULL) &&
2145	    rt != adn->net->ipv6.ip6_null_entry) {
2146		RT6_TRACE("deleted by ifdown %p\n", rt);
2147		return -1;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2148	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2149	return 0;
2150}
2151
2152void rt6_ifdown(struct net *net, struct net_device *dev)
2153{
2154	struct arg_dev_net adn = {
2155		.dev = dev,
2156		.net = net,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2157	};
 
2158
2159	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2160	icmp6_clean_all(fib6_ifdown, &adn);
 
 
2161}
2162
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* the new MTU */
};
2168
2169static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2170{
2171	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2172	struct inet6_dev *idev;
2173
2174	/* In IPv6 pmtu discovery is not optional,
2175	   so that RTAX_MTU lock cannot disable it.
2176	   We still use this lock to block changes
2177	   caused by addrconf/ndisc.
2178	*/
2179
2180	idev = __in6_dev_get(arg->dev);
2181	if (idev == NULL)
2182		return 0;
2183
2184	/* For administrative MTU increase, there is no way to discover
2185	   IPv6 PMTU increase, so PMTU increase should be updated here.
2186	   Since RFC 1981 doesn't include administrative MTU increase
2187	   update PMTU increase is a MUST. (i.e. jumbo frame)
2188	 */
2189	/*
2190	   If new MTU is less than route PMTU, this new MTU will be the
2191	   lowest MTU in the path, update the route PMTU to reflect PMTU
2192	   decreases; if new MTU is greater than route PMTU, and the
2193	   old MTU is the lowest MTU in the path, update the route PMTU
2194	   to reflect the increase. In this case if the other nodes' MTU
2195	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2196	   PMTU discouvery.
2197	 */
2198	if (rt->rt6i_dev == arg->dev &&
2199	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2200	    (dst_mtu(&rt->dst) >= arg->mtu ||
2201	     (dst_mtu(&rt->dst) < arg->mtu &&
2202	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2203		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2204	}
2205	return 0;
 
2206}
2207
2208void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2209{
2210	struct rt6_mtu_change_arg arg = {
2211		.dev = dev,
2212		.mtu = mtu,
2213	};
2214
2215	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2216}
2217
2218static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
 
2219	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
 
2220	[RTA_OIF]               = { .type = NLA_U32 },
2221	[RTA_IIF]		= { .type = NLA_U32 },
2222	[RTA_PRIORITY]          = { .type = NLA_U32 },
2223	[RTA_METRICS]           = { .type = NLA_NESTED },
 
 
 
 
 
 
 
 
 
 
 
 
2224};
2225
2226static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2227			      struct fib6_config *cfg)
 
2228{
2229	struct rtmsg *rtm;
2230	struct nlattr *tb[RTA_MAX+1];
 
2231	int err;
2232
2233	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
 
2234	if (err < 0)
2235		goto errout;
2236
2237	err = -EINVAL;
2238	rtm = nlmsg_data(nlh);
2239	memset(cfg, 0, sizeof(*cfg));
2240
2241	cfg->fc_table = rtm->rtm_table;
2242	cfg->fc_dst_len = rtm->rtm_dst_len;
2243	cfg->fc_src_len = rtm->rtm_src_len;
2244	cfg->fc_flags = RTF_UP;
2245	cfg->fc_protocol = rtm->rtm_protocol;
 
 
 
 
 
 
 
2246
2247	if (rtm->rtm_type == RTN_UNREACHABLE)
 
 
 
2248		cfg->fc_flags |= RTF_REJECT;
2249
2250	if (rtm->rtm_type == RTN_LOCAL)
2251		cfg->fc_flags |= RTF_LOCAL;
2252
2253	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2254	cfg->fc_nlinfo.nlh = nlh;
2255	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
 
 
 
 
 
 
 
 
 
 
 
2256
2257	if (tb[RTA_GATEWAY]) {
2258		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2259		cfg->fc_flags |= RTF_GATEWAY;
2260	}
 
 
 
 
2261
2262	if (tb[RTA_DST]) {
2263		int plen = (rtm->rtm_dst_len + 7) >> 3;
2264
2265		if (nla_len(tb[RTA_DST]) < plen)
2266			goto errout;
2267
2268		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2269	}
2270
2271	if (tb[RTA_SRC]) {
2272		int plen = (rtm->rtm_src_len + 7) >> 3;
2273
2274		if (nla_len(tb[RTA_SRC]) < plen)
2275			goto errout;
2276
2277		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2278	}
2279
2280	if (tb[RTA_PREFSRC])
2281		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2282
2283	if (tb[RTA_OIF])
2284		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2285
2286	if (tb[RTA_PRIORITY])
2287		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2288
2289	if (tb[RTA_METRICS]) {
2290		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2291		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2292	}
2293
2294	if (tb[RTA_TABLE])
2295		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2296
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2297	err = 0;
2298errout:
2299	return err;
2300}
2301
2302static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2303{
2304	struct fib6_config cfg;
2305	int err;
2306
2307	err = rtm_to_fib6_config(skb, nlh, &cfg);
2308	if (err < 0)
2309		return err;
2310
2311	return ip6_route_del(&cfg);
 
 
 
 
 
 
 
 
 
 
 
2312}
2313
2314static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
2315{
2316	struct fib6_config cfg;
2317	int err;
2318
2319	err = rtm_to_fib6_config(skb, nlh, &cfg);
2320	if (err < 0)
2321		return err;
2322
2323	return ip6_route_add(&cfg);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2324}
2325
2326static inline size_t rt6_nlmsg_size(void)
2327{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2328	return NLMSG_ALIGN(sizeof(struct rtmsg))
2329	       + nla_total_size(16) /* RTA_SRC */
2330	       + nla_total_size(16) /* RTA_DST */
2331	       + nla_total_size(16) /* RTA_GATEWAY */
2332	       + nla_total_size(16) /* RTA_PREFSRC */
2333	       + nla_total_size(4) /* RTA_TABLE */
2334	       + nla_total_size(4) /* RTA_IIF */
2335	       + nla_total_size(4) /* RTA_OIF */
2336	       + nla_total_size(4) /* RTA_PRIORITY */
2337	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2338	       + nla_total_size(sizeof(struct rta_cacheinfo));
 
 
 
2339}
2340
2341static int rt6_fill_node(struct net *net,
2342			 struct sk_buff *skb, struct rt6_info *rt,
2343			 struct in6_addr *dst, struct in6_addr *src,
2344			 int iif, int type, u32 pid, u32 seq,
2345			 int prefix, int nowait, unsigned int flags)
2346{
2347	struct rtmsg *rtm;
2348	struct nlmsghdr *nlh;
2349	long expires;
2350	u32 table;
2351	struct neighbour *n;
2352
2353	if (prefix) {	/* user wants prefix routes only */
2354		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2355			/* success since this is not a prefix route */
2356			return 1;
2357		}
 
 
 
 
 
 
 
 
 
 
2358	}
2359
2360	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2361	if (nlh == NULL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2362		return -EMSGSIZE;
2363
 
 
 
 
 
 
 
 
 
 
2364	rtm = nlmsg_data(nlh);
2365	rtm->rtm_family = AF_INET6;
2366	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2367	rtm->rtm_src_len = rt->rt6i_src.plen;
2368	rtm->rtm_tos = 0;
2369	if (rt->rt6i_table)
2370		table = rt->rt6i_table->tb6_id;
2371	else
2372		table = RT6_TABLE_UNSPEC;
2373	rtm->rtm_table = table;
2374	NLA_PUT_U32(skb, RTA_TABLE, table);
2375	if (rt->rt6i_flags&RTF_REJECT)
2376		rtm->rtm_type = RTN_UNREACHABLE;
2377	else if (rt->rt6i_flags&RTF_LOCAL)
2378		rtm->rtm_type = RTN_LOCAL;
2379	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2380		rtm->rtm_type = RTN_LOCAL;
2381	else
2382		rtm->rtm_type = RTN_UNICAST;
2383	rtm->rtm_flags = 0;
2384	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2385	rtm->rtm_protocol = rt->rt6i_protocol;
2386	if (rt->rt6i_flags&RTF_DYNAMIC)
2387		rtm->rtm_protocol = RTPROT_REDIRECT;
2388	else if (rt->rt6i_flags & RTF_ADDRCONF)
2389		rtm->rtm_protocol = RTPROT_KERNEL;
2390	else if (rt->rt6i_flags&RTF_DEFAULT)
2391		rtm->rtm_protocol = RTPROT_RA;
2392
2393	if (rt->rt6i_flags&RTF_CACHE)
2394		rtm->rtm_flags |= RTM_F_CLONED;
2395
2396	if (dst) {
2397		NLA_PUT(skb, RTA_DST, 16, dst);
 
2398		rtm->rtm_dst_len = 128;
2399	} else if (rtm->rtm_dst_len)
2400		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
 
2401#ifdef CONFIG_IPV6_SUBTREES
2402	if (src) {
2403		NLA_PUT(skb, RTA_SRC, 16, src);
 
2404		rtm->rtm_src_len = 128;
2405	} else if (rtm->rtm_src_len)
2406		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
 
2407#endif
2408	if (iif) {
2409#ifdef CONFIG_IPV6_MROUTE
2410		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2411			int err = ip6mr_get_route(net, skb, rtm, nowait);
2412			if (err <= 0) {
2413				if (!nowait) {
2414					if (err == 0)
2415						return 0;
2416					goto nla_put_failure;
2417				} else {
2418					if (err == -EMSGSIZE)
2419						goto nla_put_failure;
2420				}
2421			}
2422		} else
2423#endif
2424			NLA_PUT_U32(skb, RTA_IIF, iif);
2425	} else if (dst) {
 
2426		struct in6_addr saddr_buf;
2427		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2428			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 
2429	}
2430
2431	if (rt->rt6i_prefsrc.plen) {
2432		struct in6_addr saddr_buf;
2433		ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2434		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 
2435	}
2436
2437	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
 
2438		goto nla_put_failure;
2439
2440	rcu_read_lock();
2441	n = dst_get_neighbour(&rt->dst);
2442	if (n)
2443		NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2444	rcu_read_unlock();
2445
2446	if (rt->dst.dev)
2447		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2448
2449	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
 
 
 
 
 
 
 
 
 
 
2450
2451	if (!(rt->rt6i_flags & RTF_EXPIRES))
2452		expires = 0;
2453	else if (rt->rt6i_expires - jiffies < INT_MAX)
2454		expires = rt->rt6i_expires - jiffies;
2455	else
2456		expires = INT_MAX;
 
 
 
 
 
 
 
2457
2458	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2459			       expires, rt->dst.error) < 0)
 
 
 
 
 
 
 
 
 
2460		goto nla_put_failure;
2461
2462	return nlmsg_end(skb, nlh);
 
 
2463
2464nla_put_failure:
2465	nlmsg_cancel(skb, nlh);
2466	return -EMSGSIZE;
2467}
2468
2469int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2470{
2471	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2472	int prefix;
 
 
 
2473
2474	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2475		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2476		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2477	} else
2478		prefix = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2479
2480	return rt6_fill_node(arg->net,
2481		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2482		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2483		     prefix, 0, NLM_F_MULTI);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2484}
2485
2486static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 
2487{
2488	struct net *net = sock_net(in_skb->sk);
2489	struct nlattr *tb[RTA_MAX+1];
 
 
 
2490	struct rt6_info *rt;
2491	struct sk_buff *skb;
2492	struct rtmsg *rtm;
2493	struct flowi6 fl6;
2494	int err, iif = 0;
2495
2496	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2497	if (err < 0)
2498		goto errout;
2499
2500	err = -EINVAL;
2501	memset(&fl6, 0, sizeof(fl6));
 
 
2502
2503	if (tb[RTA_SRC]) {
2504		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2505			goto errout;
2506
2507		ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2508	}
2509
2510	if (tb[RTA_DST]) {
2511		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2512			goto errout;
2513
2514		ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2515	}
2516
2517	if (tb[RTA_IIF])
2518		iif = nla_get_u32(tb[RTA_IIF]);
2519
2520	if (tb[RTA_OIF])
2521		fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2522
2523	if (iif) {
2524		struct net_device *dev;
2525		dev = __dev_get_by_index(net, iif);
 
 
 
 
2526		if (!dev) {
 
2527			err = -ENODEV;
2528			goto errout;
2529		}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2530	}
2531
2532	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2533	if (skb == NULL) {
 
2534		err = -ENOBUFS;
2535		goto errout;
2536	}
2537
2538	/* Reserve room for dummy headers, this skb can pass
2539	   through good chunk of routing engine.
2540	 */
2541	skb_reset_mac_header(skb);
2542	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2543
2544	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2545	skb_dst_set(skb, &rt->dst);
2546
2547	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2548			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2549			    nlh->nlmsg_seq, 0, 0, 0);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2550	if (err < 0) {
2551		kfree_skb(skb);
2552		goto errout;
2553	}
2554
2555	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2556errout:
2557	return err;
2558}
2559
2560void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
 
2561{
2562	struct sk_buff *skb;
2563	struct net *net = info->nl_net;
2564	u32 seq;
2565	int err;
2566
2567	err = -ENOBUFS;
2568	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2569
2570	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2571	if (skb == NULL)
2572		goto errout;
2573
2574	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2575				event, info->pid, seq, 0, 0, 0);
2576	if (err < 0) {
2577		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2578		WARN_ON(err == -EMSGSIZE);
2579		kfree_skb(skb);
2580		goto errout;
2581	}
2582	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2583		    info->nlh, gfp_any());
2584	return;
2585errout:
2586	if (err < 0)
2587		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2588}
2589
2590static int ip6_route_dev_notify(struct notifier_block *this,
2591				unsigned long event, void *data)
2592{
2593	struct net_device *dev = (struct net_device *)data;
2594	struct net *net = dev_net(dev);
2595
2596	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
 
 
 
 
2597		net->ipv6.ip6_null_entry->dst.dev = dev;
2598		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2599#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2600		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2601		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2602		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2603		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2604#endif
 
 
 
 
 
 
 
 
 
 
2605	}
2606
2607	return NOTIFY_OK;
2608}
2609
2610/*
2611 *	/proc
2612 */
2613
2614#ifdef CONFIG_PROC_FS
2615
/*
 * Buffer-window bookkeeping for /proc output.
 * NOTE(review): no user of this struct is visible in this part of the
 * file; field meanings below are presumed from the names -- confirm.
 */
struct rt6_proc_arg {
	char *buffer;	/* presumably the output buffer */
	int offset;	/* presumably the requested start offset */
	int length;	/* presumably the requested length */
	int skip;
	int len;
};
2624
2625static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2626{
2627	struct seq_file *m = p_arg;
2628	struct neighbour *n;
2629
2630	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2631
2632#ifdef CONFIG_IPV6_SUBTREES
2633	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2634#else
2635	seq_puts(m, "00000000000000000000000000000000 00 ");
2636#endif
2637	rcu_read_lock();
2638	n = dst_get_neighbour(&rt->dst);
2639	if (n) {
2640		seq_printf(m, "%pi6", n->primary_key);
2641	} else {
2642		seq_puts(m, "00000000000000000000000000000000");
2643	}
2644	rcu_read_unlock();
2645	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2646		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2647		   rt->dst.__use, rt->rt6i_flags,
2648		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2649	return 0;
2650}
2651
2652static int ipv6_route_show(struct seq_file *m, void *v)
2653{
2654	struct net *net = (struct net *)m->private;
2655	fib6_clean_all(net, rt6_info_route, 0, m);
2656	return 0;
2657}
2658
2659static int ipv6_route_open(struct inode *inode, struct file *file)
2660{
2661	return single_open_net(inode, file, ipv6_route_show);
2662}
2663
2664static const struct file_operations ipv6_route_proc_fops = {
2665	.owner		= THIS_MODULE,
2666	.open		= ipv6_route_open,
2667	.read		= seq_read,
2668	.llseek		= seq_lseek,
2669	.release	= single_release_net,
2670};
2671
2672static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2673{
2674	struct net *net = (struct net *)seq->private;
2675	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2676		   net->ipv6.rt6_stats->fib_nodes,
2677		   net->ipv6.rt6_stats->fib_route_nodes,
2678		   net->ipv6.rt6_stats->fib_rt_alloc,
2679		   net->ipv6.rt6_stats->fib_rt_entries,
2680		   net->ipv6.rt6_stats->fib_rt_cache,
2681		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2682		   net->ipv6.rt6_stats->fib_discarded_routes);
2683
2684	return 0;
2685}
2686
2687static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2688{
2689	return single_open_net(inode, file, rt6_stats_seq_show);
2690}
2691
2692static const struct file_operations rt6_stats_seq_fops = {
2693	.owner	 = THIS_MODULE,
2694	.open	 = rt6_stats_seq_open,
2695	.read	 = seq_read,
2696	.llseek	 = seq_lseek,
2697	.release = single_release_net,
2698};
2699#endif	/* CONFIG_PROC_FS */
2700
2701#ifdef CONFIG_SYSCTL
2702
2703static
2704int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2705			      void __user *buffer, size_t *lenp, loff_t *ppos)
2706{
2707	struct net *net;
2708	int delay;
 
2709	if (!write)
2710		return -EINVAL;
2711
2712	net = (struct net *)ctl->extra1;
2713	delay = net->ipv6.sysctl.flush_delay;
2714	proc_dointvec(ctl, write, buffer, lenp, ppos);
2715	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
 
 
 
2716	return 0;
2717}
2718
2719ctl_table ipv6_route_table_template[] = {
2720	{
2721		.procname	=	"flush",
2722		.data		=	&init_net.ipv6.sysctl.flush_delay,
2723		.maxlen		=	sizeof(int),
2724		.mode		=	0200,
2725		.proc_handler	=	ipv6_sysctl_rtcache_flush
2726	},
2727	{
2728		.procname	=	"gc_thresh",
2729		.data		=	&ip6_dst_ops_template.gc_thresh,
2730		.maxlen		=	sizeof(int),
2731		.mode		=	0644,
2732		.proc_handler	=	proc_dointvec,
2733	},
2734	{
2735		.procname	=	"max_size",
2736		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2737		.maxlen		=	sizeof(int),
2738		.mode		=	0644,
2739		.proc_handler	=	proc_dointvec,
2740	},
2741	{
2742		.procname	=	"gc_min_interval",
2743		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2744		.maxlen		=	sizeof(int),
2745		.mode		=	0644,
2746		.proc_handler	=	proc_dointvec_jiffies,
2747	},
2748	{
2749		.procname	=	"gc_timeout",
2750		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2751		.maxlen		=	sizeof(int),
2752		.mode		=	0644,
2753		.proc_handler	=	proc_dointvec_jiffies,
2754	},
2755	{
2756		.procname	=	"gc_interval",
2757		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2758		.maxlen		=	sizeof(int),
2759		.mode		=	0644,
2760		.proc_handler	=	proc_dointvec_jiffies,
2761	},
2762	{
2763		.procname	=	"gc_elasticity",
2764		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2765		.maxlen		=	sizeof(int),
2766		.mode		=	0644,
2767		.proc_handler	=	proc_dointvec,
2768	},
2769	{
2770		.procname	=	"mtu_expires",
2771		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2772		.maxlen		=	sizeof(int),
2773		.mode		=	0644,
2774		.proc_handler	=	proc_dointvec_jiffies,
2775	},
2776	{
2777		.procname	=	"min_adv_mss",
2778		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2779		.maxlen		=	sizeof(int),
2780		.mode		=	0644,
2781		.proc_handler	=	proc_dointvec,
2782	},
2783	{
2784		.procname	=	"gc_min_interval_ms",
2785		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2786		.maxlen		=	sizeof(int),
2787		.mode		=	0644,
2788		.proc_handler	=	proc_dointvec_ms_jiffies,
2789	},
 
 
 
 
 
 
 
 
 
2790	{ }
2791};
2792
2793struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2794{
2795	struct ctl_table *table;
2796
2797	table = kmemdup(ipv6_route_table_template,
2798			sizeof(ipv6_route_table_template),
2799			GFP_KERNEL);
2800
2801	if (table) {
2802		table[0].data = &net->ipv6.sysctl.flush_delay;
2803		table[0].extra1 = net;
2804		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2805		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2806		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2807		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2808		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2809		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2810		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2811		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2812		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
 
 
 
 
 
2813	}
2814
2815	return table;
2816}
2817#endif
2818
2819static int __net_init ip6_route_net_init(struct net *net)
2820{
2821	int ret = -ENOMEM;
2822
2823	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2824	       sizeof(net->ipv6.ip6_dst_ops));
2825
2826	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2827		goto out_ip6_dst_ops;
2828
 
 
 
 
 
 
2829	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2830					   sizeof(*net->ipv6.ip6_null_entry),
2831					   GFP_KERNEL);
2832	if (!net->ipv6.ip6_null_entry)
2833		goto out_ip6_dst_entries;
2834	net->ipv6.ip6_null_entry->dst.path =
2835		(struct dst_entry *)net->ipv6.ip6_null_entry;
2836	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2837	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2838			 ip6_template_metrics, true);
 
2839
2840#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 
2841	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2842					       sizeof(*net->ipv6.ip6_prohibit_entry),
2843					       GFP_KERNEL);
2844	if (!net->ipv6.ip6_prohibit_entry)
2845		goto out_ip6_null_entry;
2846	net->ipv6.ip6_prohibit_entry->dst.path =
2847		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2848	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2849	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2850			 ip6_template_metrics, true);
 
2851
2852	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2853					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2854					       GFP_KERNEL);
2855	if (!net->ipv6.ip6_blk_hole_entry)
2856		goto out_ip6_prohibit_entry;
2857	net->ipv6.ip6_blk_hole_entry->dst.path =
2858		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2859	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2860	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2861			 ip6_template_metrics, true);
 
 
 
 
2862#endif
2863
2864	net->ipv6.sysctl.flush_delay = 0;
2865	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2866	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2867	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2868	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2869	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2870	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2871	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
 
2872
2873#ifdef CONFIG_PROC_FS
2874	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2875	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2876#endif
2877	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2878
2879	ret = 0;
2880out:
2881	return ret;
2882
2883#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2884out_ip6_prohibit_entry:
2885	kfree(net->ipv6.ip6_prohibit_entry);
2886out_ip6_null_entry:
2887	kfree(net->ipv6.ip6_null_entry);
2888#endif
 
 
2889out_ip6_dst_entries:
2890	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2891out_ip6_dst_ops:
2892	goto out;
2893}
2894
2895static void __net_exit ip6_route_net_exit(struct net *net)
2896{
2897#ifdef CONFIG_PROC_FS
2898	proc_net_remove(net, "ipv6_route");
2899	proc_net_remove(net, "rt6_stats");
2900#endif
2901	kfree(net->ipv6.ip6_null_entry);
2902#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2903	kfree(net->ipv6.ip6_prohibit_entry);
2904	kfree(net->ipv6.ip6_blk_hole_entry);
2905#endif
2906	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2907}
2908
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2909static struct pernet_operations ip6_route_net_ops = {
2910	.init = ip6_route_net_init,
2911	.exit = ip6_route_net_exit,
2912};
2913
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2914static struct notifier_block ip6_route_dev_notifier = {
2915	.notifier_call = ip6_route_dev_notify,
2916	.priority = 0,
2917};
2918
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2919int __init ip6_route_init(void)
2920{
2921	int ret;
 
2922
2923	ret = -ENOMEM;
2924	ip6_dst_ops_template.kmem_cachep =
2925		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2926				  SLAB_HWCACHE_ALIGN, NULL);
2927	if (!ip6_dst_ops_template.kmem_cachep)
2928		goto out;
2929
2930	ret = dst_entries_init(&ip6_dst_blackhole_ops);
2931	if (ret)
2932		goto out_kmem_cache;
2933
2934	ret = register_pernet_subsys(&ip6_route_net_ops);
2935	if (ret)
2936		goto out_dst_entries;
2937
 
 
 
 
2938	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2939
2940	/* Registering of the loopback is done before this portion of code,
2941	 * the loopback reference in rt6_info will not be taken, do it
2942	 * manually for init_net */
2943	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2944	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2945  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2946	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2947	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2948	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2949	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2950  #endif
2951	ret = fib6_init();
2952	if (ret)
2953		goto out_register_subsys;
2954
2955	ret = xfrm6_init();
2956	if (ret)
2957		goto out_fib6_init;
2958
2959	ret = fib6_rules_init();
2960	if (ret)
2961		goto xfrm6_init;
2962
2963	ret = -ENOBUFS;
2964	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2965	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2966	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
2967		goto fib6_rules_init;
2968
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2969	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2970	if (ret)
2971		goto fib6_rules_init;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2972
2973out:
2974	return ret;
2975
 
 
 
2976fib6_rules_init:
2977	fib6_rules_cleanup();
2978xfrm6_init:
2979	xfrm6_fini();
2980out_fib6_init:
2981	fib6_gc_cleanup();
2982out_register_subsys:
2983	unregister_pernet_subsys(&ip6_route_net_ops);
 
 
2984out_dst_entries:
2985	dst_entries_destroy(&ip6_dst_blackhole_ops);
2986out_kmem_cache:
2987	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2988	goto out;
2989}
2990
2991void ip6_route_cleanup(void)
2992{
 
 
 
 
 
2993	unregister_netdevice_notifier(&ip6_route_dev_notifier);
 
2994	fib6_rules_cleanup();
2995	xfrm6_fini();
2996	fib6_gc_cleanup();
 
2997	unregister_pernet_subsys(&ip6_route_net_ops);
2998	dst_entries_destroy(&ip6_dst_blackhole_ops);
2999	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3000}
v5.9
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *	Linux INET6 implementation
   4 *	FIB front-end.
   5 *
   6 *	Authors:
   7 *	Pedro Roque		<roque@di.fc.ul.pt>
 
 
 
 
 
   8 */
   9
  10/*	Changes:
  11 *
  12 *	YOSHIFUJI Hideaki @USAGI
  13 *		reworked default router selection.
  14 *		- respect outgoing interface
  15 *		- select from (probably) reachable routers (i.e.
  16 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  17 *		- always select the same router if it is (probably)
  18 *		reachable.  otherwise, round-robin the list.
  19 *	Ville Nuorvala
  20 *		Fixed routing subtrees.
  21 */
  22
  23#define pr_fmt(fmt) "IPv6: " fmt
  24
  25#include <linux/capability.h>
  26#include <linux/errno.h>
  27#include <linux/export.h>
  28#include <linux/types.h>
  29#include <linux/times.h>
  30#include <linux/socket.h>
  31#include <linux/sockios.h>
  32#include <linux/net.h>
  33#include <linux/route.h>
  34#include <linux/netdevice.h>
  35#include <linux/in6.h>
  36#include <linux/mroute6.h>
  37#include <linux/init.h>
  38#include <linux/if_arp.h>
  39#include <linux/proc_fs.h>
  40#include <linux/seq_file.h>
  41#include <linux/nsproxy.h>
  42#include <linux/slab.h>
  43#include <linux/jhash.h>
  44#include <net/net_namespace.h>
  45#include <net/snmp.h>
  46#include <net/ipv6.h>
  47#include <net/ip6_fib.h>
  48#include <net/ip6_route.h>
  49#include <net/ndisc.h>
  50#include <net/addrconf.h>
  51#include <net/tcp.h>
  52#include <linux/rtnetlink.h>
  53#include <net/dst.h>
  54#include <net/dst_metadata.h>
  55#include <net/xfrm.h>
  56#include <net/netevent.h>
  57#include <net/netlink.h>
  58#include <net/rtnh.h>
  59#include <net/lwtunnel.h>
  60#include <net/ip_tunnels.h>
  61#include <net/l3mdev.h>
  62#include <net/ip.h>
  63#include <linux/uaccess.h>
  64#include <linux/btf_ids.h>
  65
  66#ifdef CONFIG_SYSCTL
  67#include <linux/sysctl.h>
  68#endif
  69
  70static int ip6_rt_type_to_error(u8 fib6_type);
 
  71
  72#define CREATE_TRACE_POINTS
  73#include <trace/events/fib6.h>
  74EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
  75#undef CREATE_TRACE_POINTS
  76
  /*
   * Result codes produced by rt6_check_neigh() and propagated through
   * rt6_score_route().  All failure codes are negative so callers can test
   * "< 0"; find_match() rejects a nexthop on RT6_NUD_FAIL_HARD and turns
   * RT6_NUD_FAIL_DO_RR into the lowest valid score plus a round-robin request.
   */
  77enum rt6_nud_state {
  78	RT6_NUD_FAIL_HARD = -3,
  79	RT6_NUD_FAIL_PROBE = -2,
  80	RT6_NUD_FAIL_DO_RR = -1,
  81	RT6_NUD_SUCCEED = 1
  82};
  83
 
 
  84static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  85static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  86static unsigned int	 ip6_mtu(const struct dst_entry *dst);
  87static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  88static void		ip6_dst_destroy(struct dst_entry *);
  89static void		ip6_dst_ifdown(struct dst_entry *,
  90				       struct net_device *dev, int how);
  91static int		 ip6_dst_gc(struct dst_ops *ops);
  92
  93static int		ip6_pkt_discard(struct sk_buff *skb);
  94static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  95static int		ip6_pkt_prohibit(struct sk_buff *skb);
  96static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  97static void		ip6_link_failure(struct sk_buff *skb);
  98static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
  99					   struct sk_buff *skb, u32 mtu,
 100					   bool confirm_neigh);
 101static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
 102					struct sk_buff *skb);
 103static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
 104			   int strict);
 105static size_t rt6_nlmsg_size(struct fib6_info *f6i);
 106static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 107			 struct fib6_info *rt, struct dst_entry *dst,
 108			 struct in6_addr *dest, struct in6_addr *src,
 109			 int iif, int type, u32 portid, u32 seq,
 110			 unsigned int flags);
 111static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
 112					   const struct in6_addr *daddr,
 113					   const struct in6_addr *saddr);
 114
 115#ifdef CONFIG_IPV6_ROUTE_INFO
 116static struct fib6_info *rt6_add_route_info(struct net *net,
 117					   const struct in6_addr *prefix, int prefixlen,
 118					   const struct in6_addr *gwaddr,
 119					   struct net_device *dev,
 120					   unsigned int pref);
 121static struct fib6_info *rt6_get_route_info(struct net *net,
 122					   const struct in6_addr *prefix, int prefixlen,
 123					   const struct in6_addr *gwaddr,
 124					   struct net_device *dev);
 125#endif
 126
 /*
  * List of rt6_info entries linked through rt6i_uncached, together with the
  * spinlock that rt6_uncached_list_add()/_del()/_flush_dev() take around
  * every list operation.
  */
 127struct uncached_list {
 128	spinlock_t		lock;
 129	struct list_head	head;
 130};
 131
 132static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
 133
 134void rt6_uncached_list_add(struct rt6_info *rt)
 135{
 136	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
 
 
 137
 138	rt->rt6i_uncached_list = ul;
 
 139
 140	spin_lock_bh(&ul->lock);
 141	list_add_tail(&rt->rt6i_uncached, &ul->head);
 142	spin_unlock_bh(&ul->lock);
 143}
 144
 145void rt6_uncached_list_del(struct rt6_info *rt)
 146{
 147	if (!list_empty(&rt->rt6i_uncached)) {
 148		struct uncached_list *ul = rt->rt6i_uncached_list;
 149		struct net *net = dev_net(rt->dst.dev);
 150
 151		spin_lock_bh(&ul->lock);
 152		list_del(&rt->rt6i_uncached);
 153		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
 154		spin_unlock_bh(&ul->lock);
 155	}
 156}
 157
 158static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 159{
 160	struct net_device *loopback_dev = net->loopback_dev;
 161	int cpu;
 162
 163	if (dev == loopback_dev)
 164		return;
 165
 166	for_each_possible_cpu(cpu) {
 167		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 168		struct rt6_info *rt;
 169
 170		spin_lock_bh(&ul->lock);
 171		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
 172			struct inet6_dev *rt_idev = rt->rt6i_idev;
 173			struct net_device *rt_dev = rt->dst.dev;
 174
 175			if (rt_idev->dev == dev) {
 176				rt->rt6i_idev = in6_dev_get(loopback_dev);
 177				in6_dev_put(rt_idev);
 178			}
 179
 180			if (rt_dev == dev) {
 181				rt->dst.dev = blackhole_netdev;
 182				dev_hold(rt->dst.dev);
 183				dev_put(rt_dev);
 184			}
 185		}
 186		spin_unlock_bh(&ul->lock);
 187	}
 
 188}
 189
 190static inline const void *choose_neigh_daddr(const struct in6_addr *p,
 191					     struct sk_buff *skb,
 192					     const void *daddr)
 193{
 194	if (!ipv6_addr_any(p))
 195		return (const void *) p;
 196	else if (skb)
 197		return &ipv6_hdr(skb)->daddr;
 198	return daddr;
 199}
 200
 201struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
 202				   struct net_device *dev,
 203				   struct sk_buff *skb,
 204				   const void *daddr)
 205{
 206	struct neighbour *n;
 207
 208	daddr = choose_neigh_daddr(gw, skb, daddr);
 209	n = __ipv6_neigh_lookup(dev, daddr);
 210	if (n)
 211		return n;
 212
 213	n = neigh_create(&nd_tbl, daddr, dev);
 214	return IS_ERR(n) ? NULL : n;
 215}
 216
 217static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
 218					      struct sk_buff *skb,
 219					      const void *daddr)
 220{
 221	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
 222
 223	return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
 224				dst->dev, skb, daddr);
 225}
 226
 227static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 228{
 229	struct net_device *dev = dst->dev;
 230	struct rt6_info *rt = (struct rt6_info *)dst;
 231
 232	daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
 233	if (!daddr)
 234		return;
 235	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
 236		return;
 237	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
 238		return;
 239	__ipv6_confirm_neigh(dev, daddr);
 240}
 241
 /*
  * dst_ops callback table for regular IPv6 routes: wires the ip6_* handlers
  * defined in this file (gc, check, mtu, destroy, ifdown, PMTU update,
  * redirect, neighbour lookup/confirm, ...) and sets a gc threshold of 1024.
  * NOTE(review): presumably copied into each namespace's ip6_dst_ops at
  * init time -- confirm against the netns init code.
  */
 242static struct dst_ops ip6_dst_ops_template = {
 243	.family			=	AF_INET6,
 
 244	.gc			=	ip6_dst_gc,
 245	.gc_thresh		=	1024,
 246	.check			=	ip6_dst_check,
 247	.default_advmss		=	ip6_default_advmss,
 248	.mtu			=	ip6_mtu,
 249	.cow_metrics		=	dst_cow_metrics_generic,
 250	.destroy		=	ip6_dst_destroy,
 251	.ifdown			=	ip6_dst_ifdown,
 252	.negative_advice	=	ip6_negative_advice,
 253	.link_failure		=	ip6_link_failure,
 254	.update_pmtu		=	ip6_rt_update_pmtu,
 255	.redirect		=	rt6_do_redirect,
 256	.local_out		=	__ip6_local_out,
 257	.neigh_lookup		=	ip6_dst_neigh_lookup,
 258	.confirm_neigh		=	ip6_confirm_neigh,
 259};
 260
 261static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 262{
 263	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 264
 265	return mtu ? : dst->dev->mtu;
 266}
 267
 268static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
 269					 struct sk_buff *skb, u32 mtu,
 270					 bool confirm_neigh)
 271{
 272}
 273
 /* redirect callback of ip6_dst_blackhole_ops: deliberately a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
 278
 /*
  * dst_ops callback table for blackhole dsts.  It shares destroy, check,
  * default_advmss and neigh_lookup with the regular IPv6 ops, but uses
  * ip6_blackhole_mtu() and the empty PMTU-update / redirect handlers.
  */
 279static struct dst_ops ip6_dst_blackhole_ops = {
 280	.family			=	AF_INET6,
 
 281	.destroy		=	ip6_dst_destroy,
 282	.check			=	ip6_dst_check,
 283	.mtu			=	ip6_blackhole_mtu,
 284	.default_advmss		=	ip6_default_advmss,
 285	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 286	.redirect		=	ip6_rt_blackhole_redirect,
 287	.cow_metrics		=	dst_cow_metrics_generic,
 288	.neigh_lookup		=	ip6_dst_neigh_lookup,
 289};
 290
 /*
  * Read-only metrics array of RTAX_MAX entries; the hop-limit slot is
  * spelled out explicitly as 0 and every other slot is implicitly zero.
  */
 291static const u32 ip6_template_metrics[RTAX_MAX] = {
 292	[RTAX_HOPLIMIT - 1] = 0,
 293};
 294
 /*
  * Template for the FIB "null" entry: a kernel-owned reject route of type
  * RTN_UNREACHABLE with no nexthop, the largest possible metric, one
  * reference, and the default dst metrics.
  * NOTE(review): presumably the source for net->ipv6.fib6_null_entry --
  * confirm against the netns init code.
  */
 295static const struct fib6_info fib6_null_entry_template = {
 296	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 297	.fib6_protocol  = RTPROT_KERNEL,
 298	.fib6_metric	= ~(u32)0,
 299	.fib6_ref	= REFCOUNT_INIT(1),
 300	.fib6_type	= RTN_UNREACHABLE,
 301	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
 302};
 303
 /*
  * Template for the "null" dst: a reject route without nexthop whose input
  * and output handlers are ip6_pkt_discard()/ip6_pkt_discard_out() and whose
  * dst error is -ENETUNREACH.  It starts with one reference and is marked
  * DST_OBSOLETE_FORCE_CHK.
  */
 304static const struct rt6_info ip6_null_entry_template = {
 305	.dst = {
 306		.__refcnt	= ATOMIC_INIT(1),
 307		.__use		= 1,
 308		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 309		.error		= -ENETUNREACH,
 310		.input		= ip6_pkt_discard,
 311		.output		= ip6_pkt_discard_out,
 312	},
 313	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 
 
 
 314};
 315
 316#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 317
 /*
  * Template for the "prohibit" dst (only built with
  * CONFIG_IPV6_MULTIPLE_TABLES): same shape as the null entry, but packets
  * go through ip6_pkt_prohibit()/ip6_pkt_prohibit_out() and the dst error is
  * -EACCES.
  */
 318static const struct rt6_info ip6_prohibit_entry_template = {
 
 
 
 319	.dst = {
 320		.__refcnt	= ATOMIC_INIT(1),
 321		.__use		= 1,
 322		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 323		.error		= -EACCES,
 324		.input		= ip6_pkt_prohibit,
 325		.output		= ip6_pkt_prohibit_out,
 326	},
 327	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 
 
 
 328};
 329
 /*
  * Template for the "blackhole" dst (only built with
  * CONFIG_IPV6_MULTIPLE_TABLES): packets are handed to the generic
  * dst_discard()/dst_discard_out() handlers and the dst error is -EINVAL.
  */
 330static const struct rt6_info ip6_blk_hole_entry_template = {
 331	.dst = {
 332		.__refcnt	= ATOMIC_INIT(1),
 333		.__use		= 1,
 334		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 335		.error		= -EINVAL,
 336		.input		= dst_discard,
 337		.output		= dst_discard_out,
 338	},
 339	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 
 
 
 340};
 341
 342#endif
 343
 344static void rt6_info_init(struct rt6_info *rt)
 345{
 346	struct dst_entry *dst = &rt->dst;
 347
 348	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 349	INIT_LIST_HEAD(&rt->rt6i_uncached);
 350}
 351
 352/* allocate dst with ip6_dst_ops */
 353struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
 354			       int flags)
 
 355{
 356	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 357					1, DST_OBSOLETE_FORCE_CHK, flags);
 358
 359	if (rt) {
 360		rt6_info_init(rt);
 361		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
 362	}
 363
 364	return rt;
 365}
 366EXPORT_SYMBOL(ip6_dst_alloc);
 367
 368static void ip6_dst_destroy(struct dst_entry *dst)
 369{
 370	struct rt6_info *rt = (struct rt6_info *)dst;
 371	struct fib6_info *from;
 372	struct inet6_dev *idev;
 373
 374	ip_dst_metrics_put(dst);
 375	rt6_uncached_list_del(rt);
 376
 377	idev = rt->rt6i_idev;
 378	if (idev) {
 379		rt->rt6i_idev = NULL;
 380		in6_dev_put(idev);
 381	}
 
 
 
 
 
 382
 383	from = xchg((__force struct fib6_info **)&rt->from, NULL);
 384	fib6_info_release(from);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 385}
 386
 387static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 388			   int how)
 389{
 390	struct rt6_info *rt = (struct rt6_info *)dst;
 391	struct inet6_dev *idev = rt->rt6i_idev;
 392	struct net_device *loopback_dev =
 393		dev_net(dev)->loopback_dev;
 394
 395	if (idev && idev->dev != loopback_dev) {
 396		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
 397		if (loopback_idev) {
 
 398			rt->rt6i_idev = loopback_idev;
 399			in6_dev_put(idev);
 400		}
 401	}
 402}
 403
 404static bool __rt6_check_expired(const struct rt6_info *rt)
 405{
 406	if (rt->rt6i_flags & RTF_EXPIRES)
 407		return time_after(jiffies, rt->dst.expires);
 408	else
 409		return false;
 410}
 411
 412static bool rt6_check_expired(const struct rt6_info *rt)
 413{
 414	struct fib6_info *from;
 415
 416	from = rcu_dereference(rt->from);
 417
 418	if (rt->rt6i_flags & RTF_EXPIRES) {
 419		if (time_after(jiffies, rt->dst.expires))
 420			return true;
 421	} else if (from) {
 422		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
 423			fib6_check_expired(from);
 424	}
 425	return false;
 426}
 427
 428void fib6_select_path(const struct net *net, struct fib6_result *res,
 429		      struct flowi6 *fl6, int oif, bool have_oif_match,
 430		      const struct sk_buff *skb, int strict)
 431{
 432	struct fib6_info *sibling, *next_sibling;
 433	struct fib6_info *match = res->f6i;
 434
 435	if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
 436		goto out;
 437
 438	if (match->nh && have_oif_match && res->nh)
 439		return;
 440
 441	/* We might have already computed the hash for ICMPv6 errors. In such
 442	 * case it will always be non-zero. Otherwise now is the time to do it.
 443	 */
 444	if (!fl6->mp_hash &&
 445	    (!match->nh || nexthop_is_multipath(match->nh)))
 446		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
 447
 448	if (unlikely(match->nh)) {
 449		nexthop_path_fib6_result(res, fl6->mp_hash);
 450		return;
 451	}
 452
 453	if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
 454		goto out;
 455
 456	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
 457				 fib6_siblings) {
 458		const struct fib6_nh *nh = sibling->fib6_nh;
 459		int nh_upper_bound;
 460
 461		nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
 462		if (fl6->mp_hash > nh_upper_bound)
 463			continue;
 464		if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
 465			break;
 466		match = sibling;
 467		break;
 468	}
 469
 470out:
 471	res->f6i = match;
 472	res->nh = match->fib6_nh;
 473}
 474
 475/*
 476 *	Route lookup. rcu_read_lock() should be held.
 477 */
 478
 479static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
 480			       const struct in6_addr *saddr, int oif, int flags)
 481{
 482	const struct net_device *dev;
 483
 484	if (nh->fib_nh_flags & RTNH_F_DEAD)
 485		return false;
 486
 487	dev = nh->fib_nh_dev;
 488	if (oif) {
 489		if (dev->ifindex == oif)
 490			return true;
 491	} else {
 492		if (ipv6_chk_addr(net, saddr, dev,
 493				  flags & RT6_LOOKUP_F_IFACE))
 494			return true;
 495	}
 496
 497	return false;
 498}
 499
 /*
  * Argument bundle for __rt6_nh_dev_match() when it is run over a nexthop
  * object: net/saddr/oif/flags are the inputs forwarded to
  * __rt6_device_match(); nh is an output holding the fib6_nh most recently
  * visited by the callback.
  */
 500struct fib6_nh_dm_arg {
 501	struct net		*net;
 502	const struct in6_addr	*saddr;
 503	int			oif;
 504	int			flags;
 505	struct fib6_nh		*nh;
 506};
 507
 508static int __rt6_nh_dev_match(struct fib6_nh *nh, void *_arg)
 509{
 510	struct fib6_nh_dm_arg *arg = _arg;
 511
 512	arg->nh = nh;
 513	return __rt6_device_match(arg->net, nh, arg->saddr, arg->oif,
 514				  arg->flags);
 515}
 516
 517/* returns fib6_nh from nexthop or NULL */
 518static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh,
 519					struct fib6_result *res,
 520					const struct in6_addr *saddr,
 521					int oif, int flags)
 522{
 523	struct fib6_nh_dm_arg arg = {
 524		.net   = net,
 525		.saddr = saddr,
 526		.oif   = oif,
 527		.flags = flags,
 528	};
 529
 530	if (nexthop_is_blackhole(nh))
 531		return NULL;
 532
 533	if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg))
 534		return arg.nh;
 535
 536	return NULL;
 537}
 538
 539static void rt6_device_match(struct net *net, struct fib6_result *res,
 540			     const struct in6_addr *saddr, int oif, int flags)
 541{
 542	struct fib6_info *f6i = res->f6i;
 543	struct fib6_info *spf6i;
 544	struct fib6_nh *nh;
 545
 546	if (!oif && ipv6_addr_any(saddr)) {
 547		if (unlikely(f6i->nh)) {
 548			nh = nexthop_fib6_nh(f6i->nh);
 549			if (nexthop_is_blackhole(f6i->nh))
 550				goto out_blackhole;
 551		} else {
 552			nh = f6i->fib6_nh;
 
 
 553		}
 554		if (!(nh->fib_nh_flags & RTNH_F_DEAD))
 555			goto out;
 556	}
 557
 558	for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
 559		bool matched = false;
 560
 561		if (unlikely(spf6i->nh)) {
 562			nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr,
 563					      oif, flags);
 564			if (nh)
 565				matched = true;
 566		} else {
 567			nh = spf6i->fib6_nh;
 568			if (__rt6_device_match(net, nh, saddr, oif, flags))
 569				matched = true;
 570		}
 571		if (matched) {
 572			res->f6i = spf6i;
 573			goto out;
 574		}
 575	}
 576
 577	if (oif && flags & RT6_LOOKUP_F_IFACE) {
 578		res->f6i = net->ipv6.fib6_null_entry;
 579		nh = res->f6i->fib6_nh;
 580		goto out;
 581	}
 582
 583	if (unlikely(f6i->nh)) {
 584		nh = nexthop_fib6_nh(f6i->nh);
 585		if (nexthop_is_blackhole(f6i->nh))
 586			goto out_blackhole;
 587	} else {
 588		nh = f6i->fib6_nh;
 589	}
 590
 591	if (nh->fib_nh_flags & RTNH_F_DEAD) {
 592		res->f6i = net->ipv6.fib6_null_entry;
 593		nh = res->f6i->fib6_nh;
 594	}
 595out:
 596	res->nh = nh;
 597	res->fib6_type = res->f6i->fib6_type;
 598	res->fib6_flags = res->f6i->fib6_flags;
 599	return;
 600
 601out_blackhole:
 602	res->fib6_flags |= RTF_REJECT;
 603	res->fib6_type = RTN_BLACKHOLE;
 604	res->nh = nh;
 605}
 606
 607#ifdef CONFIG_IPV6_ROUTER_PREF
 /*
  * Deferred router-probe request queued by rt6_probe(): the work item, the
  * gateway address to solicit, and the device to send on.  rt6_probe() takes
  * a reference on dev; rt6_probe_deferred() drops it and frees the struct.
  */
 608struct __rt6_probe_work {
 609	struct work_struct work;
 610	struct in6_addr target;
 611	struct net_device *dev;
 612};
 613
 614static void rt6_probe_deferred(struct work_struct *w)
 615{
 616	struct in6_addr mcaddr;
 617	struct __rt6_probe_work *work =
 618		container_of(w, struct __rt6_probe_work, work);
 619
 620	addrconf_addr_solict_mult(&work->target, &mcaddr);
 621	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
 622	dev_put(work->dev);
 623	kfree(work);
 624}
 625
 626static void rt6_probe(struct fib6_nh *fib6_nh)
 627{
 628	struct __rt6_probe_work *work = NULL;
 629	const struct in6_addr *nh_gw;
 630	unsigned long last_probe;
 631	struct neighbour *neigh;
 632	struct net_device *dev;
 633	struct inet6_dev *idev;
 634
 635	/*
 636	 * Okay, this does not seem to be appropriate
 637	 * for now, however, we need to check if it
 638	 * is really so; aka Router Reachability Probing.
 639	 *
 640	 * Router Reachability Probe MUST be rate-limited
 641	 * to no more than one per minute.
 642	 */
 643	if (!fib6_nh->fib_nh_gw_family)
 644		return;
 645
 646	nh_gw = &fib6_nh->fib_nh_gw6;
 647	dev = fib6_nh->fib_nh_dev;
 648	rcu_read_lock_bh();
 649	last_probe = READ_ONCE(fib6_nh->last_probe);
 650	idev = __in6_dev_get(dev);
 651	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
 652	if (neigh) {
 653		if (neigh->nud_state & NUD_VALID)
 654			goto out;
 655
 656		write_lock(&neigh->lock);
 657		if (!(neigh->nud_state & NUD_VALID) &&
 658		    time_after(jiffies,
 659			       neigh->updated + idev->cnf.rtr_probe_interval)) {
 660			work = kmalloc(sizeof(*work), GFP_ATOMIC);
 661			if (work)
 662				__neigh_set_probe_once(neigh);
 663		}
 664		write_unlock(&neigh->lock);
 665	} else if (time_after(jiffies, last_probe +
 666				       idev->cnf.rtr_probe_interval)) {
 667		work = kmalloc(sizeof(*work), GFP_ATOMIC);
 668	}
 669
 670	if (!work || cmpxchg(&fib6_nh->last_probe,
 671			     last_probe, jiffies) != last_probe) {
 672		kfree(work);
 673	} else {
 674		INIT_WORK(&work->work, rt6_probe_deferred);
 675		work->target = *nh_gw;
 676		dev_hold(dev);
 677		work->dev = dev;
 678		schedule_work(&work->work);
 679	}
 680
 681out:
 682	rcu_read_unlock_bh();
 683}
 684#else
 /* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
	/* nothing to do */
}
 688#endif
 689
 690/*
 691 * Default Router Selection (RFC 2461 6.3.6)
 692 */
 693static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
 
 
 
 
 
 
 
 
 
 
 
 694{
 695	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 696	struct neighbour *neigh;
 
 697
 698	rcu_read_lock_bh();
 699	neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
 700					  &fib6_nh->fib_nh_gw6);
 701	if (neigh) {
 702		read_lock(&neigh->lock);
 
 
 703		if (neigh->nud_state & NUD_VALID)
 704			ret = RT6_NUD_SUCCEED;
 705#ifdef CONFIG_IPV6_ROUTER_PREF
 706		else if (!(neigh->nud_state & NUD_FAILED))
 707			ret = RT6_NUD_SUCCEED;
 
 708		else
 709			ret = RT6_NUD_FAIL_PROBE;
 710#endif
 711		read_unlock(&neigh->lock);
 712	} else {
 713		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
 714		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
 715	}
 716	rcu_read_unlock_bh();
 717
 718	return ret;
 719}
 720
 721static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
 722			   int strict)
 723{
 724	int m = 0;
 725
 726	if (!oif || nh->fib_nh_dev->ifindex == oif)
 727		m = 2;
 728
 
 729	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 730		return RT6_NUD_FAIL_HARD;
 731#ifdef CONFIG_IPV6_ROUTER_PREF
 732	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
 733#endif
 734	if ((strict & RT6_LOOKUP_F_REACHABLE) &&
 735	    !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
 736		int n = rt6_check_neigh(nh);
 737		if (n < 0)
 738			return n;
 739	}
 740	return m;
 741}
 742
 743static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
 744		       int oif, int strict, int *mpri, bool *do_rr)
 745{
 746	bool match_do_rr = false;
 747	bool rc = false;
 748	int m;
 749
 750	if (nh->fib_nh_flags & RTNH_F_DEAD)
 751		goto out;
 752
 753	if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
 754	    nh->fib_nh_flags & RTNH_F_LINKDOWN &&
 755	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
 756		goto out;
 757
 758	m = rt6_score_route(nh, fib6_flags, oif, strict);
 759	if (m == RT6_NUD_FAIL_DO_RR) {
 760		match_do_rr = true;
 761		m = 0; /* lowest valid score */
 762	} else if (m == RT6_NUD_FAIL_HARD) {
 763		goto out;
 764	}
 765
 766	if (strict & RT6_LOOKUP_F_REACHABLE)
 767		rt6_probe(nh);
 768
 769	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
 770	if (m > *mpri) {
 771		*do_rr = match_do_rr;
 
 772		*mpri = m;
 773		rc = true;
 
 
 774	}
 
 775out:
 776	return rc;
 777}
 778
 /*
  * Argument bundle for rt6_nh_find_match() when it is run over a nexthop
  * object: flags/oif/strict/mpri/do_rr are forwarded to find_match(); nh is
  * an output holding the fib6_nh most recently visited by the callback.
  */
 779struct fib6_nh_frl_arg {
 780	u32		flags;
 781	int		oif;
 782	int		strict;
 783	int		*mpri;
 784	bool		*do_rr;
 785	struct fib6_nh	*nh;
 786};
 787
 788static int rt6_nh_find_match(struct fib6_nh *nh, void *_arg)
 789{
 790	struct fib6_nh_frl_arg *arg = _arg;
 791
 792	arg->nh = nh;
 793	return find_match(nh, arg->flags, arg->oif, arg->strict,
 794			  arg->mpri, arg->do_rr);
 795}
 796
 797static void __find_rr_leaf(struct fib6_info *f6i_start,
 798			   struct fib6_info *nomatch, u32 metric,
 799			   struct fib6_result *res, struct fib6_info **cont,
 800			   int oif, int strict, bool *do_rr, int *mpri)
 801{
 802	struct fib6_info *f6i;
 803
 804	for (f6i = f6i_start;
 805	     f6i && f6i != nomatch;
 806	     f6i = rcu_dereference(f6i->fib6_next)) {
 807		bool matched = false;
 808		struct fib6_nh *nh;
 809
 810		if (cont && f6i->fib6_metric != metric) {
 811			*cont = f6i;
 812			return;
 813		}
 814
 815		if (fib6_check_expired(f6i))
 816			continue;
 817
 818		if (unlikely(f6i->nh)) {
 819			struct fib6_nh_frl_arg arg = {
 820				.flags  = f6i->fib6_flags,
 821				.oif    = oif,
 822				.strict = strict,
 823				.mpri   = mpri,
 824				.do_rr  = do_rr
 825			};
 826
 827			if (nexthop_is_blackhole(f6i->nh)) {
 828				res->fib6_flags = RTF_REJECT;
 829				res->fib6_type = RTN_BLACKHOLE;
 830				res->f6i = f6i;
 831				res->nh = nexthop_fib6_nh(f6i->nh);
 832				return;
 833			}
 834			if (nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_find_match,
 835						     &arg)) {
 836				matched = true;
 837				nh = arg.nh;
 838			}
 839		} else {
 840			nh = f6i->fib6_nh;
 841			if (find_match(nh, f6i->fib6_flags, oif, strict,
 842				       mpri, do_rr))
 843				matched = true;
 844		}
 845		if (matched) {
 846			res->f6i = f6i;
 847			res->nh = nh;
 848			res->fib6_flags = f6i->fib6_flags;
 849			res->fib6_type = f6i->fib6_type;
 850		}
 851	}
 852}
 853
 854static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
 855			 struct fib6_info *rr_head, int oif, int strict,
 856			 bool *do_rr, struct fib6_result *res)
 857{
 858	u32 metric = rr_head->fib6_metric;
 859	struct fib6_info *cont = NULL;
 860	int mpri = -1;
 861
 862	__find_rr_leaf(rr_head, NULL, metric, res, &cont,
 863		       oif, strict, do_rr, &mpri);
 
 
 
 
 
 864
 865	__find_rr_leaf(leaf, rr_head, metric, res, &cont,
 866		       oif, strict, do_rr, &mpri);
 867
 868	if (res->f6i || !cont)
 869		return;
 870
 871	__find_rr_leaf(cont, NULL, metric, res, NULL,
 872		       oif, strict, do_rr, &mpri);
 873}
 874
 875static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
 876		       struct fib6_result *res, int strict)
 877{
 878	struct fib6_info *leaf = rcu_dereference(fn->leaf);
 879	struct fib6_info *rt0;
 880	bool do_rr = false;
 881	int key_plen;
 882
 883	/* make sure this function or its helpers sets f6i */
 884	res->f6i = NULL;
 885
 886	if (!leaf || leaf == net->ipv6.fib6_null_entry)
 887		goto out;
 888
 889	rt0 = rcu_dereference(fn->rr_ptr);
 890	if (!rt0)
 891		rt0 = leaf;
 892
 893	/* Double check to make sure fn is not an intermediate node
 894	 * and fn->leaf does not points to its child's leaf
 895	 * (This might happen if all routes under fn are deleted from
 896	 * the tree and fib6_repair_tree() is called on the node.)
 897	 */
 898	key_plen = rt0->fib6_dst.plen;
 899#ifdef CONFIG_IPV6_SUBTREES
 900	if (rt0->fib6_src.plen)
 901		key_plen = rt0->fib6_src.plen;
 902#endif
 903	if (fn->fn_bit != key_plen)
 904		goto out;
 905
 906	find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
 907	if (do_rr) {
 908		struct fib6_info *next = rcu_dereference(rt0->fib6_next);
 909
 910		/* no entries matched; do round-robin */
 911		if (!next || next->fib6_metric != rt0->fib6_metric)
 912			next = leaf;
 913
 914		if (next != rt0) {
 915			spin_lock_bh(&leaf->fib6_table->tb6_lock);
 916			/* make sure next is not being deleted from the tree */
 917			if (next->fib6_node)
 918				rcu_assign_pointer(fn->rr_ptr, next);
 919			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
 920		}
 921	}
 922
 923out:
 924	if (!res->f6i) {
 925		res->f6i = net->ipv6.fib6_null_entry;
 926		res->nh = res->f6i->fib6_nh;
 927		res->fib6_flags = res->f6i->fib6_flags;
 928		res->fib6_type = res->f6i->fib6_type;
 929	}
 930}
 931
 932static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
 933{
 934	return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
 935	       res->nh->fib_nh_gw_family;
 936}
 937
 938#ifdef CONFIG_IPV6_ROUTE_INFO
 939int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 940		  const struct in6_addr *gwaddr)
 941{
 942	struct net *net = dev_net(dev);
 943	struct route_info *rinfo = (struct route_info *) opt;
 944	struct in6_addr prefix_buf, *prefix;
 945	unsigned int pref;
 946	unsigned long lifetime;
 947	struct fib6_info *rt;
 948
 949	if (len < sizeof(struct route_info)) {
 950		return -EINVAL;
 951	}
 952
 953	/* Sanity check for prefix_len and length */
 954	if (rinfo->length > 3) {
 955		return -EINVAL;
 956	} else if (rinfo->prefix_len > 128) {
 957		return -EINVAL;
 958	} else if (rinfo->prefix_len > 64) {
 959		if (rinfo->length < 2) {
 960			return -EINVAL;
 961		}
 962	} else if (rinfo->prefix_len > 0) {
 963		if (rinfo->length < 1) {
 964			return -EINVAL;
 965		}
 966	}
 967
 968	pref = rinfo->route_pref;
 969	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 970		return -EINVAL;
 971
 972	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 973
 974	if (rinfo->length == 3)
 975		prefix = (struct in6_addr *)rinfo->prefix;
 976	else {
 977		/* this function is safe */
 978		ipv6_addr_prefix(&prefix_buf,
 979				 (struct in6_addr *)rinfo->prefix,
 980				 rinfo->prefix_len);
 981		prefix = &prefix_buf;
 982	}
 983
 984	if (rinfo->prefix_len == 0)
 985		rt = rt6_get_dflt_router(net, gwaddr, dev);
 986	else
 987		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
 988					gwaddr, dev);
 989
 990	if (rt && !lifetime) {
 991		ip6_del_rt(net, rt, false);
 992		rt = NULL;
 993	}
 994
 995	if (!rt && lifetime)
 996		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 997					dev, pref);
 998	else if (rt)
 999		rt->fib6_flags = RTF_ROUTEINFO |
1000				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
1001
1002	if (rt) {
1003		if (!addrconf_finite_timeout(lifetime))
1004			fib6_clean_expires(rt);
1005		else
1006			fib6_set_expires(rt, jiffies + HZ * lifetime);
1007
1008		fib6_info_release(rt);
 
1009	}
1010	return 0;
1011}
1012#endif
1013
1014/*
1015 *	Misc support functions
1016 */
1017
1018/* called with rcu_lock held */
1019static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
1020{
1021	struct net_device *dev = res->nh->fib_nh_dev;
1022
1023	if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
1024		/* for copies of local routes, dst->dev needs to be the
1025		 * device if it is a master device, the master device if
1026		 * device is enslaved, and the loopback as the default
1027		 */
1028		if (netif_is_l3_slave(dev) &&
1029		    !rt6_need_strict(&res->f6i->fib6_dst.addr))
1030			dev = l3mdev_master_dev_rcu(dev);
1031		else if (!netif_is_l3_master(dev))
1032			dev = dev_net(dev)->loopback_dev;
1033		/* last case is netif_is_l3_master(dev) is true in which
1034		 * case we want dev returned to be dev
1035		 */
1036	}
1037
1038	return dev;
1039}
1040
/*
 * Route type (RTN_*) to dst error mapping, indexed by route type and read
 * by ip6_rt_type_to_error().  Deliverable types map to 0, reject-style
 * types to a negative errno.
 */
1041static const int fib6_prop[RTN_MAX + 1] = {
1042	[RTN_UNSPEC]	= 0,
1043	[RTN_UNICAST]	= 0,
1044	[RTN_LOCAL]	= 0,
1045	[RTN_BROADCAST]	= 0,
1046	[RTN_ANYCAST]	= 0,
1047	[RTN_MULTICAST]	= 0,
1048	[RTN_BLACKHOLE]	= -EINVAL,
1049	[RTN_UNREACHABLE] = -EHOSTUNREACH,
1050	[RTN_PROHIBIT]	= -EACCES,
1051	[RTN_THROW]	= -EAGAIN,
1052	[RTN_NAT]	= -EINVAL,
1053	[RTN_XRESOLVE]	= -EINVAL,
1054};
1055
1056static int ip6_rt_type_to_error(u8 fib6_type)
1057{
1058	return fib6_prop[fib6_type];
1059}
1060
1061static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
1062{
1063	unsigned short flags = 0;
1064
1065	if (rt->dst_nocount)
1066		flags |= DST_NOCOUNT;
1067	if (rt->dst_nopolicy)
1068		flags |= DST_NOPOLICY;
1069
1070	return flags;
1071}
1072
1073static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
1074{
1075	rt->dst.error = ip6_rt_type_to_error(fib6_type);
1076
1077	switch (fib6_type) {
1078	case RTN_BLACKHOLE:
1079		rt->dst.output = dst_discard_out;
1080		rt->dst.input = dst_discard;
1081		break;
1082	case RTN_PROHIBIT:
1083		rt->dst.output = ip6_pkt_prohibit_out;
1084		rt->dst.input = ip6_pkt_prohibit;
1085		break;
1086	case RTN_THROW:
1087	case RTN_UNREACHABLE:
1088	default:
1089		rt->dst.output = ip6_pkt_discard_out;
1090		rt->dst.input = ip6_pkt_discard;
1091		break;
1092	}
1093}
1094
1095static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
1096{
1097	struct fib6_info *f6i = res->f6i;
1098
1099	if (res->fib6_flags & RTF_REJECT) {
1100		ip6_rt_init_dst_reject(rt, res->fib6_type);
1101		return;
1102	}
1103
1104	rt->dst.error = 0;
1105	rt->dst.output = ip6_output;
1106
1107	if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
1108		rt->dst.input = ip6_input;
1109	} else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
1110		rt->dst.input = ip6_mc_input;
1111	} else {
1112		rt->dst.input = ip6_forward;
1113	}
1114
1115	if (res->nh->fib_nh_lws) {
1116		rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
1117		lwtunnel_set_redirect(&rt->dst);
1118	}
1119
1120	rt->dst.lastuse = jiffies;
1121}
1122
1123/* Caller must already hold reference to @from */
1124static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
1125{
1126	rt->rt6i_flags &= ~RTF_EXPIRES;
1127	rcu_assign_pointer(rt->from, from);
1128	ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
1129}
1130
1131/* Caller must already hold reference to f6i in result */
1132static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
1133{
1134	const struct fib6_nh *nh = res->nh;
1135	const struct net_device *dev = nh->fib_nh_dev;
1136	struct fib6_info *f6i = res->f6i;
1137
1138	ip6_rt_init_dst(rt, res);
1139
1140	rt->rt6i_dst = f6i->fib6_dst;
1141	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
1142	rt->rt6i_flags = res->fib6_flags;
1143	if (nh->fib_nh_gw_family) {
1144		rt->rt6i_gateway = nh->fib_nh_gw6;
1145		rt->rt6i_flags |= RTF_GATEWAY;
1146	}
1147	rt6_set_from(rt, f6i);
1148#ifdef CONFIG_IPV6_SUBTREES
1149	rt->rt6i_src = f6i->fib6_src;
1150#endif
1151}
1152
1153static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1154					struct in6_addr *saddr)
1155{
1156	struct fib6_node *pn, *sn;
1157	while (1) {
1158		if (fn->fn_flags & RTN_TL_ROOT)
1159			return NULL;
1160		pn = rcu_dereference(fn->parent);
1161		sn = FIB6_SUBTREE(pn);
1162		if (sn && sn != fn)
1163			fn = fib6_node_lookup(sn, NULL, saddr);
1164		else
1165			fn = pn;
1166		if (fn->fn_flags & RTN_RTINFO)
1167			return fn;
1168	}
1169}
1170
1171static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
1172{
1173	struct rt6_info *rt = *prt;
1174
1175	if (dst_hold_safe(&rt->dst))
1176		return true;
1177	if (net) {
1178		rt = net->ipv6.ip6_null_entry;
1179		dst_hold(&rt->dst);
1180	} else {
1181		rt = NULL;
1182	}
1183	*prt = rt;
1184	return false;
1185}
1186
1187/* called with rcu_lock held */
1188static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
1189{
1190	struct net_device *dev = res->nh->fib_nh_dev;
1191	struct fib6_info *f6i = res->f6i;
1192	unsigned short flags;
1193	struct rt6_info *nrt;
1194
1195	if (!fib6_info_hold_safe(f6i))
1196		goto fallback;
1197
1198	flags = fib6_info_dst_flags(f6i);
1199	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1200	if (!nrt) {
1201		fib6_info_release(f6i);
1202		goto fallback;
1203	}
1204
1205	ip6_rt_copy_init(nrt, res);
1206	return nrt;
1207
1208fallback:
1209	nrt = dev_net(dev)->ipv6.ip6_null_entry;
1210	dst_hold(&nrt->dst);
1211	return nrt;
1212}
1213
1214INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
1215					     struct fib6_table *table,
1216					     struct flowi6 *fl6,
1217					     const struct sk_buff *skb,
1218					     int flags)
1219{
1220	struct fib6_result res = {};
1221	struct fib6_node *fn;
1222	struct rt6_info *rt;
1223
1224	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1225		flags &= ~RT6_LOOKUP_F_IFACE;
1226
1227	rcu_read_lock();
1228	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1229restart:
1230	res.f6i = rcu_dereference(fn->leaf);
1231	if (!res.f6i)
1232		res.f6i = net->ipv6.fib6_null_entry;
1233	else
1234		rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
1235				 flags);
1236
1237	if (res.f6i == net->ipv6.fib6_null_entry) {
1238		fn = fib6_backtrack(fn, &fl6->saddr);
1239		if (fn)
1240			goto restart;
1241
1242		rt = net->ipv6.ip6_null_entry;
1243		dst_hold(&rt->dst);
1244		goto out;
1245	} else if (res.fib6_flags & RTF_REJECT) {
1246		goto do_create;
1247	}
1248
1249	fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
1250			 fl6->flowi6_oif != 0, skb, flags);
1251
1252	/* Search through exception table */
1253	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
1254	if (rt) {
1255		if (ip6_hold_safe(net, &rt))
1256			dst_use_noref(&rt->dst, jiffies);
1257	} else {
1258do_create:
1259		rt = ip6_create_rt_rcu(&res);
1260	}
1261
1262out:
1263	trace_fib6_table_lookup(net, &res, table, fl6);
1264
1265	rcu_read_unlock();
1266
1267	return rt;
1268}
1269
1270struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1271				   const struct sk_buff *skb, int flags)
1272{
1273	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1274}
1275EXPORT_SYMBOL_GPL(ip6_route_lookup);
1276
1277struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1278			    const struct in6_addr *saddr, int oif,
1279			    const struct sk_buff *skb, int strict)
1280{
1281	struct flowi6 fl6 = {
1282		.flowi6_oif = oif,
1283		.daddr = *daddr,
1284	};
1285	struct dst_entry *dst;
1286	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1287
1288	if (saddr) {
1289		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1290		flags |= RT6_LOOKUP_F_HAS_SADDR;
1291	}
1292
1293	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1294	if (dst->error == 0)
1295		return (struct rt6_info *) dst;
1296
1297	dst_release(dst);
1298
1299	return NULL;
1300}
 
1301EXPORT_SYMBOL(rt6_lookup);
1302
1303/* ip6_ins_rt is called with FREE table->tb6_lock.
1304 * It takes new route entry, the addition fails by any reason the
1305 * route is released.
1306 * Caller must hold dst before calling it.
1307 */
1308
1309static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1310			struct netlink_ext_ack *extack)
1311{
1312	int err;
1313	struct fib6_table *table;
1314
1315	table = rt->fib6_table;
1316	spin_lock_bh(&table->tb6_lock);
1317	err = fib6_add(&table->tb6_root, rt, info, extack);
1318	spin_unlock_bh(&table->tb6_lock);
1319
1320	return err;
1321}
1322
1323int ip6_ins_rt(struct net *net, struct fib6_info *rt)
1324{
1325	struct nl_info info = {	.nl_net = net, };
1326
1327	return __ip6_ins_rt(rt, &info, NULL);
 
1328}
1329
1330static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
1331					   const struct in6_addr *daddr,
1332					   const struct in6_addr *saddr)
1333{
1334	struct fib6_info *f6i = res->f6i;
1335	struct net_device *dev;
1336	struct rt6_info *rt;
1337
1338	/*
1339	 *	Clone the route.
1340	 */
1341
1342	if (!fib6_info_hold_safe(f6i))
1343		return NULL;
 
 
 
1344
1345	dev = ip6_rt_get_dev_rcu(res);
1346	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1347	if (!rt) {
1348		fib6_info_release(f6i);
1349		return NULL;
1350	}
1351
1352	ip6_rt_copy_init(rt, res);
1353	rt->rt6i_flags |= RTF_CACHE;
1354	rt->rt6i_dst.addr = *daddr;
1355	rt->rt6i_dst.plen = 128;
1356
1357	if (!rt6_is_gw_or_nonexthop(res)) {
1358		if (f6i->fib6_dst.plen != 128 &&
1359		    ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
1360			rt->rt6i_flags |= RTF_ANYCAST;
1361#ifdef CONFIG_IPV6_SUBTREES
1362		if (rt->rt6i_src.plen && saddr) {
1363			rt->rt6i_src.addr = *saddr;
1364			rt->rt6i_src.plen = 128;
1365		}
1366#endif
1367	}
1368
1369	return rt;
1370}
1371
1372static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
1373{
1374	struct fib6_info *f6i = res->f6i;
1375	unsigned short flags = fib6_info_dst_flags(f6i);
1376	struct net_device *dev;
1377	struct rt6_info *pcpu_rt;
1378
1379	if (!fib6_info_hold_safe(f6i))
1380		return NULL;
1381
1382	rcu_read_lock();
1383	dev = ip6_rt_get_dev_rcu(res);
1384	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags | DST_NOCOUNT);
1385	rcu_read_unlock();
1386	if (!pcpu_rt) {
1387		fib6_info_release(f6i);
1388		return NULL;
1389	}
1390	ip6_rt_copy_init(pcpu_rt, res);
1391	pcpu_rt->rt6i_flags |= RTF_PCPU;
1392
1393	if (f6i->nh)
1394		pcpu_rt->sernum = rt_genid_ipv6(dev_net(dev));
1395
1396	return pcpu_rt;
1397}
1398
1399static bool rt6_is_valid(const struct rt6_info *rt6)
1400{
1401	return rt6->sernum == rt_genid_ipv6(dev_net(rt6->dst.dev));
1402}
1403
1404/* It should be called with rcu_read_lock() acquired */
1405static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
1406{
1407	struct rt6_info *pcpu_rt;
1408
1409	pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
1410
1411	if (pcpu_rt && pcpu_rt->sernum && !rt6_is_valid(pcpu_rt)) {
1412		struct rt6_info *prev, **p;
1413
1414		p = this_cpu_ptr(res->nh->rt6i_pcpu);
1415		prev = xchg(p, NULL);
1416		if (prev) {
1417			dst_dev_put(&prev->dst);
1418			dst_release(&prev->dst);
1419		}
 
1420
1421		pcpu_rt = NULL;
1422	}
1423
1424	return pcpu_rt;
1425}
1426
1427static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1428					    const struct fib6_result *res)
1429{
1430	struct rt6_info *pcpu_rt, *prev, **p;
1431
1432	pcpu_rt = ip6_rt_pcpu_alloc(res);
1433	if (!pcpu_rt)
1434		return NULL;
1435
1436	p = this_cpu_ptr(res->nh->rt6i_pcpu);
1437	prev = cmpxchg(p, NULL, pcpu_rt);
1438	BUG_ON(prev);
1439
1440	if (res->f6i->fib6_destroying) {
1441		struct fib6_info *from;
1442
1443		from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
1444		fib6_info_release(from);
1445	}
1446
1447	return pcpu_rt;
1448}
1449
1450/* exception hash table implementation
1451 */
1452static DEFINE_SPINLOCK(rt6_exception_lock);
1453
1454/* Remove rt6_ex from hash table and free the memory
1455 * Caller must hold rt6_exception_lock
1456 */
1457static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1458				 struct rt6_exception *rt6_ex)
1459{
1460	struct fib6_info *from;
1461	struct net *net;
 
 
 
 
1462
1463	if (!bucket || !rt6_ex)
1464		return;
1465
1466	net = dev_net(rt6_ex->rt6i->dst.dev);
1467	net->ipv6.rt6_stats->fib_rt_cache--;
1468
1469	/* purge completely the exception to allow releasing the held resources:
1470	 * some [sk] cache may keep the dst around for unlimited time
1471	 */
1472	from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
1473	fib6_info_release(from);
1474	dst_dev_put(&rt6_ex->rt6i->dst);
1475
1476	hlist_del_rcu(&rt6_ex->hlist);
1477	dst_release(&rt6_ex->rt6i->dst);
1478	kfree_rcu(rt6_ex, rcu);
1479	WARN_ON_ONCE(!bucket->depth);
1480	bucket->depth--;
1481}
1482
1483/* Remove oldest rt6_ex in bucket and free the memory
1484 * Caller must hold rt6_exception_lock
1485 */
1486static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1487{
1488	struct rt6_exception *rt6_ex, *oldest = NULL;
1489
1490	if (!bucket)
1491		return;
1492
1493	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1494		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1495			oldest = rt6_ex;
1496	}
1497	rt6_remove_exception(bucket, oldest);
1498}
1499
1500static u32 rt6_exception_hash(const struct in6_addr *dst,
1501			      const struct in6_addr *src)
1502{
1503	static u32 seed __read_mostly;
1504	u32 val;
1505
1506	net_get_random_once(&seed, sizeof(seed));
1507	val = jhash2((const u32 *)dst, sizeof(*dst)/sizeof(u32), seed);
1508
1509#ifdef CONFIG_IPV6_SUBTREES
1510	if (src)
1511		val = jhash2((const u32 *)src, sizeof(*src)/sizeof(u32), val);
1512#endif
1513	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1514}
1515
1516/* Helper function to find the cached rt in the hash table
1517 * and update bucket pointer to point to the bucket for this
1518 * (daddr, saddr) pair
1519 * Caller must hold rt6_exception_lock
1520 */
1521static struct rt6_exception *
1522__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1523			      const struct in6_addr *daddr,
1524			      const struct in6_addr *saddr)
1525{
1526	struct rt6_exception *rt6_ex;
1527	u32 hval;
1528
1529	if (!(*bucket) || !daddr)
1530		return NULL;
1531
1532	hval = rt6_exception_hash(daddr, saddr);
1533	*bucket += hval;
1534
1535	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1536		struct rt6_info *rt6 = rt6_ex->rt6i;
1537		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1538
1539#ifdef CONFIG_IPV6_SUBTREES
1540		if (matched && saddr)
1541			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1542#endif
1543		if (matched)
1544			return rt6_ex;
1545	}
1546	return NULL;
1547}
1548
1549/* Helper function to find the cached rt in the hash table
1550 * and update bucket pointer to point to the bucket for this
1551 * (daddr, saddr) pair
1552 * Caller must hold rcu_read_lock()
1553 */
1554static struct rt6_exception *
1555__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1556			 const struct in6_addr *daddr,
1557			 const struct in6_addr *saddr)
1558{
1559	struct rt6_exception *rt6_ex;
1560	u32 hval;
1561
1562	WARN_ON_ONCE(!rcu_read_lock_held());
1563
1564	if (!(*bucket) || !daddr)
1565		return NULL;
1566
1567	hval = rt6_exception_hash(daddr, saddr);
1568	*bucket += hval;
1569
1570	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1571		struct rt6_info *rt6 = rt6_ex->rt6i;
1572		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1573
1574#ifdef CONFIG_IPV6_SUBTREES
1575		if (matched && saddr)
1576			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1577#endif
1578		if (matched)
1579			return rt6_ex;
1580	}
1581	return NULL;
1582}
1583
1584static unsigned int fib6_mtu(const struct fib6_result *res)
1585{
1586	const struct fib6_nh *nh = res->nh;
1587	unsigned int mtu;
1588
1589	if (res->f6i->fib6_pmtu) {
1590		mtu = res->f6i->fib6_pmtu;
1591	} else {
1592		struct net_device *dev = nh->fib_nh_dev;
1593		struct inet6_dev *idev;
1594
1595		rcu_read_lock();
1596		idev = __in6_dev_get(dev);
1597		mtu = idev->cnf.mtu6;
1598		rcu_read_unlock();
1599	}
1600
1601	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1602
1603	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
1604}
1605
1606#define FIB6_EXCEPTION_BUCKET_FLUSHED  0x1UL
1607
1608/* used when the flushed bit is not relevant, only access to the bucket
1609 * (ie., all bucket users except rt6_insert_exception);
1610 *
1611 * called under rcu lock; sometimes called with rt6_exception_lock held
1612 */
1613static
1614struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh,
1615						       spinlock_t *lock)
1616{
1617	struct rt6_exception_bucket *bucket;
1618
1619	if (lock)
1620		bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1621						   lockdep_is_held(lock));
1622	else
1623		bucket = rcu_dereference(nh->rt6i_exception_bucket);
1624
1625	/* remove bucket flushed bit if set */
1626	if (bucket) {
1627		unsigned long p = (unsigned long)bucket;
1628
1629		p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED;
1630		bucket = (struct rt6_exception_bucket *)p;
 
 
 
1631	}
1632
1633	return bucket;
1634}
1635
1636static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket)
1637{
1638	unsigned long p = (unsigned long)bucket;
1639
1640	return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED);
1641}
1642
1643/* called with rt6_exception_lock held */
1644static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh,
1645					      spinlock_t *lock)
1646{
1647	struct rt6_exception_bucket *bucket;
1648	unsigned long p;
1649
1650	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1651					   lockdep_is_held(lock));
1652
1653	p = (unsigned long)bucket;
1654	p |= FIB6_EXCEPTION_BUCKET_FLUSHED;
1655	bucket = (struct rt6_exception_bucket *)p;
1656	rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1657}
1658
1659static int rt6_insert_exception(struct rt6_info *nrt,
1660				const struct fib6_result *res)
1661{
1662	struct net *net = dev_net(nrt->dst.dev);
1663	struct rt6_exception_bucket *bucket;
1664	struct fib6_info *f6i = res->f6i;
1665	struct in6_addr *src_key = NULL;
1666	struct rt6_exception *rt6_ex;
1667	struct fib6_nh *nh = res->nh;
1668	int err = 0;
1669
1670	spin_lock_bh(&rt6_exception_lock);
1671
1672	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1673					  lockdep_is_held(&rt6_exception_lock));
1674	if (!bucket) {
1675		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1676				 GFP_ATOMIC);
1677		if (!bucket) {
1678			err = -ENOMEM;
1679			goto out;
1680		}
1681		rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1682	} else if (fib6_nh_excptn_bucket_flushed(bucket)) {
1683		err = -EINVAL;
1684		goto out;
1685	}
1686
1687#ifdef CONFIG_IPV6_SUBTREES
1688	/* fib6_src.plen != 0 indicates f6i is in subtree
1689	 * and exception table is indexed by a hash of
1690	 * both fib6_dst and fib6_src.
1691	 * Otherwise, the exception table is indexed by
1692	 * a hash of only fib6_dst.
1693	 */
1694	if (f6i->fib6_src.plen)
1695		src_key = &nrt->rt6i_src.addr;
1696#endif
1697	/* rt6_mtu_change() might lower mtu on f6i.
1698	 * Only insert this exception route if its mtu
1699	 * is less than f6i's mtu value.
1700	 */
1701	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
1702		err = -EINVAL;
1703		goto out;
1704	}
1705
1706	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1707					       src_key);
1708	if (rt6_ex)
1709		rt6_remove_exception(bucket, rt6_ex);
1710
1711	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1712	if (!rt6_ex) {
1713		err = -ENOMEM;
1714		goto out;
1715	}
1716	rt6_ex->rt6i = nrt;
1717	rt6_ex->stamp = jiffies;
1718	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1719	bucket->depth++;
1720	net->ipv6.rt6_stats->fib_rt_cache++;
1721
1722	if (bucket->depth > FIB6_MAX_DEPTH)
1723		rt6_exception_remove_oldest(bucket);
1724
1725out:
1726	spin_unlock_bh(&rt6_exception_lock);
1727
1728	/* Update fn->fn_sernum to invalidate all cached dst */
1729	if (!err) {
1730		spin_lock_bh(&f6i->fib6_table->tb6_lock);
1731		fib6_update_sernum(net, f6i);
1732		spin_unlock_bh(&f6i->fib6_table->tb6_lock);
1733		fib6_force_start_gc(net);
1734	}
1735
1736	return err;
1737}
1738
1739static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
1740{
1741	struct rt6_exception_bucket *bucket;
1742	struct rt6_exception *rt6_ex;
1743	struct hlist_node *tmp;
1744	int i;
1745
1746	spin_lock_bh(&rt6_exception_lock);
1747
1748	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
1749	if (!bucket)
1750		goto out;
1751
1752	/* Prevent rt6_insert_exception() to recreate the bucket list */
1753	if (!from)
1754		fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock);
1755
1756	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1757		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
1758			if (!from ||
1759			    rcu_access_pointer(rt6_ex->rt6i->from) == from)
1760				rt6_remove_exception(bucket, rt6_ex);
1761		}
1762		WARN_ON_ONCE(!from && bucket->depth);
1763		bucket++;
1764	}
1765out:
1766	spin_unlock_bh(&rt6_exception_lock);
1767}
1768
/* nexthop_for_each_fib6_nh() callback: flush the exceptions of @nh that
 * belong to the fib entry passed through @arg.  Always returns 0.
 */
static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)
{
	fib6_nh_flush_exceptions(nh, (struct fib6_info *)arg);

	return 0;
}
1777
1778void rt6_flush_exceptions(struct fib6_info *f6i)
1779{
1780	if (f6i->nh)
1781		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions,
1782					 f6i);
1783	else
1784		fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
1785}
1786
1787/* Find cached rt in the hash table inside passed in rt
1788 * Caller has to hold rcu_read_lock()
1789 */
1790static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
1791					   const struct in6_addr *daddr,
1792					   const struct in6_addr *saddr)
1793{
1794	const struct in6_addr *src_key = NULL;
1795	struct rt6_exception_bucket *bucket;
1796	struct rt6_exception *rt6_ex;
1797	struct rt6_info *ret = NULL;
1798
1799#ifdef CONFIG_IPV6_SUBTREES
1800	/* fib6i_src.plen != 0 indicates f6i is in subtree
1801	 * and exception table is indexed by a hash of
1802	 * both fib6_dst and fib6_src.
1803	 * However, the src addr used to create the hash
1804	 * might not be exactly the passed in saddr which
1805	 * is a /128 addr from the flow.
1806	 * So we need to use f6i->fib6_src to redo lookup
1807	 * if the passed in saddr does not find anything.
1808	 * (See the logic in ip6_rt_cache_alloc() on how
1809	 * rt->rt6i_src is updated.)
1810	 */
1811	if (res->f6i->fib6_src.plen)
1812		src_key = saddr;
1813find_ex:
1814#endif
1815	bucket = fib6_nh_get_excptn_bucket(res->nh, NULL);
1816	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1817
1818	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1819		ret = rt6_ex->rt6i;
1820
1821#ifdef CONFIG_IPV6_SUBTREES
1822	/* Use fib6_src as src_key and redo lookup */
1823	if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
1824		src_key = &res->f6i->fib6_src.addr;
1825		goto find_ex;
1826	}
1827#endif
1828
1829	return ret;
1830}
1831
1832/* Remove the passed in cached rt from the hash table that contains it */
1833static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
1834				    const struct rt6_info *rt)
1835{
1836	const struct in6_addr *src_key = NULL;
1837	struct rt6_exception_bucket *bucket;
1838	struct rt6_exception *rt6_ex;
1839	int err;
1840
1841	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
1842		return -ENOENT;
1843
1844	spin_lock_bh(&rt6_exception_lock);
1845	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
1846
1847#ifdef CONFIG_IPV6_SUBTREES
1848	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1849	 * and exception table is indexed by a hash of
1850	 * both rt6i_dst and rt6i_src.
1851	 * Otherwise, the exception table is indexed by
1852	 * a hash of only rt6i_dst.
1853	 */
1854	if (plen)
1855		src_key = &rt->rt6i_src.addr;
1856#endif
1857	rt6_ex = __rt6_find_exception_spinlock(&bucket,
1858					       &rt->rt6i_dst.addr,
1859					       src_key);
1860	if (rt6_ex) {
1861		rt6_remove_exception(bucket, rt6_ex);
1862		err = 0;
1863	} else {
1864		err = -ENOENT;
1865	}
1866
1867	spin_unlock_bh(&rt6_exception_lock);
1868	return err;
1869}
1870
/* Argument bundle for rt6_nh_remove_exception_rt(): the cached route to
 * remove and the source prefix length of its originating fib entry.
 */
struct fib6_nh_excptn_arg {
	struct rt6_info	*rt;
	int		plen;
};

/* nexthop_for_each_fib6_nh() callback: try to remove arg->rt from the
 * exception table of @nh.  Returns 1 when the entry was found and
 * removed, 0 otherwise.
 */
static int rt6_nh_remove_exception_rt(struct fib6_nh *nh, void *_arg)
{
	struct fib6_nh_excptn_arg *arg = _arg;

	if (fib6_nh_remove_exception(nh, arg->plen, arg->rt))
		return 0;

	return 1;
}
1887
1888static int rt6_remove_exception_rt(struct rt6_info *rt)
1889{
1890	struct fib6_info *from;
1891
1892	from = rcu_dereference(rt->from);
1893	if (!from || !(rt->rt6i_flags & RTF_CACHE))
1894		return -EINVAL;
1895
1896	if (from->nh) {
1897		struct fib6_nh_excptn_arg arg = {
1898			.rt = rt,
1899			.plen = from->fib6_src.plen
1900		};
1901		int rc;
1902
1903		/* rc = 1 means an entry was found */
1904		rc = nexthop_for_each_fib6_nh(from->nh,
1905					      rt6_nh_remove_exception_rt,
1906					      &arg);
1907		return rc ? 0 : -ENOENT;
1908	}
1909
1910	return fib6_nh_remove_exception(from->fib6_nh,
1911					from->fib6_src.plen, rt);
1912}
1913
1914/* Find rt6_ex which contains the passed in rt cache and
1915 * refresh its stamp
1916 */
1917static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
1918				     const struct rt6_info *rt)
1919{
1920	const struct in6_addr *src_key = NULL;
1921	struct rt6_exception_bucket *bucket;
1922	struct rt6_exception *rt6_ex;
1923
1924	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
1925#ifdef CONFIG_IPV6_SUBTREES
1926	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1927	 * and exception table is indexed by a hash of
1928	 * both rt6i_dst and rt6i_src.
1929	 * Otherwise, the exception table is indexed by
1930	 * a hash of only rt6i_dst.
1931	 */
1932	if (plen)
1933		src_key = &rt->rt6i_src.addr;
1934#endif
1935	rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
1936	if (rt6_ex)
1937		rt6_ex->stamp = jiffies;
1938}
1939
1940struct fib6_nh_match_arg {
1941	const struct net_device *dev;
1942	const struct in6_addr	*gw;
1943	struct fib6_nh		*match;
1944};
1945
1946/* determine if fib6_nh has given device and gateway */
1947static int fib6_nh_find_match(struct fib6_nh *nh, void *_arg)
1948{
1949	struct fib6_nh_match_arg *arg = _arg;
1950
1951	if (arg->dev != nh->fib_nh_dev ||
1952	    (arg->gw && !nh->fib_nh_gw_family) ||
1953	    (!arg->gw && nh->fib_nh_gw_family) ||
1954	    (arg->gw && !ipv6_addr_equal(arg->gw, &nh->fib_nh_gw6)))
1955		return 0;
1956
1957	arg->match = nh;
1958
1959	/* found a match, break the loop */
1960	return 1;
1961}
1962
1963static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1964{
1965	struct fib6_info *from;
1966	struct fib6_nh *fib6_nh;
1967
1968	rcu_read_lock();
1969
1970	from = rcu_dereference(rt->from);
1971	if (!from || !(rt->rt6i_flags & RTF_CACHE))
1972		goto unlock;
1973
1974	if (from->nh) {
1975		struct fib6_nh_match_arg arg = {
1976			.dev = rt->dst.dev,
1977			.gw = &rt->rt6i_gateway,
1978		};
1979
1980		nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg);
1981
1982		if (!arg.match)
1983			goto unlock;
1984		fib6_nh = arg.match;
1985	} else {
1986		fib6_nh = from->fib6_nh;
1987	}
1988	fib6_nh_update_exception(fib6_nh, from->fib6_src.plen, rt);
1989unlock:
1990	rcu_read_unlock();
1991}
1992
1993static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1994					 struct rt6_info *rt, int mtu)
1995{
1996	/* If the new MTU is lower than the route PMTU, this new MTU will be the
1997	 * lowest MTU in the path: always allow updating the route PMTU to
1998	 * reflect PMTU decreases.
1999	 *
2000	 * If the new MTU is higher, and the route PMTU is equal to the local
2001	 * MTU, this means the old MTU is the lowest in the path, so allow
2002	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
2003	 * handle this.
2004	 */
2005
2006	if (dst_mtu(&rt->dst) >= mtu)
2007		return true;
2008
2009	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
2010		return true;
2011
2012	return false;
2013}
2014
2015static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
2016				       const struct fib6_nh *nh, int mtu)
2017{
2018	struct rt6_exception_bucket *bucket;
2019	struct rt6_exception *rt6_ex;
2020	int i;
2021
2022	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2023	if (!bucket)
2024		return;
2025
2026	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2027		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
2028			struct rt6_info *entry = rt6_ex->rt6i;
2029
2030			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
2031			 * route), the metrics of its rt->from have already
2032			 * been updated.
2033			 */
2034			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
2035			    rt6_mtu_change_route_allowed(idev, entry, mtu))
2036				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
2037		}
2038		bucket++;
2039	}
2040}
2041
2042#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2043
2044static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh,
2045					    const struct in6_addr *gateway)
2046{
2047	struct rt6_exception_bucket *bucket;
2048	struct rt6_exception *rt6_ex;
2049	struct hlist_node *tmp;
2050	int i;
2051
2052	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
2053		return;
2054
2055	spin_lock_bh(&rt6_exception_lock);
2056	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2057	if (bucket) {
2058		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2059			hlist_for_each_entry_safe(rt6_ex, tmp,
2060						  &bucket->chain, hlist) {
2061				struct rt6_info *entry = rt6_ex->rt6i;
2062
2063				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
2064				    RTF_CACHE_GATEWAY &&
2065				    ipv6_addr_equal(gateway,
2066						    &entry->rt6i_gateway)) {
2067					rt6_remove_exception(bucket, rt6_ex);
2068				}
2069			}
2070			bucket++;
2071		}
2072	}
2073
2074	spin_unlock_bh(&rt6_exception_lock);
2075}
2076
2077static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
2078				      struct rt6_exception *rt6_ex,
2079				      struct fib6_gc_args *gc_args,
2080				      unsigned long now)
2081{
2082	struct rt6_info *rt = rt6_ex->rt6i;
2083
2084	/* we are pruning and obsoleting aged-out and non gateway exceptions
2085	 * even if others have still references to them, so that on next
2086	 * dst_check() such references can be dropped.
2087	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
2088	 * expired, independently from their aging, as per RFC 8201 section 4
2089	 */
2090	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
2091		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
2092			RT6_TRACE("aging clone %p\n", rt);
2093			rt6_remove_exception(bucket, rt6_ex);
2094			return;
2095		}
2096	} else if (time_after(jiffies, rt->dst.expires)) {
2097		RT6_TRACE("purging expired route %p\n", rt);
2098		rt6_remove_exception(bucket, rt6_ex);
2099		return;
2100	}
2101
2102	if (rt->rt6i_flags & RTF_GATEWAY) {
2103		struct neighbour *neigh;
2104		__u8 neigh_flags = 0;
2105
2106		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
2107		if (neigh)
2108			neigh_flags = neigh->flags;
2109
2110		if (!(neigh_flags & NTF_ROUTER)) {
2111			RT6_TRACE("purging route %p via non-router but gateway\n",
2112				  rt);
2113			rt6_remove_exception(bucket, rt6_ex);
2114			return;
2115		}
2116	}
2117
2118	gc_args->more++;
2119}
2120
2121static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
2122				   struct fib6_gc_args *gc_args,
2123				   unsigned long now)
2124{
2125	struct rt6_exception_bucket *bucket;
2126	struct rt6_exception *rt6_ex;
2127	struct hlist_node *tmp;
2128	int i;
2129
2130	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
2131		return;
2132
2133	rcu_read_lock_bh();
2134	spin_lock(&rt6_exception_lock);
2135	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2136	if (bucket) {
2137		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2138			hlist_for_each_entry_safe(rt6_ex, tmp,
2139						  &bucket->chain, hlist) {
2140				rt6_age_examine_exception(bucket, rt6_ex,
2141							  gc_args, now);
2142			}
2143			bucket++;
2144		}
2145	}
2146	spin_unlock(&rt6_exception_lock);
2147	rcu_read_unlock_bh();
2148}
2149
/* Argument bundle for rt6_nh_age_exceptions(): the gc state and the
 * timestamp to age against.
 */
struct fib6_nh_age_excptn_arg {
	struct fib6_gc_args	*gc_args;
	unsigned long		now;
};

/* nexthop_for_each_fib6_nh() callback: age the exceptions of @nh.
 * Always returns 0.
 */
static int rt6_nh_age_exceptions(struct fib6_nh *nh, void *_arg)
{
	const struct fib6_nh_age_excptn_arg *age = _arg;

	fib6_nh_age_exceptions(nh, age->gc_args, age->now);

	return 0;
}
2162
2163void rt6_age_exceptions(struct fib6_info *f6i,
2164			struct fib6_gc_args *gc_args,
2165			unsigned long now)
2166{
2167	if (f6i->nh) {
2168		struct fib6_nh_age_excptn_arg arg = {
2169			.gc_args = gc_args,
2170			.now = now
2171		};
2172
2173		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_age_exceptions,
2174					 &arg);
2175	} else {
2176		fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
2177	}
2178}
2179
2180/* must be called with rcu lock held */
2181int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
2182		      struct flowi6 *fl6, struct fib6_result *res, int strict)
2183{
2184	struct fib6_node *fn, *saved_fn;
2185
2186	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2187	saved_fn = fn;
2188
2189	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
2190		oif = 0;
2191
2192redo_rt6_select:
2193	rt6_select(net, fn, oif, res, strict);
2194	if (res->f6i == net->ipv6.fib6_null_entry) {
2195		fn = fib6_backtrack(fn, &fl6->saddr);
2196		if (fn)
2197			goto redo_rt6_select;
2198		else if (strict & RT6_LOOKUP_F_REACHABLE) {
2199			/* also consider unreachable route */
2200			strict &= ~RT6_LOOKUP_F_REACHABLE;
2201			fn = saved_fn;
2202			goto redo_rt6_select;
2203		}
2204	}
2205
2206	trace_fib6_table_lookup(net, res, table, fl6);
2207
2208	return 0;
2209}
2210
2211struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
2212			       int oif, struct flowi6 *fl6,
2213			       const struct sk_buff *skb, int flags)
2214{
2215	struct fib6_result res = {};
2216	struct rt6_info *rt = NULL;
2217	int strict = 0;
2218
2219	WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) &&
2220		     !rcu_read_lock_held());
2221
2222	strict |= flags & RT6_LOOKUP_F_IFACE;
2223	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
2224	if (net->ipv6.devconf_all->forwarding == 0)
2225		strict |= RT6_LOOKUP_F_REACHABLE;
2226
2227	rcu_read_lock();
2228
2229	fib6_table_lookup(net, table, oif, fl6, &res, strict);
2230	if (res.f6i == net->ipv6.fib6_null_entry)
2231		goto out;
2232
2233	fib6_select_path(net, &res, fl6, oif, false, skb, strict);
2234
2235	/*Search through exception table */
2236	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
2237	if (rt) {
2238		goto out;
2239	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
2240			    !res.nh->fib_nh_gw_family)) {
2241		/* Create a RTF_CACHE clone which will not be
2242		 * owned by the fib6 tree.  It is for the special case where
2243		 * the daddr in the skb during the neighbor look-up is different
2244		 * from the fl6->daddr used to look-up route here.
2245		 */
2246		rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
2247
2248		if (rt) {
2249			/* 1 refcnt is taken during ip6_rt_cache_alloc().
2250			 * As rt6_uncached_list_add() does not consume refcnt,
2251			 * this refcnt is always returned to the caller even
2252			 * if caller sets RT6_LOOKUP_F_DST_NOREF flag.
2253			 */
2254			rt6_uncached_list_add(rt);
2255			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2256			rcu_read_unlock();
2257
2258			return rt;
2259		}
2260	} else {
2261		/* Get a percpu copy */
2262		local_bh_disable();
2263		rt = rt6_get_pcpu_route(&res);
2264
2265		if (!rt)
2266			rt = rt6_make_pcpu_route(net, &res);
2267
2268		local_bh_enable();
2269	}
2270out:
2271	if (!rt)
2272		rt = net->ipv6.ip6_null_entry;
2273	if (!(flags & RT6_LOOKUP_F_DST_NOREF))
2274		ip6_hold_safe(net, &rt);
2275	rcu_read_unlock();
2276
2277	return rt;
2278}
2279EXPORT_SYMBOL_GPL(ip6_pol_route);
2280
2281INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_input(struct net *net,
2282					    struct fib6_table *table,
2283					    struct flowi6 *fl6,
2284					    const struct sk_buff *skb,
2285					    int flags)
2286{
2287	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
2288}
2289
2290struct dst_entry *ip6_route_input_lookup(struct net *net,
2291					 struct net_device *dev,
2292					 struct flowi6 *fl6,
2293					 const struct sk_buff *skb,
2294					 int flags)
2295{
2296	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
2297		flags |= RT6_LOOKUP_F_IFACE;
2298
2299	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
2300}
2301EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
2302
2303static void ip6_multipath_l3_keys(const struct sk_buff *skb,
2304				  struct flow_keys *keys,
2305				  struct flow_keys *flkeys)
2306{
2307	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
2308	const struct ipv6hdr *key_iph = outer_iph;
2309	struct flow_keys *_flkeys = flkeys;
2310	const struct ipv6hdr *inner_iph;
2311	const struct icmp6hdr *icmph;
2312	struct ipv6hdr _inner_iph;
2313	struct icmp6hdr _icmph;
2314
2315	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
2316		goto out;
2317
2318	icmph = skb_header_pointer(skb, skb_transport_offset(skb),
2319				   sizeof(_icmph), &_icmph);
2320	if (!icmph)
2321		goto out;
2322
2323	if (!icmpv6_is_err(icmph->icmp6_type))
2324		goto out;
2325
2326	inner_iph = skb_header_pointer(skb,
2327				       skb_transport_offset(skb) + sizeof(*icmph),
2328				       sizeof(_inner_iph), &_inner_iph);
2329	if (!inner_iph)
2330		goto out;
2331
2332	key_iph = inner_iph;
2333	_flkeys = NULL;
2334out:
2335	if (_flkeys) {
2336		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2337		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2338		keys->tags.flow_label = _flkeys->tags.flow_label;
2339		keys->basic.ip_proto = _flkeys->basic.ip_proto;
2340	} else {
2341		keys->addrs.v6addrs.src = key_iph->saddr;
2342		keys->addrs.v6addrs.dst = key_iph->daddr;
2343		keys->tags.flow_label = ip6_flowlabel(key_iph);
2344		keys->basic.ip_proto = key_iph->nexthdr;
2345	}
2346}
2347
2348/* if skb is set it will be used and fl6 can be NULL */
2349u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2350		       const struct sk_buff *skb, struct flow_keys *flkeys)
2351{
2352	struct flow_keys hash_keys;
2353	u32 mhash;
2354
2355	switch (ip6_multipath_hash_policy(net)) {
2356	case 0:
2357		memset(&hash_keys, 0, sizeof(hash_keys));
2358		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2359		if (skb) {
2360			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2361		} else {
2362			hash_keys.addrs.v6addrs.src = fl6->saddr;
2363			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2364			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2365			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2366		}
2367		break;
2368	case 1:
2369		if (skb) {
2370			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2371			struct flow_keys keys;
2372
2373			/* short-circuit if we already have L4 hash present */
2374			if (skb->l4_hash)
2375				return skb_get_hash_raw(skb) >> 1;
2376
2377			memset(&hash_keys, 0, sizeof(hash_keys));
2378
2379                        if (!flkeys) {
2380				skb_flow_dissect_flow_keys(skb, &keys, flag);
2381				flkeys = &keys;
2382			}
2383			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2384			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2385			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2386			hash_keys.ports.src = flkeys->ports.src;
2387			hash_keys.ports.dst = flkeys->ports.dst;
2388			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2389		} else {
2390			memset(&hash_keys, 0, sizeof(hash_keys));
2391			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2392			hash_keys.addrs.v6addrs.src = fl6->saddr;
2393			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2394			hash_keys.ports.src = fl6->fl6_sport;
2395			hash_keys.ports.dst = fl6->fl6_dport;
2396			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2397		}
2398		break;
2399	case 2:
2400		memset(&hash_keys, 0, sizeof(hash_keys));
2401		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2402		if (skb) {
2403			struct flow_keys keys;
2404
2405			if (!flkeys) {
2406				skb_flow_dissect_flow_keys(skb, &keys, 0);
2407				flkeys = &keys;
2408			}
2409
2410			/* Inner can be v4 or v6 */
2411			if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2412				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2413				hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
2414				hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
2415			} else if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2416				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2417				hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2418				hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2419				hash_keys.tags.flow_label = flkeys->tags.flow_label;
2420				hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2421			} else {
2422				/* Same as case 0 */
2423				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2424				ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2425			}
2426		} else {
2427			/* Same as case 0 */
2428			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2429			hash_keys.addrs.v6addrs.src = fl6->saddr;
2430			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2431			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2432			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2433		}
2434		break;
2435	}
2436	mhash = flow_hash_from_keys(&hash_keys);
2437
2438	return mhash >> 1;
2439}
2440
2441/* Called with rcu held */
2442void ip6_route_input(struct sk_buff *skb)
2443{
2444	const struct ipv6hdr *iph = ipv6_hdr(skb);
2445	struct net *net = dev_net(skb->dev);
2446	int flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_DST_NOREF;
2447	struct ip_tunnel_info *tun_info;
2448	struct flowi6 fl6 = {
2449		.flowi6_iif = skb->dev->ifindex,
2450		.daddr = iph->daddr,
2451		.saddr = iph->saddr,
2452		.flowlabel = ip6_flowinfo(iph),
2453		.flowi6_mark = skb->mark,
2454		.flowi6_proto = iph->nexthdr,
2455	};
2456	struct flow_keys *flkeys = NULL, _flkeys;
2457
2458	tun_info = skb_tunnel_info(skb);
2459	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2460		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
2461
2462	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2463		flkeys = &_flkeys;
2464
2465	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2466		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2467	skb_dst_drop(skb);
2468	skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev,
2469						      &fl6, skb, flags));
2470}
2471
2472INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net,
2473					     struct fib6_table *table,
2474					     struct flowi6 *fl6,
2475					     const struct sk_buff *skb,
2476					     int flags)
2477{
2478	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2479}
2480
2481struct dst_entry *ip6_route_output_flags_noref(struct net *net,
2482					       const struct sock *sk,
2483					       struct flowi6 *fl6, int flags)
2484{
2485	bool any_src;
2486
2487	if (ipv6_addr_type(&fl6->daddr) &
2488	    (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
2489		struct dst_entry *dst;
2490
2491		/* This function does not take refcnt on the dst */
2492		dst = l3mdev_link_scope_lookup(net, fl6);
2493		if (dst)
2494			return dst;
2495	}
2496
2497	fl6->flowi6_iif = LOOPBACK_IFINDEX;
2498
2499	flags |= RT6_LOOKUP_F_DST_NOREF;
2500	any_src = ipv6_addr_any(&fl6->saddr);
2501	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2502	    (fl6->flowi6_oif && any_src))
2503		flags |= RT6_LOOKUP_F_IFACE;
2504
2505	if (!any_src)
2506		flags |= RT6_LOOKUP_F_HAS_SADDR;
2507	else if (sk)
2508		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2509
2510	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2511}
2512EXPORT_SYMBOL_GPL(ip6_route_output_flags_noref);
2513
2514struct dst_entry *ip6_route_output_flags(struct net *net,
2515					 const struct sock *sk,
2516					 struct flowi6 *fl6,
2517					 int flags)
2518{
2519        struct dst_entry *dst;
2520        struct rt6_info *rt6;
2521
2522        rcu_read_lock();
2523        dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
2524        rt6 = (struct rt6_info *)dst;
2525        /* For dst cached in uncached_list, refcnt is already taken. */
2526        if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
2527                dst = &net->ipv6.ip6_null_entry->dst;
2528                dst_hold(dst);
2529        }
2530        rcu_read_unlock();
2531
2532        return dst;
2533}
2534EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2535
2536struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2537{
2538	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
2539	struct net_device *loopback_dev = net->loopback_dev;
2540	struct dst_entry *new = NULL;
2541
2542	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
2543		       DST_OBSOLETE_DEAD, 0);
2544	if (rt) {
2545		rt6_info_init(rt);
2546		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
2547
2548		new = &rt->dst;
 
2549		new->__use = 1;
2550		new->input = dst_discard;
2551		new->output = dst_discard_out;
2552
2553		dst_copy_metrics(new, &ort->dst);
2554
2555		rt->rt6i_idev = in6_dev_get(loopback_dev);
2556		rt->rt6i_gateway = ort->rt6i_gateway;
2557		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
 
 
 
 
 
 
 
2558
2559		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2560#ifdef CONFIG_IPV6_SUBTREES
2561		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2562#endif
 
 
2563	}
2564
2565	dst_release(dst_orig);
2566	return new ? new : ERR_PTR(-ENOMEM);
2567}
2568
2569/*
2570 *	Destination cache support functions
2571 */
2572
2573static bool fib6_check(struct fib6_info *f6i, u32 cookie)
2574{
2575	u32 rt_cookie = 0;
2576
2577	if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
2578		return false;
2579
2580	if (fib6_check_expired(f6i))
2581		return false;
2582
2583	return true;
2584}
2585
2586static struct dst_entry *rt6_check(struct rt6_info *rt,
2587				   struct fib6_info *from,
2588				   u32 cookie)
2589{
2590	u32 rt_cookie = 0;
2591
2592	if (!from || !fib6_get_cookie_safe(from, &rt_cookie) ||
2593	    rt_cookie != cookie)
2594		return NULL;
2595
2596	if (rt6_check_expired(rt))
2597		return NULL;
2598
2599	return &rt->dst;
2600}
2601
2602static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2603					    struct fib6_info *from,
2604					    u32 cookie)
2605{
2606	if (!__rt6_check_expired(rt) &&
2607	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2608	    fib6_check(from, cookie))
2609		return &rt->dst;
2610	else
2611		return NULL;
2612}
2613
2614static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2615{
2616	struct dst_entry *dst_ret;
2617	struct fib6_info *from;
2618	struct rt6_info *rt;
2619
2620	rt = container_of(dst, struct rt6_info, dst);
2621
2622	if (rt->sernum)
2623		return rt6_is_valid(rt) ? dst : NULL;
2624
2625	rcu_read_lock();
2626
2627	/* All IPV6 dsts are created with ->obsolete set to the value
2628	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2629	 * into this function always.
2630	 */
2631
2632	from = rcu_dereference(rt->from);
2633
2634	if (from && (rt->rt6i_flags & RTF_PCPU ||
2635	    unlikely(!list_empty(&rt->rt6i_uncached))))
2636		dst_ret = rt6_dst_from_check(rt, from, cookie);
2637	else
2638		dst_ret = rt6_check(rt, from, cookie);
2639
2640	rcu_read_unlock();
2641
2642	return dst_ret;
2643}
2644
2645static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2646{
2647	struct rt6_info *rt = (struct rt6_info *) dst;
2648
2649	if (rt) {
2650		if (rt->rt6i_flags & RTF_CACHE) {
2651			rcu_read_lock();
2652			if (rt6_check_expired(rt)) {
2653				rt6_remove_exception_rt(rt);
2654				dst = NULL;
2655			}
2656			rcu_read_unlock();
2657		} else {
2658			dst_release(dst);
2659			dst = NULL;
2660		}
2661	}
2662	return dst;
2663}
2664
2665static void ip6_link_failure(struct sk_buff *skb)
2666{
2667	struct rt6_info *rt;
2668
2669	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
2670
2671	rt = (struct rt6_info *) skb_dst(skb);
2672	if (rt) {
2673		rcu_read_lock();
2674		if (rt->rt6i_flags & RTF_CACHE) {
2675			rt6_remove_exception_rt(rt);
2676		} else {
2677			struct fib6_info *from;
2678			struct fib6_node *fn;
2679
2680			from = rcu_dereference(rt->from);
2681			if (from) {
2682				fn = rcu_dereference(from->fib6_node);
2683				if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2684					fn->fn_sernum = -1;
2685			}
2686		}
2687		rcu_read_unlock();
2688	}
2689}
2690
2691static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2692{
2693	if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2694		struct fib6_info *from;
2695
2696		rcu_read_lock();
2697		from = rcu_dereference(rt0->from);
2698		if (from)
2699			rt0->dst.expires = from->expires;
2700		rcu_read_unlock();
2701	}
2702
2703	dst_set_expires(&rt0->dst, timeout);
2704	rt0->rt6i_flags |= RTF_EXPIRES;
2705}
2706
2707static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2708{
2709	struct net *net = dev_net(rt->dst.dev);
2710
2711	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
2712	rt->rt6i_flags |= RTF_MODIFIED;
2713	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2714}
2715
2716static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2717{
2718	return !(rt->rt6i_flags & RTF_CACHE) &&
2719		(rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
2720}
2721
2722static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2723				 const struct ipv6hdr *iph, u32 mtu,
2724				 bool confirm_neigh)
2725{
2726	const struct in6_addr *daddr, *saddr;
2727	struct rt6_info *rt6 = (struct rt6_info *)dst;
2728
2729	/* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU)
2730	 * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
2731	 * [see also comment in rt6_mtu_change_route()]
2732	 */
2733
2734	if (iph) {
2735		daddr = &iph->daddr;
2736		saddr = &iph->saddr;
2737	} else if (sk) {
2738		daddr = &sk->sk_v6_daddr;
2739		saddr = &inet6_sk(sk)->saddr;
2740	} else {
2741		daddr = NULL;
2742		saddr = NULL;
2743	}
2744
2745	if (confirm_neigh)
2746		dst_confirm_neigh(dst, daddr);
2747
2748	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2749	if (mtu >= dst_mtu(dst))
2750		return;
2751
2752	if (!rt6_cache_allowed_for_pmtu(rt6)) {
2753		rt6_do_update_pmtu(rt6, mtu);
2754		/* update rt6_ex->stamp for cache */
2755		if (rt6->rt6i_flags & RTF_CACHE)
2756			rt6_update_exception_stamp_rt(rt6);
2757	} else if (daddr) {
2758		struct fib6_result res = {};
2759		struct rt6_info *nrt6;
2760
2761		rcu_read_lock();
2762		res.f6i = rcu_dereference(rt6->from);
2763		if (!res.f6i)
2764			goto out_unlock;
2765
2766		res.fib6_flags = res.f6i->fib6_flags;
2767		res.fib6_type = res.f6i->fib6_type;
2768
2769		if (res.f6i->nh) {
2770			struct fib6_nh_match_arg arg = {
2771				.dev = dst->dev,
2772				.gw = &rt6->rt6i_gateway,
2773			};
2774
2775			nexthop_for_each_fib6_nh(res.f6i->nh,
2776						 fib6_nh_find_match, &arg);
2777
2778			/* fib6_info uses a nexthop that does not have fib6_nh
2779			 * using the dst->dev + gw. Should be impossible.
2780			 */
2781			if (!arg.match)
2782				goto out_unlock;
2783
2784			res.nh = arg.match;
2785		} else {
2786			res.nh = res.f6i->fib6_nh;
2787		}
2788
2789		nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
2790		if (nrt6) {
2791			rt6_do_update_pmtu(nrt6, mtu);
2792			if (rt6_insert_exception(nrt6, &res))
2793				dst_release_immediate(&nrt6->dst);
2794		}
2795out_unlock:
2796		rcu_read_unlock();
2797	}
2798}
2799
2800static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2801			       struct sk_buff *skb, u32 mtu,
2802			       bool confirm_neigh)
2803{
2804	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu,
2805			     confirm_neigh);
2806}
2807
2808void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2809		     int oif, u32 mark, kuid_t uid)
2810{
2811	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2812	struct dst_entry *dst;
2813	struct flowi6 fl6 = {
2814		.flowi6_oif = oif,
2815		.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2816		.daddr = iph->daddr,
2817		.saddr = iph->saddr,
2818		.flowlabel = ip6_flowinfo(iph),
2819		.flowi6_uid = uid,
2820	};
2821
2822	dst = ip6_route_output(net, NULL, &fl6);
2823	if (!dst->error)
2824		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true);
2825	dst_release(dst);
2826}
2827EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2828
2829void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2830{
2831	int oif = sk->sk_bound_dev_if;
2832	struct dst_entry *dst;
2833
2834	if (!oif && skb->dev)
2835		oif = l3mdev_master_ifindex(skb->dev);
2836
2837	ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
2838
2839	dst = __sk_dst_get(sk);
2840	if (!dst || !dst->obsolete ||
2841	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2842		return;
2843
2844	bh_lock_sock(sk);
2845	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2846		ip6_datagram_dst_update(sk, false);
2847	bh_unlock_sock(sk);
2848}
2849EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2850
2851void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2852			   const struct flowi6 *fl6)
2853{
2854#ifdef CONFIG_IPV6_SUBTREES
2855	struct ipv6_pinfo *np = inet6_sk(sk);
2856#endif
2857
2858	ip6_dst_store(sk, dst,
2859		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2860		      &sk->sk_v6_daddr : NULL,
2861#ifdef CONFIG_IPV6_SUBTREES
2862		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2863		      &np->saddr :
2864#endif
2865		      NULL);
2866}
2867
2868static bool ip6_redirect_nh_match(const struct fib6_result *res,
2869				  struct flowi6 *fl6,
2870				  const struct in6_addr *gw,
2871				  struct rt6_info **ret)
2872{
2873	const struct fib6_nh *nh = res->nh;
2874
2875	if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
2876	    fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
2877		return false;
2878
2879	/* rt_cache's gateway might be different from its 'parent'
2880	 * in the case of an ip redirect.
2881	 * So we keep searching in the exception table if the gateway
2882	 * is different.
2883	 */
2884	if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
2885		struct rt6_info *rt_cache;
2886
2887		rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
2888		if (rt_cache &&
2889		    ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
2890			*ret = rt_cache;
2891			return true;
2892		}
2893		return false;
2894	}
2895	return true;
2896}
2897
2898struct fib6_nh_rd_arg {
2899	struct fib6_result	*res;
2900	struct flowi6		*fl6;
2901	const struct in6_addr	*gw;
2902	struct rt6_info		**ret;
2903};
2904
2905static int fib6_nh_redirect_match(struct fib6_nh *nh, void *_arg)
2906{
2907	struct fib6_nh_rd_arg *arg = _arg;
2908
2909	arg->res->nh = nh;
2910	return ip6_redirect_nh_match(arg->res, arg->fl6, arg->gw, arg->ret);
2911}
2912
2913/* Handle redirects */
2914struct ip6rd_flowi {
2915	struct flowi6 fl6;
2916	struct in6_addr gateway;
2917};
2918
2919INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net,
2920					     struct fib6_table *table,
2921					     struct flowi6 *fl6,
2922					     const struct sk_buff *skb,
2923					     int flags)
2924{
2925	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2926	struct rt6_info *ret = NULL;
2927	struct fib6_result res = {};
2928	struct fib6_nh_rd_arg arg = {
2929		.res = &res,
2930		.fl6 = fl6,
2931		.gw  = &rdfl->gateway,
2932		.ret = &ret
2933	};
2934	struct fib6_info *rt;
2935	struct fib6_node *fn;
2936
2937	/* l3mdev_update_flow overrides oif if the device is enslaved; in
2938	 * this case we must match on the real ingress device, so reset it
2939	 */
2940	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
2941		fl6->flowi6_oif = skb->dev->ifindex;
2942
2943	/* Get the "current" route for this destination and
2944	 * check if the redirect has come from appropriate router.
2945	 *
2946	 * RFC 4861 specifies that redirects should only be
2947	 * accepted if they come from the nexthop to the target.
2948	 * Due to the way the routes are chosen, this notion
2949	 * is a bit fuzzy and one might need to check all possible
2950	 * routes.
2951	 */
2952
2953	rcu_read_lock();
2954	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2955restart:
2956	for_each_fib6_node_rt_rcu(fn) {
2957		res.f6i = rt;
2958		if (fib6_check_expired(rt))
2959			continue;
2960		if (rt->fib6_flags & RTF_REJECT)
2961			break;
2962		if (unlikely(rt->nh)) {
2963			if (nexthop_is_blackhole(rt->nh))
2964				continue;
2965			/* on match, res->nh is filled in and potentially ret */
2966			if (nexthop_for_each_fib6_nh(rt->nh,
2967						     fib6_nh_redirect_match,
2968						     &arg))
2969				goto out;
2970		} else {
2971			res.nh = rt->fib6_nh;
2972			if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway,
2973						  &ret))
2974				goto out;
2975		}
 
2976	}
2977
2978	if (!rt)
2979		rt = net->ipv6.fib6_null_entry;
2980	else if (rt->fib6_flags & RTF_REJECT) {
2981		ret = net->ipv6.ip6_null_entry;
2982		goto out;
2983	}
2984
2985	if (rt == net->ipv6.fib6_null_entry) {
2986		fn = fib6_backtrack(fn, &fl6->saddr);
2987		if (fn)
2988			goto restart;
2989	}
2990
2991	res.f6i = rt;
2992	res.nh = rt->fib6_nh;
2993out:
2994	if (ret) {
2995		ip6_hold_safe(net, &ret);
2996	} else {
2997		res.fib6_flags = res.f6i->fib6_flags;
2998		res.fib6_type = res.f6i->fib6_type;
2999		ret = ip6_create_rt_rcu(&res);
3000	}
3001
3002	rcu_read_unlock();
3003
3004	trace_fib6_table_lookup(net, &res, table, fl6);
3005	return ret;
3006};
3007
3008static struct dst_entry *ip6_route_redirect(struct net *net,
3009					    const struct flowi6 *fl6,
3010					    const struct sk_buff *skb,
3011					    const struct in6_addr *gateway)
3012{
3013	int flags = RT6_LOOKUP_F_HAS_SADDR;
3014	struct ip6rd_flowi rdfl;
3015
3016	rdfl.fl6 = *fl6;
3017	rdfl.gateway = *gateway;
3018
3019	return fib6_rule_lookup(net, &rdfl.fl6, skb,
3020				flags, __ip6_route_redirect);
3021}
3022
3023void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
3024		  kuid_t uid)
3025{
3026	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
3027	struct dst_entry *dst;
3028	struct flowi6 fl6 = {
3029		.flowi6_iif = LOOPBACK_IFINDEX,
3030		.flowi6_oif = oif,
3031		.flowi6_mark = mark,
3032		.daddr = iph->daddr,
3033		.saddr = iph->saddr,
3034		.flowlabel = ip6_flowinfo(iph),
3035		.flowi6_uid = uid,
3036	};
3037
3038	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
3039	rt6_do_redirect(dst, NULL, skb);
3040	dst_release(dst);
3041}
3042EXPORT_SYMBOL_GPL(ip6_redirect);
3043
3044void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
3045{
3046	const struct ipv6hdr *iph = ipv6_hdr(skb);
3047	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
3048	struct dst_entry *dst;
3049	struct flowi6 fl6 = {
3050		.flowi6_iif = LOOPBACK_IFINDEX,
3051		.flowi6_oif = oif,
3052		.daddr = msg->dest,
3053		.saddr = iph->daddr,
3054		.flowi6_uid = sock_net_uid(net, NULL),
3055	};
3056
3057	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
3058	rt6_do_redirect(dst, NULL, skb);
3059	dst_release(dst);
3060}
3061
3062void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
3063{
3064	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
3065		     sk->sk_uid);
3066}
3067EXPORT_SYMBOL_GPL(ip6_sk_redirect);
3068
3069static unsigned int ip6_default_advmss(const struct dst_entry *dst)
3070{
3071	struct net_device *dev = dst->dev;
3072	unsigned int mtu = dst_mtu(dst);
3073	struct net *net = dev_net(dev);
3074
3075	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
3076
3077	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
3078		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
3079
3080	/*
3081	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
3082	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
3083	 * IPV6_MAXPLEN is also valid and means: "any MSS,
3084	 * rely only on pmtu discovery"
3085	 */
3086	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
3087		mtu = IPV6_MAXPLEN;
3088	return mtu;
3089}
3090
3091static unsigned int ip6_mtu(const struct dst_entry *dst)
3092{
 
3093	struct inet6_dev *idev;
3094	unsigned int mtu;
3095
3096	mtu = dst_metric_raw(dst, RTAX_MTU);
3097	if (mtu)
3098		goto out;
3099
3100	mtu = IPV6_MIN_MTU;
3101
3102	rcu_read_lock();
3103	idev = __in6_dev_get(dst->dev);
3104	if (idev)
3105		mtu = idev->cnf.mtu6;
3106	rcu_read_unlock();
3107
3108out:
3109	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3110
3111	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
3112}
3113
3114/* MTU selection:
3115 * 1. mtu on route is locked - use it
3116 * 2. mtu from nexthop exception
3117 * 3. mtu from egress device
3118 *
3119 * based on ip6_dst_mtu_forward and exception logic of
3120 * rt6_find_cached_rt; called with rcu_read_lock
3121 */
3122u32 ip6_mtu_from_fib6(const struct fib6_result *res,
3123		      const struct in6_addr *daddr,
3124		      const struct in6_addr *saddr)
3125{
3126	const struct fib6_nh *nh = res->nh;
3127	struct fib6_info *f6i = res->f6i;
3128	struct inet6_dev *idev;
3129	struct rt6_info *rt;
3130	u32 mtu = 0;
3131
3132	if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
3133		mtu = f6i->fib6_pmtu;
3134		if (mtu)
3135			goto out;
3136	}
3137
3138	rt = rt6_find_cached_rt(res, daddr, saddr);
3139	if (unlikely(rt)) {
3140		mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
3141	} else {
3142		struct net_device *dev = nh->fib_nh_dev;
3143
3144		mtu = IPV6_MIN_MTU;
3145		idev = __in6_dev_get(dev);
3146		if (idev && idev->cnf.mtu6 > mtu)
3147			mtu = idev->cnf.mtu6;
3148	}
3149
3150	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3151out:
3152	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
3153}
3154
3155struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
3156				  struct flowi6 *fl6)
 
3157{
3158	struct dst_entry *dst;
3159	struct rt6_info *rt;
3160	struct inet6_dev *idev = in6_dev_get(dev);
3161	struct net *net = dev_net(dev);
3162
3163	if (unlikely(!idev))
3164		return ERR_PTR(-ENODEV);
3165
3166	rt = ip6_dst_alloc(net, dev, 0);
3167	if (unlikely(!rt)) {
3168		in6_dev_put(idev);
3169		dst = ERR_PTR(-ENOMEM);
3170		goto out;
3171	}
3172
3173	rt->dst.input = ip6_input;
 
 
 
 
 
 
 
 
3174	rt->dst.output  = ip6_output;
3175	rt->rt6i_gateway  = fl6->daddr;
3176	rt->rt6i_dst.addr = fl6->daddr;
 
 
 
3177	rt->rt6i_dst.plen = 128;
3178	rt->rt6i_idev     = idev;
3179	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
3180
3181	/* Add this dst into uncached_list so that rt6_disable_ip() can
3182	 * do proper release of the net_device
3183	 */
3184	rt6_uncached_list_add(rt);
3185	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
3186
3187	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
3188
3189out:
3190	return dst;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3191}
3192
3193static int ip6_dst_gc(struct dst_ops *ops)
3194{
 
3195	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
3196	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
3197	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
3198	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
3199	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
3200	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
3201	int entries;
3202
3203	entries = dst_entries_get_fast(ops);
3204	if (entries > rt_max_size)
3205		entries = dst_entries_get_slow(ops);
3206
3207	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
3208	    entries <= rt_max_size)
3209		goto out;
3210
3211	net->ipv6.ip6_rt_gc_expire++;
3212	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
 
3213	entries = dst_entries_get_slow(ops);
3214	if (entries < ops->gc_thresh)
3215		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
3216out:
3217	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
3218	return entries > rt_max_size;
3219}
3220
3221static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg,
3222			       const struct in6_addr *gw_addr, u32 tbid,
3223			       int flags, struct fib6_result *res)
3224{
3225	struct flowi6 fl6 = {
3226		.flowi6_oif = cfg->fc_ifindex,
3227		.daddr = *gw_addr,
3228		.saddr = cfg->fc_prefsrc,
3229	};
3230	struct fib6_table *table;
3231	int err;
3232
3233	table = fib6_get_table(net, tbid);
3234	if (!table)
3235		return -EINVAL;
3236
3237	if (!ipv6_addr_any(&cfg->fc_prefsrc))
3238		flags |= RT6_LOOKUP_F_HAS_SADDR;
3239
3240	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
3241
3242	err = fib6_table_lookup(net, table, cfg->fc_ifindex, &fl6, res, flags);
3243	if (!err && res->f6i != net->ipv6.fib6_null_entry)
3244		fib6_select_path(net, res, &fl6, cfg->fc_ifindex,
3245				 cfg->fc_ifindex != 0, NULL, flags);
3246
3247	return err;
3248}
3249
3250static int ip6_route_check_nh_onlink(struct net *net,
3251				     struct fib6_config *cfg,
3252				     const struct net_device *dev,
3253				     struct netlink_ext_ack *extack)
3254{
3255	u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
3256	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3257	struct fib6_result res = {};
3258	int err;
3259
3260	err = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0, &res);
3261	if (!err && !(res.fib6_flags & RTF_REJECT) &&
3262	    /* ignore match if it is the default route */
3263	    !ipv6_addr_any(&res.f6i->fib6_dst.addr) &&
3264	    (res.fib6_type != RTN_UNICAST || dev != res.nh->fib_nh_dev)) {
3265		NL_SET_ERR_MSG(extack,
3266			       "Nexthop has invalid gateway or device mismatch");
3267		err = -EINVAL;
3268	}
3269
3270	return err;
3271}
3272
3273static int ip6_route_check_nh(struct net *net,
3274			      struct fib6_config *cfg,
3275			      struct net_device **_dev,
3276			      struct inet6_dev **idev)
3277{
3278	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3279	struct net_device *dev = _dev ? *_dev : NULL;
3280	int flags = RT6_LOOKUP_F_IFACE;
3281	struct fib6_result res = {};
3282	int err = -EHOSTUNREACH;
3283
3284	if (cfg->fc_table) {
3285		err = ip6_nh_lookup_table(net, cfg, gw_addr,
3286					  cfg->fc_table, flags, &res);
3287		/* gw_addr can not require a gateway or resolve to a reject
3288		 * route. If a device is given, it must match the result.
3289		 */
3290		if (err || res.fib6_flags & RTF_REJECT ||
3291		    res.nh->fib_nh_gw_family ||
3292		    (dev && dev != res.nh->fib_nh_dev))
3293			err = -EHOSTUNREACH;
3294	}
3295
3296	if (err < 0) {
3297		struct flowi6 fl6 = {
3298			.flowi6_oif = cfg->fc_ifindex,
3299			.daddr = *gw_addr,
3300		};
3301
3302		err = fib6_lookup(net, cfg->fc_ifindex, &fl6, &res, flags);
3303		if (err || res.fib6_flags & RTF_REJECT ||
3304		    res.nh->fib_nh_gw_family)
3305			err = -EHOSTUNREACH;
3306
3307		if (err)
3308			return err;
3309
3310		fib6_select_path(net, &res, &fl6, cfg->fc_ifindex,
3311				 cfg->fc_ifindex != 0, NULL, flags);
3312	}
3313
3314	err = 0;
3315	if (dev) {
3316		if (dev != res.nh->fib_nh_dev)
3317			err = -EHOSTUNREACH;
3318	} else {
3319		*_dev = dev = res.nh->fib_nh_dev;
3320		dev_hold(dev);
3321		*idev = in6_dev_get(dev);
3322	}
3323
3324	return err;
3325}
3326
3327static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
3328			   struct net_device **_dev, struct inet6_dev **idev,
3329			   struct netlink_ext_ack *extack)
3330{
3331	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3332	int gwa_type = ipv6_addr_type(gw_addr);
3333	bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
3334	const struct net_device *dev = *_dev;
3335	bool need_addr_check = !dev;
3336	int err = -EINVAL;
3337
3338	/* if gw_addr is local we will fail to detect this in case
3339	 * address is still TENTATIVE (DAD in progress). rt6_lookup()
3340	 * will return already-added prefix route via interface that
3341	 * prefix route was assigned to, which might be non-loopback.
3342	 */
3343	if (dev &&
3344	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3345		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
3346		goto out;
3347	}
3348
3349	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
3350		/* IPv6 strictly inhibits using not link-local
3351		 * addresses as nexthop address.
3352		 * Otherwise, router will not able to send redirects.
3353		 * It is very good, but in some (rare!) circumstances
3354		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
3355		 * some exceptions. --ANK
3356		 * We allow IPv4-mapped nexthops to support RFC4798-type
3357		 * addressing
3358		 */
3359		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
3360			NL_SET_ERR_MSG(extack, "Invalid gateway address");
3361			goto out;
3362		}
3363
3364		rcu_read_lock();
3365
3366		if (cfg->fc_flags & RTNH_F_ONLINK)
3367			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
3368		else
3369			err = ip6_route_check_nh(net, cfg, _dev, idev);
3370
3371		rcu_read_unlock();
3372
3373		if (err)
3374			goto out;
3375	}
3376
3377	/* reload in case device was changed */
3378	dev = *_dev;
3379
3380	err = -EINVAL;
3381	if (!dev) {
3382		NL_SET_ERR_MSG(extack, "Egress device not specified");
3383		goto out;
3384	} else if (dev->flags & IFF_LOOPBACK) {
3385		NL_SET_ERR_MSG(extack,
3386			       "Egress device can not be loopback device for this route");
3387		goto out;
3388	}
3389
3390	/* if we did not check gw_addr above, do so now that the
3391	 * egress device has been resolved.
3392	 */
3393	if (need_addr_check &&
3394	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3395		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
3396		goto out;
3397	}
3398
3399	err = 0;
3400out:
3401	return err;
3402}
 
3403
3404static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
3405{
3406	if ((flags & RTF_REJECT) ||
3407	    (dev && (dev->flags & IFF_LOOPBACK) &&
3408	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
3409	     !(flags & (RTF_ANYCAST | RTF_LOCAL))))
3410		return true;
3411
3412	return false;
3413}
3414
3415int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
3416		 struct fib6_config *cfg, gfp_t gfp_flags,
3417		 struct netlink_ext_ack *extack)
3418{
 
 
 
3419	struct net_device *dev = NULL;
3420	struct inet6_dev *idev = NULL;
 
3421	int addr_type;
3422	int err;
3423
3424	fib6_nh->fib_nh_family = AF_INET6;
3425#ifdef CONFIG_IPV6_ROUTER_PREF
3426	fib6_nh->last_probe = jiffies;
 
 
3427#endif
3428	if (cfg->fc_is_fdb) {
3429		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
3430		fib6_nh->fib_nh_gw_family = AF_INET6;
3431		return 0;
3432	}
3433
3434	err = -ENODEV;
3435	if (cfg->fc_ifindex) {
 
3436		dev = dev_get_by_index(net, cfg->fc_ifindex);
3437		if (!dev)
3438			goto out;
3439		idev = in6_dev_get(dev);
3440		if (!idev)
3441			goto out;
3442	}
3443
3444	if (cfg->fc_flags & RTNH_F_ONLINK) {
3445		if (!dev) {
3446			NL_SET_ERR_MSG(extack,
3447				       "Nexthop device required for onlink");
3448			goto out;
3449		}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3450
3451		if (!(dev->flags & IFF_UP)) {
3452			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3453			err = -ENETDOWN;
 
 
 
 
 
 
3454			goto out;
3455		}
3456
3457		fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
3458	}
 
 
 
 
3459
3460	fib6_nh->fib_nh_weight = 1;
3461
3462	/* We cannot add true routes via loopback here,
3463	 * they would result in kernel looping; promote them to reject routes
3464	 */
3465	addr_type = ipv6_addr_type(&cfg->fc_dst);
3466	if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
 
3467		/* hold loopback dev/idev if we haven't done so. */
3468		if (dev != net->loopback_dev) {
3469			if (dev) {
3470				dev_put(dev);
3471				in6_dev_put(idev);
3472			}
3473			dev = net->loopback_dev;
3474			dev_hold(dev);
3475			idev = in6_dev_get(dev);
3476			if (!idev) {
3477				err = -ENODEV;
3478				goto out;
3479			}
3480		}
3481		goto pcpu_alloc;
 
 
 
 
3482	}
3483
3484	if (cfg->fc_flags & RTF_GATEWAY) {
3485		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3486		if (err)
3487			goto out;
3488
3489		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
3490		fib6_nh->fib_nh_gw_family = AF_INET6;
3491	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3492
3493	err = -ENODEV;
3494	if (!dev)
3495		goto out;
3496
3497	if (idev->cnf.disable_ipv6) {
3498		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3499		err = -EACCES;
3500		goto out;
3501	}
 
 
 
 
 
 
 
 
 
 
 
 
3502
3503	if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
3504		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3505		err = -ENETDOWN;
3506		goto out;
3507	}
3508
3509	if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3510	    !netif_carrier_ok(dev))
3511		fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
3512
3513	err = fib_nh_common_init(net, &fib6_nh->nh_common, cfg->fc_encap,
3514				 cfg->fc_encap_type, cfg, gfp_flags, extack);
3515	if (err)
3516		goto out;
3517
3518pcpu_alloc:
3519	fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
3520	if (!fib6_nh->rt6i_pcpu) {
3521		err = -ENOMEM;
3522		goto out;
3523	}
3524
3525	fib6_nh->fib_nh_dev = dev;
3526	fib6_nh->fib_nh_oif = dev->ifindex;
3527	err = 0;
3528out:
3529	if (idev)
3530		in6_dev_put(idev);
3531
3532	if (err) {
3533		lwtstate_put(fib6_nh->fib_nh_lws);
3534		fib6_nh->fib_nh_lws = NULL;
3535		if (dev)
3536			dev_put(dev);
3537	}
3538
3539	return err;
3540}
3541
3542void fib6_nh_release(struct fib6_nh *fib6_nh)
3543{
3544	struct rt6_exception_bucket *bucket;
3545
3546	rcu_read_lock();
3547
3548	fib6_nh_flush_exceptions(fib6_nh, NULL);
3549	bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL);
3550	if (bucket) {
3551		rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL);
3552		kfree(bucket);
3553	}
3554
3555	rcu_read_unlock();
3556
3557	if (fib6_nh->rt6i_pcpu) {
3558		int cpu;
3559
3560		for_each_possible_cpu(cpu) {
3561			struct rt6_info **ppcpu_rt;
3562			struct rt6_info *pcpu_rt;
3563
3564			ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
3565			pcpu_rt = *ppcpu_rt;
3566			if (pcpu_rt) {
3567				dst_dev_put(&pcpu_rt->dst);
3568				dst_release(&pcpu_rt->dst);
3569				*ppcpu_rt = NULL;
3570			}
3571		}
3572
3573		free_percpu(fib6_nh->rt6i_pcpu);
 
3574	}
3575
3576	fib_nh_common_release(&fib6_nh->nh_common);
3577}
3578
3579static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
3580					      gfp_t gfp_flags,
3581					      struct netlink_ext_ack *extack)
3582{
3583	struct net *net = cfg->fc_nlinfo.nl_net;
3584	struct fib6_info *rt = NULL;
3585	struct nexthop *nh = NULL;
3586	struct fib6_table *table;
3587	struct fib6_nh *fib6_nh;
3588	int err = -EINVAL;
3589	int addr_type;
3590
3591	/* RTF_PCPU is an internal flag; can not be set by userspace */
3592	if (cfg->fc_flags & RTF_PCPU) {
3593		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
3594		goto out;
3595	}
3596
3597	/* RTF_CACHE is an internal flag; can not be set by userspace */
3598	if (cfg->fc_flags & RTF_CACHE) {
3599		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3600		goto out;
3601	}
3602
3603	if (cfg->fc_type > RTN_MAX) {
3604		NL_SET_ERR_MSG(extack, "Invalid route type");
3605		goto out;
3606	}
3607
3608	if (cfg->fc_dst_len > 128) {
3609		NL_SET_ERR_MSG(extack, "Invalid prefix length");
3610		goto out;
3611	}
3612	if (cfg->fc_src_len > 128) {
3613		NL_SET_ERR_MSG(extack, "Invalid source address length");
3614		goto out;
3615	}
3616#ifndef CONFIG_IPV6_SUBTREES
3617	if (cfg->fc_src_len) {
3618		NL_SET_ERR_MSG(extack,
3619			       "Specifying source address requires IPV6_SUBTREES to be enabled");
3620		goto out;
3621	}
3622#endif
3623	if (cfg->fc_nh_id) {
3624		nh = nexthop_find_by_id(net, cfg->fc_nh_id);
3625		if (!nh) {
3626			NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
3627			goto out;
3628		}
3629		err = fib6_check_nexthop(nh, cfg, extack);
3630		if (err)
 
 
 
 
 
 
 
3631			goto out;
3632	}
3633
3634	err = -ENOBUFS;
3635	if (cfg->fc_nlinfo.nlh &&
3636	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
3637		table = fib6_get_table(net, cfg->fc_table);
3638		if (!table) {
3639			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
3640			table = fib6_new_table(net, cfg->fc_table);
3641		}
3642	} else {
3643		table = fib6_new_table(net, cfg->fc_table);
3644	}
3645
3646	if (!table)
3647		goto out;
3648
3649	err = -ENOMEM;
3650	rt = fib6_info_alloc(gfp_flags, !nh);
3651	if (!rt)
3652		goto out;
3653
3654	rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3655					       extack);
3656	if (IS_ERR(rt->fib6_metrics)) {
3657		err = PTR_ERR(rt->fib6_metrics);
3658		/* Do not leave garbage there. */
3659		rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
3660		goto out;
3661	}
3662
3663	if (cfg->fc_flags & RTF_ADDRCONF)
3664		rt->dst_nocount = true;
3665
3666	if (cfg->fc_flags & RTF_EXPIRES)
3667		fib6_set_expires(rt, jiffies +
3668				clock_t_to_jiffies(cfg->fc_expires));
3669	else
3670		fib6_clean_expires(rt);
3671
3672	if (cfg->fc_protocol == RTPROT_UNSPEC)
3673		cfg->fc_protocol = RTPROT_BOOT;
3674	rt->fib6_protocol = cfg->fc_protocol;
3675
3676	rt->fib6_table = table;
3677	rt->fib6_metric = cfg->fc_metric;
3678	rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
3679	rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
 
3680
3681	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3682	rt->fib6_dst.plen = cfg->fc_dst_len;
3683
3684#ifdef CONFIG_IPV6_SUBTREES
3685	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3686	rt->fib6_src.plen = cfg->fc_src_len;
3687#endif
3688	if (nh) {
3689		if (rt->fib6_src.plen) {
3690			NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
3691			goto out;
3692		}
3693		if (!nexthop_get(nh)) {
3694			NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
3695			goto out;
3696		}
3697		rt->nh = nh;
3698		fib6_nh = nexthop_fib6_nh(rt->nh);
3699	} else {
3700		err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
3701		if (err)
3702			goto out;
3703
3704		fib6_nh = rt->fib6_nh;
3705
3706		/* We cannot add true routes via loopback here, they would
3707		 * result in kernel looping; promote them to reject routes
3708		 */
3709		addr_type = ipv6_addr_type(&cfg->fc_dst);
3710		if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev,
3711				   addr_type))
3712			rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
3713	}
3714
3715	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3716		struct net_device *dev = fib6_nh->fib_nh_dev;
3717
3718		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3719			NL_SET_ERR_MSG(extack, "Invalid source address");
3720			err = -EINVAL;
3721			goto out;
3722		}
3723		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3724		rt->fib6_prefsrc.plen = 128;
3725	} else
3726		rt->fib6_prefsrc.plen = 0;
3727
3728	return rt;
3729out:
3730	fib6_info_release(rt);
3731	return ERR_PTR(err);
 
 
 
 
 
3732}
3733
3734int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3735		  struct netlink_ext_ack *extack)
3736{
3737	struct fib6_info *rt;
3738	int err;
 
 
3739
3740	rt = ip6_route_info_create(cfg, gfp_flags, extack);
3741	if (IS_ERR(rt))
3742		return PTR_ERR(rt);
3743
3744	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
3745	fib6_info_release(rt);
3746
3747	return err;
3748}
3749
3750static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3751{
3752	struct net *net = info->nl_net;
3753	struct fib6_table *table;
3754	int err;
3755
3756	if (rt == net->ipv6.fib6_null_entry) {
3757		err = -ENOENT;
3758		goto out;
3759	}
3760
3761	table = rt->fib6_table;
3762	spin_lock_bh(&table->tb6_lock);
3763	err = fib6_del(rt, info);
3764	spin_unlock_bh(&table->tb6_lock);
3765
3766out:
3767	fib6_info_release(rt);
3768	return err;
3769}
3770
3771int ip6_del_rt(struct net *net, struct fib6_info *rt, bool skip_notify)
3772{
3773	struct nl_info info = {
3774		.nl_net = net,
3775		.skip_notify = skip_notify
3776	};
3777
3778	return __ip6_del_rt(rt, &info);
3779}
3780
3781static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
3782{
3783	struct nl_info *info = &cfg->fc_nlinfo;
3784	struct net *net = info->nl_net;
3785	struct sk_buff *skb = NULL;
3786	struct fib6_table *table;
3787	int err = -ENOENT;
 
 
 
 
 
 
 
 
 
 
 
 
3788
3789	if (rt == net->ipv6.fib6_null_entry)
3790		goto out_put;
3791	table = rt->fib6_table;
3792	spin_lock_bh(&table->tb6_lock);
3793
3794	if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
3795		struct fib6_info *sibling, *next_sibling;
3796		struct fib6_node *fn;
3797
3798		/* prefer to send a single notification with all hops */
3799		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3800		if (skb) {
3801			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3802
3803			if (rt6_fill_node(net, skb, rt, NULL,
3804					  NULL, NULL, 0, RTM_DELROUTE,
3805					  info->portid, seq, 0) < 0) {
3806				kfree_skb(skb);
3807				skb = NULL;
3808			} else
3809				info->skip_notify = 1;
3810		}
3811
3812		/* 'rt' points to the first sibling route. If it is not the
3813		 * leaf, then we do not need to send a notification. Otherwise,
3814		 * we need to check if the last sibling has a next route or not
3815		 * and emit a replace or delete notification, respectively.
3816		 */
3817		info->skip_notify_kernel = 1;
3818		fn = rcu_dereference_protected(rt->fib6_node,
3819					    lockdep_is_held(&table->tb6_lock));
3820		if (rcu_access_pointer(fn->leaf) == rt) {
3821			struct fib6_info *last_sibling, *replace_rt;
3822
3823			last_sibling = list_last_entry(&rt->fib6_siblings,
3824						       struct fib6_info,
3825						       fib6_siblings);
3826			replace_rt = rcu_dereference_protected(
3827					    last_sibling->fib6_next,
3828					    lockdep_is_held(&table->tb6_lock));
3829			if (replace_rt)
3830				call_fib6_entry_notifiers_replace(net,
3831								  replace_rt);
3832			else
3833				call_fib6_multipath_entry_notifiers(net,
3834						       FIB_EVENT_ENTRY_DEL,
3835						       rt, rt->fib6_nsiblings,
3836						       NULL);
3837		}
3838		list_for_each_entry_safe(sibling, next_sibling,
3839					 &rt->fib6_siblings,
3840					 fib6_siblings) {
3841			err = fib6_del(sibling, info);
3842			if (err)
3843				goto out_unlock;
3844		}
3845	}
 
3846
3847	err = fib6_del(rt, info);
3848out_unlock:
3849	spin_unlock_bh(&table->tb6_lock);
3850out_put:
3851	fib6_info_release(rt);
3852
3853	if (skb) {
3854		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3855			    info->nlh, gfp_any());
3856	}
3857	return err;
3858}
3859
3860static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
 
 
 
 
 
 
 
 
 
 
 
3861{
3862	int rc = -ESRCH;
 
 
3863
3864	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3865		goto out;
 
 
 
 
 
 
 
 
3866
3867	if (cfg->fc_flags & RTF_GATEWAY &&
3868	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3869		goto out;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3870
3871	rc = rt6_remove_exception_rt(rt);
 
 
3872out:
3873	return rc;
3874}
3875
3876static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
3877			     struct fib6_nh *nh)
3878{
3879	struct fib6_result res = {
3880		.f6i = rt,
3881		.nh = nh,
3882	};
3883	struct rt6_info *rt_cache;
3884
3885	rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src);
3886	if (rt_cache)
3887		return __ip6_del_cached_rt(rt_cache, cfg);
3888
3889	return 0;
3890}
3891
/* Argument bundle handed to fib6_nh_del_cached_rt() through the opaque
 * pointer of nexthop_for_each_fib6_nh().
 */
struct fib6_nh_del_cached_rt_arg {
	struct fib6_config *cfg;	/* delete request being processed */
	struct fib6_info *f6i;		/* route whose nexthops are walked */
};
3896
3897static int fib6_nh_del_cached_rt(struct fib6_nh *nh, void *_arg)
 
 
 
3898{
3899	struct fib6_nh_del_cached_rt_arg *arg = _arg;
3900	int rc;
 
 
 
 
 
 
 
3901
3902	rc = ip6_del_cached_rt(arg->cfg, arg->f6i, nh);
3903	return rc != -ESRCH ? rc : 0;
3904}
3905
3906static int ip6_del_cached_rt_nh(struct fib6_config *cfg, struct fib6_info *f6i)
3907{
3908	struct fib6_nh_del_cached_rt_arg arg = {
3909		.cfg = cfg,
3910		.f6i = f6i
3911	};
3912
3913	return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_del_cached_rt, &arg);
 
3914}
3915
3916static int ip6_route_del(struct fib6_config *cfg,
3917			 struct netlink_ext_ack *extack)
 
3918{
3919	struct fib6_table *table;
3920	struct fib6_info *rt;
3921	struct fib6_node *fn;
3922	int err = -ESRCH;
 
3923
3924	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3925	if (!table) {
3926		NL_SET_ERR_MSG(extack, "FIB table does not exist");
3927		return err;
 
3928	}
3929
3930	rcu_read_lock();
 
 
3931
3932	fn = fib6_locate(&table->tb6_root,
3933			 &cfg->fc_dst, cfg->fc_dst_len,
3934			 &cfg->fc_src, cfg->fc_src_len,
3935			 !(cfg->fc_flags & RTF_CACHE));
 
 
3936
3937	if (fn) {
3938		for_each_fib6_node_rt_rcu(fn) {
3939			struct fib6_nh *nh;
 
 
 
3940
3941			if (rt->nh && cfg->fc_nh_id &&
3942			    rt->nh->id != cfg->fc_nh_id)
3943				continue;
3944
3945			if (cfg->fc_flags & RTF_CACHE) {
3946				int rc = 0;
 
3947
3948				if (rt->nh) {
3949					rc = ip6_del_cached_rt_nh(cfg, rt);
3950				} else if (cfg->fc_nh_id) {
3951					continue;
3952				} else {
3953					nh = rt->fib6_nh;
3954					rc = ip6_del_cached_rt(cfg, rt, nh);
3955				}
3956				if (rc != -ESRCH) {
3957					rcu_read_unlock();
3958					return rc;
3959				}
3960				continue;
3961			}
3962
3963			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
3964				continue;
3965			if (cfg->fc_protocol &&
3966			    cfg->fc_protocol != rt->fib6_protocol)
3967				continue;
3968
3969			if (rt->nh) {
3970				if (!fib6_info_hold_safe(rt))
3971					continue;
3972				rcu_read_unlock();
3973
3974				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3975			}
3976			if (cfg->fc_nh_id)
3977				continue;
3978
3979			nh = rt->fib6_nh;
3980			if (cfg->fc_ifindex &&
3981			    (!nh->fib_nh_dev ||
3982			     nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
3983				continue;
3984			if (cfg->fc_flags & RTF_GATEWAY &&
3985			    !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
3986				continue;
3987			if (!fib6_info_hold_safe(rt))
3988				continue;
3989			rcu_read_unlock();
3990
3991			/* if gateway was specified only delete the one hop */
3992			if (cfg->fc_flags & RTF_GATEWAY)
3993				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3994
3995			return __ip6_del_rt_siblings(rt, cfg);
3996		}
3997	}
3998	rcu_read_unlock();
3999
4000	return err;
 
4001}
4002
4003static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
 
 
 
 
 
 
4004{
4005	struct netevent_redirect netevent;
4006	struct rt6_info *rt, *nrt = NULL;
4007	struct fib6_result res = {};
4008	struct ndisc_options ndopts;
4009	struct inet6_dev *in6_dev;
4010	struct neighbour *neigh;
4011	struct rd_msg *msg;
4012	int optlen, on_link;
4013	u8 *lladdr;
4014
4015	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
4016	optlen -= sizeof(*msg);
4017
4018	if (optlen < 0) {
4019		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
4020		return;
4021	}
4022
4023	msg = (struct rd_msg *)icmp6_hdr(skb);
 
4024
4025	if (ipv6_addr_is_multicast(&msg->dest)) {
4026		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
4027		return;
 
 
 
 
 
 
4028	}
4029
4030	on_link = 0;
4031	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
4032		on_link = 1;
4033	} else if (ipv6_addr_type(&msg->target) !=
4034		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
4035		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
4036		return;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4037	}
4038
4039	in6_dev = __in6_dev_get(skb->dev);
4040	if (!in6_dev)
4041		return;
4042	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
4043		return;
4044
4045	/* RFC2461 8.1:
4046	 *	The IP source address of the Redirect MUST be the same as the current
4047	 *	first-hop router for the specified ICMP Destination Address.
4048	 */
 
 
 
 
4049
4050	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
4051		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
4052		return;
4053	}
 
 
 
4054
4055	lladdr = NULL;
4056	if (ndopts.nd_opts_tgt_lladdr) {
4057		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
4058					     skb->dev);
4059		if (!lladdr) {
4060			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
4061			return;
4062		}
4063	}
4064
4065	rt = (struct rt6_info *) dst;
4066	if (rt->rt6i_flags & RTF_REJECT) {
4067		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
4068		return;
4069	}
 
 
 
4070
4071	/* Redirect received -> path was valid.
4072	 * Look, redirects are sent only in response to data packets,
4073	 * so that this nexthop apparently is reachable. --ANK
4074	 */
4075	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
4076
4077	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
4078	if (!neigh)
4079		return;
4080
4081	/*
4082	 *	We have finally decided to accept it.
 
 
 
 
 
 
 
 
 
4083	 */
 
 
 
4084
4085	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
4086		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
4087		     NEIGH_UPDATE_F_OVERRIDE|
4088		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
4089				     NEIGH_UPDATE_F_ISROUTER)),
4090		     NDISC_REDIRECT, &ndopts);
4091
4092	rcu_read_lock();
4093	res.f6i = rcu_dereference(rt->from);
4094	if (!res.f6i)
4095		goto out;
 
 
4096
4097	if (res.f6i->nh) {
4098		struct fib6_nh_match_arg arg = {
4099			.dev = dst->dev,
4100			.gw = &rt->rt6i_gateway,
4101		};
 
 
 
 
 
 
 
 
 
 
 
 
 
4102
4103		nexthop_for_each_fib6_nh(res.f6i->nh,
4104					 fib6_nh_find_match, &arg);
4105
4106		/* fib6_info uses a nexthop that does not have fib6_nh
4107		 * using the dst->dev. Should be impossible
4108		 */
4109		if (!arg.match)
4110			goto out;
4111		res.nh = arg.match;
4112	} else {
4113		res.nh = res.f6i->fib6_nh;
4114	}
4115
4116	res.fib6_flags = res.f6i->fib6_flags;
4117	res.fib6_type = res.f6i->fib6_type;
4118	nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
4119	if (!nrt)
4120		goto out;
4121
4122	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
4123	if (on_link)
4124		nrt->rt6i_flags &= ~RTF_GATEWAY;
4125
4126	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
4127
4128	/* rt6_insert_exception() will take care of duplicated exceptions */
4129	if (rt6_insert_exception(nrt, &res)) {
4130		dst_release_immediate(&nrt->dst);
4131		goto out;
4132	}
4133
4134	netevent.old = &rt->dst;
4135	netevent.new = &nrt->dst;
4136	netevent.daddr = &msg->dest;
4137	netevent.neigh = neigh;
4138	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
4139
4140out:
4141	rcu_read_unlock();
4142	neigh_release(neigh);
4143}
4144
4145#ifdef CONFIG_IPV6_ROUTE_INFO
4146static struct fib6_info *rt6_get_route_info(struct net *net,
4147					   const struct in6_addr *prefix, int prefixlen,
4148					   const struct in6_addr *gwaddr,
4149					   struct net_device *dev)
4150{
4151	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
4152	int ifindex = dev->ifindex;
4153	struct fib6_node *fn;
4154	struct fib6_info *rt = NULL;
4155	struct fib6_table *table;
4156
4157	table = fib6_get_table(net, tb_id);
4158	if (!table)
4159		return NULL;
4160
4161	rcu_read_lock();
4162	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
4163	if (!fn)
4164		goto out;
4165
4166	for_each_fib6_node_rt_rcu(fn) {
4167		/* these routes do not use nexthops */
4168		if (rt->nh)
4169			continue;
4170		if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex)
4171			continue;
4172		if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
4173		    !rt->fib6_nh->fib_nh_gw_family)
4174			continue;
4175		if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr))
4176			continue;
4177		if (!fib6_info_hold_safe(rt))
4178			continue;
 
4179		break;
4180	}
4181out:
4182	rcu_read_unlock();
4183	return rt;
4184}
4185
4186static struct fib6_info *rt6_add_route_info(struct net *net,
4187					   const struct in6_addr *prefix, int prefixlen,
4188					   const struct in6_addr *gwaddr,
4189					   struct net_device *dev,
4190					   unsigned int pref)
4191{
4192	struct fib6_config cfg = {
 
4193		.fc_metric	= IP6_RT_PRIO_USER,
4194		.fc_ifindex	= dev->ifindex,
4195		.fc_dst_len	= prefixlen,
4196		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
4197				  RTF_UP | RTF_PREF(pref),
4198		.fc_protocol = RTPROT_RA,
4199		.fc_type = RTN_UNICAST,
4200		.fc_nlinfo.portid = 0,
4201		.fc_nlinfo.nlh = NULL,
4202		.fc_nlinfo.nl_net = net,
4203	};
4204
4205	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
4206	cfg.fc_dst = *prefix;
4207	cfg.fc_gateway = *gwaddr;
4208
4209	/* We should treat it as a default route if prefix length is 0. */
4210	if (!prefixlen)
4211		cfg.fc_flags |= RTF_DEFAULT;
4212
4213	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
4214
4215	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
4216}
4217#endif
4218
4219struct fib6_info *rt6_get_dflt_router(struct net *net,
4220				     const struct in6_addr *addr,
4221				     struct net_device *dev)
4222{
4223	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
4224	struct fib6_info *rt;
4225	struct fib6_table *table;
4226
4227	table = fib6_get_table(net, tb_id);
4228	if (!table)
4229		return NULL;
4230
4231	rcu_read_lock();
4232	for_each_fib6_node_rt_rcu(&table->tb6_root) {
4233		struct fib6_nh *nh;
4234
4235		/* RA routes do not use nexthops */
4236		if (rt->nh)
4237			continue;
4238
4239		nh = rt->fib6_nh;
4240		if (dev == nh->fib_nh_dev &&
4241		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
4242		    ipv6_addr_equal(&nh->fib_nh_gw6, addr))
4243			break;
4244	}
4245	if (rt && !fib6_info_hold_safe(rt))
4246		rt = NULL;
4247	rcu_read_unlock();
4248	return rt;
4249}
4250
4251struct fib6_info *rt6_add_dflt_router(struct net *net,
4252				     const struct in6_addr *gwaddr,
4253				     struct net_device *dev,
4254				     unsigned int pref)
4255{
4256	struct fib6_config cfg = {
4257		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
4258		.fc_metric	= IP6_RT_PRIO_USER,
4259		.fc_ifindex	= dev->ifindex,
4260		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
4261				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
4262		.fc_protocol = RTPROT_RA,
4263		.fc_type = RTN_UNICAST,
4264		.fc_nlinfo.portid = 0,
4265		.fc_nlinfo.nlh = NULL,
4266		.fc_nlinfo.nl_net = net,
4267	};
4268
4269	cfg.fc_gateway = *gwaddr;
4270
4271	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
4272		struct fib6_table *table;
4273
4274		table = fib6_get_table(dev_net(dev), cfg.fc_table);
4275		if (table)
4276			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
4277	}
4278
4279	return rt6_get_dflt_router(net, gwaddr, dev);
4280}
4281
4282static void __rt6_purge_dflt_routers(struct net *net,
4283				     struct fib6_table *table)
4284{
4285	struct fib6_info *rt;
4286
4287restart:
4288	rcu_read_lock();
4289	for_each_fib6_node_rt_rcu(&table->tb6_root) {
4290		struct net_device *dev = fib6_info_nh_dev(rt);
4291		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
4292
4293		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
4294		    (!idev || idev->cnf.accept_ra != 2) &&
4295		    fib6_info_hold_safe(rt)) {
4296			rcu_read_unlock();
4297			ip6_del_rt(net, rt, false);
4298			goto restart;
4299		}
4300	}
4301	rcu_read_unlock();
4302
4303	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
4304}
4305
4306void rt6_purge_dflt_routers(struct net *net)
4307{
 
4308	struct fib6_table *table;
4309	struct hlist_head *head;
4310	unsigned int h;
4311
4312	rcu_read_lock();
 
 
 
4313
4314	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
4315		head = &net->ipv6.fib_table_hash[h];
4316		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
4317			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
4318				__rt6_purge_dflt_routers(net, table);
 
 
 
4319		}
4320	}
4321
4322	rcu_read_unlock();
4323}
4324
4325static void rtmsg_to_fib6_config(struct net *net,
4326				 struct in6_rtmsg *rtmsg,
4327				 struct fib6_config *cfg)
4328{
4329	*cfg = (struct fib6_config){
4330		.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
4331			 : RT6_TABLE_MAIN,
4332		.fc_ifindex = rtmsg->rtmsg_ifindex,
4333		.fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
4334		.fc_expires = rtmsg->rtmsg_info,
4335		.fc_dst_len = rtmsg->rtmsg_dst_len,
4336		.fc_src_len = rtmsg->rtmsg_src_len,
4337		.fc_flags = rtmsg->rtmsg_flags,
4338		.fc_type = rtmsg->rtmsg_type,
4339
4340		.fc_nlinfo.nl_net = net,
 
 
 
 
 
 
 
 
4341
4342		.fc_dst = rtmsg->rtmsg_dst,
4343		.fc_src = rtmsg->rtmsg_src,
4344		.fc_gateway = rtmsg->rtmsg_gateway,
4345	};
4346}
4347
4348int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
4349{
4350	struct fib6_config cfg;
 
4351	int err;
4352
4353	if (cmd != SIOCADDRT && cmd != SIOCDELRT)
4354		return -EINVAL;
4355	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
4356		return -EPERM;
 
 
 
 
 
 
 
4357
4358	rtmsg_to_fib6_config(net, rtmsg, &cfg);
 
 
 
 
 
 
 
 
 
 
 
4359
4360	rtnl_lock();
4361	switch (cmd) {
4362	case SIOCADDRT:
4363		err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
4364		break;
4365	case SIOCDELRT:
4366		err = ip6_route_del(&cfg, NULL);
4367		break;
4368	}
4369	rtnl_unlock();
4370	return err;
4371}
4372
4373/*
4374 *	Drop the packet on the floor
4375 */
4376
4377static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
4378{
 
4379	struct dst_entry *dst = skb_dst(skb);
4380	struct net *net = dev_net(dst->dev);
4381	struct inet6_dev *idev;
4382	int type;
4383
4384	if (netif_is_l3_master(skb->dev) &&
4385	    dst->dev == net->loopback_dev)
4386		idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
4387	else
4388		idev = ip6_dst_idev(dst);
4389
4390	switch (ipstats_mib_noroutes) {
4391	case IPSTATS_MIB_INNOROUTES:
4392		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
4393		if (type == IPV6_ADDR_ANY) {
4394			IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
 
4395			break;
4396		}
4397		fallthrough;
4398	case IPSTATS_MIB_OUTNOROUTES:
4399		IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
 
4400		break;
4401	}
4402
4403	/* Start over by dropping the dst for l3mdev case */
4404	if (netif_is_l3_master(skb->dev))
4405		skb_dst_drop(skb);
4406
4407	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
4408	kfree_skb(skb);
4409	return 0;
4410}
4411
4412static int ip6_pkt_discard(struct sk_buff *skb)
4413{
4414	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
4415}
4416
4417static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
4418{
4419	skb->dev = skb_dst(skb)->dev;
4420	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
4421}
4422
 
 
4423static int ip6_pkt_prohibit(struct sk_buff *skb)
4424{
4425	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
4426}
4427
4428static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
4429{
4430	skb->dev = skb_dst(skb)->dev;
4431	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
4432}
4433
 
 
4434/*
4435 *	Allocate a dst for local (unicast / anycast) address.
4436 */
4437
4438struct fib6_info *addrconf_f6i_alloc(struct net *net,
4439				     struct inet6_dev *idev,
4440				     const struct in6_addr *addr,
4441				     bool anycast, gfp_t gfp_flags)
4442{
4443	struct fib6_config cfg = {
4444		.fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
4445		.fc_ifindex = idev->dev->ifindex,
4446		.fc_flags = RTF_UP | RTF_NONEXTHOP,
4447		.fc_dst = *addr,
4448		.fc_dst_len = 128,
4449		.fc_protocol = RTPROT_KERNEL,
4450		.fc_nlinfo.nl_net = net,
4451		.fc_ignore_dev_down = true,
4452	};
4453	struct fib6_info *f6i;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4454
4455	if (anycast) {
4456		cfg.fc_type = RTN_ANYCAST;
4457		cfg.fc_flags |= RTF_ANYCAST;
4458	} else {
4459		cfg.fc_type = RTN_LOCAL;
4460		cfg.fc_flags |= RTF_LOCAL;
4461	}
 
4462
4463	f6i = ip6_route_info_create(&cfg, gfp_flags, NULL);
4464	if (!IS_ERR(f6i))
4465		f6i->dst_nocount = true;
4466	return f6i;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4467}
4468
4469/* remove deleted ip from prefsrc entries */
/* Argument bundle passed to fib6_remove_prefsrc() via fib6_clean_all() */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace being cleaned */
	struct in6_addr *addr;		/* address being removed */
};
4475
4476static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
4477{
4478	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
4479	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
4480	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
4481
4482	if (!rt->nh &&
4483	    ((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
4484	    rt != net->ipv6.fib6_null_entry &&
4485	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
4486		spin_lock_bh(&rt6_exception_lock);
4487		/* remove prefsrc entry */
4488		rt->fib6_prefsrc.plen = 0;
4489		spin_unlock_bh(&rt6_exception_lock);
4490	}
4491	return 0;
4492}
4493
4494void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
4495{
4496	struct net *net = dev_net(ifp->idev->dev);
4497	struct arg_dev_net_ip adni = {
4498		.dev = ifp->idev->dev,
4499		.net = net,
4500		.addr = &ifp->addr,
4501	};
4502	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
4503}
4504
4505#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT)
 
 
 
4506
4507/* Remove routers and update dst entries when gateway turn into host. */
4508static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
4509{
4510	struct in6_addr *gateway = (struct in6_addr *)arg;
4511	struct fib6_nh *nh;
4512
4513	/* RA routes do not use nexthops */
4514	if (rt->nh)
4515		return 0;
4516
4517	nh = rt->fib6_nh;
4518	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
4519	    nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
4520		return -1;
4521
4522	/* Further clean up cached routes in exception table.
4523	 * This is needed because cached route may have a different
4524	 * gateway than its 'parent' in the case of an ip redirect.
4525	 */
4526	fib6_nh_exceptions_clean_tohost(nh, gateway);
4527
4528	return 0;
4529}
4530
/* Run fib6_clean_tohost() over every route in @net, passing @gateway as the
 * address of the router that has turned into a host.
 */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
4535
/* Walker argument for device state changes.  fib6_ifup() reads @nh_flags,
 * fib6_ifdown() reads @event; the two share storage, so only the member
 * matching the walker in use is meaningful.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event applies to */
	union {
		unsigned char nh_flags;	/* nexthop flags to clear (fib6_ifup) */
		unsigned long event;	/* NETDEV_* event code (fib6_ifdown) */
	};
};
4543
4544static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
4545{
4546	struct fib6_info *iter;
4547	struct fib6_node *fn;
4548
4549	fn = rcu_dereference_protected(rt->fib6_node,
4550			lockdep_is_held(&rt->fib6_table->tb6_lock));
4551	iter = rcu_dereference_protected(fn->leaf,
4552			lockdep_is_held(&rt->fib6_table->tb6_lock));
4553	while (iter) {
4554		if (iter->fib6_metric == rt->fib6_metric &&
4555		    rt6_qualify_for_ecmp(iter))
4556			return iter;
4557		iter = rcu_dereference_protected(iter->fib6_next,
4558				lockdep_is_held(&rt->fib6_table->tb6_lock));
4559	}
4560
4561	return NULL;
4562}
4563
4564/* only called for fib entries with builtin fib6_nh */
4565static bool rt6_is_dead(const struct fib6_info *rt)
4566{
4567	if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD ||
4568	    (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN &&
4569	     ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev)))
4570		return true;
4571
4572	return false;
4573}
4574
4575static int rt6_multipath_total_weight(const struct fib6_info *rt)
4576{
4577	struct fib6_info *iter;
4578	int total = 0;
4579
4580	if (!rt6_is_dead(rt))
4581		total += rt->fib6_nh->fib_nh_weight;
4582
4583	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
4584		if (!rt6_is_dead(iter))
4585			total += iter->fib6_nh->fib_nh_weight;
4586	}
4587
4588	return total;
4589}
4590
4591static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
4592{
4593	int upper_bound = -1;
4594
4595	if (!rt6_is_dead(rt)) {
4596		*weight += rt->fib6_nh->fib_nh_weight;
4597		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
4598						    total) - 1;
4599	}
4600	atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound);
4601}
4602
4603static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
4604{
4605	struct fib6_info *iter;
4606	int weight = 0;
4607
4608	rt6_upper_bound_set(rt, &weight, total);
4609
4610	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4611		rt6_upper_bound_set(iter, &weight, total);
4612}
4613
4614void rt6_multipath_rebalance(struct fib6_info *rt)
4615{
4616	struct fib6_info *first;
4617	int total;
4618
4619	/* In case the entire multipath route was marked for flushing,
4620	 * then there is no need to rebalance upon the removal of every
4621	 * sibling route.
4622	 */
4623	if (!rt->fib6_nsiblings || rt->should_flush)
4624		return;
4625
4626	/* During lookup routes are evaluated in order, so we need to
4627	 * make sure upper bounds are assigned from the first sibling
4628	 * onwards.
4629	 */
4630	first = rt6_multipath_first_sibling(rt);
4631	if (WARN_ON_ONCE(!first))
4632		return;
4633
4634	total = rt6_multipath_total_weight(first);
4635	rt6_multipath_upper_bound_set(first, total);
4636}
4637
4638static int fib6_ifup(struct fib6_info *rt, void *p_arg)
4639{
4640	const struct arg_netdev_event *arg = p_arg;
4641	struct net *net = dev_net(arg->dev);
4642
4643	if (rt != net->ipv6.fib6_null_entry && !rt->nh &&
4644	    rt->fib6_nh->fib_nh_dev == arg->dev) {
4645		rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags;
4646		fib6_update_sernum_upto_root(net, rt);
4647		rt6_multipath_rebalance(rt);
4648	}
4649
4650	return 0;
4651}
4652
4653void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
4654{
4655	struct arg_netdev_event arg = {
4656		.dev = dev,
4657		{
4658			.nh_flags = nh_flags,
4659		},
4660	};
4661
4662	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
4663		arg.nh_flags |= RTNH_F_LINKDOWN;
4664
4665	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
4666}
4667
4668/* only called for fib entries with inline fib6_nh */
4669static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
4670				   const struct net_device *dev)
4671{
4672	struct fib6_info *iter;
4673
4674	if (rt->fib6_nh->fib_nh_dev == dev)
4675		return true;
4676	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4677		if (iter->fib6_nh->fib_nh_dev == dev)
4678			return true;
4679
4680	return false;
4681}
4682
4683static void rt6_multipath_flush(struct fib6_info *rt)
4684{
4685	struct fib6_info *iter;
4686
4687	rt->should_flush = 1;
4688	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4689		iter->should_flush = 1;
4690}
4691
4692static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
4693					     const struct net_device *down_dev)
4694{
4695	struct fib6_info *iter;
4696	unsigned int dead = 0;
4697
4698	if (rt->fib6_nh->fib_nh_dev == down_dev ||
4699	    rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
4700		dead++;
4701	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4702		if (iter->fib6_nh->fib_nh_dev == down_dev ||
4703		    iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
4704			dead++;
4705
4706	return dead;
4707}
4708
4709static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
4710				       const struct net_device *dev,
4711				       unsigned char nh_flags)
4712{
4713	struct fib6_info *iter;
4714
4715	if (rt->fib6_nh->fib_nh_dev == dev)
4716		rt->fib6_nh->fib_nh_flags |= nh_flags;
4717	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4718		if (iter->fib6_nh->fib_nh_dev == dev)
4719			iter->fib6_nh->fib_nh_flags |= nh_flags;
4720}
4721
4722/* called with write lock held for table with rt */
4723static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
4724{
4725	const struct arg_netdev_event *arg = p_arg;
4726	const struct net_device *dev = arg->dev;
4727	struct net *net = dev_net(dev);
4728
4729	if (rt == net->ipv6.fib6_null_entry || rt->nh)
4730		return 0;
4731
4732	switch (arg->event) {
4733	case NETDEV_UNREGISTER:
4734		return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
4735	case NETDEV_DOWN:
4736		if (rt->should_flush)
4737			return -1;
4738		if (!rt->fib6_nsiblings)
4739			return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
4740		if (rt6_multipath_uses_dev(rt, dev)) {
4741			unsigned int count;
4742
4743			count = rt6_multipath_dead_count(rt, dev);
4744			if (rt->fib6_nsiblings + 1 == count) {
4745				rt6_multipath_flush(rt);
4746				return -1;
4747			}
4748			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4749						   RTNH_F_LINKDOWN);
4750			fib6_update_sernum(net, rt);
4751			rt6_multipath_rebalance(rt);
4752		}
4753		return -2;
4754	case NETDEV_CHANGE:
4755		if (rt->fib6_nh->fib_nh_dev != dev ||
4756		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
4757			break;
4758		rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
4759		rt6_multipath_rebalance(rt);
4760		break;
4761	}
4762
4763	return 0;
4764}
4765
4766void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
4767{
4768	struct arg_netdev_event arg = {
4769		.dev = dev,
4770		{
4771			.event = event,
4772		},
4773	};
4774	struct net *net = dev_net(dev);
4775
4776	if (net->ipv6.sysctl.skip_notify_on_dev_down)
4777		fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4778	else
4779		fib6_clean_all(net, fib6_ifdown, &arg);
4780}
4781
/* Tear down IPv6 routing state tied to @dev for the given NETDEV_* @event:
 * sync the FIB entries, flush the device from the uncached route list, and
 * call neigh_ifdown() on the neighbour discovery table, in that order.
 */
void rt6_disable_ip(struct net_device *dev, unsigned long event)
{
	rt6_sync_down_dev(dev, event);
	rt6_uncached_list_flush_dev(dev_net(dev), dev);
	neigh_ifdown(&nd_tbl, dev);
}
4788
/* Walker argument for propagating a device MTU change into the FIB. */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
	struct fib6_info *f6i;	/* route being processed; set per route by
				 * rt6_mtu_change_route() before the
				 * per-nexthop callback runs
				 */
};
4794
4795static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg)
4796{
4797	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
4798	struct fib6_info *f6i = arg->f6i;
4799
4800	/* For administrative MTU increase, there is no way to discover
4801	 * IPv6 PMTU increase, so PMTU increase should be updated here.
4802	 * Since RFC 1981 doesn't include administrative MTU increase
4803	 * update PMTU increase is a MUST. (i.e. jumbo frame)
4804	 */
4805	if (nh->fib_nh_dev == arg->dev) {
4806		struct inet6_dev *idev = __in6_dev_get(arg->dev);
4807		u32 mtu = f6i->fib6_pmtu;
4808
4809		if (mtu >= arg->mtu ||
4810		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4811			fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
4812
4813		spin_lock_bh(&rt6_exception_lock);
4814		rt6_exceptions_update_pmtu(idev, nh, arg->mtu);
4815		spin_unlock_bh(&rt6_exception_lock);
4816	}
4817
4818	return 0;
4819}
4820
4821static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
4822{
4823	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4824	struct inet6_dev *idev;
4825
4826	/* In IPv6 pmtu discovery is not optional,
4827	   so that RTAX_MTU lock cannot disable it.
4828	   We still use this lock to block changes
4829	   caused by addrconf/ndisc.
4830	*/
4831
4832	idev = __in6_dev_get(arg->dev);
4833	if (!idev)
4834		return 0;
4835
4836	if (fib6_metric_locked(f6i, RTAX_MTU))
4837		return 0;
4838
4839	arg->f6i = f6i;
4840	if (f6i->nh) {
4841		/* fib6_nh_mtu_change only returns 0, so this is safe */
4842		return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_mtu_change,
4843						arg);
 
 
 
 
 
 
 
 
 
 
 
 
4844	}
4845
4846	return fib6_nh_mtu_change(f6i->fib6_nh, arg);
4847}
4848
/* Propagate a new @mtu for @dev to every route in the device's namespace by
 * running rt6_mtu_change_route() over the FIB.  The f6i member of the
 * argument starts zeroed and is filled in per route by the callback.
 */
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
}
4858
/* Netlink attribute policy for IPv6 route messages, used by
 * rtm_to_fib6_config().  RTA_DST and RTA_SRC have no entry here; their
 * lengths are checked by hand against the prefix length in the parser.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_UNSPEC]		= { .strict_start_type = RTA_DPORT + 1 },
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_EXPIRES]		= { .type = NLA_U32 },
	[RTA_UID]		= { .type = NLA_U32 },
	[RTA_MARK]		= { .type = NLA_U32 },
	[RTA_TABLE]		= { .type = NLA_U32 },
	[RTA_IP_PROTO]		= { .type = NLA_U8 },
	[RTA_SPORT]		= { .type = NLA_U16 },
	[RTA_DPORT]		= { .type = NLA_U16 },
	[RTA_NH_ID]		= { .type = NLA_U32 },
};
4880
4881static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4882			      struct fib6_config *cfg,
4883			      struct netlink_ext_ack *extack)
4884{
4885	struct rtmsg *rtm;
4886	struct nlattr *tb[RTA_MAX+1];
4887	unsigned int pref;
4888	int err;
4889
4890	err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
4891				     rtm_ipv6_policy, extack);
4892	if (err < 0)
4893		goto errout;
4894
4895	err = -EINVAL;
4896	rtm = nlmsg_data(nlh);
 
4897
4898	*cfg = (struct fib6_config){
4899		.fc_table = rtm->rtm_table,
4900		.fc_dst_len = rtm->rtm_dst_len,
4901		.fc_src_len = rtm->rtm_src_len,
4902		.fc_flags = RTF_UP,
4903		.fc_protocol = rtm->rtm_protocol,
4904		.fc_type = rtm->rtm_type,
4905
4906		.fc_nlinfo.portid = NETLINK_CB(skb).portid,
4907		.fc_nlinfo.nlh = nlh,
4908		.fc_nlinfo.nl_net = sock_net(skb->sk),
4909	};
4910
4911	if (rtm->rtm_type == RTN_UNREACHABLE ||
4912	    rtm->rtm_type == RTN_BLACKHOLE ||
4913	    rtm->rtm_type == RTN_PROHIBIT ||
4914	    rtm->rtm_type == RTN_THROW)
4915		cfg->fc_flags |= RTF_REJECT;
4916
4917	if (rtm->rtm_type == RTN_LOCAL)
4918		cfg->fc_flags |= RTF_LOCAL;
4919
4920	if (rtm->rtm_flags & RTM_F_CLONED)
4921		cfg->fc_flags |= RTF_CACHE;
4922
4923	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4924
4925	if (tb[RTA_NH_ID]) {
4926		if (tb[RTA_GATEWAY]   || tb[RTA_OIF] ||
4927		    tb[RTA_MULTIPATH] || tb[RTA_ENCAP]) {
4928			NL_SET_ERR_MSG(extack,
4929				       "Nexthop specification and nexthop id are mutually exclusive");
4930			goto errout;
4931		}
4932		cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]);
4933	}
4934
4935	if (tb[RTA_GATEWAY]) {
4936		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
4937		cfg->fc_flags |= RTF_GATEWAY;
4938	}
4939	if (tb[RTA_VIA]) {
4940		NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4941		goto errout;
4942	}
4943
4944	if (tb[RTA_DST]) {
4945		int plen = (rtm->rtm_dst_len + 7) >> 3;
4946
4947		if (nla_len(tb[RTA_DST]) < plen)
4948			goto errout;
4949
4950		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
4951	}
4952
4953	if (tb[RTA_SRC]) {
4954		int plen = (rtm->rtm_src_len + 7) >> 3;
4955
4956		if (nla_len(tb[RTA_SRC]) < plen)
4957			goto errout;
4958
4959		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
4960	}
4961
4962	if (tb[RTA_PREFSRC])
4963		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4964
4965	if (tb[RTA_OIF])
4966		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4967
4968	if (tb[RTA_PRIORITY])
4969		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4970
4971	if (tb[RTA_METRICS]) {
4972		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4973		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
4974	}
4975
4976	if (tb[RTA_TABLE])
4977		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4978
4979	if (tb[RTA_MULTIPATH]) {
4980		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4981		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
4982
4983		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4984						     cfg->fc_mp_len, extack);
4985		if (err < 0)
4986			goto errout;
4987	}
4988
4989	if (tb[RTA_PREF]) {
4990		pref = nla_get_u8(tb[RTA_PREF]);
4991		if (pref != ICMPV6_ROUTER_PREF_LOW &&
4992		    pref != ICMPV6_ROUTER_PREF_HIGH)
4993			pref = ICMPV6_ROUTER_PREF_MEDIUM;
4994		cfg->fc_flags |= RTF_PREF(pref);
4995	}
4996
4997	if (tb[RTA_ENCAP])
4998		cfg->fc_encap = tb[RTA_ENCAP];
4999
5000	if (tb[RTA_ENCAP_TYPE]) {
5001		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
5002
5003		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
5004		if (err < 0)
5005			goto errout;
5006	}
5007
5008	if (tb[RTA_EXPIRES]) {
5009		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
5010
5011		if (addrconf_finite_timeout(timeout)) {
5012			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
5013			cfg->fc_flags |= RTF_EXPIRES;
5014		}
5015	}
5016
5017	err = 0;
5018errout:
5019	return err;
5020}
5021
/* One parsed nexthop of a multipath add request, kept on a temporary list
 * while ip6_route_multipath_add() builds and inserts the routes.
 */
struct rt6_nh {
	struct fib6_info *fib6_info;	/* route created for this nexthop; set to
					 * NULL once insertion has been attempted
					 */
	struct fib6_config r_cfg;	/* per-nexthop config copy, reused to
					 * delete the route on rollback
					 */
	struct list_head next;		/* link in the rt6_nh_list */
};
5027
5028static int ip6_route_info_append(struct net *net,
5029				 struct list_head *rt6_nh_list,
5030				 struct fib6_info *rt,
5031				 struct fib6_config *r_cfg)
5032{
5033	struct rt6_nh *nh;
5034	int err = -EEXIST;
5035
5036	list_for_each_entry(nh, rt6_nh_list, next) {
5037		/* check if fib6_info already exists */
5038		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
5039			return err;
5040	}
5041
5042	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
5043	if (!nh)
5044		return -ENOMEM;
5045	nh->fib6_info = rt;
5046	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
5047	list_add_tail(&nh->next, rt6_nh_list);
5048
5049	return 0;
5050}
5051
5052static void ip6_route_mpath_notify(struct fib6_info *rt,
5053				   struct fib6_info *rt_last,
5054				   struct nl_info *info,
5055				   __u16 nlflags)
5056{
5057	/* if this is an APPEND route, then rt points to the first route
5058	 * inserted and rt_last points to last route inserted. Userspace
5059	 * wants a consistent dump of the route which starts at the first
5060	 * nexthop. Since sibling routes are always added at the end of
5061	 * the list, find the first sibling of the last route appended
5062	 */
5063	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
5064		rt = list_first_entry(&rt_last->fib6_siblings,
5065				      struct fib6_info,
5066				      fib6_siblings);
5067	}
5068
5069	if (rt)
5070		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
5071}
5072
5073static bool ip6_route_mpath_should_notify(const struct fib6_info *rt)
5074{
5075	bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
5076	bool should_notify = false;
5077	struct fib6_info *leaf;
5078	struct fib6_node *fn;
5079
5080	rcu_read_lock();
5081	fn = rcu_dereference(rt->fib6_node);
5082	if (!fn)
5083		goto out;
5084
5085	leaf = rcu_dereference(fn->leaf);
5086	if (!leaf)
5087		goto out;
5088
5089	if (rt == leaf ||
5090	    (rt_can_ecmp && rt->fib6_metric == leaf->fib6_metric &&
5091	     rt6_qualify_for_ecmp(leaf)))
5092		should_notify = true;
5093out:
5094	rcu_read_unlock();
5095
5096	return should_notify;
5097}
5098
5099static int ip6_route_multipath_add(struct fib6_config *cfg,
5100				   struct netlink_ext_ack *extack)
5101{
5102	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
5103	struct nl_info *info = &cfg->fc_nlinfo;
5104	struct fib6_config r_cfg;
5105	struct rtnexthop *rtnh;
5106	struct fib6_info *rt;
5107	struct rt6_nh *err_nh;
5108	struct rt6_nh *nh, *nh_safe;
5109	__u16 nlflags;
5110	int remaining;
5111	int attrlen;
5112	int err = 1;
5113	int nhn = 0;
5114	int replace = (cfg->fc_nlinfo.nlh &&
5115		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
5116	LIST_HEAD(rt6_nh_list);
5117
5118	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
5119	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
5120		nlflags |= NLM_F_APPEND;
5121
5122	remaining = cfg->fc_mp_len;
5123	rtnh = (struct rtnexthop *)cfg->fc_mp;
5124
5125	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
5126	 * fib6_info structs per nexthop
5127	 */
5128	while (rtnh_ok(rtnh, remaining)) {
5129		memcpy(&r_cfg, cfg, sizeof(*cfg));
5130		if (rtnh->rtnh_ifindex)
5131			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
5132
5133		attrlen = rtnh_attrlen(rtnh);
5134		if (attrlen > 0) {
5135			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
5136
5137			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
5138			if (nla) {
5139				r_cfg.fc_gateway = nla_get_in6_addr(nla);
5140				r_cfg.fc_flags |= RTF_GATEWAY;
5141			}
5142			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
5143			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
5144			if (nla)
5145				r_cfg.fc_encap_type = nla_get_u16(nla);
5146		}
5147
5148		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
5149		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
5150		if (IS_ERR(rt)) {
5151			err = PTR_ERR(rt);
5152			rt = NULL;
5153			goto cleanup;
5154		}
5155		if (!rt6_qualify_for_ecmp(rt)) {
5156			err = -EINVAL;
5157			NL_SET_ERR_MSG(extack,
5158				       "Device only routes can not be added for IPv6 using the multipath API.");
5159			fib6_info_release(rt);
5160			goto cleanup;
5161		}
5162
5163		rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
5164
5165		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
5166					    rt, &r_cfg);
5167		if (err) {
5168			fib6_info_release(rt);
5169			goto cleanup;
5170		}
5171
5172		rtnh = rtnh_next(rtnh, &remaining);
5173	}
5174
5175	if (list_empty(&rt6_nh_list)) {
5176		NL_SET_ERR_MSG(extack,
5177			       "Invalid nexthop configuration - no valid nexthops");
5178		return -EINVAL;
5179	}
5180
5181	/* for add and replace send one notification with all nexthops.
5182	 * Skip the notification in fib6_add_rt2node and send one with
5183	 * the full route when done
5184	 */
5185	info->skip_notify = 1;
5186
5187	/* For add and replace, send one notification with all nexthops. For
5188	 * append, send one notification with all appended nexthops.
5189	 */
5190	info->skip_notify_kernel = 1;
5191
5192	err_nh = NULL;
5193	list_for_each_entry(nh, &rt6_nh_list, next) {
5194		err = __ip6_ins_rt(nh->fib6_info, info, extack);
5195		fib6_info_release(nh->fib6_info);
5196
5197		if (!err) {
5198			/* save reference to last route successfully inserted */
5199			rt_last = nh->fib6_info;
5200
5201			/* save reference to first route for notification */
5202			if (!rt_notif)
5203				rt_notif = nh->fib6_info;
5204		}
5205
5206		/* nh->fib6_info is used or freed at this point, reset to NULL*/
5207		nh->fib6_info = NULL;
5208		if (err) {
5209			if (replace && nhn)
5210				NL_SET_ERR_MSG_MOD(extack,
5211						   "multipath route replace failed (check consistency of installed routes)");
5212			err_nh = nh;
5213			goto add_errout;
5214		}
5215
5216		/* Because each route is added like a single route we remove
5217		 * these flags after the first nexthop: if there is a collision,
5218		 * we have already failed to add the first nexthop:
5219		 * fib6_add_rt2node() has rejected it; when replacing, old
5220		 * nexthops have been replaced by first new, the rest should
5221		 * be added to it.
5222		 */
5223		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
5224						     NLM_F_REPLACE);
5225		cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
5226		nhn++;
5227	}
5228
5229	/* An in-kernel notification should only be sent in case the new
5230	 * multipath route is added as the first route in the node, or if
5231	 * it was appended to it. We pass 'rt_notif' since it is the first
5232	 * sibling and might allow us to skip some checks in the replace case.
5233	 */
5234	if (ip6_route_mpath_should_notify(rt_notif)) {
5235		enum fib_event_type fib_event;
5236
5237		if (rt_notif->fib6_nsiblings != nhn - 1)
5238			fib_event = FIB_EVENT_ENTRY_APPEND;
5239		else
5240			fib_event = FIB_EVENT_ENTRY_REPLACE;
5241
5242		err = call_fib6_multipath_entry_notifiers(info->nl_net,
5243							  fib_event, rt_notif,
5244							  nhn - 1, extack);
5245		if (err) {
5246			/* Delete all the siblings that were just added */
5247			err_nh = NULL;
5248			goto add_errout;
5249		}
5250	}
5251
5252	/* success ... tell user about new route */
5253	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
5254	goto cleanup;
5255
5256add_errout:
5257	/* send notification for routes that were added so that
5258	 * the delete notifications sent by ip6_route_del are
5259	 * coherent
5260	 */
5261	if (rt_notif)
5262		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
5263
5264	/* Delete routes that were already added */
5265	list_for_each_entry(nh, &rt6_nh_list, next) {
5266		if (err_nh == nh)
5267			break;
5268		ip6_route_del(&nh->r_cfg, extack);
5269	}
5270
5271cleanup:
5272	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
5273		if (nh->fib6_info)
5274			fib6_info_release(nh->fib6_info);
5275		list_del(&nh->next);
5276		kfree(nh);
5277	}
5278
5279	return err;
5280}
5281
5282static int ip6_route_multipath_del(struct fib6_config *cfg,
5283				   struct netlink_ext_ack *extack)
5284{
5285	struct fib6_config r_cfg;
5286	struct rtnexthop *rtnh;
5287	int remaining;
5288	int attrlen;
5289	int err = 1, last_err = 0;
5290
5291	remaining = cfg->fc_mp_len;
5292	rtnh = (struct rtnexthop *)cfg->fc_mp;
5293
5294	/* Parse a Multipath Entry */
5295	while (rtnh_ok(rtnh, remaining)) {
5296		memcpy(&r_cfg, cfg, sizeof(*cfg));
5297		if (rtnh->rtnh_ifindex)
5298			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
5299
5300		attrlen = rtnh_attrlen(rtnh);
5301		if (attrlen > 0) {
5302			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
5303
5304			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
5305			if (nla) {
5306				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
5307				r_cfg.fc_flags |= RTF_GATEWAY;
5308			}
5309		}
5310		err = ip6_route_del(&r_cfg, extack);
5311		if (err)
5312			last_err = err;
5313
5314		rtnh = rtnh_next(rtnh, &remaining);
5315	}
5316
5317	return last_err;
5318}
5319
5320static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
5321			      struct netlink_ext_ack *extack)
5322{
5323	struct fib6_config cfg;
5324	int err;
5325
5326	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
5327	if (err < 0)
5328		return err;
5329
5330	if (cfg.fc_nh_id &&
5331	    !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) {
5332		NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
5333		return -EINVAL;
5334	}
5335
5336	if (cfg.fc_mp)
5337		return ip6_route_multipath_del(&cfg, extack);
5338	else {
5339		cfg.fc_delete_all_nh = 1;
5340		return ip6_route_del(&cfg, extack);
5341	}
5342}
5343
5344static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
5345			      struct netlink_ext_ack *extack)
5346{
5347	struct fib6_config cfg;
5348	int err;
5349
5350	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
5351	if (err < 0)
5352		return err;
5353
5354	if (cfg.fc_metric == 0)
5355		cfg.fc_metric = IP6_RT_PRIO_USER;
5356
5357	if (cfg.fc_mp)
5358		return ip6_route_multipath_add(&cfg, extack);
5359	else
5360		return ip6_route_add(&cfg, GFP_KERNEL, extack);
5361}
5362
/* Add the netlink space one fib6_nh needs inside RTA_MULTIPATH to the int
 * that @arg points at.  The int return type lets it be passed as a
 * per-nexthop callback; it always returns 0.
 */
static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
{
	int *nexthop_len = arg;

	*nexthop_len += nla_total_size(0)	 /* RTA_MULTIPATH */
		     + NLA_ALIGN(sizeof(struct rtnexthop))
		     + nla_total_size(16); /* RTA_GATEWAY */

	if (nh->fib_nh_lws) {
		/* RTA_ENCAP: size reported by the lwtunnel state */
		*nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
		/* RTA_ENCAP_TYPE: a 16-bit value */
		*nexthop_len += nla_total_size(2);
	}

	return 0;
}
5381
5382static size_t rt6_nlmsg_size(struct fib6_info *f6i)
5383{
5384	int nexthop_len;
5385
5386	if (f6i->nh) {
5387		nexthop_len = nla_total_size(4); /* RTA_NH_ID */
5388		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
5389					 &nexthop_len);
5390	} else {
5391		struct fib6_nh *nh = f6i->fib6_nh;
5392
5393		nexthop_len = 0;
5394		if (f6i->fib6_nsiblings) {
5395			nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
5396				    + NLA_ALIGN(sizeof(struct rtnexthop))
5397				    + nla_total_size(16) /* RTA_GATEWAY */
5398				    + lwtunnel_get_encap_size(nh->fib_nh_lws);
5399
5400			nexthop_len *= f6i->fib6_nsiblings;
5401		}
5402		nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
5403	}
5404
5405	return NLMSG_ALIGN(sizeof(struct rtmsg))
5406	       + nla_total_size(16) /* RTA_SRC */
5407	       + nla_total_size(16) /* RTA_DST */
5408	       + nla_total_size(16) /* RTA_GATEWAY */
5409	       + nla_total_size(16) /* RTA_PREFSRC */
5410	       + nla_total_size(4) /* RTA_TABLE */
5411	       + nla_total_size(4) /* RTA_IIF */
5412	       + nla_total_size(4) /* RTA_OIF */
5413	       + nla_total_size(4) /* RTA_PRIORITY */
5414	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
5415	       + nla_total_size(sizeof(struct rta_cacheinfo))
5416	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
5417	       + nla_total_size(1) /* RTA_PREF */
5418	       + nexthop_len;
5419}
5420
5421static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
5422				 unsigned char *flags)
 
 
 
5423{
5424	if (nexthop_is_multipath(nh)) {
5425		struct nlattr *mp;
 
 
 
5426
5427		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
5428		if (!mp)
5429			goto nla_put_failure;
5430
5431		if (nexthop_mpath_fill_node(skb, nh, AF_INET6))
5432			goto nla_put_failure;
5433
5434		nla_nest_end(skb, mp);
5435	} else {
5436		struct fib6_nh *fib6_nh;
5437
5438		fib6_nh = nexthop_fib6_nh(nh);
5439		if (fib_nexthop_info(skb, &fib6_nh->nh_common, AF_INET6,
5440				     flags, false) < 0)
5441			goto nla_put_failure;
5442	}
5443
5444	return 0;
5445
5446nla_put_failure:
5447	return -EMSGSIZE;
5448}
5449
/*
 * Fill one route netlink message into @skb.
 *
 * @rt:   FIB entry being described
 * @dst:  optional cached route derived from @rt; when set, its keys, flags,
 *        gateway, device, metrics and expiry are reported instead of @rt's
 * @dest: optional destination address; when set it is reported as a /128
 *        RTA_DST instead of the route's prefix
 * @src:  optional source address, same treatment (CONFIG_IPV6_SUBTREES only)
 * @iif:  input interface index, 0 if none
 * @type, @portid, @seq, @flags: netlink header fields
 *
 * Returns 0 on success.  On lack of space the partial message is cancelled
 * and -EMSGSIZE is returned.
 */
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
			 struct fib6_info *rt, struct dst_entry *dst,
			 struct in6_addr *dest, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;
	struct rt6key *rt6_dst, *rt6_src;
	u32 *pmetrics, table, rt6_flags;
	unsigned char nh_flags = 0;
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	long expires = 0;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	/* take keys and flags from the cached route when there is one */
	if (rt6) {
		rt6_dst = &rt6->rt6i_dst;
		rt6_src = &rt6->rt6i_src;
		rt6_flags = rt6->rt6i_flags;
	} else {
		rt6_dst = &rt->fib6_dst;
		rt6_src = &rt->fib6_src;
		rt6_flags = rt->fib6_flags;
	}

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt6_dst->plen;
	rtm->rtm_src_len = rt6_src->plen;
	rtm->rtm_tos = 0;
	if (rt->fib6_table)
		table = rt->fib6_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* rtm_table is narrow: ids of 256 and above are reported there as
	 * RT_TABLE_COMPAT, with the full id in RTA_TABLE
	 */
	rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;

	rtm->rtm_type = rt->fib6_type;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->fib6_protocol;

	if (rt6_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* an explicit destination overrides the route's own prefix */
	if (dest) {
		if (nla_put_in6_addr(skb, RTA_DST, dest))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
			int err = ip6mr_get_route(net, skb, rtm, portid);

			/* 0 ends the fill here; a positive value falls
			 * through to the common attributes below
			 */
			if (err == 0)
				return 0;
			if (err < 0)
				goto nla_put_failure;
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dest) {
		/* no input interface: report the source address that would
		 * be selected for this destination
		 */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* the route's configured preferred source, if any */
	if (rt->fib6_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->fib6_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
		goto nla_put_failure;

	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
		goto nla_put_failure;

	/* Nexthop information, one of four shapes: cached route (gateway
	 * and device from the dst), sibling-based multipath, nexthop
	 * object, or a single inline nexthop.
	 *
	 * For multipath routes, walk the siblings list and add
	 * each as a nexthop within RTA_MULTIPATH.
	 */
	if (rt6) {
		if (rt6_flags & RTF_GATEWAY &&
		    nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
			goto nla_put_failure;

		if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
			goto nla_put_failure;
	} else if (rt->fib6_nsiblings) {
		struct fib6_info *sibling, *next_sibling;
		struct nlattr *mp;

		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		/* the route itself is the first nexthop of the set */
		if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
				    rt->fib6_nh->fib_nh_weight, AF_INET6) < 0)
			goto nla_put_failure;

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->fib6_siblings, fib6_siblings) {
			if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
					    sibling->fib6_nh->fib_nh_weight,
					    AF_INET6) < 0)
				goto nla_put_failure;
		}

		nla_nest_end(skb, mp);
	} else if (rt->nh) {
		if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
			goto nla_put_failure;

		if (nexthop_is_blackhole(rt->nh))
			rtm->rtm_type = RTN_BLACKHOLE;

		/* the expanded nexthop attributes are only added when the
		 * nexthop_compat_mode sysctl is set
		 */
		if (net->ipv4.sysctl_nexthop_compat_mode &&
		    rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
			goto nla_put_failure;

		rtm->rtm_flags |= nh_flags;
	} else {
		if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, AF_INET6,
				     &nh_flags, false) < 0)
			goto nla_put_failure;

		rtm->rtm_flags |= nh_flags;
	}

	/* expiry is reported relative to the current jiffies */
	if (rt6_flags & RTF_EXPIRES) {
		expires = dst ? dst->expires : rt->expires;
		expires -= jiffies;
	}

	/* offload/trap state is only reported for the FIB entry itself */
	if (!dst) {
		if (rt->offload)
			rtm->rtm_flags |= RTM_F_OFFLOAD;
		if (rt->trap)
			rtm->rtm_flags |= RTM_F_TRAP;
	}

	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
		goto nla_put_failure;


	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
5626
5627static int fib6_info_nh_uses_dev(struct fib6_nh *nh, void *arg)
5628{
5629	const struct net_device *dev = arg;
5630
5631	if (nh->fib_nh_dev == dev)
5632		return 1;
5633
5634	return 0;
5635}
5636
5637static bool fib6_info_uses_dev(const struct fib6_info *f6i,
5638			       const struct net_device *dev)
5639{
5640	if (f6i->nh) {
5641		struct net_device *_dev = (struct net_device *)dev;
5642
5643		return !!nexthop_for_each_fib6_nh(f6i->nh,
5644						  fib6_info_nh_uses_dev,
5645						  _dev);
5646	}
5647
5648	if (f6i->fib6_nh->fib_nh_dev == dev)
5649		return true;
5650
5651	if (f6i->fib6_nsiblings) {
5652		struct fib6_info *sibling, *next_sibling;
5653
5654		list_for_each_entry_safe(sibling, next_sibling,
5655					 &f6i->fib6_siblings, fib6_siblings) {
5656			if (sibling->fib6_nh->fib_nh_dev == dev)
5657				return true;
5658		}
5659	}
5660
5661	return false;
5662}
5663
/* State carried across rt6_nh_dump_exceptions() invocations for one route */
struct fib6_nh_exception_dump_walker {
	struct rt6_rtnl_dump_arg *dump;	/* dump context: net, skb, cb */
	struct fib6_info *rt;		/* route whose exceptions are dumped */
	unsigned int flags;		/* flags handed to rt6_fill_node() */
	unsigned int skip;		/* entries still to pass over */
	unsigned int count;		/* entries handled during this walk */
};
5671
5672static int rt6_nh_dump_exceptions(struct fib6_nh *nh, void *arg)
5673{
5674	struct fib6_nh_exception_dump_walker *w = arg;
5675	struct rt6_rtnl_dump_arg *dump = w->dump;
5676	struct rt6_exception_bucket *bucket;
5677	struct rt6_exception *rt6_ex;
5678	int i, err;
5679
5680	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
5681	if (!bucket)
5682		return 0;
5683
5684	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
5685		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
5686			if (w->skip) {
5687				w->skip--;
5688				continue;
5689			}
5690
5691			/* Expiration of entries doesn't bump sernum, insertion
5692			 * does. Removal is triggered by insertion, so we can
5693			 * rely on the fact that if entries change between two
5694			 * partial dumps, this node is scanned again completely,
5695			 * see rt6_insert_exception() and fib6_dump_table().
5696			 *
5697			 * Count expired entries we go through as handled
5698			 * entries that we'll skip next time, in case of partial
5699			 * node dump. Otherwise, if entries expire meanwhile,
5700			 * we'll skip the wrong amount.
5701			 */
5702			if (rt6_check_expired(rt6_ex->rt6i)) {
5703				w->count++;
5704				continue;
5705			}
5706
5707			err = rt6_fill_node(dump->net, dump->skb, w->rt,
5708					    &rt6_ex->rt6i->dst, NULL, NULL, 0,
5709					    RTM_NEWROUTE,
5710					    NETLINK_CB(dump->cb->skb).portid,
5711					    dump->cb->nlh->nlmsg_seq, w->flags);
5712			if (err)
5713				return err;
5714
5715			w->count++;
5716		}
5717		bucket++;
5718	}
5719
5720	return 0;
5721}
5722
5723/* Return -1 if done with node, number of handled routes on partial dump */
5724int rt6_dump_route(struct fib6_info *rt, void *p_arg, unsigned int skip)
5725{
5726	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
5727	struct fib_dump_filter *filter = &arg->filter;
5728	unsigned int flags = NLM_F_MULTI;
5729	struct net *net = arg->net;
5730	int count = 0;
5731
5732	if (rt == net->ipv6.fib6_null_entry)
5733		return -1;
5734
5735	if ((filter->flags & RTM_F_PREFIX) &&
5736	    !(rt->fib6_flags & RTF_PREFIX_RT)) {
5737		/* success since this is not a prefix route */
5738		return -1;
5739	}
5740	if (filter->filter_set &&
5741	    ((filter->rt_type  && rt->fib6_type != filter->rt_type) ||
5742	     (filter->dev      && !fib6_info_uses_dev(rt, filter->dev)) ||
5743	     (filter->protocol && rt->fib6_protocol != filter->protocol))) {
5744		return -1;
5745	}
5746
5747	if (filter->filter_set ||
5748	    !filter->dump_routes || !filter->dump_exceptions) {
5749		flags |= NLM_F_DUMP_FILTERED;
5750	}
5751
5752	if (filter->dump_routes) {
5753		if (skip) {
5754			skip--;
5755		} else {
5756			if (rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL,
5757					  0, RTM_NEWROUTE,
5758					  NETLINK_CB(arg->cb->skb).portid,
5759					  arg->cb->nlh->nlmsg_seq, flags)) {
5760				return 0;
5761			}
5762			count++;
5763		}
5764	}
5765
5766	if (filter->dump_exceptions) {
5767		struct fib6_nh_exception_dump_walker w = { .dump = arg,
5768							   .rt = rt,
5769							   .flags = flags,
5770							   .skip = skip,
5771							   .count = 0 };
5772		int err;
5773
5774		rcu_read_lock();
5775		if (rt->nh) {
5776			err = nexthop_for_each_fib6_nh(rt->nh,
5777						       rt6_nh_dump_exceptions,
5778						       &w);
5779		} else {
5780			err = rt6_nh_dump_exceptions(rt->fib6_nh, &w);
5781		}
5782		rcu_read_unlock();
5783
5784		if (err)
5785			return count += w.count;
5786	}
5787
5788	return -1;
5789}
5790
5791static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
5792					const struct nlmsghdr *nlh,
5793					struct nlattr **tb,
5794					struct netlink_ext_ack *extack)
5795{
5796	struct rtmsg *rtm;
5797	int i, err;
5798
5799	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
5800		NL_SET_ERR_MSG_MOD(extack,
5801				   "Invalid header for get route request");
5802		return -EINVAL;
5803	}
5804
5805	if (!netlink_strict_get_check(skb))
5806		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
5807					      rtm_ipv6_policy, extack);
5808
5809	rtm = nlmsg_data(nlh);
5810	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
5811	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
5812	    rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
5813	    rtm->rtm_type) {
5814		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
5815		return -EINVAL;
5816	}
5817	if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
5818		NL_SET_ERR_MSG_MOD(extack,
5819				   "Invalid flags for get route request");
5820		return -EINVAL;
5821	}
5822
5823	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
5824					    rtm_ipv6_policy, extack);
5825	if (err)
5826		return err;
5827
5828	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
5829	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
5830		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
5831		return -EINVAL;
5832	}
5833
5834	for (i = 0; i <= RTA_MAX; i++) {
5835		if (!tb[i])
5836			continue;
5837
5838		switch (i) {
5839		case RTA_SRC:
5840		case RTA_DST:
5841		case RTA_IIF:
5842		case RTA_OIF:
5843		case RTA_MARK:
5844		case RTA_UID:
5845		case RTA_SPORT:
5846		case RTA_DPORT:
5847		case RTA_IP_PROTO:
5848			break;
5849		default:
5850			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
5851			return -EINVAL;
5852		}
5853	}
5854
5855	return 0;
5856}
5857
5858static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
5859			      struct netlink_ext_ack *extack)
5860{
5861	struct net *net = sock_net(in_skb->sk);
5862	struct nlattr *tb[RTA_MAX+1];
5863	int err, iif = 0, oif = 0;
5864	struct fib6_info *from;
5865	struct dst_entry *dst;
5866	struct rt6_info *rt;
5867	struct sk_buff *skb;
5868	struct rtmsg *rtm;
5869	struct flowi6 fl6 = {};
5870	bool fibmatch;
5871
5872	err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
5873	if (err < 0)
5874		goto errout;
5875
5876	err = -EINVAL;
5877	rtm = nlmsg_data(nlh);
5878	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
5879	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
5880
5881	if (tb[RTA_SRC]) {
5882		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
5883			goto errout;
5884
5885		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
5886	}
5887
5888	if (tb[RTA_DST]) {
5889		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
5890			goto errout;
5891
5892		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
5893	}
5894
5895	if (tb[RTA_IIF])
5896		iif = nla_get_u32(tb[RTA_IIF]);
5897
5898	if (tb[RTA_OIF])
5899		oif = nla_get_u32(tb[RTA_OIF]);
5900
5901	if (tb[RTA_MARK])
5902		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
5903
5904	if (tb[RTA_UID])
5905		fl6.flowi6_uid = make_kuid(current_user_ns(),
5906					   nla_get_u32(tb[RTA_UID]));
5907	else
5908		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
5909
5910	if (tb[RTA_SPORT])
5911		fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
5912
5913	if (tb[RTA_DPORT])
5914		fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
5915
5916	if (tb[RTA_IP_PROTO]) {
5917		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
5918						  &fl6.flowi6_proto, AF_INET6,
5919						  extack);
5920		if (err)
5921			goto errout;
5922	}
5923
5924	if (iif) {
5925		struct net_device *dev;
5926		int flags = 0;
5927
5928		rcu_read_lock();
5929
5930		dev = dev_get_by_index_rcu(net, iif);
5931		if (!dev) {
5932			rcu_read_unlock();
5933			err = -ENODEV;
5934			goto errout;
5935		}
5936
5937		fl6.flowi6_iif = iif;
5938
5939		if (!ipv6_addr_any(&fl6.saddr))
5940			flags |= RT6_LOOKUP_F_HAS_SADDR;
5941
5942		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
5943
5944		rcu_read_unlock();
5945	} else {
5946		fl6.flowi6_oif = oif;
5947
5948		dst = ip6_route_output(net, NULL, &fl6);
5949	}
5950
5951
5952	rt = container_of(dst, struct rt6_info, dst);
5953	if (rt->dst.error) {
5954		err = rt->dst.error;
5955		ip6_rt_put(rt);
5956		goto errout;
5957	}
5958
5959	if (rt == net->ipv6.ip6_null_entry) {
5960		err = rt->dst.error;
5961		ip6_rt_put(rt);
5962		goto errout;
5963	}
5964
5965	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
5966	if (!skb) {
5967		ip6_rt_put(rt);
5968		err = -ENOBUFS;
5969		goto errout;
5970	}
5971
 
 
 
 
 
 
 
5972	skb_dst_set(skb, &rt->dst);
5973
5974	rcu_read_lock();
5975	from = rcu_dereference(rt->from);
5976	if (from) {
5977		if (fibmatch)
5978			err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
5979					    iif, RTM_NEWROUTE,
5980					    NETLINK_CB(in_skb).portid,
5981					    nlh->nlmsg_seq, 0);
5982		else
5983			err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5984					    &fl6.saddr, iif, RTM_NEWROUTE,
5985					    NETLINK_CB(in_skb).portid,
5986					    nlh->nlmsg_seq, 0);
5987	} else {
5988		err = -ENETUNREACH;
5989	}
5990	rcu_read_unlock();
5991
5992	if (err < 0) {
5993		kfree_skb(skb);
5994		goto errout;
5995	}
5996
5997	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
5998errout:
5999	return err;
6000}
6001
6002void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
6003		     unsigned int nlm_flags)
6004{
6005	struct sk_buff *skb;
6006	struct net *net = info->nl_net;
6007	u32 seq;
6008	int err;
6009
6010	err = -ENOBUFS;
6011	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
6012
6013	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
6014	if (!skb)
6015		goto errout;
6016
6017	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
6018			    event, info->portid, seq, nlm_flags);
6019	if (err < 0) {
6020		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
6021		WARN_ON(err == -EMSGSIZE);
6022		kfree_skb(skb);
6023		goto errout;
6024	}
6025	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
6026		    info->nlh, gfp_any());
6027	return;
6028errout:
6029	if (err < 0)
6030		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
6031}
6032
6033void fib6_rt_update(struct net *net, struct fib6_info *rt,
6034		    struct nl_info *info)
6035{
6036	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
6037	struct sk_buff *skb;
6038	int err = -ENOBUFS;
6039
6040	/* call_fib6_entry_notifiers will be removed when in-kernel notifier
6041	 * is implemented and supported for nexthop objects
6042	 */
6043	call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL);
6044
6045	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
6046	if (!skb)
6047		goto errout;
6048
6049	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
6050			    RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE);
6051	if (err < 0) {
6052		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
6053		WARN_ON(err == -EMSGSIZE);
6054		kfree_skb(skb);
6055		goto errout;
6056	}
6057	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
6058		    info->nlh, gfp_any());
6059	return;
6060errout:
6061	if (err < 0)
6062		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
6063}
6064
6065static int ip6_route_dev_notify(struct notifier_block *this,
6066				unsigned long event, void *ptr)
6067{
6068	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
6069	struct net *net = dev_net(dev);
6070
6071	if (!(dev->flags & IFF_LOOPBACK))
6072		return NOTIFY_OK;
6073
6074	if (event == NETDEV_REGISTER) {
6075		net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev;
6076		net->ipv6.ip6_null_entry->dst.dev = dev;
6077		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
6078#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6079		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
6080		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
6081		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
6082		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
6083#endif
6084	 } else if (event == NETDEV_UNREGISTER &&
6085		    dev->reg_state != NETREG_UNREGISTERED) {
6086		/* NETDEV_UNREGISTER could be fired for multiple times by
6087		 * netdev_wait_allrefs(). Make sure we only call this once.
6088		 */
6089		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
6090#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6091		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
6092		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
6093#endif
6094	}
6095
6096	return NOTIFY_OK;
6097}
6098
6099/*
6100 *	/proc
6101 */
6102
6103#ifdef CONFIG_PROC_FS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6104static int rt6_stats_seq_show(struct seq_file *seq, void *v)
6105{
6106	struct net *net = (struct net *)seq->private;
6107	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
6108		   net->ipv6.rt6_stats->fib_nodes,
6109		   net->ipv6.rt6_stats->fib_route_nodes,
6110		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
6111		   net->ipv6.rt6_stats->fib_rt_entries,
6112		   net->ipv6.rt6_stats->fib_rt_cache,
6113		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
6114		   net->ipv6.rt6_stats->fib_discarded_routes);
6115
6116	return 0;
6117}
 
 
 
 
 
 
 
 
 
 
 
 
 
6118#endif	/* CONFIG_PROC_FS */
6119
6120#ifdef CONFIG_SYSCTL
6121
6122static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
6123			      void *buffer, size_t *lenp, loff_t *ppos)
 
6124{
6125	struct net *net;
6126	int delay;
6127	int ret;
6128	if (!write)
6129		return -EINVAL;
6130
6131	net = (struct net *)ctl->extra1;
6132	delay = net->ipv6.sysctl.flush_delay;
6133	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
6134	if (ret)
6135		return ret;
6136
6137	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
6138	return 0;
6139}
6140
6141static struct ctl_table ipv6_route_table_template[] = {
6142	{
6143		.procname	=	"flush",
6144		.data		=	&init_net.ipv6.sysctl.flush_delay,
6145		.maxlen		=	sizeof(int),
6146		.mode		=	0200,
6147		.proc_handler	=	ipv6_sysctl_rtcache_flush
6148	},
6149	{
6150		.procname	=	"gc_thresh",
6151		.data		=	&ip6_dst_ops_template.gc_thresh,
6152		.maxlen		=	sizeof(int),
6153		.mode		=	0644,
6154		.proc_handler	=	proc_dointvec,
6155	},
6156	{
6157		.procname	=	"max_size",
6158		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
6159		.maxlen		=	sizeof(int),
6160		.mode		=	0644,
6161		.proc_handler	=	proc_dointvec,
6162	},
6163	{
6164		.procname	=	"gc_min_interval",
6165		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
6166		.maxlen		=	sizeof(int),
6167		.mode		=	0644,
6168		.proc_handler	=	proc_dointvec_jiffies,
6169	},
6170	{
6171		.procname	=	"gc_timeout",
6172		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
6173		.maxlen		=	sizeof(int),
6174		.mode		=	0644,
6175		.proc_handler	=	proc_dointvec_jiffies,
6176	},
6177	{
6178		.procname	=	"gc_interval",
6179		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
6180		.maxlen		=	sizeof(int),
6181		.mode		=	0644,
6182		.proc_handler	=	proc_dointvec_jiffies,
6183	},
6184	{
6185		.procname	=	"gc_elasticity",
6186		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
6187		.maxlen		=	sizeof(int),
6188		.mode		=	0644,
6189		.proc_handler	=	proc_dointvec,
6190	},
6191	{
6192		.procname	=	"mtu_expires",
6193		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
6194		.maxlen		=	sizeof(int),
6195		.mode		=	0644,
6196		.proc_handler	=	proc_dointvec_jiffies,
6197	},
6198	{
6199		.procname	=	"min_adv_mss",
6200		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
6201		.maxlen		=	sizeof(int),
6202		.mode		=	0644,
6203		.proc_handler	=	proc_dointvec,
6204	},
6205	{
6206		.procname	=	"gc_min_interval_ms",
6207		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
6208		.maxlen		=	sizeof(int),
6209		.mode		=	0644,
6210		.proc_handler	=	proc_dointvec_ms_jiffies,
6211	},
6212	{
6213		.procname	=	"skip_notify_on_dev_down",
6214		.data		=	&init_net.ipv6.sysctl.skip_notify_on_dev_down,
6215		.maxlen		=	sizeof(int),
6216		.mode		=	0644,
6217		.proc_handler	=	proc_dointvec_minmax,
6218		.extra1		=	SYSCTL_ZERO,
6219		.extra2		=	SYSCTL_ONE,
6220	},
6221	{ }
6222};
6223
6224struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
6225{
6226	struct ctl_table *table;
6227
6228	table = kmemdup(ipv6_route_table_template,
6229			sizeof(ipv6_route_table_template),
6230			GFP_KERNEL);
6231
6232	if (table) {
6233		table[0].data = &net->ipv6.sysctl.flush_delay;
6234		table[0].extra1 = net;
6235		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
6236		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
6237		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
6238		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
6239		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
6240		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
6241		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
6242		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
6243		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
6244		table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
6245
6246		/* Don't export sysctls to unprivileged users */
6247		if (net->user_ns != &init_user_ns)
6248			table[0].procname = NULL;
6249	}
6250
6251	return table;
6252}
6253#endif
6254
6255static int __net_init ip6_route_net_init(struct net *net)
6256{
6257	int ret = -ENOMEM;
6258
6259	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
6260	       sizeof(net->ipv6.ip6_dst_ops));
6261
6262	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
6263		goto out_ip6_dst_ops;
6264
6265	net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true);
6266	if (!net->ipv6.fib6_null_entry)
6267		goto out_ip6_dst_entries;
6268	memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template,
6269	       sizeof(*net->ipv6.fib6_null_entry));
6270
6271	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
6272					   sizeof(*net->ipv6.ip6_null_entry),
6273					   GFP_KERNEL);
6274	if (!net->ipv6.ip6_null_entry)
6275		goto out_fib6_null_entry;
 
 
6276	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6277	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
6278			 ip6_template_metrics, true);
6279	INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->rt6i_uncached);
6280
6281#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6282	net->ipv6.fib6_has_custom_rules = false;
6283	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
6284					       sizeof(*net->ipv6.ip6_prohibit_entry),
6285					       GFP_KERNEL);
6286	if (!net->ipv6.ip6_prohibit_entry)
6287		goto out_ip6_null_entry;
 
 
6288	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6289	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
6290			 ip6_template_metrics, true);
6291	INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->rt6i_uncached);
6292
6293	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
6294					       sizeof(*net->ipv6.ip6_blk_hole_entry),
6295					       GFP_KERNEL);
6296	if (!net->ipv6.ip6_blk_hole_entry)
6297		goto out_ip6_prohibit_entry;
 
 
6298	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6299	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
6300			 ip6_template_metrics, true);
6301	INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached);
6302#ifdef CONFIG_IPV6_SUBTREES
6303	net->ipv6.fib6_routes_require_src = 0;
6304#endif
6305#endif
6306
6307	net->ipv6.sysctl.flush_delay = 0;
6308	net->ipv6.sysctl.ip6_rt_max_size = 4096;
6309	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
6310	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
6311	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
6312	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
6313	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
6314	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
6315	net->ipv6.sysctl.skip_notify_on_dev_down = 0;
6316
 
 
 
 
6317	net->ipv6.ip6_rt_gc_expire = 30*HZ;
6318
6319	ret = 0;
6320out:
6321	return ret;
6322
6323#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6324out_ip6_prohibit_entry:
6325	kfree(net->ipv6.ip6_prohibit_entry);
6326out_ip6_null_entry:
6327	kfree(net->ipv6.ip6_null_entry);
6328#endif
6329out_fib6_null_entry:
6330	kfree(net->ipv6.fib6_null_entry);
6331out_ip6_dst_entries:
6332	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
6333out_ip6_dst_ops:
6334	goto out;
6335}
6336
6337static void __net_exit ip6_route_net_exit(struct net *net)
6338{
6339	kfree(net->ipv6.fib6_null_entry);
 
 
 
6340	kfree(net->ipv6.ip6_null_entry);
6341#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6342	kfree(net->ipv6.ip6_prohibit_entry);
6343	kfree(net->ipv6.ip6_blk_hole_entry);
6344#endif
6345	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
6346}
6347
6348static int __net_init ip6_route_net_init_late(struct net *net)
6349{
6350#ifdef CONFIG_PROC_FS
6351	proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
6352			sizeof(struct ipv6_route_iter));
6353	proc_create_net_single("rt6_stats", 0444, net->proc_net,
6354			rt6_stats_seq_show, NULL);
6355#endif
6356	return 0;
6357}
6358
6359static void __net_exit ip6_route_net_exit_late(struct net *net)
6360{
6361#ifdef CONFIG_PROC_FS
6362	remove_proc_entry("ipv6_route", net->proc_net);
6363	remove_proc_entry("rt6_stats", net->proc_net);
6364#endif
6365}
6366
6367static struct pernet_operations ip6_route_net_ops = {
6368	.init = ip6_route_net_init,
6369	.exit = ip6_route_net_exit,
6370};
6371
6372static int __net_init ipv6_inetpeer_init(struct net *net)
6373{
6374	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
6375
6376	if (!bp)
6377		return -ENOMEM;
6378	inet_peer_base_init(bp);
6379	net->ipv6.peers = bp;
6380	return 0;
6381}
6382
6383static void __net_exit ipv6_inetpeer_exit(struct net *net)
6384{
6385	struct inet_peer_base *bp = net->ipv6.peers;
6386
6387	net->ipv6.peers = NULL;
6388	inetpeer_invalidate_tree(bp);
6389	kfree(bp);
6390}
6391
6392static struct pernet_operations ipv6_inetpeer_ops = {
6393	.init	=	ipv6_inetpeer_init,
6394	.exit	=	ipv6_inetpeer_exit,
6395};
6396
6397static struct pernet_operations ip6_route_net_late_ops = {
6398	.init = ip6_route_net_init_late,
6399	.exit = ip6_route_net_exit_late,
6400};
6401
6402static struct notifier_block ip6_route_dev_notifier = {
6403	.notifier_call = ip6_route_dev_notify,
6404	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
6405};
6406
6407void __init ip6_route_init_special_entries(void)
6408{
6409	/* Registering of the loopback is done before this portion of code,
6410	 * the loopback reference in rt6_info will not be taken, do it
6411	 * manually for init_net */
6412	init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev;
6413	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
6414	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6415  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
6416	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
6417	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6418	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
6419	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6420  #endif
6421}
6422
6423#if IS_BUILTIN(CONFIG_IPV6)
6424#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
6425DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
6426
6427BTF_ID_LIST(btf_fib6_info_id)
6428BTF_ID(struct, fib6_info)
6429
6430static const struct bpf_iter_seq_info ipv6_route_seq_info = {
6431	.seq_ops		= &ipv6_route_seq_ops,
6432	.init_seq_private	= bpf_iter_init_seq_net,
6433	.fini_seq_private	= bpf_iter_fini_seq_net,
6434	.seq_priv_size		= sizeof(struct ipv6_route_iter),
6435};
6436
6437static struct bpf_iter_reg ipv6_route_reg_info = {
6438	.target			= "ipv6_route",
6439	.ctx_arg_info_size	= 1,
6440	.ctx_arg_info		= {
6441		{ offsetof(struct bpf_iter__ipv6_route, rt),
6442		  PTR_TO_BTF_ID_OR_NULL },
6443	},
6444	.seq_info		= &ipv6_route_seq_info,
6445};
6446
6447static int __init bpf_iter_register(void)
6448{
6449	ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id;
6450	return bpf_iter_reg_target(&ipv6_route_reg_info);
6451}
6452
6453static void bpf_iter_unregister(void)
6454{
6455	bpf_iter_unreg_target(&ipv6_route_reg_info);
6456}
6457#endif
6458#endif
6459
6460int __init ip6_route_init(void)
6461{
6462	int ret;
6463	int cpu;
6464
6465	ret = -ENOMEM;
6466	ip6_dst_ops_template.kmem_cachep =
6467		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
6468				  SLAB_HWCACHE_ALIGN, NULL);
6469	if (!ip6_dst_ops_template.kmem_cachep)
6470		goto out;
6471
6472	ret = dst_entries_init(&ip6_dst_blackhole_ops);
6473	if (ret)
6474		goto out_kmem_cache;
6475
6476	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
6477	if (ret)
6478		goto out_dst_entries;
6479
6480	ret = register_pernet_subsys(&ip6_route_net_ops);
6481	if (ret)
6482		goto out_register_inetpeer;
6483
6484	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
6485
 
 
 
 
 
 
 
 
 
 
 
6486	ret = fib6_init();
6487	if (ret)
6488		goto out_register_subsys;
6489
6490	ret = xfrm6_init();
6491	if (ret)
6492		goto out_fib6_init;
6493
6494	ret = fib6_rules_init();
6495	if (ret)
6496		goto xfrm6_init;
6497
6498	ret = register_pernet_subsys(&ip6_route_net_late_ops);
6499	if (ret)
 
 
6500		goto fib6_rules_init;
6501
6502	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
6503				   inet6_rtm_newroute, NULL, 0);
6504	if (ret < 0)
6505		goto out_register_late_subsys;
6506
6507	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
6508				   inet6_rtm_delroute, NULL, 0);
6509	if (ret < 0)
6510		goto out_register_late_subsys;
6511
6512	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
6513				   inet6_rtm_getroute, NULL,
6514				   RTNL_FLAG_DOIT_UNLOCKED);
6515	if (ret < 0)
6516		goto out_register_late_subsys;
6517
6518	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
6519	if (ret)
6520		goto out_register_late_subsys;
6521
6522#if IS_BUILTIN(CONFIG_IPV6)
6523#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
6524	ret = bpf_iter_register();
6525	if (ret)
6526		goto out_register_late_subsys;
6527#endif
6528#endif
6529
6530	for_each_possible_cpu(cpu) {
6531		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
6532
6533		INIT_LIST_HEAD(&ul->head);
6534		spin_lock_init(&ul->lock);
6535	}
6536
6537out:
6538	return ret;
6539
6540out_register_late_subsys:
6541	rtnl_unregister_all(PF_INET6);
6542	unregister_pernet_subsys(&ip6_route_net_late_ops);
6543fib6_rules_init:
6544	fib6_rules_cleanup();
6545xfrm6_init:
6546	xfrm6_fini();
6547out_fib6_init:
6548	fib6_gc_cleanup();
6549out_register_subsys:
6550	unregister_pernet_subsys(&ip6_route_net_ops);
6551out_register_inetpeer:
6552	unregister_pernet_subsys(&ipv6_inetpeer_ops);
6553out_dst_entries:
6554	dst_entries_destroy(&ip6_dst_blackhole_ops);
6555out_kmem_cache:
6556	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
6557	goto out;
6558}
6559
6560void ip6_route_cleanup(void)
6561{
6562#if IS_BUILTIN(CONFIG_IPV6)
6563#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
6564	bpf_iter_unregister();
6565#endif
6566#endif
6567	unregister_netdevice_notifier(&ip6_route_dev_notifier);
6568	unregister_pernet_subsys(&ip6_route_net_late_ops);
6569	fib6_rules_cleanup();
6570	xfrm6_fini();
6571	fib6_gc_cleanup();
6572	unregister_pernet_subsys(&ipv6_inetpeer_ops);
6573	unregister_pernet_subsys(&ip6_route_net_ops);
6574	dst_entries_destroy(&ip6_dst_blackhole_ops);
6575	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
6576}