Linux Audio

Check our new training course

Loading...
v3.1
 
   1/*
   2 *	Linux INET6 implementation
   3 *	FIB front-end.
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*	Changes:
  15 *
  16 *	YOSHIFUJI Hideaki @USAGI
  17 *		reworked default router selection.
  18 *		- respect outgoing interface
  19 *		- select from (probably) reachable routers (i.e.
  20 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *		- always select the same router if it is (probably)
  22 *		reachable.  otherwise, round-robin the list.
  23 *	Ville Nuorvala
  24 *		Fixed routing subtrees.
  25 */
  26
 
 
  27#include <linux/capability.h>
  28#include <linux/errno.h>
 
  29#include <linux/types.h>
  30#include <linux/times.h>
  31#include <linux/socket.h>
  32#include <linux/sockios.h>
  33#include <linux/net.h>
  34#include <linux/route.h>
  35#include <linux/netdevice.h>
  36#include <linux/in6.h>
  37#include <linux/mroute6.h>
  38#include <linux/init.h>
  39#include <linux/if_arp.h>
  40#include <linux/proc_fs.h>
  41#include <linux/seq_file.h>
  42#include <linux/nsproxy.h>
  43#include <linux/slab.h>
 
 
  44#include <net/net_namespace.h>
  45#include <net/snmp.h>
  46#include <net/ipv6.h>
  47#include <net/ip6_fib.h>
  48#include <net/ip6_route.h>
  49#include <net/ndisc.h>
  50#include <net/addrconf.h>
  51#include <net/tcp.h>
  52#include <linux/rtnetlink.h>
  53#include <net/dst.h>
 
  54#include <net/xfrm.h>
  55#include <net/netevent.h>
  56#include <net/netlink.h>
  57
  58#include <asm/uaccess.h>
 
 
 
 
 
  59
  60#ifdef CONFIG_SYSCTL
  61#include <linux/sysctl.h>
  62#endif
  63
/*
 * Debug verbosity for this file.  Set to 3 to get tracing: at that level
 * RDBG() and RT6_TRACE() expand to printk() calls, below it they expand
 * to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
/* RDBG takes a fully parenthesised printk argument list: RDBG(("fmt", a)) */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
 
 
 
 
  74
  75static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
  76				    const struct in6_addr *dest);
  77static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  78static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  79static unsigned int	 ip6_default_mtu(const struct dst_entry *dst);
 
  80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  81static void		ip6_dst_destroy(struct dst_entry *);
  82static void		ip6_dst_ifdown(struct dst_entry *,
  83				       struct net_device *dev, int how);
  84static int		 ip6_dst_gc(struct dst_ops *ops);
  85
  86static int		ip6_pkt_discard(struct sk_buff *skb);
  87static int		ip6_pkt_discard_out(struct sk_buff *skb);
 
 
  88static void		ip6_link_failure(struct sk_buff *skb);
  89static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  90
  91#ifdef CONFIG_IPV6_ROUTE_INFO
  92static struct rt6_info *rt6_add_route_info(struct net *net,
  93					   const struct in6_addr *prefix, int prefixlen,
  94					   const struct in6_addr *gwaddr, int ifindex,
  95					   unsigned pref);
  96static struct rt6_info *rt6_get_route_info(struct net *net,
 
  97					   const struct in6_addr *prefix, int prefixlen,
  98					   const struct in6_addr *gwaddr, int ifindex);
 
  99#endif
 100
 101static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 
 
 
 
 
 
 
 
 102{
 103	struct rt6_info *rt = (struct rt6_info *) dst;
 104	struct inet_peer *peer;
 105	u32 *p = NULL;
 106
 107	if (!(rt->dst.flags & DST_HOST))
 108		return NULL;
 109
 110	if (!rt->rt6i_peer)
 111		rt6_bind_peer(rt, 1);
 
 
 112
 113	peer = rt->rt6i_peer;
 114	if (peer) {
 115		u32 *old_p = __DST_METRICS_PTR(old);
 116		unsigned long prev, new;
 117
 118		p = peer->metrics;
 119		if (inet_metrics_new(peer))
 120			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
 
 
 
 
 
 
 121
 122		new = (unsigned long) p;
 123		prev = cmpxchg(&dst->_metrics, old, new);
 
 124
 125		if (prev != old) {
 126			p = __DST_METRICS_PTR(prev);
 127			if (prev & DST_METRICS_READ_ONLY)
 128				p = NULL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 129		}
 
 130	}
 131	return p;
 132}
 133
 134static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
 
 
 135{
 136	return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 137}
 138
/*
 * dst_ops callbacks for ordinary AF_INET6 routes; wires the handlers
 * declared earlier in this file into the generic dst layer.
 * NOTE(review): named a "template" -- presumably copied into a
 * per-namespace dst_ops (rt6_alloc_cow() uses net->ipv6.ip6_dst_ops);
 * the copy site is not visible in this part of the file.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.default_mtu		=	ip6_default_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
 156
/* default_mtu handler for blackhole dsts: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	return 0;
}
 161
/* update_pmtu handler for blackhole dsts: deliberately does nothing. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}
 165
/* cow_metrics handler for blackhole dsts: never hands out writable metrics. */
static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}
 171
/*
 * dst_ops for the blackhole dsts built by ip6_blackhole_route().  Shares
 * destroy/check/advmss/neigh_lookup with the regular IPv6 ops; MTU,
 * PMTU-update and metrics copy-on-write are the no-op stubs above.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.default_mtu		=	ip6_blackhole_default_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ip6_neigh_lookup,
};
 183
/*
 * Read-only metrics array with only the hop limit preset (255); every
 * other metric is zero.  Metric slots are indexed by RTAX_* - 1.
 * NOTE(review): its users are outside the visible part of the file.
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 255,
};
 187
/*
 * Template for the "no route" entry: a reject route whose dst carries
 * -ENETUNREACH and whose input/output handlers are the ip6_pkt_discard
 * pair.  Lookups in this file return net->ipv6.ip6_null_entry when
 * nothing matches; presumably that per-namespace entry is built from
 * this template -- confirm at the init site.
 */
static struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* largest possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};
 202
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" reject entry: same shape as the null entry
 * but the dst carries -EACCES and packets go to the ip6_pkt_prohibit pair.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" reject entry: the dst carries -EINVAL and
 * both directions are handed to dst_discard.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
 239
 
 
 
 
 
 240/* allocate dst with ip6_dst_ops */
 241static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
 242					     struct net_device *dev,
 243					     int flags)
 244{
 245	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
 
 246
 247	if (rt != NULL)
 248		memset(&rt->rt6i_table, 0,
 249			sizeof(*rt) - sizeof(struct dst_entry));
 
 250
 251	return rt;
 252}
 
 253
 254static void ip6_dst_destroy(struct dst_entry *dst)
 255{
 256	struct rt6_info *rt = (struct rt6_info *)dst;
 257	struct inet6_dev *idev = rt->rt6i_idev;
 258	struct inet_peer *peer = rt->rt6i_peer;
 259
 260	if (!(rt->dst.flags & DST_HOST))
 261		dst_destroy_metrics_generic(dst);
 262
 263	if (idev != NULL) {
 
 264		rt->rt6i_idev = NULL;
 265		in6_dev_put(idev);
 266	}
 267	if (peer) {
 268		rt->rt6i_peer = NULL;
 269		inet_putpeer(peer);
 270	}
 271}
 272
 273static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
 
 
 
 
 
 
 
 
 
 
 
 
 
 274
 275static u32 rt6_peer_genid(void)
 276{
 277	return atomic_read(&__rt6_peer_genid);
 
 
 
 278}
 279
 280void rt6_bind_peer(struct rt6_info *rt, int create)
 281{
 282	struct inet_peer *peer;
 283
 284	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
 285	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
 286		inet_putpeer(peer);
 287	else
 288		rt->rt6i_peer_genid = rt6_peer_genid();
 
 
 
 
 
 289}
 290
 291static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 292			   int how)
 
 293{
 294	struct rt6_info *rt = (struct rt6_info *)dst;
 295	struct inet6_dev *idev = rt->rt6i_idev;
 296	struct net_device *loopback_dev =
 297		dev_net(dev)->loopback_dev;
 298
 299	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
 300		struct inet6_dev *loopback_idev =
 301			in6_dev_get(loopback_dev);
 302		if (loopback_idev != NULL) {
 303			rt->rt6i_idev = loopback_idev;
 304			in6_dev_put(idev);
 305		}
 
 
 
 
 
 
 
 
 
 
 
 
 306	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 307}
 308
 309static __inline__ int rt6_check_expired(const struct rt6_info *rt)
 
 
 
 
 
 310{
 311	return (rt->rt6i_flags & RTF_EXPIRES) &&
 312		time_after(jiffies, rt->rt6i_expires);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 313}
 314
 315static inline int rt6_need_strict(const struct in6_addr *daddr)
 
 
 
 
 
 
 
 
 316{
 317	return ipv6_addr_type(daddr) &
 318		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 319}
 320
 321/*
 322 *	Route lookup. Any table->tb6_lock is implied.
 323 */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 324
/*
 * Pick the entry from the route list starting at @rt that fits the
 * requested interface or source address.
 *
 * @oif != 0: an entry whose device has ifindex @oif wins immediately;
 *	      loopback-device entries are remembered in 'local' as a
 *	      fallback.  If nothing fits and RT6_LOOKUP_F_IFACE is set,
 *	      the namespace's null entry is returned.
 * @oif == 0: the first entry whose device owns @saddr (per
 *	      ipv6_chk_addr()) wins.
 *
 * In every other case, including "no oif and unspecified saddr", the
 * list head @rt is returned unchanged.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* Nothing to match against: keep the head of the list. */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->rt6i_dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				/* loopback route whose idev is not on oif */
				if (sprt->rt6i_idev == NULL ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					/* oif is known non-zero in this branch */
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					/* keep an earlier fallback that is on oif */
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
 371
 372#ifdef CONFIG_IPV6_ROUTER_PREF
 373static void rt6_probe(struct rt6_info *rt)
 
 
 
 
 
 
 
 374{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 375	struct neighbour *neigh;
 
 
 
 376	/*
 377	 * Okay, this does not seem to be appropriate
 378	 * for now, however, we need to check if it
 379	 * is really so; aka Router Reachability Probing.
 380	 *
 381	 * Router Reachability Probe MUST be rate-limited
 382	 * to no more than one per minute.
 383	 */
 
 
 
 
 
 384	rcu_read_lock();
 385	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
 386	if (!neigh || (neigh->nud_state & NUD_VALID))
 387		goto out;
 388	read_lock_bh(&neigh->lock);
 389	if (!(neigh->nud_state & NUD_VALID) &&
 390	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
 391		struct in6_addr mcaddr;
 392		struct in6_addr *target;
 393
 394		neigh->updated = jiffies;
 395		read_unlock_bh(&neigh->lock);
 396
 397		target = (struct in6_addr *)&neigh->primary_key;
 398		addrconf_addr_solict_mult(target, &mcaddr);
 399		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
 
 
 
 
 
 
 
 
 
 400	} else {
 401		read_unlock_bh(&neigh->lock);
 
 
 
 
 402	}
 
 403out:
 404	rcu_read_unlock();
 405}
 406#else
 407static inline void rt6_probe(struct rt6_info *rt)
 408{
 409}
 410#endif
 411
 412/*
 413 * Default Router Selection (RFC 2461 6.3.6)
 414 */
 415static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 416{
 417	struct net_device *dev = rt->rt6i_dev;
 418	if (!oif || dev->ifindex == oif)
 419		return 2;
 420	if ((dev->flags & IFF_LOOPBACK) &&
 421	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 422		return 1;
 423	return 0;
 424}
 425
 426static inline int rt6_check_neigh(struct rt6_info *rt)
 427{
 
 428	struct neighbour *neigh;
 429	int m;
 430
 431	rcu_read_lock();
 432	neigh = dst_get_neighbour(&rt->dst);
 433	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 434	    !(rt->rt6i_flags & RTF_GATEWAY))
 435		m = 1;
 436	else if (neigh) {
 437		read_lock_bh(&neigh->lock);
 438		if (neigh->nud_state & NUD_VALID)
 439			m = 2;
 440#ifdef CONFIG_IPV6_ROUTER_PREF
 441		else if (neigh->nud_state & NUD_FAILED)
 442			m = 0;
 443#endif
 444		else
 445			m = 1;
 446		read_unlock_bh(&neigh->lock);
 447	} else
 448		m = 0;
 
 
 449	rcu_read_unlock();
 450	return m;
 
 451}
 452
 453static int rt6_score_route(struct rt6_info *rt, int oif,
 454			   int strict)
 455{
 456	int m, n;
 
 
 
 457
 458	m = rt6_check_dev(rt, oif);
 459	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 460		return -1;
 461#ifdef CONFIG_IPV6_ROUTER_PREF
 462	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 463#endif
 464	n = rt6_check_neigh(rt);
 465	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
 466		return -1;
 
 
 
 467	return m;
 468}
 469
 470static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 471				   int *mpri, struct rt6_info *match)
 472{
 
 
 473	int m;
 474
 475	if (rt6_check_expired(rt))
 
 
 
 
 
 476		goto out;
 477
 478	m = rt6_score_route(rt, oif, strict);
 479	if (m < 0)
 
 
 
 480		goto out;
 
 481
 
 
 
 
 482	if (m > *mpri) {
 483		if (strict & RT6_LOOKUP_F_REACHABLE)
 484			rt6_probe(match);
 485		*mpri = m;
 486		match = rt;
 487	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
 488		rt6_probe(rt);
 489	}
 490
 491out:
 492	return match;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 493}
 494
 495static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 496				     struct rt6_info *rr_head,
 497				     u32 metric, int oif, int strict)
 498{
 499	struct rt6_info *rt, *match;
 
 500	int mpri = -1;
 501
 502	match = NULL;
 503	for (rt = rr_head; rt && rt->rt6i_metric == metric;
 504	     rt = rt->dst.rt6_next)
 505		match = find_match(rt, oif, strict, &mpri, match);
 506	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 507	     rt = rt->dst.rt6_next)
 508		match = find_match(rt, oif, strict, &mpri, match);
 
 509
 510	return match;
 
 511}
 512
 513static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 
 514{
 515	struct rt6_info *match, *rt0;
 516	struct net *net;
 
 
 
 
 
 517
 518	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
 519		  __func__, fn->leaf, oif);
 520
 521	rt0 = fn->rr_ptr;
 522	if (!rt0)
 523		fn->rr_ptr = rt0 = fn->leaf;
 524
 525	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
 
 
 
 
 
 
 
 
 
 
 
 526
 527	if (!match &&
 528	    (strict & RT6_LOOKUP_F_REACHABLE)) {
 529		struct rt6_info *next = rt0->dst.rt6_next;
 530
 531		/* no entries matched; do round-robin */
 532		if (!next || next->rt6i_metric != rt0->rt6i_metric)
 533			next = fn->leaf;
 534
 535		if (next != rt0)
 536			fn->rr_ptr = next;
 
 
 
 
 
 537	}
 538
 539	RT6_TRACE("%s() => %p\n",
 540		  __func__, match);
 
 
 
 
 
 
 541
 542	net = dev_net(rt0->rt6i_dev);
 543	return match ? match : net->ipv6.ip6_null_entry;
 
 
 544}
 545
 546#ifdef CONFIG_IPV6_ROUTE_INFO
 547int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 548		  const struct in6_addr *gwaddr)
 549{
 550	struct net *net = dev_net(dev);
 551	struct route_info *rinfo = (struct route_info *) opt;
 552	struct in6_addr prefix_buf, *prefix;
 553	unsigned int pref;
 554	unsigned long lifetime;
 555	struct rt6_info *rt;
 556
 557	if (len < sizeof(struct route_info)) {
 558		return -EINVAL;
 559	}
 560
 561	/* Sanity check for prefix_len and length */
 562	if (rinfo->length > 3) {
 563		return -EINVAL;
 564	} else if (rinfo->prefix_len > 128) {
 565		return -EINVAL;
 566	} else if (rinfo->prefix_len > 64) {
 567		if (rinfo->length < 2) {
 568			return -EINVAL;
 569		}
 570	} else if (rinfo->prefix_len > 0) {
 571		if (rinfo->length < 1) {
 572			return -EINVAL;
 573		}
 574	}
 575
 576	pref = rinfo->route_pref;
 577	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 578		return -EINVAL;
 579
 580	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 581
 582	if (rinfo->length == 3)
 583		prefix = (struct in6_addr *)rinfo->prefix;
 584	else {
 585		/* this function is safe */
 586		ipv6_addr_prefix(&prefix_buf,
 587				 (struct in6_addr *)rinfo->prefix,
 588				 rinfo->prefix_len);
 589		prefix = &prefix_buf;
 590	}
 591
 592	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 593				dev->ifindex);
 
 
 
 594
 595	if (rt && !lifetime) {
 596		ip6_del_rt(rt);
 597		rt = NULL;
 598	}
 599
 600	if (!rt && lifetime)
 601		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 602					pref);
 603	else if (rt)
 604		rt->rt6i_flags = RTF_ROUTEINFO |
 605				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 606
 607	if (rt) {
 608		if (!addrconf_finite_timeout(lifetime)) {
 609			rt->rt6i_flags &= ~RTF_EXPIRES;
 610		} else {
 611			rt->rt6i_expires = jiffies + HZ * lifetime;
 612			rt->rt6i_flags |= RTF_EXPIRES;
 613		}
 614		dst_release(&rt->dst);
 615	}
 616	return 0;
 617}
 618#endif
 619
 620#define BACKTRACK(__net, saddr)			\
 621do { \
 622	if (rt == __net->ipv6.ip6_null_entry) {	\
 623		struct fib6_node *pn; \
 624		while (1) { \
 625			if (fn->fn_flags & RTN_TL_ROOT) \
 626				goto out; \
 627			pn = fn->parent; \
 628			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
 629				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
 630			else \
 631				fn = pn; \
 632			if (fn->fn_flags & RTN_RTINFO) \
 633				goto restart; \
 634		} \
 635	} \
 636} while(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 637
/*
 * Per-table lookup callback used by rt6_lookup(): find the node for
 * fl6->daddr/fl6->saddr, pick an entry with rt6_device_match() and
 * backtrack towards the root while the result is the null entry.
 *
 * Runs under the table's read lock.  The returned route has been passed
 * through dst_use(), so the caller owns a reference to it.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	/* May jump to 'restart' (new fn) or 'out' (root reached). */
	BACKTRACK(net, &fl6->saddr);
out:
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
 
 657
 658struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 659			    const struct in6_addr *saddr, int oif, int strict)
 
 660{
 661	struct flowi6 fl6 = {
 662		.flowi6_oif = oif,
 663		.daddr = *daddr,
 664	};
 665	struct dst_entry *dst;
 666	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 667
 668	if (saddr) {
 669		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 670		flags |= RT6_LOOKUP_F_HAS_SADDR;
 671	}
 672
 673	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 674	if (dst->error == 0)
 675		return (struct rt6_info *) dst;
 676
 677	dst_release(dst);
 678
 679	return NULL;
 680}
 681
 682EXPORT_SYMBOL(rt6_lookup);
 683
 684/* ip6_ins_rt is called with FREE table->tb6_lock.
 685   It takes new route entry, the addition fails by any reason the
 686   route is freed. In any case, if caller does not hold it, it may
 687   be destroyed.
 688 */
 689
 690static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 
 691{
 692	int err;
 693	struct fib6_table *table;
 694
 695	table = rt->rt6i_table;
 696	write_lock_bh(&table->tb6_lock);
 697	err = fib6_add(&table->tb6_root, rt, info);
 698	write_unlock_bh(&table->tb6_lock);
 699
 700	return err;
 701}
 702
 703int ip6_ins_rt(struct rt6_info *rt)
 704{
 705	struct nl_info info = {
 706		.nl_net = dev_net(rt->rt6i_dev),
 707	};
 708	return __ip6_ins_rt(rt, &info);
 709}
 710
/*
 * Clone @ort into an RTF_CACHE route for @daddr and bind a neighbour
 * entry to the clone.
 *
 * For non-gateway routes the destination itself becomes the clone's
 * gateway.  With CONFIG_IPV6_SUBTREES a clone that has a source prefix
 * and a given @saddr gets @saddr as its /128 source key.
 *
 * If the neighbour lookup fails, one forced dst garbage-collection pass
 * is tried outside softirq context before giving up.
 * Returns the clone, or NULL on allocation or neighbour failure.
 */
static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
				      const struct in6_addr *daddr,
				      const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		struct neighbour *neigh;
		/* one retry when not in softirq, none otherwise */
		int attempts = !in_softirq();

		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
			/* daddr equals the prefix address of a non-host route */
			if (rt->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
				rt->rt6i_flags |= RTF_ANYCAST;
			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
		}

		rt->rt6i_flags |= RTF_CACHE;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
			rt->rt6i_src.plen = 128;
		}
#endif

	retry:
		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
		if (IS_ERR(neigh)) {
			struct net *net = dev_net(rt->rt6i_dev);
			int saved_rt_min_interval =
				net->ipv6.sysctl.ip6_rt_gc_min_interval;
			int saved_rt_elasticity =
				net->ipv6.sysctl.ip6_rt_gc_elasticity;

			if (attempts-- > 0) {
				/*
				 * Temporarily override the GC sysctls for one
				 * ip6_dst_gc() pass, then restore them.
				 */
				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;

				ip6_dst_gc(&net->ipv6.ip6_dst_ops);

				net->ipv6.sysctl.ip6_rt_gc_elasticity =
					saved_rt_elasticity;
				net->ipv6.sysctl.ip6_rt_gc_min_interval =
					saved_rt_min_interval;
				goto retry;
			}

			if (net_ratelimit())
				printk(KERN_WARNING
				       "ipv6: Neighbour table overflow.\n");
			dst_free(&rt->dst);
			return NULL;
		}
		dst_set_neighbour(&rt->dst, neigh);

	}

	return rt;
}
 777
 778static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 779					const struct in6_addr *daddr)
 780{
 781	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
 782
 783	if (rt) {
 784		rt->rt6i_flags |= RTF_CACHE;
 785		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
 
 
 
 
 
 
 
 
 
 
 786	}
 787	return rt;
 
 788}
 789
/*
 * Core per-table route resolution shared by the input and output paths.
 *
 * Selects a route for @fl6 with rt6_select(); unless forwarding is
 * enabled the first pass also demands RT6_LOOKUP_F_REACHABLE.  If the
 * selected route is neither the null entry nor already RTF_CACHE, a
 * cache clone is created and inserted, retrying the whole lookup up to
 * three times when the insertion fails.
 *
 * Returns a route with a reference held for the caller; its lastuse and
 * __use fields are updated.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);

restart:
	rt = rt6_select(fn, oif, strict | reachable);

	/* May jump to 'restart' (new fn) or 'out' (root reached). */
	BACKTRACK(net, &fl6->saddr);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	/* Pin rt, then drop the lock: the clone helpers run unlocked. */
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
	else if (!(rt->dst.flags & DST_HOST))
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
	else
		goto out2;	/* host route with a neighbour: use as is */

	/* Swap the reference from the original to the clone (or null entry). */
	dst_release(&rt->dst);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->dst);
	goto relookup;

out:
	/* First pass used F_REACHABLE: redo the lookup once without it. */
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 859
 860static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
 861					    struct flowi6 *fl6, int flags)
 
 862{
 863	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 864}
 865
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 866void ip6_route_input(struct sk_buff *skb)
 867{
 868	const struct ipv6hdr *iph = ipv6_hdr(skb);
 869	struct net *net = dev_net(skb->dev);
 870	int flags = RT6_LOOKUP_F_HAS_SADDR;
 
 871	struct flowi6 fl6 = {
 872		.flowi6_iif = skb->dev->ifindex,
 873		.daddr = iph->daddr,
 874		.saddr = iph->saddr,
 875		.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
 876		.flowi6_mark = skb->mark,
 877		.flowi6_proto = iph->nexthdr,
 878	};
 
 879
 880	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
 881		flags |= RT6_LOOKUP_F_IFACE;
 
 882
 883	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
 
 
 
 
 
 
 
 884}
 885
 886static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
 887					     struct flowi6 *fl6, int flags)
 
 
 
 888{
 889	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 890}
 891
 892struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 893				    struct flowi6 *fl6)
 
 
 894{
 895	int flags = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 896
 897	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
 
 
 
 898		flags |= RT6_LOOKUP_F_IFACE;
 899
 900	if (!ipv6_addr_any(&fl6->saddr))
 901		flags |= RT6_LOOKUP_F_HAS_SADDR;
 902	else if (sk)
 903		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 904
 905	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 906}
 907
 908EXPORT_SYMBOL(ip6_route_output);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 909
 910struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 911{
 912	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
 
 913	struct dst_entry *new = NULL;
 914
 915	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
 
 916	if (rt) {
 917		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
 
 918
 919		new = &rt->dst;
 920
 921		new->__use = 1;
 922		new->input = dst_discard;
 923		new->output = dst_discard;
 924
 925		if (dst_metrics_read_only(&ort->dst))
 926			new->_metrics = ort->dst._metrics;
 927		else
 928			dst_copy_metrics(new, &ort->dst);
 929		rt->rt6i_idev = ort->rt6i_idev;
 930		if (rt->rt6i_idev)
 931			in6_dev_hold(rt->rt6i_idev);
 932		rt->rt6i_expires = 0;
 933
 934		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
 935		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
 936		rt->rt6i_metric = 0;
 937
 938		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
 939#ifdef CONFIG_IPV6_SUBTREES
 940		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
 941#endif
 942
 943		dst_free(new);
 944	}
 945
 946	dst_release(dst_orig);
 947	return new ? new : ERR_PTR(-ENOMEM);
 948}
 949
 950/*
 951 *	Destination cache support functions
 952 */
 953
 954static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 955{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 956	struct rt6_info *rt;
 957
 958	rt = (struct rt6_info *) dst;
 959
 960	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
 961		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
 962			if (!rt->rt6i_peer)
 963				rt6_bind_peer(rt, 0);
 964			rt->rt6i_peer_genid = rt6_peer_genid();
 965		}
 966		return dst;
 967	}
 968	return NULL;
 
 
 
 
 
 
 
 
 
 
 
 
 969}
 
 970
 971static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 972{
 973	struct rt6_info *rt = (struct rt6_info *) dst;
 974
 975	if (rt) {
 976		if (rt->rt6i_flags & RTF_CACHE) {
 
 977			if (rt6_check_expired(rt)) {
 978				ip6_del_rt(rt);
 979				dst = NULL;
 980			}
 
 981		} else {
 982			dst_release(dst);
 983			dst = NULL;
 984		}
 985	}
 986	return dst;
 987}
 988
 989static void ip6_link_failure(struct sk_buff *skb)
 990{
 991	struct rt6_info *rt;
 992
 993	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
 994
 995	rt = (struct rt6_info *) skb_dst(skb);
 996	if (rt) {
 997		if (rt->rt6i_flags&RTF_CACHE) {
 998			dst_set_expires(&rt->dst, 0);
 999			rt->rt6i_flags |= RTF_EXPIRES;
1000		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1001			rt->rt6i_node->fn_sernum = -1;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1002	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1003}
1004
1005static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 
 
 
 
 
 
 
 
 
 
1006{
1007	struct rt6_info *rt6 = (struct rt6_info*)dst;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1008
1009	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1010		rt6->rt6i_flags |= RTF_MODIFIED;
1011		if (mtu < IPV6_MIN_MTU) {
1012			u32 features = dst_metric(dst, RTAX_FEATURES);
1013			mtu = IPV6_MIN_MTU;
1014			features |= RTAX_FEATURE_ALLFRAG;
1015			dst_metric_set(dst, RTAX_FEATURES, features);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1016		}
1017		dst_metric_set(dst, RTAX_MTU, mtu);
1018	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1019}
 
1020
1021static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1022{
1023	struct net_device *dev = dst->dev;
1024	unsigned int mtu = dst_mtu(dst);
1025	struct net *net = dev_net(dev);
1026
1027	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1028
1029	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1030		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1031
1032	/*
1033	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1034	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1035	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1036	 * rely only on pmtu discovery"
1037	 */
1038	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1039		mtu = IPV6_MAXPLEN;
1040	return mtu;
1041}
1042
1043static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1044{
1045	unsigned int mtu = IPV6_MIN_MTU;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1046	struct inet6_dev *idev;
 
 
1047
1048	rcu_read_lock();
1049	idev = __in6_dev_get(dst->dev);
1050	if (idev)
1051		mtu = idev->cnf.mtu6;
1052	rcu_read_unlock();
1053
1054	return mtu;
1055}
 
 
 
1056
/*
 * Head of a singly linked list (chained through dst->next) of the dst
 * entries created by icmp6_dst_alloc().  icmp6_dst_gc() and
 * icmp6_clean_all() unlink and free entries from it.
 */
 1057static struct dst_entry *icmp6_dst_gc_list;
/* Taken with spin_lock_bh() around every walk or update of the list above. */
 1058static DEFINE_SPINLOCK(icmp6_dst_lock);
 
 
 
 
 
 
 
 
1059
1060struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1061				  struct neighbour *neigh,
1062				  const struct in6_addr *addr)
1063{
 
1064	struct rt6_info *rt;
1065	struct inet6_dev *idev = in6_dev_get(dev);
1066	struct net *net = dev_net(dev);
1067
1068	if (unlikely(idev == NULL))
1069		return NULL;
1070
1071	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1072	if (unlikely(rt == NULL)) {
1073		in6_dev_put(idev);
 
1074		goto out;
1075	}
1076
1077	if (neigh)
1078		neigh_hold(neigh);
1079	else {
1080		neigh = ndisc_get_neigh(dev, addr);
1081		if (IS_ERR(neigh))
1082			neigh = NULL;
1083	}
1084
1085	rt->dst.flags |= DST_HOST;
1086	rt->dst.output  = ip6_output;
1087	dst_set_neighbour(&rt->dst, neigh);
1088	atomic_set(&rt->dst.__refcnt, 1);
1089	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1090
1091	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1092	rt->rt6i_dst.plen = 128;
1093	rt->rt6i_idev     = idev;
 
1094
1095	spin_lock_bh(&icmp6_dst_lock);
1096	rt->dst.next = icmp6_dst_gc_list;
1097	icmp6_dst_gc_list = &rt->dst;
1098	spin_unlock_bh(&icmp6_dst_lock);
1099
1100	fib6_force_start_gc(net);
1101
1102out:
1103	return &rt->dst;
1104}
1105
1106int icmp6_dst_gc(void)
1107{
1108	struct dst_entry *dst, **pprev;
1109	int more = 0;
1110
1111	spin_lock_bh(&icmp6_dst_lock);
1112	pprev = &icmp6_dst_gc_list;
1113
1114	while ((dst = *pprev) != NULL) {
1115		if (!atomic_read(&dst->__refcnt)) {
1116			*pprev = dst->next;
1117			dst_free(dst);
1118		} else {
1119			pprev = &dst->next;
1120			++more;
1121		}
1122	}
1123
1124	spin_unlock_bh(&icmp6_dst_lock);
1125
1126	return more;
1127}
1128
1129static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1130			    void *arg)
1131{
1132	struct dst_entry *dst, **pprev;
1133
1134	spin_lock_bh(&icmp6_dst_lock);
1135	pprev = &icmp6_dst_gc_list;
1136	while ((dst = *pprev) != NULL) {
1137		struct rt6_info *rt = (struct rt6_info *) dst;
1138		if (func(rt, arg)) {
1139			*pprev = dst->next;
1140			dst_free(dst);
1141		} else {
1142			pprev = &dst->next;
1143		}
1144	}
1145	spin_unlock_bh(&icmp6_dst_lock);
1146}
1147
1148static int ip6_dst_gc(struct dst_ops *ops)
1149{
1150	unsigned long now = jiffies;
1151	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1152	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1153	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1154	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1155	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1156	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
 
1157	int entries;
1158
1159	entries = dst_entries_get_fast(ops);
1160	if (time_after(rt_last_gc + rt_min_interval, now) &&
1161	    entries <= rt_max_size)
1162		goto out;
1163
1164	net->ipv6.ip6_rt_gc_expire++;
1165	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1166	net->ipv6.ip6_rt_last_gc = now;
1167	entries = dst_entries_get_slow(ops);
1168	if (entries < ops->gc_thresh)
1169		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1170out:
1171	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1172	return entries > rt_max_size;
1173}
1174
1175/* Clean host part of a prefix. Not necessary in radix tree,
1176   but results in cleaner routing tables.
 
 
 
 
 
 
 
 
 
 
 
 
 
1177
1178   Remove it only when all the things will work!
1179 */
1180
1181int ip6_dst_hoplimit(struct dst_entry *dst)
1182{
1183	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1184	if (hoplimit == 0) {
1185		struct net_device *dev = dst->dev;
1186		struct inet6_dev *idev;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1187
1188		rcu_read_lock();
1189		idev = __in6_dev_get(dev);
1190		if (idev)
1191			hoplimit = idev->cnf.hop_limit;
1192		else
1193			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
 
 
1194		rcu_read_unlock();
 
 
 
1195	}
1196	return hoplimit;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1197}
1198EXPORT_SYMBOL(ip6_dst_hoplimit);
1199
1200/*
1201 *
1202 */
 
 
 
 
1203
/*
 * Build a route from @cfg and insert it with __ip6_ins_rt().
 *
 * Steps, in order:
 *  - reject prefix lengths above 128 (and any source prefix when
 *    CONFIG_IPV6_SUBTREES is not set);
 *  - resolve cfg->fc_ifindex to a device and its inet6_dev, taking a
 *    reference on each;
 *  - obtain the table with fib6_new_table() and allocate the rt6_info;
 *  - pick the input handler from the destination address type and
 *    RTF_LOCAL, fill in prefix, metric, protocol and expiry;
 *  - turn RTF_REJECT routes, and routes through a loopback device to a
 *    non-loopback destination without RTF_LOCAL, into discard routes on
 *    the namespace loopback device;
 *  - validate the gateway, the preferred source address, and bind a
 *    neighbour for RTF_GATEWAY / RTF_NONEXTHOP routes;
 *  - copy metrics from the netlink attributes in cfg->fc_mx.
 *
 * Note that @cfg is modified: fc_metric and fc_protocol receive defaults
 * when unset, and fc_nlinfo.nl_net is overwritten with dev_net(dev).
 *
 * Returns the result of __ip6_ins_rt() on the success path, otherwise a
 * negative errno (-EINVAL, -ENODEV, -ENOBUFS, -ENOMEM, -EHOSTUNREACH, or
 * the error from the neighbour lookup).
 */
 1204int ip6_route_add(struct fib6_config *cfg)
 1205{
 1206	int err;
 1207	struct net *net = cfg->fc_nlinfo.nl_net;
 1208	struct rt6_info *rt = NULL;
 1209	struct net_device *dev = NULL;
 1210	struct inet6_dev *idev = NULL;
 1211	struct fib6_table *table;
 1212	int addr_type;
 1213
 1214	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
 1215		return -EINVAL;
 1216#ifndef CONFIG_IPV6_SUBTREES
 1217	if (cfg->fc_src_len)
 1218		return -EINVAL;
 1219#endif
 1220	if (cfg->fc_ifindex) {
 1221		err = -ENODEV;
 1222		dev = dev_get_by_index(net, cfg->fc_ifindex);
 1223		if (!dev)
 1224			goto out;
 1225		idev = in6_dev_get(dev);
 1226		if (!idev)
 1227			goto out;
 1228	}
 1229
 1230	if (cfg->fc_metric == 0)
 1231		cfg->fc_metric = IP6_RT_PRIO_USER;
 1232
 1233	table = fib6_new_table(net, cfg->fc_table);
 1234	if (table == NULL) {
 1235		err = -ENOBUFS;
 1236		goto out;
 1237	}
 1238
 1239	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
 1240
 1241	if (rt == NULL) {
 1242		err = -ENOMEM;
 1243		goto out;
 1244	}
 1245
 1246	rt->dst.obsolete = -1;
	/* fc_expires is in clock_t units; 0 is stored when RTF_EXPIRES is not set. */
 1247	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
 1248				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
 1249				0;
 1250
 1251	if (cfg->fc_protocol == RTPROT_UNSPEC)
 1252		cfg->fc_protocol = RTPROT_BOOT;
 1253	rt->rt6i_protocol = cfg->fc_protocol;
 1254
 1255	addr_type = ipv6_addr_type(&cfg->fc_dst);
 1256
 1257	if (addr_type & IPV6_ADDR_MULTICAST)
 1258		rt->dst.input = ip6_mc_input;
 1259	else if (cfg->fc_flags & RTF_LOCAL)
 1260		rt->dst.input = ip6_input;
 1261	else
 1262		rt->dst.input = ip6_forward;
 1263
 1264	rt->dst.output = ip6_output;
 1265
 1266	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
 1267	rt->rt6i_dst.plen = cfg->fc_dst_len;
 1268	if (rt->rt6i_dst.plen == 128)
 1269	       rt->dst.flags |= DST_HOST;
 1270
	/* Non-host routes that carry metrics get their own zeroed metrics array. */
 1271	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
 1272		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
 1273		if (!metrics) {
 1274			err = -ENOMEM;
 1275			goto out;
 1276		}
 1277		dst_init_metrics(&rt->dst, metrics, 0);
 1278	}
 1279#ifdef CONFIG_IPV6_SUBTREES
 1280	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
 1281	rt->rt6i_src.plen = cfg->fc_src_len;
 1282#endif
 1283
 1284	rt->rt6i_metric = cfg->fc_metric;
 1285
 1286	/* We cannot add true routes via loopback here,
 1287	   they would result in kernel looping; promote them to reject routes
 1288	 */
 1289	if ((cfg->fc_flags & RTF_REJECT) ||
 1290	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
 1291					      && !(cfg->fc_flags&RTF_LOCAL))) {
 1292		/* hold loopback dev/idev if we haven't done so. */
 1293		if (dev != net->loopback_dev) {
 1294			if (dev) {
 1295				dev_put(dev);
 1296				in6_dev_put(idev);
 1297			}
 1298			dev = net->loopback_dev;
 1299			dev_hold(dev);
 1300			idev = in6_dev_get(dev);
 1301			if (!idev) {
 1302				err = -ENODEV;
 1303				goto out;
 1304			}
 1305		}
 1306		rt->dst.output = ip6_pkt_discard_out;
 1307		rt->dst.input = ip6_pkt_discard;
 1308		rt->dst.error = -ENETUNREACH;
 1309		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
 1310		goto install_route;
 1311	}
 1312
 1313	if (cfg->fc_flags & RTF_GATEWAY) {
 1314		const struct in6_addr *gw_addr;
 1315		int gwa_type;
 1316
 1317		gw_addr = &cfg->fc_gateway;
 1318		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
 1319		gwa_type = ipv6_addr_type(gw_addr);
 1320
 1321		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
 1322			struct rt6_info *grt;
 1323
 1324			/* IPv6 strictly inhibits using not link-local
 1325			   addresses as nexthop address.
 1326			   Otherwise, router will not able to send redirects.
 1327			   It is very good, but in some (rare!) circumstances
 1328			   (SIT, PtP, NBMA NOARP links) it is handy to allow
 1329			   some exceptions. --ANK
 1330			 */
 1331			err = -EINVAL;
 1332			if (!(gwa_type&IPV6_ADDR_UNICAST))
 1333				goto out;
 1334
 1335			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
 1336
 1337			err = -EHOSTUNREACH;
 1338			if (grt == NULL)
 1339				goto out;
 1340			if (dev) {
 1341				if (dev != grt->rt6i_dev) {
 1342					dst_release(&grt->dst);
 1343					goto out;
 1344				}
 1345			} else {
				/* No device given: adopt the one of the route to the gateway. */
 1346				dev = grt->rt6i_dev;
 1347				idev = grt->rt6i_idev;
 1348				dev_hold(dev);
 1349				in6_dev_hold(grt->rt6i_idev);
 1350			}
			/* err stays -EHOSTUNREACH when the route to the gateway is itself gatewayed. */
 1351			if (!(grt->rt6i_flags&RTF_GATEWAY))
 1352				err = 0;
 1353			dst_release(&grt->dst);
 1354
 1355			if (err)
 1356				goto out;
 1357		}
 1358		err = -EINVAL;
 1359		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
 1360			goto out;
 1361	}
 1362
 1363	err = -ENODEV;
 1364	if (dev == NULL)
 1365		goto out;
 1366
 1367	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
 1368		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
 1369			err = -EINVAL;
 1370			goto out;
 1371		}
 1372		ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
 1373		rt->rt6i_prefsrc.plen = 128;
 1374	} else
 1375		rt->rt6i_prefsrc.plen = 0;
 1376
 1377	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
 1378		struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
 1379		if (IS_ERR(n)) {
 1380			err = PTR_ERR(n);
 1381			goto out;
 1382		}
 1383		dst_set_neighbour(&rt->dst, n);
 1384	}
 1385
 1386	rt->rt6i_flags = cfg->fc_flags;
 1387
 1388install_route:
 1389	if (cfg->fc_mx) {
 1390		struct nlattr *nla;
 1391		int remaining;
 1392
 1393		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
 1394			int type = nla_type(nla);
 1395
			/* Attribute type 0 is skipped; types above RTAX_MAX abort the add. */
 1396			if (type) {
 1397				if (type > RTAX_MAX) {
 1398					err = -EINVAL;
 1399					goto out;
 1400				}
 1401
 1402				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
 1403			}
 1404		}
 1405	}
 1406
	/*
	 * The dev/idev references held by this function are stored in the
	 * route here; the success path returns without dropping them.
	 */
 1407	rt->dst.dev = dev;
 1408	rt->rt6i_idev = idev;
 1409	rt->rt6i_table = table;
 1410
 1411	cfg->fc_nlinfo.nl_net = dev_net(dev);
 1412
 1413	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
 1414
	/* Error exit: drop whatever references were taken and free the half-built route. */
 1415out:
 1416	if (dev)
 1417		dev_put(dev);
 1418	if (idev)
 1419		in6_dev_put(idev);
 1420	if (rt)
 1421		dst_free(&rt->dst);
 1422	return err;
 1423}
1424
1425static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 
1426{
 
1427	int err;
1428	struct fib6_table *table;
1429	struct net *net = dev_net(rt->rt6i_dev);
1430
1431	if (rt == net->ipv6.ip6_null_entry)
1432		return -ENOENT;
 
1433
1434	table = rt->rt6i_table;
1435	write_lock_bh(&table->tb6_lock);
1436
1437	err = fib6_del(rt, info);
1438	dst_release(&rt->dst);
1439
1440	write_unlock_bh(&table->tb6_lock);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1441
 
 
1442	return err;
1443}
1444
1445int ip6_del_rt(struct rt6_info *rt)
1446{
1447	struct nl_info info = {
1448		.nl_net = dev_net(rt->rt6i_dev),
 
1449	};
 
1450	return __ip6_del_rt(rt, &info);
1451}
1452
1453static int ip6_route_del(struct fib6_config *cfg)
1454{
 
 
 
1455	struct fib6_table *table;
1456	struct fib6_node *fn;
1457	struct rt6_info *rt;
1458	int err = -ESRCH;
1459
1460	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1461	if (table == NULL)
1462		return err;
1463
1464	read_lock_bh(&table->tb6_lock);
1465
1466	fn = fib6_locate(&table->tb6_root,
1467			 &cfg->fc_dst, cfg->fc_dst_len,
1468			 &cfg->fc_src, cfg->fc_src_len);
1469
1470	if (fn) {
1471		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1472			if (cfg->fc_ifindex &&
1473			    (rt->rt6i_dev == NULL ||
1474			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1475				continue;
1476			if (cfg->fc_flags & RTF_GATEWAY &&
1477			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1478				continue;
1479			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1480				continue;
1481			dst_hold(&rt->dst);
1482			read_unlock_bh(&table->tb6_lock);
1483
1484			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1485		}
1486	}
1487	read_unlock_bh(&table->tb6_lock);
1488
 
 
 
 
 
 
 
 
 
 
1489	return err;
1490}
1491
1492/*
1493 *	Handle redirects
1494 */
/*
 * Lookup key for redirect validation: a flowi6 extended with the address
 * of the router that sent the redirect.  ip6_route_redirect() passes
 * &rdfl.fl6 to fib6_rule_lookup() and __ip6_route_redirect() casts that
 * flowi6 pointer back to this structure, so fl6 has to stay the first
 * member.
 */
 1495struct ip6rd_flowi {
 1496	struct flowi6 fl6;
 1497	struct in6_addr gateway;
 1498};
1499
1500static struct rt6_info *__ip6_route_redirect(struct net *net,
1501					     struct fib6_table *table,
1502					     struct flowi6 *fl6,
1503					     int flags)
1504{
1505	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1506	struct rt6_info *rt;
1507	struct fib6_node *fn;
1508
1509	/*
1510	 * Get the "current" route for this destination and
1511	 * check if the redirect has come from approriate router.
1512	 *
1513	 * RFC 2461 specifies that redirects should only be
1514	 * accepted if they come from the nexthop to the target.
1515	 * Due to the way the routes are chosen, this notion
1516	 * is a bit fuzzy and one might need to check all possible
1517	 * routes.
1518	 */
1519
1520	read_lock_bh(&table->tb6_lock);
1521	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1522restart:
1523	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1524		/*
1525		 * Current route is on-link; redirect is always invalid.
1526		 *
1527		 * Seems, previous statement is not true. It could
1528		 * be node, which looks for us as on-link (f.e. proxy ndisc)
1529		 * But then router serving it might decide, that we should
1530		 * know truth 8)8) --ANK (980726).
1531		 */
1532		if (rt6_check_expired(rt))
1533			continue;
1534		if (!(rt->rt6i_flags & RTF_GATEWAY))
1535			continue;
1536		if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1537			continue;
1538		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1539			continue;
1540		break;
1541	}
1542
1543	if (!rt)
1544		rt = net->ipv6.ip6_null_entry;
1545	BACKTRACK(net, &fl6->saddr);
1546out:
1547	dst_hold(&rt->dst);
 
1548
1549	read_unlock_bh(&table->tb6_lock);
 
 
 
 
 
 
 
1550
1551	return rt;
 
 
 
 
 
 
 
 
 
1552};
1553
1554static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1555					   const struct in6_addr *src,
1556					   const struct in6_addr *gateway,
1557					   struct net_device *dev)
1558{
1559	int flags = RT6_LOOKUP_F_HAS_SADDR;
1560	struct net *net = dev_net(dev);
1561	struct ip6rd_flowi rdfl = {
1562		.fl6 = {
1563			.flowi6_oif = dev->ifindex,
1564			.daddr = *dest,
1565			.saddr = *src,
1566		},
1567	};
1568
1569	ipv6_addr_copy(&rdfl.gateway, gateway);
 
 
1570
1571	if (rt6_need_strict(dest))
1572		flags |= RT6_LOOKUP_F_IFACE;
 
 
 
 
1573
1574	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1575						   flags, __ip6_route_redirect);
1576}
1577
1578void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1579		  const struct in6_addr *saddr,
1580		  struct neighbour *neigh, u8 *lladdr, int on_link)
1581{
1582	struct rt6_info *rt, *nrt = NULL;
1583	struct netevent_redirect netevent;
1584	struct net *net = dev_net(neigh->dev);
1585
1586	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1587
1588	if (rt == net->ipv6.ip6_null_entry) {
1589		if (net_ratelimit())
1590			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1591			       "for redirect target\n");
1592		goto out;
1593	}
1594
1595	/*
1596	 *	We have finally decided to accept it.
1597	 */
1598
1599	neigh_update(neigh, lladdr, NUD_STALE,
1600		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1601		     NEIGH_UPDATE_F_OVERRIDE|
1602		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1603				     NEIGH_UPDATE_F_ISROUTER))
1604		     );
1605
1606	/*
1607	 * Redirect received -> path was valid.
1608	 * Look, redirects are sent only in response to data packets,
1609	 * so that this nexthop apparently is reachable. --ANK
1610	 */
1611	dst_confirm(&rt->dst);
1612
1613	/* Duplicate redirect: silently ignore. */
1614	if (neigh == dst_get_neighbour_raw(&rt->dst))
1615		goto out;
1616
1617	nrt = ip6_rt_copy(rt, dest);
1618	if (nrt == NULL)
1619		goto out;
1620
1621	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1622	if (on_link)
1623		nrt->rt6i_flags &= ~RTF_GATEWAY;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1624
1625	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1626	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
 
 
1627
1628	if (ip6_ins_rt(nrt))
1629		goto out;
 
 
1630
1631	netevent.old = &rt->dst;
1632	netevent.new = &nrt->dst;
1633	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
 
 
 
 
 
 
 
 
1634
1635	if (rt->rt6i_flags&RTF_CACHE) {
1636		ip6_del_rt(rt);
1637		return;
 
 
 
1638	}
 
1639
1640out:
1641	dst_release(&rt->dst);
1642}
1643
1644/*
1645 *	Handle ICMP "packet too big" messages
1646 *	i.e. Path MTU discovery
1647 */
1648
1649static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1650			     struct net *net, u32 pmtu, int ifindex)
1651{
1652	struct rt6_info *rt, *nrt;
1653	int allfrag = 0;
1654again:
1655	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1656	if (rt == NULL)
1657		return;
 
 
 
1658
1659	if (rt6_check_expired(rt)) {
1660		ip6_del_rt(rt);
1661		goto again;
 
 
 
1662	}
1663
1664	if (pmtu >= dst_mtu(&rt->dst))
1665		goto out;
1666
1667	if (pmtu < IPV6_MIN_MTU) {
1668		/*
1669		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1670		 * MTU (1280) and a fragment header should always be included
1671		 * after a node receiving Too Big message reporting PMTU is
1672		 * less than the IPv6 Minimum Link MTU.
1673		 */
1674		pmtu = IPV6_MIN_MTU;
1675		allfrag = 1;
1676	}
1677
1678	/* New mtu received -> path was valid.
1679	   They are sent only in response to data packets,
1680	   so that this nexthop apparently is reachable. --ANK
1681	 */
1682	dst_confirm(&rt->dst);
1683
1684	/* Host route. If it is static, it would be better
1685	   not to override it, but add new one, so that
1686	   when cache entry will expire old pmtu
1687	   would return automatically.
1688	 */
1689	if (rt->rt6i_flags & RTF_CACHE) {
1690		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1691		if (allfrag) {
1692			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1693			features |= RTAX_FEATURE_ALLFRAG;
1694			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1695		}
1696		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1697		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1698		goto out;
1699	}
1700
1701	/* Network route.
1702	   Two cases are possible:
1703	   1. It is connected route. Action: COW
1704	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
 
 
 
 
 
1705	 */
1706	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1707		nrt = rt6_alloc_cow(rt, daddr, saddr);
1708	else
1709		nrt = rt6_alloc_clone(rt, daddr);
1710
1711	if (nrt) {
1712		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1713		if (allfrag) {
1714			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1715			features |= RTAX_FEATURE_ALLFRAG;
1716			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1717		}
1718
1719		/* According to RFC 1981, detecting PMTU increase shouldn't be
1720		 * happened within 5 mins, the recommended timer is 10 mins.
1721		 * Here this route expiration time is set to ip6_rt_mtu_expires
1722		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1723		 * and detecting PMTU increase will be automatically happened.
1724		 */
1725		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1726		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
 
1727
1728		ip6_ins_rt(nrt);
 
 
 
1729	}
1730out:
1731	dst_release(&rt->dst);
1732}
1733
1734void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1735			struct net_device *dev, u32 pmtu)
1736{
1737	struct net *net = dev_net(dev);
 
 
 
 
 
1738
1739	/*
1740	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1741	 * is sending along the path" that caused the Packet Too Big message.
1742	 * Since it's not possible in the general case to determine which
1743	 * interface was used to send the original packet, we update the MTU
1744	 * on the interface that will be used to send future packets. We also
1745	 * update the MTU on the interface that received the Packet Too Big in
1746	 * case the original packet was forced out that interface with
1747	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1748	 * correct behaviour, which would be to update the MTU on all
1749	 * interfaces.
1750	 */
1751	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1752	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1753}
1754
1755/*
1756 *	Misc support functions
1757 */
 
 
 
1758
1759static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1760				    const struct in6_addr *dest)
1761{
1762	struct net *net = dev_net(ort->rt6i_dev);
1763	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1764					    ort->dst.dev, 0);
1765
1766	if (rt) {
1767		rt->dst.input = ort->dst.input;
1768		rt->dst.output = ort->dst.output;
1769		rt->dst.flags |= DST_HOST;
1770
1771		ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1772		rt->rt6i_dst.plen = 128;
1773		dst_copy_metrics(&rt->dst, &ort->dst);
1774		rt->dst.error = ort->dst.error;
1775		rt->rt6i_idev = ort->rt6i_idev;
1776		if (rt->rt6i_idev)
1777			in6_dev_hold(rt->rt6i_idev);
1778		rt->dst.lastuse = jiffies;
1779		rt->rt6i_expires = 0;
1780
1781		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1782		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1783		rt->rt6i_metric = 0;
1784
1785#ifdef CONFIG_IPV6_SUBTREES
1786		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1787#endif
1788		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1789		rt->rt6i_table = ort->rt6i_table;
 
 
 
 
 
 
1790	}
1791	return rt;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1792}
1793
1794#ifdef CONFIG_IPV6_ROUTE_INFO
1795static struct rt6_info *rt6_get_route_info(struct net *net,
1796					   const struct in6_addr *prefix, int prefixlen,
1797					   const struct in6_addr *gwaddr, int ifindex)
 
1798{
 
 
1799	struct fib6_node *fn;
1800	struct rt6_info *rt = NULL;
1801	struct fib6_table *table;
1802
1803	table = fib6_get_table(net, RT6_TABLE_INFO);
1804	if (table == NULL)
1805		return NULL;
1806
1807	write_lock_bh(&table->tb6_lock);
1808	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1809	if (!fn)
1810		goto out;
1811
1812	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1813		if (rt->rt6i_dev->ifindex != ifindex)
 
1814			continue;
1815		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1816			continue;
1817		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
 
 
 
 
 
1818			continue;
1819		dst_hold(&rt->dst);
1820		break;
1821	}
1822out:
1823	write_unlock_bh(&table->tb6_lock);
1824	return rt;
1825}
1826
1827static struct rt6_info *rt6_add_route_info(struct net *net,
1828					   const struct in6_addr *prefix, int prefixlen,
1829					   const struct in6_addr *gwaddr, int ifindex,
1830					   unsigned pref)
 
1831{
1832	struct fib6_config cfg = {
1833		.fc_table	= RT6_TABLE_INFO,
1834		.fc_metric	= IP6_RT_PRIO_USER,
1835		.fc_ifindex	= ifindex,
1836		.fc_dst_len	= prefixlen,
1837		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1838				  RTF_UP | RTF_PREF(pref),
1839		.fc_nlinfo.pid = 0,
 
 
1840		.fc_nlinfo.nlh = NULL,
1841		.fc_nlinfo.nl_net = net,
1842	};
1843
1844	ipv6_addr_copy(&cfg.fc_dst, prefix);
1845	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
 
1846
1847	/* We should treat it as a default route if prefix length is 0. */
1848	if (!prefixlen)
1849		cfg.fc_flags |= RTF_DEFAULT;
1850
1851	ip6_route_add(&cfg);
1852
1853	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1854}
1855#endif
1856
1857struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
 
 
1858{
1859	struct rt6_info *rt;
 
1860	struct fib6_table *table;
1861
1862	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1863	if (table == NULL)
1864		return NULL;
1865
1866	write_lock_bh(&table->tb6_lock);
1867	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1868		if (dev == rt->rt6i_dev &&
1869		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1870		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
 
 
 
 
 
 
 
1871			break;
1872	}
1873	if (rt)
1874		dst_hold(&rt->dst);
1875	write_unlock_bh(&table->tb6_lock);
1876	return rt;
1877}
1878
1879struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
 
1880				     struct net_device *dev,
1881				     unsigned int pref)
 
1882{
1883	struct fib6_config cfg = {
1884		.fc_table	= RT6_TABLE_DFLT,
1885		.fc_metric	= IP6_RT_PRIO_USER,
1886		.fc_ifindex	= dev->ifindex,
1887		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1888				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1889		.fc_nlinfo.pid = 0,
 
 
1890		.fc_nlinfo.nlh = NULL,
1891		.fc_nlinfo.nl_net = dev_net(dev),
1892	};
1893
1894	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1895
1896	ip6_route_add(&cfg);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1897
1898	return rt6_get_dflt_router(gwaddr, dev);
1899}
1900
1901void rt6_purge_dflt_routers(struct net *net)
1902{
1903	struct rt6_info *rt;
1904	struct fib6_table *table;
 
 
1905
1906	/* NOTE: Keep consistent with rt6_get_dflt_router */
1907	table = fib6_get_table(net, RT6_TABLE_DFLT);
1908	if (table == NULL)
1909		return;
1910
1911restart:
1912	read_lock_bh(&table->tb6_lock);
1913	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1914		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1915			dst_hold(&rt->dst);
1916			read_unlock_bh(&table->tb6_lock);
1917			ip6_del_rt(rt);
1918			goto restart;
1919		}
1920	}
1921	read_unlock_bh(&table->tb6_lock);
 
1922}
1923
1924static void rtmsg_to_fib6_config(struct net *net,
1925				 struct in6_rtmsg *rtmsg,
1926				 struct fib6_config *cfg)
1927{
1928	memset(cfg, 0, sizeof(*cfg));
1929
1930	cfg->fc_table = RT6_TABLE_MAIN;
1931	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1932	cfg->fc_metric = rtmsg->rtmsg_metric;
1933	cfg->fc_expires = rtmsg->rtmsg_info;
1934	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1935	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1936	cfg->fc_flags = rtmsg->rtmsg_flags;
 
1937
1938	cfg->fc_nlinfo.nl_net = net;
1939
1940	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1941	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1942	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
 
1943}
1944
1945int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1946{
1947	struct fib6_config cfg;
1948	struct in6_rtmsg rtmsg;
1949	int err;
1950
1951	switch(cmd) {
1952	case SIOCADDRT:		/* Add a route */
1953	case SIOCDELRT:		/* Delete a route */
1954		if (!capable(CAP_NET_ADMIN))
1955			return -EPERM;
1956		err = copy_from_user(&rtmsg, arg,
1957				     sizeof(struct in6_rtmsg));
1958		if (err)
1959			return -EFAULT;
1960
1961		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1962
1963		rtnl_lock();
1964		switch (cmd) {
1965		case SIOCADDRT:
1966			err = ip6_route_add(&cfg);
1967			break;
1968		case SIOCDELRT:
1969			err = ip6_route_del(&cfg);
1970			break;
1971		default:
1972			err = -EINVAL;
1973		}
1974		rtnl_unlock();
1975
1976		return err;
 
 
 
 
 
 
 
1977	}
1978
1979	return -EINVAL;
1980}
1981
1982/*
1983 *	Drop the packet on the floor
1984 */
1985
1986static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1987{
1988	int type;
1989	struct dst_entry *dst = skb_dst(skb);
 
 
 
 
 
 
 
 
 
 
 
1990	switch (ipstats_mib_noroutes) {
1991	case IPSTATS_MIB_INNOROUTES:
1992		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1993		if (type == IPV6_ADDR_ANY) {
1994			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1995				      IPSTATS_MIB_INADDRERRORS);
1996			break;
1997		}
1998		/* FALLTHROUGH */
 
1999	case IPSTATS_MIB_OUTNOROUTES:
2000		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2001			      ipstats_mib_noroutes);
2002		break;
2003	}
 
 
 
 
 
2004	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2005	kfree_skb(skb);
2006	return 0;
2007}
2008
2009static int ip6_pkt_discard(struct sk_buff *skb)
2010{
2011	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2012}
2013
2014static int ip6_pkt_discard_out(struct sk_buff *skb)
2015{
2016	skb->dev = skb_dst(skb)->dev;
2017	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2018}
2019
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Input-side drop with ICMPV6_ADM_PROHIBITED, counted against
 * IPSTATS_MIB_INNOROUTES.
 */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	const u8 code = ICMPV6_ADM_PROHIBITED;

	return ip6_pkt_drop(skb, code, IPSTATS_MIB_INNOROUTES);
}

/*
 * Output-side drop with ICMPV6_ADM_PROHIBITED: point skb->dev at the
 * route's device first, counted against IPSTATS_MIB_OUTNOROUTES.
 */
static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	skb->dev = dst->dev;

	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}

#endif
2034
2035/*
2036 *	Allocate a dst for local (unicast / anycast) address.
2037 */
2038
2039struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2040				    const struct in6_addr *addr,
2041				    int anycast)
2042{
2043	struct net *net = dev_net(idev->dev);
2044	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2045					    net->loopback_dev, 0);
2046	struct neighbour *neigh;
 
 
 
 
 
 
 
 
 
2047
2048	if (rt == NULL) {
2049		if (net_ratelimit())
2050			pr_warning("IPv6:  Maximum number of routes reached,"
2051				   " consider increasing route/max_size.\n");
2052		return ERR_PTR(-ENOMEM);
 
2053	}
2054
2055	in6_dev_hold(idev);
2056
2057	rt->dst.flags |= DST_HOST;
2058	rt->dst.input = ip6_input;
2059	rt->dst.output = ip6_output;
2060	rt->rt6i_idev = idev;
2061	rt->dst.obsolete = -1;
2062
2063	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2064	if (anycast)
2065		rt->rt6i_flags |= RTF_ANYCAST;
2066	else
2067		rt->rt6i_flags |= RTF_LOCAL;
2068	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2069	if (IS_ERR(neigh)) {
2070		dst_free(&rt->dst);
2071
2072		return ERR_CAST(neigh);
 
 
 
2073	}
2074	dst_set_neighbour(&rt->dst, neigh);
2075
2076	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2077	rt->rt6i_dst.plen = 128;
2078	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2079
2080	atomic_set(&rt->dst.__refcnt, 1);
2081
2082	return rt;
2083}
2084
2085int ip6_route_get_saddr(struct net *net,
2086			struct rt6_info *rt,
2087			const struct in6_addr *daddr,
2088			unsigned int prefs,
2089			struct in6_addr *saddr)
2090{
2091	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2092	int err = 0;
2093	if (rt->rt6i_prefsrc.plen)
2094		ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2095	else
2096		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2097					 daddr, prefs, saddr);
2098	return err;
2099}
2100
2101/* remove deleted ip from prefsrc entries */
/*
 * Argument bundle handed to fib6_remove_prefsrc() through the void *
 * parameter of fib6_clean_all().
 */
struct arg_dev_net_ip {
	struct net_device	*dev;	/* device filter; NULL matches any */
	struct net		*net;	/* namespace owning ip6_null_entry */
	struct in6_addr		*addr;	/* address compared with prefsrc */
};
2107
2108static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2109{
2110	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2111	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2112	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2113
2114	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2115	    rt != net->ipv6.ip6_null_entry &&
2116	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
 
 
2117		/* remove prefsrc entry */
2118		rt->rt6i_prefsrc.plen = 0;
 
2119	}
2120	return 0;
2121}
2122
2123void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2124{
2125	struct net *net = dev_net(ifp->idev->dev);
2126	struct arg_dev_net_ip adni = {
2127		.dev = ifp->idev->dev,
2128		.net = net,
2129		.addr = &ifp->addr,
2130	};
2131	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2132}
2133
/* Argument bundle handed to fib6_ifdown() through its void * parameter. */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches any */
	struct net		*net;	/* namespace owning ip6_null_entry */
};
2138
2139static int fib6_ifdown(struct rt6_info *rt, void *arg)
 
2140{
2141	const struct arg_dev_net *adn = arg;
2142	const struct net_device *dev = adn->dev;
2143
2144	if ((rt->rt6i_dev == dev || dev == NULL) &&
2145	    rt != adn->net->ipv6.ip6_null_entry) {
2146		RT6_TRACE("deleted by ifdown %p\n", rt);
 
 
 
 
2147		return -1;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2148	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2149	return 0;
2150}
2151
2152void rt6_ifdown(struct net *net, struct net_device *dev)
2153{
2154	struct arg_dev_net adn = {
2155		.dev = dev,
2156		.net = net,
 
 
2157	};
2158
2159	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2160	icmp6_clean_all(fib6_ifdown, &adn);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2161}
2162
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* the new MTU value */
};
2168
2169static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2170{
2171	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2172	struct inet6_dev *idev;
2173
2174	/* In IPv6 pmtu discovery is not optional,
2175	   so that RTAX_MTU lock cannot disable it.
2176	   We still use this lock to block changes
2177	   caused by addrconf/ndisc.
2178	*/
2179
2180	idev = __in6_dev_get(arg->dev);
2181	if (idev == NULL)
2182		return 0;
2183
2184	/* For administrative MTU increase, there is no way to discover
2185	   IPv6 PMTU increase, so PMTU increase should be updated here.
2186	   Since RFC 1981 doesn't include administrative MTU increase
2187	   update PMTU increase is a MUST. (i.e. jumbo frame)
2188	 */
2189	/*
2190	   If new MTU is less than route PMTU, this new MTU will be the
2191	   lowest MTU in the path, update the route PMTU to reflect PMTU
2192	   decreases; if new MTU is greater than route PMTU, and the
2193	   old MTU is the lowest MTU in the path, update the route PMTU
2194	   to reflect the increase. In this case if the other nodes' MTU
2195	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2196	   PMTU discouvery.
2197	 */
2198	if (rt->rt6i_dev == arg->dev &&
2199	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2200	    (dst_mtu(&rt->dst) >= arg->mtu ||
2201	     (dst_mtu(&rt->dst) < arg->mtu &&
2202	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2203		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2204	}
2205	return 0;
 
2206}
2207
2208void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2209{
2210	struct rt6_mtu_change_arg arg = {
2211		.dev = dev,
2212		.mtu = mtu,
2213	};
2214
2215	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2216}
2217
2218static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
 
2219	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
 
2220	[RTA_OIF]               = { .type = NLA_U32 },
2221	[RTA_IIF]		= { .type = NLA_U32 },
2222	[RTA_PRIORITY]          = { .type = NLA_U32 },
2223	[RTA_METRICS]           = { .type = NLA_NESTED },
 
 
 
 
 
 
 
 
 
 
 
 
2224};
2225
2226static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2227			      struct fib6_config *cfg)
 
2228{
2229	struct rtmsg *rtm;
2230	struct nlattr *tb[RTA_MAX+1];
 
2231	int err;
2232
2233	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
 
2234	if (err < 0)
2235		goto errout;
2236
2237	err = -EINVAL;
2238	rtm = nlmsg_data(nlh);
2239	memset(cfg, 0, sizeof(*cfg));
2240
2241	cfg->fc_table = rtm->rtm_table;
2242	cfg->fc_dst_len = rtm->rtm_dst_len;
2243	cfg->fc_src_len = rtm->rtm_src_len;
2244	cfg->fc_flags = RTF_UP;
2245	cfg->fc_protocol = rtm->rtm_protocol;
 
 
 
 
 
 
 
 
 
 
 
 
 
2246
2247	if (rtm->rtm_type == RTN_UNREACHABLE)
 
 
 
2248		cfg->fc_flags |= RTF_REJECT;
2249
2250	if (rtm->rtm_type == RTN_LOCAL)
2251		cfg->fc_flags |= RTF_LOCAL;
2252
2253	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2254	cfg->fc_nlinfo.nlh = nlh;
2255	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
 
 
 
 
 
 
 
 
 
 
 
2256
2257	if (tb[RTA_GATEWAY]) {
2258		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2259		cfg->fc_flags |= RTF_GATEWAY;
2260	}
 
 
 
 
2261
2262	if (tb[RTA_DST]) {
2263		int plen = (rtm->rtm_dst_len + 7) >> 3;
2264
2265		if (nla_len(tb[RTA_DST]) < plen)
2266			goto errout;
2267
2268		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2269	}
2270
2271	if (tb[RTA_SRC]) {
2272		int plen = (rtm->rtm_src_len + 7) >> 3;
2273
2274		if (nla_len(tb[RTA_SRC]) < plen)
2275			goto errout;
2276
2277		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2278	}
2279
2280	if (tb[RTA_PREFSRC])
2281		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2282
2283	if (tb[RTA_OIF])
2284		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2285
2286	if (tb[RTA_PRIORITY])
2287		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2288
2289	if (tb[RTA_METRICS]) {
2290		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2291		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2292	}
2293
2294	if (tb[RTA_TABLE])
2295		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2296
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2297	err = 0;
2298errout:
2299	return err;
2300}
2301
2302static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2303{
2304	struct fib6_config cfg;
2305	int err;
2306
2307	err = rtm_to_fib6_config(skb, nlh, &cfg);
2308	if (err < 0)
2309		return err;
2310
2311	return ip6_route_del(&cfg);
 
 
 
 
 
 
 
 
 
 
 
2312}
2313
2314static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
2315{
2316	struct fib6_config cfg;
2317	int err;
2318
2319	err = rtm_to_fib6_config(skb, nlh, &cfg);
2320	if (err < 0)
2321		return err;
2322
2323	return ip6_route_add(&cfg);
 
 
 
 
 
 
2324}
2325
2326static inline size_t rt6_nlmsg_size(void)
 
2327{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2328	return NLMSG_ALIGN(sizeof(struct rtmsg))
2329	       + nla_total_size(16) /* RTA_SRC */
2330	       + nla_total_size(16) /* RTA_DST */
2331	       + nla_total_size(16) /* RTA_GATEWAY */
2332	       + nla_total_size(16) /* RTA_PREFSRC */
2333	       + nla_total_size(4) /* RTA_TABLE */
2334	       + nla_total_size(4) /* RTA_IIF */
2335	       + nla_total_size(4) /* RTA_OIF */
2336	       + nla_total_size(4) /* RTA_PRIORITY */
2337	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2338	       + nla_total_size(sizeof(struct rta_cacheinfo));
 
 
 
2339}
2340
2341static int rt6_fill_node(struct net *net,
2342			 struct sk_buff *skb, struct rt6_info *rt,
2343			 struct in6_addr *dst, struct in6_addr *src,
2344			 int iif, int type, u32 pid, u32 seq,
2345			 int prefix, int nowait, unsigned int flags)
2346{
2347	struct rtmsg *rtm;
2348	struct nlmsghdr *nlh;
2349	long expires;
2350	u32 table;
2351	struct neighbour *n;
2352
2353	if (prefix) {	/* user wants prefix routes only */
2354		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2355			/* success since this is not a prefix route */
2356			return 1;
2357		}
 
 
 
 
 
 
 
 
 
 
2358	}
2359
2360	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2361	if (nlh == NULL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2362		return -EMSGSIZE;
2363
 
 
 
 
 
 
 
 
 
 
2364	rtm = nlmsg_data(nlh);
2365	rtm->rtm_family = AF_INET6;
2366	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2367	rtm->rtm_src_len = rt->rt6i_src.plen;
2368	rtm->rtm_tos = 0;
2369	if (rt->rt6i_table)
2370		table = rt->rt6i_table->tb6_id;
2371	else
2372		table = RT6_TABLE_UNSPEC;
2373	rtm->rtm_table = table;
2374	NLA_PUT_U32(skb, RTA_TABLE, table);
2375	if (rt->rt6i_flags&RTF_REJECT)
2376		rtm->rtm_type = RTN_UNREACHABLE;
2377	else if (rt->rt6i_flags&RTF_LOCAL)
2378		rtm->rtm_type = RTN_LOCAL;
2379	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2380		rtm->rtm_type = RTN_LOCAL;
2381	else
2382		rtm->rtm_type = RTN_UNICAST;
2383	rtm->rtm_flags = 0;
2384	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2385	rtm->rtm_protocol = rt->rt6i_protocol;
2386	if (rt->rt6i_flags&RTF_DYNAMIC)
2387		rtm->rtm_protocol = RTPROT_REDIRECT;
2388	else if (rt->rt6i_flags & RTF_ADDRCONF)
2389		rtm->rtm_protocol = RTPROT_KERNEL;
2390	else if (rt->rt6i_flags&RTF_DEFAULT)
2391		rtm->rtm_protocol = RTPROT_RA;
2392
2393	if (rt->rt6i_flags&RTF_CACHE)
2394		rtm->rtm_flags |= RTM_F_CLONED;
2395
2396	if (dst) {
2397		NLA_PUT(skb, RTA_DST, 16, dst);
 
2398		rtm->rtm_dst_len = 128;
2399	} else if (rtm->rtm_dst_len)
2400		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
 
2401#ifdef CONFIG_IPV6_SUBTREES
2402	if (src) {
2403		NLA_PUT(skb, RTA_SRC, 16, src);
 
2404		rtm->rtm_src_len = 128;
2405	} else if (rtm->rtm_src_len)
2406		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
 
2407#endif
2408	if (iif) {
2409#ifdef CONFIG_IPV6_MROUTE
2410		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2411			int err = ip6mr_get_route(net, skb, rtm, nowait);
2412			if (err <= 0) {
2413				if (!nowait) {
2414					if (err == 0)
2415						return 0;
2416					goto nla_put_failure;
2417				} else {
2418					if (err == -EMSGSIZE)
2419						goto nla_put_failure;
2420				}
2421			}
2422		} else
2423#endif
2424			NLA_PUT_U32(skb, RTA_IIF, iif);
2425	} else if (dst) {
 
2426		struct in6_addr saddr_buf;
2427		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2428			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 
2429	}
2430
2431	if (rt->rt6i_prefsrc.plen) {
2432		struct in6_addr saddr_buf;
2433		ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2434		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 
2435	}
2436
2437	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
 
2438		goto nla_put_failure;
2439
2440	rcu_read_lock();
2441	n = dst_get_neighbour(&rt->dst);
2442	if (n)
2443		NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2444	rcu_read_unlock();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2445
2446	if (rt->dst.dev)
2447		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
 
 
 
 
 
 
 
 
 
2448
2449	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
 
 
 
 
2450
2451	if (!(rt->rt6i_flags & RTF_EXPIRES))
2452		expires = 0;
2453	else if (rt->rt6i_expires - jiffies < INT_MAX)
2454		expires = rt->rt6i_expires - jiffies;
2455	else
2456		expires = INT_MAX;
 
 
 
 
 
 
 
 
 
 
2457
2458	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2459			       expires, rt->dst.error) < 0)
 
 
2460		goto nla_put_failure;
2461
2462	return nlmsg_end(skb, nlh);
 
 
2463
2464nla_put_failure:
2465	nlmsg_cancel(skb, nlh);
2466	return -EMSGSIZE;
2467}
2468
2469int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2470{
2471	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2472	int prefix;
 
 
 
2473
2474	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2475		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2476		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2477	} else
2478		prefix = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2479
2480	return rt6_fill_node(arg->net,
2481		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2482		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2483		     prefix, 0, NLM_F_MULTI);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2484}
2485
2486static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 
2487{
2488	struct net *net = sock_net(in_skb->sk);
2489	struct nlattr *tb[RTA_MAX+1];
 
 
 
2490	struct rt6_info *rt;
2491	struct sk_buff *skb;
2492	struct rtmsg *rtm;
2493	struct flowi6 fl6;
2494	int err, iif = 0;
2495
2496	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2497	if (err < 0)
2498		goto errout;
2499
2500	err = -EINVAL;
2501	memset(&fl6, 0, sizeof(fl6));
 
 
2502
2503	if (tb[RTA_SRC]) {
2504		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2505			goto errout;
2506
2507		ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2508	}
2509
2510	if (tb[RTA_DST]) {
2511		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2512			goto errout;
2513
2514		ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2515	}
2516
2517	if (tb[RTA_IIF])
2518		iif = nla_get_u32(tb[RTA_IIF]);
2519
2520	if (tb[RTA_OIF])
2521		fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2522
2523	if (iif) {
2524		struct net_device *dev;
2525		dev = __dev_get_by_index(net, iif);
 
 
 
 
2526		if (!dev) {
 
2527			err = -ENODEV;
2528			goto errout;
2529		}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2530	}
2531
2532	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2533	if (skb == NULL) {
 
2534		err = -ENOBUFS;
2535		goto errout;
2536	}
2537
2538	/* Reserve room for dummy headers, this skb can pass
2539	   through good chunk of routing engine.
2540	 */
2541	skb_reset_mac_header(skb);
2542	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2543
2544	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2545	skb_dst_set(skb, &rt->dst);
2546
2547	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2548			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2549			    nlh->nlmsg_seq, 0, 0, 0);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2550	if (err < 0) {
2551		kfree_skb(skb);
2552		goto errout;
2553	}
2554
2555	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2556errout:
2557	return err;
2558}
2559
2560void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
 
2561{
2562	struct sk_buff *skb;
2563	struct net *net = info->nl_net;
2564	u32 seq;
2565	int err;
2566
2567	err = -ENOBUFS;
2568	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2569
2570	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2571	if (skb == NULL)
2572		goto errout;
2573
2574	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2575				event, info->pid, seq, 0, 0, 0);
2576	if (err < 0) {
2577		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2578		WARN_ON(err == -EMSGSIZE);
2579		kfree_skb(skb);
2580		goto errout;
2581	}
2582	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2583		    info->nlh, gfp_any());
2584	return;
2585errout:
2586	if (err < 0)
2587		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2588}
2589
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2590static int ip6_route_dev_notify(struct notifier_block *this,
2591				unsigned long event, void *data)
2592{
2593	struct net_device *dev = (struct net_device *)data;
2594	struct net *net = dev_net(dev);
2595
2596	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
 
 
 
 
2597		net->ipv6.ip6_null_entry->dst.dev = dev;
2598		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2599#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2600		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2601		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2602		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2603		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2604#endif
 
 
 
 
 
 
 
 
 
 
2605	}
2606
2607	return NOTIFY_OK;
2608}
2609
2610/*
2611 *	/proc
2612 */
2613
2614#ifdef CONFIG_PROC_FS
2615
/*
 * Buffer bookkeeping for a /proc reader.
 * NOTE(review): nothing in the code visible in this section references
 * this structure -- confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2624
2625static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2626{
2627	struct seq_file *m = p_arg;
2628	struct neighbour *n;
2629
2630	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2631
2632#ifdef CONFIG_IPV6_SUBTREES
2633	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2634#else
2635	seq_puts(m, "00000000000000000000000000000000 00 ");
2636#endif
2637	rcu_read_lock();
2638	n = dst_get_neighbour(&rt->dst);
2639	if (n) {
2640		seq_printf(m, "%pi6", n->primary_key);
2641	} else {
2642		seq_puts(m, "00000000000000000000000000000000");
2643	}
2644	rcu_read_unlock();
2645	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2646		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2647		   rt->dst.__use, rt->rt6i_flags,
2648		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2649	return 0;
2650}
2651
2652static int ipv6_route_show(struct seq_file *m, void *v)
2653{
2654	struct net *net = (struct net *)m->private;
2655	fib6_clean_all(net, rt6_info_route, 0, m);
2656	return 0;
2657}
2658
2659static int ipv6_route_open(struct inode *inode, struct file *file)
2660{
2661	return single_open_net(inode, file, ipv6_route_show);
2662}
2663
2664static const struct file_operations ipv6_route_proc_fops = {
2665	.owner		= THIS_MODULE,
2666	.open		= ipv6_route_open,
2667	.read		= seq_read,
2668	.llseek		= seq_lseek,
2669	.release	= single_release_net,
2670};
2671
2672static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2673{
2674	struct net *net = (struct net *)seq->private;
2675	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2676		   net->ipv6.rt6_stats->fib_nodes,
2677		   net->ipv6.rt6_stats->fib_route_nodes,
2678		   net->ipv6.rt6_stats->fib_rt_alloc,
2679		   net->ipv6.rt6_stats->fib_rt_entries,
2680		   net->ipv6.rt6_stats->fib_rt_cache,
2681		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2682		   net->ipv6.rt6_stats->fib_discarded_routes);
2683
2684	return 0;
2685}
2686
2687static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2688{
2689	return single_open_net(inode, file, rt6_stats_seq_show);
2690}
2691
2692static const struct file_operations rt6_stats_seq_fops = {
2693	.owner	 = THIS_MODULE,
2694	.open	 = rt6_stats_seq_open,
2695	.read	 = seq_read,
2696	.llseek	 = seq_lseek,
2697	.release = single_release_net,
2698};
2699#endif	/* CONFIG_PROC_FS */
2700
2701#ifdef CONFIG_SYSCTL
2702
2703static
2704int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2705			      void __user *buffer, size_t *lenp, loff_t *ppos)
2706{
2707	struct net *net;
2708	int delay;
 
2709	if (!write)
2710		return -EINVAL;
2711
2712	net = (struct net *)ctl->extra1;
2713	delay = net->ipv6.sysctl.flush_delay;
2714	proc_dointvec(ctl, write, buffer, lenp, ppos);
2715	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
 
 
 
2716	return 0;
2717}
2718
2719ctl_table ipv6_route_table_template[] = {
2720	{
2721		.procname	=	"flush",
2722		.data		=	&init_net.ipv6.sysctl.flush_delay,
2723		.maxlen		=	sizeof(int),
2724		.mode		=	0200,
2725		.proc_handler	=	ipv6_sysctl_rtcache_flush
2726	},
2727	{
2728		.procname	=	"gc_thresh",
2729		.data		=	&ip6_dst_ops_template.gc_thresh,
2730		.maxlen		=	sizeof(int),
2731		.mode		=	0644,
2732		.proc_handler	=	proc_dointvec,
2733	},
2734	{
2735		.procname	=	"max_size",
2736		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2737		.maxlen		=	sizeof(int),
2738		.mode		=	0644,
2739		.proc_handler	=	proc_dointvec,
2740	},
2741	{
2742		.procname	=	"gc_min_interval",
2743		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2744		.maxlen		=	sizeof(int),
2745		.mode		=	0644,
2746		.proc_handler	=	proc_dointvec_jiffies,
2747	},
2748	{
2749		.procname	=	"gc_timeout",
2750		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2751		.maxlen		=	sizeof(int),
2752		.mode		=	0644,
2753		.proc_handler	=	proc_dointvec_jiffies,
2754	},
2755	{
2756		.procname	=	"gc_interval",
2757		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2758		.maxlen		=	sizeof(int),
2759		.mode		=	0644,
2760		.proc_handler	=	proc_dointvec_jiffies,
2761	},
2762	{
2763		.procname	=	"gc_elasticity",
2764		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2765		.maxlen		=	sizeof(int),
2766		.mode		=	0644,
2767		.proc_handler	=	proc_dointvec,
2768	},
2769	{
2770		.procname	=	"mtu_expires",
2771		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2772		.maxlen		=	sizeof(int),
2773		.mode		=	0644,
2774		.proc_handler	=	proc_dointvec_jiffies,
2775	},
2776	{
2777		.procname	=	"min_adv_mss",
2778		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2779		.maxlen		=	sizeof(int),
2780		.mode		=	0644,
2781		.proc_handler	=	proc_dointvec,
2782	},
2783	{
2784		.procname	=	"gc_min_interval_ms",
2785		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2786		.maxlen		=	sizeof(int),
2787		.mode		=	0644,
2788		.proc_handler	=	proc_dointvec_ms_jiffies,
2789	},
 
 
 
 
 
 
 
 
 
2790	{ }
2791};
2792
2793struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2794{
2795	struct ctl_table *table;
2796
2797	table = kmemdup(ipv6_route_table_template,
2798			sizeof(ipv6_route_table_template),
2799			GFP_KERNEL);
2800
2801	if (table) {
2802		table[0].data = &net->ipv6.sysctl.flush_delay;
2803		table[0].extra1 = net;
2804		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2805		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
 
2806		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2807		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2808		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2809		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2810		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2811		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2812		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
 
 
 
 
 
2813	}
2814
2815	return table;
2816}
 
 
 
 
 
 
 
 
 
2817#endif
2818
2819static int __net_init ip6_route_net_init(struct net *net)
2820{
2821	int ret = -ENOMEM;
2822
2823	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2824	       sizeof(net->ipv6.ip6_dst_ops));
2825
2826	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2827		goto out_ip6_dst_ops;
2828
 
 
 
 
 
 
2829	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2830					   sizeof(*net->ipv6.ip6_null_entry),
2831					   GFP_KERNEL);
2832	if (!net->ipv6.ip6_null_entry)
2833		goto out_ip6_dst_entries;
2834	net->ipv6.ip6_null_entry->dst.path =
2835		(struct dst_entry *)net->ipv6.ip6_null_entry;
2836	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2837	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2838			 ip6_template_metrics, true);
 
2839
2840#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 
2841	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2842					       sizeof(*net->ipv6.ip6_prohibit_entry),
2843					       GFP_KERNEL);
2844	if (!net->ipv6.ip6_prohibit_entry)
2845		goto out_ip6_null_entry;
2846	net->ipv6.ip6_prohibit_entry->dst.path =
2847		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2848	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2849	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2850			 ip6_template_metrics, true);
 
2851
2852	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2853					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2854					       GFP_KERNEL);
2855	if (!net->ipv6.ip6_blk_hole_entry)
2856		goto out_ip6_prohibit_entry;
2857	net->ipv6.ip6_blk_hole_entry->dst.path =
2858		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2859	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2860	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2861			 ip6_template_metrics, true);
 
 
 
 
2862#endif
2863
2864	net->ipv6.sysctl.flush_delay = 0;
2865	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2866	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2867	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2868	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2869	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2870	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2871	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
 
2872
2873#ifdef CONFIG_PROC_FS
2874	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2875	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2876#endif
2877	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2878
2879	ret = 0;
2880out:
2881	return ret;
2882
2883#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2884out_ip6_prohibit_entry:
2885	kfree(net->ipv6.ip6_prohibit_entry);
2886out_ip6_null_entry:
2887	kfree(net->ipv6.ip6_null_entry);
2888#endif
 
 
2889out_ip6_dst_entries:
2890	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2891out_ip6_dst_ops:
2892	goto out;
2893}
2894
2895static void __net_exit ip6_route_net_exit(struct net *net)
2896{
2897#ifdef CONFIG_PROC_FS
2898	proc_net_remove(net, "ipv6_route");
2899	proc_net_remove(net, "rt6_stats");
2900#endif
2901	kfree(net->ipv6.ip6_null_entry);
2902#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2903	kfree(net->ipv6.ip6_prohibit_entry);
2904	kfree(net->ipv6.ip6_blk_hole_entry);
2905#endif
2906	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2907}
2908
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2909static struct pernet_operations ip6_route_net_ops = {
2910	.init = ip6_route_net_init,
2911	.exit = ip6_route_net_exit,
2912};
2913
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2914static struct notifier_block ip6_route_dev_notifier = {
2915	.notifier_call = ip6_route_dev_notify,
2916	.priority = 0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2917};
2918
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2919int __init ip6_route_init(void)
2920{
2921	int ret;
 
2922
2923	ret = -ENOMEM;
2924	ip6_dst_ops_template.kmem_cachep =
2925		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2926				  SLAB_HWCACHE_ALIGN, NULL);
2927	if (!ip6_dst_ops_template.kmem_cachep)
2928		goto out;
2929
2930	ret = dst_entries_init(&ip6_dst_blackhole_ops);
2931	if (ret)
2932		goto out_kmem_cache;
2933
2934	ret = register_pernet_subsys(&ip6_route_net_ops);
2935	if (ret)
2936		goto out_dst_entries;
2937
 
 
 
 
2938	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2939
2940	/* Registering of the loopback is done before this portion of code,
2941	 * the loopback reference in rt6_info will not be taken, do it
2942	 * manually for init_net */
2943	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2944	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2945  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2946	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2947	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2948	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2949	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2950  #endif
2951	ret = fib6_init();
2952	if (ret)
2953		goto out_register_subsys;
2954
2955	ret = xfrm6_init();
2956	if (ret)
2957		goto out_fib6_init;
2958
2959	ret = fib6_rules_init();
2960	if (ret)
2961		goto xfrm6_init;
2962
2963	ret = -ENOBUFS;
2964	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2965	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2966	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
2967		goto fib6_rules_init;
2968
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2969	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2970	if (ret)
2971		goto fib6_rules_init;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2972
2973out:
2974	return ret;
2975
 
 
 
2976fib6_rules_init:
2977	fib6_rules_cleanup();
2978xfrm6_init:
2979	xfrm6_fini();
2980out_fib6_init:
2981	fib6_gc_cleanup();
2982out_register_subsys:
2983	unregister_pernet_subsys(&ip6_route_net_ops);
 
 
2984out_dst_entries:
2985	dst_entries_destroy(&ip6_dst_blackhole_ops);
2986out_kmem_cache:
2987	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2988	goto out;
2989}
2990
2991void ip6_route_cleanup(void)
2992{
 
 
 
 
 
2993	unregister_netdevice_notifier(&ip6_route_dev_notifier);
 
2994	fib6_rules_cleanup();
2995	xfrm6_fini();
2996	fib6_gc_cleanup();
 
2997	unregister_pernet_subsys(&ip6_route_net_ops);
2998	dst_entries_destroy(&ip6_dst_blackhole_ops);
2999	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3000}
v6.8
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *	Linux INET6 implementation
   4 *	FIB front-end.
   5 *
   6 *	Authors:
   7 *	Pedro Roque		<roque@di.fc.ul.pt>
 
 
 
 
 
   8 */
   9
  10/*	Changes:
  11 *
  12 *	YOSHIFUJI Hideaki @USAGI
  13 *		reworked default router selection.
  14 *		- respect outgoing interface
  15 *		- select from (probably) reachable routers (i.e.
  16 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  17 *		- always select the same router if it is (probably)
  18 *		reachable.  otherwise, round-robin the list.
  19 *	Ville Nuorvala
  20 *		Fixed routing subtrees.
  21 */
  22
  23#define pr_fmt(fmt) "IPv6: " fmt
  24
  25#include <linux/capability.h>
  26#include <linux/errno.h>
  27#include <linux/export.h>
  28#include <linux/types.h>
  29#include <linux/times.h>
  30#include <linux/socket.h>
  31#include <linux/sockios.h>
  32#include <linux/net.h>
  33#include <linux/route.h>
  34#include <linux/netdevice.h>
  35#include <linux/in6.h>
  36#include <linux/mroute6.h>
  37#include <linux/init.h>
  38#include <linux/if_arp.h>
  39#include <linux/proc_fs.h>
  40#include <linux/seq_file.h>
  41#include <linux/nsproxy.h>
  42#include <linux/slab.h>
  43#include <linux/jhash.h>
  44#include <linux/siphash.h>
  45#include <net/net_namespace.h>
  46#include <net/snmp.h>
  47#include <net/ipv6.h>
  48#include <net/ip6_fib.h>
  49#include <net/ip6_route.h>
  50#include <net/ndisc.h>
  51#include <net/addrconf.h>
  52#include <net/tcp.h>
  53#include <linux/rtnetlink.h>
  54#include <net/dst.h>
  55#include <net/dst_metadata.h>
  56#include <net/xfrm.h>
  57#include <net/netevent.h>
  58#include <net/netlink.h>
  59#include <net/rtnh.h>
  60#include <net/lwtunnel.h>
  61#include <net/ip_tunnels.h>
  62#include <net/l3mdev.h>
  63#include <net/ip.h>
  64#include <linux/uaccess.h>
  65#include <linux/btf_ids.h>
  66
  67#ifdef CONFIG_SYSCTL
  68#include <linux/sysctl.h>
  69#endif
  70
  71static int ip6_rt_type_to_error(u8 fib6_type);
 
  72
  73#define CREATE_TRACE_POINTS
  74#include <trace/events/fib6.h>
  75EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
  76#undef CREATE_TRACE_POINTS
  77
/*
 * Result of scoring a nexthop's neighbour state. Negative values are
 * failures and are returned as-is by rt6_score_route().
 */
enum rt6_nud_state {
	/* find_match() rejects the nexthop outright. */
	RT6_NUD_FAIL_HARD = -3,
	/* Neighbour is in a failed state (CONFIG_IPV6_ROUTER_PREF builds). */
	RT6_NUD_FAIL_PROBE = -2,
	/* No neighbour entry; find_match() scores it 0 and asks for round-robin. */
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};
  84
  85INDIRECT_CALLABLE_SCOPE
  86struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
 
  87static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  88INDIRECT_CALLABLE_SCOPE
  89unsigned int		ip6_mtu(const struct dst_entry *dst);
  90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  91static void		ip6_dst_destroy(struct dst_entry *);
  92static void		ip6_dst_ifdown(struct dst_entry *,
  93				       struct net_device *dev);
  94static void		 ip6_dst_gc(struct dst_ops *ops);
  95
  96static int		ip6_pkt_discard(struct sk_buff *skb);
  97static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  98static int		ip6_pkt_prohibit(struct sk_buff *skb);
  99static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
 100static void		ip6_link_failure(struct sk_buff *skb);
 101static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 102					   struct sk_buff *skb, u32 mtu,
 103					   bool confirm_neigh);
 104static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
 105					struct sk_buff *skb);
 106static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
 107			   int strict);
 108static size_t rt6_nlmsg_size(struct fib6_info *f6i);
 109static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 110			 struct fib6_info *rt, struct dst_entry *dst,
 111			 struct in6_addr *dest, struct in6_addr *src,
 112			 int iif, int type, u32 portid, u32 seq,
 113			 unsigned int flags);
 114static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
 115					   const struct in6_addr *daddr,
 116					   const struct in6_addr *saddr);
 117
 118#ifdef CONFIG_IPV6_ROUTE_INFO
 119static struct fib6_info *rt6_add_route_info(struct net *net,
 120					   const struct in6_addr *prefix, int prefixlen,
 121					   const struct in6_addr *gwaddr,
 122					   struct net_device *dev,
 123					   unsigned int pref);
 124static struct fib6_info *rt6_get_route_info(struct net *net,
 125					   const struct in6_addr *prefix, int prefixlen,
 126					   const struct in6_addr *gwaddr,
 127					   struct net_device *dev);
 128#endif
 129
/* Per-CPU list of rt6_info entries tracked via dst.rt_uncached. */
struct uncached_list {
	/* Taken with spin_lock_bh() around every list manipulation. */
	spinlock_t		lock;
	/* Live entries added by rt6_uncached_list_add(). */
	struct list_head	head;
	/* Entries moved here by rt6_uncached_list_flush_dev() once handled. */
	struct list_head	quarantine;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
 137
 138void rt6_uncached_list_add(struct rt6_info *rt)
 139{
 140	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
 
 
 141
 142	rt->dst.rt_uncached_list = ul;
 
 143
 144	spin_lock_bh(&ul->lock);
 145	list_add_tail(&rt->dst.rt_uncached, &ul->head);
 146	spin_unlock_bh(&ul->lock);
 147}
 148
 149void rt6_uncached_list_del(struct rt6_info *rt)
 150{
 151	if (!list_empty(&rt->dst.rt_uncached)) {
 152		struct uncached_list *ul = rt->dst.rt_uncached_list;
 153
 154		spin_lock_bh(&ul->lock);
 155		list_del_init(&rt->dst.rt_uncached);
 156		spin_unlock_bh(&ul->lock);
 157	}
 158}
 159
 160static void rt6_uncached_list_flush_dev(struct net_device *dev)
 161{
 162	int cpu;
 163
 164	for_each_possible_cpu(cpu) {
 165		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 166		struct rt6_info *rt, *safe;
 167
 168		if (list_empty(&ul->head))
 169			continue;
 170
 171		spin_lock_bh(&ul->lock);
 172		list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) {
 173			struct inet6_dev *rt_idev = rt->rt6i_idev;
 174			struct net_device *rt_dev = rt->dst.dev;
 175			bool handled = false;
 176
 177			if (rt_idev->dev == dev) {
 178				rt->rt6i_idev = in6_dev_get(blackhole_netdev);
 179				in6_dev_put(rt_idev);
 180				handled = true;
 181			}
 182
 183			if (rt_dev == dev) {
 184				rt->dst.dev = blackhole_netdev;
 185				netdev_ref_replace(rt_dev, blackhole_netdev,
 186						   &rt->dst.dev_tracker,
 187						   GFP_ATOMIC);
 188				handled = true;
 189			}
 190			if (handled)
 191				list_move(&rt->dst.rt_uncached,
 192					  &ul->quarantine);
 193		}
 194		spin_unlock_bh(&ul->lock);
 195	}
 
 196}
 197
 198static inline const void *choose_neigh_daddr(const struct in6_addr *p,
 199					     struct sk_buff *skb,
 200					     const void *daddr)
 201{
 202	if (!ipv6_addr_any(p))
 203		return (const void *) p;
 204	else if (skb)
 205		return &ipv6_hdr(skb)->daddr;
 206	return daddr;
 207}
 208
 209struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
 210				   struct net_device *dev,
 211				   struct sk_buff *skb,
 212				   const void *daddr)
 213{
 214	struct neighbour *n;
 215
 216	daddr = choose_neigh_daddr(gw, skb, daddr);
 217	n = __ipv6_neigh_lookup(dev, daddr);
 218	if (n)
 219		return n;
 220
 221	n = neigh_create(&nd_tbl, daddr, dev);
 222	return IS_ERR(n) ? NULL : n;
 223}
 224
 225static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
 226					      struct sk_buff *skb,
 227					      const void *daddr)
 228{
 229	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
 230
 231	return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
 232				dst->dev, skb, daddr);
 233}
 234
 235static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 236{
 237	struct net_device *dev = dst->dev;
 238	struct rt6_info *rt = (struct rt6_info *)dst;
 239
 240	daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
 241	if (!daddr)
 242		return;
 243	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
 244		return;
 245	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
 246		return;
 247	__ipv6_confirm_neigh(dev, daddr);
 248}
 249
 250static struct dst_ops ip6_dst_ops_template = {
 251	.family			=	AF_INET6,
 
 252	.gc			=	ip6_dst_gc,
 253	.gc_thresh		=	1024,
 254	.check			=	ip6_dst_check,
 255	.default_advmss		=	ip6_default_advmss,
 256	.mtu			=	ip6_mtu,
 257	.cow_metrics		=	dst_cow_metrics_generic,
 258	.destroy		=	ip6_dst_destroy,
 259	.ifdown			=	ip6_dst_ifdown,
 260	.negative_advice	=	ip6_negative_advice,
 261	.link_failure		=	ip6_link_failure,
 262	.update_pmtu		=	ip6_rt_update_pmtu,
 263	.redirect		=	rt6_do_redirect,
 264	.local_out		=	__ip6_local_out,
 265	.neigh_lookup		=	ip6_dst_neigh_lookup,
 266	.confirm_neigh		=	ip6_confirm_neigh,
 267};
 268
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 269static struct dst_ops ip6_dst_blackhole_ops = {
 270	.family			= AF_INET6,
 271	.default_advmss		= ip6_default_advmss,
 272	.neigh_lookup		= ip6_dst_neigh_lookup,
 273	.check			= ip6_dst_check,
 274	.destroy		= ip6_dst_destroy,
 275	.cow_metrics		= dst_cow_metrics_generic,
 276	.update_pmtu		= dst_blackhole_update_pmtu,
 277	.redirect		= dst_blackhole_redirect,
 278	.mtu			= dst_blackhole_mtu,
 279};
 280
/* All-zero metrics array; the hop-limit slot is spelled out explicitly. */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
 284
 285static const struct fib6_info fib6_null_entry_template = {
 286	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 287	.fib6_protocol  = RTPROT_KERNEL,
 288	.fib6_metric	= ~(u32)0,
 289	.fib6_ref	= REFCOUNT_INIT(1),
 290	.fib6_type	= RTN_UNREACHABLE,
 291	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
 292};
 293
 294static const struct rt6_info ip6_null_entry_template = {
 295	.dst = {
 296		.__rcuref	= RCUREF_INIT(1),
 297		.__use		= 1,
 298		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 299		.error		= -ENETUNREACH,
 300		.input		= ip6_pkt_discard,
 301		.output		= ip6_pkt_discard_out,
 302	},
 303	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 
 
 
 304};
 305
 306#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 307
 308static const struct rt6_info ip6_prohibit_entry_template = {
 
 
 
 309	.dst = {
 310		.__rcuref	= RCUREF_INIT(1),
 311		.__use		= 1,
 312		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 313		.error		= -EACCES,
 314		.input		= ip6_pkt_prohibit,
 315		.output		= ip6_pkt_prohibit_out,
 316	},
 317	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 
 
 
 318};
 319
 320static const struct rt6_info ip6_blk_hole_entry_template = {
 321	.dst = {
 322		.__rcuref	= RCUREF_INIT(1),
 323		.__use		= 1,
 324		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 325		.error		= -EINVAL,
 326		.input		= dst_discard,
 327		.output		= dst_discard_out,
 328	},
 329	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 
 
 
 330};
 331
 332#endif
 333
/*
 * Reset the rt6_info-specific part of a freshly allocated route: zero
 * everything that follows the embedded dst member, leaving dst itself
 * (already set up by the allocator) untouched.
 */
static void rt6_info_init(struct rt6_info *rt)
{
	memset_after(rt, 0, dst);
}
 338
 339/* allocate dst with ip6_dst_ops */
 340struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
 341			       int flags)
 
 342{
 343	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 344					DST_OBSOLETE_FORCE_CHK, flags);
 345
 346	if (rt) {
 347		rt6_info_init(rt);
 348		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
 349	}
 350
 351	return rt;
 352}
 353EXPORT_SYMBOL(ip6_dst_alloc);
 354
 355static void ip6_dst_destroy(struct dst_entry *dst)
 356{
 357	struct rt6_info *rt = (struct rt6_info *)dst;
 358	struct fib6_info *from;
 359	struct inet6_dev *idev;
 360
 361	ip_dst_metrics_put(dst);
 362	rt6_uncached_list_del(rt);
 363
 364	idev = rt->rt6i_idev;
 365	if (idev) {
 366		rt->rt6i_idev = NULL;
 367		in6_dev_put(idev);
 368	}
 369
 370	from = xchg((__force struct fib6_info **)&rt->from, NULL);
 371	fib6_info_release(from);
 
 372}
 373
 374static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
 375{
 376	struct rt6_info *rt = (struct rt6_info *)dst;
 377	struct inet6_dev *idev = rt->rt6i_idev;
 378
 379	if (idev && idev->dev != blackhole_netdev) {
 380		struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev);
 381
 382		if (blackhole_idev) {
 383			rt->rt6i_idev = blackhole_idev;
 384			in6_dev_put(idev);
 385		}
 386	}
 387}
 388
 389static bool __rt6_check_expired(const struct rt6_info *rt)
 390{
 391	if (rt->rt6i_flags & RTF_EXPIRES)
 392		return time_after(jiffies, rt->dst.expires);
 393	else
 394		return false;
 395}
 396
 397static bool rt6_check_expired(const struct rt6_info *rt)
 398{
 399	struct fib6_info *from;
 400
 401	from = rcu_dereference(rt->from);
 402
 403	if (rt->rt6i_flags & RTF_EXPIRES) {
 404		if (time_after(jiffies, rt->dst.expires))
 405			return true;
 406	} else if (from) {
 407		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
 408			fib6_check_expired(from);
 409	}
 410	return false;
 411}
 412
 413void fib6_select_path(const struct net *net, struct fib6_result *res,
 414		      struct flowi6 *fl6, int oif, bool have_oif_match,
 415		      const struct sk_buff *skb, int strict)
 416{
 417	struct fib6_info *sibling, *next_sibling;
 418	struct fib6_info *match = res->f6i;
 
 
 419
 420	if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
 421		goto out;
 422
 423	if (match->nh && have_oif_match && res->nh)
 424		return;
 425
 426	if (skb)
 427		IP6CB(skb)->flags |= IP6SKB_MULTIPATH;
 428
 429	/* We might have already computed the hash for ICMPv6 errors. In such
 430	 * case it will always be non-zero. Otherwise now is the time to do it.
 431	 */
 432	if (!fl6->mp_hash &&
 433	    (!match->nh || nexthop_is_multipath(match->nh)))
 434		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
 435
 436	if (unlikely(match->nh)) {
 437		nexthop_path_fib6_result(res, fl6->mp_hash);
 438		return;
 439	}
 440
 441	if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
 442		goto out;
 443
 444	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
 445				 fib6_siblings) {
 446		const struct fib6_nh *nh = sibling->fib6_nh;
 447		int nh_upper_bound;
 448
 449		nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
 450		if (fl6->mp_hash > nh_upper_bound)
 451			continue;
 452		if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
 453			break;
 454		match = sibling;
 455		break;
 456	}
 457
 458out:
 459	res->f6i = match;
 460	res->nh = match->fib6_nh;
 461}
 462
 463/*
 464 *	Route lookup. rcu_read_lock() should be held.
 465 */
 466
 467static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
 468			       const struct in6_addr *saddr, int oif, int flags)
 469{
 470	const struct net_device *dev;
 471
 472	if (nh->fib_nh_flags & RTNH_F_DEAD)
 473		return false;
 474
 475	dev = nh->fib_nh_dev;
 476	if (oif) {
 477		if (dev->ifindex == oif)
 478			return true;
 479	} else {
 480		if (ipv6_chk_addr(net, saddr, dev,
 481				  flags & RT6_LOOKUP_F_IFACE))
 482			return true;
 483	}
 484
 485	return false;
 486}
 487
/* Argument bundle handed to __rt6_nh_dev_match() through
 * nexthop_for_each_fib6_nh().
 */
struct fib6_nh_dm_arg {
	struct net		*net;
	const struct in6_addr	*saddr;
	int			oif;
	int			flags;
	/* Output: the fib6_nh most recently visited by the callback. */
	struct fib6_nh		*nh;
};
 495
 496static int __rt6_nh_dev_match(struct fib6_nh *nh, void *_arg)
 497{
 498	struct fib6_nh_dm_arg *arg = _arg;
 499
 500	arg->nh = nh;
 501	return __rt6_device_match(arg->net, nh, arg->saddr, arg->oif,
 502				  arg->flags);
 503}
 504
 505/* returns fib6_nh from nexthop or NULL */
 506static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh,
 507					struct fib6_result *res,
 508					const struct in6_addr *saddr,
 509					int oif, int flags)
 510{
 511	struct fib6_nh_dm_arg arg = {
 512		.net   = net,
 513		.saddr = saddr,
 514		.oif   = oif,
 515		.flags = flags,
 516	};
 517
 518	if (nexthop_is_blackhole(nh))
 519		return NULL;
 520
 521	if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg))
 522		return arg.nh;
 523
 524	return NULL;
 525}
 526
 527static void rt6_device_match(struct net *net, struct fib6_result *res,
 528			     const struct in6_addr *saddr, int oif, int flags)
 529{
 530	struct fib6_info *f6i = res->f6i;
 531	struct fib6_info *spf6i;
 532	struct fib6_nh *nh;
 533
 534	if (!oif && ipv6_addr_any(saddr)) {
 535		if (unlikely(f6i->nh)) {
 536			nh = nexthop_fib6_nh(f6i->nh);
 537			if (nexthop_is_blackhole(f6i->nh))
 538				goto out_blackhole;
 539		} else {
 540			nh = f6i->fib6_nh;
 541		}
 542		if (!(nh->fib_nh_flags & RTNH_F_DEAD))
 543			goto out;
 544	}
 545
 546	for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
 547		bool matched = false;
 548
 549		if (unlikely(spf6i->nh)) {
 550			nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr,
 551					      oif, flags);
 552			if (nh)
 553				matched = true;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 554		} else {
 555			nh = spf6i->fib6_nh;
 556			if (__rt6_device_match(net, nh, saddr, oif, flags))
 557				matched = true;
 558		}
 559		if (matched) {
 560			res->f6i = spf6i;
 561			goto out;
 562		}
 563	}
 564
 565	if (oif && flags & RT6_LOOKUP_F_IFACE) {
 566		res->f6i = net->ipv6.fib6_null_entry;
 567		nh = res->f6i->fib6_nh;
 568		goto out;
 569	}
 570
 571	if (unlikely(f6i->nh)) {
 572		nh = nexthop_fib6_nh(f6i->nh);
 573		if (nexthop_is_blackhole(f6i->nh))
 574			goto out_blackhole;
 575	} else {
 576		nh = f6i->fib6_nh;
 577	}
 578
 579	if (nh->fib_nh_flags & RTNH_F_DEAD) {
 580		res->f6i = net->ipv6.fib6_null_entry;
 581		nh = res->f6i->fib6_nh;
 582	}
 583out:
 584	res->nh = nh;
 585	res->fib6_type = res->f6i->fib6_type;
 586	res->fib6_flags = res->f6i->fib6_flags;
 587	return;
 588
 589out_blackhole:
 590	res->fib6_flags |= RTF_REJECT;
 591	res->fib6_type = RTN_BLACKHOLE;
 592	res->nh = nh;
 593}
 594
 595#ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred router-probe request queued by rt6_probe() and consumed (and
 * freed) by rt6_probe_deferred().
 */
struct __rt6_probe_work {
	struct work_struct work;
	/* Gateway address to solicit. */
	struct in6_addr target;
	/* Device to send on; held via dev_tracker until the work has run. */
	struct net_device *dev;
	netdevice_tracker dev_tracker;
};
 602
 603static void rt6_probe_deferred(struct work_struct *w)
 604{
 605	struct in6_addr mcaddr;
 606	struct __rt6_probe_work *work =
 607		container_of(w, struct __rt6_probe_work, work);
 608
 609	addrconf_addr_solict_mult(&work->target, &mcaddr);
 610	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
 611	netdev_put(work->dev, &work->dev_tracker);
 612	kfree(work);
 613}
 614
 615static void rt6_probe(struct fib6_nh *fib6_nh)
 616{
 617	struct __rt6_probe_work *work = NULL;
 618	const struct in6_addr *nh_gw;
 619	unsigned long last_probe;
 620	struct neighbour *neigh;
 621	struct net_device *dev;
 622	struct inet6_dev *idev;
 623
 624	/*
 625	 * Okay, this does not seem to be appropriate
 626	 * for now, however, we need to check if it
 627	 * is really so; aka Router Reachability Probing.
 628	 *
 629	 * Router Reachability Probe MUST be rate-limited
 630	 * to no more than one per minute.
 631	 */
 632	if (!fib6_nh->fib_nh_gw_family)
 633		return;
 634
 635	nh_gw = &fib6_nh->fib_nh_gw6;
 636	dev = fib6_nh->fib_nh_dev;
 637	rcu_read_lock();
 638	last_probe = READ_ONCE(fib6_nh->last_probe);
 639	idev = __in6_dev_get(dev);
 640	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
 641	if (neigh) {
 642		if (READ_ONCE(neigh->nud_state) & NUD_VALID)
 643			goto out;
 644
 645		write_lock_bh(&neigh->lock);
 646		if (!(neigh->nud_state & NUD_VALID) &&
 647		    time_after(jiffies,
 648			       neigh->updated + idev->cnf.rtr_probe_interval)) {
 649			work = kmalloc(sizeof(*work), GFP_ATOMIC);
 650			if (work)
 651				__neigh_set_probe_once(neigh);
 652		}
 653		write_unlock_bh(&neigh->lock);
 654	} else if (time_after(jiffies, last_probe +
 655				       idev->cnf.rtr_probe_interval)) {
 656		work = kmalloc(sizeof(*work), GFP_ATOMIC);
 657	}
 658
 659	if (!work || cmpxchg(&fib6_nh->last_probe,
 660			     last_probe, jiffies) != last_probe) {
 661		kfree(work);
 662	} else {
 663		INIT_WORK(&work->work, rt6_probe_deferred);
 664		work->target = *nh_gw;
 665		netdev_hold(dev, &work->dev_tracker, GFP_ATOMIC);
 666		work->dev = dev;
 667		schedule_work(&work->work);
 668	}
 669
 670out:
 671	rcu_read_unlock();
 672}
 673#else
/* No-op stand-in used when CONFIG_IPV6_ROUTER_PREF is not set. */
static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
}
 677#endif
 678
 679/*
 680 * Default Router Selection (RFC 2461 6.3.6)
 681 */
 682static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
 
 
 
 
 
 
 
 
 
 
 
 683{
 684	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 685	struct neighbour *neigh;
 
 686
 687	rcu_read_lock();
 688	neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
 689					  &fib6_nh->fib_nh_gw6);
 690	if (neigh) {
 691		u8 nud_state = READ_ONCE(neigh->nud_state);
 692
 693		if (nud_state & NUD_VALID)
 694			ret = RT6_NUD_SUCCEED;
 
 695#ifdef CONFIG_IPV6_ROUTER_PREF
 696		else if (!(nud_state & NUD_FAILED))
 697			ret = RT6_NUD_SUCCEED;
 
 698		else
 699			ret = RT6_NUD_FAIL_PROBE;
 700#endif
 701	} else {
 702		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
 703		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
 704	}
 705	rcu_read_unlock();
 706
 707	return ret;
 708}
 709
 710static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
 711			   int strict)
 712{
 713	int m = 0;
 714
 715	if (!oif || nh->fib_nh_dev->ifindex == oif)
 716		m = 2;
 717
 
 718	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 719		return RT6_NUD_FAIL_HARD;
 720#ifdef CONFIG_IPV6_ROUTER_PREF
 721	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
 722#endif
 723	if ((strict & RT6_LOOKUP_F_REACHABLE) &&
 724	    !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
 725		int n = rt6_check_neigh(nh);
 726		if (n < 0)
 727			return n;
 728	}
 729	return m;
 730}
 731
 732static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
 733		       int oif, int strict, int *mpri, bool *do_rr)
 734{
 735	bool match_do_rr = false;
 736	bool rc = false;
 737	int m;
 738
 739	if (nh->fib_nh_flags & RTNH_F_DEAD)
 740		goto out;
 741
 742	if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
 743	    nh->fib_nh_flags & RTNH_F_LINKDOWN &&
 744	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
 745		goto out;
 746
 747	m = rt6_score_route(nh, fib6_flags, oif, strict);
 748	if (m == RT6_NUD_FAIL_DO_RR) {
 749		match_do_rr = true;
 750		m = 0; /* lowest valid score */
 751	} else if (m == RT6_NUD_FAIL_HARD) {
 752		goto out;
 753	}
 754
 755	if (strict & RT6_LOOKUP_F_REACHABLE)
 756		rt6_probe(nh);
 757
 758	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
 759	if (m > *mpri) {
 760		*do_rr = match_do_rr;
 
 761		*mpri = m;
 762		rc = true;
 
 
 763	}
 
 764out:
 765	return rc;
 766}
 767
/* Argument bundle handed to rt6_nh_find_match() through
 * nexthop_for_each_fib6_nh(); the fields mirror find_match()'s parameters.
 */
struct fib6_nh_frl_arg {
	u32		flags;
	int		oif;
	int		strict;
	int		*mpri;
	bool		*do_rr;
	/* Output: the fib6_nh most recently visited by the callback. */
	struct fib6_nh	*nh;
};
 776
 777static int rt6_nh_find_match(struct fib6_nh *nh, void *_arg)
 778{
 779	struct fib6_nh_frl_arg *arg = _arg;
 780
 781	arg->nh = nh;
 782	return find_match(nh, arg->flags, arg->oif, arg->strict,
 783			  arg->mpri, arg->do_rr);
 784}
 785
 786static void __find_rr_leaf(struct fib6_info *f6i_start,
 787			   struct fib6_info *nomatch, u32 metric,
 788			   struct fib6_result *res, struct fib6_info **cont,
 789			   int oif, int strict, bool *do_rr, int *mpri)
 790{
 791	struct fib6_info *f6i;
 792
 793	for (f6i = f6i_start;
 794	     f6i && f6i != nomatch;
 795	     f6i = rcu_dereference(f6i->fib6_next)) {
 796		bool matched = false;
 797		struct fib6_nh *nh;
 798
 799		if (cont && f6i->fib6_metric != metric) {
 800			*cont = f6i;
 801			return;
 802		}
 803
 804		if (fib6_check_expired(f6i))
 805			continue;
 806
 807		if (unlikely(f6i->nh)) {
 808			struct fib6_nh_frl_arg arg = {
 809				.flags  = f6i->fib6_flags,
 810				.oif    = oif,
 811				.strict = strict,
 812				.mpri   = mpri,
 813				.do_rr  = do_rr
 814			};
 815
 816			if (nexthop_is_blackhole(f6i->nh)) {
 817				res->fib6_flags = RTF_REJECT;
 818				res->fib6_type = RTN_BLACKHOLE;
 819				res->f6i = f6i;
 820				res->nh = nexthop_fib6_nh(f6i->nh);
 821				return;
 822			}
 823			if (nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_find_match,
 824						     &arg)) {
 825				matched = true;
 826				nh = arg.nh;
 827			}
 828		} else {
 829			nh = f6i->fib6_nh;
 830			if (find_match(nh, f6i->fib6_flags, oif, strict,
 831				       mpri, do_rr))
 832				matched = true;
 833		}
 834		if (matched) {
 835			res->f6i = f6i;
 836			res->nh = nh;
 837			res->fib6_flags = f6i->fib6_flags;
 838			res->fib6_type = f6i->fib6_type;
 839		}
 840	}
 841}
 842
 843static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
 844			 struct fib6_info *rr_head, int oif, int strict,
 845			 bool *do_rr, struct fib6_result *res)
 846{
 847	u32 metric = rr_head->fib6_metric;
 848	struct fib6_info *cont = NULL;
 849	int mpri = -1;
 850
 851	__find_rr_leaf(rr_head, NULL, metric, res, &cont,
 852		       oif, strict, do_rr, &mpri);
 853
 854	__find_rr_leaf(leaf, rr_head, metric, res, &cont,
 855		       oif, strict, do_rr, &mpri);
 856
 857	if (res->f6i || !cont)
 858		return;
 859
 860	__find_rr_leaf(cont, NULL, metric, res, NULL,
 861		       oif, strict, do_rr, &mpri);
 862}
 863
 864static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
 865		       struct fib6_result *res, int strict)
 866{
 867	struct fib6_info *leaf = rcu_dereference(fn->leaf);
 868	struct fib6_info *rt0;
 869	bool do_rr = false;
 870	int key_plen;
 871
 872	/* make sure this function or its helpers sets f6i */
 873	res->f6i = NULL;
 874
 875	if (!leaf || leaf == net->ipv6.fib6_null_entry)
 876		goto out;
 877
 878	rt0 = rcu_dereference(fn->rr_ptr);
 879	if (!rt0)
 880		rt0 = leaf;
 881
 882	/* Double check to make sure fn is not an intermediate node
 883	 * and fn->leaf does not points to its child's leaf
 884	 * (This might happen if all routes under fn are deleted from
 885	 * the tree and fib6_repair_tree() is called on the node.)
 886	 */
 887	key_plen = rt0->fib6_dst.plen;
 888#ifdef CONFIG_IPV6_SUBTREES
 889	if (rt0->fib6_src.plen)
 890		key_plen = rt0->fib6_src.plen;
 891#endif
 892	if (fn->fn_bit != key_plen)
 893		goto out;
 894
 895	find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
 896	if (do_rr) {
 897		struct fib6_info *next = rcu_dereference(rt0->fib6_next);
 898
 899		/* no entries matched; do round-robin */
 900		if (!next || next->fib6_metric != rt0->fib6_metric)
 901			next = leaf;
 902
 903		if (next != rt0) {
 904			spin_lock_bh(&leaf->fib6_table->tb6_lock);
 905			/* make sure next is not being deleted from the tree */
 906			if (next->fib6_node)
 907				rcu_assign_pointer(fn->rr_ptr, next);
 908			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
 909		}
 910	}
 911
 912out:
 913	if (!res->f6i) {
 914		res->f6i = net->ipv6.fib6_null_entry;
 915		res->nh = res->f6i->fib6_nh;
 916		res->fib6_flags = res->f6i->fib6_flags;
 917		res->fib6_type = res->f6i->fib6_type;
 918	}
 919}
 920
 921static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
 922{
 923	return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
 924	       res->nh->fib_nh_gw_family;
 925}
 926
 927#ifdef CONFIG_IPV6_ROUTE_INFO
 928int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 929		  const struct in6_addr *gwaddr)
 930{
 931	struct net *net = dev_net(dev);
 932	struct route_info *rinfo = (struct route_info *) opt;
 933	struct in6_addr prefix_buf, *prefix;
 934	unsigned int pref;
 935	unsigned long lifetime;
 936	struct fib6_info *rt;
 937
 938	if (len < sizeof(struct route_info)) {
 939		return -EINVAL;
 940	}
 941
 942	/* Sanity check for prefix_len and length */
 943	if (rinfo->length > 3) {
 944		return -EINVAL;
 945	} else if (rinfo->prefix_len > 128) {
 946		return -EINVAL;
 947	} else if (rinfo->prefix_len > 64) {
 948		if (rinfo->length < 2) {
 949			return -EINVAL;
 950		}
 951	} else if (rinfo->prefix_len > 0) {
 952		if (rinfo->length < 1) {
 953			return -EINVAL;
 954		}
 955	}
 956
 957	pref = rinfo->route_pref;
 958	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 959		return -EINVAL;
 960
 961	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 962
 963	if (rinfo->length == 3)
 964		prefix = (struct in6_addr *)rinfo->prefix;
 965	else {
 966		/* this function is safe */
 967		ipv6_addr_prefix(&prefix_buf,
 968				 (struct in6_addr *)rinfo->prefix,
 969				 rinfo->prefix_len);
 970		prefix = &prefix_buf;
 971	}
 972
 973	if (rinfo->prefix_len == 0)
 974		rt = rt6_get_dflt_router(net, gwaddr, dev);
 975	else
 976		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
 977					gwaddr, dev);
 978
 979	if (rt && !lifetime) {
 980		ip6_del_rt(net, rt, false);
 981		rt = NULL;
 982	}
 983
 984	if (!rt && lifetime)
 985		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 986					dev, pref);
 987	else if (rt)
 988		rt->fib6_flags = RTF_ROUTEINFO |
 989				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 990
 991	if (rt) {
 992		if (!addrconf_finite_timeout(lifetime))
 993			fib6_clean_expires(rt);
 994		else
 995			fib6_set_expires(rt, jiffies + HZ * lifetime);
 996
 997		fib6_info_release(rt);
 
 998	}
 999	return 0;
1000}
1001#endif
1002
1003/*
1004 *	Misc support functions
1005 */
1006
1007/* called with rcu_lock held */
1008static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
1009{
1010	struct net_device *dev = res->nh->fib_nh_dev;
1011
1012	if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
1013		/* for copies of local routes, dst->dev needs to be the
1014		 * device if it is a master device, the master device if
1015		 * device is enslaved, and the loopback as the default
1016		 */
1017		if (netif_is_l3_slave(dev) &&
1018		    !rt6_need_strict(&res->f6i->fib6_dst.addr))
1019			dev = l3mdev_master_dev_rcu(dev);
1020		else if (!netif_is_l3_master(dev))
1021			dev = dev_net(dev)->loopback_dev;
1022		/* last case is netif_is_l3_master(dev) is true in which
1023		 * case we want dev returned to be dev
1024		 */
1025	}
1026
1027	return dev;
1028}
1029
1030static const int fib6_prop[RTN_MAX + 1] = {
1031	[RTN_UNSPEC]	= 0,
1032	[RTN_UNICAST]	= 0,
1033	[RTN_LOCAL]	= 0,
1034	[RTN_BROADCAST]	= 0,
1035	[RTN_ANYCAST]	= 0,
1036	[RTN_MULTICAST]	= 0,
1037	[RTN_BLACKHOLE]	= -EINVAL,
1038	[RTN_UNREACHABLE] = -EHOSTUNREACH,
1039	[RTN_PROHIBIT]	= -EACCES,
1040	[RTN_THROW]	= -EAGAIN,
1041	[RTN_NAT]	= -EINVAL,
1042	[RTN_XRESOLVE]	= -EINVAL,
1043};
1044
/*
 * Map a route type (RTN_*) to the dst error stored for it in fib6_prop.
 * NOTE(review): the index is not range-checked here; this assumes callers
 * only pass values <= RTN_MAX -- confirm against callers.
 */
static int ip6_rt_type_to_error(u8 fib6_type)
{
	return fib6_prop[fib6_type];
}
1049
1050static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
1051{
1052	unsigned short flags = 0;
1053
1054	if (rt->dst_nocount)
1055		flags |= DST_NOCOUNT;
1056	if (rt->dst_nopolicy)
1057		flags |= DST_NOPOLICY;
1058
1059	return flags;
1060}
1061
1062static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
1063{
1064	rt->dst.error = ip6_rt_type_to_error(fib6_type);
1065
1066	switch (fib6_type) {
1067	case RTN_BLACKHOLE:
1068		rt->dst.output = dst_discard_out;
1069		rt->dst.input = dst_discard;
1070		break;
1071	case RTN_PROHIBIT:
1072		rt->dst.output = ip6_pkt_prohibit_out;
1073		rt->dst.input = ip6_pkt_prohibit;
1074		break;
1075	case RTN_THROW:
1076	case RTN_UNREACHABLE:
1077	default:
1078		rt->dst.output = ip6_pkt_discard_out;
1079		rt->dst.input = ip6_pkt_discard;
1080		break;
1081	}
1082}
1083
1084static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
1085{
1086	struct fib6_info *f6i = res->f6i;
1087
1088	if (res->fib6_flags & RTF_REJECT) {
1089		ip6_rt_init_dst_reject(rt, res->fib6_type);
1090		return;
1091	}
1092
1093	rt->dst.error = 0;
1094	rt->dst.output = ip6_output;
1095
1096	if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
1097		rt->dst.input = ip6_input;
1098	} else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
1099		rt->dst.input = ip6_mc_input;
1100	} else {
1101		rt->dst.input = ip6_forward;
1102	}
1103
1104	if (res->nh->fib_nh_lws) {
1105		rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
1106		lwtunnel_set_redirect(&rt->dst);
1107	}
1108
1109	rt->dst.lastuse = jiffies;
1110}
1111
/*
 * Attach @rt to the fib6_info it was derived from.
 * Caller must already hold reference to @from; that reference is the one
 * later dropped through rt->from in ip6_dst_destroy().
 */
static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
{
	/* The copy carries no expiry flag of its own once it has a parent. */
	rt->rt6i_flags &= ~RTF_EXPIRES;
	rcu_assign_pointer(rt->from, from);
	/* Start from the parent's metrics. */
	ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
}
1119
1120/* Caller must already hold reference to f6i in result */
1121static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
1122{
1123	const struct fib6_nh *nh = res->nh;
1124	const struct net_device *dev = nh->fib_nh_dev;
1125	struct fib6_info *f6i = res->f6i;
1126
1127	ip6_rt_init_dst(rt, res);
1128
1129	rt->rt6i_dst = f6i->fib6_dst;
1130	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
1131	rt->rt6i_flags = res->fib6_flags;
1132	if (nh->fib_nh_gw_family) {
1133		rt->rt6i_gateway = nh->fib_nh_gw6;
1134		rt->rt6i_flags |= RTF_GATEWAY;
1135	}
1136	rt6_set_from(rt, f6i);
1137#ifdef CONFIG_IPV6_SUBTREES
1138	rt->rt6i_src = f6i->fib6_src;
1139#endif
1140}
1141
1142static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1143					struct in6_addr *saddr)
1144{
1145	struct fib6_node *pn, *sn;
1146	while (1) {
1147		if (fn->fn_flags & RTN_TL_ROOT)
1148			return NULL;
1149		pn = rcu_dereference(fn->parent);
1150		sn = FIB6_SUBTREE(pn);
1151		if (sn && sn != fn)
1152			fn = fib6_node_lookup(sn, NULL, saddr);
1153		else
1154			fn = pn;
1155		if (fn->fn_flags & RTN_RTINFO)
1156			return fn;
1157	}
1158}
1159
1160static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
1161{
1162	struct rt6_info *rt = *prt;
1163
1164	if (dst_hold_safe(&rt->dst))
1165		return true;
1166	if (net) {
1167		rt = net->ipv6.ip6_null_entry;
1168		dst_hold(&rt->dst);
1169	} else {
1170		rt = NULL;
1171	}
1172	*prt = rt;
1173	return false;
1174}
1175
1176/* called with rcu_lock held
 *
 * Allocate a new rt6_info for @res and initialise it with
 * ip6_rt_copy_init().  A reference on res->f6i is taken first with
 * fib6_info_hold_safe().  If that fails, or if ip6_dst_alloc() fails (in
 * which case the f6i reference is dropped again), the ip6_null_entry of
 * the nexthop device's namespace is returned instead, with a dst
 * reference taken on it.
 */
1177static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
1178{
1179	struct net_device *dev = res->nh->fib_nh_dev;
1180	struct fib6_info *f6i = res->f6i;
1181	unsigned short flags;
1182	struct rt6_info *nrt;
1183
1184	if (!fib6_info_hold_safe(f6i))
1185		goto fallback;
1186
1187	flags = fib6_info_dst_flags(f6i);
1188	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1189	if (!nrt) {
1190		fib6_info_release(f6i);
1191		goto fallback;
1192	}
1193
1194	ip6_rt_copy_init(nrt, res);
1195	return nrt;
1196
1197fallback:
1198	nrt = dev_net(dev)->ipv6.ip6_null_entry;
1199	dst_hold(&nrt->dst);
1200	return nrt;
1201}
1202
/* Per-table lookup used by ip6_route_lookup() and rt6_lookup() through
 * fib6_rule_lookup().
 *
 * The whole lookup runs under rcu_read_lock():
 *  1. find the fib node for fl6->daddr / fl6->saddr in @table;
 *  2. take the node's leaf (or fib6_null_entry if there is none) and let
 *     rt6_device_match() pick an entry for fl6->flowi6_oif;
 *  3. if that yields fib6_null_entry, backtrack with fib6_backtrack() and
 *     retry; when backtracking is exhausted, return ip6_null_entry with a
 *     reference held;
 *  4. RTF_REJECT results skip path selection and the exception table and
 *     go straight to dst creation;
 *  5. otherwise select a path, then return the matching exception-table
 *     entry if one exists, else a dst created by ip6_create_rt_rcu().
 *
 * trace_fib6_table_lookup() is invoked on every exit path.
 */
1203INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
1204					     struct fib6_table *table,
1205					     struct flowi6 *fl6,
1206					     const struct sk_buff *skb,
1207					     int flags)
1208{
1209	struct fib6_result res = {};
1210	struct fib6_node *fn;
1211	struct rt6_info *rt;
1212
1213	rcu_read_lock();
1214	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1215restart:
1216	res.f6i = rcu_dereference(fn->leaf);
1217	if (!res.f6i)
1218		res.f6i = net->ipv6.fib6_null_entry;
1219	else
1220		rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
1221				 flags);
1222
1223	if (res.f6i == net->ipv6.fib6_null_entry) {
1224		fn = fib6_backtrack(fn, &fl6->saddr);
1225		if (fn)
1226			goto restart;
1227
1228		rt = net->ipv6.ip6_null_entry;
1229		dst_hold(&rt->dst);
1230		goto out;
1231	} else if (res.fib6_flags & RTF_REJECT) {
		/* jumps into the else-branch below, bypassing path selection
		 * and the exception-table search
		 */
1232		goto do_create;
1233	}
1234
1235	fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
1236			 fl6->flowi6_oif != 0, skb, flags);
1237
1238	/* Search through exception table */
1239	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
1240	if (rt) {
		/* on failure ip6_hold_safe() swaps rt for the held null
		 * entry, so dst_use_noref() is only applied to a real hit
		 */
1241		if (ip6_hold_safe(net, &rt))
1242			dst_use_noref(&rt->dst, jiffies);
1243	} else {
1244do_create:
1245		rt = ip6_create_rt_rcu(&res);
1246	}
1247
1248out:
1249	trace_fib6_table_lookup(net, &res, table, fl6);
1250
1251	rcu_read_unlock();
1252
1253	return rt;
1254}
1255
/* Route lookup for @fl6 through fib6_rule_lookup(), using
 * ip6_pol_route_lookup() as the per-table lookup function.  @skb and
 * @flags are passed through unchanged; the return value is whatever
 * fib6_rule_lookup() returns.
 */
1256struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1257				   const struct sk_buff *skb, int flags)
1258{
1259	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1260}
1261EXPORT_SYMBOL_GPL(ip6_route_lookup);
1262
/* Look up a route by destination address.
 *
 * @daddr:  destination address (copied into the flow)
 * @saddr:  optional source address; when non-NULL it is copied into the
 *          flow and RT6_LOOKUP_F_HAS_SADDR is set
 * @oif:    output interface index stored in the flow
 * @skb:    passed through to fib6_rule_lookup()
 * @strict: non-zero sets RT6_LOOKUP_F_IFACE
 *
 * Returns the route when the resulting dst has error == 0.  Otherwise the
 * dst is released and NULL is returned.
 */
1263struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1264			    const struct in6_addr *saddr, int oif,
1265			    const struct sk_buff *skb, int strict)
1266{
1267	struct flowi6 fl6 = {
1268		.flowi6_oif = oif,
1269		.daddr = *daddr,
1270	};
1271	struct dst_entry *dst;
1272	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1273
1274	if (saddr) {
1275		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1276		flags |= RT6_LOOKUP_F_HAS_SADDR;
1277	}
1278
1279	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1280	if (dst->error == 0)
1281		return (struct rt6_info *) dst;
1282
1283	dst_release(dst);
1284
1285	return NULL;
1286}
 
1287EXPORT_SYMBOL(rt6_lookup);
1288
1289/* ip6_ins_rt is called with table->tb6_lock NOT held.
1290 * It takes a new route entry; if the addition fails for any reason the
1291 * route is released.
1292 * Caller must hold dst before calling it.
 *
 * __ip6_ins_rt() does the actual work: it takes rt->fib6_table->tb6_lock
 * (BH-disabling variant) around fib6_add() and returns fib6_add()'s
 * result.  No release of @rt is visible in this function itself; the
 * statement above is inherited from the original note.
1293 */
1294
1295static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1296			struct netlink_ext_ack *extack)
1297{
1298	int err;
1299	struct fib6_table *table;
1300
1301	table = rt->fib6_table;
1302	spin_lock_bh(&table->tb6_lock);
1303	err = fib6_add(&table->tb6_root, rt, info, extack);
1304	spin_unlock_bh(&table->tb6_lock);
1305
1306	return err;
1307}
1308
/* Insert @rt into its table on behalf of @net.
 *
 * Thin wrapper around __ip6_ins_rt(): the nl_info carries only the
 * namespace and no extack is supplied.  Returns __ip6_ins_rt()'s result.
 */
1309int ip6_ins_rt(struct net *net, struct fib6_info *rt)
1310{
1311	struct nl_info info = {	.nl_net = net, };
1312
1313	return __ip6_ins_rt(rt, &info, NULL);
 
1314}
1315
/* Allocate an RTF_CACHE clone of res->f6i for the host route @daddr/128.
 *
 * Takes a reference on f6i with fib6_info_hold_safe() (dropped again if
 * the dst allocation fails).  The clone is initialised with
 * ip6_rt_copy_init() and then narrowed to @daddr with prefix length 128.
 *
 * When rt6_is_gw_or_nonexthop() is false for @res:
 *  - RTF_ANYCAST is set if f6i is not itself a /128 and its prefix address
 *    equals @daddr;
 *  - with CONFIG_IPV6_SUBTREES, a non-zero source prefix is narrowed to
 *    @saddr/128 when @saddr is given.
 *
 * Returns the new route, or NULL if the reference or the allocation
 * could not be obtained.
 * Calls ip6_rt_get_dev_rcu(), so presumably must run under
 * rcu_read_lock() - confirm against callers.
 */
1316static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
1317					   const struct in6_addr *daddr,
1318					   const struct in6_addr *saddr)
1319{
1320	struct fib6_info *f6i = res->f6i;
1321	struct net_device *dev;
1322	struct rt6_info *rt;
1323
1324	/*
1325	 *	Clone the route.
1326	 */
1327
1328	if (!fib6_info_hold_safe(f6i))
1329		return NULL;
 
 
 
1330
1331	dev = ip6_rt_get_dev_rcu(res);
1332	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1333	if (!rt) {
1334		fib6_info_release(f6i);
1335		return NULL;
1336	}
1337
1338	ip6_rt_copy_init(rt, res);
1339	rt->rt6i_flags |= RTF_CACHE;
1340	rt->rt6i_dst.addr = *daddr;
1341	rt->rt6i_dst.plen = 128;
1342
1343	if (!rt6_is_gw_or_nonexthop(res)) {
1344		if (f6i->fib6_dst.plen != 128 &&
1345		    ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
1346			rt->rt6i_flags |= RTF_ANYCAST;
1347#ifdef CONFIG_IPV6_SUBTREES
1348		if (rt->rt6i_src.plen && saddr) {
1349			rt->rt6i_src.addr = *saddr;
1350			rt->rt6i_src.plen = 128;
1351		}
1352#endif
1353	}
1354
1355	return rt;
1356}
1357
/* Allocate a per-cpu copy of res->f6i.
 *
 * Takes a reference on f6i (dropped again if the dst allocation fails),
 * allocates the dst with the entry's dst flags plus DST_NOCOUNT, copies
 * the route data with ip6_rt_copy_init() and marks the copy RTF_PCPU.
 * When f6i uses a nexthop object (f6i->nh), the copy's sernum is set to
 * the namespace's current rt_genid_ipv6() value.
 *
 * Returns the new route or NULL on failure.
 */
1358static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
1359{
1360	struct fib6_info *f6i = res->f6i;
1361	unsigned short flags = fib6_info_dst_flags(f6i);
1362	struct net_device *dev;
1363	struct rt6_info *pcpu_rt;
1364
1365	if (!fib6_info_hold_safe(f6i))
1366		return NULL;
1367
	/* the device lookup and the allocation sit inside their own RCU
	 * read-side section
	 */
1368	rcu_read_lock();
1369	dev = ip6_rt_get_dev_rcu(res);
1370	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags | DST_NOCOUNT);
1371	rcu_read_unlock();
1372	if (!pcpu_rt) {
1373		fib6_info_release(f6i);
1374		return NULL;
1375	}
1376	ip6_rt_copy_init(pcpu_rt, res);
1377	pcpu_rt->rt6i_flags |= RTF_PCPU;
1378
1379	if (f6i->nh)
1380		pcpu_rt->sernum = rt_genid_ipv6(dev_net(dev));
1381
1382	return pcpu_rt;
1383}
1384
/* True when @rt6's sernum equals the current rt_genid_ipv6() value of the
 * namespace that owns rt6->dst.dev.
 */
1385static bool rt6_is_valid(const struct rt6_info *rt6)
1386{
1387	return rt6->sernum == rt_genid_ipv6(dev_net(rt6->dst.dev));
1388}
1389
1390/* It should be called with rcu_read_lock() acquired
 *
 * Return this CPU's cached route for res->nh, or NULL if there is none.
 * A cached route with a non-zero sernum that fails rt6_is_valid() is
 * discarded: the per-cpu slot is atomically swapped to NULL, the old dst
 * is detached from its device with dst_dev_put() and released, and NULL
 * is returned.
 */
1391static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
1392{
1393	struct rt6_info *pcpu_rt;
1394
1395	pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
1396
1397	if (pcpu_rt && pcpu_rt->sernum && !rt6_is_valid(pcpu_rt)) {
1398		struct rt6_info *prev, **p;
1399
1400		p = this_cpu_ptr(res->nh->rt6i_pcpu);
1401		prev = xchg(p, NULL);
1402		if (prev) {
1403			dst_dev_put(&prev->dst);
1404			dst_release(&prev->dst);
1405		}
 
1406
1407		pcpu_rt = NULL;
1408	}
1409
1410	return pcpu_rt;
1411}
1412
/* Allocate a per-cpu route for @res and install it in this CPU's slot of
 * res->nh->rt6i_pcpu.
 *
 * The slot is expected to be empty: the install uses cmpxchg() against
 * NULL and BUG()s if something was already there.  If the fib entry is
 * flagged fib6_destroying after the install, the new route's ->from
 * pointer is cleared and the fib entry reference it carried is released.
 *
 * Returns the new route, or NULL if allocation failed.  @net is unused
 * in this function.
 */
1413static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1414					    const struct fib6_result *res)
1415{
1416	struct rt6_info *pcpu_rt, *prev, **p;
1417
1418	pcpu_rt = ip6_rt_pcpu_alloc(res);
1419	if (!pcpu_rt)
1420		return NULL;
1421
1422	p = this_cpu_ptr(res->nh->rt6i_pcpu);
1423	prev = cmpxchg(p, NULL, pcpu_rt);
1424	BUG_ON(prev);
1425
1426	if (res->f6i->fib6_destroying) {
1427		struct fib6_info *from;
1428
1429		from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
1430		fib6_info_release(from);
1431	}
1432
1433	return pcpu_rt;
1434}
1435
1436/* exception hash table implementation
 *
 * rt6_exception_lock is a single file-wide spinlock.  The functions
 * below that add or remove exception entries take it (or document that
 * their caller must hold it); lookups on the read side use RCU instead.
1437 */
1438static DEFINE_SPINLOCK(rt6_exception_lock);
1439
1440/* Remove rt6_ex from hash table and free the memory
1441 * Caller must hold rt6_exception_lock
 *
 * No-op when @bucket or @rt6_ex is NULL.  Otherwise, in order:
 *  - decrement the namespace's fib_rt_cache statistic;
 *  - clear the cached route's ->from and release that fib entry reference;
 *  - detach the dst from its device (dst_dev_put());
 *  - unlink the entry from the chain (RCU-safe) and release the dst;
 *  - free the entry after a grace period (kfree_rcu());
 *  - decrement bucket->depth, warning once if it was already zero.
1442 */
1443static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1444				 struct rt6_exception *rt6_ex)
1445{
1446	struct fib6_info *from;
1447	struct net *net;
 
 
 
 
1448
1449	if (!bucket || !rt6_ex)
1450		return;
1451
1452	net = dev_net(rt6_ex->rt6i->dst.dev);
1453	net->ipv6.rt6_stats->fib_rt_cache--;
1454
1455	/* purge completely the exception to allow releasing the held resources:
1456	 * some [sk] cache may keep the dst around for unlimited time
1457	 */
1458	from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
1459	fib6_info_release(from);
1460	dst_dev_put(&rt6_ex->rt6i->dst);
1461
1462	hlist_del_rcu(&rt6_ex->hlist);
1463	dst_release(&rt6_ex->rt6i->dst);
1464	kfree_rcu(rt6_ex, rcu);
1465	WARN_ON_ONCE(!bucket->depth);
1466	bucket->depth--;
1467}
1468
1469/* Remove oldest rt6_ex in bucket and free the memory
1470 * Caller must hold rt6_exception_lock
 *
 * "Oldest" is the entry with the earliest ->stamp according to
 * time_before().  Does nothing when @bucket is NULL; an empty chain
 * leaves oldest == NULL, which rt6_remove_exception() ignores.
1471 */
1472static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1473{
1474	struct rt6_exception *rt6_ex, *oldest = NULL;
1475
1476	if (!bucket)
1477		return;
1478
1479	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1480		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1481			oldest = rt6_ex;
1482	}
1483	rt6_remove_exception(bucket, oldest);
1484}
1485
/* Hash a (dst, src) address pair to an exception bucket index.
 *
 * The input to siphash() is a struct holding both addresses.  Only .dst
 * is named in the initialiser, so .src starts out zeroed; it is
 * overwritten with *@src only when CONFIG_IPV6_SUBTREES is enabled and
 * @src is non-NULL.  The siphash key is a function-local static filled
 * once by net_get_random_once().  The 64-bit result is reduced with
 * hash_64() to FIB6_EXCEPTION_BUCKET_SIZE_SHIFT bits.
 */
1486static u32 rt6_exception_hash(const struct in6_addr *dst,
1487			      const struct in6_addr *src)
1488{
1489	static siphash_aligned_key_t rt6_exception_key;
1490	struct {
1491		struct in6_addr dst;
1492		struct in6_addr src;
1493	} __aligned(SIPHASH_ALIGNMENT) combined = {
1494		.dst = *dst,
1495	};
1496	u64 val;
1497
1498	net_get_random_once(&rt6_exception_key, sizeof(rt6_exception_key));
1499
1500#ifdef CONFIG_IPV6_SUBTREES
1501	if (src)
1502		combined.src = *src;
1503#endif
1504	val = siphash(&combined, sizeof(combined), &rt6_exception_key);
1505
1506	return hash_64(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1507}
1508
1509/* Helper function to find the cached rt in the hash table
1510 * and update bucket pointer to point to the bucket for this
1511 * (daddr, saddr) pair
1512 * Caller must hold rt6_exception_lock
 *
 * On entry *@bucket is the start of the bucket array; it is advanced by
 * the hash value even when no entry matches.  Returns NULL without
 * touching *@bucket when *@bucket or @daddr is NULL.
 * An entry matches when its destination address equals @daddr and, with
 * CONFIG_IPV6_SUBTREES and a non-NULL @saddr, its source address equals
 * @saddr.  Returns the matching entry or NULL.
1513 */
1514static struct rt6_exception *
1515__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1516			      const struct in6_addr *daddr,
1517			      const struct in6_addr *saddr)
1518{
1519	struct rt6_exception *rt6_ex;
1520	u32 hval;
1521
1522	if (!(*bucket) || !daddr)
1523		return NULL;
1524
1525	hval = rt6_exception_hash(daddr, saddr);
1526	*bucket += hval;
1527
1528	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1529		struct rt6_info *rt6 = rt6_ex->rt6i;
1530		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1531
1532#ifdef CONFIG_IPV6_SUBTREES
1533		if (matched && saddr)
1534			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1535#endif
1536		if (matched)
1537			return rt6_ex;
1538	}
1539	return NULL;
1540}
1541
1542/* Helper function to find the cached rt in the hash table
1543 * and update bucket pointer to point to the bucket for this
1544 * (daddr, saddr) pair
1545 * Caller must hold rcu_read_lock()
 *
 * Same matching rules as __rt6_find_exception_spinlock(), but the chain
 * is walked with the RCU list iterator and a one-time warning fires if
 * no RCU read lock is held.  *@bucket is advanced by the hash value even
 * when nothing matches; it is left alone when *@bucket or @daddr is NULL.
1546 */
1547static struct rt6_exception *
1548__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1549			 const struct in6_addr *daddr,
1550			 const struct in6_addr *saddr)
1551{
1552	struct rt6_exception *rt6_ex;
1553	u32 hval;
1554
1555	WARN_ON_ONCE(!rcu_read_lock_held());
1556
1557	if (!(*bucket) || !daddr)
1558		return NULL;
1559
1560	hval = rt6_exception_hash(daddr, saddr);
1561	*bucket += hval;
1562
1563	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1564		struct rt6_info *rt6 = rt6_ex->rt6i;
1565		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1566
1567#ifdef CONFIG_IPV6_SUBTREES
1568		if (matched && saddr)
1569			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1570#endif
1571		if (matched)
1572			return rt6_ex;
1573	}
1574	return NULL;
1575}
1576
/* Effective MTU for the lookup result @res.
 *
 * Uses the fib entry's fib6_pmtu when it is non-zero; otherwise the
 * mtu6 setting of the nexthop device's inet6_dev, read under
 * rcu_read_lock().  The value is capped at IP6_MAX_MTU and the
 * lwtunnel headroom for the nexthop's lwtunnel state is subtracted.
 *
 * NOTE(review): the __in6_dev_get() result is dereferenced without a NULL
 * check - assumes the nexthop device always has an inet6_dev here;
 * confirm.
 */
1577static unsigned int fib6_mtu(const struct fib6_result *res)
1578{
1579	const struct fib6_nh *nh = res->nh;
1580	unsigned int mtu;
1581
1582	if (res->f6i->fib6_pmtu) {
1583		mtu = res->f6i->fib6_pmtu;
1584	} else {
1585		struct net_device *dev = nh->fib_nh_dev;
1586		struct inet6_dev *idev;
1587
1588		rcu_read_lock();
1589		idev = __in6_dev_get(dev);
1590		mtu = idev->cnf.mtu6;
1591		rcu_read_unlock();
1592	}
1593
1594	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1595
1596	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
1597}
1598
/* Tag bit OR-ed into the low bit of the nh->rt6i_exception_bucket pointer
 * value to mark the bucket array as flushed; see the helpers below that
 * set, test and mask it.
 */
1599#define FIB6_EXCEPTION_BUCKET_FLUSHED  0x1UL
1600
1601/* used when the flushed bit is not relevant, only access to the bucket
1602 * (ie., all bucket users except rt6_insert_exception);
1603 *
1604 * called under rcu lock; sometimes called with rt6_exception_lock held
 *
 * @lock: when non-NULL the pointer is read with
 *        rcu_dereference_protected() and lockdep checks that @lock is
 *        held; when NULL plain rcu_dereference() is used.
 *
 * Returns the bucket array pointer with the FIB6_EXCEPTION_BUCKET_FLUSHED
 * bit masked off, or NULL if no bucket array is attached to @nh.
1605 */
1606static
1607struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh,
1608						       spinlock_t *lock)
1609{
1610	struct rt6_exception_bucket *bucket;
1611
1612	if (lock)
1613		bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1614						   lockdep_is_held(lock));
1615	else
1616		bucket = rcu_dereference(nh->rt6i_exception_bucket);
1617
1618	/* remove bucket flushed bit if set */
1619	if (bucket) {
1620		unsigned long p = (unsigned long)bucket;
1621
1622		p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED;
1623		bucket = (struct rt6_exception_bucket *)p;
 
 
 
1624	}
1625
1626	return bucket;
1627}
1628
/* True when the raw (unmasked) bucket pointer value @bucket has the
 * FIB6_EXCEPTION_BUCKET_FLUSHED bit set.
 */
1629static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket)
1630{
1631	unsigned long p = (unsigned long)bucket;
1632
1633	return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED);
1634}
1635
1636/* called with rt6_exception_lock held
 *
 * Re-publish nh->rt6i_exception_bucket with the
 * FIB6_EXCEPTION_BUCKET_FLUSHED bit set.  The current pointer is read
 * with rcu_dereference_protected() against @lock and written back with
 * rcu_assign_pointer().
 */
1637static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh,
1638					      spinlock_t *lock)
1639{
1640	struct rt6_exception_bucket *bucket;
1641	unsigned long p;
1642
1643	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1644					   lockdep_is_held(lock));
1645
1646	p = (unsigned long)bucket;
1647	p |= FIB6_EXCEPTION_BUCKET_FLUSHED;
1648	bucket = (struct rt6_exception_bucket *)p;
1649	rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1650}
1651
/* Insert the cached route @nrt into the exception table of res->nh.
 *
 * Under rt6_exception_lock (BH disabled):
 *  - allocate the bucket array on first use (GFP_ATOMIC), or refuse with
 *    -EINVAL if the existing array pointer carries the flushed bit;
 *  - refuse with -EINVAL unless @nrt's raw MTU metric is below
 *    fib6_mtu(res);
 *  - remove any existing exception for the same key, then add a new entry
 *    at the head of the chain stamped with the current jiffies;
 *  - bump the bucket depth and the fib_rt_cache statistic, then evict the
 *    oldest entries while the depth exceeds a randomised limit in
 *    [FIB6_MAX_DEPTH, 2 * FIB6_MAX_DEPTH).
 *
 * On success the fib entry's sernum is updated under its table lock and
 * fib6 garbage collection is kicked.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL as
 * described above.  This function takes no dst reference on @nrt -
 * presumably the caller's reference is handed to the table on success;
 * confirm against callers.
 */
1652static int rt6_insert_exception(struct rt6_info *nrt,
1653				const struct fib6_result *res)
1654{
1655	struct net *net = dev_net(nrt->dst.dev);
1656	struct rt6_exception_bucket *bucket;
1657	struct fib6_info *f6i = res->f6i;
1658	struct in6_addr *src_key = NULL;
1659	struct rt6_exception *rt6_ex;
1660	struct fib6_nh *nh = res->nh;
1661	int max_depth;
1662	int err = 0;
1663
1664	spin_lock_bh(&rt6_exception_lock);
1665
	/* read the raw pointer here (not fib6_nh_get_excptn_bucket()) so the
	 * flushed bit is still visible for the check below
	 */
1666	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1667					  lockdep_is_held(&rt6_exception_lock));
1668	if (!bucket) {
1669		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1670				 GFP_ATOMIC);
1671		if (!bucket) {
1672			err = -ENOMEM;
1673			goto out;
1674		}
1675		rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1676	} else if (fib6_nh_excptn_bucket_flushed(bucket)) {
1677		err = -EINVAL;
1678		goto out;
1679	}
1680
1681#ifdef CONFIG_IPV6_SUBTREES
1682	/* fib6_src.plen != 0 indicates f6i is in subtree
1683	 * and exception table is indexed by a hash of
1684	 * both fib6_dst and fib6_src.
1685	 * Otherwise, the exception table is indexed by
1686	 * a hash of only fib6_dst.
1687	 */
1688	if (f6i->fib6_src.plen)
1689		src_key = &nrt->rt6i_src.addr;
1690#endif
1691	/* rt6_mtu_change() might lower mtu on f6i.
1692	 * Only insert this exception route if its mtu
1693	 * is less than f6i's mtu value.
1694	 */
1695	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
1696		err = -EINVAL;
1697		goto out;
1698	}
1699
	/* from here on "bucket" points at the chain for this key, not at
	 * the start of the array
	 */
1700	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1701					       src_key);
1702	if (rt6_ex)
1703		rt6_remove_exception(bucket, rt6_ex);
1704
1705	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1706	if (!rt6_ex) {
1707		err = -ENOMEM;
1708		goto out;
1709	}
1710	rt6_ex->rt6i = nrt;
1711	rt6_ex->stamp = jiffies;
1712	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1713	bucket->depth++;
1714	net->ipv6.rt6_stats->fib_rt_cache++;
1715
1716	/* Randomize max depth to avoid some side channels attacks. */
1717	max_depth = FIB6_MAX_DEPTH + get_random_u32_below(FIB6_MAX_DEPTH);
1718	while (bucket->depth > max_depth)
1719		rt6_exception_remove_oldest(bucket);
1720
1721out:
1722	spin_unlock_bh(&rt6_exception_lock);
1723
1724	/* Update fn->fn_sernum to invalidate all cached dst */
1725	if (!err) {
1726		spin_lock_bh(&f6i->fib6_table->tb6_lock);
1727		fib6_update_sernum(net, f6i);
1728		spin_unlock_bh(&f6i->fib6_table->tb6_lock);
1729		fib6_force_start_gc(net);
1730	}
1731
1732	return err;
1733}
1734
/* Remove exception entries from @nh under rt6_exception_lock.
 *
 * @from == NULL: every entry in every bucket is removed, the bucket
 *                pointer is tagged as flushed beforehand, and a one-time
 *                warning fires if a bucket still has non-zero depth
 *                afterwards.
 * @from != NULL: only entries whose cached route's ->from equals @from
 *                are removed; the flushed bit is not set.
 *
 * Does nothing if @nh has no bucket array.
 */
1735static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
1736{
1737	struct rt6_exception_bucket *bucket;
1738	struct rt6_exception *rt6_ex;
1739	struct hlist_node *tmp;
1740	int i;
1741
1742	spin_lock_bh(&rt6_exception_lock);
1743
1744	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
1745	if (!bucket)
1746		goto out;
1747
1748	/* Prevent rt6_insert_exception() to recreate the bucket list */
1749	if (!from)
1750		fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock);
1751
1752	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1753		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
1754			if (!from ||
1755			    rcu_access_pointer(rt6_ex->rt6i->from) == from)
1756				rt6_remove_exception(bucket, rt6_ex);
1757		}
1758		WARN_ON_ONCE(!from && bucket->depth);
1759		bucket++;
1760	}
1761out:
1762	spin_unlock_bh(&rt6_exception_lock);
1763}
1764
/* nexthop_for_each_fib6_nh() callback: flush from @nh the exceptions that
 * belong to the fib entry passed as @arg.  Always returns 0.
 */
1765static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)
1766{
1767	struct fib6_info *f6i = arg;
1768
1769	fib6_nh_flush_exceptions(nh, f6i);
1770
1771	return 0;
1772}
1773
/* Flush the exceptions that were created from @f6i.
 *
 * With a nexthop object (f6i->nh) each of its fib6_nh is visited through
 * nexthop_for_each_fib6_nh(); otherwise the entry's own fib6_nh is
 * flushed.  In both cases @f6i is passed as the "from" filter, so only
 * entries whose ->from is @f6i are removed.
 */
1774void rt6_flush_exceptions(struct fib6_info *f6i)
1775{
1776	if (f6i->nh)
1777		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions,
1778					 f6i);
1779	else
1780		fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
1781}
1782
1783/* Find cached rt in the hash table inside passed in rt
1784 * Caller has to hold rcu_read_lock()
 *
 * Looks up (@daddr, source key) in the exception table of res->nh.
 * Without CONFIG_IPV6_SUBTREES the source key is always NULL.
 * Returns the cached route if an entry is found and
 * rt6_check_expired() is false for it, otherwise NULL.  No reference is
 * taken on the returned route here.
1785 */
1786static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
1787					   const struct in6_addr *daddr,
1788					   const struct in6_addr *saddr)
1789{
1790	const struct in6_addr *src_key = NULL;
1791	struct rt6_exception_bucket *bucket;
1792	struct rt6_exception *rt6_ex;
1793	struct rt6_info *ret = NULL;
1794
1795#ifdef CONFIG_IPV6_SUBTREES
1796	/* fib6_src.plen != 0 indicates f6i is in subtree
1797	 * and exception table is indexed by a hash of
1798	 * both fib6_dst and fib6_src.
1799	 * However, the src addr used to create the hash
1800	 * might not be exactly the passed in saddr which
1801	 * is a /128 addr from the flow.
1802	 * So we need to use f6i->fib6_src to redo lookup
1803	 * if the passed in saddr does not find anything.
1804	 * (See the logic in ip6_rt_cache_alloc() on how
1805	 * rt->rt6i_src is updated.)
1806	 */
1807	if (res->f6i->fib6_src.plen)
1808		src_key = saddr;
1809find_ex:
1810#endif
	/* the bucket base is re-read on every pass because the find helper
	 * advances the pointer it is given
	 */
1811	bucket = fib6_nh_get_excptn_bucket(res->nh, NULL);
1812	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1813
1814	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1815		ret = rt6_ex->rt6i;
1816
1817#ifdef CONFIG_IPV6_SUBTREES
1818	/* Use fib6_src as src_key and redo lookup */
1819	if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
1820		src_key = &res->f6i->fib6_src.addr;
1821		goto find_ex;
1822	}
1823#endif
1824
1825	return ret;
1826}
1827
1828/* Remove the passed in cached rt from the hash table that contains it
 *
 * @nh:   nexthop whose exception table is searched
 * @plen: source prefix length of the owning fib entry; a non-zero value
 *        makes the lookup use rt->rt6i_src.addr as source key
 *        (CONFIG_IPV6_SUBTREES only)
 * @rt:   cached route to remove
 *
 * Returns 0 if an entry was found and removed, -ENOENT if @nh has no
 * bucket array or no entry matches.  The first emptiness check is done
 * without the lock; the search and removal run under rt6_exception_lock.
 */
1829static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
1830				    const struct rt6_info *rt)
1831{
1832	const struct in6_addr *src_key = NULL;
1833	struct rt6_exception_bucket *bucket;
1834	struct rt6_exception *rt6_ex;
1835	int err;
1836
1837	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
1838		return -ENOENT;
1839
1840	spin_lock_bh(&rt6_exception_lock);
1841	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
1842
1843#ifdef CONFIG_IPV6_SUBTREES
1844	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1845	 * and exception table is indexed by a hash of
1846	 * both rt6i_dst and rt6i_src.
1847	 * Otherwise, the exception table is indexed by
1848	 * a hash of only rt6i_dst.
1849	 */
1850	if (plen)
1851		src_key = &rt->rt6i_src.addr;
1852#endif
1853	rt6_ex = __rt6_find_exception_spinlock(&bucket,
1854					       &rt->rt6i_dst.addr,
1855					       src_key);
1856	if (rt6_ex) {
1857		rt6_remove_exception(bucket, rt6_ex);
1858		err = 0;
1859	} else {
1860		err = -ENOENT;
1861	}
1862
1863	spin_unlock_bh(&rt6_exception_lock);
1864	return err;
1865}
1866
/* Argument bundle handed to rt6_nh_remove_exception_rt() through
 * nexthop_for_each_fib6_nh().
 */
1867struct fib6_nh_excptn_arg {
1868	struct rt6_info	*rt;	/* cached route to remove */
1869	int		plen;	/* filled from from->fib6_src.plen */
1870};
1871
/* nexthop_for_each_fib6_nh() callback: try to remove arg->rt from @nh's
 * exception table.  Returns 1 when an entry was removed and 0 otherwise.
 */
1872static int rt6_nh_remove_exception_rt(struct fib6_nh *nh, void *_arg)
1873{
1874	struct fib6_nh_excptn_arg *arg = _arg;
1875	int err;
1876
1877	err = fib6_nh_remove_exception(nh, arg->plen, arg->rt);
1878	if (err == 0)
1879		return 1;
1880
1881	return 0;
1882}
1883
/* Remove the cached route @rt from the exception table of the fib entry
 * it came from.
 *
 * Returns -EINVAL if @rt has no ->from or is not flagged RTF_CACHE,
 * 0 if an entry was removed, -ENOENT if none was found.  When the fib
 * entry uses a nexthop object, each of its fib6_nh is tried in turn.
 *
 * rt->from is read with rcu_dereference(), so presumably the caller must
 * hold rcu_read_lock() - confirm against callers.
 */
1884static int rt6_remove_exception_rt(struct rt6_info *rt)
1885{
1886	struct fib6_info *from;
1887
1888	from = rcu_dereference(rt->from);
1889	if (!from || !(rt->rt6i_flags & RTF_CACHE))
1890		return -EINVAL;
1891
1892	if (from->nh) {
1893		struct fib6_nh_excptn_arg arg = {
1894			.rt = rt,
1895			.plen = from->fib6_src.plen
1896		};
1897		int rc;
1898
1899		/* rc = 1 means an entry was found */
1900		rc = nexthop_for_each_fib6_nh(from->nh,
1901					      rt6_nh_remove_exception_rt,
1902					      &arg);
1903		return rc ? 0 : -ENOENT;
1904	}
1905
1906	return fib6_nh_remove_exception(from->fib6_nh,
1907					from->fib6_src.plen, rt);
1908}
1909
1910/* Find rt6_ex which contains the passed in rt cache and
1911 * refresh its stamp
 *
 * The lookup uses the RCU variants of the bucket and find helpers and
 * takes no lock in this function; the stamp is set to the current
 * jiffies when an entry is found.  @plen selects whether the source
 * address takes part in the key (CONFIG_IPV6_SUBTREES only).
1912 */
1913static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
1914				     const struct rt6_info *rt)
1915{
1916	const struct in6_addr *src_key = NULL;
1917	struct rt6_exception_bucket *bucket;
1918	struct rt6_exception *rt6_ex;
1919
1920	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
1921#ifdef CONFIG_IPV6_SUBTREES
1922	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1923	 * and exception table is indexed by a hash of
1924	 * both rt6i_dst and rt6i_src.
1925	 * Otherwise, the exception table is indexed by
1926	 * a hash of only rt6i_dst.
1927	 */
1928	if (plen)
1929		src_key = &rt->rt6i_src.addr;
1930#endif
1931	rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
1932	if (rt6_ex)
1933		rt6_ex->stamp = jiffies;
1934}
1935
/* Search criteria and result slot for fib6_nh_find_match(). */
1936struct fib6_nh_match_arg {
1937	const struct net_device *dev;	/* in: device the nexthop must use */
1938	const struct in6_addr	*gw;	/* in: gateway to match, or NULL */
1939	struct fib6_nh		*match;	/* out: set to the matching nexthop */
1940};
1941
1942/* determine if fib6_nh has given device and gateway
 *
 * nexthop_for_each_fib6_nh() callback.  @nh matches when its device
 * equals arg->dev and either both sides have no gateway, or both have
 * one and the addresses are equal.  On a match arg->match is set and 1
 * is returned; otherwise 0 is returned and arg->match is left untouched.
 */
1943static int fib6_nh_find_match(struct fib6_nh *nh, void *_arg)
1944{
1945	struct fib6_nh_match_arg *arg = _arg;
1946
1947	if (arg->dev != nh->fib_nh_dev ||
1948	    (arg->gw && !nh->fib_nh_gw_family) ||
1949	    (!arg->gw && nh->fib_nh_gw_family) ||
1950	    (arg->gw && !ipv6_addr_equal(arg->gw, &nh->fib_nh_gw6)))
1951		return 0;
1952
1953	arg->match = nh;
1954
1955	/* found a match, break the loop */
1956	return 1;
1957}
1958
/* Refresh the exception-table stamp of the cached route @rt.
 *
 * Runs under its own rcu_read_lock().  Does nothing if @rt has no ->from
 * or is not flagged RTF_CACHE.  When the fib entry uses a nexthop object,
 * the fib6_nh matching @rt's device and gateway is searched for first and
 * nothing is updated if none matches; otherwise the entry's own fib6_nh
 * is used.
 */
1959static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1960{
1961	struct fib6_info *from;
1962	struct fib6_nh *fib6_nh;
1963
1964	rcu_read_lock();
1965
1966	from = rcu_dereference(rt->from);
1967	if (!from || !(rt->rt6i_flags & RTF_CACHE))
1968		goto unlock;
1969
1970	if (from->nh) {
		/* .match is left out of the initialiser and therefore
		 * starts as NULL
		 */
1971		struct fib6_nh_match_arg arg = {
1972			.dev = rt->dst.dev,
1973			.gw = &rt->rt6i_gateway,
1974		};
1975
1976		nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg);
1977
1978		if (!arg.match)
1979			goto unlock;
1980		fib6_nh = arg.match;
1981	} else {
1982		fib6_nh = from->fib6_nh;
1983	}
1984	fib6_nh_update_exception(fib6_nh, from->fib6_src.plen, rt);
1985unlock:
1986	rcu_read_unlock();
1987}
1988
/* Decide whether the PMTU of cached route @rt may be set to @mtu.
 *
 * Returns true when dst_mtu(@rt) is at least @mtu, or when it equals the
 * device's configured mtu6 (idev->cnf.mtu6); false otherwise.
 */
1989static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1990					 struct rt6_info *rt, int mtu)
1991{
1992	/* If the new MTU is lower than the route PMTU, this new MTU will be the
1993	 * lowest MTU in the path: always allow updating the route PMTU to
1994	 * reflect PMTU decreases.
1995	 *
1996	 * If the new MTU is higher, and the route PMTU is equal to the local
1997	 * MTU, this means the old MTU is the lowest in the path, so allow
1998	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1999	 * handle this.
2000	 */
2001
2002	if (dst_mtu(&rt->dst) >= mtu)
2003		return true;
2004
2005	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
2006		return true;
2007
2008	return false;
2009}
2010
/* Apply a device MTU change to every exception route of @nh.
 *
 * Walks all buckets and sets RTAX_MTU to @mtu on each cached route that
 * already has a non-zero raw MTU metric and for which
 * rt6_mtu_change_route_allowed() agrees.
 *
 * The bucket pointer is fetched with the rt6_exception_lock lockdep
 * check and the chains are walked with the non-RCU iterator, so the
 * caller is expected to hold rt6_exception_lock; this function does not
 * take it.
 */
2011static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
2012				       const struct fib6_nh *nh, int mtu)
2013{
2014	struct rt6_exception_bucket *bucket;
2015	struct rt6_exception *rt6_ex;
2016	int i;
2017
2018	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2019	if (!bucket)
2020		return;
2021
2022	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2023		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
2024			struct rt6_info *entry = rt6_ex->rt6i;
2025
2026			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
2027			 * route), the metrics of its rt->from have already
2028			 * been updated.
2029			 */
2030			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
2031			    rt6_mtu_change_route_allowed(idev, entry, mtu))
2032				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
2033		}
2034		bucket++;
2035	}
2036}
2037
/* Both flags together: a cached (exception) route that goes via a gateway. */
2038#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2039
/* Remove from @nh every exception route that has both RTF_GATEWAY and
 * RTF_CACHE set and whose gateway address equals @gateway.
 *
 * Returns early, without locking, if @nh has no bucket array; otherwise
 * the walk and the removals run under rt6_exception_lock (BH disabled).
 */
2040static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh,
2041					    const struct in6_addr *gateway)
2042{
2043	struct rt6_exception_bucket *bucket;
2044	struct rt6_exception *rt6_ex;
2045	struct hlist_node *tmp;
2046	int i;
2047
2048	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
2049		return;
2050
2051	spin_lock_bh(&rt6_exception_lock);
2052	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2053	if (bucket) {
2054		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2055			hlist_for_each_entry_safe(rt6_ex, tmp,
2056						  &bucket->chain, hlist) {
2057				struct rt6_info *entry = rt6_ex->rt6i;
2058
2059				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
2060				    RTF_CACHE_GATEWAY &&
2061				    ipv6_addr_equal(gateway,
2062						    &entry->rt6i_gateway)) {
2063					rt6_remove_exception(bucket, rt6_ex);
2064				}
2065			}
2066			bucket++;
2067		}
2068	}
2069
2070	spin_unlock_bh(&rt6_exception_lock);
2071}
2072
/* Garbage-collection check for one exception entry.
 *
 * The entry is removed when any of these holds:
 *  - it is not RTF_EXPIRES and was last used at least gc_args->timeout
 *    before @now;
 *  - it is RTF_EXPIRES and its dst.expires lies in the past;
 *  - it is RTF_GATEWAY and the neighbour entry for its gateway is missing
 *    or not flagged NTF_ROUTER.
 * Otherwise gc_args->more is incremented to report a surviving entry.
 *
 * Calls rt6_remove_exception(), which requires rt6_exception_lock to be
 * held by the caller.
 * NOTE(review): the RTF_EXPIRES branch compares against jiffies rather
 * than @now - looks intentional but is not explained here; confirm.
 */
2073static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
2074				      struct rt6_exception *rt6_ex,
2075				      struct fib6_gc_args *gc_args,
2076				      unsigned long now)
2077{
2078	struct rt6_info *rt = rt6_ex->rt6i;
2079
2080	/* we are pruning and obsoleting aged-out and non gateway exceptions
2081	 * even if others have still references to them, so that on next
2082	 * dst_check() such references can be dropped.
2083	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
2084	 * expired, independently from their aging, as per RFC 8201 section 4
2085	 */
2086	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
2087		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
2088			RT6_TRACE("aging clone %p\n", rt);
2089			rt6_remove_exception(bucket, rt6_ex);
2090			return;
2091		}
2092	} else if (time_after(jiffies, rt->dst.expires)) {
2093		RT6_TRACE("purging expired route %p\n", rt);
2094		rt6_remove_exception(bucket, rt6_ex);
2095		return;
2096	}
2097
2098	if (rt->rt6i_flags & RTF_GATEWAY) {
2099		struct neighbour *neigh;
2100
2101		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
2102
2103		if (!(neigh && (neigh->flags & NTF_ROUTER))) {
2104			RT6_TRACE("purging route %p via non-router but gateway\n",
2105				  rt);
2106			rt6_remove_exception(bucket, rt6_ex);
2107			return;
2108		}
2109	}
2110
2111	gc_args->more++;
2112}
2113
/* Run rt6_age_examine_exception() on every exception entry of @nh.
 *
 * Returns early, without locking, if @nh has no bucket array.  Otherwise
 * the walk runs inside rcu_read_lock_bh() with rt6_exception_lock held;
 * the _safe list iterator is used because entries may be removed during
 * the walk.
 */
2114static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
2115				   struct fib6_gc_args *gc_args,
2116				   unsigned long now)
2117{
2118	struct rt6_exception_bucket *bucket;
2119	struct rt6_exception *rt6_ex;
2120	struct hlist_node *tmp;
2121	int i;
2122
2123	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
2124		return;
2125
2126	rcu_read_lock_bh();
2127	spin_lock(&rt6_exception_lock);
2128	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2129	if (bucket) {
2130		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2131			hlist_for_each_entry_safe(rt6_ex, tmp,
2132						  &bucket->chain, hlist) {
2133				rt6_age_examine_exception(bucket, rt6_ex,
2134							  gc_args, now);
2135			}
2136			bucket++;
2137		}
2138	}
2139	spin_unlock(&rt6_exception_lock);
2140	rcu_read_unlock_bh();
2141}
2142
/* Argument bundle handed to rt6_nh_age_exceptions() through
 * nexthop_for_each_fib6_nh().
 */
2143struct fib6_nh_age_excptn_arg {
2144	struct fib6_gc_args	*gc_args;	/* forwarded to the ager */
2145	unsigned long		now;		/* reference time for aging */
2146};
2147
/* nexthop_for_each_fib6_nh() callback: age the exceptions of @nh with the
 * gc arguments and time carried in @_arg.  Always returns 0.
 */
2148static int rt6_nh_age_exceptions(struct fib6_nh *nh, void *_arg)
2149{
2150	struct fib6_nh_age_excptn_arg *arg = _arg;
2151
2152	fib6_nh_age_exceptions(nh, arg->gc_args, arg->now);
2153	return 0;
2154}
2155
/* Age the exception routes hanging off @f6i.
 *
 * With a nexthop object (f6i->nh) every fib6_nh of it is visited through
 * nexthop_for_each_fib6_nh(); otherwise only the entry's own fib6_nh is
 * processed.  @gc_args and @now are forwarded unchanged.
 */
2156void rt6_age_exceptions(struct fib6_info *f6i,
2157			struct fib6_gc_args *gc_args,
2158			unsigned long now)
2159{
2160	if (f6i->nh) {
2161		struct fib6_nh_age_excptn_arg arg = {
2162			.gc_args = gc_args,
2163			.now = now
2164		};
2165
2166		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_age_exceptions,
2167					 &arg);
2168	} else {
2169		fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
2170	}
2171}
2172
2173/* must be called with rcu lock held
 *
 * Look up @fl6 in @table and leave the selected entry in @res.
 *
 * Starting at the node found for fl6->daddr / fl6->saddr, rt6_select()
 * is run; whenever it yields fib6_null_entry the search backtracks with
 * fib6_backtrack() and retries.  If backtracking is exhausted and
 * @strict contained RT6_LOOKUP_F_REACHABLE, that flag is dropped and the
 * search restarts once more from the original node.
 *
 * trace_fib6_table_lookup() is called before returning.  The return
 * value is always 0; the outcome is reported through @res.
 */
2174int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
2175		      struct flowi6 *fl6, struct fib6_result *res, int strict)
2176{
2177	struct fib6_node *fn, *saved_fn;
2178
2179	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2180	saved_fn = fn;
2181
2182redo_rt6_select:
2183	rt6_select(net, fn, oif, res, strict);
2184	if (res->f6i == net->ipv6.fib6_null_entry) {
2185		fn = fib6_backtrack(fn, &fl6->saddr);
2186		if (fn)
2187			goto redo_rt6_select;
2188		else if (strict & RT6_LOOKUP_F_REACHABLE) {
2189			/* also consider unreachable route */
2190			strict &= ~RT6_LOOKUP_F_REACHABLE;
2191			fn = saved_fn;
2192			goto redo_rt6_select;
2193		}
2194	}
2195
2196	trace_fib6_table_lookup(net, res, table, fl6);
2197
2198	return 0;
2199}
2200
/* Main per-table route resolution.
 *
 * @oif:   interface index passed to the table lookup and path selection
 * @flags: RT6_LOOKUP_F_* flags; only IFACE and IGNORE_LINKSTATE are
 *         carried into the "strict" value used for the lookup, and
 *         RT6_LOOKUP_F_REACHABLE is added when forwarding is disabled for
 *         the namespace (devconf_all->forwarding == 0)
 *
 * Result selection, in order:
 *  1. fib6_null_entry from the table lookup -> ip6_null_entry;
 *  2. a hit in the exception table -> that cached route;
 *  3. FLOWI_FLAG_KNOWN_NH set and the nexthop has no gateway -> a fresh
 *     RTF_CACHE clone put on the uncached list and returned directly,
 *     always carrying a reference;
 *  4. otherwise this CPU's per-cpu route, created on demand with BH
 *     disabled.
 * If steps 3/4 fail to produce a route, ip6_null_entry is used.
 *
 * Except for the step-3 early return, a reference is taken through
 * ip6_hold_safe() unless RT6_LOOKUP_F_DST_NOREF is set.  A one-time
 * warning fires if DST_NOREF is requested without an RCU read lock held.
 */
2201struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
2202			       int oif, struct flowi6 *fl6,
2203			       const struct sk_buff *skb, int flags)
2204{
2205	struct fib6_result res = {};
2206	struct rt6_info *rt = NULL;
2207	int strict = 0;
2208
2209	WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) &&
2210		     !rcu_read_lock_held());
2211
2212	strict |= flags & RT6_LOOKUP_F_IFACE;
2213	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
2214	if (net->ipv6.devconf_all->forwarding == 0)
2215		strict |= RT6_LOOKUP_F_REACHABLE;
2216
2217	rcu_read_lock();
2218
2219	fib6_table_lookup(net, table, oif, fl6, &res, strict);
2220	if (res.f6i == net->ipv6.fib6_null_entry)
2221		goto out;
2222
2223	fib6_select_path(net, &res, fl6, oif, false, skb, strict);
2224
2225	/* Search through exception table */
2226	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
2227	if (rt) {
2228		goto out;
2229	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
2230			    !res.nh->fib_nh_gw_family)) {
2231		/* Create a RTF_CACHE clone which will not be
2232		 * owned by the fib6 tree.  It is for the special case where
2233		 * the daddr in the skb during the neighbor look-up is different
2234		 * from the fl6->daddr used to look-up route here.
2235		 */
2236		rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
2237
2238		if (rt) {
2239			/* 1 refcnt is taken during ip6_rt_cache_alloc().
2240			 * As rt6_uncached_list_add() does not consume refcnt,
2241			 * this refcnt is always returned to the caller even
2242			 * if caller sets RT6_LOOKUP_F_DST_NOREF flag.
2243			 */
2244			rt6_uncached_list_add(rt);
2245			rcu_read_unlock();
2246
2247			return rt;
2248		}
2249	} else {
2250		/* Get a percpu copy */
2251		local_bh_disable();
2252		rt = rt6_get_pcpu_route(&res);
2253
2254		if (!rt)
2255			rt = rt6_make_pcpu_route(net, &res);
2256
2257		local_bh_enable();
2258	}
2259out:
2260	if (!rt)
2261		rt = net->ipv6.ip6_null_entry;
2262	if (!(flags & RT6_LOOKUP_F_DST_NOREF))
2263		ip6_hold_safe(net, &rt);
2264	rcu_read_unlock();
2265
2266	return rt;
2267}
2268EXPORT_SYMBOL_GPL(ip6_pol_route);
2269
/* Input-path per-table lookup: ip6_pol_route() with the flow's incoming
 * interface (fl6->flowi6_iif) as the interface argument.
 */
2270INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_input(struct net *net,
2271					    struct fib6_table *table,
2272					    struct flowi6 *fl6,
2273					    const struct sk_buff *skb,
2274					    int flags)
2275{
2276	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
2277}
2278
/* Route lookup for a packet received on @dev.
 *
 * RT6_LOOKUP_F_IFACE is added to @flags when rt6_need_strict() is true
 * for the flow's destination address and @dev is not of type
 * ARPHRD_PIMREG.  The lookup then goes through fib6_rule_lookup() with
 * ip6_pol_route_input() as the per-table function.
 */
2279struct dst_entry *ip6_route_input_lookup(struct net *net,
2280					 struct net_device *dev,
2281					 struct flowi6 *fl6,
2282					 const struct sk_buff *skb,
2283					 int flags)
2284{
2285	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
2286		flags |= RT6_LOOKUP_F_IFACE;
2287
2288	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
2289}
2290EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
2291
/* Fill the L3 multipath hash inputs in @keys (source and destination
 * address, flow label, next-header protocol) for @skb.
 *
 * @flkeys: optional pre-dissected flow keys; may be NULL.
 *
 * Source of the values:
 *  - If the outer header is ICMPv6, the ICMPv6 header can be read, it is
 *    an error message (icmpv6_is_err()) and the embedded IPv6 header can
 *    be read, the embedded (inner) header is used and @flkeys is ignored.
 *  - In every other case @flkeys is used when non-NULL, otherwise the
 *    outer IPv6 header.
 *
 * Headers are fetched with skb_header_pointer() into on-stack copies, so
 * unreadable headers simply fall back to the second case.
 */
2292static void ip6_multipath_l3_keys(const struct sk_buff *skb,
2293				  struct flow_keys *keys,
2294				  struct flow_keys *flkeys)
2295{
2296	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
2297	const struct ipv6hdr *key_iph = outer_iph;
2298	struct flow_keys *_flkeys = flkeys;
2299	const struct ipv6hdr *inner_iph;
2300	const struct icmp6hdr *icmph;
2301	struct ipv6hdr _inner_iph;
2302	struct icmp6hdr _icmph;
2303
2304	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
2305		goto out;
2306
2307	icmph = skb_header_pointer(skb, skb_transport_offset(skb),
2308				   sizeof(_icmph), &_icmph);
2309	if (!icmph)
2310		goto out;
2311
2312	if (!icmpv6_is_err(icmph->icmp6_type))
2313		goto out;
2314
2315	inner_iph = skb_header_pointer(skb,
2316				       skb_transport_offset(skb) + sizeof(*icmph),
2317				       sizeof(_inner_iph), &_inner_iph);
2318	if (!inner_iph)
2319		goto out;
2320
	/* ICMPv6 error: use the embedded header and ignore the supplied
	 * flow keys
	 */
2321	key_iph = inner_iph;
2322	_flkeys = NULL;
2323out:
2324	if (_flkeys) {
2325		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2326		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2327		keys->tags.flow_label = _flkeys->tags.flow_label;
2328		keys->basic.ip_proto = _flkeys->basic.ip_proto;
2329	} else {
2330		keys->addrs.v6addrs.src = key_iph->saddr;
2331		keys->addrs.v6addrs.dst = key_iph->daddr;
2332		keys->tags.flow_label = ip6_flowlabel(key_iph);
2333		keys->basic.ip_proto = key_iph->nexthdr;
2334	}
2335}
2336
2337static u32 rt6_multipath_custom_hash_outer(const struct net *net,
2338					   const struct sk_buff *skb,
2339					   bool *p_has_inner)
2340{
2341	u32 hash_fields = ip6_multipath_hash_fields(net);
2342	struct flow_keys keys, hash_keys;
2343
2344	if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
2345		return 0;
2346
2347	memset(&hash_keys, 0, sizeof(hash_keys));
2348	skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
2349
2350	hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2351	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
2352		hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
2353	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
2354		hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
2355	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
2356		hash_keys.basic.ip_proto = keys.basic.ip_proto;
2357	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
2358		hash_keys.tags.flow_label = keys.tags.flow_label;
2359	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
2360		hash_keys.ports.src = keys.ports.src;
2361	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
2362		hash_keys.ports.dst = keys.ports.dst;
2363
2364	*p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
2365	return flow_hash_from_keys(&hash_keys);
2366}
2367
2368static u32 rt6_multipath_custom_hash_inner(const struct net *net,
2369					   const struct sk_buff *skb,
2370					   bool has_inner)
2371{
2372	u32 hash_fields = ip6_multipath_hash_fields(net);
2373	struct flow_keys keys, hash_keys;
2374
2375	/* We assume the packet carries an encapsulation, but if none was
2376	 * encountered during dissection of the outer flow, then there is no
2377	 * point in calling the flow dissector again.
2378	 */
2379	if (!has_inner)
2380		return 0;
2381
2382	if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK))
2383		return 0;
2384
2385	memset(&hash_keys, 0, sizeof(hash_keys));
2386	skb_flow_dissect_flow_keys(skb, &keys, 0);
2387
2388	if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION))
2389		return 0;
2390
2391	if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2392		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2393		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
2394			hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
2395		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
2396			hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
2397	} else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2398		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2399		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
2400			hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
2401		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
2402			hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
2403		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
2404			hash_keys.tags.flow_label = keys.tags.flow_label;
2405	}
2406
2407	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
2408		hash_keys.basic.ip_proto = keys.basic.ip_proto;
2409	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
2410		hash_keys.ports.src = keys.ports.src;
2411	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
2412		hash_keys.ports.dst = keys.ports.dst;
2413
2414	return flow_hash_from_keys(&hash_keys);
2415}
2416
2417static u32 rt6_multipath_custom_hash_skb(const struct net *net,
2418					 const struct sk_buff *skb)
2419{
2420	u32 mhash, mhash_inner;
2421	bool has_inner = true;
2422
2423	mhash = rt6_multipath_custom_hash_outer(net, skb, &has_inner);
2424	mhash_inner = rt6_multipath_custom_hash_inner(net, skb, has_inner);
2425
2426	return jhash_2words(mhash, mhash_inner, 0);
2427}
2428
2429static u32 rt6_multipath_custom_hash_fl6(const struct net *net,
2430					 const struct flowi6 *fl6)
2431{
2432	u32 hash_fields = ip6_multipath_hash_fields(net);
2433	struct flow_keys hash_keys;
2434
2435	if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
2436		return 0;
2437
2438	memset(&hash_keys, 0, sizeof(hash_keys));
2439	hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2440	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
2441		hash_keys.addrs.v6addrs.src = fl6->saddr;
2442	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
2443		hash_keys.addrs.v6addrs.dst = fl6->daddr;
2444	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
2445		hash_keys.basic.ip_proto = fl6->flowi6_proto;
2446	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
2447		hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2448	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
2449		hash_keys.ports.src = fl6->fl6_sport;
2450	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
2451		hash_keys.ports.dst = fl6->fl6_dport;
2452
2453	return flow_hash_from_keys(&hash_keys);
2454}
2455
2456/* if skb is set it will be used and fl6 can be NULL */
2457u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2458		       const struct sk_buff *skb, struct flow_keys *flkeys)
2459{
2460	struct flow_keys hash_keys;
2461	u32 mhash = 0;
2462
2463	switch (ip6_multipath_hash_policy(net)) {
2464	case 0:
2465		memset(&hash_keys, 0, sizeof(hash_keys));
2466		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2467		if (skb) {
2468			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2469		} else {
2470			hash_keys.addrs.v6addrs.src = fl6->saddr;
2471			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2472			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2473			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2474		}
2475		mhash = flow_hash_from_keys(&hash_keys);
2476		break;
2477	case 1:
2478		if (skb) {
2479			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2480			struct flow_keys keys;
2481
2482			/* short-circuit if we already have L4 hash present */
2483			if (skb->l4_hash)
2484				return skb_get_hash_raw(skb) >> 1;
2485
2486			memset(&hash_keys, 0, sizeof(hash_keys));
2487
2488			if (!flkeys) {
2489				skb_flow_dissect_flow_keys(skb, &keys, flag);
2490				flkeys = &keys;
2491			}
2492			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2493			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2494			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2495			hash_keys.ports.src = flkeys->ports.src;
2496			hash_keys.ports.dst = flkeys->ports.dst;
2497			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2498		} else {
2499			memset(&hash_keys, 0, sizeof(hash_keys));
2500			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2501			hash_keys.addrs.v6addrs.src = fl6->saddr;
2502			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2503			hash_keys.ports.src = fl6->fl6_sport;
2504			hash_keys.ports.dst = fl6->fl6_dport;
2505			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2506		}
2507		mhash = flow_hash_from_keys(&hash_keys);
2508		break;
2509	case 2:
2510		memset(&hash_keys, 0, sizeof(hash_keys));
2511		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2512		if (skb) {
2513			struct flow_keys keys;
2514
2515			if (!flkeys) {
2516				skb_flow_dissect_flow_keys(skb, &keys, 0);
2517				flkeys = &keys;
2518			}
2519
2520			/* Inner can be v4 or v6 */
2521			if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2522				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2523				hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
2524				hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
2525			} else if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2526				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2527				hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2528				hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2529				hash_keys.tags.flow_label = flkeys->tags.flow_label;
2530				hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2531			} else {
2532				/* Same as case 0 */
2533				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2534				ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2535			}
2536		} else {
2537			/* Same as case 0 */
2538			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2539			hash_keys.addrs.v6addrs.src = fl6->saddr;
2540			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2541			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2542			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2543		}
2544		mhash = flow_hash_from_keys(&hash_keys);
2545		break;
2546	case 3:
2547		if (skb)
2548			mhash = rt6_multipath_custom_hash_skb(net, skb);
2549		else
2550			mhash = rt6_multipath_custom_hash_fl6(net, fl6);
2551		break;
2552	}
2553
2554	return mhash >> 1;
2555}
2556
2557/* Called with rcu held */
2558void ip6_route_input(struct sk_buff *skb)
2559{
2560	const struct ipv6hdr *iph = ipv6_hdr(skb);
2561	struct net *net = dev_net(skb->dev);
2562	int flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_DST_NOREF;
2563	struct ip_tunnel_info *tun_info;
2564	struct flowi6 fl6 = {
2565		.flowi6_iif = skb->dev->ifindex,
2566		.daddr = iph->daddr,
2567		.saddr = iph->saddr,
2568		.flowlabel = ip6_flowinfo(iph),
2569		.flowi6_mark = skb->mark,
2570		.flowi6_proto = iph->nexthdr,
2571	};
2572	struct flow_keys *flkeys = NULL, _flkeys;
2573
2574	tun_info = skb_tunnel_info(skb);
2575	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2576		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
2577
2578	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2579		flkeys = &_flkeys;
2580
2581	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2582		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2583	skb_dst_drop(skb);
2584	skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev,
2585						      &fl6, skb, flags));
2586}
2587
2588INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net,
2589					     struct fib6_table *table,
2590					     struct flowi6 *fl6,
2591					     const struct sk_buff *skb,
2592					     int flags)
2593{
2594	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2595}
2596
2597static struct dst_entry *ip6_route_output_flags_noref(struct net *net,
2598						      const struct sock *sk,
2599						      struct flowi6 *fl6,
2600						      int flags)
2601{
2602	bool any_src;
2603
2604	if (ipv6_addr_type(&fl6->daddr) &
2605	    (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
2606		struct dst_entry *dst;
2607
2608		/* This function does not take refcnt on the dst */
2609		dst = l3mdev_link_scope_lookup(net, fl6);
2610		if (dst)
2611			return dst;
2612	}
2613
2614	fl6->flowi6_iif = LOOPBACK_IFINDEX;
2615
2616	flags |= RT6_LOOKUP_F_DST_NOREF;
2617	any_src = ipv6_addr_any(&fl6->saddr);
2618	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2619	    (fl6->flowi6_oif && any_src))
2620		flags |= RT6_LOOKUP_F_IFACE;
2621
2622	if (!any_src)
2623		flags |= RT6_LOOKUP_F_HAS_SADDR;
2624	else if (sk)
2625		flags |= rt6_srcprefs2flags(READ_ONCE(inet6_sk(sk)->srcprefs));
2626
2627	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2628}
2629
2630struct dst_entry *ip6_route_output_flags(struct net *net,
2631					 const struct sock *sk,
2632					 struct flowi6 *fl6,
2633					 int flags)
2634{
2635	struct dst_entry *dst;
2636	struct rt6_info *rt6;
2637
2638	rcu_read_lock();
2639	dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
2640	rt6 = (struct rt6_info *)dst;
2641	/* For dst cached in uncached_list, refcnt is already taken. */
2642	if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) {
2643		dst = &net->ipv6.ip6_null_entry->dst;
2644		dst_hold(dst);
2645	}
2646	rcu_read_unlock();
2647
2648	return dst;
2649}
2650EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2651
2652struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2653{
2654	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
2655	struct net_device *loopback_dev = net->loopback_dev;
2656	struct dst_entry *new = NULL;
2657
2658	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev,
2659		       DST_OBSOLETE_DEAD, 0);
2660	if (rt) {
2661		rt6_info_init(rt);
2662		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
2663
2664		new = &rt->dst;
 
2665		new->__use = 1;
2666		new->input = dst_discard;
2667		new->output = dst_discard_out;
2668
2669		dst_copy_metrics(new, &ort->dst);
2670
2671		rt->rt6i_idev = in6_dev_get(loopback_dev);
2672		rt->rt6i_gateway = ort->rt6i_gateway;
2673		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
 
 
 
 
 
 
 
2674
2675		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2676#ifdef CONFIG_IPV6_SUBTREES
2677		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2678#endif
 
 
2679	}
2680
2681	dst_release(dst_orig);
2682	return new ? new : ERR_PTR(-ENOMEM);
2683}
2684
2685/*
2686 *	Destination cache support functions
2687 */
2688
2689static bool fib6_check(struct fib6_info *f6i, u32 cookie)
2690{
2691	u32 rt_cookie = 0;
2692
2693	if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
2694		return false;
2695
2696	if (fib6_check_expired(f6i))
2697		return false;
2698
2699	return true;
2700}
2701
2702static struct dst_entry *rt6_check(struct rt6_info *rt,
2703				   struct fib6_info *from,
2704				   u32 cookie)
2705{
2706	u32 rt_cookie = 0;
2707
2708	if (!from || !fib6_get_cookie_safe(from, &rt_cookie) ||
2709	    rt_cookie != cookie)
2710		return NULL;
2711
2712	if (rt6_check_expired(rt))
2713		return NULL;
2714
2715	return &rt->dst;
2716}
2717
2718static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2719					    struct fib6_info *from,
2720					    u32 cookie)
2721{
2722	if (!__rt6_check_expired(rt) &&
2723	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2724	    fib6_check(from, cookie))
2725		return &rt->dst;
2726	else
2727		return NULL;
2728}
2729
2730INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
2731							u32 cookie)
2732{
2733	struct dst_entry *dst_ret;
2734	struct fib6_info *from;
2735	struct rt6_info *rt;
2736
2737	rt = container_of(dst, struct rt6_info, dst);
2738
2739	if (rt->sernum)
2740		return rt6_is_valid(rt) ? dst : NULL;
2741
2742	rcu_read_lock();
2743
2744	/* All IPV6 dsts are created with ->obsolete set to the value
2745	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2746	 * into this function always.
2747	 */
2748
2749	from = rcu_dereference(rt->from);
2750
2751	if (from && (rt->rt6i_flags & RTF_PCPU ||
2752	    unlikely(!list_empty(&rt->dst.rt_uncached))))
2753		dst_ret = rt6_dst_from_check(rt, from, cookie);
2754	else
2755		dst_ret = rt6_check(rt, from, cookie);
2756
2757	rcu_read_unlock();
2758
2759	return dst_ret;
2760}
2761EXPORT_INDIRECT_CALLABLE(ip6_dst_check);
2762
2763static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2764{
2765	struct rt6_info *rt = (struct rt6_info *) dst;
2766
2767	if (rt) {
2768		if (rt->rt6i_flags & RTF_CACHE) {
2769			rcu_read_lock();
2770			if (rt6_check_expired(rt)) {
2771				rt6_remove_exception_rt(rt);
2772				dst = NULL;
2773			}
2774			rcu_read_unlock();
2775		} else {
2776			dst_release(dst);
2777			dst = NULL;
2778		}
2779	}
2780	return dst;
2781}
2782
2783static void ip6_link_failure(struct sk_buff *skb)
2784{
2785	struct rt6_info *rt;
2786
2787	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
2788
2789	rt = (struct rt6_info *) skb_dst(skb);
2790	if (rt) {
2791		rcu_read_lock();
2792		if (rt->rt6i_flags & RTF_CACHE) {
2793			rt6_remove_exception_rt(rt);
2794		} else {
2795			struct fib6_info *from;
2796			struct fib6_node *fn;
2797
2798			from = rcu_dereference(rt->from);
2799			if (from) {
2800				fn = rcu_dereference(from->fib6_node);
2801				if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2802					WRITE_ONCE(fn->fn_sernum, -1);
2803			}
2804		}
2805		rcu_read_unlock();
2806	}
2807}
2808
2809static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2810{
2811	if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2812		struct fib6_info *from;
2813
2814		rcu_read_lock();
2815		from = rcu_dereference(rt0->from);
2816		if (from)
2817			rt0->dst.expires = from->expires;
2818		rcu_read_unlock();
2819	}
2820
2821	dst_set_expires(&rt0->dst, timeout);
2822	rt0->rt6i_flags |= RTF_EXPIRES;
2823}
2824
2825static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2826{
2827	struct net *net = dev_net(rt->dst.dev);
2828
2829	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
2830	rt->rt6i_flags |= RTF_MODIFIED;
2831	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2832}
2833
2834static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2835{
2836	return !(rt->rt6i_flags & RTF_CACHE) &&
2837		(rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
2838}
2839
2840static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2841				 const struct ipv6hdr *iph, u32 mtu,
2842				 bool confirm_neigh)
2843{
2844	const struct in6_addr *daddr, *saddr;
2845	struct rt6_info *rt6 = (struct rt6_info *)dst;
2846
2847	/* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU)
2848	 * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
2849	 * [see also comment in rt6_mtu_change_route()]
2850	 */
2851
2852	if (iph) {
2853		daddr = &iph->daddr;
2854		saddr = &iph->saddr;
2855	} else if (sk) {
2856		daddr = &sk->sk_v6_daddr;
2857		saddr = &inet6_sk(sk)->saddr;
2858	} else {
2859		daddr = NULL;
2860		saddr = NULL;
2861	}
2862
2863	if (confirm_neigh)
2864		dst_confirm_neigh(dst, daddr);
2865
2866	if (mtu < IPV6_MIN_MTU)
2867		return;
2868	if (mtu >= dst_mtu(dst))
2869		return;
2870
2871	if (!rt6_cache_allowed_for_pmtu(rt6)) {
2872		rt6_do_update_pmtu(rt6, mtu);
2873		/* update rt6_ex->stamp for cache */
2874		if (rt6->rt6i_flags & RTF_CACHE)
2875			rt6_update_exception_stamp_rt(rt6);
2876	} else if (daddr) {
2877		struct fib6_result res = {};
2878		struct rt6_info *nrt6;
2879
2880		rcu_read_lock();
2881		res.f6i = rcu_dereference(rt6->from);
2882		if (!res.f6i)
2883			goto out_unlock;
2884
2885		res.fib6_flags = res.f6i->fib6_flags;
2886		res.fib6_type = res.f6i->fib6_type;
2887
2888		if (res.f6i->nh) {
2889			struct fib6_nh_match_arg arg = {
2890				.dev = dst->dev,
2891				.gw = &rt6->rt6i_gateway,
2892			};
2893
2894			nexthop_for_each_fib6_nh(res.f6i->nh,
2895						 fib6_nh_find_match, &arg);
2896
2897			/* fib6_info uses a nexthop that does not have fib6_nh
2898			 * using the dst->dev + gw. Should be impossible.
2899			 */
2900			if (!arg.match)
2901				goto out_unlock;
2902
2903			res.nh = arg.match;
2904		} else {
2905			res.nh = res.f6i->fib6_nh;
2906		}
2907
2908		nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
2909		if (nrt6) {
2910			rt6_do_update_pmtu(nrt6, mtu);
2911			if (rt6_insert_exception(nrt6, &res))
2912				dst_release_immediate(&nrt6->dst);
2913		}
2914out_unlock:
2915		rcu_read_unlock();
2916	}
2917}
2918
2919static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2920			       struct sk_buff *skb, u32 mtu,
2921			       bool confirm_neigh)
2922{
2923	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu,
2924			     confirm_neigh);
2925}
2926
2927void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2928		     int oif, u32 mark, kuid_t uid)
2929{
2930	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2931	struct dst_entry *dst;
2932	struct flowi6 fl6 = {
2933		.flowi6_oif = oif,
2934		.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2935		.daddr = iph->daddr,
2936		.saddr = iph->saddr,
2937		.flowlabel = ip6_flowinfo(iph),
2938		.flowi6_uid = uid,
2939	};
2940
2941	dst = ip6_route_output(net, NULL, &fl6);
2942	if (!dst->error)
2943		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true);
2944	dst_release(dst);
2945}
2946EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2947
2948void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2949{
2950	int oif = sk->sk_bound_dev_if;
2951	struct dst_entry *dst;
2952
2953	if (!oif && skb->dev)
2954		oif = l3mdev_master_ifindex(skb->dev);
2955
2956	ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark),
2957			sk->sk_uid);
2958
2959	dst = __sk_dst_get(sk);
2960	if (!dst || !dst->obsolete ||
2961	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2962		return;
2963
2964	bh_lock_sock(sk);
2965	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2966		ip6_datagram_dst_update(sk, false);
2967	bh_unlock_sock(sk);
2968}
2969EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2970
2971void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2972			   const struct flowi6 *fl6)
2973{
2974#ifdef CONFIG_IPV6_SUBTREES
2975	struct ipv6_pinfo *np = inet6_sk(sk);
2976#endif
2977
2978	ip6_dst_store(sk, dst,
2979		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2980		      &sk->sk_v6_daddr : NULL,
2981#ifdef CONFIG_IPV6_SUBTREES
2982		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2983		      &np->saddr :
2984#endif
2985		      NULL);
2986}
2987
2988static bool ip6_redirect_nh_match(const struct fib6_result *res,
2989				  struct flowi6 *fl6,
2990				  const struct in6_addr *gw,
2991				  struct rt6_info **ret)
2992{
2993	const struct fib6_nh *nh = res->nh;
2994
2995	if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
2996	    fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
2997		return false;
2998
2999	/* rt_cache's gateway might be different from its 'parent'
3000	 * in the case of an ip redirect.
3001	 * So we keep searching in the exception table if the gateway
3002	 * is different.
3003	 */
3004	if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
3005		struct rt6_info *rt_cache;
3006
3007		rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
3008		if (rt_cache &&
3009		    ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
3010			*ret = rt_cache;
3011			return true;
3012		}
3013		return false;
3014	}
3015	return true;
3016}
3017
/* Argument bundle handed to fib6_nh_redirect_match() when walking the
 * fib6_nh entries of a nexthop object.
 */
struct fib6_nh_rd_arg {
	struct fib6_result	*res;	/* lookup result; ->nh is set per nexthop */
	struct flowi6		*fl6;	/* flow being looked up */
	const struct in6_addr	*gw;	/* gateway the redirect came from */
	struct rt6_info		**ret;	/* receives a matching cached route */
};
3024
3025static int fib6_nh_redirect_match(struct fib6_nh *nh, void *_arg)
3026{
3027	struct fib6_nh_rd_arg *arg = _arg;
3028
3029	arg->res->nh = nh;
3030	return ip6_redirect_nh_match(arg->res, arg->fl6, arg->gw, arg->ret);
3031}
3032
3033/* Handle redirects */
3034struct ip6rd_flowi {
3035	struct flowi6 fl6;
3036	struct in6_addr gateway;
3037};
3038
3039INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net,
3040					     struct fib6_table *table,
3041					     struct flowi6 *fl6,
3042					     const struct sk_buff *skb,
3043					     int flags)
3044{
3045	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
3046	struct rt6_info *ret = NULL;
3047	struct fib6_result res = {};
3048	struct fib6_nh_rd_arg arg = {
3049		.res = &res,
3050		.fl6 = fl6,
3051		.gw  = &rdfl->gateway,
3052		.ret = &ret
3053	};
3054	struct fib6_info *rt;
3055	struct fib6_node *fn;
3056
3057	/* Get the "current" route for this destination and
3058	 * check if the redirect has come from appropriate router.
3059	 *
3060	 * RFC 4861 specifies that redirects should only be
3061	 * accepted if they come from the nexthop to the target.
3062	 * Due to the way the routes are chosen, this notion
3063	 * is a bit fuzzy and one might need to check all possible
3064	 * routes.
3065	 */
3066
3067	rcu_read_lock();
3068	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
3069restart:
3070	for_each_fib6_node_rt_rcu(fn) {
3071		res.f6i = rt;
3072		if (fib6_check_expired(rt))
3073			continue;
3074		if (rt->fib6_flags & RTF_REJECT)
3075			break;
3076		if (unlikely(rt->nh)) {
3077			if (nexthop_is_blackhole(rt->nh))
3078				continue;
3079			/* on match, res->nh is filled in and potentially ret */
3080			if (nexthop_for_each_fib6_nh(rt->nh,
3081						     fib6_nh_redirect_match,
3082						     &arg))
3083				goto out;
3084		} else {
3085			res.nh = rt->fib6_nh;
3086			if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway,
3087						  &ret))
3088				goto out;
3089		}
 
3090	}
3091
3092	if (!rt)
3093		rt = net->ipv6.fib6_null_entry;
3094	else if (rt->fib6_flags & RTF_REJECT) {
3095		ret = net->ipv6.ip6_null_entry;
3096		goto out;
3097	}
3098
3099	if (rt == net->ipv6.fib6_null_entry) {
3100		fn = fib6_backtrack(fn, &fl6->saddr);
3101		if (fn)
3102			goto restart;
3103	}
3104
3105	res.f6i = rt;
3106	res.nh = rt->fib6_nh;
3107out:
3108	if (ret) {
3109		ip6_hold_safe(net, &ret);
3110	} else {
3111		res.fib6_flags = res.f6i->fib6_flags;
3112		res.fib6_type = res.f6i->fib6_type;
3113		ret = ip6_create_rt_rcu(&res);
3114	}
3115
3116	rcu_read_unlock();
3117
3118	trace_fib6_table_lookup(net, &res, table, fl6);
3119	return ret;
3120};
3121
3122static struct dst_entry *ip6_route_redirect(struct net *net,
3123					    const struct flowi6 *fl6,
3124					    const struct sk_buff *skb,
3125					    const struct in6_addr *gateway)
3126{
3127	int flags = RT6_LOOKUP_F_HAS_SADDR;
3128	struct ip6rd_flowi rdfl;
3129
3130	rdfl.fl6 = *fl6;
3131	rdfl.gateway = *gateway;
3132
3133	return fib6_rule_lookup(net, &rdfl.fl6, skb,
3134				flags, __ip6_route_redirect);
3135}
3136
3137void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
3138		  kuid_t uid)
3139{
3140	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
3141	struct dst_entry *dst;
3142	struct flowi6 fl6 = {
3143		.flowi6_iif = LOOPBACK_IFINDEX,
3144		.flowi6_oif = oif,
3145		.flowi6_mark = mark,
3146		.daddr = iph->daddr,
3147		.saddr = iph->saddr,
3148		.flowlabel = ip6_flowinfo(iph),
3149		.flowi6_uid = uid,
3150	};
3151
3152	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
3153	rt6_do_redirect(dst, NULL, skb);
3154	dst_release(dst);
3155}
3156EXPORT_SYMBOL_GPL(ip6_redirect);
3157
3158void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
3159{
3160	const struct ipv6hdr *iph = ipv6_hdr(skb);
3161	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
3162	struct dst_entry *dst;
3163	struct flowi6 fl6 = {
3164		.flowi6_iif = LOOPBACK_IFINDEX,
3165		.flowi6_oif = oif,
3166		.daddr = msg->dest,
3167		.saddr = iph->daddr,
3168		.flowi6_uid = sock_net_uid(net, NULL),
3169	};
3170
3171	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
3172	rt6_do_redirect(dst, NULL, skb);
3173	dst_release(dst);
3174}
3175
3176void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
3177{
3178	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if,
3179		     READ_ONCE(sk->sk_mark), sk->sk_uid);
3180}
3181EXPORT_SYMBOL_GPL(ip6_sk_redirect);
3182
3183static unsigned int ip6_default_advmss(const struct dst_entry *dst)
3184{
3185	struct net_device *dev = dst->dev;
3186	unsigned int mtu = dst_mtu(dst);
3187	struct net *net = dev_net(dev);
3188
3189	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
3190
3191	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
3192		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
3193
3194	/*
3195	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
3196	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
3197	 * IPV6_MAXPLEN is also valid and means: "any MSS,
3198	 * rely only on pmtu discovery"
3199	 */
3200	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
3201		mtu = IPV6_MAXPLEN;
3202	return mtu;
3203}
3204
3205INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst)
3206{
3207	return ip6_dst_mtu_maybe_forward(dst, false);
3208}
3209EXPORT_INDIRECT_CALLABLE(ip6_mtu);
3210
3211/* MTU selection:
3212 * 1. mtu on route is locked - use it
3213 * 2. mtu from nexthop exception
3214 * 3. mtu from egress device
3215 *
3216 * based on ip6_dst_mtu_forward and exception logic of
3217 * rt6_find_cached_rt; called with rcu_read_lock
3218 */
3219u32 ip6_mtu_from_fib6(const struct fib6_result *res,
3220		      const struct in6_addr *daddr,
3221		      const struct in6_addr *saddr)
3222{
3223	const struct fib6_nh *nh = res->nh;
3224	struct fib6_info *f6i = res->f6i;
3225	struct inet6_dev *idev;
3226	struct rt6_info *rt;
3227	u32 mtu = 0;
3228
3229	if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
3230		mtu = f6i->fib6_pmtu;
3231		if (mtu)
3232			goto out;
3233	}
3234
3235	rt = rt6_find_cached_rt(res, daddr, saddr);
3236	if (unlikely(rt)) {
3237		mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
3238	} else {
3239		struct net_device *dev = nh->fib_nh_dev;
3240
3241		mtu = IPV6_MIN_MTU;
3242		idev = __in6_dev_get(dev);
3243		if (idev && idev->cnf.mtu6 > mtu)
3244			mtu = idev->cnf.mtu6;
3245	}
3246
3247	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3248out:
3249	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
3250}
3251
3252struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
3253				  struct flowi6 *fl6)
 
3254{
3255	struct dst_entry *dst;
3256	struct rt6_info *rt;
3257	struct inet6_dev *idev = in6_dev_get(dev);
3258	struct net *net = dev_net(dev);
3259
3260	if (unlikely(!idev))
3261		return ERR_PTR(-ENODEV);
3262
3263	rt = ip6_dst_alloc(net, dev, 0);
3264	if (unlikely(!rt)) {
3265		in6_dev_put(idev);
3266		dst = ERR_PTR(-ENOMEM);
3267		goto out;
3268	}
3269
3270	rt->dst.input = ip6_input;
 
 
 
 
 
 
 
 
3271	rt->dst.output  = ip6_output;
3272	rt->rt6i_gateway  = fl6->daddr;
3273	rt->rt6i_dst.addr = fl6->daddr;
 
 
 
3274	rt->rt6i_dst.plen = 128;
3275	rt->rt6i_idev     = idev;
3276	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
3277
3278	/* Add this dst into uncached_list so that rt6_disable_ip() can
3279	 * do proper release of the net_device
3280	 */
3281	rt6_uncached_list_add(rt);
3282
3283	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
3284
3285out:
3286	return dst;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3287}
3288
3289static void ip6_dst_gc(struct dst_ops *ops)
3290{
 
3291	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
3292	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
 
3293	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
3294	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
3295	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
3296	unsigned int val;
3297	int entries;
3298
3299	if (time_after(rt_last_gc + rt_min_interval, jiffies))
 
 
3300		goto out;
3301
3302	fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
 
 
3303	entries = dst_entries_get_slow(ops);
3304	if (entries < ops->gc_thresh)
3305		atomic_set(&net->ipv6.ip6_rt_gc_expire, rt_gc_timeout >> 1);
3306out:
3307	val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
3308	atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
3309}
3310
3311static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg,
3312			       const struct in6_addr *gw_addr, u32 tbid,
3313			       int flags, struct fib6_result *res)
3314{
3315	struct flowi6 fl6 = {
3316		.flowi6_oif = cfg->fc_ifindex,
3317		.daddr = *gw_addr,
3318		.saddr = cfg->fc_prefsrc,
3319	};
3320	struct fib6_table *table;
3321	int err;
3322
3323	table = fib6_get_table(net, tbid);
3324	if (!table)
3325		return -EINVAL;
3326
3327	if (!ipv6_addr_any(&cfg->fc_prefsrc))
3328		flags |= RT6_LOOKUP_F_HAS_SADDR;
3329
3330	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
3331
3332	err = fib6_table_lookup(net, table, cfg->fc_ifindex, &fl6, res, flags);
3333	if (!err && res->f6i != net->ipv6.fib6_null_entry)
3334		fib6_select_path(net, res, &fl6, cfg->fc_ifindex,
3335				 cfg->fc_ifindex != 0, NULL, flags);
3336
3337	return err;
3338}
3339
3340static int ip6_route_check_nh_onlink(struct net *net,
3341				     struct fib6_config *cfg,
3342				     const struct net_device *dev,
3343				     struct netlink_ext_ack *extack)
3344{
3345	u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
3346	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3347	struct fib6_result res = {};
3348	int err;
3349
3350	err = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0, &res);
3351	if (!err && !(res.fib6_flags & RTF_REJECT) &&
3352	    /* ignore match if it is the default route */
3353	    !ipv6_addr_any(&res.f6i->fib6_dst.addr) &&
3354	    (res.fib6_type != RTN_UNICAST || dev != res.nh->fib_nh_dev)) {
3355		NL_SET_ERR_MSG(extack,
3356			       "Nexthop has invalid gateway or device mismatch");
3357		err = -EINVAL;
3358	}
3359
3360	return err;
3361}
3362
/*
 * Check that the gateway in @cfg is reachable through a usable route.
 *
 * The gateway is looked up in cfg->fc_table first (when one is set) and,
 * if that does not produce an acceptable result, through fib6_lookup().
 * A result is unacceptable when the lookup failed, when it is a reject
 * route, or when its nexthop itself has a gateway.
 *
 * @_dev: in/out egress device.  If *_dev is already set it must equal the
 *	  device of the lookup result.  If it is NULL it is filled in from
 *	  the result, a tracked reference is taken via netdev_hold() and
 *	  *@idev is set from in6_dev_get().
 *
 * ip6_validate_gw() calls this with rcu_read_lock() held.
 *
 * Returns 0 on success, -EHOSTUNREACH otherwise (every lookup error is
 * folded into -EHOSTUNREACH).
 */
3363static int ip6_route_check_nh(struct net *net,
3364			      struct fib6_config *cfg,
3365			      struct net_device **_dev,
3366			      netdevice_tracker *dev_tracker,
3367			      struct inet6_dev **idev)
3368{
3369	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3370	struct net_device *dev = _dev ? *_dev : NULL;
3371	int flags = RT6_LOOKUP_F_IFACE;
3372	struct fib6_result res = {};
	/* Starts as failure so the generic lookup below runs when no table
	 * is configured.
	 */
3373	int err = -EHOSTUNREACH;
3374
3375	if (cfg->fc_table) {
3376		err = ip6_nh_lookup_table(net, cfg, gw_addr,
3377					  cfg->fc_table, flags, &res);
3378		/* gw_addr can not require a gateway or resolve to a reject
3379		 * route. If a device is given, it must match the result.
3380		 */
3381		if (err || res.fib6_flags & RTF_REJECT ||
3382		    res.nh->fib_nh_gw_family ||
3383		    (dev && dev != res.nh->fib_nh_dev))
3384			err = -EHOSTUNREACH;
3385	}
3386
	/* Table-specific lookup was skipped or rejected: retry with
	 * fib6_lookup().
	 */
3387	if (err < 0) {
3388		struct flowi6 fl6 = {
3389			.flowi6_oif = cfg->fc_ifindex,
3390			.daddr = *gw_addr,
3391		};
3392
3393		err = fib6_lookup(net, cfg->fc_ifindex, &fl6, &res, flags);
3394		if (err || res.fib6_flags & RTF_REJECT ||
3395		    res.nh->fib_nh_gw_family)
3396			err = -EHOSTUNREACH;
3397
3398		if (err)
3399			return err;
3400
3401		fib6_select_path(net, &res, &fl6, cfg->fc_ifindex,
3402				 cfg->fc_ifindex != 0, NULL, flags);
3403	}
3404
3405	err = 0;
3406	if (dev) {
3407		if (dev != res.nh->fib_nh_dev)
3408			err = -EHOSTUNREACH;
3409	} else {
		/* No device supplied: adopt the one from the lookup result
		 * and take references for the caller.
		 * NOTE(review): *_dev is written here without re-checking
		 * _dev, although line 3370 tolerates _dev == NULL; the
		 * visible caller always passes a valid pointer.
		 */
3410		*_dev = dev = res.nh->fib_nh_dev;
3411		netdev_hold(dev, dev_tracker, GFP_ATOMIC);
3412		*idev = in6_dev_get(dev);
3413	}
3414
3415	return err;
3416}
3417
/*
 * Validate the gateway address of a route that is being added.
 *
 * Steps, in order:
 *  - if an egress device is already known, the gateway must not be an
 *    address reported by ipv6_chk_addr_and_flags();
 *  - a gateway that is not exactly link-local unicast must at least be
 *    unicast or IPv4-mapped, and is then verified under rcu_read_lock()
 *    by ip6_route_check_nh_onlink() (RTNH_F_ONLINK set) or
 *    ip6_route_check_nh(), which may also resolve *@_dev and *@idev;
 *  - an egress device must be known by now and must not be a loopback
 *    device;
 *  - if no device was known on entry, the local-address check is done now
 *    against the resolved device.
 *
 * Returns 0 when the gateway is acceptable, -EINVAL for the checks done
 * here, or the error returned by the nexthop check helper.
 */
3418static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
3419			   struct net_device **_dev,
3420			   netdevice_tracker *dev_tracker,
3421			   struct inet6_dev **idev,
3422			   struct netlink_ext_ack *extack)
3423{
3424	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3425	int gwa_type = ipv6_addr_type(gw_addr);
	/* skip_dev is false only for link-local gateways; it is handed to
	 * ipv6_chk_addr_and_flags() unchanged.
	 */
3426	bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
3427	const struct net_device *dev = *_dev;
	/* Remember whether the local-address check must be deferred until a
	 * device has been resolved.
	 */
3428	bool need_addr_check = !dev;
3429	int err = -EINVAL;
3430
3431	/* if gw_addr is local we will fail to detect this in case
3432	 * address is still TENTATIVE (DAD in progress). rt6_lookup()
3433	 * will return already-added prefix route via interface that
3434	 * prefix route was assigned to, which might be non-loopback.
3435	 */
3436	if (dev &&
3437	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3438		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
3439		goto out;
3440	}
3441
3442	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
3443		/* IPv6 strictly inhibits using not link-local
3444		 * addresses as nexthop address.
3445		 * Otherwise, router will not able to send redirects.
3446		 * It is very good, but in some (rare!) circumstances
3447		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
3448		 * some exceptions. --ANK
3449		 * We allow IPv4-mapped nexthops to support RFC4798-type
3450		 * addressing
3451		 */
3452		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
3453			NL_SET_ERR_MSG(extack, "Invalid gateway address");
3454			goto out;
3455		}
3456
3457		rcu_read_lock();
3458
3459		if (cfg->fc_flags & RTNH_F_ONLINK)
3460			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
3461		else
3462			err = ip6_route_check_nh(net, cfg, _dev, dev_tracker,
3463						 idev);
3464
3465		rcu_read_unlock();
3466
3467		if (err)
3468			goto out;
3469	}
3470
3471	/* reload in case device was changed */
3472	dev = *_dev;
3473
3474	err = -EINVAL;
3475	if (!dev) {
3476		NL_SET_ERR_MSG(extack, "Egress device not specified");
3477		goto out;
3478	} else if (dev->flags & IFF_LOOPBACK) {
3479		NL_SET_ERR_MSG(extack,
3480			       "Egress device can not be loopback device for this route");
3481		goto out;
3482	}
3483
3484	/* if we did not check gw_addr above, do so now that the
3485	 * egress device has been resolved.
3486	 */
3487	if (need_addr_check &&
3488	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3489		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
3490		goto out;
3491	}
3492
3493	err = 0;
3494out:
3495	return err;
3496}
 
3497
3498static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
3499{
3500	if ((flags & RTF_REJECT) ||
3501	    (dev && (dev->flags & IFF_LOOPBACK) &&
3502	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
3503	     !(flags & (RTF_ANYCAST | RTF_LOCAL))))
3504		return true;
3505
3506	return false;
3507}
3508
/*
 * Initialise an IPv6 nexthop (@fib6_nh) from a route configuration.
 *
 * @net:       network namespace the nexthop belongs to
 * @fib6_nh:   nexthop to fill in
 * @cfg:       route configuration (ifindex, flags, gateway, encap, ...)
 * @gfp_flags: allocation flags for the device tracker and per-cpu storage
 * @extack:    netlink extended ack used for error messages
 *
 * For FDB nexthops (cfg->fc_is_fdb) only the gateway is recorded.
 * Otherwise the egress device is resolved from cfg->fc_ifindex and/or the
 * gateway validation, on-link and gateway constraints are checked, routes
 * that fib6_is_reject() flags are re-pointed at the namespace loopback
 * device, the common nexthop part is initialised and the per-cpu route
 * pointer array is allocated.
 *
 * Reference handling visible here: the inet6_dev reference is always
 * dropped before returning; the tracked device reference is kept on
 * success (stored in fib_nh_dev) and released on error together with any
 * lwtunnel state.
 *
 * Returns 0 on success or a negative errno.
 */
3509int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
3510		 struct fib6_config *cfg, gfp_t gfp_flags,
3511		 struct netlink_ext_ack *extack)
3512{
3513	netdevice_tracker *dev_tracker = &fib6_nh->fib_nh_dev_tracker;
3514	struct net_device *dev = NULL;
3515	struct inet6_dev *idev = NULL;
3516	int addr_type;
3517	int err;
3518
3519	fib6_nh->fib_nh_family = AF_INET6;
3520#ifdef CONFIG_IPV6_ROUTER_PREF
3521	fib6_nh->last_probe = jiffies;
3522#endif
	/* FDB nexthops carry only a gateway; no device is resolved. */
3523	if (cfg->fc_is_fdb) {
3524		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
3525		fib6_nh->fib_nh_gw_family = AF_INET6;
3526		return 0;
3527	}
3528
3529	err = -ENODEV;
3530	if (cfg->fc_ifindex) {
3531		dev = netdev_get_by_index(net, cfg->fc_ifindex,
3532					  dev_tracker, gfp_flags);
3533		if (!dev)
3534			goto out;
3535		idev = in6_dev_get(dev);
3536		if (!idev)
3537			goto out;
3538	}
3539
3540	if (cfg->fc_flags & RTNH_F_ONLINK) {
		/* err is still -ENODEV for the missing-device case. */
3541		if (!dev) {
3542			NL_SET_ERR_MSG(extack,
3543				       "Nexthop device required for onlink");
3544			goto out;
3545		}
3546
3547		if (!(dev->flags & IFF_UP)) {
3548			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3549			err = -ENETDOWN;
3550			goto out;
3551		}
3552
3553		fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
3554	}
3555
3556	fib6_nh->fib_nh_weight = 1;
3557
3558	/* We cannot add true routes via loopback here,
3559	 * they would result in kernel looping; promote them to reject routes
3560	 */
3561	addr_type = ipv6_addr_type(&cfg->fc_dst);
3562	if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
3563		/* hold loopback dev/idev if we haven't done so. */
3564		if (dev != net->loopback_dev) {
3565			if (dev) {
3566				netdev_put(dev, dev_tracker);
3567				in6_dev_put(idev);
3568			}
3569			dev = net->loopback_dev;
3570			netdev_hold(dev, dev_tracker, gfp_flags);
3571			idev = in6_dev_get(dev);
3572			if (!idev) {
3573				err = -ENODEV;
3574				goto out;
3575			}
3576		}
		/* Reject routes skip gateway, device-state and encap setup. */
3577		goto pcpu_alloc;
3578	}
3579
3580	if (cfg->fc_flags & RTF_GATEWAY) {
		/* May resolve dev and idev when no ifindex was given. */
3581		err = ip6_validate_gw(net, cfg, &dev, dev_tracker,
3582				      &idev, extack);
3583		if (err)
3584			goto out;
3585
3586		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
3587		fib6_nh->fib_nh_gw_family = AF_INET6;
3588	}
3589
3590	err = -ENODEV;
3591	if (!dev)
3592		goto out;
3593
3594	if (idev->cnf.disable_ipv6) {
3595		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3596		err = -EACCES;
3597		goto out;
3598	}
3599
3600	if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
3601		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3602		err = -ENETDOWN;
3603		goto out;
3604	}
3605
	/* Mark the nexthop link-down when the carrier is off, except for
	 * local and anycast routes.
	 */
3606	if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3607	    !netif_carrier_ok(dev))
3608		fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
3609
3610	err = fib_nh_common_init(net, &fib6_nh->nh_common, cfg->fc_encap,
3611				 cfg->fc_encap_type, cfg, gfp_flags, extack);
3612	if (err)
3613		goto out;
3614
3615pcpu_alloc:
3616	fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
3617	if (!fib6_nh->rt6i_pcpu) {
3618		err = -ENOMEM;
3619		goto out;
3620	}
3621
3622	fib6_nh->fib_nh_dev = dev;
3623	fib6_nh->fib_nh_oif = dev->ifindex;
3624	err = 0;
3625out:
	/* The idev reference was only needed for the checks above. */
3626	if (idev)
3627		in6_dev_put(idev);
3628
3629	if (err) {
3630		lwtstate_put(fib6_nh->fib_nh_lws);
3631		fib6_nh->fib_nh_lws = NULL;
3632		netdev_put(dev, dev_tracker);
3633	}
3634
3635	return err;
3636}
3637
3638void fib6_nh_release(struct fib6_nh *fib6_nh)
3639{
3640	struct rt6_exception_bucket *bucket;
3641
3642	rcu_read_lock();
3643
3644	fib6_nh_flush_exceptions(fib6_nh, NULL);
3645	bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL);
3646	if (bucket) {
3647		rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL);
3648		kfree(bucket);
3649	}
3650
3651	rcu_read_unlock();
3652
3653	fib6_nh_release_dsts(fib6_nh);
3654	free_percpu(fib6_nh->rt6i_pcpu);
3655
3656	fib_nh_common_release(&fib6_nh->nh_common);
3657}
3658
3659void fib6_nh_release_dsts(struct fib6_nh *fib6_nh)
3660{
3661	int cpu;
3662
3663	if (!fib6_nh->rt6i_pcpu)
3664		return;
3665
3666	for_each_possible_cpu(cpu) {
3667		struct rt6_info *pcpu_rt, **ppcpu_rt;
3668
3669		ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
3670		pcpu_rt = xchg(ppcpu_rt, NULL);
3671		if (pcpu_rt) {
3672			dst_dev_put(&pcpu_rt->dst);
3673			dst_release(&pcpu_rt->dst);
3674		}
 
 
 
3675	}
3676}
3677
3678static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
3679					      gfp_t gfp_flags,
3680					      struct netlink_ext_ack *extack)
3681{
3682	struct net *net = cfg->fc_nlinfo.nl_net;
3683	struct fib6_info *rt = NULL;
3684	struct nexthop *nh = NULL;
3685	struct fib6_table *table;
3686	struct fib6_nh *fib6_nh;
3687	int err = -EINVAL;
3688	int addr_type;
3689
3690	/* RTF_PCPU is an internal flag; can not be set by userspace */
3691	if (cfg->fc_flags & RTF_PCPU) {
3692		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
3693		goto out;
3694	}
3695
3696	/* RTF_CACHE is an internal flag; can not be set by userspace */
3697	if (cfg->fc_flags & RTF_CACHE) {
3698		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3699		goto out;
3700	}
3701
3702	if (cfg->fc_type > RTN_MAX) {
3703		NL_SET_ERR_MSG(extack, "Invalid route type");
3704		goto out;
3705	}
3706
3707	if (cfg->fc_dst_len > 128) {
3708		NL_SET_ERR_MSG(extack, "Invalid prefix length");
3709		goto out;
3710	}
3711	if (cfg->fc_src_len > 128) {
3712		NL_SET_ERR_MSG(extack, "Invalid source address length");
3713		goto out;
3714	}
3715#ifndef CONFIG_IPV6_SUBTREES
3716	if (cfg->fc_src_len) {
3717		NL_SET_ERR_MSG(extack,
3718			       "Specifying source address requires IPV6_SUBTREES to be enabled");
3719		goto out;
3720	}
3721#endif
3722	if (cfg->fc_nh_id) {
3723		nh = nexthop_find_by_id(net, cfg->fc_nh_id);
3724		if (!nh) {
3725			NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
3726			goto out;
3727		}
3728		err = fib6_check_nexthop(nh, cfg, extack);
3729		if (err)
 
 
 
 
 
 
 
3730			goto out;
3731	}
3732
3733	err = -ENOBUFS;
3734	if (cfg->fc_nlinfo.nlh &&
3735	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
3736		table = fib6_get_table(net, cfg->fc_table);
3737		if (!table) {
3738			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
3739			table = fib6_new_table(net, cfg->fc_table);
3740		}
3741	} else {
3742		table = fib6_new_table(net, cfg->fc_table);
3743	}
3744
3745	if (!table)
3746		goto out;
3747
3748	err = -ENOMEM;
3749	rt = fib6_info_alloc(gfp_flags, !nh);
3750	if (!rt)
3751		goto out;
3752
3753	rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3754					       extack);
3755	if (IS_ERR(rt->fib6_metrics)) {
3756		err = PTR_ERR(rt->fib6_metrics);
3757		/* Do not leave garbage there. */
3758		rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
3759		goto out_free;
3760	}
3761
3762	if (cfg->fc_flags & RTF_ADDRCONF)
3763		rt->dst_nocount = true;
3764
3765	if (cfg->fc_flags & RTF_EXPIRES)
3766		fib6_set_expires(rt, jiffies +
3767				clock_t_to_jiffies(cfg->fc_expires));
3768	else
3769		fib6_clean_expires(rt);
3770
3771	if (cfg->fc_protocol == RTPROT_UNSPEC)
3772		cfg->fc_protocol = RTPROT_BOOT;
3773	rt->fib6_protocol = cfg->fc_protocol;
 
3774
3775	rt->fib6_table = table;
3776	rt->fib6_metric = cfg->fc_metric;
3777	rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
3778	rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
3779
3780	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3781	rt->fib6_dst.plen = cfg->fc_dst_len;
 
 
 
3782
3783#ifdef CONFIG_IPV6_SUBTREES
3784	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3785	rt->fib6_src.plen = cfg->fc_src_len;
3786#endif
3787	if (nh) {
3788		if (rt->fib6_src.plen) {
3789			NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
3790			goto out_free;
3791		}
3792		if (!nexthop_get(nh)) {
3793			NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
3794			goto out_free;
3795		}
3796		rt->nh = nh;
3797		fib6_nh = nexthop_fib6_nh(rt->nh);
3798	} else {
3799		err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
3800		if (err)
3801			goto out;
3802
3803		fib6_nh = rt->fib6_nh;
3804
3805		/* We cannot add true routes via loopback here, they would
3806		 * result in kernel looping; promote them to reject routes
3807		 */
3808		addr_type = ipv6_addr_type(&cfg->fc_dst);
3809		if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev,
3810				   addr_type))
3811			rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
3812	}
3813
3814	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3815		struct net_device *dev = fib6_nh->fib_nh_dev;
3816
3817		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3818			NL_SET_ERR_MSG(extack, "Invalid source address");
3819			err = -EINVAL;
3820			goto out;
3821		}
3822		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3823		rt->fib6_prefsrc.plen = 128;
3824	} else
3825		rt->fib6_prefsrc.plen = 0;
3826
3827	return rt;
3828out:
3829	fib6_info_release(rt);
3830	return ERR_PTR(err);
3831out_free:
3832	ip_fib_metrics_put(rt->fib6_metrics);
3833	kfree(rt);
3834	return ERR_PTR(err);
 
3835}
3836
3837int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3838		  struct netlink_ext_ack *extack)
3839{
3840	struct fib6_info *rt;
3841	int err;
 
 
3842
3843	rt = ip6_route_info_create(cfg, gfp_flags, extack);
3844	if (IS_ERR(rt))
3845		return PTR_ERR(rt);
3846
3847	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
3848	fib6_info_release(rt);
3849
3850	return err;
3851}
3852
3853static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3854{
3855	struct net *net = info->nl_net;
3856	struct fib6_table *table;
3857	int err;
3858
3859	if (rt == net->ipv6.fib6_null_entry) {
3860		err = -ENOENT;
3861		goto out;
3862	}
3863
3864	table = rt->fib6_table;
3865	spin_lock_bh(&table->tb6_lock);
3866	err = fib6_del(rt, info);
3867	spin_unlock_bh(&table->tb6_lock);
3868
3869out:
3870	fib6_info_release(rt);
3871	return err;
3872}
3873
3874int ip6_del_rt(struct net *net, struct fib6_info *rt, bool skip_notify)
3875{
3876	struct nl_info info = {
3877		.nl_net = net,
3878		.skip_notify = skip_notify
3879	};
3880
3881	return __ip6_del_rt(rt, &info);
3882}
3883
/*
 * Delete @rt and, when cfg->fc_delete_all_nh is set and @rt has siblings,
 * every sibling of a multipath route.
 *
 * The work is done under the table's tb6_lock.  For the multipath case a
 * single RTM_DELROUTE message describing @rt is built up front; when that
 * succeeds per-route notifications are suppressed (info->skip_notify) and
 * the message is sent after the lock has been dropped.  In-kernel FIB
 * notifiers are always suppressed for the individual deletions in that
 * case and replaced by one replace/delete notification.
 *
 * The caller's reference on @rt is released before returning.
 *
 * Returns -ENOENT for the null entry, otherwise the result of the last
 * fib6_del() call (the first failing sibling deletion stops the loop).
 */
3884static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
3885{
3886	struct nl_info *info = &cfg->fc_nlinfo;
3887	struct net *net = info->nl_net;
3888	struct sk_buff *skb = NULL;
3889	struct fib6_table *table;
3890	int err = -ENOENT;
3891
3892	if (rt == net->ipv6.fib6_null_entry)
3893		goto out_put;
3894	table = rt->fib6_table;
3895	spin_lock_bh(&table->tb6_lock);
3896
3897	if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
3898		struct fib6_info *sibling, *next_sibling;
3899		struct fib6_node *fn;
3900
3901		/* prefer to send a single notification with all hops */
3902		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3903		if (skb) {
3904			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3905
			/* If the message cannot be filled, fall back to the
			 * per-route notifications (skip_notify stays clear).
			 */
3906			if (rt6_fill_node(net, skb, rt, NULL,
3907					  NULL, NULL, 0, RTM_DELROUTE,
3908					  info->portid, seq, 0) < 0) {
3909				kfree_skb(skb);
3910				skb = NULL;
3911			} else
3912				info->skip_notify = 1;
3913		}
3914
3915		/* 'rt' points to the first sibling route. If it is not the
3916		 * leaf, then we do not need to send a notification. Otherwise,
3917		 * we need to check if the last sibling has a next route or not
3918		 * and emit a replace or delete notification, respectively.
3919		 */
3920		info->skip_notify_kernel = 1;
3921		fn = rcu_dereference_protected(rt->fib6_node,
3922					    lockdep_is_held(&table->tb6_lock));
3923		if (rcu_access_pointer(fn->leaf) == rt) {
3924			struct fib6_info *last_sibling, *replace_rt;
3925
3926			last_sibling = list_last_entry(&rt->fib6_siblings,
3927						       struct fib6_info,
3928						       fib6_siblings);
3929			replace_rt = rcu_dereference_protected(
3930					    last_sibling->fib6_next,
3931					    lockdep_is_held(&table->tb6_lock));
3932			if (replace_rt)
3933				call_fib6_entry_notifiers_replace(net,
3934								  replace_rt);
3935			else
3936				call_fib6_multipath_entry_notifiers(net,
3937						       FIB_EVENT_ENTRY_DEL,
3938						       rt, rt->fib6_nsiblings,
3939						       NULL);
3940		}
		/* _safe iteration: fib6_del() is called on the current
		 * entry while walking the sibling list.
		 */
3941		list_for_each_entry_safe(sibling, next_sibling,
3942					 &rt->fib6_siblings,
3943					 fib6_siblings) {
3944			err = fib6_del(sibling, info);
3945			if (err)
3946				goto out_unlock;
3947		}
3948	}
3949
3950	err = fib6_del(rt, info);
3951out_unlock:
3952	spin_unlock_bh(&table->tb6_lock);
3953out_put:
3954	fib6_info_release(rt);
3955
	/* Send the combined notification only after dropping the lock. */
3956	if (skb) {
3957		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3958			    info->nlh, gfp_any());
3959	}
3960	return err;
3961}
3962
3963static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
 
 
 
 
 
 
 
 
 
 
 
3964{
3965	int rc = -ESRCH;
 
 
3966
3967	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3968		goto out;
 
 
 
 
 
 
 
 
3969
3970	if (cfg->fc_flags & RTF_GATEWAY &&
3971	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3972		goto out;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3973
3974	rc = rt6_remove_exception_rt(rt);
 
 
3975out:
3976	return rc;
3977}
3978
3979static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
3980			     struct fib6_nh *nh)
3981{
3982	struct fib6_result res = {
3983		.f6i = rt,
3984		.nh = nh,
3985	};
3986	struct rt6_info *rt_cache;
3987
3988	rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src);
3989	if (rt_cache)
3990		return __ip6_del_cached_rt(rt_cache, cfg);
3991
3992	return 0;
3993}
3994
/* Argument bundle passed through the void * cookie of
 * nexthop_for_each_fib6_nh() to fib6_nh_del_cached_rt().
 */
3995struct fib6_nh_del_cached_rt_arg {
3996	struct fib6_config *cfg;	/* deletion request being processed */
3997	struct fib6_info *f6i;		/* FIB entry whose cached routes are searched */
3998};
3999
4000static int fib6_nh_del_cached_rt(struct fib6_nh *nh, void *_arg)
 
 
 
4001{
4002	struct fib6_nh_del_cached_rt_arg *arg = _arg;
4003	int rc;
 
 
 
 
 
 
 
4004
4005	rc = ip6_del_cached_rt(arg->cfg, arg->f6i, nh);
4006	return rc != -ESRCH ? rc : 0;
4007}
4008
4009static int ip6_del_cached_rt_nh(struct fib6_config *cfg, struct fib6_info *f6i)
4010{
4011	struct fib6_nh_del_cached_rt_arg arg = {
4012		.cfg = cfg,
4013		.f6i = f6i
4014	};
4015
4016	return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_del_cached_rt, &arg);
 
4017}
4018
/*
 * Delete the route described by @cfg.
 *
 * The FIB node for the destination/source prefix is located under
 * rcu_read_lock() and its routes are scanned for the first match:
 *  - a nexthop id in @cfg must equal the id of the route's nexthop object;
 *  - with RTF_CACHE set, only cached exception routes are considered and
 *    the first result other than -ESRCH is returned;
 *  - otherwise metric and protocol (when given) must match; routes using a
 *    nexthop object are deleted directly, while for embedded nexthops the
 *    ifindex and gateway (when given) must match as well.
 *
 * A reference is taken with fib6_info_hold_safe() before the RCU read lock
 * is dropped; the delete helpers release it.
 *
 * Returns -ESRCH when the table or a matching route does not exist,
 * otherwise the result of the delete helper.
 */
4019static int ip6_route_del(struct fib6_config *cfg,
4020			 struct netlink_ext_ack *extack)
4021{
4022	struct fib6_table *table;
4023	struct fib6_info *rt;
4024	struct fib6_node *fn;
4025	int err = -ESRCH;
4026
4027	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
4028	if (!table) {
4029		NL_SET_ERR_MSG(extack, "FIB table does not exist");
4030		return err;
4031	}
4032
4033	rcu_read_lock();
4034
	/* The last argument asks for an exact prefix match unless a cached
	 * route is being deleted.
	 * NOTE(review): inferred from the argument expression; confirm
	 * against fib6_locate().
	 */
4035	fn = fib6_locate(&table->tb6_root,
4036			 &cfg->fc_dst, cfg->fc_dst_len,
4037			 &cfg->fc_src, cfg->fc_src_len,
4038			 !(cfg->fc_flags & RTF_CACHE));
4039
4040	if (fn) {
4041		for_each_fib6_node_rt_rcu(fn) {
4042			struct fib6_nh *nh;
4043
4044			if (rt->nh && cfg->fc_nh_id &&
4045			    rt->nh->id != cfg->fc_nh_id)
4046				continue;
4047
4048			if (cfg->fc_flags & RTF_CACHE) {
4049				int rc = 0;
4050
4051				if (rt->nh) {
4052					rc = ip6_del_cached_rt_nh(cfg, rt);
4053				} else if (cfg->fc_nh_id) {
4054					continue;
4055				} else {
4056					nh = rt->fib6_nh;
4057					rc = ip6_del_cached_rt(cfg, rt, nh);
4058				}
				/* -ESRCH: a cached route was found but did
				 * not match; keep scanning.
				 */
4059				if (rc != -ESRCH) {
4060					rcu_read_unlock();
4061					return rc;
4062				}
4063				continue;
4064			}
4065
4066			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
4067				continue;
4068			if (cfg->fc_protocol &&
4069			    cfg->fc_protocol != rt->fib6_protocol)
4070				continue;
4071
4072			if (rt->nh) {
4073				if (!fib6_info_hold_safe(rt))
4074					continue;
4075				rcu_read_unlock();
4076
4077				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
4078			}
			/* Request named a nexthop object but this route has
			 * an embedded nexthop.
			 */
4079			if (cfg->fc_nh_id)
4080				continue;
4081
4082			nh = rt->fib6_nh;
4083			if (cfg->fc_ifindex &&
4084			    (!nh->fib_nh_dev ||
4085			     nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
4086				continue;
4087			if (cfg->fc_flags & RTF_GATEWAY &&
4088			    !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
4089				continue;
4090			if (!fib6_info_hold_safe(rt))
4091				continue;
4092			rcu_read_unlock();
4093
4094			/* if gateway was specified only delete the one hop */
4095			if (cfg->fc_flags & RTF_GATEWAY)
4096				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
4097
4098			return __ip6_del_rt_siblings(rt, cfg);
4099		}
4100	}
4101	rcu_read_unlock();
4102
4103	return err;
4104}
4105
/*
 * Process a received ICMPv6 Redirect for the route @dst.
 *
 * @dst: route the redirect applies to (treated as a struct rt6_info)
 * @sk:  not used by this function
 * @skb: the Redirect message; the transport header is read as a
 *       struct rd_msg
 *
 * The message is dropped silently (apart from rate-limited debug output)
 * when it is too short, the destination is multicast, the target is
 * neither equal to the destination nor a link-local unicast address, the
 * receiving interface forwards or does not accept redirects, the ND
 * options are invalid, or @dst is a reject route.
 *
 * Otherwise the neighbour entry for the target is updated and a cached
 * route to the destination via the target (RTF_GATEWAY cleared when the
 * target is the destination itself) is inserted as an exception of the
 * FIB entry behind @dst, followed by a NETEVENT_REDIRECT notification.
 */
4106static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
4107{
4108	struct netevent_redirect netevent;
4109	struct rt6_info *rt, *nrt = NULL;
4110	struct fib6_result res = {};
4111	struct ndisc_options ndopts;
4112	struct inet6_dev *in6_dev;
4113	struct neighbour *neigh;
4114	struct rd_msg *msg;
4115	int optlen, on_link;
4116	u8 *lladdr;
4117
	/* Bytes left for ND options after the fixed redirect header. */
4118	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
4119	optlen -= sizeof(*msg);
4120
4121	if (optlen < 0) {
4122		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
4123		return;
4124	}
4125
4126	msg = (struct rd_msg *)icmp6_hdr(skb);
4127
4128	if (ipv6_addr_is_multicast(&msg->dest)) {
4129		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
4130		return;
4131	}
4132
	/* target == destination means the destination is a neighbour. */
4133	on_link = 0;
4134	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
4135		on_link = 1;
4136	} else if (ipv6_addr_type(&msg->target) !=
4137		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
4138		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
4139		return;
4140	}
4141
4142	in6_dev = __in6_dev_get(skb->dev);
4143	if (!in6_dev)
4144		return;
4145	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
4146		return;
4147
4148	/* RFC2461 8.1:
4149	 *	The IP source address of the Redirect MUST be the same as the current
4150	 *	first-hop router for the specified ICMP Destination Address.
4151	 */
4152
4153	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
4154		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
4155		return;
4156	}
4157
	/* Optional target link-layer address option. */
4158	lladdr = NULL;
4159	if (ndopts.nd_opts_tgt_lladdr) {
4160		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
4161					     skb->dev);
4162		if (!lladdr) {
4163			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
4164			return;
4165		}
4166	}
4167
4168	rt = (struct rt6_info *) dst;
4169	if (rt->rt6i_flags & RTF_REJECT) {
4170		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
4171		return;
4172	}
4173
4174	/* Redirect received -> path was valid.
4175	 * Look, redirects are sent only in response to data packets,
4176	 * so that this nexthop apparently is reachable. --ANK
4177	 */
4178	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
4179
	/* Holds a neighbour reference that is released at "out"; nothing
	 * between here and "out" returns directly.
	 */
4180	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
4181	if (!neigh)
4182		return;
4183
4184	/*
4185	 *	We have finally decided to accept it.
4186	 */
4187
4188	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
4189		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
4190		     NEIGH_UPDATE_F_OVERRIDE|
4191		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
4192				     NEIGH_UPDATE_F_ISROUTER)),
4193		     NDISC_REDIRECT, &ndopts);
4194
4195	rcu_read_lock();
	/* FIB entry this dst was created from; may already be gone. */
4196	res.f6i = rcu_dereference(rt->from);
4197	if (!res.f6i)
4198		goto out;
4199
4200	if (res.f6i->nh) {
		/* Nexthop object: pick the fib6_nh matching the dst's
		 * device and gateway.
		 */
4201		struct fib6_nh_match_arg arg = {
4202			.dev = dst->dev,
4203			.gw = &rt->rt6i_gateway,
4204		};
4205
4206		nexthop_for_each_fib6_nh(res.f6i->nh,
4207					 fib6_nh_find_match, &arg);
4208
4209		/* fib6_info uses a nexthop that does not have fib6_nh
4210		 * using the dst->dev. Should be impossible
4211		 */
4212		if (!arg.match)
4213			goto out;
4214		res.nh = arg.match;
4215	} else {
4216		res.nh = res.f6i->fib6_nh;
4217	}
4218
4219	res.fib6_flags = res.f6i->fib6_flags;
4220	res.fib6_type = res.f6i->fib6_type;
4221	nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
4222	if (!nrt)
4223		goto out;
4224
4225	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
4226	if (on_link)
4227		nrt->rt6i_flags &= ~RTF_GATEWAY;
4228
4229	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
4230
4231	/* rt6_insert_exception() will take care of duplicated exceptions */
4232	if (rt6_insert_exception(nrt, &res)) {
4233		dst_release_immediate(&nrt->dst);
4234		goto out;
4235	}
4236
4237	netevent.old = &rt->dst;
4238	netevent.new = &nrt->dst;
4239	netevent.daddr = &msg->dest;
4240	netevent.neigh = neigh;
4241	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
4242
4243out:
4244	rcu_read_unlock();
4245	neigh_release(neigh);
4246}
4247
4248#ifdef CONFIG_IPV6_ROUTE_INFO
4249static struct fib6_info *rt6_get_route_info(struct net *net,
4250					   const struct in6_addr *prefix, int prefixlen,
4251					   const struct in6_addr *gwaddr,
4252					   struct net_device *dev)
4253{
4254	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
4255	int ifindex = dev->ifindex;
4256	struct fib6_node *fn;
4257	struct fib6_info *rt = NULL;
4258	struct fib6_table *table;
4259
4260	table = fib6_get_table(net, tb_id);
4261	if (!table)
4262		return NULL;
4263
4264	rcu_read_lock();
4265	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
4266	if (!fn)
4267		goto out;
4268
4269	for_each_fib6_node_rt_rcu(fn) {
4270		/* these routes do not use nexthops */
4271		if (rt->nh)
4272			continue;
4273		if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex)
4274			continue;
4275		if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
4276		    !rt->fib6_nh->fib_nh_gw_family)
4277			continue;
4278		if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr))
4279			continue;
4280		if (!fib6_info_hold_safe(rt))
4281			continue;
 
4282		break;
4283	}
4284out:
4285	rcu_read_unlock();
4286	return rt;
4287}
4288
4289static struct fib6_info *rt6_add_route_info(struct net *net,
4290					   const struct in6_addr *prefix, int prefixlen,
4291					   const struct in6_addr *gwaddr,
4292					   struct net_device *dev,
4293					   unsigned int pref)
4294{
4295	struct fib6_config cfg = {
 
4296		.fc_metric	= IP6_RT_PRIO_USER,
4297		.fc_ifindex	= dev->ifindex,
4298		.fc_dst_len	= prefixlen,
4299		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
4300				  RTF_UP | RTF_PREF(pref),
4301		.fc_protocol = RTPROT_RA,
4302		.fc_type = RTN_UNICAST,
4303		.fc_nlinfo.portid = 0,
4304		.fc_nlinfo.nlh = NULL,
4305		.fc_nlinfo.nl_net = net,
4306	};
4307
4308	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
4309	cfg.fc_dst = *prefix;
4310	cfg.fc_gateway = *gwaddr;
4311
4312	/* We should treat it as a default route if prefix length is 0. */
4313	if (!prefixlen)
4314		cfg.fc_flags |= RTF_DEFAULT;
4315
4316	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
4317
4318	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
4319}
4320#endif
4321
4322struct fib6_info *rt6_get_dflt_router(struct net *net,
4323				     const struct in6_addr *addr,
4324				     struct net_device *dev)
4325{
4326	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
4327	struct fib6_info *rt;
4328	struct fib6_table *table;
4329
4330	table = fib6_get_table(net, tb_id);
4331	if (!table)
4332		return NULL;
4333
4334	rcu_read_lock();
4335	for_each_fib6_node_rt_rcu(&table->tb6_root) {
4336		struct fib6_nh *nh;
4337
4338		/* RA routes do not use nexthops */
4339		if (rt->nh)
4340			continue;
4341
4342		nh = rt->fib6_nh;
4343		if (dev == nh->fib_nh_dev &&
4344		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
4345		    ipv6_addr_equal(&nh->fib_nh_gw6, addr))
4346			break;
4347	}
4348	if (rt && !fib6_info_hold_safe(rt))
4349		rt = NULL;
4350	rcu_read_unlock();
4351	return rt;
4352}
4353
4354struct fib6_info *rt6_add_dflt_router(struct net *net,
4355				     const struct in6_addr *gwaddr,
4356				     struct net_device *dev,
4357				     unsigned int pref,
4358				     u32 defrtr_usr_metric)
4359{
4360	struct fib6_config cfg = {
4361		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
4362		.fc_metric	= defrtr_usr_metric,
4363		.fc_ifindex	= dev->ifindex,
4364		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
4365				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
4366		.fc_protocol = RTPROT_RA,
4367		.fc_type = RTN_UNICAST,
4368		.fc_nlinfo.portid = 0,
4369		.fc_nlinfo.nlh = NULL,
4370		.fc_nlinfo.nl_net = net,
4371	};
4372
4373	cfg.fc_gateway = *gwaddr;
4374
4375	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
4376		struct fib6_table *table;
4377
4378		table = fib6_get_table(dev_net(dev), cfg.fc_table);
4379		if (table)
4380			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
4381	}
4382
4383	return rt6_get_dflt_router(net, gwaddr, dev);
4384}
4385
/*
 * Delete autoconfigured default routes from @table.
 *
 * Walks the routes at the table root under rcu_read_lock().  An entry is
 * deleted when it has RTF_DEFAULT or RTF_ADDRCONF set (the test matches
 * either flag) and its device either has no inet6_dev or an accept_ra
 * setting other than 2.  The deletion itself happens outside the RCU read
 * section, so the walk starts over from the beginning after each one.
 * Once nothing is left to delete, RT6_TABLE_HAS_DFLT_ROUTER is cleared.
 */
4386static void __rt6_purge_dflt_routers(struct net *net,
4387				     struct fib6_table *table)
4388{
4389	struct fib6_info *rt;
4390
4391restart:
4392	rcu_read_lock();
4393	for_each_fib6_node_rt_rcu(&table->tb6_root) {
4394		struct net_device *dev = fib6_info_nh_dev(rt);
4395		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
4396
		/* fib6_info_hold_safe() comes last so a reference is only
		 * taken on entries that are going to be deleted.
		 */
4397		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
4398		    (!idev || idev->cnf.accept_ra != 2) &&
4399		    fib6_info_hold_safe(rt)) {
4400			rcu_read_unlock();
4401			ip6_del_rt(net, rt, false);
4402			goto restart;
4403		}
4404	}
4405	rcu_read_unlock();
4406
4407	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
4408}
4409
4410void rt6_purge_dflt_routers(struct net *net)
4411{
 
4412	struct fib6_table *table;
4413	struct hlist_head *head;
4414	unsigned int h;
4415
4416	rcu_read_lock();
 
 
 
4417
4418	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
4419		head = &net->ipv6.fib_table_hash[h];
4420		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
4421			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
4422				__rt6_purge_dflt_routers(net, table);
 
 
 
4423		}
4424	}
4425
4426	rcu_read_unlock();
4427}
4428
4429static void rtmsg_to_fib6_config(struct net *net,
4430				 struct in6_rtmsg *rtmsg,
4431				 struct fib6_config *cfg)
4432{
4433	*cfg = (struct fib6_config){
4434		.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
4435			 : RT6_TABLE_MAIN,
4436		.fc_ifindex = rtmsg->rtmsg_ifindex,
4437		.fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
4438		.fc_expires = rtmsg->rtmsg_info,
4439		.fc_dst_len = rtmsg->rtmsg_dst_len,
4440		.fc_src_len = rtmsg->rtmsg_src_len,
4441		.fc_flags = rtmsg->rtmsg_flags,
4442		.fc_type = rtmsg->rtmsg_type,
4443
4444		.fc_nlinfo.nl_net = net,
4445
4446		.fc_dst = rtmsg->rtmsg_dst,
4447		.fc_src = rtmsg->rtmsg_src,
4448		.fc_gateway = rtmsg->rtmsg_gateway,
4449	};
4450}
4451
4452int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
4453{
4454	struct fib6_config cfg;
 
4455	int err;
4456
4457	if (cmd != SIOCADDRT && cmd != SIOCDELRT)
4458		return -EINVAL;
4459	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
4460		return -EPERM;
 
 
 
 
 
 
 
4461
4462	rtmsg_to_fib6_config(net, rtmsg, &cfg);
 
 
 
 
 
 
 
 
 
 
 
4463
4464	rtnl_lock();
4465	switch (cmd) {
4466	case SIOCADDRT:
4467		err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
4468		break;
4469	case SIOCDELRT:
4470		err = ip6_route_del(&cfg, NULL);
4471		break;
4472	}
4473	rtnl_unlock();
4474	return err;
4475}
4476
4477/*
4478 *	Drop the packet on the floor
4479 */
4480
4481static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
4482{
 
4483	struct dst_entry *dst = skb_dst(skb);
4484	struct net *net = dev_net(dst->dev);
4485	struct inet6_dev *idev;
4486	SKB_DR(reason);
4487	int type;
4488
4489	if (netif_is_l3_master(skb->dev) ||
4490	    dst->dev == net->loopback_dev)
4491		idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
4492	else
4493		idev = ip6_dst_idev(dst);
4494
4495	switch (ipstats_mib_noroutes) {
4496	case IPSTATS_MIB_INNOROUTES:
4497		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
4498		if (type == IPV6_ADDR_ANY) {
4499			SKB_DR_SET(reason, IP_INADDRERRORS);
4500			IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
4501			break;
4502		}
4503		SKB_DR_SET(reason, IP_INNOROUTES);
4504		fallthrough;
4505	case IPSTATS_MIB_OUTNOROUTES:
4506		SKB_DR_OR(reason, IP_OUTNOROUTES);
4507		IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
4508		break;
4509	}
4510
4511	/* Start over by dropping the dst for l3mdev case */
4512	if (netif_is_l3_master(skb->dev))
4513		skb_dst_drop(skb);
4514
4515	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
4516	kfree_skb_reason(skb, reason);
4517	return 0;
4518}
4519
4520static int ip6_pkt_discard(struct sk_buff *skb)
4521{
4522	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
4523}
4524
4525static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
4526{
4527	skb->dev = skb_dst(skb)->dev;
4528	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
4529}
4530
 
 
4531static int ip6_pkt_prohibit(struct sk_buff *skb)
4532{
4533	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
4534}
4535
4536static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
4537{
4538	skb->dev = skb_dst(skb)->dev;
4539	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
4540}
4541
 
 
4542/*
4543 *	Allocate a dst for local (unicast / anycast) address.
4544 */
4545
4546struct fib6_info *addrconf_f6i_alloc(struct net *net,
4547				     struct inet6_dev *idev,
4548				     const struct in6_addr *addr,
4549				     bool anycast, gfp_t gfp_flags,
4550				     struct netlink_ext_ack *extack)
4551{
4552	struct fib6_config cfg = {
4553		.fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
4554		.fc_ifindex = idev->dev->ifindex,
4555		.fc_flags = RTF_UP | RTF_NONEXTHOP,
4556		.fc_dst = *addr,
4557		.fc_dst_len = 128,
4558		.fc_protocol = RTPROT_KERNEL,
4559		.fc_nlinfo.nl_net = net,
4560		.fc_ignore_dev_down = true,
4561	};
4562	struct fib6_info *f6i;
4563
4564	if (anycast) {
4565		cfg.fc_type = RTN_ANYCAST;
4566		cfg.fc_flags |= RTF_ANYCAST;
4567	} else {
4568		cfg.fc_type = RTN_LOCAL;
4569		cfg.fc_flags |= RTF_LOCAL;
4570	}
4571
4572	f6i = ip6_route_info_create(&cfg, gfp_flags, extack);
4573	if (!IS_ERR(f6i)) {
4574		f6i->dst_nocount = true;
 
 
 
 
 
 
 
 
 
 
 
 
 
4575
4576		if (!anycast &&
4577		    (net->ipv6.devconf_all->disable_policy ||
4578		     idev->cnf.disable_policy))
4579			f6i->dst_nopolicy = true;
4580	}
 
4581
4582	return f6i;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4583}
4584
/* remove deleted ip from prefsrc entries */
/* Argument bundle handed through fib6_clean_all() to fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net *net;	/* namespace whose tables are walked */
	struct in6_addr *addr;	/* address that is going away */
};
4590
4591static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
4592{
 
4593	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
4594	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
4595
4596	if (!rt->nh &&
4597	    rt != net->ipv6.fib6_null_entry &&
4598	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr) &&
4599	    !ipv6_chk_addr(net, addr, rt->fib6_nh->fib_nh_dev, 0)) {
4600		spin_lock_bh(&rt6_exception_lock);
4601		/* remove prefsrc entry */
4602		rt->fib6_prefsrc.plen = 0;
4603		spin_unlock_bh(&rt6_exception_lock);
4604	}
4605	return 0;
4606}
4607
4608void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
4609{
4610	struct net *net = dev_net(ifp->idev->dev);
4611	struct arg_dev_net_ip adni = {
 
4612		.net = net,
4613		.addr = &ifp->addr,
4614	};
4615	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
4616}
4617
/* Flag pair that fib6_clean_tohost() requires in full on a route. */
#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT)
 
 
 
4619
4620/* Remove routers and update dst entries when gateway turn into host. */
4621static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
4622{
4623	struct in6_addr *gateway = (struct in6_addr *)arg;
4624	struct fib6_nh *nh;
4625
4626	/* RA routes do not use nexthops */
4627	if (rt->nh)
4628		return 0;
4629
4630	nh = rt->fib6_nh;
4631	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
4632	    nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
4633		return -1;
4634
4635	/* Further clean up cached routes in exception table.
4636	 * This is needed because cached route may have a different
4637	 * gateway than its 'parent' in the case of an ip redirect.
4638	 */
4639	fib6_nh_exceptions_clean_tohost(nh, gateway);
4640
4641	return 0;
4642}
4643
4644void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
4645{
4646	fib6_clean_all(net, fib6_clean_tohost, gateway);
4647}
4648
/*
 * Argument passed through fib6_clean_all() to fib6_ifup() and
 * fib6_ifdown().  Only one member of the union is meaningful per walk.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned char nh_flags;	/* flags to clear; set by rt6_sync_up() */
		unsigned long event;	/* NETDEV_* event; set by rt6_sync_down_dev() */
	};
};
4656
4657static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
4658{
4659	struct fib6_info *iter;
4660	struct fib6_node *fn;
4661
4662	fn = rcu_dereference_protected(rt->fib6_node,
4663			lockdep_is_held(&rt->fib6_table->tb6_lock));
4664	iter = rcu_dereference_protected(fn->leaf,
4665			lockdep_is_held(&rt->fib6_table->tb6_lock));
4666	while (iter) {
4667		if (iter->fib6_metric == rt->fib6_metric &&
4668		    rt6_qualify_for_ecmp(iter))
4669			return iter;
4670		iter = rcu_dereference_protected(iter->fib6_next,
4671				lockdep_is_held(&rt->fib6_table->tb6_lock));
4672	}
4673
4674	return NULL;
4675}
4676
4677/* only called for fib entries with builtin fib6_nh */
4678static bool rt6_is_dead(const struct fib6_info *rt)
4679{
4680	if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD ||
4681	    (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN &&
4682	     ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev)))
4683		return true;
4684
4685	return false;
4686}
4687
4688static int rt6_multipath_total_weight(const struct fib6_info *rt)
4689{
4690	struct fib6_info *iter;
4691	int total = 0;
4692
4693	if (!rt6_is_dead(rt))
4694		total += rt->fib6_nh->fib_nh_weight;
4695
4696	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
4697		if (!rt6_is_dead(iter))
4698			total += iter->fib6_nh->fib_nh_weight;
4699	}
4700
4701	return total;
4702}
4703
4704static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
4705{
4706	int upper_bound = -1;
4707
4708	if (!rt6_is_dead(rt)) {
4709		*weight += rt->fib6_nh->fib_nh_weight;
4710		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
4711						    total) - 1;
4712	}
4713	atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound);
4714}
4715
4716static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
4717{
4718	struct fib6_info *iter;
4719	int weight = 0;
4720
4721	rt6_upper_bound_set(rt, &weight, total);
4722
4723	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4724		rt6_upper_bound_set(iter, &weight, total);
4725}
4726
4727void rt6_multipath_rebalance(struct fib6_info *rt)
4728{
4729	struct fib6_info *first;
4730	int total;
4731
4732	/* In case the entire multipath route was marked for flushing,
4733	 * then there is no need to rebalance upon the removal of every
4734	 * sibling route.
4735	 */
4736	if (!rt->fib6_nsiblings || rt->should_flush)
4737		return;
4738
4739	/* During lookup routes are evaluated in order, so we need to
4740	 * make sure upper bounds are assigned from the first sibling
4741	 * onwards.
4742	 */
4743	first = rt6_multipath_first_sibling(rt);
4744	if (WARN_ON_ONCE(!first))
4745		return;
4746
4747	total = rt6_multipath_total_weight(first);
4748	rt6_multipath_upper_bound_set(first, total);
4749}
4750
4751static int fib6_ifup(struct fib6_info *rt, void *p_arg)
4752{
4753	const struct arg_netdev_event *arg = p_arg;
4754	struct net *net = dev_net(arg->dev);
4755
4756	if (rt != net->ipv6.fib6_null_entry && !rt->nh &&
4757	    rt->fib6_nh->fib_nh_dev == arg->dev) {
4758		rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags;
4759		fib6_update_sernum_upto_root(net, rt);
4760		rt6_multipath_rebalance(rt);
4761	}
4762
4763	return 0;
4764}
4765
4766void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
4767{
4768	struct arg_netdev_event arg = {
4769		.dev = dev,
4770		{
4771			.nh_flags = nh_flags,
4772		},
4773	};
4774
4775	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
4776		arg.nh_flags |= RTNH_F_LINKDOWN;
4777
4778	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
4779}
4780
4781/* only called for fib entries with inline fib6_nh */
4782static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
4783				   const struct net_device *dev)
4784{
4785	struct fib6_info *iter;
4786
4787	if (rt->fib6_nh->fib_nh_dev == dev)
4788		return true;
4789	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4790		if (iter->fib6_nh->fib_nh_dev == dev)
4791			return true;
4792
4793	return false;
4794}
4795
4796static void rt6_multipath_flush(struct fib6_info *rt)
4797{
4798	struct fib6_info *iter;
4799
4800	rt->should_flush = 1;
4801	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4802		iter->should_flush = 1;
4803}
4804
4805static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
4806					     const struct net_device *down_dev)
4807{
4808	struct fib6_info *iter;
4809	unsigned int dead = 0;
4810
4811	if (rt->fib6_nh->fib_nh_dev == down_dev ||
4812	    rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
4813		dead++;
4814	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4815		if (iter->fib6_nh->fib_nh_dev == down_dev ||
4816		    iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
4817			dead++;
4818
4819	return dead;
4820}
4821
4822static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
4823				       const struct net_device *dev,
4824				       unsigned char nh_flags)
4825{
4826	struct fib6_info *iter;
4827
4828	if (rt->fib6_nh->fib_nh_dev == dev)
4829		rt->fib6_nh->fib_nh_flags |= nh_flags;
4830	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4831		if (iter->fib6_nh->fib_nh_dev == dev)
4832			iter->fib6_nh->fib_nh_flags |= nh_flags;
4833}
4834
4835/* called with write lock held for table with rt */
4836static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
4837{
4838	const struct arg_netdev_event *arg = p_arg;
4839	const struct net_device *dev = arg->dev;
4840	struct net *net = dev_net(dev);
4841
4842	if (rt == net->ipv6.fib6_null_entry || rt->nh)
4843		return 0;
4844
4845	switch (arg->event) {
4846	case NETDEV_UNREGISTER:
4847		return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
4848	case NETDEV_DOWN:
4849		if (rt->should_flush)
4850			return -1;
4851		if (!rt->fib6_nsiblings)
4852			return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
4853		if (rt6_multipath_uses_dev(rt, dev)) {
4854			unsigned int count;
4855
4856			count = rt6_multipath_dead_count(rt, dev);
4857			if (rt->fib6_nsiblings + 1 == count) {
4858				rt6_multipath_flush(rt);
4859				return -1;
4860			}
4861			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4862						   RTNH_F_LINKDOWN);
4863			fib6_update_sernum(net, rt);
4864			rt6_multipath_rebalance(rt);
4865		}
4866		return -2;
4867	case NETDEV_CHANGE:
4868		if (rt->fib6_nh->fib_nh_dev != dev ||
4869		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
4870			break;
4871		rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
4872		rt6_multipath_rebalance(rt);
4873		break;
4874	}
4875
4876	return 0;
4877}
4878
4879void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
4880{
4881	struct arg_netdev_event arg = {
4882		.dev = dev,
4883		{
4884			.event = event,
4885		},
4886	};
4887	struct net *net = dev_net(dev);
4888
4889	if (net->ipv6.sysctl.skip_notify_on_dev_down)
4890		fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4891	else
4892		fib6_clean_all(net, fib6_ifdown, &arg);
4893}
4894
4895void rt6_disable_ip(struct net_device *dev, unsigned long event)
4896{
4897	rt6_sync_down_dev(dev, event);
4898	rt6_uncached_list_flush_dev(dev);
4899	neigh_ifdown(&nd_tbl, dev);
4900}
4901
/* Argument passed through fib6_clean_all() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
	struct fib6_info *f6i;	/* route being visited; set by rt6_mtu_change_route() */
};
4907
4908static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg)
4909{
4910	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
4911	struct fib6_info *f6i = arg->f6i;
4912
4913	/* For administrative MTU increase, there is no way to discover
4914	 * IPv6 PMTU increase, so PMTU increase should be updated here.
4915	 * Since RFC 1981 doesn't include administrative MTU increase
4916	 * update PMTU increase is a MUST. (i.e. jumbo frame)
4917	 */
4918	if (nh->fib_nh_dev == arg->dev) {
4919		struct inet6_dev *idev = __in6_dev_get(arg->dev);
4920		u32 mtu = f6i->fib6_pmtu;
4921
4922		if (mtu >= arg->mtu ||
4923		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4924			fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
4925
4926		spin_lock_bh(&rt6_exception_lock);
4927		rt6_exceptions_update_pmtu(idev, nh, arg->mtu);
4928		spin_unlock_bh(&rt6_exception_lock);
4929	}
4930
4931	return 0;
4932}
4933
4934static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
4935{
4936	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4937	struct inet6_dev *idev;
4938
4939	/* In IPv6 pmtu discovery is not optional,
4940	   so that RTAX_MTU lock cannot disable it.
4941	   We still use this lock to block changes
4942	   caused by addrconf/ndisc.
4943	*/
4944
4945	idev = __in6_dev_get(arg->dev);
4946	if (!idev)
4947		return 0;
4948
4949	if (fib6_metric_locked(f6i, RTAX_MTU))
4950		return 0;
4951
4952	arg->f6i = f6i;
4953	if (f6i->nh) {
4954		/* fib6_nh_mtu_change only returns 0, so this is safe */
4955		return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_mtu_change,
4956						arg);
 
 
 
 
 
 
 
 
 
 
 
 
4957	}
4958
4959	return fib6_nh_mtu_change(f6i->fib6_nh, arg);
4960}
4961
4962void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
4963{
4964	struct rt6_mtu_change_arg arg = {
4965		.dev = dev,
4966		.mtu = mtu,
4967	};
4968
4969	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
4970}
4971
/*
 * Netlink attribute policy handed to nlmsg_parse_deprecated() in
 * rtm_to_fib6_config().  The address attributes (RTA_GATEWAY,
 * RTA_PREFSRC) are sized as a struct in6_addr and RTA_MULTIPATH as a
 * struct rtnexthop; the remaining entries are typed scalars or nests.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_UNSPEC]		= { .strict_start_type = RTA_DPORT + 1 },
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_EXPIRES]		= { .type = NLA_U32 },
	[RTA_UID]		= { .type = NLA_U32 },
	[RTA_MARK]		= { .type = NLA_U32 },
	[RTA_TABLE]		= { .type = NLA_U32 },
	[RTA_IP_PROTO]		= { .type = NLA_U8 },
	[RTA_SPORT]		= { .type = NLA_U16 },
	[RTA_DPORT]		= { .type = NLA_U16 },
	[RTA_NH_ID]		= { .type = NLA_U32 },
};
4993
4994static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4995			      struct fib6_config *cfg,
4996			      struct netlink_ext_ack *extack)
4997{
4998	struct rtmsg *rtm;
4999	struct nlattr *tb[RTA_MAX+1];
5000	unsigned int pref;
5001	int err;
5002
5003	err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
5004				     rtm_ipv6_policy, extack);
5005	if (err < 0)
5006		goto errout;
5007
5008	err = -EINVAL;
5009	rtm = nlmsg_data(nlh);
 
5010
5011	if (rtm->rtm_tos) {
5012		NL_SET_ERR_MSG(extack,
5013			       "Invalid dsfield (tos): option not available for IPv6");
5014		goto errout;
5015	}
5016
5017	*cfg = (struct fib6_config){
5018		.fc_table = rtm->rtm_table,
5019		.fc_dst_len = rtm->rtm_dst_len,
5020		.fc_src_len = rtm->rtm_src_len,
5021		.fc_flags = RTF_UP,
5022		.fc_protocol = rtm->rtm_protocol,
5023		.fc_type = rtm->rtm_type,
5024
5025		.fc_nlinfo.portid = NETLINK_CB(skb).portid,
5026		.fc_nlinfo.nlh = nlh,
5027		.fc_nlinfo.nl_net = sock_net(skb->sk),
5028	};
5029
5030	if (rtm->rtm_type == RTN_UNREACHABLE ||
5031	    rtm->rtm_type == RTN_BLACKHOLE ||
5032	    rtm->rtm_type == RTN_PROHIBIT ||
5033	    rtm->rtm_type == RTN_THROW)
5034		cfg->fc_flags |= RTF_REJECT;
5035
5036	if (rtm->rtm_type == RTN_LOCAL)
5037		cfg->fc_flags |= RTF_LOCAL;
5038
5039	if (rtm->rtm_flags & RTM_F_CLONED)
5040		cfg->fc_flags |= RTF_CACHE;
5041
5042	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
5043
5044	if (tb[RTA_NH_ID]) {
5045		if (tb[RTA_GATEWAY]   || tb[RTA_OIF] ||
5046		    tb[RTA_MULTIPATH] || tb[RTA_ENCAP]) {
5047			NL_SET_ERR_MSG(extack,
5048				       "Nexthop specification and nexthop id are mutually exclusive");
5049			goto errout;
5050		}
5051		cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]);
5052	}
5053
5054	if (tb[RTA_GATEWAY]) {
5055		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
5056		cfg->fc_flags |= RTF_GATEWAY;
5057	}
5058	if (tb[RTA_VIA]) {
5059		NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
5060		goto errout;
5061	}
5062
5063	if (tb[RTA_DST]) {
5064		int plen = (rtm->rtm_dst_len + 7) >> 3;
5065
5066		if (nla_len(tb[RTA_DST]) < plen)
5067			goto errout;
5068
5069		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
5070	}
5071
5072	if (tb[RTA_SRC]) {
5073		int plen = (rtm->rtm_src_len + 7) >> 3;
5074
5075		if (nla_len(tb[RTA_SRC]) < plen)
5076			goto errout;
5077
5078		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
5079	}
5080
5081	if (tb[RTA_PREFSRC])
5082		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
5083
5084	if (tb[RTA_OIF])
5085		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
5086
5087	if (tb[RTA_PRIORITY])
5088		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
5089
5090	if (tb[RTA_METRICS]) {
5091		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
5092		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
5093	}
5094
5095	if (tb[RTA_TABLE])
5096		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
5097
5098	if (tb[RTA_MULTIPATH]) {
5099		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
5100		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
5101
5102		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
5103						     cfg->fc_mp_len, extack);
5104		if (err < 0)
5105			goto errout;
5106	}
5107
5108	if (tb[RTA_PREF]) {
5109		pref = nla_get_u8(tb[RTA_PREF]);
5110		if (pref != ICMPV6_ROUTER_PREF_LOW &&
5111		    pref != ICMPV6_ROUTER_PREF_HIGH)
5112			pref = ICMPV6_ROUTER_PREF_MEDIUM;
5113		cfg->fc_flags |= RTF_PREF(pref);
5114	}
5115
5116	if (tb[RTA_ENCAP])
5117		cfg->fc_encap = tb[RTA_ENCAP];
5118
5119	if (tb[RTA_ENCAP_TYPE]) {
5120		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
5121
5122		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
5123		if (err < 0)
5124			goto errout;
5125	}
5126
5127	if (tb[RTA_EXPIRES]) {
5128		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
5129
5130		if (addrconf_finite_timeout(timeout)) {
5131			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
5132			cfg->fc_flags |= RTF_EXPIRES;
5133		}
5134	}
5135
5136	err = 0;
5137errout:
5138	return err;
5139}
5140
/*
 * One pending nexthop of a multipath request, queued on the list built
 * by ip6_route_multipath_add().
 */
struct rt6_nh {
	struct fib6_info *fib6_info;	/* route created for this nexthop */
	struct fib6_config r_cfg;	/* copy of the config it was created from */
	struct list_head next;		/* linkage in the pending list */
};
5146
5147static int ip6_route_info_append(struct net *net,
5148				 struct list_head *rt6_nh_list,
5149				 struct fib6_info *rt,
5150				 struct fib6_config *r_cfg)
5151{
5152	struct rt6_nh *nh;
5153	int err = -EEXIST;
5154
5155	list_for_each_entry(nh, rt6_nh_list, next) {
5156		/* check if fib6_info already exists */
5157		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
5158			return err;
5159	}
5160
5161	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
5162	if (!nh)
5163		return -ENOMEM;
5164	nh->fib6_info = rt;
5165	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
5166	list_add_tail(&nh->next, rt6_nh_list);
5167
5168	return 0;
5169}
5170
5171static void ip6_route_mpath_notify(struct fib6_info *rt,
5172				   struct fib6_info *rt_last,
5173				   struct nl_info *info,
5174				   __u16 nlflags)
5175{
5176	/* if this is an APPEND route, then rt points to the first route
5177	 * inserted and rt_last points to last route inserted. Userspace
5178	 * wants a consistent dump of the route which starts at the first
5179	 * nexthop. Since sibling routes are always added at the end of
5180	 * the list, find the first sibling of the last route appended
5181	 */
5182	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
5183		rt = list_first_entry(&rt_last->fib6_siblings,
5184				      struct fib6_info,
5185				      fib6_siblings);
5186	}
5187
5188	if (rt)
5189		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
5190}
5191
5192static bool ip6_route_mpath_should_notify(const struct fib6_info *rt)
5193{
5194	bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
5195	bool should_notify = false;
5196	struct fib6_info *leaf;
5197	struct fib6_node *fn;
5198
5199	rcu_read_lock();
5200	fn = rcu_dereference(rt->fib6_node);
5201	if (!fn)
5202		goto out;
5203
5204	leaf = rcu_dereference(fn->leaf);
5205	if (!leaf)
5206		goto out;
5207
5208	if (rt == leaf ||
5209	    (rt_can_ecmp && rt->fib6_metric == leaf->fib6_metric &&
5210	     rt6_qualify_for_ecmp(leaf)))
5211		should_notify = true;
5212out:
5213	rcu_read_unlock();
5214
5215	return should_notify;
5216}
5217
5218static int fib6_gw_from_attr(struct in6_addr *gw, struct nlattr *nla,
5219			     struct netlink_ext_ack *extack)
5220{
5221	if (nla_len(nla) < sizeof(*gw)) {
5222		NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_GATEWAY");
5223		return -EINVAL;
5224	}
5225
5226	*gw = nla_get_in6_addr(nla);
5227
5228	return 0;
5229}
5230
/*
 * Add (or replace / append to) a multipath route described by the
 * RTA_MULTIPATH payload stored in cfg->fc_mp / cfg->fc_mp_len.
 *
 * The function works in three phases:
 *   1. walk the rtnexthop entries, create one fib6_info per nexthop and
 *      queue it on a local list;
 *   2. insert the queued routes one by one with notifications
 *      suppressed;
 *   3. send a single in-kernel and a single userspace notification.
 * If an insert or the in-kernel notifier fails, the routes inserted so
 * far are deleted again.  The list entries and the references they hold
 * are released on every path that goes through 'cleanup'.
 *
 * Returns 0 on success or a negative errno; -EINVAL when the payload
 * yields no nexthop at all.
 */
static int ip6_route_multipath_add(struct fib6_config *cfg,
				   struct netlink_ext_ack *extack)
{
	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
	struct nl_info *info = &cfg->fc_nlinfo;
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct fib6_info *rt;
	struct rt6_nh *err_nh;
	struct rt6_nh *nh, *nh_safe;
	__u16 nlflags;
	int remaining;
	int attrlen;
	int err = 1;
	int nhn = 0;
	int replace = (cfg->fc_nlinfo.nlh &&
		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
	LIST_HEAD(rt6_nh_list);

	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
		nlflags |= NLM_F_APPEND;

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
	 * fib6_info structs per nexthop
	 */
	while (rtnh_ok(rtnh, remaining)) {
		/* start each nexthop from the shared config, then override */
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
							extack);
				if (err)
					goto cleanup;

				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);

			/* RTA_ENCAP_TYPE length checked in
			 * lwtunnel_valid_encap_type_attr
			 */
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}

		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			rt = NULL;
			goto cleanup;
		}
		if (!rt6_qualify_for_ecmp(rt)) {
			err = -EINVAL;
			NL_SET_ERR_MSG(extack,
				       "Device only routes can not be added for IPv6 using the multipath API.");
			fib6_info_release(rt);
			goto cleanup;
		}

		/* rtnh_hops is stored as weight minus one */
		rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;

		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
					    rt, &r_cfg);
		if (err) {
			/* rt never made it onto the list; drop it here */
			fib6_info_release(rt);
			goto cleanup;
		}

		rtnh = rtnh_next(rtnh, &remaining);
	}

	if (list_empty(&rt6_nh_list)) {
		NL_SET_ERR_MSG(extack,
			       "Invalid nexthop configuration - no valid nexthops");
		return -EINVAL;
	}

	/* for add and replace send one notification with all nexthops.
	 * Skip the notification in fib6_add_rt2node and send one with
	 * the full route when done
	 */
	info->skip_notify = 1;

	/* For add and replace, send one notification with all nexthops. For
	 * append, send one notification with all appended nexthops.
	 */
	info->skip_notify_kernel = 1;

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		err = __ip6_ins_rt(nh->fib6_info, info, extack);

		if (err) {
			if (replace && nhn)
				NL_SET_ERR_MSG_MOD(extack,
						   "multipath route replace failed (check consistency of installed routes)");
			/* remember where to stop when rolling back */
			err_nh = nh;
			goto add_errout;
		}
		/* save reference to last route successfully inserted */
		rt_last = nh->fib6_info;

		/* save reference to first route for notification */
		if (!rt_notif)
			rt_notif = nh->fib6_info;

		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 */
		if (cfg->fc_nlinfo.nlh) {
			cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
							     NLM_F_REPLACE);
			cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
		}
		nhn++;
	}

	/* An in-kernel notification should only be sent in case the new
	 * multipath route is added as the first route in the node, or if
	 * it was appended to it. We pass 'rt_notif' since it is the first
	 * sibling and might allow us to skip some checks in the replace case.
	 */
	if (ip6_route_mpath_should_notify(rt_notif)) {
		enum fib_event_type fib_event;

		/* more siblings than we just inserted means we appended */
		if (rt_notif->fib6_nsiblings != nhn - 1)
			fib_event = FIB_EVENT_ENTRY_APPEND;
		else
			fib_event = FIB_EVENT_ENTRY_REPLACE;

		err = call_fib6_multipath_entry_notifiers(info->nl_net,
							  fib_event, rt_notif,
							  nhn - 1, extack);
		if (err) {
			/* Delete all the siblings that were just added */
			err_nh = NULL;
			goto add_errout;
		}
	}

	/* success ... tell user about new route */
	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
	goto cleanup;

add_errout:
	/* send notification for routes that were added so that
	 * the delete notifications sent by ip6_route_del are
	 * coherent
	 */
	if (rt_notif)
		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);

	/* Delete routes that were already added */
	list_for_each_entry(nh, &rt6_nh_list, next) {
		if (err_nh == nh)
			break;
		ip6_route_del(&nh->r_cfg, extack);
	}

cleanup:
	/* drop the list's reference on each route and free the entries */
	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
		fib6_info_release(nh->fib6_info);
		list_del(&nh->next);
		kfree(nh);
	}

	return err;
}
5416
5417static int ip6_route_multipath_del(struct fib6_config *cfg,
5418				   struct netlink_ext_ack *extack)
5419{
5420	struct fib6_config r_cfg;
5421	struct rtnexthop *rtnh;
5422	int last_err = 0;
5423	int remaining;
5424	int attrlen;
5425	int err;
5426
5427	remaining = cfg->fc_mp_len;
5428	rtnh = (struct rtnexthop *)cfg->fc_mp;
5429
5430	/* Parse a Multipath Entry */
5431	while (rtnh_ok(rtnh, remaining)) {
5432		memcpy(&r_cfg, cfg, sizeof(*cfg));
5433		if (rtnh->rtnh_ifindex)
5434			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
5435
5436		attrlen = rtnh_attrlen(rtnh);
5437		if (attrlen > 0) {
5438			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
5439
5440			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
5441			if (nla) {
5442				err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
5443							extack);
5444				if (err) {
5445					last_err = err;
5446					goto next_rtnh;
5447				}
5448
5449				r_cfg.fc_flags |= RTF_GATEWAY;
5450			}
5451		}
5452		err = ip6_route_del(&r_cfg, extack);
5453		if (err)
5454			last_err = err;
5455
5456next_rtnh:
5457		rtnh = rtnh_next(rtnh, &remaining);
5458	}
5459
5460	return last_err;
5461}
5462
5463static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
5464			      struct netlink_ext_ack *extack)
5465{
5466	struct fib6_config cfg;
5467	int err;
5468
5469	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
5470	if (err < 0)
5471		return err;
5472
5473	if (cfg.fc_nh_id &&
5474	    !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) {
5475		NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
5476		return -EINVAL;
5477	}
5478
5479	if (cfg.fc_mp)
5480		return ip6_route_multipath_del(&cfg, extack);
5481	else {
5482		cfg.fc_delete_all_nh = 1;
5483		return ip6_route_del(&cfg, extack);
5484	}
5485}
5486
5487static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
5488			      struct netlink_ext_ack *extack)
5489{
5490	struct fib6_config cfg;
5491	int err;
5492
5493	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
5494	if (err < 0)
5495		return err;
5496
5497	if (cfg.fc_metric == 0)
5498		cfg.fc_metric = IP6_RT_PRIO_USER;
5499
5500	if (cfg.fc_mp)
5501		return ip6_route_multipath_add(&cfg, extack);
5502	else
5503		return ip6_route_add(&cfg, GFP_KERNEL, extack);
5504}
5505
5506/* add the overhead of this fib6_nh to nexthop_len */
5507static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
5508{
5509	int *nexthop_len = arg;
5510
5511	*nexthop_len += nla_total_size(0)	 /* RTA_MULTIPATH */
5512		     + NLA_ALIGN(sizeof(struct rtnexthop))
5513		     + nla_total_size(16); /* RTA_GATEWAY */
5514
5515	if (nh->fib_nh_lws) {
5516		/* RTA_ENCAP_TYPE */
5517		*nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
5518		/* RTA_ENCAP */
5519		*nexthop_len += nla_total_size(2);
5520	}
5521
5522	return 0;
5523}
5524
5525static size_t rt6_nlmsg_size(struct fib6_info *f6i)
5526{
5527	int nexthop_len;
5528
5529	if (f6i->nh) {
5530		nexthop_len = nla_total_size(4); /* RTA_NH_ID */
5531		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
5532					 &nexthop_len);
5533	} else {
5534		struct fib6_info *sibling, *next_sibling;
5535		struct fib6_nh *nh = f6i->fib6_nh;
5536
5537		nexthop_len = 0;
5538		if (f6i->fib6_nsiblings) {
5539			rt6_nh_nlmsg_size(nh, &nexthop_len);
5540
5541			list_for_each_entry_safe(sibling, next_sibling,
5542						 &f6i->fib6_siblings, fib6_siblings) {
5543				rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
5544			}
5545		}
5546		nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
5547	}
5548
5549	return NLMSG_ALIGN(sizeof(struct rtmsg))
5550	       + nla_total_size(16) /* RTA_SRC */
5551	       + nla_total_size(16) /* RTA_DST */
5552	       + nla_total_size(16) /* RTA_GATEWAY */
5553	       + nla_total_size(16) /* RTA_PREFSRC */
5554	       + nla_total_size(4) /* RTA_TABLE */
5555	       + nla_total_size(4) /* RTA_IIF */
5556	       + nla_total_size(4) /* RTA_OIF */
5557	       + nla_total_size(4) /* RTA_PRIORITY */
5558	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
5559	       + nla_total_size(sizeof(struct rta_cacheinfo))
5560	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
5561	       + nla_total_size(1) /* RTA_PREF */
5562	       + nexthop_len;
5563}
5564
5565static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
5566				 unsigned char *flags)
 
 
 
5567{
5568	if (nexthop_is_multipath(nh)) {
5569		struct nlattr *mp;
 
 
 
5570
5571		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
5572		if (!mp)
5573			goto nla_put_failure;
5574
5575		if (nexthop_mpath_fill_node(skb, nh, AF_INET6))
5576			goto nla_put_failure;
5577
5578		nla_nest_end(skb, mp);
5579	} else {
5580		struct fib6_nh *fib6_nh;
5581
5582		fib6_nh = nexthop_fib6_nh(nh);
5583		if (fib_nexthop_info(skb, &fib6_nh->nh_common, AF_INET6,
5584				     flags, false) < 0)
5585			goto nla_put_failure;
5586	}
5587
5588	return 0;
5589
5590nla_put_failure:
5591	return -EMSGSIZE;
5592}
5593
5594static int rt6_fill_node(struct net *net, struct sk_buff *skb,
5595			 struct fib6_info *rt, struct dst_entry *dst,
5596			 struct in6_addr *dest, struct in6_addr *src,
5597			 int iif, int type, u32 portid, u32 seq,
5598			 unsigned int flags)
5599{
5600	struct rt6_info *rt6 = (struct rt6_info *)dst;
5601	struct rt6key *rt6_dst, *rt6_src;
5602	u32 *pmetrics, table, rt6_flags;
5603	unsigned char nh_flags = 0;
5604	struct nlmsghdr *nlh;
5605	struct rtmsg *rtm;
5606	long expires = 0;
5607
5608	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
5609	if (!nlh)
5610		return -EMSGSIZE;
5611
5612	if (rt6) {
5613		rt6_dst = &rt6->rt6i_dst;
5614		rt6_src = &rt6->rt6i_src;
5615		rt6_flags = rt6->rt6i_flags;
5616	} else {
5617		rt6_dst = &rt->fib6_dst;
5618		rt6_src = &rt->fib6_src;
5619		rt6_flags = rt->fib6_flags;
5620	}
5621
5622	rtm = nlmsg_data(nlh);
5623	rtm->rtm_family = AF_INET6;
5624	rtm->rtm_dst_len = rt6_dst->plen;
5625	rtm->rtm_src_len = rt6_src->plen;
5626	rtm->rtm_tos = 0;
5627	if (rt->fib6_table)
5628		table = rt->fib6_table->tb6_id;
5629	else
5630		table = RT6_TABLE_UNSPEC;
5631	rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
5632	if (nla_put_u32(skb, RTA_TABLE, table))
5633		goto nla_put_failure;
5634
5635	rtm->rtm_type = rt->fib6_type;
 
 
 
 
 
5636	rtm->rtm_flags = 0;
5637	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
5638	rtm->rtm_protocol = rt->fib6_protocol;
 
 
 
 
 
 
5639
5640	if (rt6_flags & RTF_CACHE)
5641		rtm->rtm_flags |= RTM_F_CLONED;
5642
5643	if (dest) {
5644		if (nla_put_in6_addr(skb, RTA_DST, dest))
5645			goto nla_put_failure;
5646		rtm->rtm_dst_len = 128;
5647	} else if (rtm->rtm_dst_len)
5648		if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
5649			goto nla_put_failure;
5650#ifdef CONFIG_IPV6_SUBTREES
5651	if (src) {
5652		if (nla_put_in6_addr(skb, RTA_SRC, src))
5653			goto nla_put_failure;
5654		rtm->rtm_src_len = 128;
5655	} else if (rtm->rtm_src_len &&
5656		   nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
5657		goto nla_put_failure;
5658#endif
5659	if (iif) {
5660#ifdef CONFIG_IPV6_MROUTE
5661		if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
5662			int err = ip6mr_get_route(net, skb, rtm, portid);
5663
5664			if (err == 0)
5665				return 0;
5666			if (err < 0)
5667				goto nla_put_failure;
 
 
 
 
 
5668		} else
5669#endif
5670			if (nla_put_u32(skb, RTA_IIF, iif))
5671				goto nla_put_failure;
5672	} else if (dest) {
5673		struct in6_addr saddr_buf;
5674		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
5675		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
5676			goto nla_put_failure;
5677	}
5678
5679	if (rt->fib6_prefsrc.plen) {
5680		struct in6_addr saddr_buf;
5681		saddr_buf = rt->fib6_prefsrc.addr;
5682		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
5683			goto nla_put_failure;
5684	}
5685
5686	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
5687	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
5688		goto nla_put_failure;
5689
5690	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
5691		goto nla_put_failure;
5692
5693	/* For multipath routes, walk the siblings list and add
5694	 * each as a nexthop within RTA_MULTIPATH.
5695	 */
5696	if (rt6) {
5697		if (rt6_flags & RTF_GATEWAY &&
5698		    nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
5699			goto nla_put_failure;
5700
5701		if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
5702			goto nla_put_failure;
5703
5704		if (dst->lwtstate &&
5705		    lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
5706			goto nla_put_failure;
5707	} else if (rt->fib6_nsiblings) {
5708		struct fib6_info *sibling, *next_sibling;
5709		struct nlattr *mp;
5710
5711		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
5712		if (!mp)
5713			goto nla_put_failure;
5714
5715		if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
5716				    rt->fib6_nh->fib_nh_weight, AF_INET6,
5717				    0) < 0)
5718			goto nla_put_failure;
5719
5720		list_for_each_entry_safe(sibling, next_sibling,
5721					 &rt->fib6_siblings, fib6_siblings) {
5722			if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
5723					    sibling->fib6_nh->fib_nh_weight,
5724					    AF_INET6, 0) < 0)
5725				goto nla_put_failure;
5726		}
5727
5728		nla_nest_end(skb, mp);
5729	} else if (rt->nh) {
5730		if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
5731			goto nla_put_failure;
5732
5733		if (nexthop_is_blackhole(rt->nh))
5734			rtm->rtm_type = RTN_BLACKHOLE;
5735
5736		if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) &&
5737		    rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
5738			goto nla_put_failure;
5739
5740		rtm->rtm_flags |= nh_flags;
5741	} else {
5742		if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, AF_INET6,
5743				     &nh_flags, false) < 0)
5744			goto nla_put_failure;
5745
5746		rtm->rtm_flags |= nh_flags;
5747	}
5748
5749	if (rt6_flags & RTF_EXPIRES) {
5750		expires = dst ? dst->expires : rt->expires;
5751		expires -= jiffies;
5752	}
5753
5754	if (!dst) {
5755		if (READ_ONCE(rt->offload))
5756			rtm->rtm_flags |= RTM_F_OFFLOAD;
5757		if (READ_ONCE(rt->trap))
5758			rtm->rtm_flags |= RTM_F_TRAP;
5759		if (READ_ONCE(rt->offload_failed))
5760			rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED;
5761	}
5762
5763	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
5764		goto nla_put_failure;
5765
5766	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
5767		goto nla_put_failure;
5768
5769
5770	nlmsg_end(skb, nlh);
5771	return 0;
5772
5773nla_put_failure:
5774	nlmsg_cancel(skb, nlh);
5775	return -EMSGSIZE;
5776}
5777
5778static int fib6_info_nh_uses_dev(struct fib6_nh *nh, void *arg)
5779{
5780	const struct net_device *dev = arg;
5781
5782	if (nh->fib_nh_dev == dev)
5783		return 1;
5784
5785	return 0;
5786}
5787
5788static bool fib6_info_uses_dev(const struct fib6_info *f6i,
5789			       const struct net_device *dev)
5790{
5791	if (f6i->nh) {
5792		struct net_device *_dev = (struct net_device *)dev;
5793
5794		return !!nexthop_for_each_fib6_nh(f6i->nh,
5795						  fib6_info_nh_uses_dev,
5796						  _dev);
5797	}
5798
5799	if (f6i->fib6_nh->fib_nh_dev == dev)
5800		return true;
5801
5802	if (f6i->fib6_nsiblings) {
5803		struct fib6_info *sibling, *next_sibling;
5804
5805		list_for_each_entry_safe(sibling, next_sibling,
5806					 &f6i->fib6_siblings, fib6_siblings) {
5807			if (sibling->fib6_nh->fib_nh_dev == dev)
5808				return true;
5809		}
5810	}
5811
5812	return false;
5813}
5814
/* State shared across rt6_nh_dump_exceptions() calls for one route. */
struct fib6_nh_exception_dump_walker {
	struct rt6_rtnl_dump_arg *dump;	/* dump context (net, skb, callback) */
	struct fib6_info *rt;		/* route whose exceptions are dumped */
	unsigned int flags;		/* netlink message flags for each entry */
	unsigned int skip;		/* entries still to be skipped */
	unsigned int count;		/* entries handled so far */
};
5822
5823static int rt6_nh_dump_exceptions(struct fib6_nh *nh, void *arg)
5824{
5825	struct fib6_nh_exception_dump_walker *w = arg;
5826	struct rt6_rtnl_dump_arg *dump = w->dump;
5827	struct rt6_exception_bucket *bucket;
5828	struct rt6_exception *rt6_ex;
5829	int i, err;
5830
5831	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
5832	if (!bucket)
5833		return 0;
5834
5835	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
5836		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
5837			if (w->skip) {
5838				w->skip--;
5839				continue;
5840			}
5841
5842			/* Expiration of entries doesn't bump sernum, insertion
5843			 * does. Removal is triggered by insertion, so we can
5844			 * rely on the fact that if entries change between two
5845			 * partial dumps, this node is scanned again completely,
5846			 * see rt6_insert_exception() and fib6_dump_table().
5847			 *
5848			 * Count expired entries we go through as handled
5849			 * entries that we'll skip next time, in case of partial
5850			 * node dump. Otherwise, if entries expire meanwhile,
5851			 * we'll skip the wrong amount.
5852			 */
5853			if (rt6_check_expired(rt6_ex->rt6i)) {
5854				w->count++;
5855				continue;
5856			}
5857
5858			err = rt6_fill_node(dump->net, dump->skb, w->rt,
5859					    &rt6_ex->rt6i->dst, NULL, NULL, 0,
5860					    RTM_NEWROUTE,
5861					    NETLINK_CB(dump->cb->skb).portid,
5862					    dump->cb->nlh->nlmsg_seq, w->flags);
5863			if (err)
5864				return err;
5865
5866			w->count++;
5867		}
5868		bucket++;
5869	}
5870
5871	return 0;
5872}
5873
5874/* Return -1 if done with node, number of handled routes on partial dump */
5875int rt6_dump_route(struct fib6_info *rt, void *p_arg, unsigned int skip)
5876{
5877	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
5878	struct fib_dump_filter *filter = &arg->filter;
5879	unsigned int flags = NLM_F_MULTI;
5880	struct net *net = arg->net;
5881	int count = 0;
5882
5883	if (rt == net->ipv6.fib6_null_entry)
5884		return -1;
5885
5886	if ((filter->flags & RTM_F_PREFIX) &&
5887	    !(rt->fib6_flags & RTF_PREFIX_RT)) {
5888		/* success since this is not a prefix route */
5889		return -1;
5890	}
5891	if (filter->filter_set &&
5892	    ((filter->rt_type  && rt->fib6_type != filter->rt_type) ||
5893	     (filter->dev      && !fib6_info_uses_dev(rt, filter->dev)) ||
5894	     (filter->protocol && rt->fib6_protocol != filter->protocol))) {
5895		return -1;
5896	}
5897
5898	if (filter->filter_set ||
5899	    !filter->dump_routes || !filter->dump_exceptions) {
5900		flags |= NLM_F_DUMP_FILTERED;
5901	}
5902
5903	if (filter->dump_routes) {
5904		if (skip) {
5905			skip--;
5906		} else {
5907			if (rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL,
5908					  0, RTM_NEWROUTE,
5909					  NETLINK_CB(arg->cb->skb).portid,
5910					  arg->cb->nlh->nlmsg_seq, flags)) {
5911				return 0;
5912			}
5913			count++;
5914		}
5915	}
5916
5917	if (filter->dump_exceptions) {
5918		struct fib6_nh_exception_dump_walker w = { .dump = arg,
5919							   .rt = rt,
5920							   .flags = flags,
5921							   .skip = skip,
5922							   .count = 0 };
5923		int err;
5924
5925		rcu_read_lock();
5926		if (rt->nh) {
5927			err = nexthop_for_each_fib6_nh(rt->nh,
5928						       rt6_nh_dump_exceptions,
5929						       &w);
5930		} else {
5931			err = rt6_nh_dump_exceptions(rt->fib6_nh, &w);
5932		}
5933		rcu_read_unlock();
5934
5935		if (err)
5936			return count + w.count;
5937	}
5938
5939	return -1;
5940}
5941
5942static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
5943					const struct nlmsghdr *nlh,
5944					struct nlattr **tb,
5945					struct netlink_ext_ack *extack)
5946{
5947	struct rtmsg *rtm;
5948	int i, err;
5949
5950	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
5951		NL_SET_ERR_MSG_MOD(extack,
5952				   "Invalid header for get route request");
5953		return -EINVAL;
5954	}
5955
5956	if (!netlink_strict_get_check(skb))
5957		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
5958					      rtm_ipv6_policy, extack);
5959
5960	rtm = nlmsg_data(nlh);
5961	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
5962	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
5963	    rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
5964	    rtm->rtm_type) {
5965		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
5966		return -EINVAL;
5967	}
5968	if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
5969		NL_SET_ERR_MSG_MOD(extack,
5970				   "Invalid flags for get route request");
5971		return -EINVAL;
5972	}
5973
5974	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
5975					    rtm_ipv6_policy, extack);
5976	if (err)
5977		return err;
5978
5979	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
5980	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
5981		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
5982		return -EINVAL;
5983	}
5984
5985	for (i = 0; i <= RTA_MAX; i++) {
5986		if (!tb[i])
5987			continue;
5988
5989		switch (i) {
5990		case RTA_SRC:
5991		case RTA_DST:
5992		case RTA_IIF:
5993		case RTA_OIF:
5994		case RTA_MARK:
5995		case RTA_UID:
5996		case RTA_SPORT:
5997		case RTA_DPORT:
5998		case RTA_IP_PROTO:
5999			break;
6000		default:
6001			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
6002			return -EINVAL;
6003		}
6004	}
6005
6006	return 0;
6007}
6008
6009static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
6010			      struct netlink_ext_ack *extack)
6011{
6012	struct net *net = sock_net(in_skb->sk);
6013	struct nlattr *tb[RTA_MAX+1];
6014	int err, iif = 0, oif = 0;
6015	struct fib6_info *from;
6016	struct dst_entry *dst;
6017	struct rt6_info *rt;
6018	struct sk_buff *skb;
6019	struct rtmsg *rtm;
6020	struct flowi6 fl6 = {};
6021	bool fibmatch;
6022
6023	err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
6024	if (err < 0)
6025		goto errout;
6026
6027	err = -EINVAL;
6028	rtm = nlmsg_data(nlh);
6029	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
6030	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
6031
6032	if (tb[RTA_SRC]) {
6033		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
6034			goto errout;
6035
6036		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
6037	}
6038
6039	if (tb[RTA_DST]) {
6040		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
6041			goto errout;
6042
6043		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
6044	}
6045
6046	if (tb[RTA_IIF])
6047		iif = nla_get_u32(tb[RTA_IIF]);
6048
6049	if (tb[RTA_OIF])
6050		oif = nla_get_u32(tb[RTA_OIF]);
6051
6052	if (tb[RTA_MARK])
6053		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
6054
6055	if (tb[RTA_UID])
6056		fl6.flowi6_uid = make_kuid(current_user_ns(),
6057					   nla_get_u32(tb[RTA_UID]));
6058	else
6059		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
6060
6061	if (tb[RTA_SPORT])
6062		fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
6063
6064	if (tb[RTA_DPORT])
6065		fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
6066
6067	if (tb[RTA_IP_PROTO]) {
6068		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
6069						  &fl6.flowi6_proto, AF_INET6,
6070						  extack);
6071		if (err)
6072			goto errout;
6073	}
6074
6075	if (iif) {
6076		struct net_device *dev;
6077		int flags = 0;
6078
6079		rcu_read_lock();
6080
6081		dev = dev_get_by_index_rcu(net, iif);
6082		if (!dev) {
6083			rcu_read_unlock();
6084			err = -ENODEV;
6085			goto errout;
6086		}
6087
6088		fl6.flowi6_iif = iif;
6089
6090		if (!ipv6_addr_any(&fl6.saddr))
6091			flags |= RT6_LOOKUP_F_HAS_SADDR;
6092
6093		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
6094
6095		rcu_read_unlock();
6096	} else {
6097		fl6.flowi6_oif = oif;
6098
6099		dst = ip6_route_output(net, NULL, &fl6);
6100	}
6101
6102
6103	rt = container_of(dst, struct rt6_info, dst);
6104	if (rt->dst.error) {
6105		err = rt->dst.error;
6106		ip6_rt_put(rt);
6107		goto errout;
6108	}
6109
6110	if (rt == net->ipv6.ip6_null_entry) {
6111		err = rt->dst.error;
6112		ip6_rt_put(rt);
6113		goto errout;
6114	}
6115
6116	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
6117	if (!skb) {
6118		ip6_rt_put(rt);
6119		err = -ENOBUFS;
6120		goto errout;
6121	}
6122
 
 
 
 
 
 
 
6123	skb_dst_set(skb, &rt->dst);
6124
6125	rcu_read_lock();
6126	from = rcu_dereference(rt->from);
6127	if (from) {
6128		if (fibmatch)
6129			err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
6130					    iif, RTM_NEWROUTE,
6131					    NETLINK_CB(in_skb).portid,
6132					    nlh->nlmsg_seq, 0);
6133		else
6134			err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
6135					    &fl6.saddr, iif, RTM_NEWROUTE,
6136					    NETLINK_CB(in_skb).portid,
6137					    nlh->nlmsg_seq, 0);
6138	} else {
6139		err = -ENETUNREACH;
6140	}
6141	rcu_read_unlock();
6142
6143	if (err < 0) {
6144		kfree_skb(skb);
6145		goto errout;
6146	}
6147
6148	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
6149errout:
6150	return err;
6151}
6152
6153void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
6154		     unsigned int nlm_flags)
6155{
6156	struct sk_buff *skb;
6157	struct net *net = info->nl_net;
6158	u32 seq;
6159	int err;
6160
6161	err = -ENOBUFS;
6162	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
6163
6164	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
6165	if (!skb)
6166		goto errout;
6167
6168	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
6169			    event, info->portid, seq, nlm_flags);
6170	if (err < 0) {
6171		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
6172		WARN_ON(err == -EMSGSIZE);
6173		kfree_skb(skb);
6174		goto errout;
6175	}
6176	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
6177		    info->nlh, gfp_any());
6178	return;
6179errout:
6180	if (err < 0)
6181		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
6182}
6183
6184void fib6_rt_update(struct net *net, struct fib6_info *rt,
6185		    struct nl_info *info)
6186{
6187	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
6188	struct sk_buff *skb;
6189	int err = -ENOBUFS;
6190
6191	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
6192	if (!skb)
6193		goto errout;
6194
6195	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
6196			    RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE);
6197	if (err < 0) {
6198		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
6199		WARN_ON(err == -EMSGSIZE);
6200		kfree_skb(skb);
6201		goto errout;
6202	}
6203	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
6204		    info->nlh, gfp_any());
6205	return;
6206errout:
6207	if (err < 0)
6208		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
6209}
6210
6211void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
6212			    bool offload, bool trap, bool offload_failed)
6213{
6214	struct sk_buff *skb;
6215	int err;
6216
6217	if (READ_ONCE(f6i->offload) == offload &&
6218	    READ_ONCE(f6i->trap) == trap &&
6219	    READ_ONCE(f6i->offload_failed) == offload_failed)
6220		return;
6221
6222	WRITE_ONCE(f6i->offload, offload);
6223	WRITE_ONCE(f6i->trap, trap);
6224
6225	/* 2 means send notifications only if offload_failed was changed. */
6226	if (net->ipv6.sysctl.fib_notify_on_flag_change == 2 &&
6227	    READ_ONCE(f6i->offload_failed) == offload_failed)
6228		return;
6229
6230	WRITE_ONCE(f6i->offload_failed, offload_failed);
6231
6232	if (!rcu_access_pointer(f6i->fib6_node))
6233		/* The route was removed from the tree, do not send
6234		 * notification.
6235		 */
6236		return;
6237
6238	if (!net->ipv6.sysctl.fib_notify_on_flag_change)
6239		return;
6240
6241	skb = nlmsg_new(rt6_nlmsg_size(f6i), GFP_KERNEL);
6242	if (!skb) {
6243		err = -ENOBUFS;
6244		goto errout;
6245	}
6246
6247	err = rt6_fill_node(net, skb, f6i, NULL, NULL, NULL, 0, RTM_NEWROUTE, 0,
6248			    0, 0);
6249	if (err < 0) {
6250		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
6251		WARN_ON(err == -EMSGSIZE);
6252		kfree_skb(skb);
6253		goto errout;
6254	}
6255
6256	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ROUTE, NULL, GFP_KERNEL);
6257	return;
6258
6259errout:
6260	rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
6261}
6262EXPORT_SYMBOL(fib6_info_hw_flags_set);
6263
6264static int ip6_route_dev_notify(struct notifier_block *this,
6265				unsigned long event, void *ptr)
6266{
6267	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
6268	struct net *net = dev_net(dev);
6269
6270	if (!(dev->flags & IFF_LOOPBACK))
6271		return NOTIFY_OK;
6272
6273	if (event == NETDEV_REGISTER) {
6274		net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev;
6275		net->ipv6.ip6_null_entry->dst.dev = dev;
6276		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
6277#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6278		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
6279		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
6280		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
6281		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
6282#endif
6283	 } else if (event == NETDEV_UNREGISTER &&
6284		    dev->reg_state != NETREG_UNREGISTERED) {
6285		/* NETDEV_UNREGISTER could be fired for multiple times by
6286		 * netdev_wait_allrefs(). Make sure we only call this once.
6287		 */
6288		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
6289#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6290		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
6291		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
6292#endif
6293	}
6294
6295	return NOTIFY_OK;
6296}
6297
6298/*
6299 *	/proc
6300 */
6301
6302#ifdef CONFIG_PROC_FS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6303static int rt6_stats_seq_show(struct seq_file *seq, void *v)
6304{
6305	struct net *net = (struct net *)seq->private;
6306	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
6307		   net->ipv6.rt6_stats->fib_nodes,
6308		   net->ipv6.rt6_stats->fib_route_nodes,
6309		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
6310		   net->ipv6.rt6_stats->fib_rt_entries,
6311		   net->ipv6.rt6_stats->fib_rt_cache,
6312		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
6313		   net->ipv6.rt6_stats->fib_discarded_routes);
6314
6315	return 0;
6316}
 
 
 
 
 
 
 
 
 
 
 
 
 
6317#endif	/* CONFIG_PROC_FS */
6318
6319#ifdef CONFIG_SYSCTL
6320
6321static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
6322			      void *buffer, size_t *lenp, loff_t *ppos)
 
6323{
6324	struct net *net;
6325	int delay;
6326	int ret;
6327	if (!write)
6328		return -EINVAL;
6329
6330	net = (struct net *)ctl->extra1;
6331	delay = net->ipv6.sysctl.flush_delay;
6332	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
6333	if (ret)
6334		return ret;
6335
6336	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
6337	return 0;
6338}
6339
6340static struct ctl_table ipv6_route_table_template[] = {
6341	{
6342		.procname	=	"max_size",
6343		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
6344		.maxlen		=	sizeof(int),
6345		.mode		=	0644,
6346		.proc_handler	=	proc_dointvec,
6347	},
6348	{
6349		.procname	=	"gc_thresh",
6350		.data		=	&ip6_dst_ops_template.gc_thresh,
6351		.maxlen		=	sizeof(int),
6352		.mode		=	0644,
6353		.proc_handler	=	proc_dointvec,
6354	},
6355	{
6356		.procname	=	"flush",
6357		.data		=	&init_net.ipv6.sysctl.flush_delay,
6358		.maxlen		=	sizeof(int),
6359		.mode		=	0200,
6360		.proc_handler	=	ipv6_sysctl_rtcache_flush
6361	},
6362	{
6363		.procname	=	"gc_min_interval",
6364		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
6365		.maxlen		=	sizeof(int),
6366		.mode		=	0644,
6367		.proc_handler	=	proc_dointvec_jiffies,
6368	},
6369	{
6370		.procname	=	"gc_timeout",
6371		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
6372		.maxlen		=	sizeof(int),
6373		.mode		=	0644,
6374		.proc_handler	=	proc_dointvec_jiffies,
6375	},
6376	{
6377		.procname	=	"gc_interval",
6378		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
6379		.maxlen		=	sizeof(int),
6380		.mode		=	0644,
6381		.proc_handler	=	proc_dointvec_jiffies,
6382	},
6383	{
6384		.procname	=	"gc_elasticity",
6385		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
6386		.maxlen		=	sizeof(int),
6387		.mode		=	0644,
6388		.proc_handler	=	proc_dointvec,
6389	},
6390	{
6391		.procname	=	"mtu_expires",
6392		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
6393		.maxlen		=	sizeof(int),
6394		.mode		=	0644,
6395		.proc_handler	=	proc_dointvec_jiffies,
6396	},
6397	{
6398		.procname	=	"min_adv_mss",
6399		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
6400		.maxlen		=	sizeof(int),
6401		.mode		=	0644,
6402		.proc_handler	=	proc_dointvec,
6403	},
6404	{
6405		.procname	=	"gc_min_interval_ms",
6406		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
6407		.maxlen		=	sizeof(int),
6408		.mode		=	0644,
6409		.proc_handler	=	proc_dointvec_ms_jiffies,
6410	},
6411	{
6412		.procname	=	"skip_notify_on_dev_down",
6413		.data		=	&init_net.ipv6.sysctl.skip_notify_on_dev_down,
6414		.maxlen		=	sizeof(u8),
6415		.mode		=	0644,
6416		.proc_handler	=	proc_dou8vec_minmax,
6417		.extra1		=	SYSCTL_ZERO,
6418		.extra2		=	SYSCTL_ONE,
6419	},
6420	{ }
6421};
6422
6423struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
6424{
6425	struct ctl_table *table;
6426
6427	table = kmemdup(ipv6_route_table_template,
6428			sizeof(ipv6_route_table_template),
6429			GFP_KERNEL);
6430
6431	if (table) {
6432		table[0].data = &net->ipv6.sysctl.ip6_rt_max_size;
 
6433		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
6434		table[2].data = &net->ipv6.sysctl.flush_delay;
6435		table[2].extra1 = net;
6436		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
6437		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
6438		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
6439		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
6440		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
6441		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
6442		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
6443		table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
6444
6445		/* Don't export sysctls to unprivileged users */
6446		if (net->user_ns != &init_user_ns)
6447			table[1].procname = NULL;
6448	}
6449
6450	return table;
6451}
6452
6453size_t ipv6_route_sysctl_table_size(struct net *net)
6454{
6455	/* Don't export sysctls to unprivileged users */
6456	if (net->user_ns != &init_user_ns)
6457		return 1;
6458
6459	return ARRAY_SIZE(ipv6_route_table_template);
6460}
6461#endif
6462
6463static int __net_init ip6_route_net_init(struct net *net)
6464{
6465	int ret = -ENOMEM;
6466
6467	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
6468	       sizeof(net->ipv6.ip6_dst_ops));
6469
6470	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
6471		goto out_ip6_dst_ops;
6472
6473	net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true);
6474	if (!net->ipv6.fib6_null_entry)
6475		goto out_ip6_dst_entries;
6476	memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template,
6477	       sizeof(*net->ipv6.fib6_null_entry));
6478
6479	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
6480					   sizeof(*net->ipv6.ip6_null_entry),
6481					   GFP_KERNEL);
6482	if (!net->ipv6.ip6_null_entry)
6483		goto out_fib6_null_entry;
 
 
6484	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6485	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
6486			 ip6_template_metrics, true);
6487	INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->dst.rt_uncached);
6488
6489#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6490	net->ipv6.fib6_has_custom_rules = false;
6491	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
6492					       sizeof(*net->ipv6.ip6_prohibit_entry),
6493					       GFP_KERNEL);
6494	if (!net->ipv6.ip6_prohibit_entry)
6495		goto out_ip6_null_entry;
 
 
6496	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6497	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
6498			 ip6_template_metrics, true);
6499	INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->dst.rt_uncached);
6500
6501	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
6502					       sizeof(*net->ipv6.ip6_blk_hole_entry),
6503					       GFP_KERNEL);
6504	if (!net->ipv6.ip6_blk_hole_entry)
6505		goto out_ip6_prohibit_entry;
 
 
6506	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6507	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
6508			 ip6_template_metrics, true);
6509	INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->dst.rt_uncached);
6510#ifdef CONFIG_IPV6_SUBTREES
6511	net->ipv6.fib6_routes_require_src = 0;
6512#endif
6513#endif
6514
6515	net->ipv6.sysctl.flush_delay = 0;
6516	net->ipv6.sysctl.ip6_rt_max_size = INT_MAX;
6517	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
6518	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
6519	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
6520	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
6521	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
6522	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
6523	net->ipv6.sysctl.skip_notify_on_dev_down = 0;
6524
6525	atomic_set(&net->ipv6.ip6_rt_gc_expire, 30*HZ);
 
 
 
 
6526
6527	ret = 0;
6528out:
6529	return ret;
6530
6531#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6532out_ip6_prohibit_entry:
6533	kfree(net->ipv6.ip6_prohibit_entry);
6534out_ip6_null_entry:
6535	kfree(net->ipv6.ip6_null_entry);
6536#endif
6537out_fib6_null_entry:
6538	kfree(net->ipv6.fib6_null_entry);
6539out_ip6_dst_entries:
6540	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
6541out_ip6_dst_ops:
6542	goto out;
6543}
6544
6545static void __net_exit ip6_route_net_exit(struct net *net)
6546{
6547	kfree(net->ipv6.fib6_null_entry);
 
 
 
6548	kfree(net->ipv6.ip6_null_entry);
6549#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6550	kfree(net->ipv6.ip6_prohibit_entry);
6551	kfree(net->ipv6.ip6_blk_hole_entry);
6552#endif
6553	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
6554}
6555
6556static int __net_init ip6_route_net_init_late(struct net *net)
6557{
6558#ifdef CONFIG_PROC_FS
6559	if (!proc_create_net("ipv6_route", 0, net->proc_net,
6560			     &ipv6_route_seq_ops,
6561			     sizeof(struct ipv6_route_iter)))
6562		return -ENOMEM;
6563
6564	if (!proc_create_net_single("rt6_stats", 0444, net->proc_net,
6565				    rt6_stats_seq_show, NULL)) {
6566		remove_proc_entry("ipv6_route", net->proc_net);
6567		return -ENOMEM;
6568	}
6569#endif
6570	return 0;
6571}
6572
6573static void __net_exit ip6_route_net_exit_late(struct net *net)
6574{
6575#ifdef CONFIG_PROC_FS
6576	remove_proc_entry("ipv6_route", net->proc_net);
6577	remove_proc_entry("rt6_stats", net->proc_net);
6578#endif
6579}
6580
6581static struct pernet_operations ip6_route_net_ops = {
6582	.init = ip6_route_net_init,
6583	.exit = ip6_route_net_exit,
6584};
6585
6586static int __net_init ipv6_inetpeer_init(struct net *net)
6587{
6588	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
6589
6590	if (!bp)
6591		return -ENOMEM;
6592	inet_peer_base_init(bp);
6593	net->ipv6.peers = bp;
6594	return 0;
6595}
6596
6597static void __net_exit ipv6_inetpeer_exit(struct net *net)
6598{
6599	struct inet_peer_base *bp = net->ipv6.peers;
6600
6601	net->ipv6.peers = NULL;
6602	inetpeer_invalidate_tree(bp);
6603	kfree(bp);
6604}
6605
6606static struct pernet_operations ipv6_inetpeer_ops = {
6607	.init	=	ipv6_inetpeer_init,
6608	.exit	=	ipv6_inetpeer_exit,
6609};
6610
6611static struct pernet_operations ip6_route_net_late_ops = {
6612	.init = ip6_route_net_init_late,
6613	.exit = ip6_route_net_exit_late,
6614};
6615
6616static struct notifier_block ip6_route_dev_notifier = {
6617	.notifier_call = ip6_route_dev_notify,
6618	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
6619};
6620
6621void __init ip6_route_init_special_entries(void)
6622{
6623	/* Registering of the loopback is done before this portion of code,
6624	 * the loopback reference in rt6_info will not be taken, do it
6625	 * manually for init_net */
6626	init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev;
6627	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
6628	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6629  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
6630	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
6631	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6632	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
6633	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6634  #endif
6635}
6636
6637#if IS_BUILTIN(CONFIG_IPV6)
6638#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
6639DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
6640
6641BTF_ID_LIST(btf_fib6_info_id)
6642BTF_ID(struct, fib6_info)
6643
6644static const struct bpf_iter_seq_info ipv6_route_seq_info = {
6645	.seq_ops		= &ipv6_route_seq_ops,
6646	.init_seq_private	= bpf_iter_init_seq_net,
6647	.fini_seq_private	= bpf_iter_fini_seq_net,
6648	.seq_priv_size		= sizeof(struct ipv6_route_iter),
6649};
6650
6651static struct bpf_iter_reg ipv6_route_reg_info = {
6652	.target			= "ipv6_route",
6653	.ctx_arg_info_size	= 1,
6654	.ctx_arg_info		= {
6655		{ offsetof(struct bpf_iter__ipv6_route, rt),
6656		  PTR_TO_BTF_ID_OR_NULL },
6657	},
6658	.seq_info		= &ipv6_route_seq_info,
6659};
6660
6661static int __init bpf_iter_register(void)
6662{
6663	ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id;
6664	return bpf_iter_reg_target(&ipv6_route_reg_info);
6665}
6666
6667static void bpf_iter_unregister(void)
6668{
6669	bpf_iter_unreg_target(&ipv6_route_reg_info);
6670}
6671#endif
6672#endif
6673
6674int __init ip6_route_init(void)
6675{
6676	int ret;
6677	int cpu;
6678
6679	ret = -ENOMEM;
6680	ip6_dst_ops_template.kmem_cachep =
6681		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
6682				  SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
6683	if (!ip6_dst_ops_template.kmem_cachep)
6684		goto out;
6685
6686	ret = dst_entries_init(&ip6_dst_blackhole_ops);
6687	if (ret)
6688		goto out_kmem_cache;
6689
6690	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
6691	if (ret)
6692		goto out_dst_entries;
6693
6694	ret = register_pernet_subsys(&ip6_route_net_ops);
6695	if (ret)
6696		goto out_register_inetpeer;
6697
6698	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
6699
 
 
 
 
 
 
 
 
 
 
 
6700	ret = fib6_init();
6701	if (ret)
6702		goto out_register_subsys;
6703
6704	ret = xfrm6_init();
6705	if (ret)
6706		goto out_fib6_init;
6707
6708	ret = fib6_rules_init();
6709	if (ret)
6710		goto xfrm6_init;
6711
6712	ret = register_pernet_subsys(&ip6_route_net_late_ops);
6713	if (ret)
 
 
6714		goto fib6_rules_init;
6715
6716	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
6717				   inet6_rtm_newroute, NULL, 0);
6718	if (ret < 0)
6719		goto out_register_late_subsys;
6720
6721	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
6722				   inet6_rtm_delroute, NULL, 0);
6723	if (ret < 0)
6724		goto out_register_late_subsys;
6725
6726	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
6727				   inet6_rtm_getroute, NULL,
6728				   RTNL_FLAG_DOIT_UNLOCKED);
6729	if (ret < 0)
6730		goto out_register_late_subsys;
6731
6732	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
6733	if (ret)
6734		goto out_register_late_subsys;
6735
6736#if IS_BUILTIN(CONFIG_IPV6)
6737#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
6738	ret = bpf_iter_register();
6739	if (ret)
6740		goto out_register_late_subsys;
6741#endif
6742#endif
6743
6744	for_each_possible_cpu(cpu) {
6745		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
6746
6747		INIT_LIST_HEAD(&ul->head);
6748		INIT_LIST_HEAD(&ul->quarantine);
6749		spin_lock_init(&ul->lock);
6750	}
6751
6752out:
6753	return ret;
6754
6755out_register_late_subsys:
6756	rtnl_unregister_all(PF_INET6);
6757	unregister_pernet_subsys(&ip6_route_net_late_ops);
6758fib6_rules_init:
6759	fib6_rules_cleanup();
6760xfrm6_init:
6761	xfrm6_fini();
6762out_fib6_init:
6763	fib6_gc_cleanup();
6764out_register_subsys:
6765	unregister_pernet_subsys(&ip6_route_net_ops);
6766out_register_inetpeer:
6767	unregister_pernet_subsys(&ipv6_inetpeer_ops);
6768out_dst_entries:
6769	dst_entries_destroy(&ip6_dst_blackhole_ops);
6770out_kmem_cache:
6771	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
6772	goto out;
6773}
6774
6775void ip6_route_cleanup(void)
6776{
6777#if IS_BUILTIN(CONFIG_IPV6)
6778#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
6779	bpf_iter_unregister();
6780#endif
6781#endif
6782	unregister_netdevice_notifier(&ip6_route_dev_notifier);
6783	unregister_pernet_subsys(&ip6_route_net_late_ops);
6784	fib6_rules_cleanup();
6785	xfrm6_fini();
6786	fib6_gc_cleanup();
6787	unregister_pernet_subsys(&ipv6_inetpeer_ops);
6788	unregister_pernet_subsys(&ip6_route_net_ops);
6789	dst_entries_destroy(&ip6_dst_blackhole_ops);
6790	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
6791}